codegencommon.cpp source code [CoreCLR/jit/codegencommon.cpp]

1	// Licensed to the .NET Foundation under one or more agreements.
2	// The .NET Foundation licenses this file to you under the MIT license.
3	// See the LICENSE file in the project root for more information.
4
5	/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*
6	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7	XX XX
8	XX Code Generator Common: XX
9	XX Methods common to all architectures and register allocation strategies XX
10	XX XX
11	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13	*/
14
// TODO-Cleanup: There are additional methods in CodeGen.cpp that are almost
// identical, and which should probably be moved here.
17
18	#include "jitpch.h"
19	#ifdef _MSC_VER
20	#pragma hdrstop
21	#endif
22	#include "codegen.h"
23
24	#include "gcinfo.h"
25	#include "emit.h"
26
27	#ifndef JIT32_GCENCODER
28	#include "gcinfoencoder.h"
29	#endif
30
31	/***************************************************************************/
32
33	const BYTE genTypeSizes[] = {
34	#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
35	#include "typelist.h"
36	#undef DEF_TP
37	};
38
39	const BYTE genTypeAlignments[] = {
40	#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
41	#include "typelist.h"
42	#undef DEF_TP
43	};
44
45	const BYTE genTypeStSzs[] = {
46	#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
47	#include "typelist.h"
48	#undef DEF_TP
49	};
50
51	const BYTE genActualTypes[] = {
52	#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
53	#include "typelist.h"
54	#undef DEF_TP
55	};
56
57	void CodeGenInterface::setFramePointerRequiredEH(bool value)
58	{
59	m_cgFramePointerRequired = value;
60
61	#ifndef JIT32_GCENCODER
62	if (value)
63	{
64	// EnumGcRefs will only enumerate slots in aborted frames
65	// if they are fully-interruptible. So if we have a catch
66	// or finally that will keep frame-vars alive, we need to
67	// force fully-interruptible.
68	CLANG_FORMAT_COMMENT_ANCHOR;
69
70	#ifdef DEBUG
71	if (verbose)
72	{
73	printf("Method has EH, marking method as fully interruptible\n");
74	}
75	#endif
76
77	m_cgInterruptible = true;
78	}
79	#endif // JIT32_GCENCODER
80	}
81
82	/***************************************************************************/
83	CodeGenInterface* getCodeGenerator(Compiler* comp)
84	{
85	return new (comp, CMK_Codegen) CodeGen (comp);
86	}
87
88	// CodeGen constructor
89	CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
90	: gcInfo (theCompiler), regSet (theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr)
91	{
92	}
93
94	/***************************************************************************/
95
96	CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface (theCompiler)
97	{
98	#if defined(_TARGET_XARCH_)
99	negBitmaskFlt = nullptr;
100	negBitmaskDbl = nullptr;
101	absBitmaskFlt = nullptr;
102	absBitmaskDbl = nullptr;
103	u8ToDblBitmask = nullptr;
104	#endif // defined(_TARGET_XARCH_)
105
106	#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_)
107	m_stkArgVarNum = BAD_VAR_NUM;
108	#endif
109
110	#if defined(UNIX_X86_ABI)
111	curNestedAlignment = `0`;
112	maxNestedAlignment = `0`;
113	#endif
114
115	gcInfo.regSet = &regSet;
116	m_cgEmitter = new (compiler->getAllocator()) emitter ();
117	m_cgEmitter->codeGen = this;
118	m_cgEmitter->gcInfo = &gcInfo;
119
120	#ifdef DEBUG
121	setVerbose(compiler->verbose);
122	#endif // DEBUG
123
124	regSet.tmpInit();
125
126	instInit();
127
128	#ifdef LATE_DISASM
129	getDisAssembler().disInit(compiler);
130	#endif
131
132	#ifdef DEBUG
133	genTempLiveChg = true;
134	genTrnslLocalVarCount = `0`;
135
136	// Shouldn't be used before it is set in genFnProlog()
137	compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler);
138
139	#if defined(_TARGET_XARCH_)
140	// Shouldn't be used before it is set in genFnProlog()
141	compiler->compCalleeFPRegsSavedMask = (regMaskTP)-`1`;
142	#endif // defined(_TARGET_XARCH_)
143	#endif // DEBUG
144
145	#ifdef _TARGET_AMD64_
146	// This will be set before final frame layout.
147	compiler->compVSQuirkStackPaddingNeeded = `0`;
148
149	// Set to true if we perform the Quirk that fixes the PPP issue
150	compiler->compQuirkForPPPflag = false;
151	#endif // _TARGET_AMD64_
152
153	// Initialize the IP-mapping logic.
154	compiler->genIPmappingList = nullptr;
155	compiler->genIPmappingLast = nullptr;
156	compiler->genCallSite2ILOffsetMap = nullptr;
157
158	/ Assume that we not fully interruptible /
159
160	genInterruptible = false;
161	#ifdef _TARGET_ARMARCH_
162	hasTailCalls = false;
163	#endif // _TARGET_ARMARCH_
164	#ifdef DEBUG
165	genInterruptibleUsed = false;
166	genCurDispOffset = (unsigned)-`1`;
167	#endif
168	}
169
170	void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg)
171	{
172	tree->gtRegNum = reg;
173	}
174
175	#if defined(_TARGET_X86_) \|\| defined(_TARGET_ARM_)
176
177	//---------------------------------------------------------------------
178	// genTotalFrameSize - return the "total" size of the stack frame, including local size
179	// and callee-saved register size. There are a few things "missing" depending on the
180	// platform. The function genCallerSPtoInitialSPdelta() includes those things.
181	//
182	// For ARM, this doesn't include the prespilled registers.
183	//
184	// For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
185	// It also doesn't include the pushed return address.
186	//
187	// Return value:
188	// Frame size
189
190	int CodeGenInterface::genTotalFrameSize()
191	{
192	assert(!IsUninitialized(compiler->compCalleeRegsPushed));
193
194	int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
195
196	assert(totalFrameSize >= `0`);
197	return totalFrameSize;
198	}
199
200	//---------------------------------------------------------------------
201	// genSPtoFPdelta - return the offset from SP to the frame pointer.
202	// This number is going to be positive, since SP must be at the lowest
203	// address.
204	//
205	// There must be a frame pointer to call this function!
206
207	int CodeGenInterface::genSPtoFPdelta()
208	{
209	assert(isFramePointerUsed());
210
211	int delta;
212
213	delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
214
215	assert(delta >= `0`);
216	return delta;
217	}
218
219	//---------------------------------------------------------------------
220	// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
221	// This number is going to be negative, since the Caller-SP is at a higher
222	// address than the frame pointer.
223	//
224	// There must be a frame pointer to call this function!
225
226	int CodeGenInterface::genCallerSPtoFPdelta()
227	{
228	assert(isFramePointerUsed());
229	int callerSPtoFPdelta = `0`;
230
231	#if defined(_TARGET_ARM_)
232	// On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
233	callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
234	callerSPtoFPdelta -= `2` * REGSIZE_BYTES;
235	#elif defined(_TARGET_X86_)
236	// Thanks to ebp chaining, the difference between ebp-based addresses
237	// and caller-SP-relative addresses is just the 2 pointers:
238	// return address
239	// pushed ebp
240	callerSPtoFPdelta -= `2` * REGSIZE_BYTES;
241	#else
242	#error "Unknown _TARGET_"
243	#endif // _TARGET_*
244
245	assert(callerSPtoFPdelta <= `0`);
246	return callerSPtoFPdelta;
247	}
248
249	//---------------------------------------------------------------------
250	// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
251	//
252	// This number will be negative.
253
254	int CodeGenInterface::genCallerSPtoInitialSPdelta()
255	{
256	int callerSPtoSPdelta = `0`;
257
258	#if defined(_TARGET_ARM_)
259	callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
260	callerSPtoSPdelta -= genTotalFrameSize();
261	#elif defined(_TARGET_X86_)
262	callerSPtoSPdelta -= genTotalFrameSize();
263	callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
264
265	// compCalleeRegsPushed does not account for the frame pointer
266	// TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
267	if (isFramePointerUsed())
268	{
269	callerSPtoSPdelta -= REGSIZE_BYTES;
270	}
271	#else
272	#error "Unknown _TARGET_"
273	#endif // _TARGET_*
274
275	assert(callerSPtoSPdelta <= `0`);
276	return callerSPtoSPdelta;
277	}
278
279	#endif // defined(_TARGET_X86_) \|\| defined(_TARGET_ARM_)
280
281	/*****************************************************************************
282	* Should we round simple operations (assignments, arithmetic operations, etc.)
283	*/
284
285	// inline
286	// static
287	bool CodeGen::genShouldRoundFP()
288	{
289	RoundLevel roundLevel = getRoundFloatLevel();
290
291	switch (roundLevel)
292	{
293	case ROUND_NEVER:
294	case ROUND_CMP_CONST:
295	case ROUND_CMP:
296	return false;
297
298	default:
299	assert(roundLevel == ROUND_ALWAYS);
300	return true;
301	}
302	}
303
304	/*****************************************************************************
305	*
306	* Initialize some global variables.
307	*/
308
309	void CodeGen::genPrepForCompiler()
310	{
311	treeLifeUpdater = new (compiler, CMK_bitset) TreeLifeUpdater<true>(compiler);
312
313	/ Figure out which non-register variables hold pointers /
314
315	VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
316
317	// Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
318	// in a register (i.e. they live on the stack for all or part of their lifetime).
319	// Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
320
321	unsigned varNum;
322	LclVarDsc* varDsc;
323	for (varNum = `0`, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
324	{
325	if (varDsc->lvTracked \|\| varDsc->lvIsRegCandidate())
326	{
327	if (!varDsc->lvRegister && compiler->lvaIsGCTracked(varDsc))
328	{
329	VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
330	}
331	}
332	}
333	VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
334	genLastLiveMask = RBM_NONE;
335	#ifdef DEBUG
336	compiler->fgBBcountAtCodegen = compiler->fgBBcount;
337	#endif
338	}
339
340	/*****************************************************************************
341	* To report exception handling information to the VM, we need the size of the exception
342	* handling regions. To compute that, we need to emit labels for the beginning block of
343	* an EH region, and the block that immediately follows a region. Go through the EH
344	* table and mark all these blocks with BBF_HAS_LABEL to make this happen.
345	*
346	* The beginning blocks of the EH regions already should have this flag set.
347	*
348	* No blocks should be added or removed after this.
349	*
350	* This code is closely couple with genReportEH() in the sense that any block
351	* that this procedure has determined it needs to have a label has to be selected
352	* using the same logic both here and in genReportEH(), so basically any time there is
353	* a change in the way we handle EH reporting, we have to keep the logic of these two
354	* methods 'in sync'.
355	*/
356
357	void CodeGen::genPrepForEHCodegen()
358	{
359	assert(!compiler->fgSafeBasicBlockCreation);
360
361	EHblkDsc* HBtab;
362	EHblkDsc* HBtabEnd;
363
364	bool anyFinallys = false;
365
366	for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
367	HBtab < HBtabEnd; HBtab++)
368	{
369	assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
370	assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
371
372	if (HBtab->ebdTryLast->bbNext != nullptr)
373	{
374	HBtab->ebdTryLast->bbNext->bbFlags \|= BBF_HAS_LABEL;
375	}
376
377	if (HBtab->ebdHndLast->bbNext != nullptr)
378	{
379	HBtab->ebdHndLast->bbNext->bbFlags \|= BBF_HAS_LABEL;
380	}
381
382	if (HBtab->HasFilter())
383	{
384	assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
385	// The block after the last block of the filter is
386	// the handler begin block, which we already asserted
387	// has BBF_HAS_LABEL set.
388	}
389
390	#if FEATURE_EH_CALLFINALLY_THUNKS
391	if (HBtab->HasFinallyHandler())
392	{
393	anyFinallys = true;
394	}
395	#endif // FEATURE_EH_CALLFINALLY_THUNKS
396	}
397
398	#if FEATURE_EH_CALLFINALLY_THUNKS
399	if (anyFinallys)
400	{
401	for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
402	{
403	if (block->bbJumpKind == BBJ_CALLFINALLY)
404	{
405	BasicBlock* bbToLabel = block->bbNext;
406	if (block->isBBCallAlwaysPair())
407	{
408	bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
409	}
410	if (bbToLabel != nullptr)
411	{
412	bbToLabel->bbFlags \|= BBF_HAS_LABEL;
413	}
414	} // block is BBJ_CALLFINALLY
415	} // for each block
416	} // if (anyFinallys)
417	#endif // FEATURE_EH_CALLFINALLY_THUNKS
418	}
419
420	void CodeGenInterface::genUpdateLife(GenTree* tree)
421	{
422	treeLifeUpdater->UpdateLife(tree);
423	}
424
425	void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
426	{
427	compiler->compUpdateLife</ForCodeGen/ true>(newLife);
428	}
429
430	// Return the register mask for the given register variable
431	// inline
432	regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
433	{
434	regMaskTP regMask = RBM_NONE;
435
436	assert(varDsc->lvIsInReg());
437
438	if (varTypeIsFloating(varDsc->TypeGet()))
439	{
440	regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
441	}
442	else
443	{
444	regMask = genRegMask(varDsc->lvRegNum);
445	}
446	return regMask;
447	}
448
449	// Return the register mask for the given lclVar or regVar tree node
450	// inline
451	regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
452	{
453	assert(tree->gtOper == GT_LCL_VAR);
454
455	regMaskTP regMask = RBM_NONE;
456	const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
457	if (varDsc->lvPromoted)
458	{
459	for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
460	{
461	noway_assert(compiler->lvaTable[i].lvIsStructField);
462	if (compiler->lvaTable[i].lvIsInReg())
463	{
464	regMask \|= genGetRegMask(&compiler->lvaTable[i]);
465	}
466	}
467	}
468	else if (varDsc->lvIsInReg())
469	{
470	regMask = genGetRegMask(varDsc);
471	}
472	return regMask;
473	}
474
475	// The given lclVar is either going live (being born) or dying.
476	// It might be both going live and dying (that is, it is a dead store) under MinOpts.
477	// Update regSet.rsMaskVars accordingly.
478	// inline
479	void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
480	{
481	regMaskTP regMask = genGetRegMask(varDsc);
482
483	#ifdef DEBUG
484	if (compiler->verbose)
485	{
486	printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
487	varDsc->PrintVarReg();
488	printf(" is becoming %s ", (isDying) ? "dead" : "live");
489	Compiler::printTreeID(tree);
490	printf("\n");
491	}
492	#endif // DEBUG
493
494	if (isDying)
495	{
496	// We'd like to be able to assert the following, however if we are walking
497	// through a qmark/colon tree, we may encounter multiple last-use nodes.
498	// assert((regSet.rsMaskVars & regMask) == regMask);
499	regSet.RemoveMaskVars(regMask);
500	}
501	else
502	{
503	assert((regSet.rsMaskVars & regMask) == `0`);
504	regSet.AddMaskVars(regMask);
505	}
506	}
507
508	//----------------------------------------------------------------------
509	// compHelperCallKillSet: Gets a register mask that represents the kill set for a helper call.
510	// Not all JIT Helper calls follow the standard ABI on the target architecture.
511	//
512	// TODO-CQ: Currently this list is incomplete (not all helpers calls are
513	// enumerated) and not 100% accurate (some killsets are bigger than
514	// what they really are).
515	// There's some work to be done in several places in the JIT to
516	// accurately track the registers that are getting killed by
517	// helper calls:
518	// a) LSRA needs several changes to accomodate more precise killsets
519	// for every helper call it sees (both explicitly [easy] and
520	// implicitly [hard])
521	// b) Currently for AMD64, when we generate code for a helper call
522	// we're independently over-pessimizing the killsets of the call
523	// (independently from LSRA) and this needs changes
524	// both in CodeGenAmd64.cpp and emitx86.cpp.
525	//
526	// The best solution for this problem would be to try to centralize
527	// the killset information in a single place but then make the
528	// corresponding changes so every code generation phase is in sync
529	// about this.
530	//
531	// The interim solution is to only add known helper calls that don't
532	// follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
533	//
534	// Arguments:
535	// helper - The helper being inquired about
536	//
537	// Return Value:
538	// Mask of register kills -- registers whose values are no longer guaranteed to be the same.
539	//
540	regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
541	{
542	switch (helper)
543	{
544	case CORINFO_HELP_ASSIGN_BYREF:
545	#if defined(_TARGET_AMD64_)
546	return RBM_RSI \| RBM_RDI \| RBM_CALLEE_TRASH_NOGC;
547	#elif defined(_TARGET_ARMARCH_)
548	return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;
549	#elif defined(_TARGET_X86_)
550	return RBM_ESI \| RBM_EDI \| RBM_ECX;
551	#else
552	NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
553	return RBM_CALLEE_TRASH;
554	#endif
555
556	#if defined(_TARGET_ARMARCH_)
557	case CORINFO_HELP_ASSIGN_REF:
558	case CORINFO_HELP_CHECKED_ASSIGN_REF:
559	return RBM_CALLEE_TRASH_WRITEBARRIER;
560	#endif
561
562	case CORINFO_HELP_PROF_FCN_ENTER:
563	#ifdef RBM_PROFILER_ENTER_TRASH
564	return RBM_PROFILER_ENTER_TRASH;
565	#else
566	NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch");
567	#endif
568
569	case CORINFO_HELP_PROF_FCN_LEAVE:
570	#ifdef RBM_PROFILER_LEAVE_TRASH
571	return RBM_PROFILER_LEAVE_TRASH;
572	#else
573	NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch");
574	#endif
575
576	case CORINFO_HELP_PROF_FCN_TAILCALL:
577	#ifdef RBM_PROFILER_TAILCALL_TRASH
578	return RBM_PROFILER_TAILCALL_TRASH;
579	#else
580	NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch");
581	#endif
582
583	#ifdef _TARGET_X86_
584	case CORINFO_HELP_ASSIGN_REF_EAX:
585	case CORINFO_HELP_ASSIGN_REF_ECX:
586	case CORINFO_HELP_ASSIGN_REF_EBX:
587	case CORINFO_HELP_ASSIGN_REF_EBP:
588	case CORINFO_HELP_ASSIGN_REF_ESI:
589	case CORINFO_HELP_ASSIGN_REF_EDI:
590
591	case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
592	case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
593	case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
594	case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
595	case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
596	case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
597	return RBM_EDX;
598
599	#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
600	case CORINFO_HELP_ASSIGN_REF:
601	case CORINFO_HELP_CHECKED_ASSIGN_REF:
602	return RBM_EAX \| RBM_EDX;
603	#endif // FEATURE_USE_ASM_GC_WRITE_BARRIERS
604	#endif
605
606	case CORINFO_HELP_STOP_FOR_GC:
607	return RBM_STOP_FOR_GC_TRASH;
608
609	case CORINFO_HELP_INIT_PINVOKE_FRAME:
610	return RBM_INIT_PINVOKE_FRAME_TRASH;
611
612	default:
613	return RBM_CALLEE_TRASH;
614	}
615	}
616
617	//----------------------------------------------------------------------
618	// compNoGCHelperCallKillSet: Gets a register mask that represents the set of registers that no longer
619	// contain GC or byref pointers, for "NO GC" helper calls. This is used by the emitter when determining
620	// what registers to remove from the current live GC/byref sets (and thus what to report as dead in the
621	// GC info). Note that for the CORINFO_HELP_ASSIGN_BYREF helper, in particular, the kill set reported by
622	// compHelperCallKillSet() doesn't match this kill set. compHelperCallKillSet() reports the dst/src
623	// address registers as killed for liveness purposes, since their values change. However, they still are
624	// valid byref pointers after the call, so the dst/src address registers are NOT reported as killed here.
625	//
626	// Note: This list may not be complete and defaults to the default RBM_CALLEE_TRASH_NOGC registers.
627	//
628	// Arguments:
629	// helper - The helper being inquired about
630	//
631	// Return Value:
632	// Mask of GC register kills
633	//
634	regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
635	{
636	assert(emitter::emitNoGChelper(helper));
637
638	switch (helper)
639	{
640	case CORINFO_HELP_ASSIGN_BYREF:
641	#if defined(_TARGET_X86_)
642	// This helper only trashes ECX.
643	return RBM_ECX;
644	#elif defined(_TARGET_AMD64_)
645	// This uses and defs RDI and RSI.
646	return RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI \| RBM_RSI);
647	#elif defined(_TARGET_ARMARCH_)
648	return RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF;
649	#else
650	assert(!"unknown arch");
651	#endif
652
653	#if defined(_TARGET_XARCH_)
654	case CORINFO_HELP_PROF_FCN_ENTER:
655	return RBM_PROFILER_ENTER_TRASH;
656
657	case CORINFO_HELP_PROF_FCN_LEAVE:
658	return RBM_PROFILER_LEAVE_TRASH;
659
660	case CORINFO_HELP_PROF_FCN_TAILCALL:
661	return RBM_PROFILER_TAILCALL_TRASH;
662	#endif // defined(_TARGET_XARCH_)
663
664	#if defined(_TARGET_ARMARCH_)
665	case CORINFO_HELP_ASSIGN_REF:
666	case CORINFO_HELP_CHECKED_ASSIGN_REF:
667	return RBM_CALLEE_GCTRASH_WRITEBARRIER;
668	case CORINFO_HELP_PROF_FCN_LEAVE:
669	// In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH on ARMARCH.
670	return RBM_CALLEE_TRASH_NOGC & ~RBM_PROFILER_RET_SCRATCH;
671	#endif
672
673	#if defined(_TARGET_X86_)
674	case CORINFO_HELP_INIT_PINVOKE_FRAME:
675	return RBM_INIT_PINVOKE_FRAME_TRASH;
676	#endif // defined(_TARGET_X86_)
677
678	default:
679	return RBM_CALLEE_TRASH_NOGC;
680	}
681	}
682
683	template <bool ForCodeGen>
684	void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
685	{
686	LclVarDsc* varDsc;
687
688	#ifdef DEBUG
689	if (verbose)
690	{
691	printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
692	dumpConvertedVarSet(this, compCurLife);
693	printf(" -> %s ", VarSetOps::ToString(this, newLife));
694	dumpConvertedVarSet(this, newLife);
695	printf("\n");
696	}
697	#endif // DEBUG
698
699	/ We should only be called when the live set has actually changed /
700
701	noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
702
703	if (!ForCodeGen)
704	{
705	VarSetOps::Assign(this, compCurLife, newLife);
706	return;
707	}
708
709	/ Figure out which variables are becoming live/dead at this point /
710
711	// deadSet = compCurLife - newLife
712	VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));
713
714	// bornSet = newLife - compCurLife
715	VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));
716
717	/ Can't simultaneously become live and dead at the same time /
718
719	// (deadSet UNION bornSet) != EMPTY
720	noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
721	// (deadSet INTERSECTION bornSet) == EMPTY
722	noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
723
724	VarSetOps::Assign(this, compCurLife, newLife);
725
726	// Handle the dying vars first, then the newly live vars.
727	// This is because, in the RyuJIT backend case, they may occupy registers that
728	// will be occupied by another var that is newly live.
729	VarSetOps::Iter deadIter(this, deadSet);
730	unsigned deadVarIndex = `0`;
731	while (deadIter.NextElem(&deadVarIndex))
732	{
733	unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
734	varDsc = lvaTable + varNum;
735	bool isGCRef = (varDsc->TypeGet() == TYP_REF);
736	bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
737
738	if (varDsc->lvIsInReg())
739	{
740	// TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
741	// gc sets
742	regMaskTP regMask = varDsc->lvRegMask();
743	if (isGCRef)
744	{
745	codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
746	}
747	else if (isByRef)
748	{
749	codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
750	}
751	codeGen->genUpdateRegLife(varDsc, false /isBorn/, true /isDying/ DEBUGARG(nullptr));
752	}
753	// This isn't in a register, so update the gcVarPtrSetCur.
754	else if (isGCRef \|\| isByRef)
755	{
756	VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
757	JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
758	}
759	}
760
761	VarSetOps::Iter bornIter(this, bornSet);
762	unsigned bornVarIndex = `0`;
763	while (bornIter.NextElem(&bornVarIndex))
764	{
765	unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
766	varDsc = lvaTable + varNum;
767	bool isGCRef = (varDsc->TypeGet() == TYP_REF);
768	bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
769
770	if (varDsc->lvIsInReg())
771	{
772	#ifdef DEBUG
773	if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
774	{
775	JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
776	}
777	#endif // DEBUG
778	VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
779	codeGen->genUpdateRegLife(varDsc, true /isBorn/, false /isDying/ DEBUGARG(nullptr));
780	regMaskTP regMask = varDsc->lvRegMask();
781	if (isGCRef)
782	{
783	codeGen->gcInfo.gcRegGCrefSetCur \|= regMask;
784	}
785	else if (isByRef)
786	{
787	codeGen->gcInfo.gcRegByrefSetCur \|= regMask;
788	}
789	}
790	// This isn't in a register, so update the gcVarPtrSetCur
791	else if (lvaIsGCTracked(varDsc))
792	{
793	VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
794	JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
795	}
796	}
797
798	codeGen->siUpdate();
799	}
800
801	// Need an explicit instantiation.
802	template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);
803
804	/*****************************************************************************
805	*
806	* Generate a spill.
807	*/
808	void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
809	{
810	getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), `0`);
811	}
812
813	/*****************************************************************************
814	*
815	* Generate a reload.
816	*/
817	void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
818	{
819	getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), `0`);
820	}
821
822	// inline
823	regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
824	{
825	return REG_ARG_0;
826	}
827
828	//----------------------------------------------------------------------
829	// getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
830	//
831	// Arguments:
832	// tree - spilled GenTree node
833	//
834	// Return Value:
835	// TempDsc corresponding to tree
836	TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
837	{
838	// tree must be in spilled state.
839	assert((tree->gtFlags & GTF_SPILLED) != `0`);
840
841	// Get the tree's SpillDsc.
842	RegSet::SpillDsc* prevDsc;
843	RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
844	assert(spillDsc != nullptr);
845
846	// Get the temp desc.
847	TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
848	return temp;
849	}
850
851	#ifdef _TARGET_XARCH_
852
853	#ifdef _TARGET_AMD64_
854	// Returns relocation type hint for an addr.
855	// Note that there are no reloc hints on x86.
856	//
857	// Arguments
858	// addr - data address
859	//
860	// Returns
861	// relocation type hint
862	//
863	unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
864	{
865	return compiler->eeGetRelocTypeHint((void*)addr);
866	}
867	#endif //_TARGET_AMD64_
868
869	// Return true if an absolute indirect data address can be encoded as IP-relative.
870	// offset. Note that this method should be used only when the caller knows that
871	// the address is an icon value that VM has given and there is no GenTree node
872	// representing it. Otherwise, one should always use FitsInAddrBase().
873	//
874	// Arguments
875	// addr - an absolute indirect data address
876	//
877	// Returns
878	// true if indir data addr could be encoded as IP-relative offset.
879	//
880	bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
881	{
882	#ifdef _TARGET_AMD64_
883	return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
884	#else
885	// x86: PC-relative addressing is available only for control flow instructions (jmp and call)
886	return false;
887	#endif
888	}
889
890	// Return true if an indirect code address can be encoded as IP-relative offset.
891	// Note that this method should be used only when the caller knows that the
892	// address is an icon value that VM has given and there is no GenTree node
893	// representing it. Otherwise, one should always use FitsInAddrBase().
894	//
895	// Arguments
896	// addr - an absolute indirect code address
897	//
898	// Returns
899	// true if indir code addr could be encoded as IP-relative offset.
900	//
901	bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
902	{
903	#ifdef _TARGET_AMD64_
904	return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
905	#else
906	// x86: PC-relative addressing is available only for control flow instructions (jmp and call)
907	return true;
908	#endif
909	}
910
911	// Return true if an indirect code address can be encoded as 32-bit displacement
912	// relative to zero. Note that this method should be used only when the caller
913	// knows that the address is an icon value that VM has given and there is no
914	// GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
915	//
916	// Arguments
917	// addr - absolute indirect code address
918	//
919	// Returns
920	// true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
921	//
922	bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
923	{
924	return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
925	}
926
927	// Return true if an absolute indirect code address needs a relocation recorded with VM.
928	//
929	// Arguments
930	// addr - an absolute indirect code address
931	//
932	// Returns
933	// true if indir code addr needs a relocation recorded with VM
934	//
935	bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
936	{
937	// If generating relocatable ngen code, then all code addr should go through relocation
938	if (compiler->opts.compReloc)
939	{
940	return true;
941	}
942
943	#ifdef _TARGET_AMD64_
944	// See if the code indir addr can be encoded as 32-bit displacement relative to zero.
945	// We don't need a relocation in that case.
946	if (genCodeIndirAddrCanBeEncodedAsZeroRelOffset(addr))
947	{
948	return false;
949	}
950
951	// Else we need a relocation.
952	return true;
953	#else //_TARGET_X86_
954	// On x86 there is no need to record or ask for relocations during jitting,
955	// because all addrs fit within 32-bits.
956	return false;
957	#endif //_TARGET_X86_
958	}
959
960	// Return true if a direct code address needs to be marked as relocatable.
961	//
962	// Arguments
963	// addr - absolute direct code address
964	//
965	// Returns
966	// true if direct code addr needs a relocation recorded with VM
967	//
968	bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
969	{
970	// If generating relocatable ngen code, then all code addr should go through relocation
971	if (compiler->opts.compReloc)
972	{
973	return true;
974	}
975
976	#ifdef _TARGET_AMD64_
977	// By default all direct code addresses go through relocation so that VM will setup
978	// a jump stub if addr cannot be encoded as pc-relative offset.
979	return true;
980	#else //_TARGET_X86_
981	// On x86 there is no need for recording relocations during jitting,
982	// because all addrs fit within 32-bits.
983	return false;
984	#endif //_TARGET_X86_
985	}
986	#endif //_TARGET_XARCH_
987
988	/*****************************************************************************
989	*
990	* The following can be used to create basic blocks that serve as labels for
991	* the emitter. Use with caution - these are not real basic blocks!
992	*
993	*/
994
995	// inline
996	BasicBlock* CodeGen::genCreateTempLabel()
997	{
998	#ifdef DEBUG
999	// These blocks don't affect FP
1000	compiler->fgSafeBasicBlockCreation = true;
1001	#endif
1002
1003	BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
1004
1005	#ifdef DEBUG
1006	compiler->fgSafeBasicBlockCreation = false;
1007	#endif
1008
1009	block->bbFlags \|= BBF_JMP_TARGET \| BBF_HAS_LABEL;
1010
1011	// Use coldness of current block, as this label will
1012	// be contained in it.
1013	block->bbFlags \|= (compiler->compCurBB->bbFlags & BBF_COLD);
1014
1015	#ifdef DEBUG
1016	#ifdef UNIX_X86_ABI
1017	block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
1018	#else
1019	block->bbTgtStkDepth = genStackLevel / sizeof(int);
1020	#endif
1021	#endif
1022	return block;
1023	}
1024
1025	// inline
1026	void CodeGen::genDefineTempLabel(BasicBlock* label)
1027	{
1028	#ifdef DEBUG
1029	if (compiler->opts.dspCode)
1030	{
1031	printf("\n L_M%03u_" FMT_BB ":\n", Compiler::s_compMethodsCount, label->bbNum);
1032	}
1033	#endif
1034
1035	label->bbEmitCookie =
1036	getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
1037	}
1038
1039	/*****************************************************************************
1040	*
1041	* Adjust the stack pointer by the given value; assumes that this follows
1042	* a call so only callee-saved registers (and registers that may hold a
1043	* return value) are used at this point.
1044	*/
1045
1046	void CodeGen::genAdjustSP(target_ssize_t delta)
1047	{
1048	#if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI)
1049	if (delta == sizeof(int))
1050	inst_RV(INS_pop, REG_ECX, TYP_INT);
1051	else
1052	#endif
1053	inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
1054	}
1055
1056	//------------------------------------------------------------------------
1057	// genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
1058	//
1059	// Arguments:
1060	// block - The BasicBlock for which we are about to generate code.
1061	//
1062	// Assumptions:
1063	// Must be called just prior to generating code for 'block'.
1064	//
1065	// Notes:
1066	// This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
1067	// and if 'block' is a throw helper block with a non-zero stack level.
1068
1069	void CodeGen::genAdjustStackLevel(BasicBlock* block)
1070	{
1071	#if !FEATURE_FIXED_OUT_ARGS
1072	// Check for inserted throw blocks and adjust genStackLevel.
1073	CLANG_FORMAT_COMMENT_ANCHOR;
1074
1075	#if defined(UNIX_X86_ABI)
1076	if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1077	{
1078	// x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
1079	// at this point if a jump to this block is made in the middle of pushing arugments.
1080	//
1081	// Here we restore SP to prevent potential stack alignment issues.
1082	getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
1083	}
1084	#endif
1085
1086	if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1087	{
1088	noway_assert(block->bbFlags & BBF_JMP_TARGET);
1089
1090	SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
1091
1092	if (genStackLevel != `0`)
1093	{
1094	#ifdef _TARGET_X86_
1095	getEmitter()->emitMarkStackLvl(genStackLevel);
1096	inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
1097	SetStackLevel(`0`);
1098	#else // _TARGET_X86_
1099	NYI("Need emitMarkStackLvl()");
1100	#endif // _TARGET_X86_
1101	}
1102	}
1103	#endif // !FEATURE_FIXED_OUT_ARGS
1104	}
1105
1106	#ifdef _TARGET_ARMARCH_
1107	// return size
1108	// alignmentWB is out param
1109	unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1110	{
1111	unsigned alignment = `0`;
1112	unsigned opSize = `0`;
1113
1114	if (op->gtType == TYP_STRUCT \|\| op->OperIsCopyBlkOp())
1115	{
1116	opSize = InferStructOpSizeAlign(op, &alignment);
1117	}
1118	else
1119	{
1120	alignment = genTypeAlignments[op->TypeGet()];
1121	opSize = genTypeSizes[op->TypeGet()];
1122	}
1123
1124	assert(opSize != `0`);
1125	assert(alignment != `0`);
1126
1127	(*alignmentWB) = alignment;
1128	return opSize;
1129	}
1130	// return size
1131	// alignmentWB is out param
1132	unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1133	{
1134	unsigned alignment = `0`;
1135	unsigned opSize = `0`;
1136
1137	while (op->gtOper == GT_COMMA)
1138	{
1139	op = op->gtOp.gtOp2;
1140	}
1141
1142	if (op->gtOper == GT_OBJ)
1143	{
1144	CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
1145	opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
1146	alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1147	}
1148	else if (op->gtOper == GT_LCL_VAR)
1149	{
1150	unsigned varNum = op->gtLclVarCommon.gtLclNum;
1151	LclVarDsc* varDsc = compiler->lvaTable + varNum;
1152	assert(varDsc->lvType == TYP_STRUCT);
1153	opSize = varDsc->lvSize();
1154	#ifndef _TARGET_64BIT_
1155	if (varDsc->lvStructDoubleAlign)
1156	{
1157	alignment = TARGET_POINTER_SIZE * `2`;
1158	}
1159	else
1160	#endif // !_TARGET_64BIT_
1161	{
1162	alignment = TARGET_POINTER_SIZE;
1163	}
1164	}
1165	else if (op->OperIsCopyBlkOp())
1166	{
1167	GenTree* op2 = op->gtOp.gtOp2;
1168
1169	if (op2->OperGet() == GT_CNS_INT)
1170	{
1171	if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
1172	{
1173	CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
1174	opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1175	alignment =
1176	roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1177	}
1178	else
1179	{
1180	opSize = (unsigned)op2->gtIntCon.gtIconVal;
1181	GenTree* op1 = op->gtOp.gtOp1;
1182	assert(op1->OperGet() == GT_LIST);
1183	GenTree* dstAddr = op1->gtOp.gtOp1;
1184	if (dstAddr->OperGet() == GT_ADDR)
1185	{
1186	InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
1187	}
1188	else
1189	{
1190	assert(!"Unhandle dstAddr node");
1191	alignment = TARGET_POINTER_SIZE;
1192	}
1193	}
1194	}
1195	else
1196	{
1197	noway_assert(!"Variable sized COPYBLK register arg!");
1198	opSize = `0`;
1199	alignment = TARGET_POINTER_SIZE;
1200	}
1201	}
1202	else if (op->gtOper == GT_MKREFANY)
1203	{
1204	opSize = TARGET_POINTER_SIZE * `2`;
1205	alignment = TARGET_POINTER_SIZE;
1206	}
1207	else if (op->IsArgPlaceHolderNode())
1208	{
1209	CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
1210	assert(clsHnd != `0`);
1211	opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1212	alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1213	}
1214	else
1215	{
1216	assert(!"Unhandled gtOper");
1217	opSize = TARGET_POINTER_SIZE;
1218	alignment = TARGET_POINTER_SIZE;
1219	}
1220
1221	assert(opSize != `0`);
1222	assert(alignment != `0`);
1223
1224	(*alignmentWB) = alignment;
1225	return opSize;
1226	}
1227
1228	#endif // _TARGET_ARMARCH_
1229
1230	/*****************************************************************************
1231	*
1232	* Take an address expression and try to find the best set of components to
1233	* form an address mode; returns non-zero if this is successful.
1234	*
1235	* TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
1236	* Refactor this code so that the underlying analysis can be used in
1237	* the RyuJIT Backend to do lowering, instead of having to call this method with the
1238	* option to not generate the code.
1239	*
1240	* 'fold' specifies if it is OK to fold the array index which hangs off
1241	* a GT_NOP node.
1242	*
1243	* If successful, the parameters will be set to the following values:
1244	*
1245	* *rv1Ptr ... base operand
1246	* *rv2Ptr ... optional operand
1247	* *revPtr ... true if rv2 is before rv1 in the evaluation order
1248	* #if SCALED_ADDR_MODES
1249	* *mulPtr ... optional multiplier (2/4/8) for rv2
1250	* Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
1251	* #endif
1252	* *cnsPtr ... integer constant [optional]
1253	*
1254	* IMPORTANT NOTE: This routine doesn't generate any code, it merely
1255	* identifies the components that might be used to
1256	* form an address mode later on.
1257	*/
1258
1259	bool CodeGen::genCreateAddrMode(GenTree* addr,
1260	bool fold,
1261	bool* revPtr,
1262	GenTree** rv1Ptr,
1263	GenTree** rv2Ptr,
1264	#if SCALED_ADDR_MODES
1265	unsigned* mulPtr,
1266	#endif // SCALED_ADDR_MODES
1267	ssize_t* cnsPtr)
1268	{
1269	/*
1270	The following indirections are valid address modes on x86/x64:
1271
1272	[ icon] not handled here*
1273	[reg ]
1274	[reg + icon]
1275	[reg1 + reg2 ]
1276	[reg1 + reg2 + icon]
1277	[reg1 + 2 reg2 ]*
1278	[reg1 + 4 reg2 ]*
1279	[reg1 + 8 reg2 ]*
1280	[ 2 reg2 + icon]*
1281	[ 4 reg2 + icon]*
1282	[ 8 reg2 + icon]*
1283	[reg1 + 2 reg2 + icon]*
1284	[reg1 + 4 reg2 + icon]*
1285	[reg1 + 8 reg2 + icon]*
1286
1287	The following indirections are valid address modes on arm64:
1288
1289	[reg]
1290	[reg + icon]
1291	[reg1 + reg2]
1292	[reg1 + reg2 natural-scale]*
1293
1294	*/
1295
1296	/ All indirect address modes require the address to be an addition /
1297
1298	if (addr->gtOper != GT_ADD)
1299	{
1300	return false;
1301	}
1302
1303	// Can't use indirect addressing mode as we need to check for overflow.
1304	// Also, can't use 'lea' as it doesn't set the flags.
1305
1306	if (addr->gtOverflow())
1307	{
1308	return false;
1309	}
1310
1311	GenTree* rv1 = nullptr;
1312	GenTree* rv2 = nullptr;
1313
1314	GenTree* op1;
1315	GenTree* op2;
1316
1317	ssize_t cns;
1318	#if SCALED_ADDR_MODES
1319	unsigned mul;
1320	#endif // SCALED_ADDR_MODES
1321
1322	GenTree* tmp;
1323
1324	/ What order are the sub-operands to be evaluated /
1325
1326	if (addr->gtFlags & GTF_REVERSE_OPS)
1327	{
1328	op1 = addr->gtOp.gtOp2;
1329	op2 = addr->gtOp.gtOp1;
1330	}
1331	else
1332	{
1333	op1 = addr->gtOp.gtOp1;
1334	op2 = addr->gtOp.gtOp2;
1335	}
1336
1337	bool rev = false; // Is op2 first in the evaluation order?
1338
1339	/*
1340	A complex address mode can combine the following operands:
1341
1342	op1 ... base address
1343	op2 ... optional scaled index
1344	#if SCALED_ADDR_MODES
1345	mul ... optional multiplier (2/4/8) for op2
1346	#endif
1347	cns ... optional displacement
1348
1349	Here we try to find such a set of operands and arrange for these
1350	to sit in registers.
1351	*/
1352
1353	cns = `0`;
1354	#if SCALED_ADDR_MODES
1355	mul = `0`;
1356	#endif // SCALED_ADDR_MODES
1357
1358	AGAIN:
1359	/ We come back to 'AGAIN' if we have an add of a constant, and we are folding that*
1360	constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
1361	here if we find a scaled index.
1362	*/
1363	CLANG_FORMAT_COMMENT_ANCHOR;
1364
1365	#if SCALED_ADDR_MODES
1366	assert(mul == `0`);
1367	#endif // SCALED_ADDR_MODES
1368
1369	/ Special case: keep constants as 'op2' /
1370
1371	if (op1->IsCnsIntOrI())
1372	{
1373	// Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
1374	tmp = op1;
1375	op1 = op2;
1376	op2 = tmp;
1377	}
1378
1379	/ Check for an addition of a constant /
1380
1381	if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
1382	{
1383	/ We're adding a constant /
1384
1385	cns += op2->gtIntConCommon.IconValue();
1386
1387	#if defined(_TARGET_ARMARCH_)
1388	if (cns == `0`)
1389	#endif
1390	{
1391	/ Inspect the operand the constant is being added to /
1392
1393	switch (op1->gtOper)
1394	{
1395	case GT_ADD:
1396
1397	if (op1->gtOverflow())
1398	{
1399	break;
1400	}
1401
1402	op2 = op1->gtOp.gtOp2;
1403	op1 = op1->gtOp.gtOp1;
1404
1405	goto AGAIN;
1406
1407	#if SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1408	// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1409	case GT_MUL:
1410	if (op1->gtOverflow())
1411	{
1412	return false; // Need overflow check
1413	}
1414
1415	__fallthrough;
1416
1417	case GT_LSH:
1418
1419	mul = op1->GetScaledIndex();
1420	if (mul)
1421	{
1422	/ We can use "[mulrv2 + icon]" /*
1423
1424	rv1 = nullptr;
1425	rv2 = op1->gtOp.gtOp1;
1426
1427	goto FOUND_AM;
1428	}
1429	break;
1430	#endif // SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1431
1432	default:
1433	break;
1434	}
1435	}
1436
1437	/ The best we can do is "[rv1 + icon]" /
1438
1439	rv1 = op1;
1440	rv2 = nullptr;
1441
1442	goto FOUND_AM;
1443	}
1444
1445	// op2 is not a constant. So keep on trying.
1446
1447	/ Neither op1 nor op2 are sitting in a register right now /
1448
1449	switch (op1->gtOper)
1450	{
1451	#if !defined(_TARGET_ARMARCH_)
1452	// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1453	case GT_ADD:
1454
1455	if (op1->gtOverflow())
1456	{
1457	break;
1458	}
1459
1460	if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
1461	{
1462	cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
1463	op1 = op1->gtOp.gtOp1;
1464
1465	goto AGAIN;
1466	}
1467
1468	break;
1469
1470	#if SCALED_ADDR_MODES
1471
1472	case GT_MUL:
1473
1474	if (op1->gtOverflow())
1475	{
1476	break;
1477	}
1478
1479	__fallthrough;
1480
1481	case GT_LSH:
1482
1483	mul = op1->GetScaledIndex();
1484	if (mul)
1485	{
1486	/ 'op1' is a scaled value /
1487
1488	rv1 = op2;
1489	rv2 = op1->gtOp.gtOp1;
1490
1491	int argScale;
1492	while ((rv2->gtOper == GT_MUL \|\| rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != `0`)
1493	{
1494	if (jitIsScaleIndexMul(argScale * mul))
1495	{
1496	mul = mul * argScale;
1497	rv2 = rv2->gtOp.gtOp1;
1498	}
1499	else
1500	{
1501	break;
1502	}
1503	}
1504
1505	noway_assert(rev == false);
1506	rev = true;
1507
1508	goto FOUND_AM;
1509	}
1510	break;
1511
1512	#endif // SCALED_ADDR_MODES
1513	#endif // !_TARGET_ARMARCH
1514
1515	case GT_NOP:
1516
1517	op1 = op1->gtOp.gtOp1;
1518	goto AGAIN;
1519
1520	case GT_COMMA:
1521
1522	op1 = op1->gtOp.gtOp2;
1523	goto AGAIN;
1524
1525	default:
1526	break;
1527	}
1528
1529	noway_assert(op2);
1530	switch (op2->gtOper)
1531	{
1532	#if !defined(_TARGET_ARMARCH_)
1533	// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1534	case GT_ADD:
1535
1536	if (op2->gtOverflow())
1537	{
1538	break;
1539	}
1540
1541	if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
1542	{
1543	cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1544	op2 = op2->gtOp.gtOp1;
1545
1546	goto AGAIN;
1547	}
1548
1549	break;
1550
1551	#if SCALED_ADDR_MODES
1552
1553	case GT_MUL:
1554
1555	if (op2->gtOverflow())
1556	{
1557	break;
1558	}
1559
1560	__fallthrough;
1561
1562	case GT_LSH:
1563
1564	mul = op2->GetScaledIndex();
1565	if (mul)
1566	{
1567	// 'op2' is a scaled value...is it's argument also scaled?
1568	int argScale;
1569	rv2 = op2->gtOp.gtOp1;
1570	while ((rv2->gtOper == GT_MUL \|\| rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != `0`)
1571	{
1572	if (jitIsScaleIndexMul(argScale * mul))
1573	{
1574	mul = mul * argScale;
1575	rv2 = rv2->gtOp.gtOp1;
1576	}
1577	else
1578	{
1579	break;
1580	}
1581	}
1582
1583	rv1 = op1;
1584
1585	goto FOUND_AM;
1586	}
1587	break;
1588
1589	#endif // SCALED_ADDR_MODES
1590	#endif // !_TARGET_ARMARCH
1591
1592	case GT_NOP:
1593
1594	op2 = op2->gtOp.gtOp1;
1595	goto AGAIN;
1596
1597	case GT_COMMA:
1598
1599	op2 = op2->gtOp.gtOp2;
1600	goto AGAIN;
1601
1602	default:
1603	break;
1604	}
1605
1606	/ The best we can do "[rv1 + rv2]" or "[rv1 + rv2 + cns]" /
1607
1608	rv1 = op1;
1609	rv2 = op2;
1610	#ifdef _TARGET_ARM64_
1611	assert(cns == `0`);
1612	#endif
1613
1614	FOUND_AM:
1615
1616	if (rv2)
1617	{
1618	/ Make sure a GC address doesn't end up in 'rv2' /
1619
1620	if (varTypeIsGC(rv2->TypeGet()))
1621	{
1622	noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
1623
1624	tmp = rv1;
1625	rv1 = rv2;
1626	rv2 = tmp;
1627
1628	rev = !rev;
1629	}
1630
1631	/ Special case: constant array index (that is range-checked) /
1632
1633	if (fold)
1634	{
1635	ssize_t tmpMul;
1636	GenTree* index;
1637
1638	if ((rv2->gtOper == GT_MUL \|\| rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
1639	{
1640	/ For valuetype arrays where we can't use the scaled address*
1641	mode, rv2 will point to the scaled index. So we have to do
1642	more work /*
1643
1644	tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
1645	if (mul)
1646	{
1647	tmpMul *= mul;
1648	}
1649	}
1650	else
1651	{
1652	/ May be a simple array. rv2 will points to the actual index /
1653
1654	index = rv2;
1655	tmpMul = mul;
1656	}
1657
1658	/ Get hold of the array index and see if it's a constant /
1659	if (index->IsIntCnsFitsInI32())
1660	{
1661	/ Get hold of the index value /
1662	ssize_t ixv = index->AsIntConCommon()->IconValue();
1663
1664	#if SCALED_ADDR_MODES
1665	/ Scale the index if necessary /
1666	if (tmpMul)
1667	{
1668	ixv *= tmpMul;
1669	}
1670	#endif
1671
1672	if (FitsIn<INT32>(cns + ixv))
1673	{
1674	/ Add the scaled index to the offset value /
1675
1676	cns += ixv;
1677
1678	#if SCALED_ADDR_MODES
1679	/ There is no scaled operand any more /
1680	mul = `0`;
1681	#endif
1682	rv2 = nullptr;
1683	}
1684	}
1685	}
1686	}
1687
1688	// We shouldn't have [rv21 + cns] - this is equivalent to [rv1 + cns]*
1689	noway_assert(rv1 \|\| mul != `1`);
1690
1691	noway_assert(FitsIn<INT32>(cns));
1692
1693	if (rv1 == nullptr && rv2 == nullptr)
1694	{
1695	return false;
1696	}
1697
1698	/ Success - return the various components to the caller /
1699
1700	*revPtr = rev;
1701	*rv1Ptr = rv1;
1702	*rv2Ptr = rv2;
1703	#if SCALED_ADDR_MODES
1704	*mulPtr = mul;
1705	#endif
1706	*cnsPtr = cns;
1707
1708	return true;
1709	}
1710
1711	/*****************************************************************************
1712	* The condition to use for (the jmp/set for) the given type of operation
1713	*
1714	* In case of amd64, this routine should be used when there is no gentree available
1715	* and one needs to generate jumps based on integer comparisons. When gentree is
1716	* available always use its overloaded version.
1717	*
1718	*/
1719
1720	// static
1721	emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
1722	{
1723	const static BYTE genJCCinsSigned[] = {
1724	#if defined(_TARGET_XARCH_)
1725	EJ_je, // GT_EQ
1726	EJ_jne, // GT_NE
1727	EJ_jl, // GT_LT
1728	EJ_jle, // GT_LE
1729	EJ_jge, // GT_GE
1730	EJ_jg, // GT_GT
1731	EJ_je, // GT_TEST_EQ
1732	EJ_jne, // GT_TEST_NE
1733	#elif defined(_TARGET_ARMARCH_)
1734	EJ_eq, // GT_EQ
1735	EJ_ne, // GT_NE
1736	EJ_lt, // GT_LT
1737	EJ_le, // GT_LE
1738	EJ_ge, // GT_GE
1739	EJ_gt, // GT_GT
1740	#if defined(_TARGET_ARM64_)
1741	EJ_eq, // GT_TEST_EQ
1742	EJ_ne, // GT_TEST_NE
1743	#endif
1744	#endif
1745	};
1746
1747	const static BYTE genJCCinsUnsigned[] = / unsigned comparison /
1748	{
1749	#if defined(_TARGET_XARCH_)
1750	EJ_je, // GT_EQ
1751	EJ_jne, // GT_NE
1752	EJ_jb, // GT_LT
1753	EJ_jbe, // GT_LE
1754	EJ_jae, // GT_GE
1755	EJ_ja, // GT_GT
1756	EJ_je, // GT_TEST_EQ
1757	EJ_jne, // GT_TEST_NE
1758	#elif defined(_TARGET_ARMARCH_)
1759	EJ_eq, // GT_EQ
1760	EJ_ne, // GT_NE
1761	EJ_lo, // GT_LT
1762	EJ_ls, // GT_LE
1763	EJ_hs, // GT_GE
1764	EJ_hi, // GT_GT
1765	#if defined(_TARGET_ARM64_)
1766	EJ_eq, // GT_TEST_EQ
1767	EJ_ne, // GT_TEST_NE
1768	#endif
1769	#endif
1770	};
1771
1772	const static BYTE genJCCinsLogical[] = / logical operation /
1773	{
1774	#if defined(_TARGET_XARCH_)
1775	EJ_je, // GT_EQ (Z == 1)
1776	EJ_jne, // GT_NE (Z == 0)
1777	EJ_js, // GT_LT (S == 1)
1778	EJ_NONE, // GT_LE
1779	EJ_jns, // GT_GE (S == 0)
1780	EJ_NONE, // GT_GT
1781	EJ_NONE, // GT_TEST_EQ
1782	EJ_NONE, // GT_TEST_NE
1783	#elif defined(_TARGET_ARMARCH_)
1784	EJ_eq, // GT_EQ (Z == 1)
1785	EJ_ne, // GT_NE (Z == 0)
1786	EJ_mi, // GT_LT (N == 1)
1787	EJ_NONE, // GT_LE
1788	EJ_pl, // GT_GE (N == 0)
1789	EJ_NONE, // GT_GT
1790	#if defined(_TARGET_ARM64_)
1791	EJ_eq, // GT_TEST_EQ
1792	EJ_ne, // GT_TEST_NE
1793	#endif
1794	#endif
1795	};
1796
1797	#if defined(_TARGET_XARCH_)
1798	assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
1799	assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
1800	assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
1801	assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
1802	assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
1803	assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
1804	assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je);
1805	assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne);
1806
1807	assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
1808	assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
1809	assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
1810	assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
1811	assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
1812	assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
1813	assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je);
1814	assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne);
1815
1816	assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
1817	assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
1818	assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
1819	assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
1820	#elif defined(_TARGET_ARMARCH_)
1821	assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
1822	assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
1823	assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
1824	assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
1825	assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
1826	assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
1827
1828	assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
1829	assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
1830	assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
1831	assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
1832	assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
1833	assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
1834
1835	assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
1836	assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
1837	assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
1838	assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
1839	#else
1840	assert(!"unknown arch");
1841	#endif
1842	assert(GenTree::OperIsCompare(cmp));
1843
1844	emitJumpKind result = EJ_COUNT;
1845
1846	if (compareKind == CK_UNSIGNED)
1847	{
1848	result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
1849	}
1850	else if (compareKind == CK_SIGNED)
1851	{
1852	result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
1853	}
1854	else if (compareKind == CK_LOGICAL)
1855	{
1856	result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
1857	}
1858	assert(result != EJ_COUNT);
1859	return result;
1860	}
1861
1862	#ifdef _TARGET_ARMARCH_
1863	//------------------------------------------------------------------------
1864	// genEmitGSCookieCheck: Generate code to check that the GS cookie
1865	// wasn't thrashed by a buffer overrun. Common code for ARM32 and ARM64.
1866	//
1867	void CodeGen::genEmitGSCookieCheck(bool pushReg)
1868	{
1869	noway_assert(compiler->gsGlobalSecurityCookieAddr \|\| compiler->gsGlobalSecurityCookieVal);
1870
1871	// Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
1872	// executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
1873	if (!pushReg && (compiler->info.compRetType == TYP_REF))
1874	gcInfo.gcRegGCrefSetCur \|= RBM_INTRET;
1875
1876	// We need two temporary registers, to load the GS cookie values and compare them. We can't use
1877	// any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be
1878	// callee-trash registers, which should not contain anything interesting at this point.
1879	// We don't have any IR node representing this check, so LSRA can't communicate registers
1880	// for us to use.
1881
1882	regNumber regGSConst = REG_GSCOOKIE_TMP_0;
1883	regNumber regGSValue = REG_GSCOOKIE_TMP_1;
1884
1885	if (compiler->gsGlobalSecurityCookieAddr == nullptr)
1886	{
1887	// load the GS cookie constant into a reg
1888	//
1889	genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
1890	}
1891	else
1892	{
1893	// Ngen case - GS cookie constant needs to be accessed through an indirection.
1894	instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
1895	getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, `0`);
1896	}
1897	// Load this method's GS value from the stack frame
1898	getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, `0`);
1899	// Compare with the GC cookie constant
1900	getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
1901
1902	BasicBlock* gsCheckBlk = genCreateTempLabel();
1903	emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
1904	inst_JMP(jmpEqual, gsCheckBlk);
1905	// regGSConst and regGSValue aren't needed anymore, we can use them for helper call
1906	genEmitHelperCall(CORINFO_HELP_FAIL_FAST, `0`, EA_UNKNOWN, regGSConst);
1907	genDefineTempLabel(gsCheckBlk);
1908	}
1909	#endif // _TARGET_ARMARCH_
1910
1911	/*****************************************************************************
1912	*
1913	* Generate an exit sequence for a return from a method (note: when compiling
1914	* for speed there might be multiple exit points).
1915	*/
1916
1917	void CodeGen::genExitCode(BasicBlock* block)
1918	{
1919	/ Just wrote the first instruction of the epilog - inform debugger*
1920	Note that this may result in a duplicate IPmapping entry, and
1921	that this is ok /*
1922
1923	// For non-optimized debuggable code, there is only one epilog.
1924	genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
1925
1926	bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != `0`);
1927	if (compiler->getNeedsGSSecurityCookie())
1928	{
1929	genEmitGSCookieCheck(jmpEpilog);
1930
1931	if (jmpEpilog)
1932	{
1933	// Dev10 642944 -
1934	// The GS cookie check created a temp label that has no live
1935	// incoming GC registers, we need to fix that
1936
1937	unsigned varNum;
1938	LclVarDsc* varDsc;
1939
1940	/ Figure out which register parameters hold pointers /
1941
1942	for (varNum = `0`, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
1943	varNum++, varDsc++)
1944	{
1945	noway_assert(varDsc->lvIsParam);
1946
1947	gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
1948	}
1949
1950	getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
1951	getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
1952	}
1953	}
1954
1955	genReserveEpilog(block);
1956	}
1957
1958	//------------------------------------------------------------------------
1959	// genJumpToThrowHlpBlk: Generate code for an out-of-line exception.
1960	//
1961	// Notes:
1962	// For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef().
1963	// Otherwise, we generate the 'throw' inline.
1964	//
1965	// Arguments:
1966	// jumpKind - jump kind to generate;
1967	// codeKind - the special throw-helper kind;
1968	// failBlk - optional fail target block, if it is already known;
1969	//
1970	void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTree* failBlk)
1971	{
1972	bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
1973	#if defined(UNIX_X86_ABI) && FEATURE_EH_FUNCLETS
1974	// Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
1975	useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
1976	#endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
1977
1978	if (useThrowHlpBlk)
1979	{
1980	// For code with throw helper blocks, find and use the helper block for
1981	// raising the exception. The block may be shared by other trees too.
1982
1983	BasicBlock* excpRaisingBlock;
1984
1985	if (failBlk != nullptr)
1986	{
1987	// We already know which block to jump to. Use that.
1988	assert(failBlk->gtOper == GT_LABEL);
1989	excpRaisingBlock = failBlk->gtLabel.gtLabBB;
1990
1991	#ifdef DEBUG
1992	Compiler::AddCodeDsc* add =
1993	compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
1994	assert(excpRaisingBlock == add->acdDstBlk);
1995	#if !FEATURE_FIXED_OUT_ARGS
1996	assert(add->acdStkLvlInit \|\| isFramePointerUsed());
1997	#endif // !FEATURE_FIXED_OUT_ARGS
1998	#endif // DEBUG
1999	}
2000	else
2001	{
2002	// Find the helper-block which raises the exception.
2003	Compiler::AddCodeDsc* add =
2004	compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
2005	PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
2006	excpRaisingBlock = add->acdDstBlk;
2007	#if !FEATURE_FIXED_OUT_ARGS
2008	assert(add->acdStkLvlInit \|\| isFramePointerUsed());
2009	#endif // !FEATURE_FIXED_OUT_ARGS
2010	}
2011
2012	noway_assert(excpRaisingBlock != nullptr);
2013
2014	// Jump to the exception-throwing block on error.
2015	inst_JMP(jumpKind, excpRaisingBlock);
2016	}
2017	else
2018	{
2019	// The code to throw the exception will be generated inline, and
2020	// we will jump around it in the normal non-exception case.
2021
2022	BasicBlock* tgtBlk = nullptr;
2023	emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
2024	if (reverseJumpKind != jumpKind)
2025	{
2026	tgtBlk = genCreateTempLabel();
2027	inst_JMP(reverseJumpKind, tgtBlk);
2028	}
2029
2030	genEmitHelperCall(compiler->acdHelper(codeKind), `0`, EA_UNKNOWN);
2031
2032	// Define the spot for the normal non-exception case to jump to.
2033	if (tgtBlk != nullptr)
2034	{
2035	assert(reverseJumpKind != jumpKind);
2036	genDefineTempLabel(tgtBlk);
2037	}
2038	}
2039	}
2040
2041	/*****************************************************************************
2042	*
2043	* The last operation done was generating code for "tree" and that would
2044	* have set the flags. Check if the operation caused an overflow.
2045	*/
2046
2047	// inline
2048	void CodeGen::genCheckOverflow(GenTree* tree)
2049	{
2050	// Overflow-check should be asked for this tree
2051	noway_assert(tree->gtOverflow());
2052
2053	const var_types type = tree->TypeGet();
2054
2055	// Overflow checks can only occur for the non-small types: (i.e. TYP_INT,TYP_LONG)
2056	noway_assert(!varTypeIsSmall(type));
2057
2058	emitJumpKind jumpKind;
2059
2060	#ifdef _TARGET_ARM64_
2061	if (tree->OperGet() == GT_MUL)
2062	{
2063	jumpKind = EJ_ne;
2064	}
2065	else
2066	#endif
2067	{
2068	bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != `0`);
2069
2070	#if defined(_TARGET_XARCH_)
2071
2072	jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
2073
2074	#elif defined(_TARGET_ARMARCH_)
2075
2076	jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
2077
2078	if (jumpKind == EJ_lo)
2079	{
2080	if (tree->OperGet() != GT_SUB)
2081	{
2082	jumpKind = EJ_hs;
2083	}
2084	}
2085
2086	#endif // defined(_TARGET_ARMARCH_)
2087	}
2088
2089	// Jump to the block which will throw the expection
2090
2091	genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
2092	}
2093
2094	#if FEATURE_EH_FUNCLETS
2095
2096	/*****************************************************************************
2097	*
2098	* Update the current funclet as needed by calling genUpdateCurrentFunclet().
2099	* For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet
2100	* is up-to-date.
2101	*
2102	*/
2103
2104	void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
2105	{
2106	if (block->bbFlags & BBF_FUNCLET_BEG)
2107	{
2108	compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
2109	if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2110	{
2111	assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
2112	}
2113	else
2114	{
2115	// We shouldn't see FUNC_ROOT
2116	assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2117	assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
2118	}
2119	}
2120	else
2121	{
2122	assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
2123	if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2124	{
2125	assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
2126	}
2127	else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
2128	{
2129	assert(!block->hasHndIndex());
2130	}
2131	else
2132	{
2133	assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2134	assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
2135	}
2136	}
2137	}
2138
2139	#if defined(_TARGET_ARM_)
2140	void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
2141	{
2142	// If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
2143	// so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
2144	// calls the funclet during non-exceptional control flow.
2145	if (block->bbFlags & BBF_FINALLY_TARGET)
2146	{
2147	assert(block->bbFlags & BBF_JMP_TARGET);
2148
2149	#ifdef DEBUG
2150	if (compiler->verbose)
2151	{
2152	printf("\nEmitting finally target NOP predecessor for " FMT_BB "\n", block->bbNum);
2153	}
2154	#endif
2155	// Create a label that we'll use for computing the start of an EH region, if this block is
2156	// at the beginning of such a region. If we used the existing bbEmitCookie as is for
2157	// determining the EH regions, then this NOP would end up outside of the region, if this
2158	// block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
2159	// would be executed, which we would prefer not to do.
2160
2161	block->bbUnwindNopEmitCookie =
2162	getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
2163
2164	instGen(INS_nop);
2165	}
2166	}
2167	#endif
2168
2169	#endif // FEATURE_EH_FUNCLETS
2170
2171	/*****************************************************************************
2172	*
2173	* Generate code for the function.
2174	*/
2175
2176	void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
2177	{
2178	#ifdef DEBUG
2179	if (verbose)
2180	{
2181	printf("*************** In genGenerateCode()\n");
2182	compiler->fgDispBasicBlocks(compiler->verboseTrees);
2183	}
2184	#endif
2185
2186	unsigned codeSize;
2187	unsigned prologSize;
2188	unsigned epilogSize;
2189
2190	void* consPtr;
2191
2192	#ifdef DEBUG
2193	genInterruptibleUsed = true;
2194
2195	#if STACK_PROBES
2196	genNeedPrologStackProbe = false;
2197	#endif
2198
2199	compiler->fgDebugCheckBBlist();
2200	#endif // DEBUG
2201
2202	/ This is the real thing /
2203
2204	genPrepForCompiler();
2205
2206	/ Prepare the emitter /
2207	getEmitter()->Init();
2208	#ifdef DEBUG
2209	VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
2210	#endif
2211
2212	#ifdef DEBUG
2213	if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
2214	{
2215	compiler->opts.disAsm = true;
2216	}
2217
2218	if (compiler->opts.disAsm)
2219	{
2220	printf("; Assembly listing for method %s\n", compiler->info.compFullName);
2221
2222	printf("; Emitting ");
2223
2224	if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
2225	{
2226	printf("SMALL_CODE");
2227	}
2228	else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
2229	{
2230	printf("FAST_CODE");
2231	}
2232	else
2233	{
2234	printf("BLENDED_CODE");
2235	}
2236
2237	printf(" for ");
2238
2239	if (compiler->info.genCPU == CPU_X86)
2240	{
2241	printf("generic X86 CPU");
2242	}
2243	else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
2244	{
2245	printf("Pentium 4");
2246	}
2247	else if (compiler->info.genCPU == CPU_X64)
2248	{
2249	if (compiler->canUseVexEncoding())
2250	{
2251	printf("X64 CPU with AVX");
2252	}
2253	else
2254	{
2255	printf("X64 CPU with SSE2");
2256	}
2257	}
2258	else if (compiler->info.genCPU == CPU_ARM)
2259	{
2260	printf("generic ARM CPU");
2261	}
2262	else if (compiler->info.genCPU == CPU_ARM64)
2263	{
2264	printf("generic ARM64 CPU");
2265	}
2266	else
2267	{
2268	printf("unknown architecture");
2269	}
2270
2271	#if defined(_TARGET_WINDOWS_)
2272	printf(" - Windows");
2273	#elif defined(_TARGET_UNIX_)
2274	printf(" - Unix");
2275	#endif
2276
2277	printf("\n");
2278
2279	if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
2280	{
2281	printf("; Tier-0 compilation\n");
2282	}
2283	if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1))
2284	{
2285	printf("; Tier-1 compilation\n");
2286	}
2287
2288	if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
2289	{
2290	printf("; optimized code\n");
2291	}
2292	else if (compiler->opts.compDbgCode)
2293	{
2294	printf("; debuggable code\n");
2295	}
2296	else if (compiler->opts.MinOpts())
2297	{
2298	printf("; compiler->opts.MinOpts() is true\n");
2299	}
2300	else
2301	{
2302	printf("; unknown optimization flags\n");
2303	}
2304
2305	#if DOUBLE_ALIGN
2306	if (compiler->genDoubleAlign())
2307	printf("; double-aligned frame\n");
2308	else
2309	#endif
2310	printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
2311
2312	if (genInterruptible)
2313	{
2314	printf("; fully interruptible\n");
2315	}
2316	else
2317	{
2318	printf("; partially interruptible\n");
2319	}
2320
2321	if (compiler->fgHaveProfileData())
2322	{
2323	printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
2324	compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
2325	}
2326
2327	if (compiler->fgProfileData_ILSizeMismatch)
2328	{
2329	printf("; discarded IBC profile data due to mismatch in ILSize\n");
2330	}
2331	}
2332	#endif // DEBUG
2333
2334	// We compute the final frame layout before code generation. This is because LSRA
2335	// has already computed exactly the maximum concurrent number of spill temps of each type that are
2336	// required during code generation. So, there is nothing left to estimate: we can be precise in the frame
2337	// layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
2338	// memory from the VM.
2339
2340	genFinalizeFrame();
2341
2342	unsigned maxTmpSize = regSet.tmpGetTotalSize(); // This is precise after LSRA has pre-allocated the temps.
2343
2344	getEmitter()->emitBegFN(isFramePointerUsed()
2345	#if defined(DEBUG)
2346	,
2347	(compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
2348	!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
2349	#endif
2350	,
2351	maxTmpSize);
2352
2353	/ Now generate code for the function /
2354	genCodeForBBlist();
2355
2356	#ifdef DEBUG
2357	// After code generation, dump the frame layout again. It should be the same as before code generation, if code
2358	// generation hasn't touched it (it shouldn't!).
2359	if (verbose)
2360	{
2361	compiler->lvaTableDump();
2362	}
2363	#endif // DEBUG
2364
2365	/ We can now generate the function prolog and epilog /
2366
2367	genGeneratePrologsAndEpilogs();
2368
2369	/ Bind jump distances /
2370
2371	getEmitter()->emitJumpDistBind();
2372
2373	/ The code is now complete and final; it should not change after this. /
2374
2375	/ Compute the size of the code sections that we are going to ask the VM*
2376	to allocate. Note that this might not be precisely the size of the
2377	code we emit, though it's fatal if we emit more code than the size we
2378	compute here.
2379	(Note: an example of a case where we emit less code would be useful.)
2380	*/
2381
2382	getEmitter()->emitComputeCodeSizes();
2383
2384	#ifdef DEBUG
2385
2386	// Code to test or stress our ability to run a fallback compile.
2387	// We trigger the fallback here, before asking the VM for any memory,
2388	// because if not, we will leak mem, as the current codebase can't free
2389	// the mem after the emitter asks the VM for it. As this is only a stress
2390	// mode, we only want the functionality, and don't care about the relative
2391	// ugliness of having the failure here.
2392	if (!compiler->jitFallbackCompile)
2393	{
2394	// Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
2395	// especially that caused by enabling JIT stress.
2396	if (!JitConfig.JitNoForceFallback())
2397	{
2398	if (JitConfig.JitForceFallback() \|\| compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, `5`))
2399	{
2400	NO_WAY_NOASSERT("Stress failure");
2401	}
2402	}
2403	}
2404
2405	#endif // DEBUG
2406
2407	/ We've finished collecting all the unwind information for the function. Now reserve*
2408	space for it from the VM.
2409	*/
2410
2411	compiler->unwindReserve();
2412
2413	#if DISPLAY_SIZES
2414
2415	size_t dataSize = getEmitter()->emitDataSize();
2416
2417	#endif // DISPLAY_SIZES
2418
2419	void* coldCodePtr;
2420
2421	bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
2422
2423	#if defined(_TARGET_AMD64_) \|\| defined(_TARGET_ARM64_)
2424	trackedStackPtrsContig = false;
2425	#elif defined(_TARGET_ARM_)
2426	// On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
2427	trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
2428	#else
2429	trackedStackPtrsContig = !compiler->opts.compDbgEnC;
2430	#endif
2431
2432	#ifdef DEBUG
2433	/ We're done generating code for this function /
2434	compiler->compCodeGenDone = true;
2435	#endif
2436
2437	compiler->EndPhase(PHASE_GENERATE_CODE);
2438
2439	codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
2440	(compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
2441	&prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
2442
2443	compiler->EndPhase(PHASE_EMIT_CODE);
2444
2445	#ifdef DEBUG
2446	if (compiler->opts.disAsm)
2447	{
2448	printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
2449	compiler->info.compFullName);
2450	printf("; ============================================================\n");
2451	printf(""); // in our logic this causes a flush
2452	}
2453
2454	if (verbose)
2455	{
2456	printf("*************** After end code gen, before unwindEmit()\n");
2457	getEmitter()->emitDispIGlist(true);
2458	}
2459	#endif
2460
2461	#if EMIT_TRACK_STACK_DEPTH
2462	// Check our max stack level. Needed for fgAddCodeRef().
2463	// We need to relax the assert as our estimation won't include code-gen
2464	// stack changes (which we know don't affect fgAddCodeRef()).
2465	// NOTE: after emitEndCodeGen (including here), emitMaxStackDepth is a
2466	// count of DWORD-sized arguments, NOT argument size in bytes.
2467	{
2468	unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
2469	compiler->compHndBBtabCount + // Return address for locally-called finallys
2470	genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
2471	(compiler->compTailCallUsed ? `4` : `0`); // CORINFO_HELP_TAILCALL args
2472	#if defined(UNIX_X86_ABI)
2473	// Convert maxNestedAlignment to DWORD count before adding to maxAllowedStackDepth.
2474	assert(maxNestedAlignment % sizeof(int) == `0`);
2475	maxAllowedStackDepth += maxNestedAlignment / sizeof(int);
2476	#endif
2477	noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
2478	}
2479	#endif // EMIT_TRACK_STACK_DEPTH
2480
2481	*nativeSizeOfCode = codeSize;
2482	compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
2483
2484	// printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
2485
2486	// Make sure that the x86 alignment and cache prefetch optimization rules
2487	// were obeyed.
2488
2489	// Don't start a method in the last 7 bytes of a 16-byte alignment area
2490	// unless we are generating SMALL_CODE
2491	// noway_assert( (((unsigned)(codePtr) % 16) <= 8) \|\| (compiler->compCodeOpt() == SMALL_CODE));*
2492
2493	/ Now that the code is issued, we can finalize and emit the unwind data /
2494
2495	compiler->unwindEmit(*codePtr, coldCodePtr);
2496
2497	/ Finalize the line # tracking logic after we know the exact block sizes/offsets /
2498
2499	genIPmappingGen();
2500
2501	/ Finalize the Local Var info in terms of generated code /
2502
2503	genSetScopeInfo();
2504
2505	#ifdef LATE_DISASM
2506	unsigned finalHotCodeSize;
2507	unsigned finalColdCodeSize;
2508	if (compiler->fgFirstColdBlock != nullptr)
2509	{
2510	// We did some hot/cold splitting. The hot section is always padded out to the
2511	// size we thought it would be, but the cold section is not.
2512	assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
2513	assert(compiler->info.compTotalHotCodeSize > `0`);
2514	assert(compiler->info.compTotalColdCodeSize > `0`);
2515	finalHotCodeSize = compiler->info.compTotalHotCodeSize;
2516	finalColdCodeSize = codeSize - finalHotCodeSize;
2517	}
2518	else
2519	{
2520	// No hot/cold splitting
2521	assert(codeSize <= compiler->info.compTotalHotCodeSize);
2522	assert(compiler->info.compTotalHotCodeSize > `0`);
2523	assert(compiler->info.compTotalColdCodeSize == `0`);
2524	finalHotCodeSize = codeSize;
2525	finalColdCodeSize = `0`;
2526	}
2527	getDisAssembler().disAsmCode((BYTE)codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
2528	#endif // LATE_DISASM
2529
2530	/ Report any exception handlers to the VM /
2531
2532	genReportEH();
2533
2534	#ifdef JIT32_GCENCODER
2535	#ifdef DEBUG
2536	void* infoPtr =
2537	#endif // DEBUG
2538	#endif
2539	// Create and store the GC info for this method.
2540	genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
2541
2542	#ifdef DEBUG
2543	FILE* dmpf = jitstdout;
2544
2545	compiler->opts.dmpHex = false;
2546	if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for"))
2547	{
2548	FILE* codf;
2549	errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
2550	if (ec != `0`)
2551	{
2552	assert(codf);
2553	dmpf = codf;
2554	compiler->opts.dmpHex = true;
2555	}
2556	}
2557	if (compiler->opts.dmpHex)
2558	{
2559	size_t consSize = getEmitter()->emitDataSize();
2560	size_t infoSize = compiler->compInfoBlkSize;
2561
2562	fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
2563	fprintf(dmpf, "\n");
2564
2565	if (codeSize)
2566	{
2567	fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
2568	}
2569	if (consSize)
2570	{
2571	fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
2572	}
2573	#ifdef JIT32_GCENCODER
2574	if (infoSize)
2575	fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
2576	#endif // JIT32_GCENCODER
2577
2578	fprintf(dmpf, "\n");
2579
2580	if (codeSize)
2581	{
2582	hexDump(dmpf, "Code", (BYTE)codePtr, codeSize);
2583	}
2584	if (consSize)
2585	{
2586	hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
2587	}
2588	#ifdef JIT32_GCENCODER
2589	if (infoSize)
2590	hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
2591	#endif // JIT32_GCENCODER
2592
2593	fflush(dmpf);
2594	}
2595
2596	if (dmpf != jitstdout)
2597	{
2598	fclose(dmpf);
2599	}
2600
2601	#endif // DEBUG
2602
2603	/ Tell the emitter that we're done with this function /
2604
2605	getEmitter()->emitEndFN();
2606
2607	/ Shut down the spill logic /
2608
2609	regSet.rsSpillDone();
2610
2611	/ Shut down the temp logic /
2612
2613	regSet.tmpDone();
2614
2615	#if DISPLAY_SIZES
2616
2617	grossVMsize += compiler->info.compILCodeSize;
2618	totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
2619	grossNCsize += codeSize + dataSize;
2620
2621	#endif // DISPLAY_SIZES
2622
2623	compiler->EndPhase(PHASE_EMIT_GCEH);
2624	}
2625
2626	/*****************************************************************************
2627	*
2628	* Report EH clauses to the VM
2629	*/
2630
2631	void CodeGen::genReportEH()
2632	{
2633	if (compiler->compHndBBtabCount == `0`)
2634	{
2635	return;
2636	}
2637
2638	#ifdef DEBUG
2639	if (compiler->opts.dspEHTable)
2640	{
2641	printf("*************** EH table for %s\n", compiler->info.compFullName);
2642	}
2643	#endif // DEBUG
2644
2645	unsigned XTnum;
2646	EHblkDsc* HBtab;
2647	EHblkDsc* HBtabEnd;
2648
2649	bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI);
2650
2651	unsigned EHCount = compiler->compHndBBtabCount;
2652
2653	#if FEATURE_EH_FUNCLETS
2654	// Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
2655	// VM.
2656	unsigned duplicateClauseCount = `0`;
2657	unsigned enclosingTryIndex;
2658
2659	// Duplicate clauses are not used by CoreRT ABI
2660	if (!isCoreRTABI)
2661	{
2662	for (XTnum = `0`; XTnum < compiler->compHndBBtabCount; XTnum++)
2663	{
2664	for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
2665	// ignoring 'mutual protect' trys
2666	enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2667	enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2668	{
2669	++duplicateClauseCount;
2670	}
2671	}
2672	EHCount += duplicateClauseCount;
2673	}
2674
2675	#if FEATURE_EH_CALLFINALLY_THUNKS
2676	unsigned clonedFinallyCount = `0`;
2677
2678	// Duplicate clauses are not used by CoreRT ABI
2679	if (!isCoreRTABI)
2680	{
2681	// We don't keep track of how many cloned finally there are. So, go through and count.
2682	// We do a quick pass first through the EH table to see if there are any try/finally
2683	// clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
2684
2685	bool anyFinallys = false;
2686	for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2687	HBtab < HBtabEnd; HBtab++)
2688	{
2689	if (HBtab->HasFinallyHandler())
2690	{
2691	anyFinallys = true;
2692	break;
2693	}
2694	}
2695	if (anyFinallys)
2696	{
2697	for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
2698	{
2699	if (block->bbJumpKind == BBJ_CALLFINALLY)
2700	{
2701	++clonedFinallyCount;
2702	}
2703	}
2704
2705	EHCount += clonedFinallyCount;
2706	}
2707	}
2708	#endif // FEATURE_EH_CALLFINALLY_THUNKS
2709
2710	#endif // FEATURE_EH_FUNCLETS
2711
2712	#ifdef DEBUG
2713	if (compiler->opts.dspEHTable)
2714	{
2715	#if FEATURE_EH_FUNCLETS
2716	#if FEATURE_EH_CALLFINALLY_THUNKS
2717	printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
2718	compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
2719	assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
2720	#else // !FEATURE_EH_CALLFINALLY_THUNKS
2721	printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
2722	compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
2723	assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
2724	#endif // !FEATURE_EH_CALLFINALLY_THUNKS
2725	#else // !FEATURE_EH_FUNCLETS
2726	printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
2727	assert(compiler->compHndBBtabCount == EHCount);
2728	#endif // !FEATURE_EH_FUNCLETS
2729	}
2730	#endif // DEBUG
2731
2732	// Tell the VM how many EH clauses to expect.
2733	compiler->eeSetEHcount(EHCount);
2734
2735	XTnum = `0`; // This is the index we pass to the VM
2736
2737	for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2738	HBtab < HBtabEnd; HBtab++)
2739	{
2740	UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2741
2742	tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
2743	hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
2744
2745	tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2746	: compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
2747	hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2748	: compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
2749
2750	if (HBtab->HasFilter())
2751	{
2752	hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
2753	}
2754	else
2755	{
2756	hndTyp = HBtab->ebdTyp;
2757	}
2758
2759	CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
2760
2761	if (isCoreRTABI && (XTnum > `0`))
2762	{
2763	// For CoreRT, CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers same
2764	// try block as the previous one. The runtime cannot reliably infer this information from
2765	// native code offsets because of different try blocks can have same offsets. Alternative
2766	// solution to this problem would be inserting extra nops to ensure that different try
2767	// blocks have different offsets.
2768	if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - `1`))
2769	{
2770	// The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
2771	// allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
2772	// IL as "try { try {} catch {} catch {} } finally {}".
2773	assert(HBtab->HasCatchHandler());
2774	flags = (CORINFO_EH_CLAUSE_FLAGS)(flags \| CORINFO_EH_CLAUSE_SAMETRY);
2775	}
2776	}
2777
2778	// Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
2779	// the fields aren't accurate.
2780
2781	CORINFO_EH_CLAUSE clause;
2782	clause.ClassToken = hndTyp; / filter offset is passed back here for filter-based exception handlers /
2783	clause.Flags = flags;
2784	clause.TryOffset = tryBeg;
2785	clause.TryLength = tryEnd;
2786	clause.HandlerOffset = hndBeg;
2787	clause.HandlerLength = hndEnd;
2788
2789	assert(XTnum < EHCount);
2790
2791	// Tell the VM about this EH clause.
2792	compiler->eeSetEHinfo(XTnum, &clause);
2793
2794	++XTnum;
2795	}
2796
2797	#if FEATURE_EH_FUNCLETS
2798	// Now output duplicated clauses.
2799	//
2800	// If a funclet has been created by moving a handler out of a try region that it was originally nested
2801	// within, then we need to report a "duplicate" clause representing the fact that an exception in that
2802	// handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
2803	// descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
2804	// no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
2805	// region as the enclosing try region's handler region. This is the sense in which it is duplicated:
2806	// there is now a "duplicate" clause with the same handler region as another, but a different 'try'
2807	// region.
2808	//
2809	// For example, consider this (capital letters represent an unknown code sequence, numbers identify a
2810	// try or handler region):
2811	//
2812	// A
2813	// try (1) {
2814	// B
2815	// try (2) {
2816	// C
2817	// } catch (3) {
2818	// D
2819	// } catch (4) {
2820	// E
2821	// }
2822	// F
2823	// } catch (5) {
2824	// G
2825	// }
2826	// H
2827	//
2828	// Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
2829	// by catch (3) D and catch (4) E. Note that catch (4) E does NOT* protect the code "D".*
2830	// This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
2831	// to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
2832	// note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
2833	// The code "D" and "E" won't be contiguous with the protected region for try (1) (which
2834	// will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
2835	// representing try (1) protecting the new funclets catch (3) and (4).
2836	// The code will be generated as follows:
2837	//
2838	// ABCFH // "main" code
2839	// D // funclet
2840	// E // funclet
2841	// G // funclet
2842	//
2843	// The EH regions are:
2844	//
2845	// C -> D
2846	// C -> E
2847	// BCF -> G
2848	// D -> G // "duplicate" clause
2849	// E -> G // "duplicate" clause
2850	//
2851	// Note that we actually need to generate one of these additional "duplicate" clauses for every
2852	// region the funclet is nested in. Take this example:
2853	//
2854	// A
2855	// try (1) {
2856	// B
2857	// try (2,3) {
2858	// C
2859	// try (4) {
2860	// D
2861	// try (5,6) {
2862	// E
2863	// } catch {
2864	// F
2865	// } catch {
2866	// G
2867	// }
2868	// H
2869	// } catch {
2870	// I
2871	// }
2872	// J
2873	// } catch {
2874	// K
2875	// } catch {
2876	// L
2877	// }
2878	// M
2879	// } catch {
2880	// N
2881	// }
2882	// O
2883	//
2884	// When we pull out funclets, we get the following generated code:
2885	//
2886	// ABCDEHJMO // "main" function
2887	// F // funclet
2888	// G // funclet
2889	// I // funclet
2890	// K // funclet
2891	// L // funclet
2892	// N // funclet
2893	//
2894	// And the EH regions we report to the VM are (in order; main clauses
2895	// first in most-to-least nested order, funclets ("duplicated clauses")
2896	// last, in most-to-least nested) are:
2897	//
2898	// E -> F
2899	// E -> G
2900	// DEH -> I
2901	// CDEHJ -> K
2902	// CDEHJ -> L
2903	// BCDEHJM -> N
2904	// F -> I // funclet clause #1 for F
2905	// F -> K // funclet clause #2 for F
2906	// F -> L // funclet clause #3 for F
2907	// F -> N // funclet clause #4 for F
2908	// G -> I // funclet clause #1 for G
2909	// G -> K // funclet clause #2 for G
2910	// G -> L // funclet clause #3 for G
2911	// G -> N // funclet clause #4 for G
2912	// I -> K // funclet clause #1 for I
2913	// I -> L // funclet clause #2 for I
2914	// I -> N // funclet clause #3 for I
2915	// K -> N // funclet clause #1 for K
2916	// L -> N // funclet clause #1 for L
2917	//
2918	// So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
2919	// Note that due to the nature of 'mutually protect' clauses, it would be incorrect
2920	// to add a clause "F -> G" because F is NOT protected by G, but we still have
2921	// both "F -> K" and "F -> L" because F IS protected by both of those handlers.
2922	//
2923	// The overall ordering of the clauses is still the same most-to-least nesting
2924	// after front-to-back start offset. Because we place the funclets at the end
2925	// these new clauses should also go at the end by this ordering.
2926	//
2927
2928	if (duplicateClauseCount > `0`)
2929	{
2930	unsigned reportedDuplicateClauseCount = `0`; // How many duplicated clauses have we reported?
2931	unsigned XTnum2;
2932	for (XTnum2 = `0`, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
2933	{
2934	unsigned enclosingTryIndex;
2935
2936	EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
2937
2938	for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
2939	// ignoring 'mutual protect' trys
2940	enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2941	enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2942	{
2943	// The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
2944	// that will have the enclosing try protecting the funclet.
2945
2946	noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
2947	// greater EH table index
2948
2949	EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
2950
2951	// The try region is the handler of the funclet. Note that for filters, we don't protect the
2952	// filter region, only the filter handler region. This is because exceptions in filters never
2953	// escape; the VM swallows them.
2954
2955	BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
2956	BasicBlock* bbTryLast = fletTab->ebdHndLast;
2957
2958	BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
2959	BasicBlock* bbHndLast = encTab->ebdHndLast;
2960
2961	UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2962
2963	tryBeg = compiler->ehCodeOffset(bbTryBeg);
2964	hndBeg = compiler->ehCodeOffset(bbHndBeg);
2965
2966	tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2967	: compiler->ehCodeOffset(bbTryLast->bbNext);
2968	hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2969	: compiler->ehCodeOffset(bbHndLast->bbNext);
2970
2971	if (encTab->HasFilter())
2972	{
2973	hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
2974	}
2975	else
2976	{
2977	hndTyp = encTab->ebdTyp;
2978	}
2979
2980	CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
2981
2982	// Tell the VM this is an extra clause caused by moving funclets out of line.
2983	flags = (CORINFO_EH_CLAUSE_FLAGS)(flags \| CORINFO_EH_CLAUSE_DUPLICATE);
2984
2985	// Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
2986	// the fields aren't really accurate. For example, we set "TryLength" to the offset of the
2987	// instruction immediately after the 'try' body. So, it really could be more accurately named
2988	// "TryEndOffset".
2989
2990	CORINFO_EH_CLAUSE clause;
2991	clause.ClassToken = hndTyp; / filter offset is passed back here for filter-based exception handlers /
2992	clause.Flags = flags;
2993	clause.TryOffset = tryBeg;
2994	clause.TryLength = tryEnd;
2995	clause.HandlerOffset = hndBeg;
2996	clause.HandlerLength = hndEnd;
2997
2998	assert(XTnum < EHCount);
2999
3000	// Tell the VM about this EH clause (a duplicated clause).
3001	compiler->eeSetEHinfo(XTnum, &clause);
3002
3003	++XTnum;
3004	++reportedDuplicateClauseCount;
3005
3006	#ifndef DEBUG
3007	if (duplicateClauseCount == reportedDuplicateClauseCount)
3008	{
3009	break; // we've reported all of them; no need to continue looking
3010	}
3011	#endif // !DEBUG
3012
3013	} // for each 'true' enclosing 'try'
3014	} // for each EH table entry
3015
3016	assert(duplicateClauseCount == reportedDuplicateClauseCount);
3017	} // if (duplicateClauseCount > 0)
3018
3019	#if FEATURE_EH_CALLFINALLY_THUNKS
3020	if (clonedFinallyCount > `0`)
3021	{
3022	unsigned reportedClonedFinallyCount = `0`;
3023	for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
3024	{
3025	if (block->bbJumpKind == BBJ_CALLFINALLY)
3026	{
3027	UNATIVE_OFFSET hndBeg, hndEnd;
3028
3029	hndBeg = compiler->ehCodeOffset(block);
3030
3031	// How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
3032	// a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
3033	BasicBlock* bbLabel = block->bbNext;
3034	if (block->isBBCallAlwaysPair())
3035	{
3036	bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
3037	}
3038	if (bbLabel == nullptr)
3039	{
3040	hndEnd = compiler->info.compNativeCodeSize;
3041	}
3042	else
3043	{
3044	assert(bbLabel->bbEmitCookie != nullptr);
3045	hndEnd = compiler->ehCodeOffset(bbLabel);
3046	}
3047
3048	CORINFO_EH_CLAUSE clause;
3049	clause.ClassToken = `0`; // unused
3050	clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY \| CORINFO_EH_CLAUSE_DUPLICATE);
3051	clause.TryOffset = hndBeg;
3052	clause.TryLength = hndBeg;
3053	clause.HandlerOffset = hndBeg;
3054	clause.HandlerLength = hndEnd;
3055
3056	assert(XTnum < EHCount);
3057
3058	// Tell the VM about this EH clause (a cloned finally clause).
3059	compiler->eeSetEHinfo(XTnum, &clause);
3060
3061	++XTnum;
3062	++reportedClonedFinallyCount;
3063
3064	#ifndef DEBUG
3065	if (clonedFinallyCount == reportedClonedFinallyCount)
3066	{
3067	break; // we're done; no need to keep looking
3068	}
3069	#endif // !DEBUG
3070	} // block is BBJ_CALLFINALLY
3071	} // for each block
3072
3073	assert(clonedFinallyCount == reportedClonedFinallyCount);
3074	} // if (clonedFinallyCount > 0)
3075	#endif // FEATURE_EH_CALLFINALLY_THUNKS
3076
3077	#endif // FEATURE_EH_FUNCLETS
3078
3079	assert(XTnum == EHCount);
3080	}
3081
3082	//----------------------------------------------------------------------
3083	// genUseOptimizedWriteBarriers: Determine if an optimized write barrier
3084	// helper should be used.
3085	//
3086	// Arguments:
3087	// wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening.
3088	//
3089	// Return Value:
3090	// true if an optimized write barrier helper should be used, false otherwise.
3091	// Note: only x86 implements register-specific source optimized write
3092	// barriers currently.
3093	//
3094	bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf)
3095	{
3096	#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3097	#ifdef DEBUG
3098	return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
3099	#else
3100	return true;
3101	#endif
3102	#else
3103	return false;
3104	#endif
3105	}
3106
3107	//----------------------------------------------------------------------
3108	// genUseOptimizedWriteBarriers: Determine if an optimized write barrier
3109	// helper should be used.
3110	//
3111	// This has the same functionality as the version of
3112	// genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids
3113	// determining what the required write barrier form is, if possible.
3114	//
3115	// Arguments:
3116	// tgt - target tree of write (e.g., GT_STOREIND)
3117	// assignVal - tree with value to write
3118	//
3119	// Return Value:
3120	// true if an optimized write barrier helper should be used, false otherwise.
3121	// Note: only x86 implements register-specific source optimized write
3122	// barriers currently.
3123	//
3124	bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTree* tgt, GenTree* assignVal)
3125	{
3126	#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3127	#ifdef DEBUG
3128	GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
3129	return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
3130	#else
3131	return true;
3132	#endif
3133	#else
3134	return false;
3135	#endif
3136	}
3137
3138	//----------------------------------------------------------------------
3139	// genWriteBarrierHelperForWriteBarrierForm: Given a write node requiring a write
3140	// barrier, and the write barrier form required, determine the helper to call.
3141	//
3142	// Arguments:
3143	// tgt - target tree of write (e.g., GT_STOREIND)
3144	// wbf - already computed write barrier form to use
3145	//
3146	// Return Value:
3147	// Write barrier helper to use.
3148	//
3149	// Note: do not call this function to get an optimized write barrier helper (e.g.,
3150	// for x86).
3151	//
3152	CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
3153	{
3154	noway_assert(tgt->gtOper == GT_STOREIND);
3155
3156	CorInfoHelpFunc helper = CORINFO_HELP_ASSIGN_REF;
3157
3158	#ifdef DEBUG
3159	if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
3160	{
3161	helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
3162	}
3163	else
3164	#endif
3165	if (tgt->gtOper != GT_CLS_VAR)
3166	{
3167	if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
3168	{
3169	if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
3170	{
3171	helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
3172	}
3173	else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
3174	{
3175	helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
3176	}
3177	}
3178	}
3179	assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) \|\|
3180	((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
3181	(wbf == GCInfo::WBF_BarrierChecked \|\| wbf == GCInfo::WBF_BarrierUnknown)) \|\|
3182	((helper == CORINFO_HELP_ASSIGN_REF) &&
3183	(wbf == GCInfo::WBF_BarrierUnchecked \|\| wbf == GCInfo::WBF_BarrierUnknown)));
3184
3185	return helper;
3186	}
3187
3188	//----------------------------------------------------------------------
3189	// genGCWriteBarrier: Generate a write barrier for a node.
3190	//
3191	// Arguments:
3192	// tgt - target tree of write (e.g., GT_STOREIND)
3193	// wbf - already computed write barrier form to use
3194	//
3195	void CodeGen::genGCWriteBarrier(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
3196	{
3197	CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(tgt, wbf);
3198
3199	#ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
3200	// We classify the "tgt" trees as follows:
3201	// If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
3202	// IND [-> ADDR -> IND] -> { GT_LCL_VAR, ADD({GT_LCL_VAR}, X), ADD(X, (GT_LCL_VAR)) }
3203	// then let "v" be the GT_LCL_VAR.
3204	// If "v" is the return buffer argument, classify as CWBKind_RetBuf.*
3205	// If "v" is another by-ref argument, classify as CWBKind_ByRefArg.*
3206	// Otherwise, classify as CWBKind_OtherByRefLocal.*
3207	// If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, clasify as CWBKind_AddrOfLocal.
3208	// Otherwise, classify as CWBKind_Unclassified.
3209
3210	CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
3211	if (tgt->gtOper == GT_IND)
3212	{
3213	GenTree* lcl = NULL;
3214
3215	GenTree* indArg = tgt->gtOp.gtOp1;
3216	if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
3217	{
3218	indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
3219	}
3220	if (indArg->gtOper == GT_LCL_VAR)
3221	{
3222	lcl = indArg;
3223	}
3224	else if (indArg->gtOper == GT_ADD)
3225	{
3226	if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR)
3227	{
3228	lcl = indArg->gtOp.gtOp1;
3229	}
3230	else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR)
3231	{
3232	lcl = indArg->gtOp.gtOp2;
3233	}
3234	}
3235	if (lcl != NULL)
3236	{
3237	wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
3238	unsigned lclNum = lcl->AsLclVar()->GetLclNum();
3239	if (lclNum == compiler->info.compRetBuffArg)
3240	{
3241	wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
3242	}
3243	else
3244	{
3245	LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
3246	if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
3247	{
3248	wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
3249	}
3250	}
3251	}
3252	else
3253	{
3254	// We should have eliminated the barrier for this case.
3255	assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
3256	}
3257	}
3258
3259	if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
3260	{
3261	#if 0
3262	#ifdef DEBUG
3263	// Enable this to sample the unclassified trees.
3264	static int unclassifiedBarrierSite = `0`;
3265	if (wbKind == CWBKind_Unclassified)
3266	{
3267	unclassifiedBarrierSite++;
3268	printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
3269	}
3270	#endif // DEBUG
3271	#endif // 0
3272	AddStackLevel(`4`);
3273	inst_IV(INS_push, wbKind);
3274	genEmitHelperCall(helper,
3275	`4`, // argSize
3276	EA_PTRSIZE); // retSize
3277	SubtractStackLevel(`4`);
3278	}
3279	else
3280	{
3281	genEmitHelperCall(helper,
3282	`0`, // argSize
3283	EA_PTRSIZE); // retSize
3284	}
3285
3286	#else // !FEATURE_COUNT_GC_WRITE_BARRIERS
3287	genEmitHelperCall(helper,
3288	`0`, // argSize
3289	EA_PTRSIZE); // retSize
3290	#endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
3291	}
3292
3293	/*
3294	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3295	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3296	XX XX
3297	XX Prolog / Epilog XX
3298	XX XX
3299	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3300	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3301	*/
3302
3303	/*****************************************************************************
3304	*
3305	* Generates code for moving incoming register arguments to their
3306	* assigned location, in the function prolog.
3307	*/
3308
3309	#ifdef _PREFAST_
3310	#pragma warning(push)
3311	#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3312	#endif
3313	void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
3314	{
3315	#ifdef DEBUG
3316	if (verbose)
3317	{
3318	printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
3319	}
3320	#endif
3321
3322	unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
3323	unsigned argNum; // current argNum, always in [0..argMax-1]
3324	unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
3325	unsigned regArgNum; // index into the regArgTab[] table
3326	regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
3327	bool doingFloat = regState->rsIsFloat;
3328
3329	// We should be generating the prolog block when we are called
3330	assert(compiler->compGeneratingProlog);
3331
3332	// We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called.
3333	noway_assert(regArgMaskLive != `0`);
3334
3335	// If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
3336	// If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid index are 0-8
3337	//
3338	// The regArgTab can always have unused entries,
3339	// for example if an architecture always increments the arg register number but uses either
3340	// an integer register or a floating point register to hold the next argument
3341	// then with a mix of float and integer args you could have:
3342	//
3343	// sampleMethod(int i, float x, int j, float y, int k, float z);
3344	// r0, r2 and r4 as valid integer arguments with argMax as 5
3345	// and f1, f3 and f5 and valid floating point arguments with argMax as 6
3346	// The first one is doingFloat==false and the second one is doingFloat==true
3347	//
3348	// If a fixed return buffer (in r8) was also present then the first one would become:
3349	// r0, r2, r4 and r8 as valid integer arguments with argMax as 9
3350	//
3351
3352	argMax = regState->rsCalleeRegArgCount;
3353	fixedRetBufIndex = (unsigned)-`1`; // Invalid value
3354
3355	// If necessary we will select a correct xtraReg for circular floating point args later.
3356	if (doingFloat)
3357	{
3358	xtraReg = REG_NA;
3359	noway_assert(argMax <= MAX_FLOAT_REG_ARG);
3360	}
3361	else // we are doing the integer registers
3362	{
3363	noway_assert(argMax <= MAX_REG_ARG);
3364	if (hasFixedRetBuffReg())
3365	{
3366	fixedRetBufIndex = theFixedRetBuffArgNum();
3367	// We have an additional integer register argument when hasFixedRetBuffReg() is true
3368	argMax = fixedRetBufIndex + `1`;
3369	assert(argMax == (MAX_REG_ARG + `1`));
3370	}
3371	}
3372
3373	//
3374	// Construct a table with the register arguments, for detecting circular and
3375	// non-circular dependencies between the register arguments. A dependency is when
3376	// an argument register Rn needs to be moved to register Rm that is also an argument
3377	// register. The table is constructed in the order the arguments are passed in
3378	// registers: the first register argument is in regArgTab[0], the second in
3379	// regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
3380	// at an even index. The regArgTab is indexed from 0 to argMax - 1.
3381	// Note that due to an extra argument register for ARM64 (i.e theFixedRetBuffReg())
3382	// we have increased the allocated size of the regArgTab[] by one.
3383	//
3384	struct regArgElem
3385	{
3386	unsigned varNum; // index into compiler->lvaTable[] for this register argument
3387	#if defined(UNIX_AMD64_ABI)
3388	var_types type; // the Jit type of this regArgTab entry
3389	#endif // defined(UNIX_AMD64_ABI)
3390	unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
3391	// That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
3392	// argument register number 'x'. Only used when circular = true.
3393	char slot; // 0 means the register is not used for a register argument
3394	// 1 means the first part of a register argument
3395	// 2, 3 or 4 means the second,third or fourth part of a multireg argument
3396	bool stackArg; // true if the argument gets homed to the stack
3397	bool processed; // true after we've processed the argument (and it is in its final location)
3398	bool circular; // true if this register participates in a circular dependency loop.
3399
3400	#ifdef UNIX_AMD64_ABI
3401
3402	// For UNIX AMD64 struct passing, the type of the register argument slot can differ from
3403	// the type of the lclVar in ways that are not ascertainable from lvType.
3404	// So, for that case we retain the type of the register in the regArgTab.
3405
3406	var_types getRegType(Compiler* compiler)
3407	{
3408	return type; // UNIX_AMD64 implementation
3409	}
3410
3411	#else // !UNIX_AMD64_ABI
3412
3413	// In other cases, we simply use the type of the lclVar to determine the type of the register.
3414	var_types getRegType(Compiler* compiler)
3415	{
3416	const LclVarDsc& varDsc = compiler->lvaTable[varNum];
3417	// Check if this is an HFA register arg and return the HFA type
3418	if (varDsc.lvIsHfaRegArg())
3419	{
3420	#if defined(_TARGET_WINDOWS_)
3421	// Cannot have hfa types on windows arm targets
3422	// in vararg methods.
3423	assert(!compiler->info.compIsVarArgs);
3424	#endif // defined(_TARGET_WINDOWS_)
3425	return varDsc.GetHfaType();
3426	}
3427	return compiler->mangleVarArgsType(varDsc.lvType);
3428	}
3429
3430	#endif // !UNIX_AMD64_ABI
3431	} regArgTab[max(MAX_REG_ARG + `1`, MAX_FLOAT_REG_ARG)] = {};
3432
3433	unsigned varNum;
3434	LclVarDsc* varDsc;
3435
3436	for (varNum = `0`; varNum < compiler->lvaCount; ++varNum)
3437	{
3438	varDsc = compiler->lvaTable + varNum;
3439
3440	// Is this variable a register arg?
3441	if (!varDsc->lvIsParam)
3442	{
3443	continue;
3444	}
3445
3446	if (!varDsc->lvIsRegArg)
3447	{
3448	continue;
3449	}
3450
3451	// When we have a promoted struct we have two possible LclVars that can represent the incoming argument
3452	// in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
3453	// We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise
3454	// use the original TYP_STRUCT argument.
3455	//
3456	if (varDsc->lvPromoted \|\| varDsc->lvIsStructField)
3457	{
3458	LclVarDsc* parentVarDsc = varDsc;
3459	if (varDsc->lvIsStructField)
3460	{
3461	assert(!varDsc->lvPromoted);
3462	parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
3463	}
3464
3465	Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
3466
3467	if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
3468	{
3469	noway_assert(parentVarDsc->lvFieldCnt == `1`); // We only handle one field here
3470
3471	// For register arguments that are independent promoted structs we put the promoted field varNum in the
3472	// regArgTab[]
3473	if (varDsc->lvPromoted)
3474	{
3475	continue;
3476	}
3477	}
3478	else
3479	{
3480	// For register arguments that are not independent promoted structs we put the parent struct varNum in
3481	// the regArgTab[]
3482	if (varDsc->lvIsStructField)
3483	{
3484	continue;
3485	}
3486	}
3487	}
3488
3489	var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet());
3490	// Change regType to the HFA type when we have a HFA argument
3491	if (varDsc->lvIsHfaRegArg())
3492	{
3493	#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3494	if (compiler->info.compIsVarArgs)
3495	{
3496	assert(!"Illegal incoming HFA arg encountered in Vararg method.");
3497	}
3498	#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3499	regType = varDsc->GetHfaType();
3500	}
3501
3502	#if defined(UNIX_AMD64_ABI)
3503	if (!varTypeIsStruct(regType))
3504	#endif // defined(UNIX_AMD64_ABI)
3505	{
3506	// A struct might be passed partially in XMM register for System V calls.
3507	// So a single arg might use both register files.
3508	if (isFloatRegType(regType) != doingFloat)
3509	{
3510	continue;
3511	}
3512	}
3513
3514	int slots = `0`;
3515
3516	#if defined(UNIX_AMD64_ABI)
3517	if (varTypeIsStruct(varDsc))
3518	{
3519	CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
3520	assert(typeHnd != nullptr);
3521	SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3522	compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
3523	if (!structDesc.passedInRegisters)
3524	{
3525	// The var is not passed in registers.
3526	continue;
3527	}
3528
3529	unsigned firstRegSlot = `0`;
3530	for (unsigned slotCounter = `0`; slotCounter < structDesc.eightByteCount; slotCounter++)
3531	{
3532	regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
3533	var_types regType;
3534
3535	#ifdef FEATURE_SIMD
3536	// Assumption 1:
3537	// RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
3538	// to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
3539	// reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
3540	// home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
3541	//
3542	// Assumption 2:
3543	// RyuJit backend is making another implicit assumption that Vector3 type args when passed in
3544	// registers or on stack, the upper most 4-bytes will be zero.
3545	//
3546	// For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
3547	// that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
3548	// invalid.
3549	//
3550	// RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
3551	// bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
3552	// passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
3553	// there is no need to clear upper 4-bytes of Vector3 type args.
3554	//
3555	// RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
3556	// Vector3 return values are returned in two return registers and the Caller assembles them into a
3557	// single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4-bytes of Vector3
3558	// type args in the prolog and of the Vector3 type return value of a call.
3559
3560	if (varDsc->lvType == TYP_SIMD12)
3561	{
3562	regType = TYP_DOUBLE;
3563	}
3564	else
3565	#endif
3566	{
3567	regType = compiler->GetEightByteType(structDesc, slotCounter);
3568	}
3569
3570	regArgNum = genMapRegNumToRegArgNum(regNum, regType);
3571
3572	if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) \|\|
3573	(doingFloat && (structDesc.IsSseSlot(slotCounter))))
3574	{
3575	// Store the reg for the first slot.
3576	if (slots == `0`)
3577	{
3578	firstRegSlot = regArgNum;
3579	}
3580
3581	// Bingo - add it to our table
3582	noway_assert(regArgNum < argMax);
3583	noway_assert(regArgTab[regArgNum].slot == `0`); // we better not have added it already (there better
3584	// not be multiple vars representing this argument
3585	// register)
3586	regArgTab[regArgNum].varNum = varNum;
3587	regArgTab[regArgNum].slot = (char)(slotCounter + `1`);
3588	regArgTab[regArgNum].type = regType;
3589	slots++;
3590	}
3591	}
3592
3593	if (slots == `0`)
3594	{
3595	continue; // Nothing to do for this regState set.
3596	}
3597
3598	regArgNum = firstRegSlot;
3599	}
3600	else
3601	#endif // defined(UNIX_AMD64_ABI)
3602	{
3603	// Bingo - add it to our table
3604	regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
3605
3606	noway_assert(regArgNum < argMax);
3607	// We better not have added it already (there better not be multiple vars representing this argument
3608	// register)
3609	noway_assert(regArgTab[regArgNum].slot == `0`);
3610
3611	#if defined(UNIX_AMD64_ABI)
3612	// Set the register type.
3613	regArgTab[regArgNum].type = regType;
3614	#endif // defined(UNIX_AMD64_ABI)
3615
3616	regArgTab[regArgNum].varNum = varNum;
3617	regArgTab[regArgNum].slot = `1`;
3618
3619	slots = `1`;
3620
3621	#if FEATURE_MULTIREG_ARGS
3622	if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
3623	{
3624	if (varDsc->lvIsHfaRegArg())
3625	{
3626	// We have an HFA argument, set slots to the number of registers used
3627	slots = varDsc->lvHfaSlots();
3628	}
3629	else
3630	{
3631	// Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
3632	assert(varDsc->lvSize() == (`2` * TARGET_POINTER_SIZE));
3633	// We have a non-HFA multireg argument, set slots to two
3634	slots = `2`;
3635	}
3636
3637	// Note that regArgNum+1 represents an argument index not an actual argument register.
3638	// see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
3639
3640	// This is the setup for the rest of a multireg struct arg
3641
3642	for (int i = `1`; i < slots; i++)
3643	{
3644	noway_assert((regArgNum + i) < argMax);
3645
3646	// We better not have added it already (there better not be multiple vars representing this argument
3647	// register)
3648	noway_assert(regArgTab[regArgNum + i].slot == `0`);
3649
3650	regArgTab[regArgNum + i].varNum = varNum;
3651	regArgTab[regArgNum + i].slot = (char)(i + `1`);
3652	}
3653	}
3654	#endif // FEATURE_MULTIREG_ARGS
3655	}
3656
3657	#ifdef _TARGET_ARM_
3658	int lclSize = compiler->lvaLclSize(varNum);
3659
3660	if (lclSize > REGSIZE_BYTES)
3661	{
3662	unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
3663	slots = lclSize / REGSIZE_BYTES;
3664	if (regArgNum + slots > maxRegArgNum)
3665	{
3666	slots = maxRegArgNum - regArgNum;
3667	}
3668	}
3669	C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
3670	assert(slots < INT8_MAX);
3671	for (char i = `1`; i < slots; i++)
3672	{
3673	regArgTab[regArgNum + i].varNum = varNum;
3674	regArgTab[regArgNum + i].slot = i + `1`;
3675	}
3676	#endif // _TARGET_ARM_
3677
3678	for (int i = `0`; i < slots; i++)
3679	{
3680	regType = regArgTab[regArgNum + i].getRegType(compiler);
3681	regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
3682
3683	#if !defined(UNIX_AMD64_ABI)
3684	assert((i > `0`) \|\| (regNum == varDsc->lvArgReg));
3685	#endif // defined(UNIX_AMD64_ABI)
3686
3687	// Is the arg dead on entry to the method ?
3688
3689	if ((regArgMaskLive & genRegMask(regNum)) == `0`)
3690	{
3691	if (varDsc->lvTrackedNonStruct())
3692	{
3693	// We may now see some tracked locals with zero refs.
3694	// See Lowering::DoPhase. Tolerate these.
3695	if (varDsc->lvRefCnt() > `0`)
3696	{
3697	noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
3698	}
3699	}
3700	else
3701	{
3702	#ifdef _TARGET_X86_
3703	noway_assert(varDsc->lvType == TYP_STRUCT);
3704	#else // !_TARGET_X86_
3705	// For LSRA, it may not be in regArgMaskLive if it has a zero
3706	// refcnt. This is in contrast with the non-LSRA case in which all
3707	// non-tracked args are assumed live on entry.
3708	noway_assert((varDsc->lvRefCnt() == `0`) \|\| (varDsc->lvType == TYP_STRUCT) \|\|
3709	(varDsc->lvAddrExposed && compiler->info.compIsVarArgs) \|\|
3710	(varDsc->lvAddrExposed && compiler->opts.compUseSoftFP));
3711	#endif // !_TARGET_X86_
3712	}
3713	// Mark it as processed and be done with it
3714	regArgTab[regArgNum + i].processed = true;
3715	goto NON_DEP;
3716	}
3717
3718	#ifdef _TARGET_ARM_
3719	// On the ARM when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
3720	// could be equal to lvArgReg. The pre-spilled registers are also not considered live either since
3721	// they've already been spilled.
3722	//
3723	if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == `0`)
3724	#endif // _TARGET_ARM_
3725	{
3726	#if !defined(UNIX_AMD64_ABI)
3727	noway_assert(xtraReg != (varDsc->lvArgReg + i));
3728	#endif
3729	noway_assert(regArgMaskLive & genRegMask(regNum));
3730	}
3731
3732	regArgTab[regArgNum + i].processed = false;
3733
3734	/ mark stack arguments since we will take care of those first /
3735	regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
3736
3737	/ If it goes on the stack or in a register that doesn't hold*
3738	* an argument anymore -> CANNOT form a circular dependency */
3739
3740	if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
3741	{
3742	/ will trash another argument -> possible dependency*
3743	* We may need several passes after the table is constructed
3744	* to decide on that */
3745
3746	/ Maybe the argument stays in the register (IDEAL) /
3747
3748	if ((i == `0`) && (varDsc->lvRegNum == regNum))
3749	{
3750	goto NON_DEP;
3751	}
3752
3753	#if !defined(_TARGET_64BIT_)
3754	if ((i == `1`) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
3755	{
3756	goto NON_DEP;
3757	}
3758	if ((i == `1`) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
3759	{
3760	goto NON_DEP;
3761	}
3762
3763	if ((i == `1`) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
3764	(REG_NEXT(varDsc->lvRegNum) == regNum))
3765	{
3766	goto NON_DEP;
3767	}
3768	#endif // !defined(_TARGET_64BIT_)
3769	regArgTab[regArgNum + i].circular = true;
3770	}
3771	else
3772	{
3773	NON_DEP:
3774	regArgTab[regArgNum + i].circular = false;
3775
3776	/ mark the argument register as free /
3777	regArgMaskLive &= ~genRegMask(regNum);
3778	}
3779	}
3780	}
3781
3782	/ Find the circular dependencies for the argument registers, if any.*
3783	* A circular dependency is a set of registers R1, R2, ..., Rn
3784	* such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
3785
3786	bool change = true;
3787	if (regArgMaskLive)
3788	{
3789	/ Possible circular dependencies still exist; the previous pass was not enough*
3790	* to filter them out. Use a "sieve" strategy to find all circular dependencies. */
3791
3792	while (change)
3793	{
3794	change = false;
3795
3796	for (argNum = `0`; argNum < argMax; argNum++)
3797	{
3798	// If we already marked the argument as non-circular then continue
3799
3800	if (!regArgTab[argNum].circular)
3801	{
3802	continue;
3803	}
3804
3805	if (regArgTab[argNum].slot == `0`) // Not a register argument
3806	{
3807	continue;
3808	}
3809
3810	varNum = regArgTab[argNum].varNum;
3811	noway_assert(varNum < compiler->lvaCount);
3812	varDsc = compiler->lvaTable + varNum;
3813	noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3814
3815	/ cannot possibly have stack arguments /
3816	noway_assert(varDsc->lvIsInReg());
3817	noway_assert(!regArgTab[argNum].stackArg);
3818
3819	var_types regType = regArgTab[argNum].getRegType(compiler);
3820	regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
3821
3822	regNumber destRegNum = REG_NA;
3823	if (regArgTab[argNum].slot == `1`)
3824	{
3825	destRegNum = varDsc->lvRegNum;
3826	}
3827	#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_64BIT_)
3828	else
3829	{
3830	assert(regArgTab[argNum].slot == `2`);
3831	assert(argNum > `0`);
3832	assert(regArgTab[argNum - `1`].slot == `1`);
3833	assert(regArgTab[argNum - `1`].varNum == varNum);
3834	assert((varDsc->lvType == TYP_SIMD12) \|\| (varDsc->lvType == TYP_SIMD16));
3835	regArgMaskLive &= ~genRegMask(regNum);
3836	regArgTab[argNum].circular = false;
3837	change = true;
3838	continue;
3839	}
3840	#elif !defined(_TARGET_64BIT_)
3841	else if (regArgTab[argNum].slot == `2` && genActualType(varDsc->TypeGet()) == TYP_LONG)
3842	{
3843	destRegNum = varDsc->lvOtherReg;
3844	}
3845	else
3846	{
3847	assert(regArgTab[argNum].slot == `2`);
3848	assert(varDsc->TypeGet() == TYP_DOUBLE);
3849	destRegNum = REG_NEXT(varDsc->lvRegNum);
3850	}
3851	#endif // !defined(_TARGET_64BIT_)
3852	noway_assert(destRegNum != REG_NA);
3853	if (genRegMask(destRegNum) & regArgMaskLive)
3854	{
3855	/ we are trashing a live argument register - record it /
3856	unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
3857	noway_assert(destRegArgNum < argMax);
3858	regArgTab[destRegArgNum].trashBy = argNum;
3859	}
3860	else
3861	{
3862	/ argument goes to a free register /
3863	regArgTab[argNum].circular = false;
3864	change = true;
3865
3866	/ mark the argument register as free /
3867	regArgMaskLive &= ~genRegMask(regNum);
3868	}
3869	}
3870	}
3871	}
3872
3873	/ At this point, everything that has the "circular" flag*
3874	* set to "true" forms a circular dependency */
3875	CLANG_FORMAT_COMMENT_ANCHOR;
3876
3877	#ifdef DEBUG
3878	if (regArgMaskLive)
3879	{
3880	if (verbose)
3881	{
3882	printf("Circular dependencies found while home-ing the incoming arguments.\n");
3883	}
3884	}
3885	#endif
3886
3887	// LSRA allocates registers to incoming parameters in order and will not overwrite
3888	// a register still holding a live parameter.
3889
3890	noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == `0`) &&
3891	"Homing of float argument registers with circular dependencies not implemented.");
3892
3893	/ Now move the arguments to their locations.*
3894	* First consider ones that go on the stack since they may
3895	* free some registers. */
3896
3897	regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
3898	for (argNum = `0`; argNum < argMax; argNum++)
3899	{
3900	emitAttr size;
3901
3902	#if defined(UNIX_AMD64_ABI)
3903	// If this is the wrong register file, just continue.
3904	if (regArgTab[argNum].type == TYP_UNDEF)
3905	{
3906	// This could happen if the reg in regArgTab[argNum] is of the other register file -
3907	// for System V register passed structs where the first reg is GPR and the second an XMM reg.
3908	// The next register file processing will process it.
3909	continue;
3910	}
3911	#endif // defined(UNIX_AMD64_ABI)
3912
3913	// If the arg is dead on entry to the method, skip it
3914
3915	if (regArgTab[argNum].processed)
3916	{
3917	continue;
3918	}
3919
3920	if (regArgTab[argNum].slot == `0`) // Not a register argument
3921	{
3922	continue;
3923	}
3924
3925	varNum = regArgTab[argNum].varNum;
3926	noway_assert(varNum < compiler->lvaCount);
3927	varDsc = compiler->lvaTable + varNum;
3928
3929	#ifndef _TARGET_64BIT_
3930	// If not a stack arg go to the next one
3931	if (varDsc->lvType == TYP_LONG)
3932	{
3933	if (regArgTab[argNum].slot == `1` && !regArgTab[argNum].stackArg)
3934	{
3935	continue;
3936	}
3937	else if (varDsc->lvOtherReg != REG_STK)
3938	{
3939	continue;
3940	}
3941	}
3942	else
3943	#endif // !_TARGET_64BIT_
3944	{
3945	// If not a stack arg go to the next one
3946	if (!regArgTab[argNum].stackArg)
3947	{
3948	continue;
3949	}
3950	}
3951
3952	#if defined(_TARGET_ARM_)
3953	if (varDsc->lvType == TYP_DOUBLE)
3954	{
3955	if (regArgTab[argNum].slot == `2`)
3956	{
3957	// We handled the entire double when processing the first half (slot == 1)
3958	continue;
3959	}
3960	}
3961	#endif
3962
3963	noway_assert(regArgTab[argNum].circular == false);
3964
3965	noway_assert(varDsc->lvIsParam);
3966	noway_assert(varDsc->lvIsRegArg);
3967	noway_assert(varDsc->lvIsInReg() == false \|\|
3968	(varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == `2`));
3969
3970	var_types storeType = TYP_UNDEF;
3971	unsigned slotSize = TARGET_POINTER_SIZE;
3972
3973	if (varTypeIsStruct(varDsc))
3974	{
3975	storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
3976	#if FEATURE_MULTIREG_ARGS
3977	// Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
3978	noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
3979	#endif // FEATURE_MULTIREG_ARGS
3980	#ifdef UNIX_AMD64_ABI
3981	storeType = regArgTab[argNum].type;
3982	#endif // !UNIX_AMD64_ABI
3983	if (varDsc->lvIsHfaRegArg())
3984	{
3985	#ifdef _TARGET_ARM_
3986	// On ARM32 the storeType for HFA args is always TYP_FLOAT
3987	storeType = TYP_FLOAT;
3988	slotSize = (unsigned)emitActualTypeSize(storeType);
3989	#else // _TARGET_ARM64_
3990	storeType = genActualType(varDsc->GetHfaType());
3991	slotSize = (unsigned)emitActualTypeSize(storeType);
3992	#endif // _TARGET_ARM64_
3993	}
3994	}
3995	else // Not a struct type
3996	{
3997	storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
3998	}
3999	size = emitActualTypeSize(storeType);
4000	#ifdef _TARGET_X86_
4001	noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
4002	#endif //_TARGET_X86_
4003
4004	regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
4005
4006	// Stack argument - if the ref count is 0 don't care about it
4007
4008	if (!varDsc->lvOnFrame)
4009	{
4010	noway_assert(varDsc->lvRefCnt() == `0`);
4011	}
4012	else
4013	{
4014	// Since slot is typically 1, baseOffset is typically 0
4015	int baseOffset = (regArgTab[argNum].slot - `1`) * slotSize;
4016
4017	getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
4018
4019	#ifndef UNIX_AMD64_ABI
4020	// Check if we are writing past the end of the struct
4021	if (varTypeIsStruct(varDsc))
4022	{
4023	assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
4024	}
4025	#endif // !UNIX_AMD64_ABI
4026
4027	if (regArgTab[argNum].slot == `1`)
4028	{
4029	psiMoveToStack(varNum);
4030	}
4031	}
4032
4033	/ mark the argument as processed /
4034
4035	regArgTab[argNum].processed = true;
4036	regArgMaskLive &= ~genRegMask(srcRegNum);
4037
4038	#if defined(_TARGET_ARM_)
4039	if (storeType == TYP_DOUBLE)
4040	{
4041	regArgTab[argNum + `1`].processed = true;
4042	regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
4043	}
4044	#endif
4045	}
4046
4047	/ Process any circular dependencies /
4048	if (regArgMaskLive)
4049	{
4050	unsigned begReg, destReg, srcReg;
4051	unsigned varNumDest, varNumSrc;
4052	LclVarDsc* varDscDest;
4053	LclVarDsc* varDscSrc;
4054	instruction insCopy = INS_mov;
4055
4056	if (doingFloat)
4057	{
4058	#if defined(FEATURE_HFA) \|\| defined(UNIX_AMD64_ABI)
4059	insCopy = ins_Copy(TYP_DOUBLE);
4060	// Compute xtraReg here when we have a float argument
4061	assert(xtraReg == REG_NA);
4062
4063	regMaskTP fpAvailMask;
4064
4065	fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
4066	#if defined(FEATURE_HFA)
4067	fpAvailMask &= RBM_ALLDOUBLE;
4068	#else
4069	#if !defined(UNIX_AMD64_ABI)
4070	#error Error. Wrong architecture.
4071	#endif // !defined(UNIX_AMD64_ABI)
4072	#endif // defined(FEATURE_HFA)
4073
4074	if (fpAvailMask == RBM_NONE)
4075	{
4076	fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
4077	#if defined(FEATURE_HFA)
4078	fpAvailMask &= RBM_ALLDOUBLE;
4079	#else
4080	#if !defined(UNIX_AMD64_ABI)
4081	#error Error. Wrong architecture.
4082	#endif // !defined(UNIX_AMD64_ABI)
4083	#endif // defined(FEATURE_HFA)
4084	}
4085
4086	assert(fpAvailMask != RBM_NONE);
4087
4088	// We pick the lowest avail register number
4089	regMaskTP tempMask = genFindLowestBit(fpAvailMask);
4090	xtraReg = genRegNumFromMask(tempMask);
4091	#elif defined(_TARGET_X86_)
4092	// This case shouldn't occur on x86 since NYI gets converted to an assert
4093	NYI("Homing circular FP registers via xtraReg");
4094	#endif
4095	}
4096
4097	for (argNum = `0`; argNum < argMax; argNum++)
4098	{
4099	// If not a circular dependency then continue
4100	if (!regArgTab[argNum].circular)
4101	{
4102	continue;
4103	}
4104
4105	// If already processed the dependency then continue
4106
4107	if (regArgTab[argNum].processed)
4108	{
4109	continue;
4110	}
4111
4112	if (regArgTab[argNum].slot == `0`) // Not a register argument
4113	{
4114	continue;
4115	}
4116
4117	destReg = begReg = argNum;
4118	srcReg = regArgTab[argNum].trashBy;
4119
4120	varNumDest = regArgTab[destReg].varNum;
4121	noway_assert(varNumDest < compiler->lvaCount);
4122	varDscDest = compiler->lvaTable + varNumDest;
4123	noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
4124
4125	noway_assert(srcReg < argMax);
4126	varNumSrc = regArgTab[srcReg].varNum;
4127	noway_assert(varNumSrc < compiler->lvaCount);
4128	varDscSrc = compiler->lvaTable + varNumSrc;
4129	noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4130
4131	emitAttr size = EA_PTRSIZE;
4132
4133	#ifdef _TARGET_XARCH_
4134	//
4135	// The following code relies upon the target architecture having an
4136	// 'xchg' instruction which directly swaps the values held in two registers.
4137	// On the ARM architecture we do not have such an instruction.
4138	//
4139	if (destReg == regArgTab[srcReg].trashBy)
4140	{
4141	/ only 2 registers form the circular dependency - use "xchg" /
4142
4143	varNum = regArgTab[argNum].varNum;
4144	noway_assert(varNum < compiler->lvaCount);
4145	varDsc = compiler->lvaTable + varNum;
4146	noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4147
4148	noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
4149
4150	/ Set "size" to indicate GC if one and only one of*
4151	* the operands is a pointer
4152	* RATIONALE: If both are pointers, nothing changes in
4153	* the GC pointer tracking. If only one is a pointer we
4154	* have to "swap" the registers in the GC reg pointer mask
4155	*/
4156
4157	if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
4158	{
4159	size = EA_GCREF;
4160	}
4161
4162	noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
4163
4164	getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
4165	regSet.verifyRegUsed(varDscSrc->lvRegNum);
4166	regSet.verifyRegUsed(varDscSrc->lvArgReg);
4167
4168	/ mark both arguments as processed /
4169	regArgTab[destReg].processed = true;
4170	regArgTab[srcReg].processed = true;
4171
4172	regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
4173	regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
4174
4175	psiMoveToReg(varNumSrc);
4176	psiMoveToReg(varNumDest);
4177	}
4178	else
4179	#endif // _TARGET_XARCH_
4180	{
4181	var_types destMemType = varDscDest->TypeGet();
4182
4183	#ifdef _TARGET_ARM_
4184	bool cycleAllDouble = true; // assume the best
4185
4186	unsigned iter = begReg;
4187	do
4188	{
4189	if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
4190	{
4191	cycleAllDouble = false;
4192	break;
4193	}
4194	iter = regArgTab[iter].trashBy;
4195	} while (iter != begReg);
4196
4197	// We may treat doubles as floats for ARM because we could have partial circular
4198	// dependencies of a float with a lo/hi part of the double. We mark the
4199	// trashBy values for each slot of the double, so let the circular dependency
4200	// logic work its way out for floats rather than doubles. If a cycle has all
4201	// doubles, then optimize so that instead of two vmov.f32's to move a double,
4202	// we can use one vmov.f64.
4203	//
4204	if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4205	{
4206	destMemType = TYP_FLOAT;
4207	}
4208	#endif // _TARGET_ARM_
4209
4210	if (destMemType == TYP_REF)
4211	{
4212	size = EA_GCREF;
4213	}
4214	else if (destMemType == TYP_BYREF)
4215	{
4216	size = EA_BYREF;
4217	}
4218	else if (destMemType == TYP_DOUBLE)
4219	{
4220	size = EA_8BYTE;
4221	}
4222	else if (destMemType == TYP_FLOAT)
4223	{
4224	size = EA_4BYTE;
4225	}
4226
4227	/ move the dest reg (begReg) in the extra reg /
4228
4229	assert(xtraReg != REG_NA);
4230
4231	regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
4232
4233	getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
4234
4235	regSet.verifyRegUsed(xtraReg);
4236
4237	pXtraRegClobbered = true*;
4238
4239	psiMoveToReg(varNumDest, xtraReg);
4240
4241	/ start moving everything to its right place /
4242
4243	while (srcReg != begReg)
4244	{
4245	/ mov dest, src /
4246
4247	regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4248	regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
4249
4250	getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
4251
4252	regSet.verifyRegUsed(destRegNum);
4253
4254	/ mark 'src' as processed /
4255	noway_assert(srcReg < argMax);
4256	regArgTab[srcReg].processed = true;
4257	#ifdef _TARGET_ARM_
4258	if (size == EA_8BYTE)
4259	regArgTab[srcReg + `1`].processed = true;
4260	#endif
4261	regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4262
4263	/ move to the next pair /
4264	destReg = srcReg;
4265	srcReg = regArgTab[srcReg].trashBy;
4266
4267	varDscDest = varDscSrc;
4268	destMemType = varDscDest->TypeGet();
4269	#ifdef _TARGET_ARM_
4270	if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4271	{
4272	destMemType = TYP_FLOAT;
4273	}
4274	#endif
4275	varNumSrc = regArgTab[srcReg].varNum;
4276	noway_assert(varNumSrc < compiler->lvaCount);
4277	varDscSrc = compiler->lvaTable + varNumSrc;
4278	noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4279
4280	if (destMemType == TYP_REF)
4281	{
4282	size = EA_GCREF;
4283	}
4284	else if (destMemType == TYP_DOUBLE)
4285	{
4286	size = EA_8BYTE;
4287	}
4288	else
4289	{
4290	size = EA_4BYTE;
4291	}
4292	}
4293
4294	/ take care of the beginning register /
4295
4296	noway_assert(srcReg == begReg);
4297
4298	/ move the dest reg (begReg) in the extra reg /
4299
4300	regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4301
4302	getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
4303
4304	regSet.verifyRegUsed(destRegNum);
4305
4306	psiMoveToReg(varNumSrc);
4307
4308	/ mark the beginning register as processed /
4309
4310	regArgTab[srcReg].processed = true;
4311	#ifdef _TARGET_ARM_
4312	if (size == EA_8BYTE)
4313	regArgTab[srcReg + `1`].processed = true;
4314	#endif
4315	regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4316	}
4317	}
4318	}
4319
4320	/ Finally take care of the remaining arguments that must be enregistered /
4321	while (regArgMaskLive)
4322	{
4323	regMaskTP regArgMaskLiveSave = regArgMaskLive;
4324
4325	for (argNum = `0`; argNum < argMax; argNum++)
4326	{
4327	/ If already processed go to the next one /
4328	if (regArgTab[argNum].processed)
4329	{
4330	continue;
4331	}
4332
4333	if (regArgTab[argNum].slot == `0`)
4334	{ // Not a register argument
4335	continue;
4336	}
4337
4338	varNum = regArgTab[argNum].varNum;
4339	noway_assert(varNum < compiler->lvaCount);
4340	varDsc = compiler->lvaTable + varNum;
4341	var_types regType = regArgTab[argNum].getRegType(compiler);
4342	regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
4343
4344	#if defined(UNIX_AMD64_ABI)
4345	if (regType == TYP_UNDEF)
4346	{
4347	// This could happen if the reg in regArgTab[argNum] is of the other register file -
4348	// for System V register passed structs where the first reg is GPR and the second an XMM reg.
4349	// The next register file processing will process it.
4350	regArgMaskLive &= ~genRegMask(regNum);
4351	continue;
4352	}
4353	#endif // defined(UNIX_AMD64_ABI)
4354
4355	noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4356	#ifndef _TARGET_64BIT_
4357	#ifndef _TARGET_ARM_
4358	// Right now we think that incoming arguments are not pointer sized. When we eventually
4359	// understand the calling convention, this still won't be true. But maybe we'll have a better
4360	// idea of how to ignore it.
4361
4362	// On Arm, a long can be passed in register
4363	noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == TARGET_POINTER_SIZE);
4364	#endif
4365	#endif //_TARGET_64BIT_
4366
4367	noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
4368
4369	/ Register argument - hopefully it stays in the same register /
4370	regNumber destRegNum = REG_NA;
4371	var_types destMemType = varDsc->TypeGet();
4372
4373	if (regArgTab[argNum].slot == `1`)
4374	{
4375	destRegNum = varDsc->lvRegNum;
4376
4377	#ifdef _TARGET_ARM_
4378	if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + `1`].processed)
4379	{
4380	// The second half of the double has already been processed! Treat this as a single.
4381	destMemType = TYP_FLOAT;
4382	}
4383	#endif // _TARGET_ARM_
4384	}
4385	#ifndef _TARGET_64BIT_
4386	else if (regArgTab[argNum].slot == `2` && genActualType(destMemType) == TYP_LONG)
4387	{
4388	assert(genActualType(varDsc->TypeGet()) == TYP_LONG \|\| genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
4389	if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
4390	{
4391	destRegNum = regNum;
4392	}
4393	else
4394	{
4395	destRegNum = varDsc->lvOtherReg;
4396	}
4397
4398	assert(destRegNum != REG_STK);
4399	}
4400	else
4401	{
4402	assert(regArgTab[argNum].slot == `2`);
4403	assert(destMemType == TYP_DOUBLE);
4404
4405	// For doubles, we move the entire double using the argNum representing
4406	// the first half of the double. There are two things we won't do:
4407	// (1) move the double when the 1st half of the destination is free but the
4408	// 2nd half is occupied, and (2) move the double when the 2nd half of the
4409	// destination is free but the 1st half is occupied. Here we consider the
4410	// case where the first half can't be moved initially because its target is
4411	// still busy, but the second half can be moved. We wait until the entire
4412	// double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
4413	// and F2 single moving to F16. When we process F0, its target F2 is busy,
4414	// so we skip it on the first pass. When we process F1, its target F3 is
4415	// available. However, we want to move F0/F1 all at once, so we skip it here.
4416	// We process F2, which frees up F2. The next pass through, we process F0 and
4417	// F2/F3 are empty, so we move it. Note that if half of a double is involved
4418	// in a circularity with a single, then we will have already moved that half
4419	// above, so we go ahead and move the remaining half as a single.
4420	// Because there are no circularities left, we are guaranteed to terminate.
4421
4422	assert(argNum > `0`);
4423	assert(regArgTab[argNum - `1`].slot == `1`);
4424
4425	if (!regArgTab[argNum - `1`].processed)
4426	{
4427	// The first half of the double hasn't been processed; try to be processed at the same time
4428	continue;
4429	}
4430
4431	// The first half of the double has been processed but the second half hasn't!
4432	// This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
4433	// In that case, there is a F0/F2 loop that is not a double-only loop. The circular
4434	// dependency logic above will move them as singles, leaving just F3 to move. Treat
4435	// it as a single to finish the shuffling.
4436
4437	destMemType = TYP_FLOAT;
4438	destRegNum = REG_NEXT(varDsc->lvRegNum);
4439	}
4440	#endif // !_TARGET_64BIT_
4441	#if (defined(UNIX_AMD64_ABI) \|\| defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4442	else
4443	{
4444	assert(regArgTab[argNum].slot == `2`);
4445	assert(argNum > `0`);
4446	assert(regArgTab[argNum - `1`].slot == `1`);
4447	assert((varDsc->lvType == TYP_SIMD12) \|\| (varDsc->lvType == TYP_SIMD16));
4448	destRegNum = varDsc->lvRegNum;
4449	noway_assert(regNum != destRegNum);
4450	continue;
4451	}
4452	#endif // (defined(UNIX_AMD64_ABI) \|\| defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4453	noway_assert(destRegNum != REG_NA);
4454	if (destRegNum != regNum)
4455	{
4456	/ Cannot trash a currently live register argument.*
4457	* Skip this one until its target will be free
4458	* which is guaranteed to happen since we have no circular dependencies. */
4459
4460	regMaskTP destMask = genRegMask(destRegNum);
4461	#ifdef _TARGET_ARM_
4462	// Don't process the double until both halves of the destination are clear.
4463	if (genActualType(destMemType) == TYP_DOUBLE)
4464	{
4465	assert((destMask & RBM_DBL_REGS) != `0`);
4466	destMask \|= genRegMask(REG_NEXT(destRegNum));
4467	}
4468	#endif
4469
4470	if (destMask & regArgMaskLive)
4471	{
4472	continue;
4473	}
4474
4475	/ Move it to the new register /
4476
4477	emitAttr size = emitActualTypeSize(destMemType);
4478
4479	#if defined(_TARGET_ARM64_)
4480	if (varTypeIsSIMD(varDsc) && argNum < (argMax - `1`) && regArgTab[argNum + `1`].slot == `2`)
4481	{
4482	// For a SIMD type that is passed in two integer registers,
4483	// Limit the copy below to the first 8 bytes from the first integer register.
4484	// Handle the remaining 8 bytes from the second slot in the code further below
4485	assert(EA_SIZE(size) >= `8`);
4486	size = EA_8BYTE;
4487	}
4488	#endif
4489
4490	getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
4491
4492	psiMoveToReg(varNum);
4493	}
4494
4495	/ mark the argument as processed /
4496
4497	assert(!regArgTab[argNum].processed);
4498	regArgTab[argNum].processed = true;
4499	regArgMaskLive &= ~genRegMask(regNum);
4500	#if FEATURE_MULTIREG_ARGS
4501	int argRegCount = `1`;
4502	#ifdef _TARGET_ARM_
4503	if (genActualType(destMemType) == TYP_DOUBLE)
4504	{
4505	argRegCount = `2`;
4506	}
4507	#endif
4508	#if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4509	if (varTypeIsStruct(varDsc) && argNum < (argMax - `1`) && regArgTab[argNum + `1`].slot == `2`)
4510	{
4511	argRegCount = `2`;
4512	int nextArgNum = argNum + `1`;
4513	regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4514	noway_assert(regArgTab[nextArgNum].varNum == varNum);
4515	// Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
4516	// and moves the 0th element of the src reg into the 1st element of the dest reg.
4517	getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, `0`);
4518	// Set destRegNum to regNum so that we skip the setting of the register below,
4519	// but mark argNum as processed and clear regNum from the live mask.
4520	destRegNum = regNum;
4521	}
4522	#endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4523	#if defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4524	if (varTypeIsSIMD(varDsc) && argNum < (argMax - `1`) && regArgTab[argNum + `1`].slot == `2`)
4525	{
4526	// For a SIMD type that is passed in two integer registers,
4527	// Code above copies the first integer argument register into the lower 8 bytes
4528	// of the target register. Here we must handle the second 8 bytes of the slot pair by
4529	// inserting the second integer register into the upper 8 bytes of the target
4530	// SIMD floating point register.
4531	argRegCount = `2`;
4532	int nextArgNum = argNum + `1`;
4533	regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4534	noway_assert(regArgTab[nextArgNum].varNum == varNum);
4535	noway_assert(genIsValidIntReg(nextRegNum));
4536	noway_assert(genIsValidFloatReg(destRegNum));
4537	getEmitter()->emitIns_R_R_I(INS_mov, EA_8BYTE, destRegNum, nextRegNum, `1`);
4538	}
4539	#endif // defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4540
4541	// Mark the rest of the argument registers corresponding to this multi-reg type as
4542	// being processed and no longer live.
4543	for (int regSlot = `1`; regSlot < argRegCount; regSlot++)
4544	{
4545	int nextArgNum = argNum + regSlot;
4546	assert(!regArgTab[nextArgNum].processed);
4547	regArgTab[nextArgNum].processed = true;
4548	regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4549	regArgMaskLive &= ~genRegMask(nextRegNum);
4550	}
4551	#endif // FEATURE_MULTIREG_ARGS
4552	}
4553
4554	noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
4555	}
4556	}
4557	#ifdef _PREFAST_
4558	#pragma warning(pop)
4559	#endif
4560
4561	/*****************************************************************************
4562	* If any incoming stack arguments live in registers, load them.
4563	*/
4564	void CodeGen::genEnregisterIncomingStackArgs()
4565	{
4566	#ifdef DEBUG
4567	if (verbose)
4568	{
4569	printf("*************** In genEnregisterIncomingStackArgs()\n");
4570	}
4571	#endif
4572
4573	assert(compiler->compGeneratingProlog);
4574
4575	unsigned varNum = `0`;
4576
4577	for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4578	{
4579	/ Is this variable a parameter? /
4580
4581	if (!varDsc->lvIsParam)
4582	{
4583	continue;
4584	}
4585
4586	/ If it's a register argument then it's already been taken care of.*
4587	But, on Arm when under a profiler, we would have prespilled a register argument
4588	and hence here we need to load it from its prespilled location.
4589	*/
4590	bool isPrespilledForProfiling = false;
4591	#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4592	isPrespilledForProfiling =
4593	compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
4594	#endif
4595
4596	if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
4597	{
4598	continue;
4599	}
4600
4601	/ Has the parameter been assigned to a register? /
4602
4603	if (!varDsc->lvIsInReg())
4604	{
4605	continue;
4606	}
4607
4608	var_types type = genActualType(varDsc->TypeGet());
4609
4610	/ Is the variable dead on entry /
4611
4612	if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4613	{
4614	continue;
4615	}
4616
4617	/ Load the incoming parameter into the register /
4618
4619	/ Figure out the home offset of the incoming argument /
4620
4621	regNumber regNum = varDsc->lvArgInitReg;
4622	assert(regNum != REG_STK);
4623
4624	getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, `0`);
4625	regSet.verifyRegUsed(regNum);
4626
4627	psiMoveToReg(varNum);
4628	}
4629	}
4630
/*-------------------------------------------------------------------------
 *
 *  We have to decide whether we're going to use block initialization
 *  in the prolog before we assign final stack offsets. This is because
 *  when using block initialization we may need additional callee-saved
 *  registers which need to be saved on the frame, thus increasing the
 *  frame size.
 *
 *  We'll count the number of locals we have to initialize,
 *  and if there are lots of them we'll use block initialization.
 *  Thus, the local variable table must have accurate register location
 *  information for enregistered locals for their register state on entry
 *  to the function.
 *
 *  At the same time we set lvMustInit for locals (enregistered or on stack)
 *  that must be initialized (e.g. when initializing memory (compInitMem),
 *  for untracked pointers, or when DFA is disabled).
 */
4649	void CodeGen::genCheckUseBlockInit()
4650	{
4651	assert(!compiler->compGeneratingProlog);
4652
4653	unsigned initStkLclCnt = `0`; // The number of int-sized stack local variables that need to be initialized (variables
4654	// larger than int count for more than 1).
4655	unsigned largeGcStructs = `0`; // The number of "large" structs with GC pointers. Used as part of the heuristic to
4656	// determine whether to use block init.
4657
4658	unsigned varNum;
4659	LclVarDsc* varDsc;
4660
4661	for (varNum = `0`, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4662	{
4663	if (varDsc->lvIsParam)
4664	{
4665	continue;
4666	}
4667
4668	if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
4669	{
4670	noway_assert(varDsc->lvRefCnt() == `0`);
4671	continue;
4672	}
4673
4674	if (varNum == compiler->lvaInlinedPInvokeFrameVar \|\| varNum == compiler->lvaStubArgumentVar)
4675	{
4676	continue;
4677	}
4678
4679	#if FEATURE_FIXED_OUT_ARGS
4680	if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
4681	{
4682	continue;
4683	}
4684	if (varNum == compiler->lvaOutgoingArgSpaceVar)
4685	{
4686	continue;
4687	}
4688	#endif
4689
4690	#if FEATURE_EH_FUNCLETS
4691	// There's no need to force 0-initialization of the PSPSym, it will be
4692	// initialized with a real value in the prolog
4693	if (varNum == compiler->lvaPSPSym)
4694	{
4695	continue;
4696	}
4697	#endif
4698
4699	if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
4700	{
4701	// For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
4702	// initialized by the parent struct. No need to set the lvMustInit bit in the
4703	// field locals.
4704	continue;
4705	}
4706
4707	if (compiler->info.compInitMem \|\| varTypeIsGC(varDsc->TypeGet()) \|\| (varDsc->lvStructGcCount > `0`) \|\|
4708	varDsc->lvMustInit)
4709	{
4710	if (varDsc->lvTracked)
4711	{
4712	/ For uninitialized use of tracked variables, the liveness*
4713	* will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
4714	*/
4715	if (varDsc->lvMustInit \|\|
4716	VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4717	{
4718	/ This var must be initialized /
4719
4720	varDsc->lvMustInit = `1`;
4721
4722	/ See if the variable is on the stack will be initialized*
4723	* using rep stos - compute the total size to be zero-ed */
4724
4725	if (varDsc->lvOnFrame)
4726	{
4727	if (!varDsc->lvRegister)
4728	{
4729	if (!varDsc->lvIsInReg())
4730	{
4731	// Var is on the stack at entry.
4732	initStkLclCnt +=
4733	roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4734	}
4735	}
4736	else
4737	{
4738	// Var is partially enregistered
4739	noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
4740	initStkLclCnt += genTypeStSz(TYP_INT);
4741	}
4742	}
4743	}
4744	}
4745
4746	/ With compInitMem, all untracked vars will have to be init'ed /
4747	/ VSW 102460 - Do not force initialization of compiler generated temps,*
4748	unless they are untracked GC type or structs that contain GC pointers /*
4749	CLANG_FORMAT_COMMENT_ANCHOR;
4750
4751	#if FEATURE_SIMD
4752	// TODO-1stClassStructs
4753	// This is here to duplicate previous behavior, where TYP_SIMD8 locals
4754	// were not being re-typed correctly.
4755	if ((!varDsc->lvTracked \|\| (varDsc->lvType == TYP_STRUCT) \|\| (varDsc->lvType == TYP_SIMD8)) &&
4756	#else // !FEATURE_SIMD
4757	if ((!varDsc->lvTracked \|\| (varDsc->lvType == TYP_STRUCT)) &&
4758	#endif // !FEATURE_SIMD
4759	varDsc->lvOnFrame &&
4760	(!varDsc->lvIsTemp \|\| varTypeIsGC(varDsc->TypeGet()) \|\| (varDsc->lvStructGcCount > `0`)))
4761	{
4762	varDsc->lvMustInit = true;
4763
4764	initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4765	}
4766
4767	continue;
4768	}
4769
4770	/ Ignore if not a pointer variable or value class with a GC field /
4771
4772	if (!compiler->lvaTypeIsGC(varNum))
4773	{
4774	continue;
4775	}
4776
4777	/ If we don't know lifetimes of variables, must be conservative /
4778	if (!compiler->backendRequiresLocalVarLifetimes())
4779	{
4780	varDsc->lvMustInit = true;
4781	noway_assert(!varDsc->lvRegister);
4782	}
4783	else
4784	{
4785	if (!varDsc->lvTracked)
4786	{
4787	varDsc->lvMustInit = true;
4788	}
4789	}
4790
4791	/ Is this a 'must-init' stack pointer local? /
4792
4793	if (varDsc->lvMustInit && varDsc->lvOnFrame)
4794	{
4795	initStkLclCnt += varDsc->lvStructGcCount;
4796	}
4797
4798	if ((compiler->lvaLclSize(varNum) > (`3` * TARGET_POINTER_SIZE)) && (largeGcStructs <= `4`))
4799	{
4800	largeGcStructs++;
4801	}
4802	}
4803
4804	/ Don't forget about spill temps that hold pointers /
4805
4806	if (!TRACK_GC_TEMP_LIFETIMES)
4807	{
4808	assert(regSet.tmpAllFree());
4809	for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
4810	{
4811	if (varTypeIsGC(tempThis->tdTempType()))
4812	{
4813	initStkLclCnt++;
4814	}
4815	}
4816	}
4817
4818	// After debugging this further it was found that this logic is incorrect:
4819	// it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case)
4820	// and this also double counts variables (we saw this in the debugger) around line 4829.
4821	// Even though this doesn't pose a problem with correctness it will improperly decide to
4822	// zero init the stack using a block operation instead of a 'case by case' basis.
4823	genInitStkLclCnt = initStkLclCnt;
4824
4825	/ If we have more than 4 untracked locals, use block initialization /
4826	/ TODO-Review: If we have large structs, bias toward not using block initialization since*
4827	we waste all the other slots. Really need to compute the correct
4828	and compare that against zeroing the slots individually /*
4829
4830	genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + `4`));
4831
4832	if (genUseBlockInit)
4833	{
4834	regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
4835
4836	// If there is a secret stub param, don't count it, as it will no longer
4837	// be live when we do block init.
4838	if (compiler->info.compPublishStubParam)
4839	{
4840	maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
4841	}
4842
4843	#ifdef _TARGET_XARCH_
4844	// If we're going to use "REP STOS", remember that we will trash EDI
4845	// For fastcall we will have to save ECX, EAX
4846	// so reserve two extra callee saved
4847	// This is better than pushing eax, ecx, because we in the later
4848	// we will mess up already computed offsets on the stack (for ESP frames)
4849	regSet.rsSetRegsModified(RBM_EDI);
4850
4851	#ifdef UNIX_AMD64_ABI
4852	// For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
4853	// In such case use R12 and R13 registers.
4854	if (maskCalleeRegArgMask & RBM_RCX)
4855	{
4856	regSet.rsSetRegsModified(RBM_R12);
4857	}
4858
4859	if (maskCalleeRegArgMask & RBM_RDI)
4860	{
4861	regSet.rsSetRegsModified(RBM_R13);
4862	}
4863	#else // !UNIX_AMD64_ABI
4864	if (maskCalleeRegArgMask & RBM_ECX)
4865	{
4866	regSet.rsSetRegsModified(RBM_ESI);
4867	}
4868	#endif // !UNIX_AMD64_ABI
4869
4870	if (maskCalleeRegArgMask & RBM_EAX)
4871	{
4872	regSet.rsSetRegsModified(RBM_EBX);
4873	}
4874
4875	#endif // _TARGET_XARCH_
4876	#ifdef _TARGET_ARM_
4877	//
4878	// On the Arm if we are using a block init to initialize, then we
4879	// must force spill R4/R5/R6 so that we can use them during
4880	// zero-initialization process.
4881	//
4882	int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - `1`;
4883	if (forceSpillRegCount > `0`)
4884	regSet.rsSetRegsModified(RBM_R4);
4885	if (forceSpillRegCount > `1`)
4886	regSet.rsSetRegsModified(RBM_R5);
4887	if (forceSpillRegCount > `2`)
4888	regSet.rsSetRegsModified(RBM_R6);
4889	#endif // _TARGET_ARM_
4890	}
4891	}
4892
/*-----------------------------------------------------------------------------
 *
 *  Push any callee-saved registers we have used
 */
4897
4898	#if defined(_TARGET_ARM64_)
4899	void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
4900	#else
4901	void CodeGen::genPushCalleeSavedRegisters()
4902	#endif
4903	{
4904	assert(compiler->compGeneratingProlog);
4905
4906	#if defined(_TARGET_XARCH_)
4907	// x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
4908	// here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
4909	// here.
4910	regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
4911	#else // !defined(_TARGET_XARCH_)
4912	regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
4913	#endif
4914
4915	#if ETW_EBP_FRAMED
4916	if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
4917	{
4918	noway_assert(!"Used register RBM_FPBASE as a scratch register!");
4919	}
4920	#endif
4921
4922	#ifdef _TARGET_XARCH_
4923	// On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
4924	if (isFramePointerUsed())
4925	{
4926	rsPushRegs &= ~RBM_FPBASE;
4927	}
4928	#endif
4929
4930	#ifdef _TARGET_ARMARCH_
4931	// On ARM we push the FP (frame-pointer) here along with all other callee saved registers
4932	if (isFramePointerUsed())
4933	rsPushRegs \|= RBM_FPBASE;
4934
4935	//
4936	// It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
4937	// changes in GC suspension architecture.
4938	//
4939	// We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
4940	// generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
4941	// method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
4942	// on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
4943	// be saved on the stack and the GC suspension would time out.
4944	//
4945	// So if we wanted to skip pushing pushing/popping lr for leaf frames, we would also need to do one of
4946	// the following to make GC suspension work in the above scenario:
4947	// - Make return address hijacking work even when lr is not saved on the stack.
4948	// - Generate fully interruptible code for loops that contains calls
4949	// - Generate fully interruptible code for leaf methods
4950	//
4951	// Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
4952	// is not worth it.
4953	//
4954	rsPushRegs \|= RBM_LR; // We must save the return address (in the LR register)
4955
4956	regSet.rsMaskCalleeSaved = rsPushRegs;
4957	#endif // _TARGET_ARMARCH_
4958
4959	#ifdef DEBUG
4960	if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
4961	{
4962	printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
4963	compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
4964	dspRegMask(rsPushRegs);
4965	printf("\n");
4966	assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
4967	}
4968	#endif // DEBUG
4969
4970	#if defined(_TARGET_ARM_)
4971	regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
4972	regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
4973
4974	maskPushRegsInt \|= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
4975
4976	assert(FitsIn<int>(maskPushRegsInt));
4977	inst_IV(INS_push, (int)maskPushRegsInt);
4978	compiler->unwindPushMaskInt(maskPushRegsInt);
4979
4980	if (maskPushRegsFloat != `0`)
4981	{
4982	genPushFltRegs(maskPushRegsFloat);
4983	compiler->unwindPushMaskFloat(maskPushRegsFloat);
4984	}
4985	#elif defined(_TARGET_ARM64_)
4986	// See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
4987	// options. Case numbers in comments here refer to this document.
4988	//
4989	// For most frames, generate, e.g.:
4990	// stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
4991	// // ensures stack stays aligned.
4992	// stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
4993	// // at top of frame (highest addresses).
4994	// stp r21, r22, [sp, 0x70]
4995	//
4996	// Notes:
4997	// 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
4998	// at the top of the frame.
4999	// 2. If we save FP, then the first store is FP, LR.
5000	// 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
5001	// preserve their lower 8 bytes, by calling convention.
5002	// 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
5003	// consecutive.
5004	// 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
5005
5006	int totalFrameSize = genTotalFrameSize();
5007
5008	int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
5009
5010	regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
5011	regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
5012
5013	int frameType = `0`; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
5014	// generate based on various sizes.
5015	int calleeSaveSPDelta = `0`;
5016	int calleeSaveSPDeltaUnaligned = `0`;
5017
5018	if (isFramePointerUsed())
5019	{
5020	// We need to save both FP and LR.
5021
5022	assert((maskSaveRegsInt & RBM_FP) != `0`);
5023	assert((maskSaveRegsInt & RBM_LR) != `0`);
5024
5025	if ((compiler->lvaOutgoingArgSpaceSize == `0`) && (totalFrameSize < `512`))
5026	{
5027	// Case #1.
5028	//
5029	// Generate:
5030	// stp fp,lr,[sp,#-framesz]!
5031	//
5032	// The (totalFrameSize < 512) condition ensures that both the predecrement
5033	// and the postincrement of SP can occur with STP.
5034	//
5035	// After saving callee-saved registers, we establish the frame pointer with:
5036	// mov fp,sp
5037	// We do this after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.*
5038
5039	frameType = `1`;
5040
5041	getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
5042	INS_OPTS_PRE_INDEX);
5043	compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
5044
5045	maskSaveRegsInt &= ~(RBM_FP \| RBM_LR); // We've already saved FP/LR
5046	offset = (int)compiler->compLclFrameSize + `2` * REGSIZE_BYTES; // 2 for FP/LR
5047	}
5048	else if (totalFrameSize <= `512`)
5049	{
5050	// Case #2.
5051	//
5052	// Generate:
5053	// sub sp,sp,#framesz
5054	// stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
5055	//
5056	// The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
5057	// signed offset encoding.
5058	//
5059	// After saving callee-saved registers, we establish the frame pointer with:
5060	// add fp,sp,#outsz
5061	// We do this after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.*
5062
5063	frameType = `2`;
5064
5065	assert(compiler->lvaOutgoingArgSpaceSize + `2` * REGSIZE_BYTES <= (unsigned)totalFrameSize);
5066
5067	getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
5068	compiler->unwindAllocStack(totalFrameSize);
5069
5070	getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
5071	compiler->lvaOutgoingArgSpaceSize);
5072	compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
5073
5074	maskSaveRegsInt &= ~(RBM_FP \| RBM_LR); // We've already saved FP/LR
5075	offset = (int)compiler->compLclFrameSize + `2` * REGSIZE_BYTES; // 2 for FP/LR
5076	}
5077	else
5078	{
5079	// Case 5 or 6.
5080	//
5081	// First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
5082	// to subtract from SP as the first instruction. It must also leave space for varargs registers to be
5083	// stored. For example:
5084	// stp r19,r20,[sp,#-96]!
5085	// stp d8,d9,[sp,#16]
5086	// ... save varargs incoming integer registers ...
5087	// Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
5088	// lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
5089	// So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
5090	// register):
5091	// sub sp,sp,#16
5092	// str r19,[sp,#8]
5093	// This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
5094	// possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
5095	// above them. If that is preferable, we could implement it.
5096	// Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
5097	//
5098	// Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
5099	// padding from above).
5100	// Note that #remainingFrameSz must not be zero, since we still need to save FP,SP.
5101	//
5102	// Generate:
5103	// sub sp,sp,#remainingFrameSz
5104	// or, for large frames:
5105	// mov rX, #remainingFrameSz // maybe multiple instructions
5106	// sub sp,sp,rX
5107	//
5108	// followed by:
5109	// stp fp,lr,[sp,#outsz]
5110	// add fp,sp,#outsz
5111	//
5112	// However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
5113	// handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
5114	// STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
5115	// following sequences:
5116	//
5117	// Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
5118	//
5119	// sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned
5120	// stp fp,lr,[sp]
5121	// mov fp,sp
5122	// sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
5123	//
5124	// Or:
5125	//
5126	// sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
5127	// // always guaranteed to be 8 byte aligned).
5128	// stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
5129	// add fp,sp,#8
5130	// sub sp,sp,#outsz - #8
5131	//
5132	// (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
5133	// mov rX, #outsz - #8 // maybe multiple instructions
5134	// sub sp,sp,rX
5135	// )
5136
5137	frameType = `3`;
5138
5139	calleeSaveSPDeltaUnaligned =
5140	totalFrameSize - compiler->compLclFrameSize - `2` * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
5141	assert(calleeSaveSPDeltaUnaligned >= `0`);
5142	assert((calleeSaveSPDeltaUnaligned % `8`) == `0`); // It better at least be 8 byte aligned.
5143	calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5144
5145	offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5146	assert((offset == `0`) \|\| (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
5147	// store the callee-saved registers.
5148
5149	// We'll take care of these later, but callee-saved regs code shouldn't see them.
5150	maskSaveRegsInt &= ~(RBM_FP \| RBM_LR);
5151	}
5152	}
5153	else
5154	{
5155	// No frame pointer (no chaining).
5156	assert((maskSaveRegsInt & RBM_FP) == `0`);
5157	assert((maskSaveRegsInt & RBM_LR) != `0`);
5158
5159	// Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
5160	// if we only have one callee-saved register plus LR to save.
5161
5162	NYI("Frame without frame pointer");
5163	offset = `0`;
5164	}
5165
5166	assert(frameType != `0`);
5167
5168	genSaveCalleeSavedRegistersHelp(maskSaveRegsInt \| maskSaveRegsFloat, offset, -calleeSaveSPDelta);
5169
5170	offset += genCountBits(maskSaveRegsInt \| maskSaveRegsFloat) * REGSIZE_BYTES;
5171
5172	// For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
5173	// so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
5174	// need to add codes at all.
5175
5176	if (compiler->info.compIsVarArgs)
5177	{
5178	// There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
5179	assert((offset % `16`) == `0`);
5180	for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
5181	{
5182	regNumber reg2 = REG_NEXT(reg1);
5183	// stp REG, REG + 1, [SP, #offset]
5184	getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
5185	compiler->unwindNop();
5186	offset += `2` * REGSIZE_BYTES;
5187	}
5188	}
5189
5190	if (frameType == `1`)
5191	{
5192	getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
5193	compiler->unwindSetFrameReg(REG_FPBASE, `0`);
5194	}
5195	else if (frameType == `2`)
5196	{
5197	getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
5198	compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5199	}
5200	else if (frameType == `3`)
5201	{
5202	int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5203	assert(remainingFrameSz > `0`);
5204	assert((remainingFrameSz % `16`) == `0`); // this is guaranteed to be 16-byte aligned because each component --
5205	// totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
5206
5207	if (compiler->lvaOutgoingArgSpaceSize >= `504`)
5208	{
5209	// We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
5210	// If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
5211	assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
5212	int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
5213	int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
5214	int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
5215	assert((alignmentAdjustment2 == `0`) \|\| (alignmentAdjustment2 == `8`));
5216
5217	genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
5218	offset += spAdjustment2;
5219
5220	// Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
5221	// some of it)
5222
5223	int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
5224	assert(spAdjustment3 > `0`);
5225	assert((spAdjustment3 % `16`) == `0`);
5226
5227	getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
5228	compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
5229
5230	genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
5231	offset += spAdjustment3;
5232	}
5233	else
5234	{
5235	genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
5236	pInitRegZeroed);
5237	offset += remainingFrameSz;
5238
5239	getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
5240	compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5241	}
5242	}
5243
5244	assert(offset == totalFrameSize);
5245
5246	#elif defined(_TARGET_XARCH_)
5247	// Push backwards so we match the order we will pop them in the epilog
5248	// and all the other code that expects it to be in this order.
5249	for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
5250	{
5251	regMaskTP regBit = genRegMask(reg);
5252
5253	if ((regBit & rsPushRegs) != `0`)
5254	{
5255	inst_RV(INS_push, reg, TYP_REF);
5256	compiler->unwindPush(reg);
5257
5258	if (!doubleAlignOrFramePointerUsed())
5259	{
5260	psiAdjustStackLevel(REGSIZE_BYTES);
5261	}
5262
5263	rsPushRegs &= ~regBit;
5264	}
5265	}
5266
5267	#else
5268	assert(!"Unknown TARGET");
5269	#endif // _TARGET_*
5270	}
5271
5272	#if defined(_TARGET_ARM_)
5273
5274	void CodeGen::genPushFltRegs(regMaskTP regMask)
5275	{
5276	assert(regMask != `0`); // Don't call uness we have some registers to push
5277	assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
5278
5279	regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5280	int slots = genCountBits(regMask);
5281	// regMask should be contiguously set
5282	regMaskTP tmpMask = ((regMask >> lowReg) + `1`); // tmpMask should have a single bit set
5283	assert((tmpMask & (tmpMask - `1`)) == `0`);
5284	assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
5285
5286	// Our calling convention requires that we only use vpush for TYP_DOUBLE registers
5287	noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5288	noway_assert((slots % `2`) == `0`);
5289
5290	getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / `2`);
5291	}
5292
5293	void CodeGen::genPopFltRegs(regMaskTP regMask)
5294	{
5295	assert(regMask != `0`); // Don't call uness we have some registers to pop
5296	assert((regMask & RBM_ALLFLOAT) == regMask); // Only floasting point registers should be in regMask
5297
5298	regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5299	int slots = genCountBits(regMask);
5300	// regMask should be contiguously set
5301	regMaskTP tmpMask = ((regMask >> lowReg) + `1`); // tmpMask should have a single bit set
5302	assert((tmpMask & (tmpMask - `1`)) == `0`);
5303
5304	// Our calling convention requires that we only use vpop for TYP_DOUBLE registers
5305	noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5306	noway_assert((slots % `2`) == `0`);
5307
5308	getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / `2`);
5309	}
5310
5311	/-----------------------------------------------------------------------------*
5312	*
5313	* If we have a jmp call, then the argument registers cannot be used in the
5314	* epilog. So return the current call's argument registers as the argument
5315	* registers for the jmp call.
5316	*/
5317	regMaskTP CodeGen::genJmpCallArgMask()
5318	{
5319	assert(compiler->compGeneratingEpilog);
5320
5321	regMaskTP argMask = RBM_NONE;
5322	for (unsigned varNum = `0`; varNum < compiler->info.compArgsCount; ++varNum)
5323	{
5324	const LclVarDsc& desc = compiler->lvaTable[varNum];
5325	if (desc.lvIsRegArg)
5326	{
5327	argMask \|= genRegMask(desc.lvArgReg);
5328	}
5329	}
5330	return argMask;
5331	}
5332
5333	/-----------------------------------------------------------------------------*
5334	*
5335	* Free the local stack frame: add to SP.
5336	* If epilog unwind hasn't been started, and we generate code, we start unwind
5337	* and set *pUnwindStarted = true.
5338	*/
5339
5340	void CodeGen::genFreeLclFrame(unsigned frameSize, / IN OUT / bool* pUnwindStarted, bool jmpEpilog)
5341	{
5342	assert(compiler->compGeneratingEpilog);
5343
5344	if (frameSize == `0`)
5345	return;
5346
5347	// Add 'frameSize' to SP.
5348	//
5349	// Unfortunately, we can't just use:
5350	//
5351	// inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
5352	//
5353	// because we need to generate proper unwind codes for each instruction generated,
5354	// and large frame sizes might generate a temp register load which might
5355	// need an unwind code. We don't want to generate a "NOP" code for this
5356	// temp register load; we want the unwind codes to start after that.
5357
5358	if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
5359	{
5360	if (!*pUnwindStarted)
5361	{
5362	compiler->unwindBegEpilog();
5363	pUnwindStarted = true*;
5364	}
5365
5366	getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);
5367	}
5368	else
5369	{
5370	regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
5371	if (jmpEpilog)
5372	{
5373	// Do not use argument registers as scratch registers in the jmp epilog.
5374	grabMask &= ~genJmpCallArgMask();
5375	}
5376	regNumber tmpReg = REG_TMP_0;
5377	instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
5378	if (*pUnwindStarted)
5379	{
5380	compiler->unwindPadding();
5381	}
5382
5383	// We're going to generate an unwindable instruction, so check again if
5384	// we need to start the unwind codes.
5385
5386	if (!*pUnwindStarted)
5387	{
5388	compiler->unwindBegEpilog();
5389	pUnwindStarted = true*;
5390	}
5391
5392	getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);
5393	}
5394
5395	compiler->unwindAllocStack(frameSize);
5396	}
5397
5398	/-----------------------------------------------------------------------------*
5399	*
5400	* Move of relocatable displacement value to register
5401	*/
5402	void CodeGen::genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg)
5403	{
5404	getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block, reg);
5405	getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block, reg);
5406
5407	if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5408	{
5409	getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE_DSP_RELOC, reg, reg, REG_PC);
5410	}
5411	}
5412
5413	/-----------------------------------------------------------------------------*
5414	*
5415	* Move of relocatable data-label to register
5416	*/
5417	void CodeGen::genMov32RelocatableDataLabel(unsigned value, regNumber reg)
5418	{
5419	getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, value, reg);
5420	getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, value, reg);
5421
5422	if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5423	{
5424	getEmitter()->emitIns_R_R_R(INS_add, EA_HANDLE_CNS_RELOC, reg, reg, REG_PC);
5425	}
5426	}
5427
5428	/-----------------------------------------------------------------------------*
5429	*
5430	* Move of relocatable immediate to register
5431	*/
5432	void CodeGen::genMov32RelocatableImmediate(emitAttr size, BYTE* addr, regNumber reg)
5433	{
5434	_ASSERTE(EA_IS_RELOC(size));
5435
5436	getEmitter()->emitIns_MovRelocatableImmediate(INS_movw, size, reg, addr);
5437	getEmitter()->emitIns_MovRelocatableImmediate(INS_movt, size, reg, addr);
5438
5439	if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5440	{
5441	getEmitter()->emitIns_R_R_R(INS_add, size, reg, reg, REG_PC);
5442	}
5443	}
5444
5445	/-----------------------------------------------------------------------------*
5446	*
5447	* Returns register mask to push/pop to allocate a small stack frame,
5448	* instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
5449	* is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
5450	*/
5451	regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
5452	{
5453	assert(compiler->compGeneratingProlog \|\| compiler->compGeneratingEpilog);
5454
5455	// We can't do this optimization with callee saved floating point registers because
5456	// the stack would be allocated in a wrong spot.
5457	if (maskCalleeSavedFloat != RBM_NONE)
5458	return RBM_NONE;
5459
5460	// Allocate space for small frames by pushing extra registers. It generates smaller and faster code
5461	// that extra sub sp,XXX/add sp,XXX.
5462	// R0 and R1 may be used by return value. Keep things simple and just skip the optimization
5463	// for the 3REGSIZE_BYTES and 4REGSIZE_BYTES cases. They are less common and they have more
5464	// significant negative side-effects (more memory bus traffic).
5465	switch (frameSize)
5466	{
5467	case REGSIZE_BYTES:
5468	return RBM_R3;
5469	case `2` * REGSIZE_BYTES:
5470	return RBM_R2 \| RBM_R3;
5471	default:
5472	return RBM_NONE;
5473	}
5474	}
5475
5476	#endif // _TARGET_ARM_
5477
5478	/*****************************************************************************
5479	*
5480	* initFltRegs -- The mask of float regs to be zeroed.
5481	* initDblRegs -- The mask of double regs to be zeroed.
5482	* initReg -- A zero initialized integer reg to copy from.
5483	*
5484	* Does best effort to move between VFP/xmm regs if one is already
5485	* initialized to 0. (Arm Only) Else copies from the integer register which
5486	* is slower.
5487	*/
5488	void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
5489	{
5490	assert(compiler->compGeneratingProlog);
5491
5492	// The first float/double reg that is initialized to 0. So they can be used to
5493	// initialize the remaining registers.
5494	regNumber fltInitReg = REG_NA;
5495	regNumber dblInitReg = REG_NA;
5496
5497	// Iterate through float/double registers and initialize them to 0 or
5498	// copy from already initialized register of the same type.
5499	regMaskTP regMask = genRegMask(REG_FP_FIRST);
5500	for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= `1`)
5501	{
5502	if (regMask & initFltRegs)
5503	{
5504	// Do we have a float register already set to 0?
5505	if (fltInitReg != REG_NA)
5506	{
5507	// Copy from float.
5508	inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
5509	}
5510	else
5511	{
5512	#ifdef _TARGET_ARM_
5513	// Do we have a double register initialized to 0?
5514	if (dblInitReg != REG_NA)
5515	{
5516	// Copy from double.
5517	inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
5518	}
5519	else
5520	{
5521	// Copy from int.
5522	inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
5523	}
5524	#elif defined(_TARGET_XARCH_)
5525	// XORPS is the fastest and smallest way to initialize a XMM register to zero.
5526	inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
5527	dblInitReg = reg;
5528	#elif defined(_TARGET_ARM64_)
5529	// We will just zero out the entire vector register. This sets it to a double/float zero value
5530	getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, `0x00`, INS_OPTS_16B);
5531	#else // _TARGET_*
5532	#error Unsupported or unset target architecture
5533	#endif
5534	fltInitReg = reg;
5535	}
5536	}
5537	else if (regMask & initDblRegs)
5538	{
5539	// Do we have a double register already set to 0?
5540	if (dblInitReg != REG_NA)
5541	{
5542	// Copy from double.
5543	inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
5544	}
5545	else
5546	{
5547	#ifdef _TARGET_ARM_
5548	// Do we have a float register initialized to 0?
5549	if (fltInitReg != REG_NA)
5550	{
5551	// Copy from float.
5552	inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
5553	}
5554	else
5555	{
5556	// Copy from int.
5557	inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
5558	}
5559	#elif defined(_TARGET_XARCH_)
5560	// XORPS is the fastest and smallest way to initialize a XMM register to zero.
5561	inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
5562	fltInitReg = reg;
5563	#elif defined(_TARGET_ARM64_)
5564	// We will just zero out the entire vector register. This sets it to a double/float zero value
5565	getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, `0x00`, INS_OPTS_16B);
5566	#else // _TARGET_*
5567	#error Unsupported or unset target architecture
5568	#endif
5569	dblInitReg = reg;
5570	}
5571	}
5572	}
5573	}
5574
/*-----------------------------------------------------------------------------
 *
 *  Restore any callee-saved registers we have used
 */
5579
5580	#if defined(_TARGET_ARM_)
5581
5582	bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
5583	{
5584	assert(compiler->compGeneratingEpilog);
5585
5586	if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
5587	return true;
5588	else
5589	return false;
5590	}
5591
5592	void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5593	{
5594	assert(compiler->compGeneratingEpilog);
5595
5596	regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5597	regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
5598	regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
5599
5600	// First, pop float registers
5601
5602	if (maskPopRegsFloat != RBM_NONE)
5603	{
5604	genPopFltRegs(maskPopRegsFloat);
5605	compiler->unwindPopMaskFloat(maskPopRegsFloat);
5606	}
5607
5608	// Next, pop integer registers
5609
5610	if (!jmpEpilog)
5611	{
5612	regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
5613	maskPopRegsInt \|= maskStackAlloc;
5614	}
5615
5616	if (isFramePointerUsed())
5617	{
5618	assert(!regSet.rsRegsModified(RBM_FPBASE));
5619	maskPopRegsInt \|= RBM_FPBASE;
5620	}
5621
5622	if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
5623	{
5624	maskPopRegsInt \|= RBM_PC;
5625	// Record the fact that we use a pop to the PC to perform the return
5626	genUsedPopToReturn = true;
5627	}
5628	else
5629	{
5630	maskPopRegsInt \|= RBM_LR;
5631	// Record the fact that we did not use a pop to the PC to perform the return
5632	genUsedPopToReturn = false;
5633	}
5634
5635	assert(FitsIn<int>(maskPopRegsInt));
5636	inst_IV(INS_pop, (int)maskPopRegsInt);
5637	compiler->unwindPopMaskInt(maskPopRegsInt);
5638	}
5639
5640	#elif defined(_TARGET_ARM64_)
5641
5642	void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
5643	{
5644	assert(compiler->compGeneratingEpilog);
5645
5646	regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5647
5648	if (isFramePointerUsed())
5649	{
5650	rsRestoreRegs \|= RBM_FPBASE;
5651	}
5652
5653	rsRestoreRegs \|= RBM_LR; // We must save/restore the return address (in the LR register)
5654
5655	regMaskTP regsToRestoreMask = rsRestoreRegs;
5656
5657	int totalFrameSize = genTotalFrameSize();
5658
5659	int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
5660	// order.
5661	int frameType = `0`; // An indicator of what type of frame we are popping.
5662	int calleeSaveSPDelta = `0`;
5663	int calleeSaveSPDeltaUnaligned = `0`;
5664
5665	if (isFramePointerUsed())
5666	{
5667	if ((compiler->lvaOutgoingArgSpaceSize == `0`) && (totalFrameSize < `512`))
5668	{
5669	frameType = `1`;
5670	if (compiler->compLocallocUsed)
5671	{
5672	// Restore sp from fp
5673	// mov sp, fp
5674	inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
5675	compiler->unwindSetFrameReg(REG_FPBASE, `0`);
5676	}
5677
5678	regsToRestoreMask &= ~(RBM_FP \| RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
5679
5680	// Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
5681	// of stack.
5682	calleeSaveSPOffset = compiler->compLclFrameSize + `2` * REGSIZE_BYTES;
5683	}
5684	else if (totalFrameSize <= `512`)
5685	{
5686	frameType = `2`;
5687	if (compiler->compLocallocUsed)
5688	{
5689	// Restore sp from fp
5690	// sub sp, fp, #outsz
5691	getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
5692	compiler->lvaOutgoingArgSpaceSize);
5693	compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5694	}
5695
5696	regsToRestoreMask &= ~(RBM_FP \| RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
5697
5698	// Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
5699	// of stack.
5700	calleeSaveSPOffset = compiler->compLclFrameSize + `2` * REGSIZE_BYTES;
5701	}
5702	else
5703	{
5704	frameType = `3`;
5705
5706	calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
5707	`2` * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
5708	assert(calleeSaveSPDeltaUnaligned >= `0`);
5709	assert((calleeSaveSPDeltaUnaligned % `8`) == `0`); // It better at least be 8 byte aligned.
5710	calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5711
5712	regsToRestoreMask &= ~(RBM_FP \| RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
5713
5714	int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5715	assert(remainingFrameSz > `0`);
5716
5717	if (compiler->lvaOutgoingArgSpaceSize >= `504`)
5718	{
5719	// We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
5720	// If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
5721	assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
5722	int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
5723	int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
5724	int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
5725	assert((alignmentAdjustment2 == `0`) \|\| (alignmentAdjustment2 == REGSIZE_BYTES));
5726
5727	if (compiler->compLocallocUsed)
5728	{
5729	// Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
5730	// prolog.
5731	// sub sp, fp, #alignmentAdjustment2
5732	getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
5733	compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
5734	}
5735	else
5736	{
5737	// Generate:
5738	// add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more
5739	// ; careful
5740	int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
5741	assert(spAdjustment3 > `0`);
5742	assert((spAdjustment3 % `16`) == `0`);
5743	genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);
5744	}
5745
5746	// Generate:
5747	// ldp fp,lr,[sp]
5748	// add sp,sp,#remainingFrameSz
5749	genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, REG_IP1, nullptr);
5750	}
5751	else
5752	{
5753	if (compiler->compLocallocUsed)
5754	{
5755	// Restore sp from fp
5756	// sub sp, fp, #outsz
5757	getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
5758	compiler->lvaOutgoingArgSpaceSize);
5759	compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5760	}
5761
5762	// Generate:
5763	// ldp fp,lr,[sp,#outsz]
5764	// add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
5765	// ; it's large
5766
5767	genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP1,
5768	nullptr);
5769	}
5770
5771	// Unlike frameType=1 or frameType=2 that restore SP at the end,
5772	// frameType=3 already adjusted SP above to delete local frame.
5773	// There is at most one alignment slot between SP and where we store the callee-saved registers.
5774	calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5775	assert((calleeSaveSPOffset == `0`) \|\| (calleeSaveSPOffset == REGSIZE_BYTES));
5776	}
5777	}
5778	else
5779	{
5780	// No frame pointer (no chaining).
5781	NYI("Frame without frame pointer");
5782	calleeSaveSPOffset = `0`;
5783	}
5784
5785	genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
5786
5787	if (frameType == `1`)
5788	{
5789	// Generate:
5790	// ldp fp,lr,[sp],#framesz
5791
5792	getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
5793	INS_OPTS_POST_INDEX);
5794	compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
5795	}
5796	else if (frameType == `2`)
5797	{
5798	// Generate:
5799	// ldr fp,lr,[sp,#outsz]
5800	// add sp,sp,#framesz
5801
5802	getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
5803	compiler->lvaOutgoingArgSpaceSize);
5804	compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
5805
5806	getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
5807	compiler->unwindAllocStack(totalFrameSize);
5808	}
5809	else if (frameType == `3`)
5810	{
5811	// Nothing to do after restoring callee-saved registers.
5812	}
5813	else
5814	{
5815	unreached();
5816	}
5817	}
5818
5819	#elif defined(_TARGET_XARCH_)
5820
5821	void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5822	{
5823	assert(compiler->compGeneratingEpilog);
5824
5825	unsigned popCount = `0`;
5826	if (regSet.rsRegsModified(RBM_EBX))
5827	{
5828	popCount++;
5829	inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
5830	}
5831	if (regSet.rsRegsModified(RBM_FPBASE))
5832	{
5833	// EBP cannot be directly modified for EBP frame and double-aligned frames
5834	assert(!doubleAlignOrFramePointerUsed());
5835
5836	popCount++;
5837	inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
5838	}
5839
5840	#ifndef UNIX_AMD64_ABI
5841	// For System V AMD64 calling convention ESI and EDI are volatile registers.
5842	if (regSet.rsRegsModified(RBM_ESI))
5843	{
5844	popCount++;
5845	inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
5846	}
5847	if (regSet.rsRegsModified(RBM_EDI))
5848	{
5849	popCount++;
5850	inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
5851	}
5852	#endif // !defined(UNIX_AMD64_ABI)
5853
5854	#ifdef _TARGET_AMD64_
5855	if (regSet.rsRegsModified(RBM_R12))
5856	{
5857	popCount++;
5858	inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
5859	}
5860	if (regSet.rsRegsModified(RBM_R13))
5861	{
5862	popCount++;
5863	inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
5864	}
5865	if (regSet.rsRegsModified(RBM_R14))
5866	{
5867	popCount++;
5868	inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
5869	}
5870	if (regSet.rsRegsModified(RBM_R15))
5871	{
5872	popCount++;
5873	inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
5874	}
5875	#endif // _TARGET_AMD64_
5876
5877	// Amd64/x86 doesn't support push/pop of xmm registers.
5878	// These will get saved to stack separately after allocating
5879	// space on stack in prolog sequence. PopCount is essentially
5880	// tracking the count of integer registers pushed.
5881
5882	noway_assert(compiler->compCalleeRegsPushed == popCount);
5883	}
5884
5885	#elif defined(_TARGET_X86_)
5886
5887	void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5888	{
5889	assert(compiler->compGeneratingEpilog);
5890
5891	unsigned popCount = `0`;
5892
5893	/ NOTE: The EBP-less frame code below depends on the fact that*
5894	all of the pops are generated right at the start and
5895	each takes one byte of machine code.
5896	*/
5897
5898	if (regSet.rsRegsModified(RBM_FPBASE))
5899	{
5900	// EBP cannot be directly modified for EBP frame and double-aligned frames
5901	noway_assert(!doubleAlignOrFramePointerUsed());
5902
5903	inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
5904	popCount++;
5905	}
5906	if (regSet.rsRegsModified(RBM_EBX))
5907	{
5908	popCount++;
5909	inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
5910	}
5911	if (regSet.rsRegsModified(RBM_ESI))
5912	{
5913	popCount++;
5914	inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
5915	}
5916	if (regSet.rsRegsModified(RBM_EDI))
5917	{
5918	popCount++;
5919	inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
5920	}
5921	noway_assert(compiler->compCalleeRegsPushed == popCount);
5922	}
5923
5924	#endif // _TARGET_*
5925
// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
5928	regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
5929	{
5930	#ifdef _TARGET_ARM64_
5931	return REG_ZR;
5932	#else // !_TARGET_ARM64_
5933	if (pInitRegZeroed == false*)
5934	{
5935	instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
5936	pInitRegZeroed = true*;
5937	}
5938	return initReg;
5939	#endif // !_TARGET_ARM64_
5940	}
5941
/*-----------------------------------------------------------------------------
 *
 * Do we have any untracked pointer locals at all,
 * or do we need to initialize memory for locspace?
 *
 * untrLclHi      - (Untracked locals High-Offset)  The upper bound offset at which the zero init code will end
 *                  initializing memory (not inclusive).
 * untrLclLo      - (Untracked locals Low-Offset)   The lower bound at which the zero init code will start zero
 *                  initializing memory.
 * initReg        - A scratch register (that gets set to zero on some platforms).
 * pInitRegZeroed - Sets a flag that tells the caller whether or not the initReg register got zeroed.
 */
5954	void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
5955	{
5956	assert(compiler->compGeneratingProlog);
5957
5958	if (genUseBlockInit)
5959	{
5960	assert(untrLclHi > untrLclLo);
5961	#ifdef _TARGET_ARMARCH_
5962	/*
5963	Generate the following code:
5964
5965	For cnt less than 10
5966
5967	mov rZero1, 0
5968	mov rZero2, 0
5969	mov rCnt, <cnt>
5970	stm <rZero1,rZero2>,[rAddr!]
5971	<optional> stm <rZero1,rZero2>,[rAddr!]
5972	<optional> stm <rZero1,rZero2>,[rAddr!]
5973	<optional> stm <rZero1,rZero2>,[rAddr!]
5974	<optional> str rZero1,[rAddr]
5975
5976	For rCnt greater than or equal to 10
5977
5978	mov rZero1, 0
5979	mov rZero2, 0
5980	mov rCnt, <cnt/2>
5981	sub rAddr, sp, OFFS
5982
5983	loop:
5984	stm <rZero1,rZero2>,[rAddr!]
5985	sub rCnt,rCnt,1
5986	jnz loop
5987
5988	<optional> str rZero1,[rAddr] // When cnt is odd
5989
5990	NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
5991	*/
5992
5993	regNumber rAddr;
5994	regNumber rCnt = REG_NA; // Invalid
5995	regMaskTP regMask;
5996
5997	regMaskTP availMask = regSet.rsGetModifiedRegsMask() \| RBM_INT_CALLEE_TRASH; // Set of available registers
5998	availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
5999	// currently live
6000	availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
6001	// a large constant.
6002
6003	#if defined(_TARGET_ARM_)
6004
6005	if (compiler->compLocallocUsed)
6006	{
6007	availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
6008	}
6009
6010	regNumber rZero1; // We're going to use initReg for rZero1
6011	regNumber rZero2;
6012
6013	// We pick the next lowest register number for rZero2
6014	noway_assert(availMask != RBM_NONE);
6015	regMask = genFindLowestBit(availMask);
6016	rZero2 = genRegNumFromMask(regMask);
6017	availMask &= ~regMask;
6018	assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6019	`0`); // rZero2 is not a live incoming argument reg
6020
6021	// We pick the next lowest register number for rAddr
6022	noway_assert(availMask != RBM_NONE);
6023	regMask = genFindLowestBit(availMask);
6024	rAddr = genRegNumFromMask(regMask);
6025	availMask &= ~regMask;
6026
6027	#else // !define(_TARGET_ARM_)
6028
6029	regNumber rZero1 = REG_ZR;
6030	rAddr = initReg;
6031	pInitRegZeroed = false*;
6032
6033	#endif // !defined(_TARGET_ARM_)
6034
6035	bool useLoop = false;
6036	unsigned uCntBytes = untrLclHi - untrLclLo;
6037	assert((uCntBytes % sizeof(int)) == `0`); // The smallest stack slot is always 4 bytes.
6038	unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.
6039
6040	// When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
6041	// When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
6042	// In both of these cases the stm/stp instruction will write two zeros to memory
6043	// and we will use a single str instruction at the end whenever we have an odd count.
6044	if (uCntSlots >= `10`)
6045	useLoop = true;
6046
6047	if (useLoop)
6048	{
6049	// We pick the next lowest register number for rCnt
6050	noway_assert(availMask != RBM_NONE);
6051	regMask = genFindLowestBit(availMask);
6052	rCnt = genRegNumFromMask(regMask);
6053	availMask &= ~regMask;
6054	}
6055
6056	assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6057	`0`); // rAddr is not a live incoming argument reg
6058	#if defined(_TARGET_ARM_)
6059	if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
6060	#else // !_TARGET_ARM_
6061	if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
6062	#endif // !_TARGET_ARM_
6063	{
6064	getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
6065	}
6066	else
6067	{
6068	// Load immediate into the InitReg register
6069	instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
6070	getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
6071	pInitRegZeroed = false*;
6072	}
6073
6074	if (useLoop)
6075	{
6076	noway_assert(uCntSlots >= `2`);
6077	assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6078	`0`); // rCnt is not a live incoming argument reg
6079	instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / `2`);
6080	}
6081
6082	#if defined(_TARGET_ARM_)
6083	rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
6084	instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
6085	target_ssize_t stmImm = (target_ssize_t)(genRegMask(rZero1) \| genRegMask(rZero2));
6086	#endif // _TARGET_ARM_
6087
6088	if (!useLoop)
6089	{
6090	while (uCntBytes >= REGSIZE_BYTES * `2`)
6091	{
6092	#ifdef _TARGET_ARM_
6093	getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
6094	#else // !_TARGET_ARM_
6095	getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, `2` * REGSIZE_BYTES,
6096	INS_OPTS_POST_INDEX);
6097	#endif // !_TARGET_ARM_
6098	uCntBytes -= REGSIZE_BYTES * `2`;
6099	}
6100	}
6101	else // useLoop is true
6102	{
6103	#ifdef _TARGET_ARM_
6104	getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
6105	getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, `1`, INS_FLAGS_SET);
6106	#else // !_TARGET_ARM_
6107	getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, `2` * REGSIZE_BYTES,
6108	INS_OPTS_POST_INDEX); // zero stack slots
6109	getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, `1`);
6110	#endif // !_TARGET_ARM_
6111	getEmitter()->emitIns_J(INS_bhi, NULL, -`3`);
6112	uCntBytes %= REGSIZE_BYTES * `2`;
6113	}
6114
6115	if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
6116	{
6117	#ifdef _TARGET_ARM_
6118	getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, `0`);
6119	#else // _TARGET_ARM_
6120	if ((uCntBytes - REGSIZE_BYTES) == `0`)
6121	{
6122	getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, `0`);
6123	}
6124	else
6125	{
6126	getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
6127	}
6128	#endif // !_TARGET_ARM_
6129	uCntBytes -= REGSIZE_BYTES;
6130	}
6131	#ifdef _TARGET_ARM64_
6132	if (uCntBytes > `0`)
6133	{
6134	assert(uCntBytes == sizeof(int));
6135	getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, `0`);
6136	uCntBytes -= sizeof(int);
6137	}
6138	#endif // _TARGET_ARM64_
6139	noway_assert(uCntBytes == `0`);
6140
6141	#elif defined(_TARGET_XARCH_)
6142	/*
6143	Generate the following code:
6144
6145	lea edi, [ebp/esp-OFFS]
6146	mov ecx, <size>
6147	xor eax, eax
6148	rep stosd
6149	*/
6150
6151	noway_assert(regSet.rsRegsModified(RBM_EDI));
6152
6153	#ifdef UNIX_AMD64_ABI
6154	// For register arguments we may have to save ECX and RDI on Amd64 System V OSes
6155	if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
6156	{
6157	noway_assert(regSet.rsRegsModified(RBM_R12));
6158	inst_RV_RV(INS_mov, REG_R12, REG_RCX);
6159	regSet.verifyRegUsed(REG_R12);
6160	}
6161
6162	if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
6163	{
6164	noway_assert(regSet.rsRegsModified(RBM_R13));
6165	inst_RV_RV(INS_mov, REG_R13, REG_RDI);
6166	regSet.verifyRegUsed(REG_R13);
6167	}
6168	#else // !UNIX_AMD64_ABI
6169	// For register arguments we may have to save ECX
6170	if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
6171	{
6172	noway_assert(regSet.rsRegsModified(RBM_ESI));
6173	inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
6174	regSet.verifyRegUsed(REG_ESI);
6175	}
6176	#endif // !UNIX_AMD64_ABI
6177
6178	noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == `0`);
6179
6180	getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
6181	regSet.verifyRegUsed(REG_EDI);
6182
6183	inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
6184	instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
6185	instGen(INS_r_stosd);
6186
6187	#ifdef UNIX_AMD64_ABI
6188	// Move back the argument registers
6189	if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
6190	{
6191	inst_RV_RV(INS_mov, REG_RCX, REG_R12);
6192	}
6193
6194	if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
6195	{
6196	inst_RV_RV(INS_mov, REG_RDI, REG_R13);
6197	}
6198	#else // !UNIX_AMD64_ABI
6199	// Move back the argument registers
6200	if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
6201	{
6202	inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
6203	}
6204	#endif // !UNIX_AMD64_ABI
6205
6206	#else // _TARGET_*
6207	#error Unsupported or unset target architecture
6208	#endif // _TARGET_*
6209	}
6210	else if (genInitStkLclCnt > `0`)
6211	{
6212	assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6213	`0`); // initReg is not a live incoming argument reg
6214
6215	/ Initialize any lvMustInit vars on the stack /
6216
6217	LclVarDsc* varDsc;
6218	unsigned varNum;
6219
6220	for (varNum = `0`, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
6221	{
6222	if (!varDsc->lvMustInit)
6223	{
6224	continue;
6225	}
6226
6227	// TODO-Review: I'm not sure that we're correctly handling the mustInit case for
6228	// partially-enregistered vars in the case where we don't use a block init.
6229	noway_assert(varDsc->lvIsInReg() \|\| varDsc->lvOnFrame);
6230
6231	// lvMustInit can only be set for GC types or TYP_STRUCT types
6232	// or when compInitMem is true
6233	// or when in debug code
6234
6235	noway_assert(varTypeIsGC(varDsc->TypeGet()) \|\| (varDsc->TypeGet() == TYP_STRUCT) \|\|
6236	compiler->info.compInitMem \|\| compiler->opts.compDbgCode);
6237
6238	if (!varDsc->lvOnFrame)
6239	{
6240	continue;
6241	}
6242
6243	if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
6244	(varDsc->lvExactSize >= TARGET_POINTER_SIZE))
6245	{
6246	// We only initialize the GC variables in the TYP_STRUCT
6247	const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
6248	const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
6249
6250	for (unsigned i = `0`; i < slots; i++)
6251	{
6252	if (gcPtrs[i] != TYPE_GC_NONE)
6253	{
6254	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
6255	genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
6256	}
6257	}
6258	}
6259	else
6260	{
6261	regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
6262
6263	// zero out the whole thing rounded up to a single stack slot size
6264	unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int));
6265	unsigned i;
6266	for (i = `0`; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
6267	{
6268	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
6269	}
6270
6271	#ifdef _TARGET_64BIT_
6272	assert(i == lclSize \|\| (i + sizeof(int) == lclSize));
6273	if (i != lclSize)
6274	{
6275	getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
6276	i += sizeof(int);
6277	}
6278	#endif // _TARGET_64BIT_
6279	assert(i == lclSize);
6280	}
6281	}
6282
6283	if (!TRACK_GC_TEMP_LIFETIMES)
6284	{
6285	assert(regSet.tmpAllFree());
6286	for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
6287	{
6288	if (!varTypeIsGC(tempThis->tdTempType()))
6289	{
6290	continue;
6291	}
6292
6293	// printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
6294
6295	inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, `0`, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
6296	}
6297	}
6298	}
6299	}
6300
/*-----------------------------------------------------------------------------
 *
 *  Save the generic context argument.
 *
 *  We need to do this within the "prolog" in case anyone tries to inspect
 *  the param-type-arg/this (which can be done after the prolog) using
 *  ICodeManager::GetParamTypeArg().
 */
6309
6310	void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
6311	{
6312	assert(compiler->compGeneratingProlog);
6313
6314	bool reportArg = compiler->lvaReportParamTypeArg();
6315
6316	// We should report either generic context arg or "this" when used so.
6317	if (!reportArg)
6318	{
6319	#ifndef JIT32_GCENCODER
6320	if (!compiler->lvaKeepAliveAndReportThis())
6321	#endif
6322	{
6323	return;
6324	}
6325	}
6326
6327	// For JIT32_GCENCODER, we won't be here if reportArg is false.
6328	unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
6329
6330	noway_assert(contextArg != BAD_VAR_NUM);
6331	LclVarDsc* varDsc = &compiler->lvaTable[contextArg];
6332
6333	// We are still in the prolog and compiler->info.compTypeCtxtArg has not been
6334	// moved to its final home location. So we need to use it from the
6335	// incoming location.
6336
6337	regNumber reg;
6338
6339	bool isPrespilledForProfiling = false;
6340	#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
6341	isPrespilledForProfiling =
6342	compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
6343	#endif
6344
6345	// Load from the argument register only if it is not prespilled.
6346	if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
6347	{
6348	reg = varDsc->lvArgReg;
6349	}
6350	else
6351	{
6352	if (isFramePointerUsed())
6353	{
6354	#if defined(_TARGET_ARM_)
6355	// lvStkOffs is always valid for incoming stack-arguments, even if the argument
6356	// will become enregistered.
6357	// On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2REGSIZE_BYTES*
6358	noway_assert((`2` * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
6359	(size_t(varDsc->lvStkOffs) < compiler->compArgSize + `2` * REGSIZE_BYTES));
6360	#else
6361	// lvStkOffs is always valid for incoming stack-arguments, even if the argument
6362	// will become enregistered.
6363	noway_assert((`0` < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
6364	#endif
6365	}
6366
6367	// We will just use the initReg since it is an available register
6368	// and we are probably done using it anyway...
6369	reg = initReg;
6370	pInitRegZeroed = false*;
6371
6372	// mov reg, [compiler->info.compTypeCtxtArg]
6373	getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
6374	regSet.verifyRegUsed(reg);
6375	}
6376
6377	#if CPU_LOAD_STORE_ARCH
6378	getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
6379	compiler->lvaCachedGenericContextArgOffset());
6380	#else // CPU_LOAD_STORE_ARCH
6381	// mov [ebp-lvaCachedGenericContextArgOffset()], reg
6382	getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
6383	compiler->lvaCachedGenericContextArgOffset());
6384	#endif // !CPU_LOAD_STORE_ARCH
6385	}
6386
/*-----------------------------------------------------------------------------
 *
 *  Set the "GS" security cookie in the prolog.
 */
6391
6392	void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
6393	{
6394	assert(compiler->compGeneratingProlog);
6395
6396	if (!compiler->getNeedsGSSecurityCookie())
6397	{
6398	return;
6399	}
6400
6401	noway_assert(compiler->gsGlobalSecurityCookieAddr \|\| compiler->gsGlobalSecurityCookieVal);
6402
6403	if (compiler->gsGlobalSecurityCookieAddr == nullptr)
6404	{
6405	#ifdef _TARGET_AMD64_
6406	// eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
6407	getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
6408	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, `0`);
6409	#else
6410	// mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
6411	instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
6412	compiler->lvaGSSecurityCookie, `0`, initReg);
6413	#endif
6414	}
6415	else
6416	{
6417	regNumber reg;
6418	#ifdef _TARGET_XARCH_
6419	// Always use EAX on x86 and x64
6420	// On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
6421	reg = REG_EAX;
6422	#else
6423	// We will just use the initReg since it is an available register
6424	reg = initReg;
6425	#endif
6426
6427	pInitRegZeroed = false*;
6428
6429	#if CPU_LOAD_STORE_ARCH
6430	instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
6431	getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, `0`);
6432	regSet.verifyRegUsed(reg);
6433	#else
6434	// mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
6435	// mov dword ptr [frame.GSSecurityCookie], reg
6436	getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
6437	regSet.verifyRegUsed(reg);
6438	#endif
6439	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, `0`);
6440	}
6441	}
6442
6443	#ifdef PROFILING_SUPPORTED
6444
6445	//-----------------------------------------------------------------------------------
6446	// genProfilingEnterCallback: Generate the profiling function enter callback.
6447	//
6448	// Arguments:
6449	// initReg - register to use as scratch register
//     pInitRegZeroed    - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
//                         not zero after this call.
6452	//
6453	// Return Value:
6454	// None
6455	//
6456	// Notes:
6457	// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
6458	// VM\i386\asmhelpers.asm for details):
6459	// 1. The calling sequence for calling the helper is:
6460	// push FunctionIDOrClientID
6461	// call ProfileEnterHelper
6462	// 2. The calling function has an EBP frame.
6463	// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
6464	// the following prolog is assumed:
6465	// push ESP
6466	// mov EBP, ESP
6467	// 4. All registers are preserved.
6468	// 5. The helper pops the FunctionIDOrClientID argument from the stack.
6469	//
6470	void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
6471	{
6472	assert(compiler->compGeneratingProlog);
6473
6474	// Give profiler a chance to back out of hooking this method
6475	if (!compiler->compIsProfilerHookNeeded())
6476	{
6477	return;
6478	}
6479
6480	#if defined(_TARGET_AMD64_)
6481	#if !defined(UNIX_AMD64_ABI)
6482
6483	unsigned varNum;
6484	LclVarDsc* varDsc;
6485
6486	// Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6487	noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6488	noway_assert(compiler->lvaOutgoingArgSpaceSize >= (`4` * REGSIZE_BYTES));
6489
6490	// Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
6491	// In case of vararg methods, arg regs are already homed.
6492	//
6493	// Note: Here we don't need to worry about updating gc'info since enter
6494	// callback is generated as part of prolog which is non-gc interruptible.
6495	// Moreover GC cannot kick while executing inside profiler callback which is a
6496	// profiler requirement so it can examine arguments which could be obj refs.
6497	if (!compiler->info.compIsVarArgs)
6498	{
6499	for (varNum = `0`, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6500	{
6501	noway_assert(varDsc->lvIsParam);
6502
6503	if (!varDsc->lvIsRegArg)
6504	{
6505	continue;
6506	}
6507
6508	var_types storeType = varDsc->lvaArgType();
6509	regNumber argReg = varDsc->lvArgReg;
6510
6511	instruction store_ins = ins_Store(storeType);
6512
6513	#ifdef FEATURE_SIMD
6514	if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
6515	{
6516	store_ins = INS_mov;
6517	}
6518	#endif // FEATURE_SIMD
6519
6520	getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, `0`);
6521	}
6522	}
6523
6524	// Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6525	// RCX = ProfilerMethHnd
6526	if (compiler->compProfilerMethHndIndirected)
6527	{
6528	// Profiler hooks enabled during Ngen time.
6529	// Profiler handle needs to be accessed through an indirection of a pointer.
6530	getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6531	}
6532	else
6533	{
6534	// No need to record relocations, if we are generating ELT hooks under the influence
6535	// of COMPlus_JitELTHookEnabled=1
6536	if (compiler->opts.compJitELTHookEnabled)
6537	{
6538	genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6539	}
6540	else
6541	{
6542	instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6543	}
6544	}
6545
6546	// RDX = caller's SP
6547	// Notes
6548	// 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6549	// 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6550	// of that offset to FramePointer to obtain caller's SP value.
6551	assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6552	int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(`0`, isFramePointerUsed());
6553	getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6554
6555	// Can't have a call until we have enough padding for rejit
6556	genPrologPadForReJit();
6557
6558	// This will emit either
6559	// "call ip-relative 32-bit offset" or
6560	// "mov rax, helper addr; call rax"
6561	genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, `0`, EA_UNKNOWN);
6562
6563	// TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
6564	// generation logic that moves args around as required by first BB entry point conditions
6565	// computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
6566	// and genEnregisterIncomingStackArgs().
6567	//
6568	// Now reload arg registers from home locations.
6569	// Vararg methods:
6570	// - we need to reload only known (i.e. fixed) reg args.
6571	// - if floating point type, also reload it into corresponding integer reg
6572	for (varNum = `0`, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6573	{
6574	noway_assert(varDsc->lvIsParam);
6575
6576	if (!varDsc->lvIsRegArg)
6577	{
6578	continue;
6579	}
6580
6581	var_types loadType = varDsc->lvaArgType();
6582	regNumber argReg = varDsc->lvArgReg;
6583
6584	instruction load_ins = ins_Load(loadType);
6585
6586	#ifdef FEATURE_SIMD
6587	if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
6588	{
6589	load_ins = INS_mov;
6590	}
6591	#endif // FEATURE_SIMD
6592
6593	getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, `0`);
6594
6595	#if FEATURE_VARARG
6596	if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
6597	{
6598	regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
6599	instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
6600	inst_RV_RV(ins, argReg, intArgReg, loadType);
6601	}
6602	#endif // FEATURE_VARARG
6603	}
6604
6605	// If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6606	if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != `0`)
6607	{
6608	pInitRegZeroed = false*;
6609	}
6610
6611	#else // !defined(UNIX_AMD64_ABI)
6612
6613	// Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6614	// R14 = ProfilerMethHnd
6615	if (compiler->compProfilerMethHndIndirected)
6616	{
6617	// Profiler hooks enabled during Ngen time.
6618	// Profiler handle needs to be accessed through an indirection of a pointer.
6619	getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
6620	(ssize_t)compiler->compProfilerMethHnd);
6621	}
6622	else
6623	{
6624	// No need to record relocations, if we are generating ELT hooks under the influence
6625	// of COMPlus_JitELTHookEnabled=1
6626	if (compiler->opts.compJitELTHookEnabled)
6627	{
6628	genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6629	}
6630	else
6631	{
6632	instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6633	}
6634	}
6635
6636	// R15 = caller's SP
6637	// Notes
6638	// 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6639	// 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6640	// of that offset to FramePointer to obtain caller's SP value.
6641	assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6642	int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(`0`, isFramePointerUsed());
6643	getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
6644
6645	// Can't have a call until we have enough padding for rejit
6646	genPrologPadForReJit();
6647
6648	// We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6649	// We use R11 here. This will emit either
6650	// "call ip-relative 32-bit offset" or
6651	// "mov r11, helper addr; call r11"
6652	genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, `0`, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6653
6654	// If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6655	if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != `0`)
6656	{
6657	pInitRegZeroed = false*;
6658	}
6659
6660	#endif // !defined(UNIX_AMD64_ABI)
6661
6662	#elif defined(_TARGET_X86_) \|\| defined(_TARGET_ARM_)
6663
6664	unsigned saveStackLvl2 = genStackLevel;
6665
6666	#if defined(_TARGET_X86_)
6667	// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
6668	// for x86 stack unwinding
6669
6670	#if defined(UNIX_X86_ABI)
6671	// Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6672	getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, `0xC`);
6673	#endif // UNIX_X86_ABI
6674
6675	// Push the profilerHandle
6676	if (compiler->compProfilerMethHndIndirected)
6677	{
6678	getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6679	}
6680	else
6681	{
6682	inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6683	}
6684
6685	#elif defined(_TARGET_ARM_)
6686	// On Arm arguments are prespilled on stack, which frees r0-r3.
6687	// For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
6688	// The call target register could be any free register.
6689	regNumber argReg = REG_PROFILER_ENTER_ARG;
6690	regMaskTP argRegMask = genRegMask(argReg);
6691	assert((regSet.rsMaskPreSpillRegArg & argRegMask) != `0`);
6692
6693	if (compiler->compProfilerMethHndIndirected)
6694	{
6695	getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
6696	regSet.verifyRegUsed(argReg);
6697	}
6698	else
6699	{
6700	instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
6701	}
6702	#else // _TARGET_*
6703	NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6704	#endif // _TARGET_*
6705
6706	//
6707	// Can't have a call until we have enough padding for rejit
6708	//
6709	genPrologPadForReJit();
6710
6711	// This will emit either
6712	// "call ip-relative 32-bit offset" or
6713	// "mov rax, helper addr; call rax"
6714	genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
6715	`0`, // argSize. Again, we have to lie about it
6716	EA_UNKNOWN); // retSize
6717
6718	#if defined(_TARGET_X86_)
6719	// Check that we have place for the push.
6720	assert(compiler->fgPtrArgCntMax >= `1`);
6721
6722	#if defined(UNIX_X86_ABI)
6723	// Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6724	getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, `0x10`);
6725	#endif // UNIX_X86_ABI
6726
6727	#elif defined(_TARGET_ARM_)
6728	if (initReg == argReg)
6729	{
6730	pInitRegZeroed = false*;
6731	}
6732	#else // _TARGET_*
6733	NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6734	#endif // _TARGET_*
6735
6736	/ Restore the stack level /
6737
6738	SetStackLevel(saveStackLvl2);
6739
6740	#else // target
6741	NYI("Emit Profiler Enter callback");
6742	#endif // target
6743	}
6744
6745	//-----------------------------------------------------------------------------------
6746	// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
6747	// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
6748	//
6749	// Arguments:
6750	// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
6751	//
6752	// Return Value:
6753	// None
6754	//
6755	// Notes:
6756	// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
6757	// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
6758	// 1. The calling sequence for calling the helper is:
6759	// push FunctionIDOrClientID
6760	// call ProfileLeaveHelper or ProfileTailcallHelper
6761	// 2. The calling function has an EBP frame.
6762	// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
6763	// the following prolog is assumed:
6764	// push ESP
6765	// mov EBP, ESP
6766	// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
6767	// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
6768	// 5. The helper pops the FunctionIDOrClientID argument from the stack.
6769	//
6770	void CodeGen::genProfilingLeaveCallback(unsigned helper /= CORINFO_HELP_PROF_FCN_LEAVE/)
6771	{
6772	assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) \|\| (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
6773
6774	// Only hook if profiler says it's okay.
6775	if (!compiler->compIsProfilerHookNeeded())
6776	{
6777	return;
6778	}
6779
6780	compiler->info.compProfilerCallback = true;
6781
6782	// Need to save on to the stack level, since the helper call will pop the argument
6783	unsigned saveStackLvl2 = genStackLevel;
6784
6785	#if defined(_TARGET_AMD64_)
6786	#if !defined(UNIX_AMD64_ABI)
6787
6788	// Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6789	noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6790	noway_assert(compiler->lvaOutgoingArgSpaceSize >= (`4` * REGSIZE_BYTES));
6791
6792	// If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
6793	// registers that profiler callback kills.
6794	if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
6795	{
6796	regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
6797	noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == `0`);
6798	}
6799
6800	// At this point return value is computed and stored in RAX or XMM0.
6801	// On Amd64, Leave callback preserves the return register. We keep
6802	// RAX alive by not reporting as trashed by helper call. Also note
6803	// that GC cannot kick-in while executing inside profiler callback,
6804	// which is a requirement of profiler as well since it needs to examine
6805	// return value which could be an obj ref.
6806
6807	// RCX = ProfilerMethHnd
6808	if (compiler->compProfilerMethHndIndirected)
6809	{
6810	// Profiler hooks enabled during Ngen time.
6811	// Profiler handle needs to be accessed through an indirection of an address.
6812	getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6813	}
6814	else
6815	{
6816	// Don't record relocations, if we are generating ELT hooks under the influence
6817	// of COMPlus_JitELTHookEnabled=1
6818	if (compiler->opts.compJitELTHookEnabled)
6819	{
6820	genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6821	}
6822	else
6823	{
6824	instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6825	}
6826	}
6827
6828	// RDX = caller's SP
6829	// TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
6830	// of the stmnts to execute unconditionally and clean-up rest.
6831	if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6832	{
6833	// Caller's SP relative offset to FramePointer will be negative. We need to add absolute
6834	// value of that offset to FramePointer to obtain caller's SP value.
6835	int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(`0`, isFramePointerUsed());
6836	getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6837	}
6838	else
6839	{
6840	// If we are here means that it is a tentative frame layout during which we
6841	// cannot use caller's SP offset since it is an estimate. For now we require the
6842	// method to have at least a single arg so that we can use it to obtain caller's
6843	// SP.
6844	LclVarDsc* varDsc = compiler->lvaTable;
6845	NYI_IF((varDsc == nullptr) \|\| !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
6846
6847	// lea rdx, [FramePointer + Arg0's offset]
6848	getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, `0`, `0`);
6849	}
6850
6851	// We can use any callee trash register (other than RAX, RCX, RDX) for call target.
6852	// We use R8 here. This will emit either
6853	// "call ip-relative 32-bit offset" or
6854	// "mov r8, helper addr; call r8"
6855	genEmitHelperCall(helper, `0`, EA_UNKNOWN, REG_ARG_2);
6856
6857	#else // !defined(UNIX_AMD64_ABI)
6858
6859	// RDI = ProfilerMethHnd
6860	if (compiler->compProfilerMethHndIndirected)
6861	{
6862	getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6863	}
6864	else
6865	{
6866	if (compiler->opts.compJitELTHookEnabled)
6867	{
6868	genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6869	}
6870	else
6871	{
6872	instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6873	}
6874	}
6875
6876	// RSI = caller's SP
6877	if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6878	{
6879	int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(`0`, isFramePointerUsed());
6880	getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6881	}
6882	else
6883	{
6884	LclVarDsc* varDsc = compiler->lvaTable;
6885	NYI_IF((varDsc == nullptr) \|\| !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
6886
6887	// lea rdx, [FramePointer + Arg0's offset]
6888	getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, `0`, `0`);
6889	}
6890
6891	// We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6892	// We use R11 here. This will emit either
6893	// "call ip-relative 32-bit offset" or
6894	// "mov r11, helper addr; call r11"
6895	genEmitHelperCall(helper, `0`, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6896
6897	#endif // !defined(UNIX_AMD64_ABI)
6898
6899	#elif defined(_TARGET_X86_)
6900
6901	#if defined(UNIX_X86_ABI)
6902	// Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6903	getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, `0xC`);
6904	AddStackLevel(`0xC`);
6905	AddNestedAlignment(`0xC`);
6906	#endif // UNIX_X86_ABI
6907
6908	//
6909	// Push the profilerHandle
6910	//
6911
6912	if (compiler->compProfilerMethHndIndirected)
6913	{
6914	getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6915	}
6916	else
6917	{
6918	inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6919	}
6920	genSinglePush();
6921
6922	#if defined(UNIX_X86_ABI)
6923	int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl)
6924	#else
6925	int argSize = REGSIZE_BYTES;
6926	#endif
6927	genEmitHelperCall(helper, argSize, EA_UNKNOWN / retSize /);
6928
6929	// Check that we have place for the push.
6930	assert(compiler->fgPtrArgCntMax >= `1`);
6931
6932	#if defined(UNIX_X86_ABI)
6933	// Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6934	getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, `0x10`);
6935	SubtractStackLevel(`0x10`);
6936	SubtractNestedAlignment(`0xC`);
6937	#endif // UNIX_X86_ABI
6938
6939	#elif defined(_TARGET_ARM_)
6940	//
6941	// Push the profilerHandle
6942	//
6943
6944	// Contract between JIT and Profiler Leave callout on arm:
6945	// Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
6946	// Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
6947	// Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
6948	// It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
6949	//
6950	// In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
6951	// callback.
6952	bool r0Trashed;
6953	emitAttr attr = EA_UNKNOWN;
6954
6955	if (compiler->info.compRetType == TYP_VOID \|\| (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
6956	(varTypeIsFloating(compiler->info.compRetType) \|\|
6957	compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
6958	{
6959	r0Trashed = false;
6960	}
6961	else
6962	{
6963	// Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
6964	// profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
6965	if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
6966	{
6967	attr = EA_GCREF;
6968	gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
6969	}
6970	else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
6971	{
6972	attr = EA_BYREF;
6973	gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
6974	}
6975	else
6976	{
6977	attr = EA_4BYTE;
6978	}
6979
6980	getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
6981	regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
6982	gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
6983	r0Trashed = true;
6984	}
6985
6986	if (compiler->compProfilerMethHndIndirected)
6987	{
6988	getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6989	regSet.verifyRegUsed(REG_ARG_0);
6990	}
6991	else
6992	{
6993	instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6994	}
6995
6996	genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
6997	`0`, // argSize
6998	EA_UNKNOWN); // retSize
6999
7000	// Restore state that existed before profiler callback
7001	if (r0Trashed)
7002	{
7003	getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
7004	regSet.verifyRegUsed(REG_ARG_0);
7005	gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
7006	}
7007
7008	#else // target
7009	NYI("Emit Profiler Leave callback");
7010	#endif // target
7011
7012	/ Restore the stack level /
7013	SetStackLevel(saveStackLvl2);
7014	}
7015
7016	#endif // PROFILING_SUPPORTED
7017
7018	/*****************************************************************************
7019
7020	Esp frames :
7021	----------
7022
7023	These instructions are just a reordering of the instructions used today.
7024
7025	push ebp
7026	push esi
7027	push edi
7028	push ebx
sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7030	...
7031	add esp, LOCALS_SIZE / pop dummyReg
7032	pop ebx
7033	pop edi
7034	pop esi
7035	pop ebp
7036	ret
7037
7038	Ebp frames :
7039	----------
7040
7041	The epilog does "add esp, LOCALS_SIZE" instead of "mov ebp, esp".
7042	Everything else is similar, though in a different order.
7043
7044	The security object will no longer be at a fixed offset. However, the
7045	offset can still be determined by looking up the GC-info and determining
7046	how many callee-saved registers are pushed.
7047
7048	push ebp
7049	mov ebp, esp
7050	push esi
7051	push edi
7052	push ebx
sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7054	...
7055	add esp, LOCALS_SIZE / pop dummyReg
7056	pop ebx
7057	pop edi
7058	pop esi
7059	(mov esp, ebp if there are no callee-saved registers)
7060	pop ebp
7061	ret
7062
7063	Double-aligned frame :
7064	--------------------
7065
7066	LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
7067	of callee-saved registers are pushed on the stack so that the locals
7068	themselves are qword-aligned. The instructions are the same as today,
7069	just in a different order.
7070
7071	push ebp
7072	mov ebp, esp
7073	and esp, 0xFFFFFFFC
7074	push esi
7075	push edi
7076	push ebx
sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
7078	...
7079	add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
7080	pop ebx
7081	pop edi
7082	pop esi
7083	pop ebp
7084	mov esp, ebp
7085	pop ebp
7086	ret
7087
7088	localloc (with ebp) frames :
7089	--------------------------
7090
7091	The instructions are the same as today, just in a different order.
7092	Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
7093	which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
7094
7095	push ebp
7096	mov ebp, esp
7097	push esi
7098	push edi
7099	push ebx
sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7101	...
7102	lea esp, [ebp-calleeSavedRegsPushedSize]
7103	pop ebx
7104	pop edi
7105	pop esi
7106	(mov esp, ebp if there are no callee-saved registers)
7107	pop ebp
7108	ret
7109
7110	*****************************************************************************/
7111
7112	/*****************************************************************************
7113	*
7114	* Generates appropriate NOP padding for a function prolog to support ReJIT.
7115	*/
7116
7117	void CodeGen::genPrologPadForReJit()
7118	{
7119	assert(compiler->compGeneratingProlog);
7120
7121	#ifdef _TARGET_XARCH_
7122	if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS))
7123	{
7124	return;
7125	}
7126
7127	#if FEATURE_EH_FUNCLETS
7128
7129	// No need to generate pad (nops) for funclets.
7130	// When compiling the main function (and not a funclet)
7131	// the value of funCurrentFunc->funKind is equal to FUNC_ROOT.
7132	if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
7133	{
7134	return;
7135	}
7136
7137	#endif // FEATURE_EH_FUNCLETS
7138
7139	unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
7140	if (size < `5`)
7141	{
7142	instNop(`5` - size);
7143	}
7144	#endif
7145	}
7146
7147	/*****************************************************************************
7148	*
7149	* Reserve space for a function prolog.
7150	*/
7151
7152	void CodeGen::genReserveProlog(BasicBlock* block)
7153	{
7154	assert(block != nullptr);
7155
7156	JITDUMP("Reserving prolog IG for block " FMT_BB "\n", block->bbNum);
7157
7158	/ Nothing is live on entry to the prolog /
7159
7160	getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), `0`, `0`, false);
7161	}
7162
7163	/*****************************************************************************
7164	*
7165	* Reserve space for a function epilog.
7166	*/
7167
7168	void CodeGen::genReserveEpilog(BasicBlock* block)
7169	{
7170	regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
7171	regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
7172
7173	/ The return value is special-cased: make sure it goes live for the epilog /
7174
7175	bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != `0`);
7176
7177	if (genFullPtrRegMap && !jmpEpilog)
7178	{
7179	if (varTypeIsGC(compiler->info.compRetNativeType))
7180	{
7181	noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
7182
7183	gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
7184
7185	switch (compiler->info.compRetNativeType)
7186	{
7187	case TYP_REF:
7188	gcrefRegsArg \|= RBM_INTRET;
7189	break;
7190	case TYP_BYREF:
7191	byrefRegsArg \|= RBM_INTRET;
7192	break;
7193	default:
7194	break;
7195	}
7196	}
7197	}
7198
7199	JITDUMP("Reserving epilog IG for block " FMT_BB "\n", block->bbNum);
7200
7201	assert(block != nullptr);
7202	const VARSET_TP& gcrefVarsArg(getEmitter()->emitThisGCrefVars);
7203	bool last = (block->bbNext == nullptr);
7204	getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
7205	}
7206
7207	#if FEATURE_EH_FUNCLETS
7208
7209	/*****************************************************************************
7210	*
7211	* Reserve space for a funclet prolog.
7212	*/
7213
7214	void CodeGen::genReserveFuncletProlog(BasicBlock* block)
7215	{
7216	assert(block != nullptr);
7217
7218	/ Currently, no registers are live on entry to the prolog, except maybe*
7219	the exception object. There might be some live stack vars, but they
7220	cannot be accessed until after the frame pointer is re-established.
7221	In order to potentially prevent emitting a death before the prolog
7222	and a birth right after it, we just report it as live during the
7223	prolog, and rely on the prolog being non-interruptible. Trust
7224	genCodeForBBlist to correctly initialize all the sets.
7225
7226	We might need to relax these asserts if the VM ever starts
7227	restoring any registers, then we could have live-in reg vars...
7228	*/
7229
7230	noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
7231	noway_assert(gcInfo.gcRegByrefSetCur == `0`);
7232
7233	JITDUMP("Reserving funclet prolog IG for block " FMT_BB "\n", block->bbNum);
7234
7235	getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7236	gcInfo.gcRegByrefSetCur, false);
7237	}
7238
7239	/*****************************************************************************
7240	*
7241	* Reserve space for a funclet epilog.
7242	*/
7243
7244	void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
7245	{
7246	assert(block != nullptr);
7247
7248	JITDUMP("Reserving funclet epilog IG for block " FMT_BB "\n", block->bbNum);
7249
7250	bool last = (block->bbNext == nullptr);
7251	getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7252	gcInfo.gcRegByrefSetCur, last);
7253	}
7254
7255	#endif // FEATURE_EH_FUNCLETS
7256
7257	/*****************************************************************************
7258	* Finalize the frame size and offset assignments.
7259	*
7260	* No changes can be made to the modified register set after this, since that can affect how many
7261	* callee-saved registers get saved.
7262	*/
7263	void CodeGen::genFinalizeFrame()
7264	{
7265	JITDUMP("Finalizing stack frame\n");
7266
7267	// Initializations need to happen based on the var locations at the start
7268	// of the first basic block, so load those up. In particular, the determination
7269	// of whether or not to use block init in the prolog is dependent on the variable
7270	// locations on entry to the function.
7271	compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
7272
7273	genCheckUseBlockInit();
7274
7275	// Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
7276	CLANG_FORMAT_COMMENT_ANCHOR;
7277
7278	#if defined(_TARGET_X86_)
7279
7280	if (compiler->compTailCallUsed)
7281	{
7282	// If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
7283	// argument to "1", indicating to the tailcall helper that we've saved the callee-saved
7284	// registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
7285	// actually get saved.
7286
7287	regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
7288	}
7289	#endif // _TARGET_X86_
7290
7291	#if defined(_TARGET_ARMARCH_)
7292	// We need to determine if we will change SP larger than a specific amount to determine if we want to use a loop
7293	// to touch stack pages, that will require multiple registers. See genAllocLclFrame() for details.
7294	if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7295	{
7296	regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
7297	}
7298	#endif // defined(_TARGET_ARMARCH_)
7299
7300	#if defined(_TARGET_ARM_)
7301	// If there are any reserved registers, add them to the
7302	if (regSet.rsMaskResvd != RBM_NONE)
7303	{
7304	regSet.rsSetRegsModified(regSet.rsMaskResvd);
7305	}
7306	#endif // _TARGET_ARM_
7307
7308	#ifdef DEBUG
7309	if (verbose)
7310	{
7311	printf("Modified regs: ");
7312	dspRegMask(regSet.rsGetModifiedRegsMask());
7313	printf("\n");
7314	}
7315	#endif // DEBUG
7316
7317	// Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
7318	if (compiler->opts.compDbgEnC)
7319	{
7320	// We always save FP.
7321	noway_assert(isFramePointerUsed());
7322	#ifdef _TARGET_AMD64_
7323	// On x64 we always save exactly RBP, RSI and RDI for EnC.
7324	regMaskTP okRegs = (RBM_CALLEE_TRASH \| RBM_FPBASE \| RBM_RSI \| RBM_RDI);
7325	regSet.rsSetRegsModified(RBM_RSI \| RBM_RDI);
7326	noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == `0`);
7327	#else // !_TARGET_AMD64_
7328	// On x86 we save all callee saved regs so the saved reg area size is consistent
7329	regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7330	#endif // !_TARGET_AMD64_
7331	}
7332
7333	/ If we have any pinvoke calls, we might potentially trash everything /
7334	if (compiler->info.compCallUnmanaged)
7335	{
7336	noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
7337	regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7338	}
7339
7340	#ifdef UNIX_AMD64_ABI
7341	// On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
7342	if (compiler->compIsProfilerHookNeeded())
7343	{
7344	regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 \| RBM_PROFILER_ENTER_ARG_1);
7345	}
7346	#endif
7347
7348	/ Count how many callee-saved registers will actually be saved (pushed) /
7349
7350	// EBP cannot be (directly) modified for EBP frame and double-aligned frames
7351	noway_assert(!doubleAlignOrFramePointerUsed() \|\| !regSet.rsRegsModified(RBM_FPBASE));
7352
7353	#if ETW_EBP_FRAMED
7354	// EBP cannot be (directly) modified
7355	noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
7356	#endif
7357
7358	regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
7359
7360	#ifdef _TARGET_ARMARCH_
7361	if (isFramePointerUsed())
7362	{
7363	// For a FP based frame we have to push/pop the FP register
7364	//
7365	maskCalleeRegsPushed \|= RBM_FPBASE;
7366
7367	// This assert check that we are not using REG_FP
7368	// as both the frame pointer and as a codegen register
7369	//
7370	assert(!regSet.rsRegsModified(RBM_FPBASE));
7371	}
7372
7373	// we always push LR. See genPushCalleeSavedRegisters
7374	//
7375	maskCalleeRegsPushed \|= RBM_LR;
7376
7377	#if defined(_TARGET_ARM_)
7378	// TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
7379	regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
7380	regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
7381
7382	if ((maskPushRegsFloat != RBM_NONE) \|\|
7383	(compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
7384	{
7385	// Here we try to keep stack double-aligned before the vpush
7386	if ((genCountBits(regSet.rsMaskPreSpillRegs(true) \| maskPushRegsInt) % `2`) != `0`)
7387	{
7388	regNumber extraPushedReg = REG_R4;
7389	while (maskPushRegsInt & genRegMask(extraPushedReg))
7390	{
7391	extraPushedReg = REG_NEXT(extraPushedReg);
7392	}
7393	if (extraPushedReg < REG_R11)
7394	{
7395	maskPushRegsInt \|= genRegMask(extraPushedReg);
7396	regSet.rsSetRegsModified(genRegMask(extraPushedReg));
7397	}
7398	}
7399	maskCalleeRegsPushed = maskPushRegsInt \| maskPushRegsFloat;
7400	}
7401
7402	// We currently only expect to push/pop consecutive FP registers
7403	// and these have to be double-sized registers as well.
7404	// Here we will insure that maskPushRegsFloat obeys these requirements.
7405	//
7406	if (maskPushRegsFloat != RBM_NONE)
7407	{
7408	regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
7409	while (maskPushRegsFloat > contiguousMask)
7410	{
7411	contiguousMask <<= `2`;
7412	contiguousMask \|= genRegMaskFloat(REG_F16, TYP_DOUBLE);
7413	}
7414	if (maskPushRegsFloat != contiguousMask)
7415	{
7416	regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
7417	maskPushRegsFloat \|= maskExtraRegs;
7418	regSet.rsSetRegsModified(maskExtraRegs);
7419	maskCalleeRegsPushed \|= maskExtraRegs;
7420	}
7421	}
7422	#endif // _TARGET_ARM_
7423	#endif // _TARGET_ARMARCH_
7424
7425	#if defined(_TARGET_XARCH_)
7426	// Compute the count of callee saved float regs saved on stack.
7427	// On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
7428	// regs are stack allocated and preserved in their stack locations.
7429	compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
7430	maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
7431	#endif // defined(_TARGET_XARCH_)
7432
7433	compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
7434
7435	#ifdef DEBUG
7436	if (verbose)
7437	{
7438	printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
7439	dspRegMask(maskCalleeRegsPushed);
7440	printf("\n");
7441	}
7442	#endif // DEBUG
7443
7444	/ Assign the final offsets to things living on the stack frame /
7445
7446	compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
7447
7448	/ We want to make sure that the prolog size calculated here is accurate*
7449	(that is instructions will not shrink because of conservative stack
7450	frame approximations). We do this by filling in the correct size
7451	here (where we have committed to the final numbers for the frame offsets)
7452	This will ensure that the prolog size is always correct
7453	*/
7454	getEmitter()->emitMaxTmpSize = regSet.tmpGetTotalSize();
7455
7456	#ifdef DEBUG
7457	if (compiler->opts.dspCode \|\| compiler->opts.disAsm \|\| compiler->opts.disAsm2 \|\| verbose)
7458	{
7459	compiler->lvaTableDump();
7460	}
7461	#endif
7462	}
7463
7464	//------------------------------------------------------------------------
7465	// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
7466	//
7467	// Arguments:
7468	// delta - the offset to add to the current stack pointer to establish the frame pointer
7469	// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
7470
7471	void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
7472	{
7473	assert(compiler->compGeneratingProlog);
7474
7475	#if defined(_TARGET_XARCH_)
7476
7477	if (delta == `0`)
7478	{
7479	getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
7480	psiMoveESPtoEBP();
7481	}
7482	else
7483	{
7484	getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7485	// We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
7486	// anyway.
7487	}
7488
7489	if (reportUnwindData)
7490	{
7491	compiler->unwindSetFrameReg(REG_FPBASE, delta);
7492	}
7493
7494	#elif defined(_TARGET_ARM_)
7495
7496	assert(arm_Valid_Imm_For_Add_SP(delta));
7497	getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7498
7499	if (reportUnwindData)
7500	{
7501	compiler->unwindPadding();
7502	}
7503
7504	#else
7505	NYI("establish frame pointer");
7506	#endif
7507	}
7508
7509	/*****************************************************************************
7510	*
7511	* Generates code for a function prolog.
7512	*
7513	* NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
7514	*
7515	* The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
7516	* implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
7517	* only instructions which result in control not going to the next instruction. Basically, any time execution would
7518	* not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
7519	* decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
7520	* can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
7521	*
7522	* If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
7523	* debugger team to ensure that stepping still works.
7524	*
7525	* ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
7526	*/
7527
7528	#ifdef _PREFAST_
7529	#pragma warning(push)
7530	#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
7531	#endif
7532	void CodeGen::genFnProlog()
7533	{
7534	ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
7535
7536	compiler->funSetCurrentFunc(`0`);
7537
7538	#ifdef DEBUG
7539	if (verbose)
7540	{
7541	printf("*************** In genFnProlog()\n");
7542	}
7543	#endif
7544
7545	#ifdef DEBUG
7546	genInterruptibleUsed = true;
7547	#endif
7548
7549	assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
7550
7551	/ Ready to start on the prolog proper /
7552
7553	getEmitter()->emitBegProlog();
7554	compiler->unwindBegProlog();
7555
7556	// Do this so we can put the prolog instruction group ahead of
7557	// other instruction groups
7558	genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
7559
7560	#ifdef DEBUG
7561	if (compiler->opts.dspCode)
7562	{
7563	printf("\n__prolog:\n");
7564	}
7565	#endif
7566
7567	if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > `0`))
7568	{
7569	// Create new scopes for the method-parameters for the prolog-block.
7570	psiBegProlog();
7571	}
7572
7573	#ifdef DEBUG
7574
7575	if (compiler->compJitHaltMethod())
7576	{
7577	/ put a nop first because the debugger and other tools are likely to*
7578	put an int3 at the begining and we don't want to confuse them /*
7579
7580	instGen(INS_nop);
7581	instGen(INS_BREAKPOINT);
7582
7583	#ifdef _TARGET_ARMARCH_
7584	// Avoid asserts in the unwind info because these instructions aren't accounted for.
7585	compiler->unwindPadding();
7586	#endif // _TARGET_ARMARCH_
7587	}
7588	#endif // DEBUG
7589
7590	#if FEATURE_EH_FUNCLETS && defined(DEBUG)
7591
7592	// We cannot force 0-initialization of the PSPSym
7593	// as it will overwrite the real value
7594	if (compiler->lvaPSPSym != BAD_VAR_NUM)
7595	{
7596	LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
7597	assert(!varDsc->lvMustInit);
7598	}
7599
7600	#endif // FEATURE_EH_FUNCLETS && DEBUG
7601
7602	/-------------------------------------------------------------------------*
7603	*
7604	* Record the stack frame ranges that will cover all of the tracked
7605	* and untracked pointer variables.
7606	* Also find which registers will need to be zero-initialized.
7607	*
7608	* 'initRegs': - Generally, enregistered variables should not need to be
7609	* zero-inited. They only need to be zero-inited when they
7610	* have a possibly uninitialized read on some control
7611	* flow path. Apparently some of the IL_STUBs that we
7612	* generate have this property.
7613	*/
7614
7615	int untrLclLo = +INT_MAX;
7616	int untrLclHi = -INT_MAX;
7617	// 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
7618	// Note that they may be tracked, but simply not allocated to a register.
7619	bool hasUntrLcl = false;
7620
7621	int GCrefLo = +INT_MAX;
7622	int GCrefHi = -INT_MAX;
7623	bool hasGCRef = false;
7624
7625	regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
7626	regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
7627	regMaskTP initDblRegs = RBM_NONE;
7628
7629	unsigned varNum;
7630	LclVarDsc* varDsc;
7631
7632	for (varNum = `0`, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
7633	{
7634	if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
7635	{
7636	continue;
7637	}
7638
7639	if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
7640	{
7641	noway_assert(varDsc->lvRefCnt() == `0`);
7642	continue;
7643	}
7644
7645	signed int loOffs = varDsc->lvStkOffs;
7646	signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
7647
7648	/ We need to know the offset range of tracked stack GC refs /
7649	/ We assume that the GC reference can be anywhere in the TYP_STRUCT /
7650
7651	if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
7652	{
7653	// For fields of PROMOTION_TYPE_DEPENDENT type of promotion, they should have been
7654	// taken care of by the parent struct.
7655	if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
7656	{
7657	hasGCRef = true;
7658
7659	if (loOffs < GCrefLo)
7660	{
7661	GCrefLo = loOffs;
7662	}
7663	if (hiOffs > GCrefHi)
7664	{
7665	GCrefHi = hiOffs;
7666	}
7667	}
7668	}
7669
7670	/ For lvMustInit vars, gather pertinent info /
7671
7672	if (!varDsc->lvMustInit)
7673	{
7674	continue;
7675	}
7676
7677	if (varDsc->lvIsInReg())
7678	{
7679	regMaskTP regMask = genRegMask(varDsc->lvRegNum);
7680	if (!varDsc->IsFloatRegType())
7681	{
7682	initRegs \|= regMask;
7683
7684	if (varTypeIsMultiReg(varDsc))
7685	{
7686	if (varDsc->lvOtherReg != REG_STK)
7687	{
7688	initRegs \|= genRegMask(varDsc->lvOtherReg);
7689	}
7690	else
7691	{
7692	/ Upper DWORD is on the stack, and needs to be inited /
7693
7694	loOffs += sizeof(int);
7695	goto INIT_STK;
7696	}
7697	}
7698	}
7699	else if (varDsc->TypeGet() == TYP_DOUBLE)
7700	{
7701	initDblRegs \|= regMask;
7702	}
7703	else
7704	{
7705	initFltRegs \|= regMask;
7706	}
7707	}
7708	else
7709	{
7710	INIT_STK:
7711
7712	hasUntrLcl = true;
7713
7714	if (loOffs < untrLclLo)
7715	{
7716	untrLclLo = loOffs;
7717	}
7718	if (hiOffs > untrLclHi)
7719	{
7720	untrLclHi = hiOffs;
7721	}
7722	}
7723	}
7724
7725	/ Don't forget about spill temps that hold pointers /
7726
7727	if (!TRACK_GC_TEMP_LIFETIMES)
7728	{
7729	assert(regSet.tmpAllFree());
7730	for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
7731	{
7732	if (!varTypeIsGC(tempThis->tdTempType()))
7733	{
7734	continue;
7735	}
7736
7737	signed int loOffs = tempThis->tdTempOffs();
7738	signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
7739
7740	// If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
7741	// previous frame pointer. Thus, stkOffs can't be zero.
7742	CLANG_FORMAT_COMMENT_ANCHOR;
7743
7744	#if !defined(_TARGET_AMD64_)
7745	// However, on amd64 there is no requirement to chain frame pointers.
7746
7747	noway_assert(!isFramePointerUsed() \|\| loOffs != `0`);
7748	#endif // !defined(_TARGET_AMD64_)
7749
7750	// printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
7751
7752	hasUntrLcl = true;
7753
7754	if (loOffs < untrLclLo)
7755	{
7756	untrLclLo = loOffs;
7757	}
7758	if (hiOffs > untrLclHi)
7759	{
7760	untrLclHi = hiOffs;
7761	}
7762	}
7763	}
7764
7765	assert((genInitStkLclCnt > `0`) == hasUntrLcl);
7766
7767	#ifdef DEBUG
7768	if (verbose)
7769	{
7770	if (genInitStkLclCnt > `0`)
7771	{
7772	printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
7773	-untrLclHi);
7774	}
7775	}
7776	#endif
7777
7778	#ifdef _TARGET_ARM_
7779	// On the ARM we will spill any incoming struct args in the first instruction in the prolog
7780	// Ditto for all enregistered user arguments in a varargs method.
7781	// These registers will be available to use for the initReg. We just remove
7782	// all of these registers from the rsCalleeRegArgMaskLiveIn.
7783	//
7784	intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
7785	#endif
7786
7787	/ Choose the register to use for zero initialization /
7788
7789	regNumber initReg = REG_SCRATCH; // Unless we find a better register below
7790	bool initRegZeroed = false;
7791	regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
7792	regMaskTP tempMask;
7793
7794	// We should not use the special PINVOKE registers as the initReg
7795	// since they are trashed by the jithelper call to setup the PINVOKE frame
7796	if (compiler->info.compCallUnmanaged)
7797	{
7798	excludeMask \|= RBM_PINVOKE_FRAME;
7799
7800	assert((!compiler->opts.ShouldUsePInvokeHelpers()) \|\| (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
7801	if (!compiler->opts.ShouldUsePInvokeHelpers())
7802	{
7803	noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
7804
7805	excludeMask \|= (RBM_PINVOKE_TCB \| RBM_PINVOKE_SCRATCH);
7806
7807	// We also must exclude the register used by compLvFrameListRoot when it is enregistered
7808	//
7809	LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
7810	if (varDsc->lvRegister)
7811	{
7812	excludeMask \|= genRegMask(varDsc->lvRegNum);
7813	}
7814	}
7815	}
7816
7817	#ifdef _TARGET_ARM_
7818	// If we have a variable sized frame (compLocallocUsed is true)
7819	// then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
7820	if (compiler->compLocallocUsed)
7821	{
7822	excludeMask \|= RBM_SAVED_LOCALLOC_SP;
7823	}
7824	#endif // _TARGET_ARM_
7825
7826	#if defined(_TARGET_XARCH_)
7827	if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7828	{
7829	// We currently must use REG_EAX on x86 here
7830	// because the loop's backwards branch depends upon the size of EAX encodings
7831	assert(initReg == REG_EAX);
7832	}
7833	else
7834	#endif // _TARGET_XARCH_
7835	{
7836	tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
7837
7838	if (tempMask != RBM_NONE)
7839	{
7840	// We will use one of the registers that we were planning to zero init anyway.
7841	// We pick the lowest register number.
7842	tempMask = genFindLowestBit(tempMask);
7843	initReg = genRegNumFromMask(tempMask);
7844	}
7845	// Next we prefer to use one of the unused argument registers.
7846	// If they aren't available we use one of the caller-saved integer registers.
7847	else
7848	{
7849	tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
7850	if (tempMask != RBM_NONE)
7851	{
7852	// We pick the lowest register number
7853	tempMask = genFindLowestBit(tempMask);
7854	initReg = genRegNumFromMask(tempMask);
7855	}
7856	}
7857	}
7858
7859	noway_assert(!compiler->info.compCallUnmanaged \|\| (initReg != REG_PINVOKE_FRAME));
7860
7861	#if defined(_TARGET_AMD64_)
7862	// If we are a varargs call, in order to set up the arguments correctly this
7863	// must be done in a 2 step process. As per the x64 ABI:
7864	// a) The caller sets up the argument shadow space (just before the return
7865	// address, 4 pointer sized slots).
7866	// b) The callee is responsible to home the arguments on the shadow space
7867	// provided by the caller.
7868	// This way, the varargs iterator will be able to retrieve the
7869	// call arguments properly since both the arg regs and the stack allocated
7870	// args will be contiguous.
7871	if (compiler->info.compIsVarArgs)
7872	{
7873	getEmitter()->spillIntArgRegsToShadowSlots();
7874	}
7875
7876	#endif // _TARGET_AMD64_
7877
7878	#ifdef _TARGET_ARM_
7879	/-------------------------------------------------------------------------*
7880	*
7881	* Now start emitting the part of the prolog which sets up the frame
7882	*/
7883
7884	if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
7885	{
7886	inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
7887	compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
7888	}
7889	#endif // _TARGET_ARM_
7890
7891	#ifdef _TARGET_XARCH_
7892	if (doubleAlignOrFramePointerUsed())
7893	{
7894	inst_RV(INS_push, REG_FPBASE, TYP_REF);
7895	compiler->unwindPush(REG_FPBASE);
7896	psiAdjustStackLevel(REGSIZE_BYTES);
7897
7898	#ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
7899	genEstablishFramePointer(`0`, /reportUnwindData/ true);
7900	#endif // !_TARGET_AMD64_
7901
7902	#if DOUBLE_ALIGN
7903	if (compiler->genDoubleAlign())
7904	{
7905	noway_assert(isFramePointerUsed() == false);
7906	noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); / Trashing EBP is out. /
7907
7908	inst_RV_IV(INS_AND, REG_SPBASE, -`8`, EA_PTRSIZE);
7909	}
7910	#endif // DOUBLE_ALIGN
7911	}
7912	#endif // _TARGET_XARCH_
7913
7914	#ifdef _TARGET_ARM64_
7915	// Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame.
7916	genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
7917	genPushCalleeSavedRegisters(initReg, &initRegZeroed);
7918	#else // !_TARGET_ARM64_
7919	genPushCalleeSavedRegisters();
7920	#endif // !_TARGET_ARM64_
7921
7922	#ifdef _TARGET_ARM_
7923	bool needToEstablishFP = false;
7924	int afterLclFrameSPtoFPdelta = `0`;
7925	if (doubleAlignOrFramePointerUsed())
7926	{
7927	needToEstablishFP = true;
7928
7929	// If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
7930	// This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
7931	// too big, we go ahead and do it here.
7932
7933	int SPtoFPdelta = (compiler->compCalleeRegsPushed - `2`) * REGSIZE_BYTES;
7934	afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
7935	if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
7936	{
7937	// Oh well, it looks too big. Go ahead and establish the frame pointer here.
7938	genEstablishFramePointer(SPtoFPdelta, /reportUnwindData/ true);
7939	needToEstablishFP = false;
7940	}
7941	}
7942	#endif // _TARGET_ARM_
7943
7944	//-------------------------------------------------------------------------
7945	//
7946	// Subtract the local frame size from SP.
7947	//
7948	//-------------------------------------------------------------------------
7949	CLANG_FORMAT_COMMENT_ANCHOR;
7950
7951	#ifndef _TARGET_ARM64_
7952	regMaskTP maskStackAlloc = RBM_NONE;
7953
7954	#ifdef _TARGET_ARM_
7955	maskStackAlloc =
7956	genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
7957	#endif // _TARGET_ARM_
7958
7959	if (maskStackAlloc == RBM_NONE)
7960	{
7961	genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
7962	}
7963	#endif // !_TARGET_ARM64_
7964
7965	//-------------------------------------------------------------------------
7966
7967	#ifdef _TARGET_ARM_
7968	if (compiler->compLocallocUsed)
7969	{
7970	getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
7971	regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP);
7972	compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, `0`);
7973	}
7974	#endif // _TARGET_ARMARCH_
7975
7976	#if defined(_TARGET_XARCH_)
7977	// Preserve callee saved float regs to stack.
7978	genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
7979	#endif // defined(_TARGET_XARCH_)
7980
7981	#ifdef _TARGET_AMD64_
7982	// Establish the AMD64 frame pointer after the OS-reported prolog.
7983	if (doubleAlignOrFramePointerUsed())
7984	{
7985	bool reportUnwindData = compiler->compLocallocUsed \|\| compiler->opts.compDbgEnC;
7986	genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
7987	}
7988	#endif //_TARGET_AMD64_
7989
7990	//-------------------------------------------------------------------------
7991	//
7992	// This is the end of the OS-reported prolog for purposes of unwinding
7993	//
7994	//-------------------------------------------------------------------------
7995
7996	#ifdef _TARGET_ARM_
7997	if (needToEstablishFP)
7998	{
7999	genEstablishFramePointer(afterLclFrameSPtoFPdelta, /reportUnwindData/ false);
8000	needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
8001	}
8002	#endif // _TARGET_ARM_
8003
8004	if (compiler->info.compPublishStubParam)
8005	{
8006	#if CPU_LOAD_STORE_ARCH
8007	getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
8008	compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
8009	#else
8010	// mov [lvaStubArgumentVar], EAX
8011	getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
8012	compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
8013	#endif
8014	assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
8015
8016	// It's no longer live; clear it out so it can be used after this in the prolog
8017	intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
8018	}
8019
8020	#if STACK_PROBES
8021	// We could probably fold this into the loop for the FrameSize >= 0x3000 probing
8022	// when creating the stack frame. Don't think it's worth it, though.
8023	if (genNeedPrologStackProbe)
8024	{
8025	//
8026	// Can't have a call until we have enough padding for rejit
8027	//
8028	genPrologPadForReJit();
8029	noway_assert(compiler->opts.compNeedStackProbes);
8030	genGenerateStackProbe();
8031	compiler->compStackProbePrologDone = true;
8032	}
8033	#endif // STACK_PROBES
8034
8035	//
8036	// Zero out the frame as needed
8037	//
8038
8039	genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
8040
8041	#if FEATURE_EH_FUNCLETS
8042
8043	genSetPSPSym(initReg, &initRegZeroed);
8044
8045	#else // !FEATURE_EH_FUNCLETS
8046
8047	// when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
8048	if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
8049	{
8050	// The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
8051	unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
8052
8053	// Zero out the slot for nesting level 0
8054	unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
8055
8056	if (!initRegZeroed)
8057	{
8058	instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8059	initRegZeroed = true;
8060	}
8061
8062	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
8063	firstSlotOffs);
8064	}
8065
8066	#endif // !FEATURE_EH_FUNCLETS
8067
8068	genReportGenericContextArg(initReg, &initRegZeroed);
8069
8070	// The local variable representing the security object must be on the stack frame
8071	// and must be 0 initialized.
8072	noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) \|\|
8073	(compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
8074	compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
8075
8076	#ifdef JIT32_GCENCODER
8077	// Initialize the LocalAllocSP slot if there is localloc in the function.
8078	if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
8079	{
8080	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, `0`);
8081	}
8082	#endif // JIT32_GCENCODER
8083
8084	// Set up the GS security cookie
8085
8086	genSetGSSecurityCookie(initReg, &initRegZeroed);
8087
8088	#ifdef PROFILING_SUPPORTED
8089
8090	// Insert a function entry callback for profiling, if requested.
8091	genProfilingEnterCallback(initReg, &initRegZeroed);
8092
8093	#endif // PROFILING_SUPPORTED
8094
8095	if (!genInterruptible)
8096	{
8097	/-------------------------------------------------------------------------*
8098	*
8099	* The 'real' prolog ends here for non-interruptible methods.
8100	* For fully-interruptible methods, we extend the prolog so that
8101	* we do not need to track GC inforation while shuffling the
8102	* arguments.
8103	*
8104	* Make sure there's enough padding for ReJIT.
8105	*
8106	*/
8107	genPrologPadForReJit();
8108	getEmitter()->emitMarkPrologEnd();
8109	}
8110
8111	#if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
8112	// The unused bits of Vector3 arguments must be cleared
8113	// since native compiler doesn't initize the upper bits to zeros.
8114	//
8115	// TODO-Cleanup: This logic can be implemented in
8116	// genFnPrologCalleeRegArgs() for argument registers and
8117	// genEnregisterIncomingStackArgs() for stack arguments.
8118	genClearStackVec3ArgUpperBits();
8119	#endif // UNIX_AMD64_ABI && FEATURE_SIMD
8120
8121	/-----------------------------------------------------------------------------*
8122	* Take care of register arguments first
8123	*/
8124
8125	RegState* regState;
8126
8127	// Update the arg initial register locations.
8128	compiler->lvaUpdateArgsWithInitialReg();
8129
8130	FOREACH_REGISTER_FILE(regState)
8131	{
8132	if (regState->rsCalleeRegArgMaskLiveIn)
8133	{
8134	// If we need an extra register to shuffle around the incoming registers
8135	// we will use xtraReg (initReg) and set the xtraRegClobbered flag,
8136	// if we don't need to use the xtraReg then this flag will stay false
8137	//
8138	regNumber xtraReg;
8139	bool xtraRegClobbered = false;
8140
8141	if (genRegMask(initReg) & RBM_ARG_REGS)
8142	{
8143	xtraReg = initReg;
8144	}
8145	else
8146	{
8147	xtraReg = REG_SCRATCH;
8148	initRegZeroed = false;
8149	}
8150
8151	genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
8152
8153	if (xtraRegClobbered)
8154	{
8155	initRegZeroed = false;
8156	}
8157	}
8158	}
8159
8160	// Home the incoming arguments
8161	genEnregisterIncomingStackArgs();
8162
8163	/ Initialize any must-init registers variables now /
8164
8165	if (initRegs)
8166	{
8167	regMaskTP regMask = `0x1`;
8168
8169	for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= `1`)
8170	{
8171	if (regMask & initRegs)
8172	{
8173	// Check if we have already zeroed this register
8174	if ((reg == initReg) && initRegZeroed)
8175	{
8176	continue;
8177	}
8178	else
8179	{
8180	instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
8181	if (reg == initReg)
8182	{
8183	initRegZeroed = true;
8184	}
8185	}
8186	}
8187	}
8188	}
8189
8190	if (initFltRegs \| initDblRegs)
8191	{
8192	// If initReg is not in initRegs then we will use REG_SCRATCH
8193	if ((genRegMask(initReg) & initRegs) == `0`)
8194	{
8195	initReg = REG_SCRATCH;
8196	initRegZeroed = false;
8197	}
8198
8199	#ifdef _TARGET_ARM_
8200	// This is needed only for Arm since it can use a zero initialized int register
8201	// to initialize vfp registers.
8202	if (!initRegZeroed)
8203	{
8204	instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8205	initRegZeroed = true;
8206	}
8207	#endif // _TARGET_ARM_
8208
8209	genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
8210	}
8211
8212	//-----------------------------------------------------------------------------
8213
8214	//
8215	// Increase the prolog size here only if fully interruptible.
8216	// And again make sure it's big enough for ReJIT
8217	//
8218
8219	if (genInterruptible)
8220	{
8221	genPrologPadForReJit();
8222	getEmitter()->emitMarkPrologEnd();
8223	}
8224
8225	if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > `0`))
8226	{
8227	psiEndProlog();
8228	}
8229
8230	if (hasGCRef)
8231	{
8232	getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
8233	}
8234	else
8235	{
8236	noway_assert(GCrefLo == +INT_MAX);
8237	noway_assert(GCrefHi == -INT_MAX);
8238	}
8239
8240	#ifdef DEBUG
8241	if (compiler->opts.dspCode)
8242	{
8243	printf("\n");
8244	}
8245	#endif
8246
8247	#ifdef _TARGET_X86_
8248	// On non-x86 the VARARG cookie does not need any special treatment.
8249
8250	// Load up the VARARG argument pointer register so it doesn't get clobbered.
8251	// only do this if we actually access any statically declared args
8252	// (our argument pointer register has a refcount > 0).
8253	unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
8254
8255	if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt() > `0`)
8256	{
8257	varDsc = &compiler->lvaTable[argsStartVar];
8258
8259	noway_assert(compiler->info.compArgsCount > `0`);
8260
8261	// MOV EAX, <VARARGS HANDLE>
8262	getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - `1`, `0`);
8263	regSet.verifyRegUsed(REG_EAX);
8264
8265	// MOV EAX, [EAX]
8266	getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, `0`);
8267
8268	// EDX might actually be holding something here. So make sure to only use EAX for this code
8269	// sequence.
8270
8271	LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - `1`];
8272	noway_assert(!lastArg->lvRegister);
8273	signed offset = lastArg->lvStkOffs;
8274	assert(offset != BAD_STK_OFFS);
8275	noway_assert(lastArg->lvFramePointerBased);
8276
8277	// LEA EAX, &<VARARGS HANDLE> + EAX
8278	getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
8279
8280	if (varDsc->lvIsInReg())
8281	{
8282	if (varDsc->lvRegNum != REG_EAX)
8283	{
8284	getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
8285	regSet.verifyRegUsed(varDsc->lvRegNum);
8286	}
8287	}
8288	else
8289	{
8290	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, `0`);
8291	}
8292	}
8293
8294	#endif // _TARGET_X86_
8295
8296	#if defined(DEBUG) && defined(_TARGET_XARCH_)
8297	if (compiler->opts.compStackCheckOnRet)
8298	{
8299	noway_assert(compiler->lvaReturnSpCheck != `0xCCCCCCCC` &&
8300	compiler->lvaTable[compiler->lvaReturnSpCheck].lvDoNotEnregister &&
8301	compiler->lvaTable[compiler->lvaReturnSpCheck].lvOnFrame);
8302	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, `0`);
8303	}
8304	#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
8305
8306	getEmitter()->emitEndProlog();
8307	compiler->unwindEndProlog();
8308
8309	noway_assert(getEmitter()->emitMaxTmpSize == regSet.tmpGetTotalSize());
8310	}
8311	#ifdef _PREFAST_
8312	#pragma warning(pop)
8313	#endif
8314
8315	/*****************************************************************************
8316	*
8317	* Generates code for a function epilog.
8318	*
8319	* Please consult the "debugger team notification" comment in genFnProlog().
8320	*/
8321
8322	#if defined(_TARGET_ARMARCH_)
8323
8324	void CodeGen::genFnEpilog(BasicBlock* block)
8325	{
8326	#ifdef DEBUG
8327	if (verbose)
8328	printf("*************** In genFnEpilog()\n");
8329	#endif // DEBUG
8330
8331	ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
8332
8333	VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8334	gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8335	gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8336
8337	#ifdef DEBUG
8338	if (compiler->opts.dspCode)
8339	printf("\n__epilog:\n");
8340
8341	if (verbose)
8342	{
8343	printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8344	dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8345	printf(", gcRegGCrefSetCur=");
8346	printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8347	getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8348	printf(", gcRegByrefSetCur=");
8349	printRegMaskInt(gcInfo.gcRegByrefSetCur);
8350	getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
8351	printf("\n");
8352	}
8353	#endif // DEBUG
8354
8355	bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != `0`);
8356
8357	GenTree* lastNode = block->lastNode();
8358
8359	// Method handle and address info used in case of jump epilog
8360	CORINFO_METHOD_HANDLE methHnd = nullptr;
8361	CORINFO_CONST_LOOKUP addrInfo;
8362	addrInfo.addr = nullptr;
8363	addrInfo.accessType = IAT_VALUE;
8364
8365	if (jmpEpilog && lastNode->gtOper == GT_JMP)
8366	{
8367	methHnd = (CORINFO_METHOD_HANDLE)lastNode->gtVal.gtVal1;
8368	compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8369	}
8370
8371	#ifdef _TARGET_ARM_
8372	// We delay starting the unwind codes until we have an instruction which we know
8373	// needs an unwind code. In particular, for large stack frames in methods without
8374	// localloc, the sequence might look something like this:
8375	// movw r3, 0x38e0
8376	// add sp, r3
8377	// pop {r4,r5,r6,r10,r11,pc}
8378	// In this case, the "movw" should not be part of the unwind codes, since it will
8379	// be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
8380	// also sets the current location as the beginning offset of the epilog, so every
8381	// instruction afterwards needs an unwind code. In the case above, if you call
8382	// unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
8383
8384	bool unwindStarted = false;
8385
8386	// Tear down the stack frame
8387
8388	if (compiler->compLocallocUsed)
8389	{
8390	if (!unwindStarted)
8391	{
8392	compiler->unwindBegEpilog();
8393	unwindStarted = true;
8394	}
8395
8396	// mov R9 into SP
8397	inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
8398	compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, `0`);
8399	}
8400
8401	if (jmpEpilog \|\|
8402	genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
8403	RBM_NONE)
8404	{
8405	genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
8406	}
8407
8408	if (!unwindStarted)
8409	{
8410	// If we haven't generated anything yet, we're certainly going to generate a "pop" next.
8411	compiler->unwindBegEpilog();
8412	unwindStarted = true;
8413	}
8414
8415	if (jmpEpilog && lastNode->gtOper == GT_JMP && addrInfo.accessType == IAT_RELPVALUE)
8416	{
8417	// IAT_RELPVALUE jump at the end is done using relative indirection, so,
8418	// additional helper register is required.
8419	// We use LR just before it is going to be restored from stack, i.e.
8420	//
8421	// movw r12, laddr
8422	// movt r12, haddr
8423	// mov lr, r12
8424	// ldr r12, [r12]
8425	// add r12, r12, lr
8426	// pop {lr}
8427	// ...
8428	// bx r12
8429
8430	regNumber indCallReg = REG_R12;
8431	regNumber vptrReg1 = REG_LR;
8432
8433	instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
8434	getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, vptrReg1, indCallReg);
8435	getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, `0`);
8436	getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, indCallReg, vptrReg1);
8437	}
8438
8439	genPopCalleeSavedRegisters(jmpEpilog);
8440
8441	if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
8442	{
8443	// We better not have used a pop PC to return otherwise this will be unreachable code
8444	noway_assert(!genUsedPopToReturn);
8445
8446	int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
8447	inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
8448	compiler->unwindAllocStack(preSpillRegArgSize);
8449	}
8450
8451	if (jmpEpilog)
8452	{
8453	// We better not have used a pop PC to return otherwise this will be unreachable code
8454	noway_assert(!genUsedPopToReturn);
8455	}
8456
8457	#else // _TARGET_ARM64_
8458	compiler->unwindBegEpilog();
8459
8460	genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
8461	#endif // _TARGET_ARM64_
8462
8463	if (jmpEpilog)
8464	{
8465	hasTailCalls = true;
8466
8467	noway_assert(block->bbJumpKind == BBJ_RETURN);
8468	noway_assert(block->bbTreeList != nullptr);
8469
8470	/ figure out what jump we have /
8471	GenTree* jmpNode = lastNode;
8472	#if !FEATURE_FASTTAILCALL
8473	noway_assert(jmpNode->gtOper == GT_JMP);
8474	#else // FEATURE_FASTTAILCALL
8475	// armarch
8476	// If jmpNode is GT_JMP then gtNext must be null.
8477	// If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8478	noway_assert((jmpNode->gtOper != GT_JMP) \|\| (jmpNode->gtNext == nullptr));
8479
8480	// Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8481	noway_assert((jmpNode->gtOper == GT_JMP) \|\|
8482	((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8483
8484	// The next block is associated with this "if" stmt
8485	if (jmpNode->gtOper == GT_JMP)
8486	#endif // FEATURE_FASTTAILCALL
8487	{
8488	// Simply emit a jump to the methodHnd. This is similar to a call so we can use
8489	// the same descriptor with some minor adjustments.
8490	assert(methHnd != nullptr);
8491	assert(addrInfo.addr != nullptr);
8492
8493	#ifdef _TARGET_ARMARCH_
8494	emitter::EmitCallType callType;
8495	void* addr;
8496	regNumber indCallReg;
8497	switch (addrInfo.accessType)
8498	{
8499	case IAT_VALUE:
8500	if (validImmForBL((ssize_t)addrInfo.addr))
8501	{
8502	// Simple direct call
8503	callType = emitter::EC_FUNC_TOKEN;
8504	addr = addrInfo.addr;
8505	indCallReg = REG_NA;
8506	break;
8507	}
8508
8509	// otherwise the target address doesn't fit in an immediate
8510	// so we have to burn a register...
8511	__fallthrough;
8512
8513	case IAT_PVALUE:
8514	// Load the address into a register, load indirect and call through a register
8515	// We have to use R12 since we assume the argument registers are in use
8516	callType = emitter::EC_INDIR_R;
8517	indCallReg = REG_INDIRECT_CALL_TARGET_REG;
8518	addr = NULL;
8519	instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
8520	if (addrInfo.accessType == IAT_PVALUE)
8521	{
8522	getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, `0`);
8523	regSet.verifyRegUsed(indCallReg);
8524	}
8525	break;
8526
8527	case IAT_RELPVALUE:
8528	{
8529	// Load the address into a register, load relative indirect and call through a register
8530	// We have to use R12 since we assume the argument registers are in use
8531	// LR is used as helper register right before it is restored from stack, thus,
8532	// all relative address calculations are performed before LR is restored.
8533	callType = emitter::EC_INDIR_R;
8534	indCallReg = REG_R12;
8535	addr = NULL;
8536
8537	regSet.verifyRegUsed(indCallReg);
8538	break;
8539	}
8540
8541	case IAT_PPVALUE:
8542	default:
8543	NO_WAY("Unsupported JMP indirection");
8544	}
8545
8546	/ Simply emit a jump to the methodHnd. This is similar to a call so we can use*
8547	* the same descriptor with some minor adjustments.
8548	*/
8549
8550	// clang-format off
8551	getEmitter()->emitIns_Call(callType,
8552	methHnd,
8553	INDEBUG_LDISASM_COMMA(nullptr)
8554	addr,
8555	`0`, // argSize
8556	EA_UNKNOWN, // retSize
8557	#if defined(_TARGET_ARM64_)
8558	EA_UNKNOWN, // secondRetSize
8559	#endif
8560	gcInfo.gcVarPtrSetCur,
8561	gcInfo.gcRegGCrefSetCur,
8562	gcInfo.gcRegByrefSetCur,
8563	BAD_IL_OFFSET, // IL offset
8564	indCallReg, // ireg
8565	REG_NA, // xreg
8566	`0`, // xmul
8567	`0`, // disp
8568	true); // isJump
8569	// clang-format on
8570	CLANG_FORMAT_COMMENT_ANCHOR;
8571	#endif //_TARGET_ARMARCH_
8572	}
8573	#if FEATURE_FASTTAILCALL
8574	else
8575	{
8576	// Fast tail call.
8577	// Call target = REG_FASTTAILCALL_TARGET
8578	// https://github.com/dotnet/coreclr/issues/4827
8579	// Do we need a special encoding for stack walker like rex.w prefix for x64?
8580	getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET);
8581	}
8582	#endif // FEATURE_FASTTAILCALL
8583	}
8584	else
8585	{
8586	#ifdef _TARGET_ARM_
8587	if (!genUsedPopToReturn)
8588	{
8589	// If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
8590	// so we need a "bx lr" instruction to return from the function.
8591	inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
8592	compiler->unwindBranch16();
8593	}
8594	#else // _TARGET_ARM64_
8595	inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
8596	compiler->unwindReturn(REG_LR);
8597	#endif // _TARGET_ARM64_
8598	}
8599
8600	compiler->unwindEndEpilog();
8601	}
8602
8603	#elif defined(_TARGET_XARCH_)
8604
// genFnEpilog (xarch): emit the epilog for the return block 'block'.
//
// Steps, in the order they appear in the body:
//   1. Reset the current GC tracking sets from the emitter's initial sets.
//   2. Restore the callee-saved float registers (done before SP is modified).
//   3. Tear down the frame:
//        - SP-based frame: add compLclFrameSize to SP (or "pop ecx" on x86 for a pointer-sized frame),
//          then pop the callee-saved registers.
//        - frame-pointer / double-aligned frame: reposition SP with "add", "lea" or "pop ecx" as needed,
//          pop the callee-saved registers, optionally "mov esp, ebp" (never on AMD64), then "pop ebp".
//   4. Emit the exit sequence: a jump when the block has BBF_HAS_JMP (GT_JMP, or a fast tail call when
//      FEATURE_FASTTAILCALL is enabled), otherwise a return.
//
// Arguments:
//    block - the block the epilog is generated for; when BBF_HAS_JMP is set it must be a BBJ_RETURN
//            block with a non-empty tree list (both are checked with noway_assert below).
8605	void CodeGen::genFnEpilog(BasicBlock* block)
8606	{
8607	#ifdef DEBUG
8608	if (verbose)
8609	{
8610	printf("*************** In genFnEpilog()\n");
8611	}
8612	#endif
8613
	// Set compiler->compGeneratingEpilog to true through a scoped setter object.
8614	ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
8615
	// Start GC tracking for the epilog from the emitter's recorded initial GC variable/register sets.
8616	VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8617	gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8618	gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8619
8620	noway_assert(!compiler->opts.MinOpts() \|\| isFramePointerUsed()); // FPO not allowed with minOpts
8621
8622	#ifdef DEBUG
8623	genInterruptibleUsed = true;
8624	#endif
8625
	// jmpEpilog: this block ends in a jump (see the "if (jmpEpilog)" section below) instead of a return.
8626	bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != `0`);
8627
8628	#ifdef DEBUG
8629	if (compiler->opts.dspCode)
8630	{
8631	printf("\n__epilog:\n");
8632	}
8633
8634	if (verbose)
8635	{
8636	printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8637	dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8638	printf(", gcRegGCrefSetCur=");
8639	printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8640	getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8641	printf(", gcRegByrefSetCur=");
8642	printRegMaskInt(gcInfo.gcRegByrefSetCur);
8643	getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
8644	printf("\n");
8645	}
8646	#endif
8647
8648	// Restore float registers that were saved to stack before SP is modified.
8649	genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
8650
8651	#ifdef JIT32_GCENCODER
8652	// When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after
8653	// the above call to `genRestoreCalleeSavedFltRegs` because that function
8654	// a) does not actually restore any registers: there are none when targeting the Windows x86 ABI,
8655	// which is the only target that uses the JIT32 GC encoder
8656	// b) may issue a `vzeroupper` instruction to eliminate AVX -> SSE transition penalties.
8657	// Because the `vzeroupper` instruction is not recognized by the VM's unwinder and there are no
8658	// callee-save FP restores that the unwinder would need to see, we can avoid the need to change the
8659	// unwinder (and break binary compat with older versions of the runtime) by starting the epilog
8660	// after any `vzeroupper` instruction has been emitted. If either of the above conditions changes,
8661	// we will need to rethink this.
8662	getEmitter()->emitStartEpilog();
8663	#endif
8664
8665	/ Compute the size in bytes we've pushed/popped /
8666
	// Case 1: neither a frame pointer nor double alignment is in use, so locals are released relative to SP.
8667	if (!doubleAlignOrFramePointerUsed())
8668	{
8669	// We have an ESP frame /*
8670
8671	noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
8672
8673	/ Get rid of our local variables /
8674
8675	if (compiler->compLclFrameSize)
8676	{
8677	#ifdef _TARGET_X86_
8678	/ Add 'compiler->compLclFrameSize' to ESP /
8679	/ Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true /
8680
8681	if ((compiler->compLclFrameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed)
8682	{
8683	inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8684	regSet.verifyRegUsed(REG_ECX);
8685	}
8686	else
8687	#endif // _TARGET_X86
8688	{
8689	/ Add 'compiler->compLclFrameSize' to ESP /
8690	/ Generate "add esp, <stack-size>" /
8691	inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8692	}
8693	}
8694
8695	genPopCalleeSavedRegisters();
8696	}
	// Case 2: a frame pointer (or double alignment) is in use.
8697	else
8698	{
8699	noway_assert(doubleAlignOrFramePointerUsed());
8700
8701	/ Tear down the stack frame /
8702
	// needMovEspEbp: emit "mov esp, ebp" after the callee-saved registers have been popped (non-AMD64 only).
8703	bool needMovEspEbp = false;
8704
8705	#if DOUBLE_ALIGN
8706	if (compiler->genDoubleAlign())
8707	{
8708	//
8709	// add esp, compLclFrameSize
8710	//
8711	// We need not do anything (except the "mov esp, ebp") if
8712	// compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
8713	// also complicates the code manager. Hence, we ignore that case.
8714
8715	noway_assert(compiler->compLclFrameSize != `0`);
8716	inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8717
8718	needMovEspEbp = true;
8719	}
8720	else
8721	#endif // DOUBLE_ALIGN
8722	{
	// needLea: reposition SP with "lea esp, [ebp - offset]" before popping the callee-saved registers.
	// The chain below chooses between: lea (localloc used, or a non-trivial frame with callee-saved
	// registers modified, or any non-empty frame on AMD64), "mov esp, ebp" (no callee-saved registers
	// modified, non-AMD64), "pop ecx" (x86, register-sized frame), or nothing (empty frame).
8723	bool needLea = false;
8724
8725	if (compiler->compLocallocUsed)
8726	{
8727	// ESP may be variable if a localloc was actually executed. Reset it.
8728	// lea esp, [ebp - compiler->compCalleeRegsPushed REGSIZE_BYTES]*
8729
8730	needLea = true;
8731	}
8732	else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
8733	{
8734	if (compiler->compLclFrameSize != `0`)
8735	{
8736	#ifdef _TARGET_AMD64_
8737	// AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
8738	// do an LEA to "pop off" the frame allocation.
8739	needLea = true;
8740	#else // !_TARGET_AMD64_
8741	// We will just generate "mov esp, ebp" and be done with it.
8742	needMovEspEbp = true;
8743	#endif // !_TARGET_AMD64_
8744	}
8745	}
8746	else if (compiler->compLclFrameSize == `0`)
8747	{
8748	// do nothing before popping the callee-saved registers
8749	}
8750	#ifdef _TARGET_X86_
8751	else if (compiler->compLclFrameSize == REGSIZE_BYTES)
8752	{
8753	// "pop ecx" will make ESP point to the callee-saved registers
8754	inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8755	regSet.verifyRegUsed(REG_ECX);
8756	}
8757	#endif // _TARGET_X86
8758	else
8759	{
8760	// We need to make ESP point to the callee-saved registers
8761	needLea = true;
8762	}
8763
8764	if (needLea)
8765	{
	// 'offset' is negated when passed to emitIns_R_AR below, i.e. the lea computes [FP - offset].
8766	int offset;
8767
8768	#ifdef _TARGET_AMD64_
8769	// lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
8770	//
8771	// Case 1: localloc not used.
8772	// genSPToFPDelta = compiler->compCalleeRegsPushed REGSIZE_BYTES + compiler->compLclFrameSize*
8773	// offset = compiler->compCalleeRegsPushed REGSIZE_BYTES;*
8774	// The amount to be subtracted from RBP to point at callee saved int regs.
8775	//
8776	// Case 2: localloc used
8777	// genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
8778	// Offset = Amount to be added to RBP to point at callee saved int regs.
8779	offset = genSPtoFPdelta() - compiler->compLclFrameSize;
8780
8781	// Offset should fit within a byte if localloc is not used.
8782	if (!compiler->compLocallocUsed)
8783	{
8784	noway_assert(offset < UCHAR_MAX);
8785	}
8786	#else
8787	// lea esp, [ebp - compiler->compCalleeRegsPushed REGSIZE_BYTES]*
8788	offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
8789	noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
8790	#endif
8791
8792	getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
8793	}
8794	}
8795
8796	//
8797	// Pop the callee-saved registers (if any)
8798	//
8799
8800	genPopCalleeSavedRegisters();
8801
8802	#ifdef _TARGET_AMD64_
8803	assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
8804	#else // !_TARGET_AMD64_
8805	if (needMovEspEbp)
8806	{
8807	// mov esp, ebp
8808	inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
8809	}
8810	#endif // !_TARGET_AMD64_
8811
8812	// pop ebp
8813	inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
8814	}
8815
8816	getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
8817
8818	/ Check if this a special return block i.e.*
8819	* CEE_JMP instruction */
8820
8821	if (jmpEpilog)
8822	{
8823	noway_assert(block->bbJumpKind == BBJ_RETURN);
8824	noway_assert(block->bbTreeList);
8825
8826	// figure out what jump we have
8827	GenTree* jmpNode = block->lastNode();
8828	#if !FEATURE_FASTTAILCALL
8829	// x86
8830	noway_assert(jmpNode->gtOper == GT_JMP);
8831	#else
8832	// amd64
8833	// If jmpNode is GT_JMP then gtNext must be null.
8834	// If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8835	noway_assert((jmpNode->gtOper != GT_JMP) \|\| (jmpNode->gtNext == nullptr));
8836
8837	// Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8838	noway_assert((jmpNode->gtOper == GT_JMP) \|\|
8839	((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8840
8841	// The next block is associated with this "if" stmt
8842	if (jmpNode->gtOper == GT_JMP)
8843	#endif
8844	{
8845	// Simply emit a jump to the methodHnd. This is similar to a call so we can use
8846	// the same descriptor with some minor adjustments.
8847	CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
8848
	// Ask the EE for the target's entry point; only IAT_VALUE and IAT_PVALUE access types are handled here.
8849	CORINFO_CONST_LOOKUP addrInfo;
8850	compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8851	if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
8852	{
8853	NO_WAY("Unsupported JMP indirection");
8854	}
8855
	// IAT_VALUE selects EC_FUNC_TOKEN; the only other access type that reaches here (IAT_PVALUE)
	// selects EC_FUNC_TOKEN_INDIR.
8856	const emitter::EmitCallType callType =
8857	(addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
8858
8859	// Simply emit a jump to the methodHnd. This is similar to a call so we can use
8860	// the same descriptor with some minor adjustments.
8861
8862	// clang-format off
8863	getEmitter()->emitIns_Call(callType,
8864	methHnd,
8865	INDEBUG_LDISASM_COMMA(nullptr)
8866	addrInfo.addr,
8867	`0`, // argSize
8868	EA_UNKNOWN // retSize
8869	MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize
8870	gcInfo.gcVarPtrSetCur,
8871	gcInfo.gcRegGCrefSetCur,
8872	gcInfo.gcRegByrefSetCur,
8873	BAD_IL_OFFSET, REG_NA, REG_NA, `0`, `0`, / iloffset, ireg, xreg, xmul, disp /
8874	true / isJump /
8875	);
8876	// clang-format on
8877	}
8878	#if FEATURE_FASTTAILCALL
8879	else
8880	{
8881	#ifdef _TARGET_AMD64_
8882	// Fast tail call.
8883	// Call target = RAX.
8884	// Stack walker requires that a register indirect tail call be rex.w prefixed.
8885	getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
8886	#else
8887	assert(!"Fast tail call as epilog+jmp");
8888	unreached();
8889	#endif //_TARGET_AMD64_
8890	}
8891	#endif // FEATURE_FASTTAILCALL
8892	}
8893	else
8894	{
	// Ordinary return. stkArgSize is the operand passed to instGen_Return.
8895	unsigned stkArgSize = `0`; // Zero on all platforms except x86
8896
8897	#if defined(_TARGET_X86_)
	// The callee pops its stack arguments unless the method is varargs or (UNIX_X86_ABI only)
	// its calling convention is caller-pop.
8898	bool fCalleePop = true;
8899
8900	// varargs has caller pop
8901	if (compiler->info.compIsVarArgs)
8902	fCalleePop = false;
8903
8904	#ifdef UNIX_X86_ABI
8905	if (IsCallerPop(compiler->info.compMethodInfo->args.callConv))
8906	fCalleePop = false;
8907	#endif // UNIX_X86_ABI
8908
8909	if (fCalleePop)
8910	{
	// Bytes to pop = total argument size minus the bytes accounted for by register arguments.
8911	noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * REGSIZE_BYTES);
8912	stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
8913
8914	noway_assert(compiler->compArgSize < `0x10000`); // "ret" only has 2 byte operand
8915	}
8916	#endif // _TARGET_X86_
8917
8918	/ Return, popping our arguments (if any) /
8919	instGen_Return(stkArgSize);
8920	}
8921	}
8922
8923	#else // _TARGET_*
8924	#error Unsupported or unset target architecture
8925	#endif // _TARGET_*
8926
8927	#if FEATURE_EH_FUNCLETS
8928
8929	#ifdef _TARGET_ARM_
8930
8931	/*****************************************************************************
8932	*
8933	* Generates code for an EH funclet prolog.
8934	*
8935	* Funclets have the following incoming arguments:
8936	*
8937	* catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
8938	* filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
8939	* finally/fault: none
8940	*
8941	* Funclets set the following registers on exit:
8942	*
8943	* catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
8944	* filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
8945	* finally/fault: none
8946	*
8947	* The ARM funclet prolog sequence is:
8948	*
8949	* push {regs,lr} ; We push the callee-saved regs and 'lr'.
8950	* ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
8951	* ; actually use in the funclet. Currently, we save the same set of callee-saved regs
8952	* ; calculated for the entire function.
8953	* sub sp, XXX ; Establish the rest of the frame.
8954	* ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
8955	* ; up to preserve stack alignment. If we push an odd number of registers, we also
8956	* ; generate this, to keep the stack aligned.
8957	*
8958	* ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
8959	* ; filters.
8960	* ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
8961	* ; epilog.
8962	*
8963	* if (this is a filter funclet)
8964	* {
8965	* // r1 on entry to a filter funclet is CallerSP of the containing function:
8966	* // either the main function, or the funclet for a handler that this filter is dynamically nested within.
8967	* // Note that a filter can be dynamically nested within a funclet even if it is not statically within
8968	* // a funclet. Consider:
8969	* //
8970	* // try {
8971	* // try {
8972	* // throw new Exception();
8973	* // } catch(Exception) {
8974	* // throw new Exception(); // The exception thrown here ...
8975	* // }
8976	* // } filter { // ... will be processed here, while the "catch" funclet frame is
8977	* // // still on the stack
8978	* // } filter-handler {
8979	* // }
8980	* //
8981	* // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
8982	* // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
8983	* // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
8984	*
8985	* ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
8986	* ; the dynamically containing funclet or function)
8987	* str r1, [sp + PSP_slot_SP_offset] ; store the PSP
8988	* sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
8989	* }
8990	* else
8991	* {
8992	* // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
8993	* // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
8994	*
8995	* add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
8996	* str r3, [sp + PSP_slot_SP_offset] ; store the PSP
8997	* }
8998	*
8999	* The epilog sequence is then:
9000	*
9001	* add sp, XXX ; if necessary
9002	* pop {regs,pc}
9003	*
9004	* If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
9005	* Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
9006	*
9007	* The funclet frame is thus:
9008	*
9009	* \| \|
9010	* \|-----------------------\|
9011	* \| incoming \|
9012	* \| arguments \|
9013	* +=======================+ <---- Caller's SP
9014	* \|Callee saved registers \|
9015	* \|-----------------------\|
9016	* \|Pre-spill regs space \| // This is only necessary to keep the PSP slot at the same offset
9017	* \| \| // in function and funclet
9018	* \|-----------------------\|
9019	* \| PSP slot \| // Omitted in CoreRT ABI
9020	* \|-----------------------\|
9021	* ~ possible 4 byte pad ~
9022	* ~ for alignment ~
9023	* \|-----------------------\|
9024	* \| Outgoing arg space \|
9025	* \|-----------------------\| <---- Ambient SP
9026	* \| \| \|
9027	* ~ \| Stack grows ~
9028	* \| \| downward \|
9029	* V
9030	*/
9031
9032	void CodeGen::genFuncletProlog(BasicBlock* block)
9033	{
9034	#ifdef DEBUG
9035	if (verbose)
9036	printf("*************** In genFuncletProlog()\n");
9037	#endif
9038
9039	assert(block != NULL);
9040	assert(block->bbFlags & BBF_FUNCLET_BEG);
9041
9042	ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9043
9044	gcInfo.gcResetForBB();
9045
9046	compiler->unwindBegProlog();
9047
9048	regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9049	regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
9050
9051	regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
9052	maskPushRegsInt \|= maskStackAlloc;
9053
9054	assert(FitsIn<int>(maskPushRegsInt));
9055	inst_IV(INS_push, (int)maskPushRegsInt);
9056	compiler->unwindPushMaskInt(maskPushRegsInt);
9057
9058	if (maskPushRegsFloat != RBM_NONE)
9059	{
9060	genPushFltRegs(maskPushRegsFloat);
9061	compiler->unwindPushMaskFloat(maskPushRegsFloat);
9062	}
9063
9064	bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
9065
9066	regMaskTP maskArgRegsLiveIn;
9067	if (isFilter)
9068	{
9069	maskArgRegsLiveIn = RBM_R0 \| RBM_R1;
9070	}
9071	else if ((block->bbCatchTyp == BBCT_FINALLY) \|\| (block->bbCatchTyp == BBCT_FAULT))
9072	{
9073	maskArgRegsLiveIn = RBM_NONE;
9074	}
9075	else
9076	{
9077	maskArgRegsLiveIn = RBM_R0;
9078	}
9079
9080	regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
9081	bool initRegZeroed = false;
9082
9083	if (maskStackAlloc == RBM_NONE)
9084	{
9085	genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9086	}
9087
9088	// This is the end of the OS-reported prolog for purposes of unwinding
9089	compiler->unwindEndProlog();
9090
9091	if (isFilter)
9092	{
9093	// This is the first block of a filter
9094
9095	getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
9096	genFuncletInfo.fiPSP_slot_CallerSP_offset);
9097	regSet.verifyRegUsed(REG_R1);
9098	getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
9099	genFuncletInfo.fiPSP_slot_SP_offset);
9100	getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
9101	genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9102	}
9103	else
9104	{
9105	// This is a non-filter funclet
9106	getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
9107	genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9108	regSet.verifyRegUsed(REG_R3);
9109	getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
9110	genFuncletInfo.fiPSP_slot_SP_offset);
9111	}
9112	}
9113
9114	/*****************************************************************************
9115	*
9116	* Generates code for an EH funclet epilog.
9117	*/
9118
9119	void CodeGen::genFuncletEpilog()
9120	{
9121	#ifdef DEBUG
9122	if (verbose)
9123	printf("*************** In genFuncletEpilog()\n");
9124	#endif
9125
9126	ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9127
9128	// Just as for the main function, we delay starting the unwind codes until we have
9129	// an instruction which we know needs an unwind code. This is to support code like
9130	// this:
9131	// movw r3, 0x38e0
9132	// add sp, r3
9133	// pop {r4,r5,r6,r10,r11,pc}
9134	// where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
9135
9136	bool unwindStarted = false;
9137
9138	/ The saved regs info saves the LR register. We need to pop the PC register to return /
9139	assert(genFuncletInfo.fiSaveRegs & RBM_LR);
9140
9141	regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9142	regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
9143
9144	regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
9145	maskPopRegsInt \|= maskStackAlloc;
9146
9147	if (maskStackAlloc == RBM_NONE)
9148	{
9149	genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
9150	}
9151
9152	if (!unwindStarted)
9153	{
9154	// We'll definitely generate an unwindable instruction next
9155	compiler->unwindBegEpilog();
9156	unwindStarted = true;
9157	}
9158
9159	maskPopRegsInt &= ~RBM_LR;
9160	maskPopRegsInt \|= RBM_PC;
9161
9162	if (maskPopRegsFloat != RBM_NONE)
9163	{
9164	genPopFltRegs(maskPopRegsFloat);
9165	compiler->unwindPopMaskFloat(maskPopRegsFloat);
9166	}
9167
9168	assert(FitsIn<int>(maskPopRegsInt));
9169	inst_IV(INS_pop, (int)maskPopRegsInt);
9170	compiler->unwindPopMaskInt(maskPopRegsInt);
9171
9172	compiler->unwindEndEpilog();
9173	}
9174
9175	/*****************************************************************************
9176	*
9177	* Capture the information used to generate the funclet prologs and epilogs.
9178	* Note that all funclet prologs are identical, and all funclet epilogs are
9179	* identical (per type: filters are identical, and non-filters are identical).
9180	* Thus, we compute the data used for these just once.
9181	*
9182	* See genFuncletProlog() for more information about the prolog/epilog sequences.
9183	*/
9184
9185	void CodeGen::genCaptureFuncletPrologEpilogInfo()
9186	{
9187	if (compiler->ehAnyFunclets())
9188	{
9189	assert(isFramePointerUsed());
9190	assert(compiler->lvaDoneFrameLayout ==
9191	Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
9192
9193	// Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
9194	// of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
9195	// (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
9196	// (also assumed in genFnProlog()).
9197	assert((regSet.rsMaskCalleeSaved & (RBM_R12 \| RBM_R13)) == `0`);
9198	unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
9199	genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + `2` * REGSIZE_BYTES;
9200
9201	regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
9202	unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
9203	unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
9204	assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == `0`);
9205	unsigned funcletFrameSize =
9206	preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES / PSP slot / + compiler->lvaOutgoingArgSpaceSize;
9207
9208	unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
9209	unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
9210	unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
9211
9212	unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
9213	int PSP_slot_CallerSP_offset =
9214	-(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
9215
9216	/ Now save it for future use /
9217
9218	genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
9219	genFuncletInfo.fiSpDelta = spDelta;
9220	genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
9221	genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
9222
9223	#ifdef DEBUG
9224	if (verbose)
9225	{
9226	printf("\n");
9227	printf("Funclet prolog / epilog info\n");
9228	printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9229	printf(" Save regs: ");
9230	dspRegMask(rsMaskSaveRegs);
9231	printf("\n");
9232	printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9233	printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
9234	printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
9235
9236	if (PSP_slot_CallerSP_offset !=
9237	compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
9238	printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
9239	compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
9240	}
9241	#endif // DEBUG
9242
9243	assert(PSP_slot_CallerSP_offset < `0`);
9244	if (compiler->lvaPSPSym != BAD_VAR_NUM)
9245	{
9246	assert(PSP_slot_CallerSP_offset ==
9247	compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main
9248	// function and funclet!
9249	}
9250	}
9251	}
9252
9253	#elif defined(_TARGET_AMD64_)
9254
9255	/*****************************************************************************
9256	*
9257	* Generates code for an EH funclet prolog.
9258	*
9259	* Funclets have the following incoming arguments:
9260	*
9261	* catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
9262	* filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
9263	* finally/fault: rcx = InitialSP
9264	*
9265	* Funclets set the following registers on exit:
9266	*
9267	* catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
9268	* filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9269	* finally/fault: none
9270	*
9271	* The AMD64 funclet prolog sequence is:
9272	*
9273	* push ebp
9274	* push callee-saved regs
9275	* ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
9276	* ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
9277	* ; the entire function.
9278	* sub sp, XXX ; Establish the rest of the frame.
9279	* ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
9280	* ; up to preserve stack alignment. If we push an odd number of registers, we also
9281	* ; generate this, to keep the stack aligned.
9282	*
9283	* ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
9284	* ; filters.
9285	* ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
9286	* ; epilog.
9287	* ; Also, re-establish the frame pointer from the PSP.
9288	*
9289	* mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
9290	* ; PSP of the dynamically containing funclet or function)
9291	* mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
9292	* lea rbp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
9293	* ; Function_InitialSP_to_FP_delta==0, we don't need this
9294	* ; instruction.
9295	*
9296	* The epilog sequence is then:
9297	*
9298	* add rsp, XXX
9299	* pop callee-saved regs ; if necessary
9300	* pop rbp
9301	* ret
9302	*
9303	* The funclet frame is thus:
9304	*
9305	* \| \|
9306	* \|-----------------------\|
9307	* \| incoming \|
9308	* \| arguments \|
9309	* +=======================+ <---- Caller's SP
9310	* \| Return address \|
9311	* \|-----------------------\|
9312	* \| Saved EBP \|
9313	* \|-----------------------\|
9314	* \|Callee saved registers \|
9315	* \|-----------------------\|
9316	* ~ possible 8 byte pad ~
9317	* ~ for alignment ~
9318	* \|-----------------------\|
9319	* \| PSP slot \| // Omitted in CoreRT ABI
9320	* \|-----------------------\|
9321	* \| Outgoing arg space \| // this only exists if the function makes a call
9322	* \|-----------------------\| <---- Initial SP
9323	* \| \| \|
9324	* ~ \| Stack grows ~
9325	* \| \| downward \|
9326	* V
9327	*
9328	* TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
9329	* in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
9330	* UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
9331	* "FRAMEPTR OFFSETS" for details.
9332	*/
9333
9334	void CodeGen::genFuncletProlog(BasicBlock* block)
9335	{
9336	#ifdef DEBUG
9337	if (verbose)
9338	{
9339	printf("*************** In genFuncletProlog()\n");
9340	}
9341	#endif
9342
9343	assert(!regSet.rsRegsModified(RBM_FPBASE));
9344	assert(block != nullptr);
9345	assert(block->bbFlags & BBF_FUNCLET_BEG);
9346	assert(isFramePointerUsed());
9347
9348	ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9349
9350	gcInfo.gcResetForBB();
9351
9352	compiler->unwindBegProlog();
9353
9354	// We need to push ebp, since it's callee-saved.
9355	// We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
9356	// keep track of that on a per-funclet basis, so we push the same set as in the main function.
9357	// The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
9358	// is stored here (all temps are allocated in the parent frame).
9359	// We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
9360	// size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
9361
9362	inst_RV(INS_push, REG_FPBASE, TYP_REF);
9363	compiler->unwindPush(REG_FPBASE);
9364
9365	// Callee saved int registers are pushed to stack.
9366	genPushCalleeSavedRegisters();
9367
9368	regMaskTP maskArgRegsLiveIn;
9369	if ((block->bbCatchTyp == BBCT_FINALLY) \|\| (block->bbCatchTyp == BBCT_FAULT))
9370	{
9371	maskArgRegsLiveIn = RBM_ARG_0;
9372	}
9373	else
9374	{
9375	maskArgRegsLiveIn = RBM_ARG_0 \| RBM_ARG_2;
9376	}
9377
9378	regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
9379	bool initRegZeroed = false;
9380
9381	genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9382
9383	// Callee saved float registers are copied to stack in their assigned stack slots
9384	// after allocating space for them as part of funclet frame.
9385	genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9386
9387	// This is the end of the OS-reported prolog for purposes of unwinding
9388	compiler->unwindEndProlog();
9389
9390	// If there is no PSPSym (CoreRT ABI), we are done.
9391	if (compiler->lvaPSPSym == BAD_VAR_NUM)
9392	{
9393	return;
9394	}
9395
9396	getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
9397
9398	regSet.verifyRegUsed(REG_FPBASE);
9399
9400	getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
9401
9402	if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != `0`)
9403	{
9404	getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
9405	genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9406	}
9407
9408	// We've modified EBP, but not really. Say that we haven't...
9409	regSet.rsRemoveRegsModified(RBM_FPBASE);
9410	}
9411
9412	/*****************************************************************************
9413	*
9414	* Generates code for an EH funclet epilog.
9415	*
9416	* Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
9417	*/
9418
9419	void CodeGen::genFuncletEpilog()
9420	{
9421	#ifdef DEBUG
9422	if (verbose)
9423	{
9424	printf("*************** In genFuncletEpilog()\n");
9425	}
9426	#endif
9427
9428	ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9429
9430	// Restore callee saved XMM regs from their stack slots before modifying SP
9431	// to position at callee saved int regs.
9432	genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9433	inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
9434	genPopCalleeSavedRegisters();
9435	inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
9436	instGen_Return(`0`);
9437	}
9438
9439	/*****************************************************************************
9440	*
9441	* Capture the information used to generate the funclet prologs and epilogs.
9442	*/
9443
9444	void CodeGen::genCaptureFuncletPrologEpilogInfo()
9445	{
9446	if (!compiler->ehAnyFunclets())
9447	{
9448	return;
9449	}
9450
9451	// Note that compLclFrameSize can't be used (for can we call functions that depend on it),
9452	// because we're not going to allocate the same size frame as the parent.
9453
9454	assert(isFramePointerUsed());
9455	assert(compiler->lvaDoneFrameLayout ==
9456	Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
9457	assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-`1`); // The float registers to be preserved is finalized
9458
9459	// Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
9460	// that's ok, because we're figuring out an offset in the parent frame.
9461	genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
9462	compiler->lvaToInitialSPRelativeOffset(`0`, true); // trick to find the Initial-SP-relative offset of the frame
9463	// pointer.
9464
9465	assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == `0`);
9466	#ifndef UNIX_AMD64_ABI
9467	// No 4 slots for outgoing params on the stack for System V systems.
9468	assert((compiler->lvaOutgoingArgSpaceSize == `0`) \|\|
9469	(compiler->lvaOutgoingArgSpaceSize >= (`4` * REGSIZE_BYTES))); // On AMD64, we always have 4 outgoing argument
9470	// slots if there are any calls in the function.
9471	#endif // UNIX_AMD64_ABI
9472	unsigned offset = compiler->lvaOutgoingArgSpaceSize;
9473
9474	genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
9475
9476	// How much stack do we allocate in the funclet?
9477	// We need to 16-byte align the stack.
9478
9479	unsigned totalFrameSize =
9480	REGSIZE_BYTES // return address
9481	+ REGSIZE_BYTES // pushed EBP
9482	+ (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
9483
9484	// Entire 128-bits of XMM register is saved to stack due to ABI encoding requirement.
9485	// Copying entire XMM register to/from memory will be performant if SP is aligned at XMM_REGSIZE_BYTES boundary.
9486	unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
9487	unsigned FPRegsPad = (calleeFPRegsSavedSize > `0`) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : `0`;
9488
9489	unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : `0`;
9490
9491	totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
9492	+ calleeFPRegsSavedSize // pushed callee-saved float regs
9493	// below calculated 'pad' will go here
9494	+ PSPSymSize // PSPSym
9495	+ compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9496	;
9497
9498	unsigned pad = AlignmentPad(totalFrameSize, `16`);
9499
9500	genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
9501	+ calleeFPRegsSavedSize // Callee saved xmm regs
9502	+ pad + PSPSymSize // PSPSym
9503	+ compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9504	;
9505
9506	#ifdef DEBUG
9507	if (verbose)
9508	{
9509	printf("\n");
9510	printf("Funclet prolog / epilog info\n");
9511	printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9512	printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9513	printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
9514	}
9515
9516	if (compiler->lvaPSPSym != BAD_VAR_NUM)
9517	{
9518	assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
9519	compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
9520	// funclet!
9521	}
9522	#endif // DEBUG
9523	}
9524
9525	#elif defined(_TARGET_ARM64_)
9526
9527	// Look in CodeGenArm64.cpp
9528
9529	#elif defined(_TARGET_X86_)
9530
9531	/*****************************************************************************
9532	*
9533	* Generates code for an EH funclet prolog.
9534	*
9535	*
9536	* Funclets have the following incoming arguments:
9537	*
9538	* catch/filter-handler: eax = the exception object that was caught (see GT_CATCH_ARG)
9539	* filter: eax = the exception object that was caught (see GT_CATCH_ARG)
9540	* finally/fault: none
9541	*
9542	* Funclets set the following registers on exit:
9543	*
9544	* catch/filter-handler: eax = the address at which execution should resume (see BBJ_EHCATCHRET)
9545	* filter: eax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9546	* finally/fault: none
9547	*
9548	* Funclet prolog/epilog sequence and funclet frame layout are TBD.
9549	*
9550	*/
9551
9552	void CodeGen::genFuncletProlog(BasicBlock* block)
9553	{
9554	#ifdef DEBUG
9555	if (verbose)
9556	{
9557	printf("*************** In genFuncletProlog()\n");
9558	}
9559	#endif
9560
9561	ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9562
9563	gcInfo.gcResetForBB();
9564
9565	compiler->unwindBegProlog();
9566
9567	// This is the end of the OS-reported prolog for purposes of unwinding
9568	compiler->unwindEndProlog();
9569
9570	// TODO We may need EBP restore sequence here if we introduce PSPSym
9571
9572	// Add a padding for 16-byte alignment
9573	inst_RV_IV(INS_sub, REG_SPBASE, `12`, EA_PTRSIZE);
9574	}
9575
9576	/*****************************************************************************
9577	*
9578	* Generates code for an EH funclet epilog.
9579	*/
9580
9581	void CodeGen::genFuncletEpilog()
9582	{
9583	#ifdef DEBUG
9584	if (verbose)
9585	{
9586	printf("*************** In genFuncletEpilog()\n");
9587	}
9588	#endif
9589
9590	ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9591
9592	// Revert a padding that was added for 16-byte alignment
9593	inst_RV_IV(INS_add, REG_SPBASE, `12`, EA_PTRSIZE);
9594
9595	instGen_Return(`0`);
9596	}
9597
9598	/*****************************************************************************
9599	*
9600	* Capture the information used to generate the funclet prologs and epilogs.
9601	*/
9602
9603	void CodeGen::genCaptureFuncletPrologEpilogInfo()
9604	{
9605	if (!compiler->ehAnyFunclets())
9606	{
9607	return;
9608	}
9609	}
9610
9611	#else // _TARGET_*
9612
9613	/*****************************************************************************
9614	*
9615	* Generates code for an EH funclet prolog.
9616	*/
9617
9618	void CodeGen::genFuncletProlog(BasicBlock* block)
9619	{
9620	NYI("Funclet prolog");
9621	}
9622
9623	/*****************************************************************************
9624	*
9625	* Generates code for an EH funclet epilog.
9626	*/
9627
9628	void CodeGen::genFuncletEpilog()
9629	{
9630	NYI("Funclet epilog");
9631	}
9632
9633	/*****************************************************************************
9634	*
9635	* Capture the information used to generate the funclet prologs and epilogs.
9636	*/
9637
9638	void CodeGen::genCaptureFuncletPrologEpilogInfo()
9639	{
9640	if (compiler->ehAnyFunclets())
9641	{
9642	NYI("genCaptureFuncletPrologEpilogInfo()");
9643	}
9644	}
9645
9646	#endif // _TARGET_*
9647
/*****************************************************************************
9649	*
9650	* Set the main function PSPSym value in the frame.
9651	* Funclets use different code to load the PSP sym and save it in their frame.
9652	* See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
9653	* The PSPSym section of that document is copied here.
9654	*
9655	***********************************
9656	* The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
9657	* accesses locals from the main function body.
9658	*
9659	* First, two definitions.
9660	*
9661	* Caller-SP is the value of the stack pointer in a function's caller before the call
9662	* instruction is executed. That is, when function A calls function B, Caller-SP for B
9663	* is the value of the stack pointer immediately before the call instruction in A
9664	* (calling B) was executed. Note that this definition holds for both AMD64, which
9665	* pushes the return value when a call instruction is executed, and for ARM, which
9666	* doesn't. For AMD64, Caller-SP is the address above the call return address.
9667	*
9668	* Initial-SP is the initial value of the stack pointer after the fixed-size portion of
9669	* the frame has been allocated. That is, before any "alloca"-type allocations.
9670	*
9671	* The PSPSym is a pointer-sized local variable in the frame of the main function and
9672	* of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
9673	* for the main function. The stack offset of the PSPSym is reported to the VM in the
9674	* GC information header. The value reported in the GC information is the offset of the
9675	* PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
9676	* value is reported to the VM, differs between architectures. In particular, note that
9677	* most things in the GC information header are reported as offsets relative to Caller-SP,
9678	* but PSPSym on AMD64 is one (maybe the only) exception.)
9679	*
9680	* The VM uses the PSPSym to find other locals it cares about (such as the generics context
9681	* in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
9682	* the frame pointer is the same value in a funclet as it is in the main function body.
9683	*
9684	* When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
9685	* true for all funclets and it is passed as the first argument in RCX, but for ARM this is
9686	* only true for first pass funclets (currently just filters) and it is passed as the second
9687	* argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
9688	* frame in the exception processing system. For the CLR, it points either to the main function
9689	* frame or a dynamically enclosing funclet frame from the same function, for the funclet being
9690	* invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
9691	*
9692	* Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
9693	* don't know if the Establisher Frame is from the main function or a funclet, we design the
9694	* main function and funclet frame layouts to place the PSPSym at an identical, small, constant
9695	* offset from the Establisher Frame in each case. (This is also required because we only report
9696	* a single offset to the PSPSym in the GC information, and that offset must be valid for the main
9697	* function and all of its funclets). Then, the funclet uses this known offset to compute the
9698	* PSPSym address and read its value. From this, it can compute the value of the frame pointer
9699	* (which is a constant offset from the PSPSym value) and set the frame register to be the same
9700	* as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
9701	* PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
9702	* for every nested funclet invocation.
9703	*
9704	* On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
9705	* restores all non-volatile registers to their values within the parent frame. This includes
9706	* the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
9707	* in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
9708	*
9709	* Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
9710	* (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
9711	* ARM this is the first argument and passed in R0.
9712	*
9713	* (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
9714	* pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
9715	* funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
9716	* is required in all funclets as well as the main function, whereas if the establisher frame was
9717	* correctly reported, the PSPSym could be omitted in some cases.)
9718	***********************************
9719	*/
9720	void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
9721	{
9722	assert(compiler->compGeneratingProlog);
9723
9724	if (compiler->lvaPSPSym == BAD_VAR_NUM)
9725	{
9726	return;
9727	}
9728
9729	noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
9730
9731	#if defined(_TARGET_ARM_)
9732
9733	// We either generate:
9734	// add r1, r11, 8
9735	// str r1, [reg + PSPSymOffset]
9736	// or:
9737	// add r1, sp, 76
9738	// str r1, [reg + PSPSymOffset]
9739	// depending on the smallest encoding
9740
9741	int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9742
9743	int callerSPOffs;
9744	regNumber regBase;
9745
9746	if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
9747	{
9748	// use the "add <reg>, sp, imm" form
9749
9750	callerSPOffs = SPtoCallerSPdelta;
9751	regBase = REG_SPBASE;
9752	}
9753	else
9754	{
9755	// use the "add <reg>, r11, imm" form
9756
9757	int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
9758	noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
9759
9760	callerSPOffs = FPtoCallerSPdelta;
9761	regBase = REG_FPBASE;
9762	}
9763
9764	// We will just use the initReg since it is an available register
9765	// and we are probably done using it anyway...
9766	regNumber regTmp = initReg;
9767	pInitRegZeroed = false*;
9768
9769	getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
9770	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, `0`);
9771
9772	#elif defined(_TARGET_ARM64_)
9773
9774	int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9775
9776	// We will just use the initReg since it is an available register
9777	// and we are probably done using it anyway...
9778	regNumber regTmp = initReg;
9779	pInitRegZeroed = false*;
9780
9781	getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
9782	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, `0`);
9783
9784	#elif defined(_TARGET_AMD64_)
9785
9786	// The PSP sym value is Initial-SP, not Caller-SP!
9787	// We assume that RSP is Initial-SP when this function is called. That is, the stack frame
9788	// has been established.
9789	//
9790	// We generate:
9791	// mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
9792
9793	getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, `0`);
9794
9795	#else // _TARGET_*
9796
9797	NYI("Set function PSP sym");
9798
9799	#endif // _TARGET_*
9800	}
9801
9802	#endif // FEATURE_EH_FUNCLETS
9803
9804	/*****************************************************************************
9805	*
9806	* Generates code for all the function and funclet prologs and epilogs.
9807	*/
9808
9809	void CodeGen::genGeneratePrologsAndEpilogs()
9810	{
9811	#ifdef DEBUG
9812	if (verbose)
9813	{
9814	printf("*************** Before prolog / epilog generation\n");
9815	getEmitter()->emitDispIGlist(false);
9816	}
9817	#endif
9818
9819	// Before generating the prolog, we need to reset the variable locations to what they will be on entry.
9820	// This affects our code that determines which untracked locals need to be zero initialized.
9821	compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
9822
9823	// Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
9824
9825	getEmitter()->emitStartPrologEpilogGeneration();
9826
9827	gcInfo.gcResetForBB();
9828	genFnProlog();
9829
9830	// Generate all the prologs and epilogs.
9831	CLANG_FORMAT_COMMENT_ANCHOR;
9832
9833	#if FEATURE_EH_FUNCLETS
9834
9835	// Capture the data we're going to use in the funclet prolog and epilog generation. This is
9836	// information computed during codegen, or during function prolog generation, like
9837	// frame offsets. It must run after main function prolog generation.
9838
9839	genCaptureFuncletPrologEpilogInfo();
9840
9841	#endif // FEATURE_EH_FUNCLETS
9842
9843	// Walk the list of prologs and epilogs and generate them.
9844	// We maintain a list of prolog and epilog basic blocks in
9845	// the insGroup structure in the emitter. This list was created
9846	// during code generation by the genReserve() functions.*
9847	//
9848	// TODO: it seems like better design would be to create a list of prologs/epilogs
9849	// in the code generator (not the emitter), and then walk that list. But we already
9850	// have the insGroup list, which serves well, so we don't need the extra allocations
9851	// for a prolog/epilog list in the code generator.
9852
9853	getEmitter()->emitGeneratePrologEpilog();
9854
9855	// Tell the emitter we're done with all prolog and epilog generation.
9856
9857	getEmitter()->emitFinishPrologEpilogGeneration();
9858
9859	#ifdef DEBUG
9860	if (verbose)
9861	{
9862	printf("*************** After prolog / epilog generation\n");
9863	getEmitter()->emitDispIGlist(false);
9864	}
9865	#endif
9866	}
9867
9868	/*
9869	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9870	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9871	XX XX
9872	XX End Prolog / Epilog XX
9873	XX XX
9874	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9875	XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9876	*/
9877
9878	#if STACK_PROBES
9879	void CodeGen::genGenerateStackProbe()
9880	{
9881	noway_assert(compiler->opts.compNeedStackProbes);
9882
9883	// If this assert fires, it means somebody has changed the value
9884	// CORINFO_STACKPROBE_DEPTH.
9885	// Why does the EE need such a deep probe? It should just need a couple
9886	// of bytes, to set up a frame in the unmanaged code..
9887
9888	static_assert_no_msg(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize());
9889
9890	JITDUMP("Emitting stack probe:\n");
9891	getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE,
9892	-(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK));
9893	}
9894	#endif // STACK_PROBES
9895
9896	#if defined(_TARGET_XARCH_)
9897	// Save compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
9898	// down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)XMM_REG_SIZE]*
9899	// Here offset = 16-byte aligned offset after pushing integer registers.
9900	//
9901	// Params
9902	// lclFrameSize - Fixed frame size excluding callee pushed int regs.
9903	// non-funclet: this will be compLclFrameSize.
9904	// funclet frames: this will be FuncletInfo.fiSpDelta.
9905	void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
9906	{
9907	genVzeroupperIfNeeded(false);
9908	regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
9909
9910	// Only callee saved floating point registers should be in regMask
9911	assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
9912
9913	// fast path return
9914	if (regMask == RBM_NONE)
9915	{
9916	return;
9917	}
9918
9919	#ifdef _TARGET_AMD64_
9920	unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : `0`;
9921	unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
9922
9923	// Offset is 16-byte aligned since we use movaps for preserving xmm regs.
9924	assert((offset % `16`) == `0`);
9925	instruction copyIns = ins_Copy(TYP_FLOAT);
9926	#else // !_TARGET_AMD64_
9927	unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
9928	instruction copyIns = INS_movupd;
9929	#endif // !_TARGET_AMD64_
9930
9931	for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
9932	{
9933	regMaskTP regBit = genRegMask(reg);
9934	if ((regBit & regMask) != `0`)
9935	{
9936	// ABI requires us to preserve lower 128-bits of YMM register.
9937	getEmitter()->emitIns_AR_R(copyIns,
9938	EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
9939	// EA_16BYTE
9940	reg, REG_SPBASE, offset);
9941	compiler->unwindSaveReg(reg, offset);
9942	regMask &= ~regBit;
9943	offset -= XMM_REGSIZE_BYTES;
9944	}
9945	}
9946	}
9947
9948	// Save/Restore compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
9949	// down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)XMM_REG_SIZE]*
9950	// Here offset = 16-byte aligned offset after pushing integer registers.
9951	//
9952	// Params
9953	// lclFrameSize - Fixed frame size excluding callee pushed int regs.
9954	// non-funclet: this will be compLclFrameSize.
9955	// funclet frames: this will be FuncletInfo.fiSpDelta.
9956	void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
9957	{
9958	regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
9959
9960	// Only callee saved floating point registers should be in regMask
9961	assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
9962
9963	// fast path return
9964	if (regMask == RBM_NONE)
9965	{
9966	genVzeroupperIfNeeded();
9967	return;
9968	}
9969
9970	#ifdef _TARGET_AMD64_
9971	unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : `0`;
9972	instruction copyIns = ins_Copy(TYP_FLOAT);
9973	#else // !_TARGET_AMD64_
9974	unsigned firstFPRegPadding = `0`;
9975	instruction copyIns = INS_movupd;
9976	#endif // !_TARGET_AMD64_
9977
9978	unsigned offset;
9979	regNumber regBase;
9980	if (compiler->compLocallocUsed)
9981	{
9982	// localloc frame: use frame pointer relative offset
9983	assert(isFramePointerUsed());
9984	regBase = REG_FPBASE;
9985	offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
9986	}
9987	else
9988	{
9989	regBase = REG_SPBASE;
9990	offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
9991	}
9992
9993	#ifdef _TARGET_AMD64_
9994	// Offset is 16-byte aligned since we use movaps for restoring xmm regs
9995	assert((offset % `16`) == `0`);
9996	#endif // _TARGET_AMD64_
9997
9998	for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
9999	{
10000	regMaskTP regBit = genRegMask(reg);
10001	if ((regBit & regMask) != `0`)
10002	{
10003	// ABI requires us to restore lower 128-bits of YMM register.
10004	getEmitter()->emitIns_R_AR(copyIns,
10005	EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
10006	// EA_16BYTE
10007	reg, regBase, offset);
10008	regMask &= ~regBit;
10009	offset -= XMM_REGSIZE_BYTES;
10010	}
10011	}
10012	genVzeroupperIfNeeded();
10013	}
10014
10015	// Generate Vzeroupper instruction as needed to zero out upper 128b-bit of all YMM registers so that the
10016	// AVX/Legacy SSE transition penalties can be avoided. This function is been used in genPreserveCalleeSavedFltRegs
10017	// (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in Prolog if the method contains
10018	// 128-bit or 256-bit AVX code, to avoid legacy SSE to AVX transition penalty, which could happen when native
10019	// code contains legacy SSE code calling into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in Epilog
10020	// if the method contains 256-bit AVX code, to avoid AVX to legacy SSE transition penalty.
10021	//
10022	// Params
10023	// check256bitOnly - true to check if the function contains 256-bit AVX instruction and generate Vzeroupper
10024	// instruction, false to check if the function contains AVX instruciton (either 128-bit or 256-bit).
10025	//
10026	void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly / = true/)
10027	{
10028	bool emitVzeroUpper = false;
10029	if (check256bitOnly)
10030	{
10031	emitVzeroUpper = getEmitter()->Contains256bitAVX();
10032	}
10033	else
10034	{
10035	emitVzeroUpper = getEmitter()->ContainsAVX();
10036	}
10037
10038	if (emitVzeroUpper)
10039	{
10040	assert(compiler->canUseVexEncoding());
10041	instGen(INS_vzeroupper);
10042	}
10043	}
10044
10045	#endif // defined(_TARGET_XARCH_)
10046
10047	//-----------------------------------------------------------------------------------
10048	// IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
10049	//
10050	// Arguments:
10051	// hClass - type handle
10052	//
10053	// Return Value:
10054	// true if type is returned in multiple registers, false otherwise.
10055	//
10056	bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
10057	{
10058	if (hClass == NO_CLASS_HANDLE)
10059	{
10060	return false;
10061	}
10062
10063	structPassingKind howToReturnStruct;
10064	var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
10065
10066	return (varTypeIsStruct(returnType));
10067	}
10068
10069	//----------------------------------------------
10070	// Methods that support HFA's for ARM32/ARM64
10071	//----------------------------------------------
10072
10073	bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
10074	{
10075	#ifdef FEATURE_HFA
10076	return varTypeIsFloating(GetHfaType(hClass));
10077	#else
10078	return false;
10079	#endif
10080	}
10081
10082	bool Compiler::IsHfa(GenTree* tree)
10083	{
10084	#ifdef FEATURE_HFA
10085	return IsHfa(gtGetStructHandleIfPresent(tree));
10086	#else
10087	return false;
10088	#endif
10089	}
10090
10091	var_types Compiler::GetHfaType(GenTree* tree)
10092	{
10093	#ifdef FEATURE_HFA
10094	return GetHfaType(gtGetStructHandleIfPresent(tree));
10095	#else
10096	return TYP_UNDEF;
10097	#endif
10098	}
10099
10100	unsigned Compiler::GetHfaCount(GenTree* tree)
10101	{
10102	return GetHfaCount(gtGetStructHandleIfPresent(tree));
10103	}
10104
10105	var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
10106	{
10107	var_types result = TYP_UNDEF;
10108	if (hClass != NO_CLASS_HANDLE)
10109	{
10110	#ifdef FEATURE_HFA
10111	CorInfoType corType = info.compCompHnd->getHFAType(hClass);
10112	if (corType != CORINFO_TYPE_UNDEF)
10113	{
10114	result = JITtype2varType(corType);
10115	}
10116	#endif // FEATURE_HFA
10117	}
10118	return result;
10119	}
10120
10121	//------------------------------------------------------------------------
10122	// GetHfaCount: Given a class handle for an HFA struct
10123	// return the number of registers needed to hold the HFA
10124	//
10125	// Note that on ARM32 the single precision registers overlap with
10126	// the double precision registers and for that reason each
10127	// double register is considered to be two single registers.
10128	// Thus for ARM32 an HFA of 4 doubles this function will return 8.
10129	// On ARM64 given an HFA of 4 singles or 4 doubles this function will
10130	// will return 4 for both.
10131	// Arguments:
10132	// hClass: the class handle of a HFA struct
10133	//
10134	unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
10135	{
10136	assert(IsHfa(hClass));
10137	#ifdef _TARGET_ARM_
10138	// A HFA of doubles is twice as large as an HFA of singles for ARM32
10139	// (i.e. uses twice the number of single precison registers)
10140	return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
10141	#else // _TARGET_ARM64_
10142	var_types hfaType = GetHfaType(hClass);
10143	unsigned classSize = info.compCompHnd->getClassSize(hClass);
10144	// Note that the retail build issues a warning about a potential divsion by zero without the Max function
10145	unsigned elemSize = Max((unsigned)`1`, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
10146	return classSize / elemSize;
10147	#endif // _TARGET_ARM64_
10148	}
10149
10150	#ifdef _TARGET_XARCH_
10151
10152	//------------------------------------------------------------------------
10153	// genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
10154	// map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
10155	// X86/x64 has a special encoding for shift/rotate-by-constant-1.
10156	//
10157	// Arguments:
10158	// ins: the base shift/rotate instruction
10159	// shiftByValue: the constant value by which we are shifting/rotating
10160	//
10161	instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
10162	{
10163	assert(ins == INS_rcl \|\| ins == INS_rcr \|\| ins == INS_rol \|\| ins == INS_ror \|\| ins == INS_shl \|\| ins == INS_shr \|\|
10164	ins == INS_sar);
10165
10166	// Which format should we use?
10167
10168	instruction shiftByConstantIns;
10169
10170	if (shiftByValue == `1`)
10171	{
10172	// Use the shift-by-one format.
10173
10174	assert(INS_rcl + `1` == INS_rcl_1);
10175	assert(INS_rcr + `1` == INS_rcr_1);
10176	assert(INS_rol + `1` == INS_rol_1);
10177	assert(INS_ror + `1` == INS_ror_1);
10178	assert(INS_shl + `1` == INS_shl_1);
10179	assert(INS_shr + `1` == INS_shr_1);
10180	assert(INS_sar + `1` == INS_sar_1);
10181
10182	shiftByConstantIns = (instruction)(ins + `1`);
10183	}
10184	else
10185	{
10186	// Use the shift-by-NNN format.
10187
10188	assert(INS_rcl + `2` == INS_rcl_N);
10189	assert(INS_rcr + `2` == INS_rcr_N);
10190	assert(INS_rol + `2` == INS_rol_N);
10191	assert(INS_ror + `2` == INS_ror_N);
10192	assert(INS_shl + `2` == INS_shl_N);
10193	assert(INS_shr + `2` == INS_shr_N);
10194	assert(INS_sar + `2` == INS_sar_N);
10195
10196	shiftByConstantIns = (instruction)(ins + `2`);
10197	}
10198
10199	return shiftByConstantIns;
10200	}
10201
10202	#endif // _TARGET_XARCH_
10203
10204	//------------------------------------------------------------------------------------------------ //
10205	// getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
10206	//
10207	// Return value:
10208	// The number of the first argument with stack slot on the caller's frame.
10209	//
10210	// Note:
10211	// On x64 Windows the caller always creates slots (homing space) in its frame for the
10212	// first 4 arguments of a callee (register passed args). So, the the variable number
10213	// (lclNum) for the first argument with a stack slot is always 0.
10214	// For System V systems or armarch, there is no such calling convention requirement, and the code
10215	// needs to find the first stack passed argument from the caller. This is done by iterating over
10216	// all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
10217	//
10218	unsigned CodeGen::getFirstArgWithStackSlot()
10219	{
10220	#if defined(UNIX_AMD64_ABI) \|\| defined(_TARGET_ARMARCH_)
10221	unsigned baseVarNum = `0`;
10222	// Iterate over all the lvParam variables in the Lcl var table until we find the first one
10223	// that's passed on the stack.
10224	LclVarDsc* varDsc = nullptr;
10225	for (unsigned i = `0`; i < compiler->info.compArgsCount; i++)
10226	{
10227	varDsc = &(compiler->lvaTable[i]);
10228
10229	// We should have found a stack parameter (and broken out of this loop) before
10230	// we find any non-parameters.
10231	assert(varDsc->lvIsParam);
10232
10233	if (varDsc->lvArgReg == REG_STK)
10234	{
10235	baseVarNum = i;
10236	break;
10237	}
10238	}
10239	assert(varDsc != nullptr);
10240
10241	return baseVarNum;
10242	#elif defined(_TARGET_AMD64_)
10243	return `0`;
10244	#else // _TARGET_X86
10245	// Not implemented for x86.
10246	NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
10247	return BAD_VAR_NUM;
10248	#endif // _TARGET_X86_
10249	}
10250
10251	//------------------------------------------------------------------------
10252	// genSinglePush: Report a change in stack level caused by a single word-sized push instruction
10253	//
10254	void CodeGen::genSinglePush()
10255	{
10256	AddStackLevel(REGSIZE_BYTES);
10257	}
10258
10259	//------------------------------------------------------------------------
10260	// genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
10261	//
10262	void CodeGen::genSinglePop()
10263	{
10264	SubtractStackLevel(REGSIZE_BYTES);
10265	}
10266
10267	//------------------------------------------------------------------------
10268	// genPushRegs: Push the given registers.
10269	//
10270	// Arguments:
10271	// regs - mask or registers to push
10272	// byrefRegs - OUT arg. Set to byref registers that were pushed.
10273	// noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
10274	//
10275	// Return Value:
10276	// Mask of registers pushed.
10277	//
10278	// Notes:
10279	// This function does not check if the register is marked as used, etc.
10280	//
10281	regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
10282	{
10283	*byrefRegs = RBM_NONE;
10284	*noRefRegs = RBM_NONE;
10285
10286	if (regs == RBM_NONE)
10287	{
10288	return RBM_NONE;
10289	}
10290
10291	#if FEATURE_FIXED_OUT_ARGS
10292
10293	NYI("Don't call genPushRegs with real regs!");
10294	return RBM_NONE;
10295
10296	#else // FEATURE_FIXED_OUT_ARGS
10297
10298	noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
10299	noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
10300
10301	regMaskTP pushedRegs = regs;
10302
10303	for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
10304	{
10305	regMaskTP regBit = regMaskTP(`1`) << reg;
10306
10307	if ((regBit & regs) == RBM_NONE)
10308	continue;
10309
10310	var_types type;
10311	if (regBit & gcInfo.gcRegGCrefSetCur)
10312	{
10313	type = TYP_REF;
10314	}
10315	else if (regBit & gcInfo.gcRegByrefSetCur)
10316	{
10317	*byrefRegs \|= regBit;
10318	type = TYP_BYREF;
10319	}
10320	else if (noRefRegs != NULL)
10321	{
10322	*noRefRegs \|= regBit;
10323	type = TYP_I_IMPL;
10324	}
10325	else
10326	{
10327	continue;
10328	}
10329
10330	inst_RV(INS_push, reg, type);
10331
10332	genSinglePush();
10333	gcInfo.gcMarkRegSetNpt(regBit);
10334
10335	regs &= ~regBit;
10336	}
10337
10338	return pushedRegs;
10339
10340	#endif // FEATURE_FIXED_OUT_ARGS
10341	}
10342
10343	//------------------------------------------------------------------------
10344	// genPopRegs: Pop the registers that were pushed by genPushRegs().
10345	//
10346	// Arguments:
10347	// regs - mask of registers to pop
10348	// byrefRegs - The byref registers that were pushed by genPushRegs().
10349	// noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
10350	//
10351	// Return Value:
10352	// None
10353	//
10354	void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
10355	{
10356	if (regs == RBM_NONE)
10357	{
10358	return;
10359	}
10360
10361	#if FEATURE_FIXED_OUT_ARGS
10362
10363	NYI("Don't call genPopRegs with real regs!");
10364
10365	#else // FEATURE_FIXED_OUT_ARGS
10366
10367	noway_assert((regs & byrefRegs) == byrefRegs);
10368	noway_assert((regs & noRefRegs) == noRefRegs);
10369	noway_assert((regs & (gcInfo.gcRegGCrefSetCur \| gcInfo.gcRegByrefSetCur)) == RBM_NONE);
10370
10371	noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
10372	noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
10373
10374	// Walk the registers in the reverse order as genPushRegs()
10375	for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
10376	{
10377	regMaskTP regBit = regMaskTP(`1`) << reg;
10378
10379	if ((regBit & regs) == RBM_NONE)
10380	continue;
10381
10382	var_types type;
10383	if (regBit & byrefRegs)
10384	{
10385	type = TYP_BYREF;
10386	}
10387	else if (regBit & noRefRegs)
10388	{
10389	type = TYP_INT;
10390	}
10391	else
10392	{
10393	type = TYP_REF;
10394	}
10395
10396	inst_RV(INS_pop, reg, type);
10397	genSinglePop();
10398
10399	if (type != TYP_INT)
10400	gcInfo.gcMarkRegPtrVal(reg, type);
10401
10402	regs &= ~regBit;
10403	}
10404
10405	#endif // FEATURE_FIXED_OUT_ARGS
10406	}
10407
10408	/*****************************************************************************
10409	* genSetScopeInfo
10410	*
10411	* This function should be called only after the sizes of the emitter blocks
10412	* have been finalized.
10413	*/
10414
10415	void CodeGen::genSetScopeInfo()
10416	{
10417	if (!compiler->opts.compScopeInfo)
10418	{
10419	return;
10420	}
10421
10422	#ifdef DEBUG
10423	if (verbose)
10424	{
10425	printf("*************** In genSetScopeInfo()\n");
10426	}
10427	#endif
10428
10429	if (compiler->info.compVarScopesCount == `0`)
10430	{
10431	compiler->eeSetLVcount(`0`);
10432	compiler->eeSetLVdone();
10433	return;
10434	}
10435
10436	noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > `0`));
10437	noway_assert(psiOpenScopeList.scNext == nullptr);
10438
10439	unsigned i;
10440	unsigned scopeCnt = siScopeCnt + psiScopeCnt;
10441
10442	compiler->eeSetLVcount(scopeCnt);
10443
10444	#ifdef DEBUG
10445	genTrnslLocalVarCount = scopeCnt;
10446	if (scopeCnt)
10447	{
10448	genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt];
10449	}
10450	#endif
10451
10452	// Record the scopes found for the parameters over the prolog.
10453	// The prolog needs to be treated differently as a variable may not
10454	// have the same info in the prolog block as is given by compiler->lvaTable.
10455	// eg. A register parameter is actually on the stack, before it is loaded to reg.
10456
10457	CodeGen::psiScope* scopeP;
10458
10459	for (i = `0`, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
10460	{
10461	noway_assert(scopeP != nullptr);
10462	noway_assert(scopeP->scStartLoc.Valid());
10463	noway_assert(scopeP->scEndLoc.Valid());
10464
10465	UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
10466	UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
10467
10468	unsigned varNum = scopeP->scSlotNum;
10469	noway_assert(startOffs <= endOffs);
10470
10471	// The range may be 0 if the prolog is empty. For such a case,
10472	// report the liveness of arguments to span at least the first
10473	// instruction in the method. This will be incorrect (except on
10474	// entry to the method) if the very first instruction of the method
10475	// is part of a loop. However, this should happen
10476	// very rarely, and the incorrectness is worth being able to look
10477	// at the argument on entry to the method.
10478	if (startOffs == endOffs)
10479	{
10480	noway_assert(startOffs == `0`);
10481	endOffs++;
10482	}
10483
10484	Compiler::siVarLoc varLoc;
10485
10486	if (scopeP->scRegister)
10487	{
10488	varLoc.vlType = Compiler::VLT_REG;
10489	varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum;
10490	}
10491	else
10492	{
10493	varLoc.vlType = Compiler::VLT_STK;
10494	varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg;
10495	varLoc.vlStk.vlsOffset = scopeP->u2.scOffset;
10496	}
10497
10498	genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc);
10499	}
10500
10501	// Record the scopes for the rest of the method.
10502	// Check that the LocalVarInfo scopes look OK
10503	noway_assert(siOpenScopeList.scNext == nullptr);
10504
10505	CodeGen::siScope* scopeL;
10506
10507	for (i = `0`, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
10508	{
10509	noway_assert(scopeL != nullptr);
10510	noway_assert(scopeL->scStartLoc.Valid());
10511	noway_assert(scopeL->scEndLoc.Valid());
10512
10513	// Find the start and end IP
10514
10515	UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
10516	UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
10517
10518	noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
10519
10520	// For stack vars, find the base register, and offset
10521
10522	regNumber baseReg;
10523	signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs;
10524
10525	if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased)
10526	{
10527	baseReg = REG_SPBASE;
10528	offset += scopeL->scStackLevel;
10529	}
10530	else
10531	{
10532	baseReg = REG_FPBASE;
10533	}
10534
10535	// Now fill in the varLoc
10536
10537	Compiler::siVarLoc varLoc;
10538
10539	// TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register
10540	// for part of its lifetime, or in different registers for different parts of its lifetime.
10541	// This should only matter for non-debug code, where we do variable enregistration.
10542	// We should store the ranges of variable enregistration in the scope table.
10543	if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg())
10544	{
10545	var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet());
10546	switch (type)
10547	{
10548	case TYP_INT:
10549	case TYP_REF:
10550	case TYP_BYREF:
10551	#ifdef _TARGET_64BIT_
10552	case TYP_LONG:
10553	#endif // _TARGET_64BIT_
10554
10555	varLoc.vlType = Compiler::VLT_REG;
10556	varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10557	break;
10558
10559	#ifndef _TARGET_64BIT_
10560	case TYP_LONG:
10561	#if !CPU_HAS_FP_SUPPORT
10562	case TYP_DOUBLE:
10563	#endif
10564
10565	if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK)
10566	{
10567	varLoc.vlType = Compiler::VLT_REG_REG;
10568	varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10569	varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg;
10570	}
10571	else
10572	{
10573	varLoc.vlType = Compiler::VLT_REG_STK;
10574	varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10575	varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg;
10576	if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE)
10577	{
10578	varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10579	}
10580	varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int);
10581	}
10582	break;
10583	#endif // !_TARGET_64BIT_
10584
10585	#ifdef _TARGET_64BIT_
10586
10587	case TYP_FLOAT:
10588	case TYP_DOUBLE:
10589	// TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
10590	// so no XMM registers can get debug information.
10591	varLoc.vlType = Compiler::VLT_REG_FP;
10592	varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10593	break;
10594
10595	#else // !_TARGET_64BIT_
10596
10597	#if CPU_HAS_FP_SUPPORT
10598	case TYP_FLOAT:
10599	case TYP_DOUBLE:
10600	if (isFloatRegType(type))
10601	{
10602	varLoc.vlType = Compiler::VLT_FPSTK;
10603	varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10604	}
10605	break;
10606	#endif // CPU_HAS_FP_SUPPORT
10607
10608	#endif // !_TARGET_64BIT_
10609
10610	#ifdef FEATURE_SIMD
10611	case TYP_SIMD8:
10612	case TYP_SIMD12:
10613	case TYP_SIMD16:
10614	case TYP_SIMD32:
10615	varLoc.vlType = Compiler::VLT_REG_FP;
10616
10617	// TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
10618	// so no XMM registers can get debug information.
10619	//
10620	// Note: Need to initialize vlrReg field, otherwise during jit dump hitting an assert
10621	// in eeDispVar() --> getRegName() that regNumber is valid.
10622	varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10623	break;
10624	#endif // FEATURE_SIMD
10625
10626	default:
10627	noway_assert(!"Invalid type");
10628	}
10629	}
10630	else
10631	{
10632	assert(offset != BAD_STK_OFFS);
10633	LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum;
10634	switch (genActualType(varDsc->TypeGet()))
10635	{
10636	case TYP_INT:
10637	case TYP_REF:
10638	case TYP_BYREF:
10639	case TYP_FLOAT:
10640	case TYP_STRUCT:
10641	case TYP_BLK: // Needed because of the TYP_BLK stress mode
10642	#ifdef FEATURE_SIMD
10643	case TYP_SIMD8:
10644	case TYP_SIMD12:
10645	case TYP_SIMD16:
10646	case TYP_SIMD32:
10647	#endif
10648	#ifdef _TARGET_64BIT_
10649	case TYP_LONG:
10650	case TYP_DOUBLE:
10651	#endif // _TARGET_64BIT_
10652	#if defined(_TARGET_AMD64_) \|\| defined(_TARGET_ARM64_)
10653	// In the AMD64 ABI we are supposed to pass a struct by reference when its
10654	// size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies
10655	// the IR to comply with the ABI and therefore changes the type of the lclVar
10656	// that holds the struct from TYP_STRUCT to TYP_BYREF but it gives us a hint that
10657	// this is still a struct by setting the lvIsTemp flag.
10658	// The same is true for ARM64 and structs > 16 bytes.
10659	// (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail)
10660	// Now, the VM expects a special enum for these type of local vars: VLT_STK_BYREF
10661	// to accomodate for this situation.
10662	if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp)
10663	{
10664	assert(varDsc->lvIsParam);
10665	varLoc.vlType = Compiler::VLT_STK_BYREF;
10666	}
10667	else
10668	#endif // defined(_TARGET_AMD64_) \|\| defined(_TARGET_ARM64_)
10669	{
10670	varLoc.vlType = Compiler::VLT_STK;
10671	}
10672	varLoc.vlStk.vlsBaseReg = baseReg;
10673	varLoc.vlStk.vlsOffset = offset;
10674	if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE)
10675	{
10676	varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10677	}
10678	break;
10679
10680	#ifndef _TARGET_64BIT_
10681	case TYP_LONG:
10682	case TYP_DOUBLE:
10683	varLoc.vlType = Compiler::VLT_STK2;
10684	varLoc.vlStk2.vls2BaseReg = baseReg;
10685	varLoc.vlStk2.vls2Offset = offset;
10686	if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE)
10687	{
10688	varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10689	}
10690	break;
10691	#endif // !_TARGET_64BIT_
10692
10693	default:
10694	noway_assert(!"Invalid type");
10695	}
10696	}
10697
10698	genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
10699	scopeL->scAvailable, varLoc);
10700	}
10701
10702	compiler->eeSetLVdone();
10703	}
10704
10705	//------------------------------------------------------------------------
10706	// genSetScopeInfo: Record scope information for debug info
10707	//
10708	// Arguments:
10709	// which
10710	// startOffs - the starting offset for this scope
10711	// length - the length of this scope
10712	// varNum - the lclVar for this scope info
10713	// LVnum
10714	// avail
10715	// varLoc
10716	//
10717	// Notes:
10718	// Called for every scope info piece to record by the main genSetScopeInfo()
10719
10720	void CodeGen::genSetScopeInfo(unsigned which,
10721	UNATIVE_OFFSET startOffs,
10722	UNATIVE_OFFSET length,
10723	unsigned varNum,
10724	unsigned LVnum,
10725	bool avail,
10726	Compiler::siVarLoc& varLoc)
10727	{
10728	// We need to do some mapping while reporting back these variables.
10729
10730	unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
10731	noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
10732
10733	#ifdef _TARGET_X86_
10734	// Non-x86 platforms are allowed to access all arguments directly
10735	// so we don't need this code.
10736
10737	// Is this a varargs function?
10738
10739	if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
10740	varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
10741	{
10742	noway_assert(varLoc.vlType == Compiler::VLT_STK \|\| varLoc.vlType == Compiler::VLT_STK2);
10743
10744	// All stack arguments (except the varargs handle) have to be
10745	// accessed via the varargs cookie. Discard generated info,
10746	// and just find its position relative to the varargs handle
10747
10748	PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
10749	if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
10750	{
10751	noway_assert(!compiler->opts.compDbgCode);
10752	return;
10753	}
10754
10755	// Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
10756	// arguments of vararg functions to avoid reporting them to GC.
10757	noway_assert(!compiler->lvaTable[varNum].lvRegister);
10758	unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
10759	unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
10760
10761	noway_assert(cookieOffset < varOffset);
10762	unsigned offset = varOffset - cookieOffset;
10763	unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
10764	noway_assert(offset < stkArgSize);
10765	offset = stkArgSize - offset;
10766
10767	varLoc.vlType = Compiler::VLT_FIXED_VA;
10768	varLoc.vlFixedVarArg.vlfvOffset = offset;
10769	}
10770
10771	#endif // _TARGET_X86_
10772
10773	VarName name = nullptr;
10774
10775	#ifdef DEBUG
10776
10777	for (unsigned scopeNum = `0`; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
10778	{
10779	if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
10780	{
10781	name = compiler->info.compVarScopes[scopeNum].vsdName;
10782	}
10783	}
10784
10785	// Hang on to this compiler->info.
10786
10787	TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
10788
10789	tlvi.tlviVarNum = ilVarNum;
10790	tlvi.tlviLVnum = LVnum;
10791	tlvi.tlviName = name;
10792	tlvi.tlviStartPC = startOffs;
10793	tlvi.tlviLength = length;
10794	tlvi.tlviAvailable = avail;
10795	tlvi.tlviVarLoc = varLoc;
10796
10797	#endif // DEBUG
10798
10799	compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
10800	}
10801
10802	/***************************************************************************/
10803	#ifdef LATE_DISASM
10804	#if defined(DEBUG)
10805	/*****************************************************************************
10806	* CompilerRegName
10807	*
10808	* Can be called only after lviSetLocalVarInfo() has been called
10809	*/
10810
10811	/ virtual /
10812	const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
10813	{
10814	if (!compiler->opts.compScopeInfo)
10815	return nullptr;
10816
10817	if (compiler->info.compVarScopesCount == `0`)
10818	return nullptr;
10819
10820	noway_assert(genTrnslLocalVarCount == `0` \|\| genTrnslLocalVarInfo);
10821
10822	for (unsigned i = `0`; i < genTrnslLocalVarCount; i++)
10823	{
10824	if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
10825	(genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10826	(genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10827	{
10828	return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10829	}
10830	}
10831
10832	return NULL;
10833	}
10834
10835	/*****************************************************************************
10836	* CompilerStkName
10837	*
10838	* Can be called only after lviSetLocalVarInfo() has been called
10839	*/
10840
10841	/ virtual /
10842	const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
10843	{
10844	if (!compiler->opts.compScopeInfo)
10845	return nullptr;
10846
10847	if (compiler->info.compVarScopesCount == `0`)
10848	return nullptr;
10849
10850	noway_assert(genTrnslLocalVarCount == `0` \|\| genTrnslLocalVarInfo);
10851
10852	for (unsigned i = `0`; i < genTrnslLocalVarCount; i++)
10853	{
10854	if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
10855	(genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10856	(genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10857	{
10858	return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10859	}
10860	}
10861
10862	return NULL;
10863	}
10864
10865	/***************************************************************************/
10866	#endif // defined(DEBUG)
10867	#endif // LATE_DISASM
10868
10869	#ifdef DEBUG
10870
10871	/*****************************************************************************
10872	* Display a IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
10873	*/
10874
10875	void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
10876	{
10877	if (mappingNum != unsigned(-`1`))
10878	{
10879	printf("%d: ", mappingNum);
10880	}
10881
10882	IL_OFFSETX offsx = ipMapping->ipmdILoffsx;
10883
10884	if (offsx == BAD_IL_OFFSET)
10885	{
10886	printf("???");
10887	}
10888	else
10889	{
10890	Compiler::eeDispILOffs(jitGetILoffsAny(offsx));
10891
10892	if (jitIsStackEmpty(offsx))
10893	{
10894	printf(" STACK_EMPTY");
10895	}
10896
10897	if (jitIsCallInstruction(offsx))
10898	{
10899	printf(" CALL_INSTRUCTION");
10900	}
10901	}
10902
10903	printf(" ");
10904	ipMapping->ipmdNativeLoc.Print();
10905	// We can only call this after code generation. Is there any way to tell when it's legal to call?
10906	// printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
10907
10908	if (ipMapping->ipmdIsLabel)
10909	{
10910	printf(" label");
10911	}
10912
10913	printf("\n");
10914	}
10915
10916	void CodeGen::genIPmappingListDisp()
10917	{
10918	unsigned mappingNum = `0`;
10919	Compiler::IPmappingDsc* ipMapping;
10920
10921	for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
10922	{
10923	genIPmappingDisp(mappingNum, ipMapping);
10924	++mappingNum;
10925	}
10926	}
10927
10928	#endif // DEBUG
10929
10930	/*****************************************************************************
10931	*
10932	* Append an IPmappingDsc struct to the list that we're maintaining
10933	* for the debugger.
10934	* Record the instr offset as being at the current code gen position.
10935	*/
10936
10937	void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
10938	{
10939	if (!compiler->opts.compDbgInfo)
10940	{
10941	return;
10942	}
10943
10944	assert(offsx != BAD_IL_OFFSET);
10945
10946	switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
10947	{
10948	case ICorDebugInfo::PROLOG:
10949	case ICorDebugInfo::EPILOG:
10950	break;
10951
10952	default:
10953
10954	if (offsx != ICorDebugInfo::NO_MAPPING)
10955	{
10956	noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
10957	}
10958
10959	// Ignore this one if it's the same IL offset as the last one we saw.
10960	// Note that we'll let through two identical IL offsets if the flag bits
10961	// differ, or two identical "special" mappings (e.g., PROLOG).
10962	if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
10963	{
10964	JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
10965	return;
10966	}
10967	break;
10968	}
10969
10970	/ Create a mapping entry and append it to the list /
10971
10972	Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(`1`);
10973	addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
10974	addMapping->ipmdILoffsx = offsx;
10975	addMapping->ipmdIsLabel = isLabel;
10976	addMapping->ipmdNext = nullptr;
10977
10978	if (compiler->genIPmappingList != nullptr)
10979	{
10980	assert(compiler->genIPmappingLast != nullptr);
10981	assert(compiler->genIPmappingLast->ipmdNext == nullptr);
10982	compiler->genIPmappingLast->ipmdNext = addMapping;
10983	}
10984	else
10985	{
10986	assert(compiler->genIPmappingLast == nullptr);
10987	compiler->genIPmappingList = addMapping;
10988	}
10989
10990	compiler->genIPmappingLast = addMapping;
10991
10992	#ifdef DEBUG
10993	if (verbose)
10994	{
10995	printf("Added IP mapping: ");
10996	genIPmappingDisp(unsigned(-`1`), addMapping);
10997	}
10998	#endif // DEBUG
10999	}
11000
11001	/*****************************************************************************
11002	*
11003	* Prepend an IPmappingDsc struct to the list that we're maintaining
11004	* for the debugger.
11005	* Record the instr offset as being at the current code gen position.
11006	*/
11007	void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
11008	{
11009	if (!compiler->opts.compDbgInfo)
11010	{
11011	return;
11012	}
11013
11014	assert(offsx != BAD_IL_OFFSET);
11015	assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
11016
11017	switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11018	{
11019	case ICorDebugInfo::NO_MAPPING:
11020	case ICorDebugInfo::PROLOG:
11021	case ICorDebugInfo::EPILOG:
11022	break;
11023
11024	default:
11025	noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
11026	break;
11027	}
11028
11029	/ Create a mapping entry and prepend it to the list /
11030
11031	Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(`1`);
11032	addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
11033	addMapping->ipmdILoffsx = offsx;
11034	addMapping->ipmdIsLabel = true;
11035	addMapping->ipmdNext = nullptr;
11036
11037	addMapping->ipmdNext = compiler->genIPmappingList;
11038	compiler->genIPmappingList = addMapping;
11039
11040	if (compiler->genIPmappingLast == nullptr)
11041	{
11042	compiler->genIPmappingLast = addMapping;
11043	}
11044
11045	#ifdef DEBUG
11046	if (verbose)
11047	{
11048	printf("Added IP mapping to front: ");
11049	genIPmappingDisp(unsigned(-`1`), addMapping);
11050	}
11051	#endif // DEBUG
11052	}
11053
11054	/***************************************************************************/
11055
11056	C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
11057	C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
11058	C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));
11059
11060	C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
11061	C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
11062	C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
11063	C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
11064
11065	//------------------------------------------------------------------------
11066	// jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
11067	// Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
11068	// is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
11069	//
11070	// Arguments:
11071	// offsx - the IL_OFFSETX value with the IL offset to extract.
11072	//
11073	// Return Value:
11074	// The IL offset.
11075
11076	IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
11077	{
11078	assert(offsx != BAD_IL_OFFSET);
11079
11080	switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11081	{
11082	case ICorDebugInfo::NO_MAPPING:
11083	case ICorDebugInfo::PROLOG:
11084	case ICorDebugInfo::EPILOG:
11085	unreached();
11086
11087	default:
11088	return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
11089	}
11090	}
11091
11092	//------------------------------------------------------------------------
11093	// jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
11094	// distinguished values. Asserts if passed BAD_IL_OFFSET.
11095	//
11096	// Arguments:
11097	// offsx - the IL_OFFSETX value with the IL offset to extract.
11098	//
11099	// Return Value:
11100	// The IL offset.
11101
11102	IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
11103	{
11104	assert(offsx != BAD_IL_OFFSET);
11105
11106	switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11107	{
11108	case ICorDebugInfo::NO_MAPPING:
11109	case ICorDebugInfo::PROLOG:
11110	case ICorDebugInfo::EPILOG:
11111	return IL_OFFSET(offsx);
11112
11113	default:
11114	return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
11115	}
11116	}
11117
11118	//------------------------------------------------------------------------
11119	// jitIsStackEmpty: Does the IL offset have the stack empty bit set?
11120	// Asserts if passed BAD_IL_OFFSET.
11121	//
11122	// Arguments:
11123	// offsx - the IL_OFFSETX value to check
11124	//
11125	// Return Value:
11126	// 'true' if the stack empty bit is set; 'false' otherwise.
11127
11128	bool jitIsStackEmpty(IL_OFFSETX offsx)
11129	{
11130	assert(offsx != BAD_IL_OFFSET);
11131
11132	switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11133	{
11134	case ICorDebugInfo::NO_MAPPING:
11135	case ICorDebugInfo::PROLOG:
11136	case ICorDebugInfo::EPILOG:
11137	return true;
11138
11139	default:
11140	return (offsx & IL_OFFSETX_STKBIT) == `0`;
11141	}
11142	}
11143
11144	//------------------------------------------------------------------------
11145	// jitIsCallInstruction: Does the IL offset have the call instruction bit set?
11146	// Asserts if passed BAD_IL_OFFSET.
11147	//
11148	// Arguments:
11149	// offsx - the IL_OFFSETX value to check
11150	//
11151	// Return Value:
11152	// 'true' if the call instruction bit is set; 'false' otherwise.
11153
11154	bool jitIsCallInstruction(IL_OFFSETX offsx)
11155	{
11156	assert(offsx != BAD_IL_OFFSET);
11157
11158	switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11159	{
11160	case ICorDebugInfo::NO_MAPPING:
11161	case ICorDebugInfo::PROLOG:
11162	case ICorDebugInfo::EPILOG:
11163	return false;
11164
11165	default:
11166	return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != `0`;
11167	}
11168	}
11169
11170	/***************************************************************************/
11171
11172	void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
11173	{
11174	if (!compiler->opts.compDbgCode)
11175	{
11176	return;
11177	}
11178
11179	if (offsx == BAD_IL_OFFSET)
11180	{
11181	return;
11182	}
11183
11184	/ If other IL were offsets reported, skip /
11185
11186	if (compiler->genIPmappingLast == nullptr)
11187	{
11188	return;
11189	}
11190
11191	if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
11192	{
11193	return;
11194	}
11195
11196	/ offsx was the last reported offset. Make sure that we generated native code /
11197
11198	if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
11199	{
11200	instGen(INS_nop);
11201	}
11202	}
11203
11204	/*****************************************************************************
11205	*
11206	* Shut down the IP-mapping logic, report the info to the EE.
11207	*/
11208
11209	void CodeGen::genIPmappingGen()
11210	{
11211	if (!compiler->opts.compDbgInfo)
11212	{
11213	return;
11214	}
11215
11216	#ifdef DEBUG
11217	if (verbose)
11218	{
11219	printf("*************** In genIPmappingGen()\n");
11220	}
11221	#endif
11222
11223	if (compiler->genIPmappingList == nullptr)
11224	{
11225	compiler->eeSetLIcount(`0`);
11226	compiler->eeSetLIdone();
11227	return;
11228	}
11229
11230	Compiler::IPmappingDsc* tmpMapping;
11231	Compiler::IPmappingDsc* prevMapping;
11232	unsigned mappingCnt;
11233	UNATIVE_OFFSET lastNativeOfs;
11234
11235	/ First count the number of distinct mapping records /
11236
11237	mappingCnt = `0`;
11238	lastNativeOfs = UNATIVE_OFFSET(~`0`);
11239
11240	for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
11241	tmpMapping = tmpMapping->ipmdNext)
11242	{
11243	IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
11244
11245	// Managed RetVal - since new sequence points are emitted to identify IL calls,
11246	// make sure that those are not filtered and do not interfere with filtering of
11247	// other sequence points.
11248	if (jitIsCallInstruction(srcIP))
11249	{
11250	mappingCnt++;
11251	continue;
11252	}
11253
11254	UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
11255
11256	if (nextNativeOfs != lastNativeOfs)
11257	{
11258	mappingCnt++;
11259	lastNativeOfs = nextNativeOfs;
11260	prevMapping = tmpMapping;
11261	continue;
11262	}
11263
11264	/ If there are mappings with the same native offset, then:*
11265	o If one of them is NO_MAPPING, ignore it
11266	o If one of them is a label, report that and ignore the other one
11267	o Else report the higher IL offset
11268	*/
11269
11270	PREFIX_ASSUME(prevMapping != nullptr); // We would exit before if this was true
11271	if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
11272	{
11273	// If the previous entry was NO_MAPPING, ignore it
11274	prevMapping->ipmdNativeLoc.Init();
11275	prevMapping = tmpMapping;
11276	}
11277	else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
11278	{
11279	// If the current entry is NO_MAPPING, ignore it
11280	// Leave prevMapping unchanged as tmpMapping is no longer valid
11281	tmpMapping->ipmdNativeLoc.Init();
11282	}
11283	else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG \|\| srcIP == `0`)
11284	{
11285	// counting for special cases: see below
11286	mappingCnt++;
11287	prevMapping = tmpMapping;
11288	}
11289	else
11290	{
11291	noway_assert(prevMapping != nullptr);
11292	noway_assert(!prevMapping->ipmdNativeLoc.Valid() \|\|
11293	lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
11294
11295	/ The previous block had the same native offset. We have to*
11296	discard one of the mappings. Simply reinitialize ipmdNativeLoc
11297	and prevMapping will be ignored later. /*
11298
11299	if (prevMapping->ipmdIsLabel)
11300	{
11301	// Leave prevMapping unchanged as tmpMapping is no longer valid
11302	tmpMapping->ipmdNativeLoc.Init();
11303	}
11304	else
11305	{
11306	prevMapping->ipmdNativeLoc.Init();
11307	prevMapping = tmpMapping;
11308	}
11309	}
11310	}
11311
11312	/ Tell them how many mapping records we've got /
11313
11314	compiler->eeSetLIcount(mappingCnt);
11315
11316	/ Now tell them about the mappings /
11317
11318	mappingCnt = `0`;
11319	lastNativeOfs = UNATIVE_OFFSET(~`0`);
11320
11321	for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
11322	{
11323	// Do we have to skip this record ?
11324	if (!tmpMapping->ipmdNativeLoc.Valid())
11325	{
11326	continue;
11327	}
11328
11329	UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
11330	IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
11331
11332	if (jitIsCallInstruction(srcIP))
11333	{
11334	compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
11335	}
11336	else if (nextNativeOfs != lastNativeOfs)
11337	{
11338	compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
11339	lastNativeOfs = nextNativeOfs;
11340	}
11341	else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG \|\| srcIP == `0`)
11342	{
11343	// For the special case of an IL instruction with no body
11344	// followed by the epilog (say ret void immediately preceding
11345	// the method end), we put two entries in, so that we'll stop
11346	// at the (empty) ret statement if the user tries to put a
11347	// breakpoint there, and then have the option of seeing the
11348	// epilog or not based on SetUnmappedStopMask for the stepper.
11349	compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
11350	}
11351	}
11352
11353	#if 0
11354	// TODO-Review:
11355	//This check is disabled. It is always true that any time this check asserts, the debugger would have a
11356	//problem with IL source level debugging. However, for a C# file, it only matters if things are on
11357	//different source lines. As a result, we have all sorts of latent problems with how we emit debug
11358	//info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
11359	//assert back on.
11360	if (compiler->opts.compDbgCode)
11361	{
11362	//Assert that the first instruction of every basic block with more than one incoming edge has a
11363	//different sequence point from each incoming block.
11364	//
11365	//It turns out that the only thing we really have to assert is that the first statement in each basic
11366	//block has an IL offset and appears in eeBoundaries.
11367	for (BasicBlock * block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
11368	{
11369	if ((block->bbRefs > `1`) && (block->bbTreeList != nullptr))
11370	{
11371	noway_assert(block->bbTreeList->gtOper == GT_STMT);
11372	bool found = false;
11373	if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
11374	{
11375	IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
11376	for (unsigned i = `0`; i < eeBoundariesCount; ++i)
11377	{
11378	if (eeBoundaries[i].ilOffset == ilOffs)
11379	{
11380	found = true;
11381	break;
11382	}
11383	}
11384	}
11385	noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
11386	}
11387	}
11388	}
11389	#endif // 0
11390
11391	compiler->eeSetLIdone();
11392	}
11393
/*============================================================================
 *
 * These are empty stubs to help the late dis-assembler to compile
 * if the late disassembler is being built into a non-DEBUG build.
 *
 *============================================================================
 */
11401
11402	#if defined(LATE_DISASM)
11403	#if !defined(DEBUG)
11404
11405	/ virtual /
11406	const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
11407	{
11408	return NULL;
11409	}
11410
11411	/ virtual /
11412	const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
11413	{
11414	return NULL;
11415	}
11416
11417	/***************************************************************************/
11418	#endif // !defined(DEBUG)
11419	#endif // defined(LATE_DISASM)
11420	/***************************************************************************/
11421
11422	//------------------------------------------------------------------------
11423	// indirForm: Make a temporary indir we can feed to pattern matching routines
11424	// in cases where we don't want to instantiate all the indirs that happen.
11425	//
11426	GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
11427	{
11428	GenTreeIndir i(GT_IND, type, base, nullptr);
11429	i.gtRegNum = REG_NA;
11430	i.SetContained();
11431	return i;
11432	}
11433
11434	//------------------------------------------------------------------------
11435	// intForm: Make a temporary int we can feed to pattern matching routines
11436	// in cases where we don't want to instantiate.
11437	//
11438	GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
11439	{
11440	GenTreeIntCon i(type, value);
11441	i.gtRegNum = REG_NA;
11442	return i;
11443	}
11444
#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
//------------------------------------------------------------------------
// genLongReturn: Generates code for long return statement for x86 and arm.
//
// Note: treeNode's and op1's registers are already consumed.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type.
//               Its operand must be a GT_LONG whose two halves both have
//               registers assigned.
//
// Return Value:
//    None
//
// Notes:
//    Consumes the low and high halves, then copies each into
//    REG_LNGRET_LO / REG_LNGRET_HI if it is not already there.
//
void CodeGen::genLongReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    assert(treeNode->TypeGet() == TYP_LONG);
    GenTree*  op1        = treeNode->gtGetOp1();
    var_types targetType = treeNode->TypeGet();

    assert(op1 != nullptr);
    assert(op1->OperGet() == GT_LONG);
    GenTree* loRetVal = op1->gtGetOp1();
    GenTree* hiRetVal = op1->gtGetOp2();
    assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));

    genConsumeReg(loRetVal);
    genConsumeReg(hiRetVal);

    // Each half is moved as a TYP_INT register-to-register copy.
    if (loRetVal->gtRegNum != REG_LNGRET_LO)
    {
        inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
    }
    if (hiRetVal->gtRegNum != REG_LNGRET_HI)
    {
        inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
    }
}
#endif // _TARGET_X86_ || _TARGET_ARM_
11482
11483	//------------------------------------------------------------------------
11484	// genReturn: Generates code for return statement.
11485	// In case of struct return, delegates to the genStructReturn method.
11486	//
11487	// Arguments:
11488	// treeNode - The GT_RETURN or GT_RETFILT tree node.
11489	//
11490	// Return Value:
11491	// None
11492	//
11493	void CodeGen::genReturn(GenTree* treeNode)
11494	{
11495	assert(treeNode->OperGet() == GT_RETURN \|\| treeNode->OperGet() == GT_RETFILT);
11496	GenTree* op1 = treeNode->gtGetOp1();
11497	var_types targetType = treeNode->TypeGet();
11498
11499	// A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
11500	// register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
11501	// result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
11502	assert(!treeNode->OperIs(GT_RETFILT) \|\| (targetType == TYP_VOID) \|\| (targetType == TYP_INT));
11503
11504	#ifdef DEBUG
11505	if (targetType == TYP_VOID)
11506	{
11507	assert(op1 == nullptr);
11508	}
11509	#endif // DEBUG
11510
11511	#if defined(_TARGET_X86_) \|\| defined(_TARGET_ARM_)
11512	if (targetType == TYP_LONG)
11513	{
11514	genLongReturn(treeNode);
11515	}
11516	else
11517	#endif // _TARGET_X86_ \|\| _TARGET_ARM_
11518	{
11519	if (isStructReturn(treeNode))
11520	{
11521	genStructReturn(treeNode);
11522	}
11523	else if (targetType != TYP_VOID)
11524	{
11525	assert(op1 != nullptr);
11526	noway_assert(op1->gtRegNum != REG_NA);
11527
11528	// !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
11529	// consumed a reg for the operand. This is because the variable
11530	// is dead after return. But we are issuing more instructions
11531	// like "profiler leave callback" after this consumption. So
11532	// if you are issuing more instructions after this point,
11533	// remember to keep the variable live up until the new method
11534	// exit point where it is actually dead.
11535	genConsumeReg(op1);
11536
11537	#if defined(_TARGET_ARM64_)
11538	genSimpleReturn(treeNode);
11539	#else // !_TARGET_ARM64_
11540	#if defined(_TARGET_X86_)
11541	if (varTypeIsFloating(treeNode))
11542	{
11543	genFloatReturn(treeNode);
11544	}
11545	else
11546	#elif defined(_TARGET_ARM_)
11547	if (varTypeIsFloating(treeNode) && (compiler->opts.compUseSoftFP \|\| compiler->info.compIsVarArgs))
11548	{
11549	if (targetType == TYP_FLOAT)
11550	{
11551	getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
11552	}
11553	else
11554	{
11555	assert(targetType == TYP_DOUBLE);
11556	getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, REG_INTRET, REG_NEXT(REG_INTRET),
11557	op1->gtRegNum);
11558	}
11559	}
11560	else
11561	#endif // _TARGET_ARM_
11562	{
11563	regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
11564	if (op1->gtRegNum != retReg)
11565	{
11566	inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
11567	}
11568	}
11569	#endif // !_TARGET_ARM64_
11570	}
11571	}
11572
11573	#ifdef PROFILING_SUPPORTED
11574	// !! Note !!
11575	// TODO-AMD64-Unix: If the profiler hook is implemented on nix, make sure for 2 register returned structs*
11576	// the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
11577	// in the handling of the GT_RETURN statement.
11578	// Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
11579	// for the return registers containing GC refs.
11580
11581	// There will be a single return block while generating profiler ELT callbacks.
11582	//
11583	// Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
11584	// In flowgraph and other places assert that the last node of a block marked as
11585	// BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
11586	// maintain such an invariant irrespective of whether profiler hook needed or not.
11587	// Also, there is not much to be gained by materializing it as an explicit node.
11588	if (compiler->compCurBB == compiler->genReturnBB)
11589	{
11590	// !! NOTE !!
11591	// Since we are invalidating the assumption that we would slip into the epilog
11592	// right after the "return", we need to preserve the return reg's GC state
11593	// across the call until actual method return.
11594	ReturnTypeDesc retTypeDesc;
11595	unsigned regCount = `0`;
11596	if (compiler->compMethodReturnsMultiRegRetType())
11597	{
11598	if (varTypeIsLong(compiler->info.compRetNativeType))
11599	{
11600	retTypeDesc.InitializeLongReturnType(compiler);
11601	}
11602	else // we must have a struct return type
11603	{
11604	retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
11605	}
11606	regCount = retTypeDesc.GetReturnRegCount();
11607	}
11608
11609	if (varTypeIsGC(compiler->info.compRetType))
11610	{
11611	gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
11612	}
11613	else if (compiler->compMethodReturnsMultiRegRetType())
11614	{
11615	for (unsigned i = `0`; i < regCount; ++i)
11616	{
11617	if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
11618	{
11619	gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
11620	}
11621	}
11622	}
11623
11624	genProfilingLeaveCallback();
11625
11626	if (varTypeIsGC(compiler->info.compRetType))
11627	{
11628	gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
11629	}
11630	else if (compiler->compMethodReturnsMultiRegRetType())
11631	{
11632	for (unsigned i = `0`; i < regCount; ++i)
11633	{
11634	if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
11635	{
11636	gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
11637	}
11638	}
11639	}
11640	}
11641	#endif // PROFILING_SUPPORTED
11642
11643	#if defined(DEBUG) && defined(_TARGET_XARCH_)
11644	bool doStackPointerCheck = compiler->opts.compStackCheckOnRet;
11645
11646	#if FEATURE_EH_FUNCLETS
11647	// Don't do stack pointer check at the return from a funclet; only for the main function.
11648	if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
11649	{
11650	doStackPointerCheck = false;
11651	}
11652	#else // !FEATURE_EH_FUNCLETS
11653	// Don't generate stack checks for x86 finally/filter EH returns: these are not invoked
11654	// with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet().
11655	if ((compiler->compCurBB->bbJumpKind == BBJ_EHFINALLYRET) \|\| (compiler->compCurBB->bbJumpKind == BBJ_EHFILTERRET))
11656	{
11657	doStackPointerCheck = false;
11658	}
11659	#endif // !FEATURE_EH_FUNCLETS
11660
11661	genStackPointerCheck(doStackPointerCheck, compiler->lvaReturnSpCheck);
11662	#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
11663	}
11664
11665	#if defined(DEBUG) && defined(_TARGET_XARCH_)
11666
11667	//------------------------------------------------------------------------
11668	// genStackPointerCheck: Generate code to check the stack pointer against a saved value.
11669	// This is a debug check.
11670	//
11671	// Arguments:
11672	// doStackPointerCheck - If true, do the stack pointer check, otherwise do nothing.
11673	// lvaStackPointerVar - The local variable number that holds the value of the stack pointer
11674	// we are comparing against.
11675	//
11676	// Return Value:
11677	// None
11678	//
11679	void CodeGen::genStackPointerCheck(bool doStackPointerCheck, unsigned lvaStackPointerVar)
11680	{
11681	if (doStackPointerCheck)
11682	{
11683	noway_assert(lvaStackPointerVar != `0xCCCCCCCC` && compiler->lvaTable[lvaStackPointerVar].lvDoNotEnregister &&
11684	compiler->lvaTable[lvaStackPointerVar].lvOnFrame);
11685	getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, lvaStackPointerVar, `0`);
11686
11687	BasicBlock* sp_check = genCreateTempLabel();
11688	getEmitter()->emitIns_J(INS_je, sp_check);
11689	instGen(INS_BREAKPOINT);
11690	genDefineTempLabel(sp_check);
11691	}
11692	}
11693
11694	#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
11695

Browse the source code of CoreCLR/jit/codegencommon.cpp