1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                          Code Generator Common:                           XX
XX   Methods common to all architectures and register allocation strategies  XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
14
15// TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
16// identical, and which should probably be moved here.
17
18#include "jitpch.h"
19#ifdef _MSC_VER
20#pragma hdrstop
21#endif
22#include "codegen.h"
23
24#include "gcinfo.h"
25#include "emit.h"
26
27#ifndef JIT32_GCENCODER
28#include "gcinfoencoder.h"
29#endif
30
31/*****************************************************************************/
32
33const BYTE genTypeSizes[] = {
34#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
35#include "typelist.h"
36#undef DEF_TP
37};
38
39const BYTE genTypeAlignments[] = {
40#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
41#include "typelist.h"
42#undef DEF_TP
43};
44
45const BYTE genTypeStSzs[] = {
46#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
47#include "typelist.h"
48#undef DEF_TP
49};
50
51const BYTE genActualTypes[] = {
52#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
53#include "typelist.h"
54#undef DEF_TP
55};
56
57void CodeGenInterface::setFramePointerRequiredEH(bool value)
58{
59 m_cgFramePointerRequired = value;
60
61#ifndef JIT32_GCENCODER
62 if (value)
63 {
64 // EnumGcRefs will only enumerate slots in aborted frames
65 // if they are fully-interruptible. So if we have a catch
66 // or finally that will keep frame-vars alive, we need to
67 // force fully-interruptible.
68 CLANG_FORMAT_COMMENT_ANCHOR;
69
70#ifdef DEBUG
71 if (verbose)
72 {
73 printf("Method has EH, marking method as fully interruptible\n");
74 }
75#endif
76
77 m_cgInterruptible = true;
78 }
79#endif // JIT32_GCENCODER
80}
81
82/*****************************************************************************/
83CodeGenInterface* getCodeGenerator(Compiler* comp)
84{
85 return new (comp, CMK_Codegen) CodeGen(comp);
86}
87
// CodeGenInterface constructor
89CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
90 : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr)
91{
92}
93
94/*****************************************************************************/
95
96CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
97{
98#if defined(_TARGET_XARCH_)
99 negBitmaskFlt = nullptr;
100 negBitmaskDbl = nullptr;
101 absBitmaskFlt = nullptr;
102 absBitmaskDbl = nullptr;
103 u8ToDblBitmask = nullptr;
104#endif // defined(_TARGET_XARCH_)
105
106#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_)
107 m_stkArgVarNum = BAD_VAR_NUM;
108#endif
109
110#if defined(UNIX_X86_ABI)
111 curNestedAlignment = 0;
112 maxNestedAlignment = 0;
113#endif
114
115 gcInfo.regSet = &regSet;
116 m_cgEmitter = new (compiler->getAllocator()) emitter();
117 m_cgEmitter->codeGen = this;
118 m_cgEmitter->gcInfo = &gcInfo;
119
120#ifdef DEBUG
121 setVerbose(compiler->verbose);
122#endif // DEBUG
123
124 regSet.tmpInit();
125
126 instInit();
127
128#ifdef LATE_DISASM
129 getDisAssembler().disInit(compiler);
130#endif
131
132#ifdef DEBUG
133 genTempLiveChg = true;
134 genTrnslLocalVarCount = 0;
135
136 // Shouldn't be used before it is set in genFnProlog()
137 compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler);
138
139#if defined(_TARGET_XARCH_)
140 // Shouldn't be used before it is set in genFnProlog()
141 compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
142#endif // defined(_TARGET_XARCH_)
143#endif // DEBUG
144
145#ifdef _TARGET_AMD64_
146 // This will be set before final frame layout.
147 compiler->compVSQuirkStackPaddingNeeded = 0;
148
149 // Set to true if we perform the Quirk that fixes the PPP issue
150 compiler->compQuirkForPPPflag = false;
151#endif // _TARGET_AMD64_
152
153 // Initialize the IP-mapping logic.
154 compiler->genIPmappingList = nullptr;
155 compiler->genIPmappingLast = nullptr;
156 compiler->genCallSite2ILOffsetMap = nullptr;
157
    /* Assume that we are not fully interruptible */
159
160 genInterruptible = false;
161#ifdef _TARGET_ARMARCH_
162 hasTailCalls = false;
163#endif // _TARGET_ARMARCH_
164#ifdef DEBUG
165 genInterruptibleUsed = false;
166 genCurDispOffset = (unsigned)-1;
167#endif
168}
169
170void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg)
171{
172 tree->gtRegNum = reg;
173}
174
175#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
176
177//---------------------------------------------------------------------
178// genTotalFrameSize - return the "total" size of the stack frame, including local size
179// and callee-saved register size. There are a few things "missing" depending on the
180// platform. The function genCallerSPtoInitialSPdelta() includes those things.
181//
182// For ARM, this doesn't include the prespilled registers.
183//
184// For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
185// It also doesn't include the pushed return address.
186//
187// Return value:
188// Frame size
189
190int CodeGenInterface::genTotalFrameSize()
191{
192 assert(!IsUninitialized(compiler->compCalleeRegsPushed));
193
194 int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
195
196 assert(totalFrameSize >= 0);
197 return totalFrameSize;
198}
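
// Illustrative example (hypothetical numbers, x86): with 3 callee-saved registers pushed
// and compLclFrameSize == 0x20, genTotalFrameSize() returns 3 * REGSIZE_BYTES + 0x20 = 0x2C.
// As noted above, this excludes the pushed return address and, when isFramePointerUsed()
// is true, the pushed frame pointer.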
199
200//---------------------------------------------------------------------
201// genSPtoFPdelta - return the offset from SP to the frame pointer.
202// This number is going to be positive, since SP must be at the lowest
203// address.
204//
205// There must be a frame pointer to call this function!
206
207int CodeGenInterface::genSPtoFPdelta()
208{
209 assert(isFramePointerUsed());
210
211 int delta;
212
213 delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
214
215 assert(delta >= 0);
216 return delta;
217}
218
219//---------------------------------------------------------------------
220// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
221// This number is going to be negative, since the Caller-SP is at a higher
222// address than the frame pointer.
223//
224// There must be a frame pointer to call this function!
225
226int CodeGenInterface::genCallerSPtoFPdelta()
227{
228 assert(isFramePointerUsed());
229 int callerSPtoFPdelta = 0;
230
231#if defined(_TARGET_ARM_)
232 // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
233 callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
234 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
235#elif defined(_TARGET_X86_)
236 // Thanks to ebp chaining, the difference between ebp-based addresses
237 // and caller-SP-relative addresses is just the 2 pointers:
238 // return address
239 // pushed ebp
240 callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
241#else
242#error "Unknown _TARGET_"
243#endif // _TARGET_*
244
245 assert(callerSPtoFPdelta <= 0);
246 return callerSPtoFPdelta;
247}
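
// Illustrative sketch (x86 only): the prolog of an EBP frame does "push ebp; mov ebp, esp",
// so by the time EBP is established it sits two pointer-sized slots (return address plus
// saved EBP) below Caller-SP, which is why the x86 path above returns -2 * REGSIZE_BYTES.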
248
249//---------------------------------------------------------------------
250// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
251//
252// This number will be negative.
253
254int CodeGenInterface::genCallerSPtoInitialSPdelta()
255{
256 int callerSPtoSPdelta = 0;
257
258#if defined(_TARGET_ARM_)
259 callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
260 callerSPtoSPdelta -= genTotalFrameSize();
261#elif defined(_TARGET_X86_)
262 callerSPtoSPdelta -= genTotalFrameSize();
263 callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
264
265 // compCalleeRegsPushed does not account for the frame pointer
266 // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
267 if (isFramePointerUsed())
268 {
269 callerSPtoSPdelta -= REGSIZE_BYTES;
270 }
271#else
272#error "Unknown _TARGET_"
273#endif // _TARGET_*
274
275 assert(callerSPtoSPdelta <= 0);
276 return callerSPtoSPdelta;
277}
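
// Worked example tying the three deltas together (x86, EBP frame, hypothetical numbers):
// with genTotalFrameSize() == 0x2C, genCallerSPtoInitialSPdelta() returns
// -(0x2C + 4 /* return address */ + 4 /* pushed EBP */) = -0x34. Combined with
// genCallerSPtoFPdelta() == -8, genSPtoFPdelta() above computes -(-0x34) + (-8) = 0x2C,
// i.e. the frame pointer sits 0x2C bytes above the final SP.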
278
279#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
280
/*****************************************************************************
 * Should we round the results of simple operations (assignments, arithmetic operations, etc.)?
 */
284
285// inline
286// static
287bool CodeGen::genShouldRoundFP()
288{
289 RoundLevel roundLevel = getRoundFloatLevel();
290
291 switch (roundLevel)
292 {
293 case ROUND_NEVER:
294 case ROUND_CMP_CONST:
295 case ROUND_CMP:
296 return false;
297
298 default:
299 assert(roundLevel == ROUND_ALWAYS);
300 return true;
301 }
302}
303
304/*****************************************************************************
305 *
306 * Initialize some global variables.
307 */
308
309void CodeGen::genPrepForCompiler()
310{
311 treeLifeUpdater = new (compiler, CMK_bitset) TreeLifeUpdater<true>(compiler);
312
313 /* Figure out which non-register variables hold pointers */
314
315 VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
316
317 // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
318 // in a register (i.e. they live on the stack for all or part of their lifetime).
319 // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
320
321 unsigned varNum;
322 LclVarDsc* varDsc;
323 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
324 {
325 if (varDsc->lvTracked || varDsc->lvIsRegCandidate())
326 {
327 if (!varDsc->lvRegister && compiler->lvaIsGCTracked(varDsc))
328 {
329 VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
330 }
331 }
332 }
333 VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
334 genLastLiveMask = RBM_NONE;
335#ifdef DEBUG
336 compiler->fgBBcountAtCodegen = compiler->fgBBcount;
337#endif
338}
339
/*****************************************************************************
 * To report exception handling information to the VM, we need the size of the exception
 * handling regions. To compute that, we need to emit labels for the beginning block of
 * an EH region, and the block that immediately follows a region. Go through the EH
 * table and mark all these blocks with BBF_HAS_LABEL to make this happen.
 *
 * The beginning blocks of the EH regions should already have this flag set.
 *
 * No blocks should be added or removed after this.
 *
 * This code is closely coupled with genReportEH() in the sense that any block
 * that this procedure has determined it needs to have a label has to be selected
 * using the same logic both here and in genReportEH(), so basically any time there is
 * a change in the way we handle EH reporting, we have to keep the logic of these two
 * methods 'in sync'.
 */
356
357void CodeGen::genPrepForEHCodegen()
358{
359 assert(!compiler->fgSafeBasicBlockCreation);
360
361 EHblkDsc* HBtab;
362 EHblkDsc* HBtabEnd;
363
364 bool anyFinallys = false;
365
366 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
367 HBtab < HBtabEnd; HBtab++)
368 {
369 assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
370 assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
371
372 if (HBtab->ebdTryLast->bbNext != nullptr)
373 {
374 HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
375 }
376
377 if (HBtab->ebdHndLast->bbNext != nullptr)
378 {
379 HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
380 }
381
382 if (HBtab->HasFilter())
383 {
384 assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
385 // The block after the last block of the filter is
386 // the handler begin block, which we already asserted
387 // has BBF_HAS_LABEL set.
388 }
389
390#if FEATURE_EH_CALLFINALLY_THUNKS
391 if (HBtab->HasFinallyHandler())
392 {
393 anyFinallys = true;
394 }
395#endif // FEATURE_EH_CALLFINALLY_THUNKS
396 }
397
398#if FEATURE_EH_CALLFINALLY_THUNKS
399 if (anyFinallys)
400 {
401 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
402 {
403 if (block->bbJumpKind == BBJ_CALLFINALLY)
404 {
405 BasicBlock* bbToLabel = block->bbNext;
406 if (block->isBBCallAlwaysPair())
407 {
408 bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
409 }
410 if (bbToLabel != nullptr)
411 {
412 bbToLabel->bbFlags |= BBF_HAS_LABEL;
413 }
414 } // block is BBJ_CALLFINALLY
415 } // for each block
416 } // if (anyFinallys)
417#endif // FEATURE_EH_CALLFINALLY_THUNKS
418}
419
420void CodeGenInterface::genUpdateLife(GenTree* tree)
421{
422 treeLifeUpdater->UpdateLife(tree);
423}
424
425void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
426{
427 compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
428}
429
430// Return the register mask for the given register variable
431// inline
432regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
433{
434 regMaskTP regMask = RBM_NONE;
435
436 assert(varDsc->lvIsInReg());
437
438 if (varTypeIsFloating(varDsc->TypeGet()))
439 {
440 regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
441 }
442 else
443 {
444 regMask = genRegMask(varDsc->lvRegNum);
445 }
446 return regMask;
447}
448
449// Return the register mask for the given lclVar or regVar tree node
450// inline
451regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree)
452{
453 assert(tree->gtOper == GT_LCL_VAR);
454
455 regMaskTP regMask = RBM_NONE;
456 const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
457 if (varDsc->lvPromoted)
458 {
459 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
460 {
461 noway_assert(compiler->lvaTable[i].lvIsStructField);
462 if (compiler->lvaTable[i].lvIsInReg())
463 {
464 regMask |= genGetRegMask(&compiler->lvaTable[i]);
465 }
466 }
467 }
468 else if (varDsc->lvIsInReg())
469 {
470 regMask = genGetRegMask(varDsc);
471 }
472 return regMask;
473}
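
// Illustrative example (hypothetical register assignments): for a promoted struct local
// whose two field lclVars are enregistered in ECX and EDX, the loop above ORs in each
// field's mask and the function returns RBM_ECX | RBM_EDX; for a non-promoted int local
// held in ESI it simply returns RBM_ESI.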
474
475// The given lclVar is either going live (being born) or dying.
476// It might be both going live and dying (that is, it is a dead store) under MinOpts.
477// Update regSet.rsMaskVars accordingly.
478// inline
479void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree))
480{
481 regMaskTP regMask = genGetRegMask(varDsc);
482
483#ifdef DEBUG
484 if (compiler->verbose)
485 {
486 printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
487 varDsc->PrintVarReg();
488 printf(" is becoming %s ", (isDying) ? "dead" : "live");
489 Compiler::printTreeID(tree);
490 printf("\n");
491 }
492#endif // DEBUG
493
494 if (isDying)
495 {
496 // We'd like to be able to assert the following, however if we are walking
497 // through a qmark/colon tree, we may encounter multiple last-use nodes.
498 // assert((regSet.rsMaskVars & regMask) == regMask);
499 regSet.RemoveMaskVars(regMask);
500 }
501 else
502 {
503 assert((regSet.rsMaskVars & regMask) == 0);
504 regSet.AddMaskVars(regMask);
505 }
506}
507
508//----------------------------------------------------------------------
509// compHelperCallKillSet: Gets a register mask that represents the kill set for a helper call.
510// Not all JIT Helper calls follow the standard ABI on the target architecture.
511//
// TODO-CQ: Currently this list is incomplete (not all helper calls are
513// enumerated) and not 100% accurate (some killsets are bigger than
514// what they really are).
515// There's some work to be done in several places in the JIT to
516// accurately track the registers that are getting killed by
517// helper calls:
// a) LSRA needs several changes to accommodate more precise killsets
519// for every helper call it sees (both explicitly [easy] and
520// implicitly [hard])
521// b) Currently for AMD64, when we generate code for a helper call
522// we're independently over-pessimizing the killsets of the call
523// (independently from LSRA) and this needs changes
524// both in CodeGenAmd64.cpp and emitx86.cpp.
525//
526// The best solution for this problem would be to try to centralize
527// the killset information in a single place but then make the
528// corresponding changes so every code generation phase is in sync
529// about this.
530//
531// The interim solution is to only add known helper calls that don't
532// follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
533//
534// Arguments:
535// helper - The helper being inquired about
536//
537// Return Value:
538// Mask of register kills -- registers whose values are no longer guaranteed to be the same.
539//
540regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
541{
542 switch (helper)
543 {
544 case CORINFO_HELP_ASSIGN_BYREF:
545#if defined(_TARGET_AMD64_)
546 return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC;
547#elif defined(_TARGET_ARMARCH_)
548 return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;
549#elif defined(_TARGET_X86_)
550 return RBM_ESI | RBM_EDI | RBM_ECX;
551#else
552 NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
553 return RBM_CALLEE_TRASH;
554#endif
555
556#if defined(_TARGET_ARMARCH_)
557 case CORINFO_HELP_ASSIGN_REF:
558 case CORINFO_HELP_CHECKED_ASSIGN_REF:
559 return RBM_CALLEE_TRASH_WRITEBARRIER;
560#endif
561
562 case CORINFO_HELP_PROF_FCN_ENTER:
563#ifdef RBM_PROFILER_ENTER_TRASH
564 return RBM_PROFILER_ENTER_TRASH;
565#else
566 NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch");
567#endif
568
569 case CORINFO_HELP_PROF_FCN_LEAVE:
570#ifdef RBM_PROFILER_LEAVE_TRASH
571 return RBM_PROFILER_LEAVE_TRASH;
572#else
573 NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch");
574#endif
575
576 case CORINFO_HELP_PROF_FCN_TAILCALL:
577#ifdef RBM_PROFILER_TAILCALL_TRASH
578 return RBM_PROFILER_TAILCALL_TRASH;
579#else
580 NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch");
581#endif
582
583#ifdef _TARGET_X86_
584 case CORINFO_HELP_ASSIGN_REF_EAX:
585 case CORINFO_HELP_ASSIGN_REF_ECX:
586 case CORINFO_HELP_ASSIGN_REF_EBX:
587 case CORINFO_HELP_ASSIGN_REF_EBP:
588 case CORINFO_HELP_ASSIGN_REF_ESI:
589 case CORINFO_HELP_ASSIGN_REF_EDI:
590
591 case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
592 case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
593 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
594 case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
595 case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
596 case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
597 return RBM_EDX;
598
599#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
600 case CORINFO_HELP_ASSIGN_REF:
601 case CORINFO_HELP_CHECKED_ASSIGN_REF:
602 return RBM_EAX | RBM_EDX;
603#endif // FEATURE_USE_ASM_GC_WRITE_BARRIERS
604#endif
605
606 case CORINFO_HELP_STOP_FOR_GC:
607 return RBM_STOP_FOR_GC_TRASH;
608
609 case CORINFO_HELP_INIT_PINVOKE_FRAME:
610 return RBM_INIT_PINVOKE_FRAME_TRASH;
611
612 default:
613 return RBM_CALLEE_TRASH;
614 }
615}
616
617//----------------------------------------------------------------------
618// compNoGCHelperCallKillSet: Gets a register mask that represents the set of registers that no longer
619// contain GC or byref pointers, for "NO GC" helper calls. This is used by the emitter when determining
620// what registers to remove from the current live GC/byref sets (and thus what to report as dead in the
621// GC info). Note that for the CORINFO_HELP_ASSIGN_BYREF helper, in particular, the kill set reported by
622// compHelperCallKillSet() doesn't match this kill set. compHelperCallKillSet() reports the dst/src
623// address registers as killed for liveness purposes, since their values change. However, they still are
624// valid byref pointers after the call, so the dst/src address registers are NOT reported as killed here.
625//
626// Note: This list may not be complete and defaults to the default RBM_CALLEE_TRASH_NOGC registers.
627//
628// Arguments:
629// helper - The helper being inquired about
630//
631// Return Value:
632// Mask of GC register kills
633//
634regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
635{
636 assert(emitter::emitNoGChelper(helper));
637
638 switch (helper)
639 {
640 case CORINFO_HELP_ASSIGN_BYREF:
641#if defined(_TARGET_X86_)
642 // This helper only trashes ECX.
643 return RBM_ECX;
644#elif defined(_TARGET_AMD64_)
645 // This uses and defs RDI and RSI.
646 return RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI);
647#elif defined(_TARGET_ARMARCH_)
648 return RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF;
649#else
650 assert(!"unknown arch");
651#endif
652
653#if defined(_TARGET_XARCH_)
654 case CORINFO_HELP_PROF_FCN_ENTER:
655 return RBM_PROFILER_ENTER_TRASH;
656
657 case CORINFO_HELP_PROF_FCN_LEAVE:
658 return RBM_PROFILER_LEAVE_TRASH;
659
660 case CORINFO_HELP_PROF_FCN_TAILCALL:
661 return RBM_PROFILER_TAILCALL_TRASH;
662#endif // defined(_TARGET_XARCH_)
663
664#if defined(_TARGET_ARMARCH_)
665 case CORINFO_HELP_ASSIGN_REF:
666 case CORINFO_HELP_CHECKED_ASSIGN_REF:
667 return RBM_CALLEE_GCTRASH_WRITEBARRIER;
668 case CORINFO_HELP_PROF_FCN_LEAVE:
669 // In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH on ARMARCH.
670 return RBM_CALLEE_TRASH_NOGC & ~RBM_PROFILER_RET_SCRATCH;
671#endif
672
673#if defined(_TARGET_X86_)
674 case CORINFO_HELP_INIT_PINVOKE_FRAME:
675 return RBM_INIT_PINVOKE_FRAME_TRASH;
676#endif // defined(_TARGET_X86_)
677
678 default:
679 return RBM_CALLEE_TRASH_NOGC;
680 }
681}
682
683template <bool ForCodeGen>
684void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
685{
686 LclVarDsc* varDsc;
687
688#ifdef DEBUG
689 if (verbose)
690 {
691 printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
692 dumpConvertedVarSet(this, compCurLife);
693 printf(" -> %s ", VarSetOps::ToString(this, newLife));
694 dumpConvertedVarSet(this, newLife);
695 printf("\n");
696 }
697#endif // DEBUG
698
699 /* We should only be called when the live set has actually changed */
700
701 noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
702
703 if (!ForCodeGen)
704 {
705 VarSetOps::Assign(this, compCurLife, newLife);
706 return;
707 }
708
709 /* Figure out which variables are becoming live/dead at this point */
710
711 // deadSet = compCurLife - newLife
712 VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife));
713
714 // bornSet = newLife - compCurLife
715 VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife));
716
    /* A variable can't become live and dead at the same time */
718
719 // (deadSet UNION bornSet) != EMPTY
720 noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
721 // (deadSet INTERSECTION bornSet) == EMPTY
722 noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
723
724 VarSetOps::Assign(this, compCurLife, newLife);
725
726 // Handle the dying vars first, then the newly live vars.
727 // This is because, in the RyuJIT backend case, they may occupy registers that
728 // will be occupied by another var that is newly live.
729 VarSetOps::Iter deadIter(this, deadSet);
730 unsigned deadVarIndex = 0;
731 while (deadIter.NextElem(&deadVarIndex))
732 {
733 unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
734 varDsc = lvaTable + varNum;
735 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
736 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
737
738 if (varDsc->lvIsInReg())
739 {
740 // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
741 // gc sets
742 regMaskTP regMask = varDsc->lvRegMask();
743 if (isGCRef)
744 {
745 codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
746 }
747 else if (isByRef)
748 {
749 codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
750 }
751 codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr));
752 }
753 // This isn't in a register, so update the gcVarPtrSetCur.
754 else if (isGCRef || isByRef)
755 {
756 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
757 JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
758 }
759 }
760
761 VarSetOps::Iter bornIter(this, bornSet);
762 unsigned bornVarIndex = 0;
763 while (bornIter.NextElem(&bornVarIndex))
764 {
765 unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
766 varDsc = lvaTable + varNum;
767 bool isGCRef = (varDsc->TypeGet() == TYP_REF);
768 bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
769
770 if (varDsc->lvIsInReg())
771 {
772#ifdef DEBUG
773 if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
774 {
775 JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
776 }
777#endif // DEBUG
778 VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
779 codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr));
780 regMaskTP regMask = varDsc->lvRegMask();
781 if (isGCRef)
782 {
783 codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
784 }
785 else if (isByRef)
786 {
787 codeGen->gcInfo.gcRegByrefSetCur |= regMask;
788 }
789 }
790 // This isn't in a register, so update the gcVarPtrSetCur
791 else if (lvaIsGCTracked(varDsc))
792 {
793 VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
794 JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
795 }
796 }
797
798 codeGen->siUpdate();
799}
800
801// Need an explicit instantiation.
802template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);
803
804/*****************************************************************************
805 *
806 * Generate a spill.
807 */
808void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
809{
810 getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
811}
812
813/*****************************************************************************
814 *
815 * Generate a reload.
816 */
817void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
818{
819 getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
820}
821
822// inline
823regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
824{
825 return REG_ARG_0;
826}
827
828//----------------------------------------------------------------------
829// getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
830//
831// Arguments:
832// tree - spilled GenTree node
833//
834// Return Value:
835// TempDsc corresponding to tree
836TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
837{
838 // tree must be in spilled state.
839 assert((tree->gtFlags & GTF_SPILLED) != 0);
840
841 // Get the tree's SpillDsc.
842 RegSet::SpillDsc* prevDsc;
843 RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
844 assert(spillDsc != nullptr);
845
846 // Get the temp desc.
847 TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
848 return temp;
849}
850
851#ifdef _TARGET_XARCH_
852
853#ifdef _TARGET_AMD64_
854// Returns relocation type hint for an addr.
855// Note that there are no reloc hints on x86.
856//
857// Arguments
858// addr - data address
859//
860// Returns
861// relocation type hint
862//
863unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
864{
865 return compiler->eeGetRelocTypeHint((void*)addr);
866}
867#endif //_TARGET_AMD64_
868
// Return true if an absolute indirect data address can be encoded as an IP-relative
// offset. Note that this method should be used only when the caller knows that
871// the address is an icon value that VM has given and there is no GenTree node
872// representing it. Otherwise, one should always use FitsInAddrBase().
873//
874// Arguments
875// addr - an absolute indirect data address
876//
877// Returns
878// true if indir data addr could be encoded as IP-relative offset.
879//
880bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
881{
882#ifdef _TARGET_AMD64_
883 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
884#else
885 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
886 return false;
887#endif
888}
889
890// Return true if an indirect code address can be encoded as IP-relative offset.
891// Note that this method should be used only when the caller knows that the
892// address is an icon value that VM has given and there is no GenTree node
893// representing it. Otherwise, one should always use FitsInAddrBase().
894//
895// Arguments
896// addr - an absolute indirect code address
897//
898// Returns
899// true if indir code addr could be encoded as IP-relative offset.
900//
901bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
902{
903#ifdef _TARGET_AMD64_
904 return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
905#else
906 // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
907 return true;
908#endif
909}
910
911// Return true if an indirect code address can be encoded as 32-bit displacement
912// relative to zero. Note that this method should be used only when the caller
913// knows that the address is an icon value that VM has given and there is no
914// GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
915//
916// Arguments
917// addr - absolute indirect code address
918//
919// Returns
920// true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
921//
922bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
923{
924 return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
925}
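
// For example (hypothetical addresses): 0x7FFF1234 fits in a signed 32-bit displacement,
// so it can be encoded relative to zero, whereas a typical 64-bit address such as
// 0x00007FFD12345678 does not fit and will instead need a relocation (see
// genCodeIndirAddrNeedsReloc below).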
926
927// Return true if an absolute indirect code address needs a relocation recorded with VM.
928//
929// Arguments
930// addr - an absolute indirect code address
931//
932// Returns
933// true if indir code addr needs a relocation recorded with VM
934//
935bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
936{
937 // If generating relocatable ngen code, then all code addr should go through relocation
938 if (compiler->opts.compReloc)
939 {
940 return true;
941 }
942
943#ifdef _TARGET_AMD64_
944 // See if the code indir addr can be encoded as 32-bit displacement relative to zero.
945 // We don't need a relocation in that case.
946 if (genCodeIndirAddrCanBeEncodedAsZeroRelOffset(addr))
947 {
948 return false;
949 }
950
951 // Else we need a relocation.
952 return true;
953#else //_TARGET_X86_
954 // On x86 there is no need to record or ask for relocations during jitting,
955 // because all addrs fit within 32-bits.
956 return false;
957#endif //_TARGET_X86_
958}
959
960// Return true if a direct code address needs to be marked as relocatable.
961//
962// Arguments
963// addr - absolute direct code address
964//
965// Returns
966// true if direct code addr needs a relocation recorded with VM
967//
968bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
969{
970 // If generating relocatable ngen code, then all code addr should go through relocation
971 if (compiler->opts.compReloc)
972 {
973 return true;
974 }
975
976#ifdef _TARGET_AMD64_
977 // By default all direct code addresses go through relocation so that VM will setup
978 // a jump stub if addr cannot be encoded as pc-relative offset.
979 return true;
980#else //_TARGET_X86_
981 // On x86 there is no need for recording relocations during jitting,
982 // because all addrs fit within 32-bits.
983 return false;
984#endif //_TARGET_X86_
985}
986#endif //_TARGET_XARCH_
987
988/*****************************************************************************
989 *
990 * The following can be used to create basic blocks that serve as labels for
991 * the emitter. Use with caution - these are not real basic blocks!
992 *
993 */
994
995// inline
996BasicBlock* CodeGen::genCreateTempLabel()
997{
998#ifdef DEBUG
999 // These blocks don't affect FP
1000 compiler->fgSafeBasicBlockCreation = true;
1001#endif
1002
1003 BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
1004
1005#ifdef DEBUG
1006 compiler->fgSafeBasicBlockCreation = false;
1007#endif
1008
1009 block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
1010
1011 // Use coldness of current block, as this label will
1012 // be contained in it.
1013 block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
1014
1015#ifdef DEBUG
1016#ifdef UNIX_X86_ABI
1017 block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int);
1018#else
1019 block->bbTgtStkDepth = genStackLevel / sizeof(int);
1020#endif
1021#endif
1022 return block;
1023}
1024
1025// inline
1026void CodeGen::genDefineTempLabel(BasicBlock* label)
1027{
1028#ifdef DEBUG
1029 if (compiler->opts.dspCode)
1030 {
1031 printf("\n L_M%03u_" FMT_BB ":\n", Compiler::s_compMethodsCount, label->bbNum);
1032 }
1033#endif
1034
1035 label->bbEmitCookie =
1036 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
1037}
1038
1039/*****************************************************************************
1040 *
1041 * Adjust the stack pointer by the given value; assumes that this follows
1042 * a call so only callee-saved registers (and registers that may hold a
1043 * return value) are used at this point.
1044 */
1045
1046void CodeGen::genAdjustSP(target_ssize_t delta)
1047{
1048#if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI)
1049 if (delta == sizeof(int))
1050 inst_RV(INS_pop, REG_ECX, TYP_INT);
1051 else
1052#endif
1053 inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
1054}
1055
1056//------------------------------------------------------------------------
1057// genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
1058//
1059// Arguments:
1060// block - The BasicBlock for which we are about to generate code.
1061//
1062// Assumptions:
1063// Must be called just prior to generating code for 'block'.
1064//
1065// Notes:
1066// This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
1067// and if 'block' is a throw helper block with a non-zero stack level.
1068
1069void CodeGen::genAdjustStackLevel(BasicBlock* block)
1070{
1071#if !FEATURE_FIXED_OUT_ARGS
1072 // Check for inserted throw blocks and adjust genStackLevel.
1073 CLANG_FORMAT_COMMENT_ANCHOR;
1074
1075#if defined(UNIX_X86_ABI)
1076 if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1077 {
1078 // x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned
        // at this point if a jump to this block is made in the middle of pushing arguments.
1080 //
1081 // Here we restore SP to prevent potential stack alignment issues.
1082 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta());
1083 }
1084#endif
1085
1086 if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
1087 {
1088 noway_assert(block->bbFlags & BBF_JMP_TARGET);
1089
1090 SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
1091
1092 if (genStackLevel != 0)
1093 {
1094#ifdef _TARGET_X86_
1095 getEmitter()->emitMarkStackLvl(genStackLevel);
1096 inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
1097 SetStackLevel(0);
1098#else // _TARGET_X86_
1099 NYI("Need emitMarkStackLvl()");
1100#endif // _TARGET_X86_
1101 }
1102 }
1103#endif // !FEATURE_FIXED_OUT_ARGS
1104}
1105
1106#ifdef _TARGET_ARMARCH_
// Returns the size of 'op'; '*alignmentWB' is an out parameter that receives its alignment.
1109unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1110{
1111 unsigned alignment = 0;
1112 unsigned opSize = 0;
1113
1114 if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
1115 {
1116 opSize = InferStructOpSizeAlign(op, &alignment);
1117 }
1118 else
1119 {
1120 alignment = genTypeAlignments[op->TypeGet()];
1121 opSize = genTypeSizes[op->TypeGet()];
1122 }
1123
1124 assert(opSize != 0);
1125 assert(alignment != 0);
1126
1127 (*alignmentWB) = alignment;
1128 return opSize;
1129}
// Returns the size of the struct operand 'op'; '*alignmentWB' is an out parameter that receives its alignment.
1132unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB)
1133{
1134 unsigned alignment = 0;
1135 unsigned opSize = 0;
1136
1137 while (op->gtOper == GT_COMMA)
1138 {
1139 op = op->gtOp.gtOp2;
1140 }
1141
1142 if (op->gtOper == GT_OBJ)
1143 {
1144 CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
1145 opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
1146 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1147 }
1148 else if (op->gtOper == GT_LCL_VAR)
1149 {
1150 unsigned varNum = op->gtLclVarCommon.gtLclNum;
1151 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1152 assert(varDsc->lvType == TYP_STRUCT);
1153 opSize = varDsc->lvSize();
1154#ifndef _TARGET_64BIT_
1155 if (varDsc->lvStructDoubleAlign)
1156 {
1157 alignment = TARGET_POINTER_SIZE * 2;
1158 }
1159 else
1160#endif // !_TARGET_64BIT_
1161 {
1162 alignment = TARGET_POINTER_SIZE;
1163 }
1164 }
1165 else if (op->OperIsCopyBlkOp())
1166 {
1167 GenTree* op2 = op->gtOp.gtOp2;
1168
1169 if (op2->OperGet() == GT_CNS_INT)
1170 {
1171 if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
1172 {
1173 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
1174 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1175 alignment =
1176 roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1177 }
1178 else
1179 {
1180 opSize = (unsigned)op2->gtIntCon.gtIconVal;
1181 GenTree* op1 = op->gtOp.gtOp1;
1182 assert(op1->OperGet() == GT_LIST);
1183 GenTree* dstAddr = op1->gtOp.gtOp1;
1184 if (dstAddr->OperGet() == GT_ADDR)
1185 {
1186 InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
1187 }
1188 else
1189 {
                    assert(!"Unhandled dstAddr node");
1191 alignment = TARGET_POINTER_SIZE;
1192 }
1193 }
1194 }
1195 else
1196 {
1197 noway_assert(!"Variable sized COPYBLK register arg!");
1198 opSize = 0;
1199 alignment = TARGET_POINTER_SIZE;
1200 }
1201 }
1202 else if (op->gtOper == GT_MKREFANY)
1203 {
1204 opSize = TARGET_POINTER_SIZE * 2;
1205 alignment = TARGET_POINTER_SIZE;
1206 }
1207 else if (op->IsArgPlaceHolderNode())
1208 {
1209 CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
1210 assert(clsHnd != 0);
1211 opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
1212 alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
1213 }
1214 else
1215 {
1216 assert(!"Unhandled gtOper");
1217 opSize = TARGET_POINTER_SIZE;
1218 alignment = TARGET_POINTER_SIZE;
1219 }
1220
1221 assert(opSize != 0);
1222 assert(alignment != 0);
1223
1224 (*alignmentWB) = alignment;
1225 return opSize;
1226}
1227
1228#endif // _TARGET_ARMARCH_
1229
1230/*****************************************************************************
1231 *
1232 * Take an address expression and try to find the best set of components to
1233 * form an address mode; returns non-zero if this is successful.
1234 *
1235 * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
1236 * Refactor this code so that the underlying analysis can be used in
1237 * the RyuJIT Backend to do lowering, instead of having to call this method with the
1238 * option to not generate the code.
1239 *
1240 * 'fold' specifies if it is OK to fold the array index which hangs off
1241 * a GT_NOP node.
1242 *
1243 * If successful, the parameters will be set to the following values:
1244 *
1245 * *rv1Ptr ... base operand
1246 * *rv2Ptr ... optional operand
1247 * *revPtr ... true if rv2 is before rv1 in the evaluation order
1248 * #if SCALED_ADDR_MODES
1249 * *mulPtr ... optional multiplier (2/4/8) for rv2
1250 * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
1251 * #endif
1252 * *cnsPtr ... integer constant [optional]
1253 *
1254 * IMPORTANT NOTE: This routine doesn't generate any code, it merely
1255 * identifies the components that might be used to
1256 * form an address mode later on.
1257 */
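
// Illustrative decomposition (not part of the original comment; hypothetical tree): on x86/x64,
// for an address tree of the form ADD(lclVar 'base', ADD(LSH(lclVar 'index', CNS_INT 2), CNS_INT 16)),
// the routine returns *rv1Ptr == 'base', *rv2Ptr == 'index', *mulPtr == 4 and *cnsPtr == 16,
// i.e. the components of the address mode [base + 4*index + 16].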
1258
1259bool CodeGen::genCreateAddrMode(GenTree* addr,
1260 bool fold,
1261 bool* revPtr,
1262 GenTree** rv1Ptr,
1263 GenTree** rv2Ptr,
1264#if SCALED_ADDR_MODES
1265 unsigned* mulPtr,
1266#endif // SCALED_ADDR_MODES
1267 ssize_t* cnsPtr)
1268{
1269 /*
1270 The following indirections are valid address modes on x86/x64:
1271
1272 [ icon] * not handled here
1273 [reg ]
1274 [reg + icon]
1275 [reg1 + reg2 ]
1276 [reg1 + reg2 + icon]
1277 [reg1 + 2 * reg2 ]
1278 [reg1 + 4 * reg2 ]
1279 [reg1 + 8 * reg2 ]
1280 [ 2 * reg2 + icon]
1281 [ 4 * reg2 + icon]
1282 [ 8 * reg2 + icon]
1283 [reg1 + 2 * reg2 + icon]
1284 [reg1 + 4 * reg2 + icon]
1285 [reg1 + 8 * reg2 + icon]
1286
1287 The following indirections are valid address modes on arm64:
1288
1289 [reg]
1290 [reg + icon]
1291 [reg1 + reg2]
1292 [reg1 + reg2 * natural-scale]
1293
1294 */
1295
1296 /* All indirect address modes require the address to be an addition */
1297
1298 if (addr->gtOper != GT_ADD)
1299 {
1300 return false;
1301 }
1302
1303 // Can't use indirect addressing mode as we need to check for overflow.
1304 // Also, can't use 'lea' as it doesn't set the flags.
1305
1306 if (addr->gtOverflow())
1307 {
1308 return false;
1309 }
1310
1311 GenTree* rv1 = nullptr;
1312 GenTree* rv2 = nullptr;
1313
1314 GenTree* op1;
1315 GenTree* op2;
1316
1317 ssize_t cns;
1318#if SCALED_ADDR_MODES
1319 unsigned mul;
1320#endif // SCALED_ADDR_MODES
1321
1322 GenTree* tmp;
1323
1324 /* What order are the sub-operands to be evaluated */
1325
1326 if (addr->gtFlags & GTF_REVERSE_OPS)
1327 {
1328 op1 = addr->gtOp.gtOp2;
1329 op2 = addr->gtOp.gtOp1;
1330 }
1331 else
1332 {
1333 op1 = addr->gtOp.gtOp1;
1334 op2 = addr->gtOp.gtOp2;
1335 }
1336
1337 bool rev = false; // Is op2 first in the evaluation order?
1338
1339 /*
1340 A complex address mode can combine the following operands:
1341
1342 op1 ... base address
1343 op2 ... optional scaled index
1344#if SCALED_ADDR_MODES
1345 mul ... optional multiplier (2/4/8) for op2
1346#endif
1347 cns ... optional displacement
1348
1349 Here we try to find such a set of operands and arrange for these
1350 to sit in registers.
1351 */
1352
1353 cns = 0;
1354#if SCALED_ADDR_MODES
1355 mul = 0;
1356#endif // SCALED_ADDR_MODES
1357
1358AGAIN:
1359 /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
1360 constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
1361 here if we find a scaled index.
1362 */
1363 CLANG_FORMAT_COMMENT_ANCHOR;
1364
1365#if SCALED_ADDR_MODES
1366 assert(mul == 0);
1367#endif // SCALED_ADDR_MODES
1368
1369 /* Special case: keep constants as 'op2' */
1370
1371 if (op1->IsCnsIntOrI())
1372 {
        // Presumably op2 is not a constant (this shouldn't happen if we've done constant folding).
1374 tmp = op1;
1375 op1 = op2;
1376 op2 = tmp;
1377 }
1378
1379 /* Check for an addition of a constant */
1380
1381 if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
1382 {
1383 /* We're adding a constant */
1384
1385 cns += op2->gtIntConCommon.IconValue();
1386
1387#if defined(_TARGET_ARMARCH_)
1388 if (cns == 0)
1389#endif
1390 {
1391 /* Inspect the operand the constant is being added to */
1392
1393 switch (op1->gtOper)
1394 {
1395 case GT_ADD:
1396
1397 if (op1->gtOverflow())
1398 {
1399 break;
1400 }
1401
1402 op2 = op1->gtOp.gtOp2;
1403 op1 = op1->gtOp.gtOp1;
1404
1405 goto AGAIN;
1406
1407#if SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1408 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1409 case GT_MUL:
1410 if (op1->gtOverflow())
1411 {
1412 return false; // Need overflow check
1413 }
1414
1415 __fallthrough;
1416
1417 case GT_LSH:
1418
1419 mul = op1->GetScaledIndex();
1420 if (mul)
1421 {
1422 /* We can use "[mul*rv2 + icon]" */
1423
1424 rv1 = nullptr;
1425 rv2 = op1->gtOp.gtOp1;
1426
1427 goto FOUND_AM;
1428 }
1429 break;
1430#endif // SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_)
1431
1432 default:
1433 break;
1434 }
1435 }
1436
1437 /* The best we can do is "[rv1 + icon]" */
1438
1439 rv1 = op1;
1440 rv2 = nullptr;
1441
1442 goto FOUND_AM;
1443 }
1444
1445 // op2 is not a constant. So keep on trying.
1446
1447 /* Neither op1 nor op2 are sitting in a register right now */
1448
1449 switch (op1->gtOper)
1450 {
1451#if !defined(_TARGET_ARMARCH_)
1452 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1453 case GT_ADD:
1454
1455 if (op1->gtOverflow())
1456 {
1457 break;
1458 }
1459
1460 if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
1461 {
1462 cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
1463 op1 = op1->gtOp.gtOp1;
1464
1465 goto AGAIN;
1466 }
1467
1468 break;
1469
1470#if SCALED_ADDR_MODES
1471
1472 case GT_MUL:
1473
1474 if (op1->gtOverflow())
1475 {
1476 break;
1477 }
1478
1479 __fallthrough;
1480
1481 case GT_LSH:
1482
1483 mul = op1->GetScaledIndex();
1484 if (mul)
1485 {
1486 /* 'op1' is a scaled value */
1487
1488 rv1 = op2;
1489 rv2 = op1->gtOp.gtOp1;
1490
1491 int argScale;
1492 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1493 {
1494 if (jitIsScaleIndexMul(argScale * mul))
1495 {
1496 mul = mul * argScale;
1497 rv2 = rv2->gtOp.gtOp1;
1498 }
1499 else
1500 {
1501 break;
1502 }
1503 }
1504
1505 noway_assert(rev == false);
1506 rev = true;
1507
1508 goto FOUND_AM;
1509 }
1510 break;
1511
1512#endif // SCALED_ADDR_MODES
1513#endif // !_TARGET_ARMARCH
1514
1515 case GT_NOP:
1516
1517 op1 = op1->gtOp.gtOp1;
1518 goto AGAIN;
1519
1520 case GT_COMMA:
1521
1522 op1 = op1->gtOp.gtOp2;
1523 goto AGAIN;
1524
1525 default:
1526 break;
1527 }
1528
1529 noway_assert(op2);
1530 switch (op2->gtOper)
1531 {
1532#if !defined(_TARGET_ARMARCH_)
1533 // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
1534 case GT_ADD:
1535
1536 if (op2->gtOverflow())
1537 {
1538 break;
1539 }
1540
1541 if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
1542 {
1543 cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
1544 op2 = op2->gtOp.gtOp1;
1545
1546 goto AGAIN;
1547 }
1548
1549 break;
1550
1551#if SCALED_ADDR_MODES
1552
1553 case GT_MUL:
1554
1555 if (op2->gtOverflow())
1556 {
1557 break;
1558 }
1559
1560 __fallthrough;
1561
1562 case GT_LSH:
1563
1564 mul = op2->GetScaledIndex();
1565 if (mul)
1566 {
                // 'op2' is a scaled value... is its argument also scaled?
1568 int argScale;
1569 rv2 = op2->gtOp.gtOp1;
1570 while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
1571 {
1572 if (jitIsScaleIndexMul(argScale * mul))
1573 {
1574 mul = mul * argScale;
1575 rv2 = rv2->gtOp.gtOp1;
1576 }
1577 else
1578 {
1579 break;
1580 }
1581 }
1582
1583 rv1 = op1;
1584
1585 goto FOUND_AM;
1586 }
1587 break;
1588
1589#endif // SCALED_ADDR_MODES
1590#endif // !_TARGET_ARMARCH
1591
1592 case GT_NOP:
1593
1594 op2 = op2->gtOp.gtOp1;
1595 goto AGAIN;
1596
1597 case GT_COMMA:
1598
1599 op2 = op2->gtOp.gtOp2;
1600 goto AGAIN;
1601
1602 default:
1603 break;
1604 }
1605
    /* The best we can do is "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
1607
1608 rv1 = op1;
1609 rv2 = op2;
1610#ifdef _TARGET_ARM64_
1611 assert(cns == 0);
1612#endif
1613
1614FOUND_AM:
1615
1616 if (rv2)
1617 {
1618 /* Make sure a GC address doesn't end up in 'rv2' */
1619
1620 if (varTypeIsGC(rv2->TypeGet()))
1621 {
1622 noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
1623
1624 tmp = rv1;
1625 rv1 = rv2;
1626 rv2 = tmp;
1627
1628 rev = !rev;
1629 }
1630
1631 /* Special case: constant array index (that is range-checked) */
1632
1633 if (fold)
1634 {
1635 ssize_t tmpMul;
1636 GenTree* index;
1637
1638 if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
1639 {
1640 /* For valuetype arrays where we can't use the scaled address
1641 mode, rv2 will point to the scaled index. So we have to do
1642 more work */
1643
1644 tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
1645 if (mul)
1646 {
1647 tmpMul *= mul;
1648 }
1649 }
1650 else
1651 {
                /* May be a simple array. rv2 points to the actual index */
1653
1654 index = rv2;
1655 tmpMul = mul;
1656 }
1657
1658 /* Get hold of the array index and see if it's a constant */
1659 if (index->IsIntCnsFitsInI32())
1660 {
1661 /* Get hold of the index value */
1662 ssize_t ixv = index->AsIntConCommon()->IconValue();
1663
1664#if SCALED_ADDR_MODES
1665 /* Scale the index if necessary */
1666 if (tmpMul)
1667 {
1668 ixv *= tmpMul;
1669 }
1670#endif
1671
1672 if (FitsIn<INT32>(cns + ixv))
1673 {
1674 /* Add the scaled index to the offset value */
1675
1676 cns += ixv;
1677
1678#if SCALED_ADDR_MODES
1679 /* There is no scaled operand any more */
1680 mul = 0;
1681#endif
1682 rv2 = nullptr;
1683 }
1684 }
1685 }
1686 }
1687
1688 // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
1689 noway_assert(rv1 || mul != 1);
1690
1691 noway_assert(FitsIn<INT32>(cns));
1692
1693 if (rv1 == nullptr && rv2 == nullptr)
1694 {
1695 return false;
1696 }
1697
1698 /* Success - return the various components to the caller */
1699
1700 *revPtr = rev;
1701 *rv1Ptr = rv1;
1702 *rv2Ptr = rv2;
1703#if SCALED_ADDR_MODES
1704 *mulPtr = mul;
1705#endif
1706 *cnsPtr = cns;
1707
1708 return true;
1709}
1710
1711/*****************************************************************************
1712* The condition to use for (the jmp/set for) the given type of operation
1713*
* On amd64, this routine should be used when there is no gentree available
* and one needs to generate jumps based on integer comparisons. When a gentree
* is available, always use its overloaded version.
1717*
1718*/
1719
1720// static
1721emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
1722{
1723 const static BYTE genJCCinsSigned[] = {
1724#if defined(_TARGET_XARCH_)
1725 EJ_je, // GT_EQ
1726 EJ_jne, // GT_NE
1727 EJ_jl, // GT_LT
1728 EJ_jle, // GT_LE
1729 EJ_jge, // GT_GE
1730 EJ_jg, // GT_GT
1731 EJ_je, // GT_TEST_EQ
1732 EJ_jne, // GT_TEST_NE
1733#elif defined(_TARGET_ARMARCH_)
1734 EJ_eq, // GT_EQ
1735 EJ_ne, // GT_NE
1736 EJ_lt, // GT_LT
1737 EJ_le, // GT_LE
1738 EJ_ge, // GT_GE
1739 EJ_gt, // GT_GT
1740#if defined(_TARGET_ARM64_)
1741 EJ_eq, // GT_TEST_EQ
1742 EJ_ne, // GT_TEST_NE
1743#endif
1744#endif
1745 };
1746
1747 const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */
1748 {
1749#if defined(_TARGET_XARCH_)
1750 EJ_je, // GT_EQ
1751 EJ_jne, // GT_NE
1752 EJ_jb, // GT_LT
1753 EJ_jbe, // GT_LE
1754 EJ_jae, // GT_GE
1755 EJ_ja, // GT_GT
1756 EJ_je, // GT_TEST_EQ
1757 EJ_jne, // GT_TEST_NE
1758#elif defined(_TARGET_ARMARCH_)
1759 EJ_eq, // GT_EQ
1760 EJ_ne, // GT_NE
1761 EJ_lo, // GT_LT
1762 EJ_ls, // GT_LE
1763 EJ_hs, // GT_GE
1764 EJ_hi, // GT_GT
1765#if defined(_TARGET_ARM64_)
1766 EJ_eq, // GT_TEST_EQ
1767 EJ_ne, // GT_TEST_NE
1768#endif
1769#endif
1770 };
1771
1772 const static BYTE genJCCinsLogical[] = /* logical operation */
1773 {
1774#if defined(_TARGET_XARCH_)
1775 EJ_je, // GT_EQ (Z == 1)
1776 EJ_jne, // GT_NE (Z == 0)
1777 EJ_js, // GT_LT (S == 1)
1778 EJ_NONE, // GT_LE
1779 EJ_jns, // GT_GE (S == 0)
1780 EJ_NONE, // GT_GT
1781 EJ_NONE, // GT_TEST_EQ
1782 EJ_NONE, // GT_TEST_NE
1783#elif defined(_TARGET_ARMARCH_)
1784 EJ_eq, // GT_EQ (Z == 1)
1785 EJ_ne, // GT_NE (Z == 0)
1786 EJ_mi, // GT_LT (N == 1)
1787 EJ_NONE, // GT_LE
1788 EJ_pl, // GT_GE (N == 0)
1789 EJ_NONE, // GT_GT
1790#if defined(_TARGET_ARM64_)
1791 EJ_eq, // GT_TEST_EQ
1792 EJ_ne, // GT_TEST_NE
1793#endif
1794#endif
1795 };
1796
1797#if defined(_TARGET_XARCH_)
1798 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
1799 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
1800 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
1801 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
1802 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
1803 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
1804 assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je);
1805 assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne);
1806
1807 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
1808 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
1809 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
1810 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
1811 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
1812 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
1813 assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je);
1814 assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne);
1815
1816 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
1817 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
1818 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
1819 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
1820#elif defined(_TARGET_ARMARCH_)
1821 assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
1822 assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
1823 assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
1824 assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
1825 assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
1826 assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
1827
1828 assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
1829 assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
1830 assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
1831 assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
1832 assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
1833 assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
1834
1835 assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
1836 assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
1837 assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
1838 assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
1839#else
1840 assert(!"unknown arch");
1841#endif
1842 assert(GenTree::OperIsCompare(cmp));
1843
1844 emitJumpKind result = EJ_COUNT;
1845
1846 if (compareKind == CK_UNSIGNED)
1847 {
1848 result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
1849 }
1850 else if (compareKind == CK_SIGNED)
1851 {
1852 result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
1853 }
1854 else if (compareKind == CK_LOGICAL)
1855 {
1856 result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
1857 }
1858 assert(result != EJ_COUNT);
1859 return result;
1860}
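
// Illustrative usage (hypothetical sketch): to branch when an unsigned comparison
// computed "below", one would do something like
//     emitJumpKind jmpKind = genJumpKindForOper(GT_LT, CK_UNSIGNED); // EJ_jb on xarch, EJ_lo on arm
//     inst_JMP(jmpKind, skipLabel);
// where skipLabel is a label block obtained from genCreateTempLabel(), as in
// genEmitGSCookieCheck() below.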
1861
1862#ifdef _TARGET_ARMARCH_
1863//------------------------------------------------------------------------
1864// genEmitGSCookieCheck: Generate code to check that the GS cookie
1865// wasn't thrashed by a buffer overrun. Common code for ARM32 and ARM64.
1866//
1867void CodeGen::genEmitGSCookieCheck(bool pushReg)
1868{
1869 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
1870
1871 // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
1872 // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
1873 if (!pushReg && (compiler->info.compRetType == TYP_REF))
1874 gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
1875
1876 // We need two temporary registers, to load the GS cookie values and compare them. We can't use
1877 // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be
1878 // callee-trash registers, which should not contain anything interesting at this point.
1879 // We don't have any IR node representing this check, so LSRA can't communicate registers
1880 // for us to use.
1881
1882 regNumber regGSConst = REG_GSCOOKIE_TMP_0;
1883 regNumber regGSValue = REG_GSCOOKIE_TMP_1;
1884
1885 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
1886 {
1887 // load the GS cookie constant into a reg
1888 //
1889 genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
1890 }
1891 else
1892 {
1893 // Ngen case - GS cookie constant needs to be accessed through an indirection.
1894 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
1895 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
1896 }
1897 // Load this method's GS value from the stack frame
1898 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
    // Compare with the GS cookie constant
1900 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
1901
1902 BasicBlock* gsCheckBlk = genCreateTempLabel();
1903 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
1904 inst_JMP(jmpEqual, gsCheckBlk);
1905 // regGSConst and regGSValue aren't needed anymore; we can use them for the helper call
1906 genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
1907 genDefineTempLabel(gsCheckBlk);
1908}
1909#endif // _TARGET_ARMARCH_
1910
1911/*****************************************************************************
1912 *
1913 * Generate an exit sequence for a return from a method (note: when compiling
1914 * for speed there might be multiple exit points).
1915 */
1916
1917void CodeGen::genExitCode(BasicBlock* block)
1918{
1919 /* Just wrote the first instruction of the epilog - inform debugger
1920 Note that this may result in a duplicate IPmapping entry, and
1921 that this is ok */
1922
1923 // For non-optimized debuggable code, there is only one epilog.
1924 genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
1925
1926 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
1927 if (compiler->getNeedsGSSecurityCookie())
1928 {
1929 genEmitGSCookieCheck(jmpEpilog);
1930
1931 if (jmpEpilog)
1932 {
1933 // Dev10 642944 -
1934 // The GS cookie check created a temp label that has no live
1935 // incoming GC registers; we need to fix that
1936
1937 unsigned varNum;
1938 LclVarDsc* varDsc;
1939
1940 /* Figure out which register parameters hold pointers */
1941
1942 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
1943 varNum++, varDsc++)
1944 {
1945 noway_assert(varDsc->lvIsParam);
1946
1947 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
1948 }
1949
1950 getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
1951 getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
1952 }
1953 }
1954
1955 genReserveEpilog(block);
1956}
1957
1958//------------------------------------------------------------------------
1959// genJumpToThrowHlpBlk: Generate code for an out-of-line exception.
1960//
1961// Notes:
1962// For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef().
1963// Otherwise, we generate the 'throw' inline.
1964//
1965// Arguments:
1966// jumpKind - jump kind to generate;
1967// codeKind - the special throw-helper kind;
1968// failBlk - optional fail target block, if it is already known;
1969//
1970void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTree* failBlk)
1971{
1972 bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
1973#if defined(UNIX_X86_ABI) && FEATURE_EH_FUNCLETS
1974 // Inline exception-throwing code in funclet to make it possible to unwind funclet frames.
1975 useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT);
1976#endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS
1977
1978 if (useThrowHlpBlk)
1979 {
1980 // For code with throw helper blocks, find and use the helper block for
1981 // raising the exception. The block may be shared by other trees too.
1982
1983 BasicBlock* excpRaisingBlock;
1984
1985 if (failBlk != nullptr)
1986 {
1987 // We already know which block to jump to. Use that.
1988 assert(failBlk->gtOper == GT_LABEL);
1989 excpRaisingBlock = failBlk->gtLabel.gtLabBB;
1990
1991#ifdef DEBUG
1992 Compiler::AddCodeDsc* add =
1993 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
1994 assert(excpRaisingBlock == add->acdDstBlk);
1995#if !FEATURE_FIXED_OUT_ARGS
1996 assert(add->acdStkLvlInit || isFramePointerUsed());
1997#endif // !FEATURE_FIXED_OUT_ARGS
1998#endif // DEBUG
1999 }
2000 else
2001 {
2002 // Find the helper-block which raises the exception.
2003 Compiler::AddCodeDsc* add =
2004 compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
2005 PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
2006 excpRaisingBlock = add->acdDstBlk;
2007#if !FEATURE_FIXED_OUT_ARGS
2008 assert(add->acdStkLvlInit || isFramePointerUsed());
2009#endif // !FEATURE_FIXED_OUT_ARGS
2010 }
2011
2012 noway_assert(excpRaisingBlock != nullptr);
2013
2014 // Jump to the exception-throwing block on error.
2015 inst_JMP(jumpKind, excpRaisingBlock);
2016 }
2017 else
2018 {
2019 // The code to throw the exception will be generated inline, and
2020 // we will jump around it in the normal non-exception case.
2021
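// For illustration, when 'jumpKind' is reversible the inline form emitted below is roughly:
//
//     j<reversed cond>  <skip>               ; normal (non-exception) path jumps around the throw
//     call              acdHelper(codeKind)  ; e.g. the overflow helper
//   <skip>: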
2022 BasicBlock* tgtBlk = nullptr;
2023 emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
2024 if (reverseJumpKind != jumpKind)
2025 {
2026 tgtBlk = genCreateTempLabel();
2027 inst_JMP(reverseJumpKind, tgtBlk);
2028 }
2029
2030 genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
2031
2032 // Define the spot for the normal non-exception case to jump to.
2033 if (tgtBlk != nullptr)
2034 {
2035 assert(reverseJumpKind != jumpKind);
2036 genDefineTempLabel(tgtBlk);
2037 }
2038 }
2039}
2040
2041/*****************************************************************************
2042 *
2043 * The last operation done was generating code for "tree" and that would
2044 * have set the flags. Check if the operation caused an overflow.
2045 */
2046
2047// inline
2048void CodeGen::genCheckOverflow(GenTree* tree)
2049{
2050 // An overflow check must have been requested for this tree
2051 noway_assert(tree->gtOverflow());
2052
2053 const var_types type = tree->TypeGet();
2054
2055 // Overflow checks can only occur for the non-small types (i.e. TYP_INT, TYP_LONG)
2056 noway_assert(!varTypeIsSmall(type));
2057
2058 emitJumpKind jumpKind;
2059
2060#ifdef _TARGET_ARM64_
2061 if (tree->OperGet() == GT_MUL)
2062 {
2063 jumpKind = EJ_ne;
2064 }
2065 else
2066#endif
2067 {
2068 bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
2069
2070#if defined(_TARGET_XARCH_)
2071
2072 jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
2073
2074#elif defined(_TARGET_ARMARCH_)
2075
2076 jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
2077
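// A note on the ARM flags (a sketch of the reasoning): unsigned additions signal overflow by
// setting the carry flag, so we branch on EJ_hs (carry set); unsigned subtractions signal
// overflow (a borrow) by clearing the carry flag, so for GT_SUB we keep EJ_lo (carry clear).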
2078 if (jumpKind == EJ_lo)
2079 {
2080 if (tree->OperGet() != GT_SUB)
2081 {
2082 jumpKind = EJ_hs;
2083 }
2084 }
2085
2086#endif // defined(_TARGET_ARMARCH_)
2087 }
2088
2089 // Jump to the block which will throw the exception
2090
2091 genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
2092}
2093
2094#if FEATURE_EH_FUNCLETS
2095
2096/*****************************************************************************
2097 *
2098 * Update the current funclet, as determined by the given block, if necessary.
2099 * For non-BBF_FUNCLET_BEG blocks, this asserts that the current funclet
2100 * is up-to-date.
2101 *
2102 */
2103
2104void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
2105{
2106 if (block->bbFlags & BBF_FUNCLET_BEG)
2107 {
2108 compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
2109 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2110 {
2111 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
2112 }
2113 else
2114 {
2115 // We shouldn't see FUNC_ROOT
2116 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2117 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
2118 }
2119 }
2120 else
2121 {
2122 assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
2123 if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
2124 {
2125 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
2126 }
2127 else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
2128 {
2129 assert(!block->hasHndIndex());
2130 }
2131 else
2132 {
2133 assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
2134 assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
2135 }
2136 }
2137}
2138
2139#if defined(_TARGET_ARM_)
2140void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
2141{
2142 // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
2143 // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
2144 // calls the funclet during non-exceptional control flow.
2145 if (block->bbFlags & BBF_FINALLY_TARGET)
2146 {
2147 assert(block->bbFlags & BBF_JMP_TARGET);
2148
2149#ifdef DEBUG
2150 if (compiler->verbose)
2151 {
2152 printf("\nEmitting finally target NOP predecessor for " FMT_BB "\n", block->bbNum);
2153 }
2154#endif
2155 // Create a label that we'll use for computing the start of an EH region, if this block is
2156 // at the beginning of such a region. If we used the existing bbEmitCookie as is for
2157 // determining the EH regions, then this NOP would end up outside of the region, if this
2158 // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
2159 // would be executed, which we would prefer not to do.
2160
2161 block->bbUnwindNopEmitCookie =
2162 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
2163
2164 instGen(INS_nop);
2165 }
2166}
2167#endif
2168
2169#endif // FEATURE_EH_FUNCLETS
2170
2171/*****************************************************************************
2172 *
2173 * Generate code for the function.
2174 */
2175
2176void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
2177{
2178#ifdef DEBUG
2179 if (verbose)
2180 {
2181 printf("*************** In genGenerateCode()\n");
2182 compiler->fgDispBasicBlocks(compiler->verboseTrees);
2183 }
2184#endif
2185
2186 unsigned codeSize;
2187 unsigned prologSize;
2188 unsigned epilogSize;
2189
2190 void* consPtr;
2191
2192#ifdef DEBUG
2193 genInterruptibleUsed = true;
2194
2195#if STACK_PROBES
2196 genNeedPrologStackProbe = false;
2197#endif
2198
2199 compiler->fgDebugCheckBBlist();
2200#endif // DEBUG
2201
2202 /* This is the real thing */
2203
2204 genPrepForCompiler();
2205
2206 /* Prepare the emitter */
2207 getEmitter()->Init();
2208#ifdef DEBUG
2209 VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
2210#endif
2211
2212#ifdef DEBUG
2213 if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
2214 {
2215 compiler->opts.disAsm = true;
2216 }
2217
2218 if (compiler->opts.disAsm)
2219 {
2220 printf("; Assembly listing for method %s\n", compiler->info.compFullName);
2221
2222 printf("; Emitting ");
2223
2224 if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
2225 {
2226 printf("SMALL_CODE");
2227 }
2228 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
2229 {
2230 printf("FAST_CODE");
2231 }
2232 else
2233 {
2234 printf("BLENDED_CODE");
2235 }
2236
2237 printf(" for ");
2238
2239 if (compiler->info.genCPU == CPU_X86)
2240 {
2241 printf("generic X86 CPU");
2242 }
2243 else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
2244 {
2245 printf("Pentium 4");
2246 }
2247 else if (compiler->info.genCPU == CPU_X64)
2248 {
2249 if (compiler->canUseVexEncoding())
2250 {
2251 printf("X64 CPU with AVX");
2252 }
2253 else
2254 {
2255 printf("X64 CPU with SSE2");
2256 }
2257 }
2258 else if (compiler->info.genCPU == CPU_ARM)
2259 {
2260 printf("generic ARM CPU");
2261 }
2262 else if (compiler->info.genCPU == CPU_ARM64)
2263 {
2264 printf("generic ARM64 CPU");
2265 }
2266 else
2267 {
2268 printf("unknown architecture");
2269 }
2270
2271#if defined(_TARGET_WINDOWS_)
2272 printf(" - Windows");
2273#elif defined(_TARGET_UNIX_)
2274 printf(" - Unix");
2275#endif
2276
2277 printf("\n");
2278
2279 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
2280 {
2281 printf("; Tier-0 compilation\n");
2282 }
2283 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1))
2284 {
2285 printf("; Tier-1 compilation\n");
2286 }
2287
2288 if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
2289 {
2290 printf("; optimized code\n");
2291 }
2292 else if (compiler->opts.compDbgCode)
2293 {
2294 printf("; debuggable code\n");
2295 }
2296 else if (compiler->opts.MinOpts())
2297 {
2298 printf("; compiler->opts.MinOpts() is true\n");
2299 }
2300 else
2301 {
2302 printf("; unknown optimization flags\n");
2303 }
2304
2305#if DOUBLE_ALIGN
2306 if (compiler->genDoubleAlign())
2307 printf("; double-aligned frame\n");
2308 else
2309#endif
2310 printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
2311
2312 if (genInterruptible)
2313 {
2314 printf("; fully interruptible\n");
2315 }
2316 else
2317 {
2318 printf("; partially interruptible\n");
2319 }
2320
2321 if (compiler->fgHaveProfileData())
2322 {
2323 printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
2324 compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
2325 }
2326
2327 if (compiler->fgProfileData_ILSizeMismatch)
2328 {
2329 printf("; discarded IBC profile data due to mismatch in ILSize\n");
2330 }
2331 }
2332#endif // DEBUG
2333
2334 // We compute the final frame layout before code generation. This is because LSRA
2335 // has already computed exactly the maximum concurrent number of spill temps of each type that are
2336 // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
2337 // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
2338 // memory from the VM.
2339
2340 genFinalizeFrame();
2341
2342 unsigned maxTmpSize = regSet.tmpGetTotalSize(); // This is precise after LSRA has pre-allocated the temps.
2343
2344 getEmitter()->emitBegFN(isFramePointerUsed()
2345#if defined(DEBUG)
2346 ,
2347 (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
2348 !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
2349#endif
2350 ,
2351 maxTmpSize);
2352
2353 /* Now generate code for the function */
2354 genCodeForBBlist();
2355
2356#ifdef DEBUG
2357 // After code generation, dump the frame layout again. It should be the same as before code generation, if code
2358 // generation hasn't touched it (it shouldn't!).
2359 if (verbose)
2360 {
2361 compiler->lvaTableDump();
2362 }
2363#endif // DEBUG
2364
2365 /* We can now generate the function prolog and epilog */
2366
2367 genGeneratePrologsAndEpilogs();
2368
2369 /* Bind jump distances */
2370
2371 getEmitter()->emitJumpDistBind();
2372
2373 /* The code is now complete and final; it should not change after this. */
2374
2375 /* Compute the size of the code sections that we are going to ask the VM
2376 to allocate. Note that this might not be precisely the size of the
2377 code we emit, though it's fatal if we emit more code than the size we
2378 compute here.
2379 (Note: an example of a case where we emit less code would be useful.)
2380 */
2381
2382 getEmitter()->emitComputeCodeSizes();
2383
2384#ifdef DEBUG
2385
2386 // Code to test or stress our ability to run a fallback compile.
2387 // We trigger the fallback here, before asking the VM for any memory,
2388 // because if not, we will leak mem, as the current codebase can't free
2389 // the mem after the emitter asks the VM for it. As this is only a stress
2390 // mode, we only want the functionality, and don't care about the relative
2391 // ugliness of having the failure here.
2392 if (!compiler->jitFallbackCompile)
2393 {
2394 // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
2395 // especially that caused by enabling JIT stress.
2396 if (!JitConfig.JitNoForceFallback())
2397 {
2398 if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
2399 {
2400 NO_WAY_NOASSERT("Stress failure");
2401 }
2402 }
2403 }
2404
2405#endif // DEBUG
2406
2407 /* We've finished collecting all the unwind information for the function. Now reserve
2408 space for it from the VM.
2409 */
2410
2411 compiler->unwindReserve();
2412
2413#if DISPLAY_SIZES
2414
2415 size_t dataSize = getEmitter()->emitDataSize();
2416
2417#endif // DISPLAY_SIZES
2418
2419 void* coldCodePtr;
2420
2421 bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
2422
2423#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
2424 trackedStackPtrsContig = false;
2425#elif defined(_TARGET_ARM_)
2426 // On ARM, due to prespilling of arguments, tracked stk-ptrs may not be contiguous
2427 trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
2428#else
2429 trackedStackPtrsContig = !compiler->opts.compDbgEnC;
2430#endif
2431
2432#ifdef DEBUG
2433 /* We're done generating code for this function */
2434 compiler->compCodeGenDone = true;
2435#endif
2436
2437 compiler->EndPhase(PHASE_GENERATE_CODE);
2438
2439 codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
2440 (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
2441 &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
2442
2443 compiler->EndPhase(PHASE_EMIT_CODE);
2444
2445#ifdef DEBUG
2446 if (compiler->opts.disAsm)
2447 {
2448 printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
2449 compiler->info.compFullName);
2450 printf("; ============================================================\n");
2451 printf(""); // in our logic this causes a flush
2452 }
2453
2454 if (verbose)
2455 {
2456 printf("*************** After end code gen, before unwindEmit()\n");
2457 getEmitter()->emitDispIGlist(true);
2458 }
2459#endif
2460
2461#if EMIT_TRACK_STACK_DEPTH
2462 // Check our max stack level. Needed for fgAddCodeRef().
2463 // We need to relax the assert as our estimation won't include code-gen
2464 // stack changes (which we know don't affect fgAddCodeRef()).
2465 // NOTE: after emitEndCodeGen (including here), emitMaxStackDepth is a
2466 // count of DWORD-sized arguments, NOT argument size in bytes.
2467 {
2468 unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
2469 compiler->compHndBBtabCount + // Return address for locally-called finallys
2470 genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
2471 (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
2472#if defined(UNIX_X86_ABI)
2473 // Convert maxNestedAlignment to DWORD count before adding to maxAllowedStackDepth.
2474 assert(maxNestedAlignment % sizeof(int) == 0);
2475 maxAllowedStackDepth += maxNestedAlignment / sizeof(int);
2476#endif
2477 noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
2478 }
2479#endif // EMIT_TRACK_STACK_DEPTH
2480
2481 *nativeSizeOfCode = codeSize;
2482 compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
2483
2484 // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
2485
2486 // Make sure that the x86 alignment and cache prefetch optimization rules
2487 // were obeyed.
2488
2489 // Don't start a method in the last 7 bytes of a 16-byte alignment area
2490 // unless we are generating SMALL_CODE
2491 // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
2492
2493 /* Now that the code is issued, we can finalize and emit the unwind data */
2494
2495 compiler->unwindEmit(*codePtr, coldCodePtr);
2496
2497 /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
2498
2499 genIPmappingGen();
2500
2501 /* Finalize the Local Var info in terms of generated code */
2502
2503 genSetScopeInfo();
2504
2505#ifdef LATE_DISASM
2506 unsigned finalHotCodeSize;
2507 unsigned finalColdCodeSize;
2508 if (compiler->fgFirstColdBlock != nullptr)
2509 {
2510 // We did some hot/cold splitting. The hot section is always padded out to the
2511 // size we thought it would be, but the cold section is not.
2512 assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
2513 assert(compiler->info.compTotalHotCodeSize > 0);
2514 assert(compiler->info.compTotalColdCodeSize > 0);
2515 finalHotCodeSize = compiler->info.compTotalHotCodeSize;
2516 finalColdCodeSize = codeSize - finalHotCodeSize;
2517 }
2518 else
2519 {
2520 // No hot/cold splitting
2521 assert(codeSize <= compiler->info.compTotalHotCodeSize);
2522 assert(compiler->info.compTotalHotCodeSize > 0);
2523 assert(compiler->info.compTotalColdCodeSize == 0);
2524 finalHotCodeSize = codeSize;
2525 finalColdCodeSize = 0;
2526 }
2527 getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
2528#endif // LATE_DISASM
2529
2530 /* Report any exception handlers to the VM */
2531
2532 genReportEH();
2533
2534#ifdef JIT32_GCENCODER
2535#ifdef DEBUG
2536 void* infoPtr =
2537#endif // DEBUG
2538#endif
2539 // Create and store the GC info for this method.
2540 genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
2541
2542#ifdef DEBUG
2543 FILE* dmpf = jitstdout;
2544
2545 compiler->opts.dmpHex = false;
2546 if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for>"))
2547 {
2548 FILE* codf;
2549 errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
2550 if (ec == 0) // fopen_s returns 0 on success
2551 {
2552 assert(codf);
2553 dmpf = codf;
2554 compiler->opts.dmpHex = true;
2555 }
2556 }
2557 if (compiler->opts.dmpHex)
2558 {
2559 size_t consSize = getEmitter()->emitDataSize();
2560 size_t infoSize = compiler->compInfoBlkSize;
2561
2562 fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
2563 fprintf(dmpf, "\n");
2564
2565 if (codeSize)
2566 {
2567 fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
2568 }
2569 if (consSize)
2570 {
2571 fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
2572 }
2573#ifdef JIT32_GCENCODER
2574 if (infoSize)
2575 fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
2576#endif // JIT32_GCENCODER
2577
2578 fprintf(dmpf, "\n");
2579
2580 if (codeSize)
2581 {
2582 hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
2583 }
2584 if (consSize)
2585 {
2586 hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
2587 }
2588#ifdef JIT32_GCENCODER
2589 if (infoSize)
2590 hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
2591#endif // JIT32_GCENCODER
2592
2593 fflush(dmpf);
2594 }
2595
2596 if (dmpf != jitstdout)
2597 {
2598 fclose(dmpf);
2599 }
2600
2601#endif // DEBUG
2602
2603 /* Tell the emitter that we're done with this function */
2604
2605 getEmitter()->emitEndFN();
2606
2607 /* Shut down the spill logic */
2608
2609 regSet.rsSpillDone();
2610
2611 /* Shut down the temp logic */
2612
2613 regSet.tmpDone();
2614
2615#if DISPLAY_SIZES
2616
2617 grossVMsize += compiler->info.compILCodeSize;
2618 totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
2619 grossNCsize += codeSize + dataSize;
2620
2621#endif // DISPLAY_SIZES
2622
2623 compiler->EndPhase(PHASE_EMIT_GCEH);
2624}
2625
2626/*****************************************************************************
2627 *
2628 * Report EH clauses to the VM
2629 */
2630
2631void CodeGen::genReportEH()
2632{
2633 if (compiler->compHndBBtabCount == 0)
2634 {
2635 return;
2636 }
2637
2638#ifdef DEBUG
2639 if (compiler->opts.dspEHTable)
2640 {
2641 printf("*************** EH table for %s\n", compiler->info.compFullName);
2642 }
2643#endif // DEBUG
2644
2645 unsigned XTnum;
2646 EHblkDsc* HBtab;
2647 EHblkDsc* HBtabEnd;
2648
2649 bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI);
2650
2651 unsigned EHCount = compiler->compHndBBtabCount;
2652
2653#if FEATURE_EH_FUNCLETS
2654 // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
2655 // VM.
2656 unsigned duplicateClauseCount = 0;
2657 unsigned enclosingTryIndex;
2658
2659 // Duplicate clauses are not used by CoreRT ABI
2660 if (!isCoreRTABI)
2661 {
2662 for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
2663 {
2664 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
2665 // ignoring 'mutual protect' trys
2666 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2667 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2668 {
2669 ++duplicateClauseCount;
2670 }
2671 }
2672 EHCount += duplicateClauseCount;
2673 }
2674
2675#if FEATURE_EH_CALLFINALLY_THUNKS
2676 unsigned clonedFinallyCount = 0;
2677
2678 // Duplicate clauses are not used by CoreRT ABI
2679 if (!isCoreRTABI)
2680 {
2681 // We don't keep track of how many cloned finallys there are. So, go through and count.
2682 // We do a quick pass first through the EH table to see if there are any try/finally
2683 // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
2684
2685 bool anyFinallys = false;
2686 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2687 HBtab < HBtabEnd; HBtab++)
2688 {
2689 if (HBtab->HasFinallyHandler())
2690 {
2691 anyFinallys = true;
2692 break;
2693 }
2694 }
2695 if (anyFinallys)
2696 {
2697 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
2698 {
2699 if (block->bbJumpKind == BBJ_CALLFINALLY)
2700 {
2701 ++clonedFinallyCount;
2702 }
2703 }
2704
2705 EHCount += clonedFinallyCount;
2706 }
2707 }
2708#endif // FEATURE_EH_CALLFINALLY_THUNKS
2709
2710#endif // FEATURE_EH_FUNCLETS
2711
2712#ifdef DEBUG
2713 if (compiler->opts.dspEHTable)
2714 {
2715#if FEATURE_EH_FUNCLETS
2716#if FEATURE_EH_CALLFINALLY_THUNKS
2717 printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
2718 compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
2719 assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
2720#else // !FEATURE_EH_CALLFINALLY_THUNKS
2721 printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
2722 compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
2723 assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
2724#endif // !FEATURE_EH_CALLFINALLY_THUNKS
2725#else // !FEATURE_EH_FUNCLETS
2726 printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
2727 assert(compiler->compHndBBtabCount == EHCount);
2728#endif // !FEATURE_EH_FUNCLETS
2729 }
2730#endif // DEBUG
2731
2732 // Tell the VM how many EH clauses to expect.
2733 compiler->eeSetEHcount(EHCount);
2734
2735 XTnum = 0; // This is the index we pass to the VM
2736
2737 for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
2738 HBtab < HBtabEnd; HBtab++)
2739 {
2740 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2741
2742 tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
2743 hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
2744
2745 tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2746 : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
2747 hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2748 : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
2749
2750 if (HBtab->HasFilter())
2751 {
2752 hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
2753 }
2754 else
2755 {
2756 hndTyp = HBtab->ebdTyp;
2757 }
2758
2759 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
2760
2761 if (isCoreRTABI && (XTnum > 0))
2762 {
2763 // For CoreRT, CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers same
2764 // try block as the previous one. The runtime cannot reliably infer this information from
2765 // native code offsets, because different try blocks can have the same offsets. An alternative
2766 // solution to this problem would be to insert extra nops to ensure that different try
2767 // blocks have different offsets.
2768 if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
2769 {
2770 // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
2771 // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
2772 // IL as "try { try {} catch {} catch {} } finally {}".
2773 assert(HBtab->HasCatchHandler());
2774 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
2775 }
2776 }
2777
2778 // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
2779 // the fields aren't accurate.
2780
2781 CORINFO_EH_CLAUSE clause;
2782 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2783 clause.Flags = flags;
2784 clause.TryOffset = tryBeg;
2785 clause.TryLength = tryEnd;
2786 clause.HandlerOffset = hndBeg;
2787 clause.HandlerLength = hndEnd;
2788
2789 assert(XTnum < EHCount);
2790
2791 // Tell the VM about this EH clause.
2792 compiler->eeSetEHinfo(XTnum, &clause);
2793
2794 ++XTnum;
2795 }
2796
2797#if FEATURE_EH_FUNCLETS
2798 // Now output duplicated clauses.
2799 //
2800 // If a funclet has been created by moving a handler out of a try region that it was originally nested
2801 // within, then we need to report a "duplicate" clause representing the fact that an exception in that
2802 // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
2803 // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
2804 // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
2805 // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
2806 // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
2807 // region.
2808 //
2809 // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
2810 // try or handler region):
2811 //
2812 // A
2813 // try (1) {
2814 // B
2815 // try (2) {
2816 // C
2817 // } catch (3) {
2818 // D
2819 // } catch (4) {
2820 // E
2821 // }
2822 // F
2823 // } catch (5) {
2824 // G
2825 // }
2826 // H
2827 //
2828 // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
2829 // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
2830 // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
2831 // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
2832 // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
2833 // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
2834 // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
2835 // representing try (1) protecting the new funclets catch (3) and (4).
2836 // The code will be generated as follows:
2837 //
2838 // ABCFH // "main" code
2839 // D // funclet
2840 // E // funclet
2841 // G // funclet
2842 //
2843 // The EH regions are:
2844 //
2845 // C -> D
2846 // C -> E
2847 // BCF -> G
2848 // D -> G // "duplicate" clause
2849 // E -> G // "duplicate" clause
2850 //
2851 // Note that we actually need to generate one of these additional "duplicate" clauses for every
2852 // region the funclet is nested in. Take this example:
2853 //
2854 // A
2855 // try (1) {
2856 // B
2857 // try (2,3) {
2858 // C
2859 // try (4) {
2860 // D
2861 // try (5,6) {
2862 // E
2863 // } catch {
2864 // F
2865 // } catch {
2866 // G
2867 // }
2868 // H
2869 // } catch {
2870 // I
2871 // }
2872 // J
2873 // } catch {
2874 // K
2875 // } catch {
2876 // L
2877 // }
2878 // M
2879 // } catch {
2880 // N
2881 // }
2882 // O
2883 //
2884 // When we pull out funclets, we get the following generated code:
2885 //
2886 // ABCDEHJMO // "main" function
2887 // F // funclet
2888 // G // funclet
2889 // I // funclet
2890 // K // funclet
2891 // L // funclet
2892 // N // funclet
2893 //
2894 // And the EH regions we report to the VM (in order: main clauses
2895 // first, in most-to-least nested order; funclets ("duplicated clauses")
2896 // last, in most-to-least nested order) are:
2897 //
2898 // E -> F
2899 // E -> G
2900 // DEH -> I
2901 // CDEHJ -> K
2902 // CDEHJ -> L
2903 // BCDEHJM -> N
2904 // F -> I // funclet clause #1 for F
2905 // F -> K // funclet clause #2 for F
2906 // F -> L // funclet clause #3 for F
2907 // F -> N // funclet clause #4 for F
2908 // G -> I // funclet clause #1 for G
2909 // G -> K // funclet clause #2 for G
2910 // G -> L // funclet clause #3 for G
2911 // G -> N // funclet clause #4 for G
2912 // I -> K // funclet clause #1 for I
2913 // I -> L // funclet clause #2 for I
2914 // I -> N // funclet clause #3 for I
2915 // K -> N // funclet clause #1 for K
2916 // L -> N // funclet clause #1 for L
2917 //
2918 // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
2919 // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
2920 // to add a clause "F -> G" because F is NOT protected by G, but we still have
2921 // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
2922 //
2923 // The overall ordering of the clauses is still the same: most-to-least nesting,
2924 // after front-to-back start offset. Because we place the funclets at the end,
2925 // these new clauses should also go at the end by this ordering.
2926 //
2927
2928 if (duplicateClauseCount > 0)
2929 {
2930 unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
2931 unsigned XTnum2;
2932 for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
2933 {
2934 unsigned enclosingTryIndex;
2935
2936 EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
2937
2938 for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
2939 // ignoring 'mutual protect' trys
2940 enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
2941 enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
2942 {
2943 // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
2944 // that will have the enclosing try protecting the funclet.
2945
2946 noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
2947 // greater EH table index
2948
2949 EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
2950
2951 // The try region is the handler of the funclet. Note that for filters, we don't protect the
2952 // filter region, only the filter handler region. This is because exceptions in filters never
2953 // escape; the VM swallows them.
2954
2955 BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
2956 BasicBlock* bbTryLast = fletTab->ebdHndLast;
2957
2958 BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
2959 BasicBlock* bbHndLast = encTab->ebdHndLast;
2960
2961 UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
2962
2963 tryBeg = compiler->ehCodeOffset(bbTryBeg);
2964 hndBeg = compiler->ehCodeOffset(bbHndBeg);
2965
2966 tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2967 : compiler->ehCodeOffset(bbTryLast->bbNext);
2968 hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
2969 : compiler->ehCodeOffset(bbHndLast->bbNext);
2970
2971 if (encTab->HasFilter())
2972 {
2973 hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
2974 }
2975 else
2976 {
2977 hndTyp = encTab->ebdTyp;
2978 }
2979
2980 CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
2981
2982 // Tell the VM this is an extra clause caused by moving funclets out of line.
2983 flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
2984
2985 // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
2986 // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
2987 // instruction immediately after the 'try' body. So, it really could be more accurately named
2988 // "TryEndOffset".
2989
2990 CORINFO_EH_CLAUSE clause;
2991 clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
2992 clause.Flags = flags;
2993 clause.TryOffset = tryBeg;
2994 clause.TryLength = tryEnd;
2995 clause.HandlerOffset = hndBeg;
2996 clause.HandlerLength = hndEnd;
2997
2998 assert(XTnum < EHCount);
2999
3000 // Tell the VM about this EH clause (a duplicated clause).
3001 compiler->eeSetEHinfo(XTnum, &clause);
3002
3003 ++XTnum;
3004 ++reportedDuplicateClauseCount;
3005
3006#ifndef DEBUG
3007 if (duplicateClauseCount == reportedDuplicateClauseCount)
3008 {
3009 break; // we've reported all of them; no need to continue looking
3010 }
3011#endif // !DEBUG
3012
3013 } // for each 'true' enclosing 'try'
3014 } // for each EH table entry
3015
3016 assert(duplicateClauseCount == reportedDuplicateClauseCount);
3017 } // if (duplicateClauseCount > 0)
3018
3019#if FEATURE_EH_CALLFINALLY_THUNKS
3020 if (clonedFinallyCount > 0)
3021 {
3022 unsigned reportedClonedFinallyCount = 0;
3023 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
3024 {
3025 if (block->bbJumpKind == BBJ_CALLFINALLY)
3026 {
3027 UNATIVE_OFFSET hndBeg, hndEnd;
3028
3029 hndBeg = compiler->ehCodeOffset(block);
3030
3031 // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
3032 // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
3033 BasicBlock* bbLabel = block->bbNext;
3034 if (block->isBBCallAlwaysPair())
3035 {
3036 bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
3037 }
3038 if (bbLabel == nullptr)
3039 {
3040 hndEnd = compiler->info.compNativeCodeSize;
3041 }
3042 else
3043 {
3044 assert(bbLabel->bbEmitCookie != nullptr);
3045 hndEnd = compiler->ehCodeOffset(bbLabel);
3046 }
3047
3048 CORINFO_EH_CLAUSE clause;
3049 clause.ClassToken = 0; // unused
3050 clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
3051 clause.TryOffset = hndBeg;
3052 clause.TryLength = hndBeg;
3053 clause.HandlerOffset = hndBeg;
3054 clause.HandlerLength = hndEnd;
3055
3056 assert(XTnum < EHCount);
3057
3058 // Tell the VM about this EH clause (a cloned finally clause).
3059 compiler->eeSetEHinfo(XTnum, &clause);
3060
3061 ++XTnum;
3062 ++reportedClonedFinallyCount;
3063
3064#ifndef DEBUG
3065 if (clonedFinallyCount == reportedClonedFinallyCount)
3066 {
3067 break; // we're done; no need to keep looking
3068 }
3069#endif // !DEBUG
3070 } // block is BBJ_CALLFINALLY
3071 } // for each block
3072
3073 assert(clonedFinallyCount == reportedClonedFinallyCount);
3074 } // if (clonedFinallyCount > 0)
3075#endif // FEATURE_EH_CALLFINALLY_THUNKS
3076
3077#endif // FEATURE_EH_FUNCLETS
3078
3079 assert(XTnum == EHCount);
3080}
3081
3082//----------------------------------------------------------------------
3083// genUseOptimizedWriteBarriers: Determine if an optimized write barrier
3084// helper should be used.
3085//
3086// Arguments:
3087// wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening.
3088//
3089// Return Value:
3090// true if an optimized write barrier helper should be used, false otherwise.
3091// Note: only x86 implements register-specific source optimized write
3092// barriers currently.
3093//
3094bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf)
3095{
3096#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3097#ifdef DEBUG
3098 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
3099#else
3100 return true;
3101#endif
3102#else
3103 return false;
3104#endif
3105}
3106
3107//----------------------------------------------------------------------
3108// genUseOptimizedWriteBarriers: Determine if an optimized write barrier
3109// helper should be used.
3110//
3111// This has the same functionality as the version of
3112// genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids
3113// determining what the required write barrier form is, if possible.
3114//
3115// Arguments:
3116// tgt - target tree of write (e.g., GT_STOREIND)
3117// assignVal - tree with value to write
3118//
3119// Return Value:
3120// true if an optimized write barrier helper should be used, false otherwise.
3121// Note: only x86 implements register-specific source optimized write
3122// barriers currently.
3123//
3124bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTree* tgt, GenTree* assignVal)
3125{
3126#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3127#ifdef DEBUG
3128 GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
3129 return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
3130#else
3131 return true;
3132#endif
3133#else
3134 return false;
3135#endif
3136}
3137
3138//----------------------------------------------------------------------
3139// genWriteBarrierHelperForWriteBarrierForm: Given a write node requiring a write
3140// barrier, and the write barrier form required, determine the helper to call.
3141//
3142// Arguments:
3143// tgt - target tree of write (e.g., GT_STOREIND)
3144// wbf - already computed write barrier form to use
3145//
3146// Return Value:
3147// Write barrier helper to use.
3148//
3149// Note: do not call this function to get an optimized write barrier helper (e.g.,
3150// for x86).
3151//
3152CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
3153{
3154 noway_assert(tgt->gtOper == GT_STOREIND);
3155
3156 CorInfoHelpFunc helper = CORINFO_HELP_ASSIGN_REF;
3157
3158#ifdef DEBUG
3159 if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
3160 {
3161 helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
3162 }
3163 else
3164#endif
3165 if (tgt->gtOper != GT_CLS_VAR)
3166 {
3167 if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
3168 {
3169 if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
3170 {
3171 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
3172 }
3173 else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
3174 {
3175 helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
3176 }
3177 }
3178 }
3179 assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
3180 ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
3181 (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
3182 ((helper == CORINFO_HELP_ASSIGN_REF) &&
3183 (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));
3184
3185 return helper;
3186}
3187
3188//----------------------------------------------------------------------
3189// genGCWriteBarrier: Generate a write barrier for a node.
3190//
3191// Arguments:
3192// tgt - target tree of write (e.g., GT_STOREIND)
3193// wbf - already computed write barrier form to use
3194//
3195void CodeGen::genGCWriteBarrier(GenTree* tgt, GCInfo::WriteBarrierForm wbf)
3196{
3197 CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(tgt, wbf);
3198
3199#ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
3200 // We classify the "tgt" trees as follows:
3201 // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
3202 // IND [-> ADDR -> IND] -> { GT_LCL_VAR, ADD({GT_LCL_VAR}, X), ADD(X, (GT_LCL_VAR)) }
3203 // then let "v" be the GT_LCL_VAR.
3204 // * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
3205 // * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
3206 // * Otherwise, classify as CWBKind_OtherByRefLocal.
3207 // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, classify as CWBKind_AddrOfLocal.
3208 // Otherwise, classify as CWBKind_Unclassified.
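// For example (illustrative only): in a method "void M(ref object o, object x) { o = x; }",
// the store target is IND(LCL_VAR<byref> 'o') where 'o' is a TYP_BYREF parameter, so the
// barrier site is classified as CWBKind_ByRefArg.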
3209
3210 CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
3211 if (tgt->gtOper == GT_IND)
3212 {
3213 GenTree* lcl = NULL;
3214
3215 GenTree* indArg = tgt->gtOp.gtOp1;
3216 if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
3217 {
3218 indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
3219 }
3220 if (indArg->gtOper == GT_LCL_VAR)
3221 {
3222 lcl = indArg;
3223 }
3224 else if (indArg->gtOper == GT_ADD)
3225 {
3226 if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR)
3227 {
3228 lcl = indArg->gtOp.gtOp1;
3229 }
3230 else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR)
3231 {
3232 lcl = indArg->gtOp.gtOp2;
3233 }
3234 }
3235 if (lcl != NULL)
3236 {
3237 wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
3238 unsigned lclNum = lcl->AsLclVar()->GetLclNum();
3239 if (lclNum == compiler->info.compRetBuffArg)
3240 {
3241 wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
3242 }
3243 else
3244 {
3245 LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
3246 if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
3247 {
3248 wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
3249 }
3250 }
3251 }
3252 else
3253 {
3254 // We should have eliminated the barrier for this case.
3255 assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
3256 }
3257 }
3258
3259 if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
3260 {
3261#if 0
3262#ifdef DEBUG
3263 // Enable this to sample the unclassified trees.
3264 static int unclassifiedBarrierSite = 0;
3265 if (wbKind == CWBKind_Unclassified)
3266 {
3267 unclassifiedBarrierSite++;
3268 printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
3269 }
3270#endif // DEBUG
3271#endif // 0
3272 AddStackLevel(4);
3273 inst_IV(INS_push, wbKind);
3274 genEmitHelperCall(helper,
3275 4, // argSize
3276 EA_PTRSIZE); // retSize
3277 SubtractStackLevel(4);
3278 }
3279 else
3280 {
3281 genEmitHelperCall(helper,
3282 0, // argSize
3283 EA_PTRSIZE); // retSize
3284 }
3285
3286#else // !FEATURE_COUNT_GC_WRITE_BARRIERS
3287 genEmitHelperCall(helper,
3288 0, // argSize
3289 EA_PTRSIZE); // retSize
3290#endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
3291}
3292
3293/*
3294XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3295XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3296XX XX
3297XX Prolog / Epilog XX
3298XX XX
3299XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3300XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
3301*/
3302
3303/*****************************************************************************
3304 *
3305 * Generates code for moving incoming register arguments to their
3306 * assigned location, in the function prolog.
3307 */
3308
3309#ifdef _PREFAST_
3310#pragma warning(push)
3311#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3312#endif
3313void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
3314{
3315#ifdef DEBUG
3316 if (verbose)
3317 {
3318 printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
3319 }
3320#endif
3321
3322 unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
3323 unsigned argNum; // current argNum, always in [0..argMax-1]
3324 unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
3325 unsigned regArgNum; // index into the regArgTab[] table
3326 regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
3327 bool doingFloat = regState->rsIsFloat;
3328
3329 // We should be generating the prolog block when we are called
3330 assert(compiler->compGeneratingProlog);
3331
3332 // We expect to have some registers of the type we are doing that are LiveIn; otherwise we don't need to be called.
3333 noway_assert(regArgMaskLive != 0);
3334
3335 // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
3336 // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid indexes are 0-8
3337 //
3338 // The regArgTab can always have unused entries:
3339 // for example, if an architecture always increments the arg register number but uses either
3340 // an integer register or a floating point register to hold the next argument,
3341 // then with a mix of float and integer args you could have:
3342 //
3343 // sampleMethod(int i, float x, int j, float y, int k, float z);
3344 // r0, r2 and r4 as valid integer arguments with argMax as 5
3345 // and f1, f3 and f5 as valid floating point arguments with argMax as 6
3346 // The first one is doingFloat==false and the second one is doingFloat==true
3347 //
3348 // If a fixed return buffer (in r8) was also present then the first one would become:
3349 // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
3350 //
3351
3352 argMax = regState->rsCalleeRegArgCount;
3353 fixedRetBufIndex = (unsigned)-1; // Invalid value
3354
3355 // If necessary we will select a correct xtraReg for circular floating point args later.
3356 if (doingFloat)
3357 {
3358 xtraReg = REG_NA;
3359 noway_assert(argMax <= MAX_FLOAT_REG_ARG);
3360 }
3361 else // we are doing the integer registers
3362 {
3363 noway_assert(argMax <= MAX_REG_ARG);
3364 if (hasFixedRetBuffReg())
3365 {
3366 fixedRetBufIndex = theFixedRetBuffArgNum();
3367 // We have an additional integer register argument when hasFixedRetBuffReg() is true
3368 argMax = fixedRetBufIndex + 1;
3369 assert(argMax == (MAX_REG_ARG + 1));
3370 }
3371 }
3372
3373 //
3374 // Construct a table with the register arguments, for detecting circular and
3375 // non-circular dependencies between the register arguments. A dependency is when
3376 // an argument register Rn needs to be moved to register Rm that is also an argument
3377 // register. The table is constructed in the order the arguments are passed in
3378 // registers: the first register argument is in regArgTab[0], the second in
3379 // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
3380 // at an even index. The regArgTab is indexed from 0 to argMax - 1.
3381 // Note that due to an extra argument register for ARM64 (i.e. theFixedRetBuffReg())
3382 // we have increased the allocated size of the regArgTab[] by one.
3383 //
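// For illustration, the simplest circular dependency: the argument that arrives in r0 must be
// homed in r1 while the argument that arrives in r1 must be homed in r0. Neither move can be
// done first without destroying the other value, so the cycle is broken by routing one value
// through 'xtraReg' (or by swapping, where the target supports it).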
3384 struct regArgElem
3385 {
3386 unsigned varNum; // index into compiler->lvaTable[] for this register argument
3387#if defined(UNIX_AMD64_ABI)
3388 var_types type; // the Jit type of this regArgTab entry
3389#endif // defined(UNIX_AMD64_ABI)
3390 unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
3391 // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
3392 // argument register number 'x'. Only used when circular = true.
3393 char slot; // 0 means the register is not used for a register argument
3394 // 1 means the first part of a register argument
3395 // 2, 3 or 4 means the second, third or fourth part of a multireg argument
3396 bool stackArg; // true if the argument gets homed to the stack
3397 bool processed; // true after we've processed the argument (and it is in its final location)
3398 bool circular; // true if this register participates in a circular dependency loop.
3399
3400#ifdef UNIX_AMD64_ABI
3401
3402 // For UNIX AMD64 struct passing, the type of the register argument slot can differ from
3403 // the type of the lclVar in ways that are not ascertainable from lvType.
3404 // So, for that case we retain the type of the register in the regArgTab.
3405
3406 var_types getRegType(Compiler* compiler)
3407 {
3408 return type; // UNIX_AMD64 implementation
3409 }
3410
3411#else // !UNIX_AMD64_ABI
3412
3413 // In other cases, we simply use the type of the lclVar to determine the type of the register.
3414 var_types getRegType(Compiler* compiler)
3415 {
3416 const LclVarDsc& varDsc = compiler->lvaTable[varNum];
3417 // Check if this is an HFA register arg and return the HFA type
3418 if (varDsc.lvIsHfaRegArg())
3419 {
3420#if defined(_TARGET_WINDOWS_)
3421 // Cannot have hfa types on windows arm targets
3422 // in vararg methods.
3423 assert(!compiler->info.compIsVarArgs);
3424#endif // defined(_TARGET_WINDOWS_)
3425 return varDsc.GetHfaType();
3426 }
3427 return compiler->mangleVarArgsType(varDsc.lvType);
3428 }
3429
3430#endif // !UNIX_AMD64_ABI
3431 } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
3432
3433 unsigned varNum;
3434 LclVarDsc* varDsc;
3435
3436 for (varNum = 0; varNum < compiler->lvaCount; ++varNum)
3437 {
3438 varDsc = compiler->lvaTable + varNum;
3439
3440 // Is this variable a register arg?
3441 if (!varDsc->lvIsParam)
3442 {
3443 continue;
3444 }
3445
3446 if (!varDsc->lvIsRegArg)
3447 {
3448 continue;
3449 }
3450
3451 // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
3452 // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
3453 // We will use the lvStructField if we have a PROMOTION_TYPE_INDEPENDENT promoted struct field; otherwise
3454 // we use the original TYP_STRUCT argument.
3455 //
3456 if (varDsc->lvPromoted || varDsc->lvIsStructField)
3457 {
3458 LclVarDsc* parentVarDsc = varDsc;
3459 if (varDsc->lvIsStructField)
3460 {
3461 assert(!varDsc->lvPromoted);
3462 parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
3463 }
3464
3465 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
3466
3467 if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
3468 {
3469 noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
3470
3471 // For register arguments that are independent promoted structs we put the promoted field varNum in the
3472 // regArgTab[]
3473 if (varDsc->lvPromoted)
3474 {
3475 continue;
3476 }
3477 }
3478 else
3479 {
3480 // For register arguments that are not independent promoted structs we put the parent struct varNum in
3481 // the regArgTab[]
3482 if (varDsc->lvIsStructField)
3483 {
3484 continue;
3485 }
3486 }
3487 }
3488
3489 var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet());
3490 // Change regType to the HFA type when we have a HFA argument
3491 if (varDsc->lvIsHfaRegArg())
3492 {
3493#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3494 if (compiler->info.compIsVarArgs)
3495 {
3496 assert(!"Illegal incoming HFA arg encountered in Vararg method.");
3497 }
3498#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3499 regType = varDsc->GetHfaType();
3500 }
3501
3502#if defined(UNIX_AMD64_ABI)
3503 if (!varTypeIsStruct(regType))
3504#endif // defined(UNIX_AMD64_ABI)
3505 {
3506 // A struct might be passed partially in XMM register for System V calls.
3507 // So a single arg might use both register files.
3508 if (isFloatRegType(regType) != doingFloat)
3509 {
3510 continue;
3511 }
3512 }
3513
3514 int slots = 0;
3515
3516#if defined(UNIX_AMD64_ABI)
3517 if (varTypeIsStruct(varDsc))
3518 {
3519 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
3520 assert(typeHnd != nullptr);
3521 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3522 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
3523 if (!structDesc.passedInRegisters)
3524 {
3525 // The var is not passed in registers.
3526 continue;
3527 }
3528
3529 unsigned firstRegSlot = 0;
3530 for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
3531 {
3532 regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
3533 var_types regType;
3534
3535#ifdef FEATURE_SIMD
3536 // Assumption 1:
3537 // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
3538 // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
3539 // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
3540 // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
3541 //
3542 // Assumption 2:
3543 // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
3544 // registers or on stack, the upper most 4-bytes will be zero.
3545 //
3546 // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
3547 // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
3548 // invalid.
3549 //
3550 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
3551 // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
3552 // bytes. In case of Vector3 returns, the Caller allocates a zero initialized Vector3 local and
3553 // passes it as the retBuf arg, and the Callee method writes only 12 bytes to retBuf. For this reason,
3554 //
3555 // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
3556 // Vector3 return values are returned in two return registers and the Caller assembles them into a
3557 // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
3558 // type args in the prolog and of the Vector3 type return value of a call.
3559
3560 if (varDsc->lvType == TYP_SIMD12)
3561 {
3562 regType = TYP_DOUBLE;
3563 }
3564 else
3565#endif
3566 {
3567 regType = compiler->GetEightByteType(structDesc, slotCounter);
3568 }
3569
3570 regArgNum = genMapRegNumToRegArgNum(regNum, regType);
3571
3572 if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
3573 (doingFloat && (structDesc.IsSseSlot(slotCounter))))
3574 {
3575 // Store the reg for the first slot.
3576 if (slots == 0)
3577 {
3578 firstRegSlot = regArgNum;
3579 }
3580
3581 // Bingo - add it to our table
3582 noway_assert(regArgNum < argMax);
3583 noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
3584 // not be multiple vars representing this argument
3585 // register)
3586 regArgTab[regArgNum].varNum = varNum;
3587 regArgTab[regArgNum].slot = (char)(slotCounter + 1);
3588 regArgTab[regArgNum].type = regType;
3589 slots++;
3590 }
3591 }
3592
3593 if (slots == 0)
3594 {
3595 continue; // Nothing to do for this regState set.
3596 }
3597
3598 regArgNum = firstRegSlot;
3599 }
3600 else
3601#endif // defined(UNIX_AMD64_ABI)
3602 {
3603 // Bingo - add it to our table
3604 regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
3605
3606 noway_assert(regArgNum < argMax);
3607 // We better not have added it already (there better not be multiple vars representing this argument
3608 // register)
3609 noway_assert(regArgTab[regArgNum].slot == 0);
3610
3611#if defined(UNIX_AMD64_ABI)
3612 // Set the register type.
3613 regArgTab[regArgNum].type = regType;
3614#endif // defined(UNIX_AMD64_ABI)
3615
3616 regArgTab[regArgNum].varNum = varNum;
3617 regArgTab[regArgNum].slot = 1;
3618
3619 slots = 1;
3620
3621#if FEATURE_MULTIREG_ARGS
3622 if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
3623 {
3624 if (varDsc->lvIsHfaRegArg())
3625 {
3626 // We have an HFA argument, set slots to the number of registers used
3627 slots = varDsc->lvHfaSlots();
3628 }
3629 else
3630 {
3631 // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
3632 assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
3633 // We have a non-HFA multireg argument, set slots to two
3634 slots = 2;
3635 }
3636
3637 // Note that regArgNum+1 represents an argument index not an actual argument register.
3638 // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
3639
3640 // This is the setup for the rest of a multireg struct arg
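                // For example, a two-slot struct whose first slot maps to regArgNum 'k' occupies
                // regArgTab entries k and k+1, with slot values 1 and 2 respectively.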
3641
3642 for (int i = 1; i < slots; i++)
3643 {
3644 noway_assert((regArgNum + i) < argMax);
3645
3646 // We better not have added it already (there better not be multiple vars representing this argument
3647 // register)
3648 noway_assert(regArgTab[regArgNum + i].slot == 0);
3649
3650 regArgTab[regArgNum + i].varNum = varNum;
3651 regArgTab[regArgNum + i].slot = (char)(i + 1);
3652 }
3653 }
3654#endif // FEATURE_MULTIREG_ARGS
3655 }
3656
3657#ifdef _TARGET_ARM_
3658 int lclSize = compiler->lvaLclSize(varNum);
3659
3660 if (lclSize > REGSIZE_BYTES)
3661 {
3662 unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
3663 slots = lclSize / REGSIZE_BYTES;
3664 if (regArgNum + slots > maxRegArgNum)
3665 {
3666 slots = maxRegArgNum - regArgNum;
3667 }
3668 }
3669 C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
3670 assert(slots < INT8_MAX);
3671 for (char i = 1; i < slots; i++)
3672 {
3673 regArgTab[regArgNum + i].varNum = varNum;
3674 regArgTab[regArgNum + i].slot = i + 1;
3675 }
3676#endif // _TARGET_ARM_
3677
3678 for (int i = 0; i < slots; i++)
3679 {
3680 regType = regArgTab[regArgNum + i].getRegType(compiler);
3681 regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
3682
3683#if !defined(UNIX_AMD64_ABI)
3684 assert((i > 0) || (regNum == varDsc->lvArgReg));
3685#endif // !defined(UNIX_AMD64_ABI)
3686
3687            // Is the arg dead on entry to the method?
3688
3689 if ((regArgMaskLive & genRegMask(regNum)) == 0)
3690 {
3691 if (varDsc->lvTrackedNonStruct())
3692 {
3693 // We may now see some tracked locals with zero refs.
3694 // See Lowering::DoPhase. Tolerate these.
3695 if (varDsc->lvRefCnt() > 0)
3696 {
3697 noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
3698 }
3699 }
3700 else
3701 {
3702#ifdef _TARGET_X86_
3703 noway_assert(varDsc->lvType == TYP_STRUCT);
3704#else // !_TARGET_X86_
3705 // For LSRA, it may not be in regArgMaskLive if it has a zero
3706 // refcnt. This is in contrast with the non-LSRA case in which all
3707 // non-tracked args are assumed live on entry.
3708 noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) ||
3709 (varDsc->lvAddrExposed && compiler->info.compIsVarArgs) ||
3710 (varDsc->lvAddrExposed && compiler->opts.compUseSoftFP));
3711#endif // !_TARGET_X86_
3712 }
3713 // Mark it as processed and be done with it
3714 regArgTab[regArgNum + i].processed = true;
3715 goto NON_DEP;
3716 }
3717
3718#ifdef _TARGET_ARM_
3719            // On ARM, when the varDsc is a struct arg (or pre-spilled due to varargs), the initReg/xtraReg
3720            // could be equal to lvArgReg. The pre-spilled registers are also not considered live,
3721            // since they've already been spilled.
3722 //
3723 if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
3724#endif // _TARGET_ARM_
3725 {
3726#if !defined(UNIX_AMD64_ABI)
3727 noway_assert(xtraReg != (varDsc->lvArgReg + i));
3728#endif
3729 noway_assert(regArgMaskLive & genRegMask(regNum));
3730 }
3731
3732 regArgTab[regArgNum + i].processed = false;
3733
3734 /* mark stack arguments since we will take care of those first */
3735            regArgTab[regArgNum + i].stackArg = !varDsc->lvIsInReg();
3736
3737 /* If it goes on the stack or in a register that doesn't hold
3738 * an argument anymore -> CANNOT form a circular dependency */
3739
3740 if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
3741 {
3742 /* will trash another argument -> possible dependency
3743 * We may need several passes after the table is constructed
3744 * to decide on that */
3745
3746 /* Maybe the argument stays in the register (IDEAL) */
3747
3748 if ((i == 0) && (varDsc->lvRegNum == regNum))
3749 {
3750 goto NON_DEP;
3751 }
3752
3753#if !defined(_TARGET_64BIT_)
3754 if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
3755 {
3756 goto NON_DEP;
3757 }
3758 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
3759 {
3760 goto NON_DEP;
3761 }
3762
3763 if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
3764 (REG_NEXT(varDsc->lvRegNum) == regNum))
3765 {
3766 goto NON_DEP;
3767 }
3768#endif // !defined(_TARGET_64BIT_)
3769 regArgTab[regArgNum + i].circular = true;
3770 }
3771 else
3772 {
3773 NON_DEP:
3774 regArgTab[regArgNum + i].circular = false;
3775
3776 /* mark the argument register as free */
3777 regArgMaskLive &= ~genRegMask(regNum);
3778 }
3779 }
3780 }
3781
3782 /* Find the circular dependencies for the argument registers, if any.
3783 * A circular dependency is a set of registers R1, R2, ..., Rn
3784 * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
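    /* For example (a two-register cycle): the argument arriving in R1 is allocated to R2 as its home
     * register while the argument arriving in R2 is allocated to R1; either move on its own would
     * trash the other argument's incoming value, so the pair must be resolved together. */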
3785
3786 bool change = true;
3787 if (regArgMaskLive)
3788 {
3789 /* Possible circular dependencies still exist; the previous pass was not enough
3790 * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
3791
3792 while (change)
3793 {
3794 change = false;
3795
3796 for (argNum = 0; argNum < argMax; argNum++)
3797 {
3798 // If we already marked the argument as non-circular then continue
3799
3800 if (!regArgTab[argNum].circular)
3801 {
3802 continue;
3803 }
3804
3805 if (regArgTab[argNum].slot == 0) // Not a register argument
3806 {
3807 continue;
3808 }
3809
3810 varNum = regArgTab[argNum].varNum;
3811 noway_assert(varNum < compiler->lvaCount);
3812 varDsc = compiler->lvaTable + varNum;
3813 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
3814
3815 /* cannot possibly have stack arguments */
3816 noway_assert(varDsc->lvIsInReg());
3817 noway_assert(!regArgTab[argNum].stackArg);
3818
3819 var_types regType = regArgTab[argNum].getRegType(compiler);
3820 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
3821
3822 regNumber destRegNum = REG_NA;
3823 if (regArgTab[argNum].slot == 1)
3824 {
3825 destRegNum = varDsc->lvRegNum;
3826 }
3827#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_64BIT_)
3828 else
3829 {
3830 assert(regArgTab[argNum].slot == 2);
3831 assert(argNum > 0);
3832 assert(regArgTab[argNum - 1].slot == 1);
3833 assert(regArgTab[argNum - 1].varNum == varNum);
3834 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
3835 regArgMaskLive &= ~genRegMask(regNum);
3836 regArgTab[argNum].circular = false;
3837 change = true;
3838 continue;
3839 }
3840#elif !defined(_TARGET_64BIT_)
3841 else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
3842 {
3843 destRegNum = varDsc->lvOtherReg;
3844 }
3845 else
3846 {
3847 assert(regArgTab[argNum].slot == 2);
3848 assert(varDsc->TypeGet() == TYP_DOUBLE);
3849 destRegNum = REG_NEXT(varDsc->lvRegNum);
3850 }
3851#endif // !defined(_TARGET_64BIT_)
3852 noway_assert(destRegNum != REG_NA);
3853 if (genRegMask(destRegNum) & regArgMaskLive)
3854 {
3855 /* we are trashing a live argument register - record it */
3856 unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
3857 noway_assert(destRegArgNum < argMax);
3858 regArgTab[destRegArgNum].trashBy = argNum;
3859 }
3860 else
3861 {
3862 /* argument goes to a free register */
3863 regArgTab[argNum].circular = false;
3864 change = true;
3865
3866 /* mark the argument register as free */
3867 regArgMaskLive &= ~genRegMask(regNum);
3868 }
3869 }
3870 }
3871 }
3872
3873 /* At this point, everything that has the "circular" flag
3874 * set to "true" forms a circular dependency */
3875 CLANG_FORMAT_COMMENT_ANCHOR;
3876
3877#ifdef DEBUG
3878 if (regArgMaskLive)
3879 {
3880 if (verbose)
3881 {
3882            printf("Circular dependencies found while homing the incoming arguments.\n");
3883 }
3884 }
3885#endif
3886
3887 // LSRA allocates registers to incoming parameters in order and will not overwrite
3888 // a register still holding a live parameter.
3889
3890 noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
3891 "Homing of float argument registers with circular dependencies not implemented.");
3892
3893 /* Now move the arguments to their locations.
3894 * First consider ones that go on the stack since they may
3895 * free some registers. */
3896
3897 regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
3898 for (argNum = 0; argNum < argMax; argNum++)
3899 {
3900 emitAttr size;
3901
3902#if defined(UNIX_AMD64_ABI)
3903 // If this is the wrong register file, just continue.
3904 if (regArgTab[argNum].type == TYP_UNDEF)
3905 {
3906 // This could happen if the reg in regArgTab[argNum] is of the other register file -
3907 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
3908 // The next register file processing will process it.
3909 continue;
3910 }
3911#endif // defined(UNIX_AMD64_ABI)
3912
3913 // If the arg is dead on entry to the method, skip it
3914
3915 if (regArgTab[argNum].processed)
3916 {
3917 continue;
3918 }
3919
3920 if (regArgTab[argNum].slot == 0) // Not a register argument
3921 {
3922 continue;
3923 }
3924
3925 varNum = regArgTab[argNum].varNum;
3926 noway_assert(varNum < compiler->lvaCount);
3927 varDsc = compiler->lvaTable + varNum;
3928
3929#ifndef _TARGET_64BIT_
3930 // If not a stack arg go to the next one
3931 if (varDsc->lvType == TYP_LONG)
3932 {
3933 if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
3934 {
3935 continue;
3936 }
3937 else if (varDsc->lvOtherReg != REG_STK)
3938 {
3939 continue;
3940 }
3941 }
3942 else
3943#endif // !_TARGET_64BIT_
3944 {
3945 // If not a stack arg go to the next one
3946 if (!regArgTab[argNum].stackArg)
3947 {
3948 continue;
3949 }
3950 }
3951
3952#if defined(_TARGET_ARM_)
3953 if (varDsc->lvType == TYP_DOUBLE)
3954 {
3955 if (regArgTab[argNum].slot == 2)
3956 {
3957 // We handled the entire double when processing the first half (slot == 1)
3958 continue;
3959 }
3960 }
3961#endif
3962
3963 noway_assert(regArgTab[argNum].circular == false);
3964
3965 noway_assert(varDsc->lvIsParam);
3966 noway_assert(varDsc->lvIsRegArg);
3967 noway_assert(varDsc->lvIsInReg() == false ||
3968 (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2));
3969
3970 var_types storeType = TYP_UNDEF;
3971 unsigned slotSize = TARGET_POINTER_SIZE;
3972
3973 if (varTypeIsStruct(varDsc))
3974 {
3975 storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
3976#if FEATURE_MULTIREG_ARGS
3977 // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
3978 noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
3979#endif // FEATURE_MULTIREG_ARGS
3980#ifdef UNIX_AMD64_ABI
3981 storeType = regArgTab[argNum].type;
3982#endif // UNIX_AMD64_ABI
3983 if (varDsc->lvIsHfaRegArg())
3984 {
3985#ifdef _TARGET_ARM_
3986 // On ARM32 the storeType for HFA args is always TYP_FLOAT
3987 storeType = TYP_FLOAT;
3988 slotSize = (unsigned)emitActualTypeSize(storeType);
3989#else // _TARGET_ARM64_
3990 storeType = genActualType(varDsc->GetHfaType());
3991 slotSize = (unsigned)emitActualTypeSize(storeType);
3992#endif // _TARGET_ARM64_
3993 }
3994 }
3995 else // Not a struct type
3996 {
3997 storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
3998 }
3999 size = emitActualTypeSize(storeType);
4000#ifdef _TARGET_X86_
4001 noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
4002#endif //_TARGET_X86_
4003
4004 regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
4005
4006        // Stack argument - if the ref count is 0, we don't care about it
4007
4008 if (!varDsc->lvOnFrame)
4009 {
4010 noway_assert(varDsc->lvRefCnt() == 0);
4011 }
4012 else
4013 {
4014 // Since slot is typically 1, baseOffset is typically 0
4015 int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
4016
4017 getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
4018
4019#ifndef UNIX_AMD64_ABI
4020 // Check if we are writing past the end of the struct
4021 if (varTypeIsStruct(varDsc))
4022 {
4023 assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
4024 }
4025#endif // !UNIX_AMD64_ABI
4026
4027 if (regArgTab[argNum].slot == 1)
4028 {
4029 psiMoveToStack(varNum);
4030 }
4031 }
4032
4033 /* mark the argument as processed */
4034
4035 regArgTab[argNum].processed = true;
4036 regArgMaskLive &= ~genRegMask(srcRegNum);
4037
4038#if defined(_TARGET_ARM_)
4039 if (storeType == TYP_DOUBLE)
4040 {
4041 regArgTab[argNum + 1].processed = true;
4042 regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
4043 }
4044#endif
4045 }
4046
4047 /* Process any circular dependencies */
4048 if (regArgMaskLive)
4049 {
4050 unsigned begReg, destReg, srcReg;
4051 unsigned varNumDest, varNumSrc;
4052 LclVarDsc* varDscDest;
4053 LclVarDsc* varDscSrc;
4054 instruction insCopy = INS_mov;
4055
4056 if (doingFloat)
4057 {
4058#if defined(FEATURE_HFA) || defined(UNIX_AMD64_ABI)
4059 insCopy = ins_Copy(TYP_DOUBLE);
4060 // Compute xtraReg here when we have a float argument
4061 assert(xtraReg == REG_NA);
4062
4063 regMaskTP fpAvailMask;
4064
4065 fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
4066#if defined(FEATURE_HFA)
4067 fpAvailMask &= RBM_ALLDOUBLE;
4068#else
4069#if !defined(UNIX_AMD64_ABI)
4070#error Error. Wrong architecture.
4071#endif // !defined(UNIX_AMD64_ABI)
4072#endif // defined(FEATURE_HFA)
4073
4074 if (fpAvailMask == RBM_NONE)
4075 {
4076 fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
4077#if defined(FEATURE_HFA)
4078 fpAvailMask &= RBM_ALLDOUBLE;
4079#else
4080#if !defined(UNIX_AMD64_ABI)
4081#error Error. Wrong architecture.
4082#endif // !defined(UNIX_AMD64_ABI)
4083#endif // defined(FEATURE_HFA)
4084 }
4085
4086 assert(fpAvailMask != RBM_NONE);
4087
4088            // We pick the lowest available register number
4089 regMaskTP tempMask = genFindLowestBit(fpAvailMask);
4090 xtraReg = genRegNumFromMask(tempMask);
4091#elif defined(_TARGET_X86_)
4092 // This case shouldn't occur on x86 since NYI gets converted to an assert
4093 NYI("Homing circular FP registers via xtraReg");
4094#endif
4095 }
4096
4097 for (argNum = 0; argNum < argMax; argNum++)
4098 {
4099 // If not a circular dependency then continue
4100 if (!regArgTab[argNum].circular)
4101 {
4102 continue;
4103 }
4104
4105 // If already processed the dependency then continue
4106
4107 if (regArgTab[argNum].processed)
4108 {
4109 continue;
4110 }
4111
4112 if (regArgTab[argNum].slot == 0) // Not a register argument
4113 {
4114 continue;
4115 }
4116
4117 destReg = begReg = argNum;
4118 srcReg = regArgTab[argNum].trashBy;
4119
4120 varNumDest = regArgTab[destReg].varNum;
4121 noway_assert(varNumDest < compiler->lvaCount);
4122 varDscDest = compiler->lvaTable + varNumDest;
4123 noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
4124
4125 noway_assert(srcReg < argMax);
4126 varNumSrc = regArgTab[srcReg].varNum;
4127 noway_assert(varNumSrc < compiler->lvaCount);
4128 varDscSrc = compiler->lvaTable + varNumSrc;
4129 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4130
4131 emitAttr size = EA_PTRSIZE;
4132
4133#ifdef _TARGET_XARCH_
4134 //
4135 // The following code relies upon the target architecture having an
4136 // 'xchg' instruction which directly swaps the values held in two registers.
4137 // On the ARM architecture we do not have such an instruction.
4138 //
4139 if (destReg == regArgTab[srcReg].trashBy)
4140 {
4141 /* only 2 registers form the circular dependency - use "xchg" */
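                /* e.g. the argument arriving in ECX is homed to EDX while the argument arriving in EDX
                   is homed to ECX; a single "xchg ecx, edx" puts both values in the right place */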
4142
4143 varNum = regArgTab[argNum].varNum;
4144 noway_assert(varNum < compiler->lvaCount);
4145 varDsc = compiler->lvaTable + varNum;
4146 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4147
4148 noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
4149
4150 /* Set "size" to indicate GC if one and only one of
4151 * the operands is a pointer
4152 * RATIONALE: If both are pointers, nothing changes in
4153 * the GC pointer tracking. If only one is a pointer we
4154 * have to "swap" the registers in the GC reg pointer mask
4155 */
4156
4157 if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
4158 {
4159 size = EA_GCREF;
4160 }
4161
4162 noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
4163
4164 getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
4165 regSet.verifyRegUsed(varDscSrc->lvRegNum);
4166 regSet.verifyRegUsed(varDscSrc->lvArgReg);
4167
4168 /* mark both arguments as processed */
4169 regArgTab[destReg].processed = true;
4170 regArgTab[srcReg].processed = true;
4171
4172 regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
4173 regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
4174
4175 psiMoveToReg(varNumSrc);
4176 psiMoveToReg(varNumDest);
4177 }
4178 else
4179#endif // _TARGET_XARCH_
4180 {
4181 var_types destMemType = varDscDest->TypeGet();
4182
4183#ifdef _TARGET_ARM_
4184 bool cycleAllDouble = true; // assume the best
4185
4186 unsigned iter = begReg;
4187 do
4188 {
4189 if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
4190 {
4191 cycleAllDouble = false;
4192 break;
4193 }
4194 iter = regArgTab[iter].trashBy;
4195 } while (iter != begReg);
4196
4197 // We may treat doubles as floats for ARM because we could have partial circular
4198 // dependencies of a float with a lo/hi part of the double. We mark the
4199 // trashBy values for each slot of the double, so let the circular dependency
4200 // logic work its way out for floats rather than doubles. If a cycle has all
4201 // doubles, then optimize so that instead of two vmov.f32's to move a double,
4202 // we can use one vmov.f64.
4203 //
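                // For example, a cycle that contains a single-precision argument and only one half of a
                // double cannot be moved with vmov.f64, so in that case each 4-byte slot is moved
                // individually as a float.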
4204 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4205 {
4206 destMemType = TYP_FLOAT;
4207 }
4208#endif // _TARGET_ARM_
4209
4210 if (destMemType == TYP_REF)
4211 {
4212 size = EA_GCREF;
4213 }
4214 else if (destMemType == TYP_BYREF)
4215 {
4216 size = EA_BYREF;
4217 }
4218 else if (destMemType == TYP_DOUBLE)
4219 {
4220 size = EA_8BYTE;
4221 }
4222 else if (destMemType == TYP_FLOAT)
4223 {
4224 size = EA_4BYTE;
4225 }
4226
4227                /* move the dest reg (begReg) into the extra reg */
4228
4229 assert(xtraReg != REG_NA);
4230
4231 regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
4232
4233 getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
4234
4235 regSet.verifyRegUsed(xtraReg);
4236
4237 *pXtraRegClobbered = true;
4238
4239 psiMoveToReg(varNumDest, xtraReg);
4240
4241 /* start moving everything to its right place */
4242
4243 while (srcReg != begReg)
4244 {
4245 /* mov dest, src */
4246
4247 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4248 regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
4249
4250 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
4251
4252 regSet.verifyRegUsed(destRegNum);
4253
4254 /* mark 'src' as processed */
4255 noway_assert(srcReg < argMax);
4256 regArgTab[srcReg].processed = true;
4257#ifdef _TARGET_ARM_
4258 if (size == EA_8BYTE)
4259 regArgTab[srcReg + 1].processed = true;
4260#endif
4261 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4262
4263 /* move to the next pair */
4264 destReg = srcReg;
4265 srcReg = regArgTab[srcReg].trashBy;
4266
4267 varDscDest = varDscSrc;
4268 destMemType = varDscDest->TypeGet();
4269#ifdef _TARGET_ARM_
4270 if (!cycleAllDouble && destMemType == TYP_DOUBLE)
4271 {
4272 destMemType = TYP_FLOAT;
4273 }
4274#endif
4275 varNumSrc = regArgTab[srcReg].varNum;
4276 noway_assert(varNumSrc < compiler->lvaCount);
4277 varDscSrc = compiler->lvaTable + varNumSrc;
4278 noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
4279
4280 if (destMemType == TYP_REF)
4281 {
4282 size = EA_GCREF;
4283 }
4284 else if (destMemType == TYP_DOUBLE)
4285 {
4286 size = EA_8BYTE;
4287 }
4288 else
4289 {
4290 size = EA_4BYTE;
4291 }
4292 }
4293
4294 /* take care of the beginning register */
4295
4296 noway_assert(srcReg == begReg);
4297
4298                /* move the value saved in the extra reg (the original begReg value) into its final destination */
4299
4300 regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
4301
4302 getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
4303
4304 regSet.verifyRegUsed(destRegNum);
4305
4306 psiMoveToReg(varNumSrc);
4307
4308 /* mark the beginning register as processed */
4309
4310 regArgTab[srcReg].processed = true;
4311#ifdef _TARGET_ARM_
4312 if (size == EA_8BYTE)
4313 regArgTab[srcReg + 1].processed = true;
4314#endif
4315 regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
4316 }
4317 }
4318 }
4319
4320 /* Finally take care of the remaining arguments that must be enregistered */
4321 while (regArgMaskLive)
4322 {
4323 regMaskTP regArgMaskLiveSave = regArgMaskLive;
4324
4325 for (argNum = 0; argNum < argMax; argNum++)
4326 {
4327 /* If already processed go to the next one */
4328 if (regArgTab[argNum].processed)
4329 {
4330 continue;
4331 }
4332
4333 if (regArgTab[argNum].slot == 0)
4334 { // Not a register argument
4335 continue;
4336 }
4337
4338 varNum = regArgTab[argNum].varNum;
4339 noway_assert(varNum < compiler->lvaCount);
4340 varDsc = compiler->lvaTable + varNum;
4341 var_types regType = regArgTab[argNum].getRegType(compiler);
4342 regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
4343
4344#if defined(UNIX_AMD64_ABI)
4345 if (regType == TYP_UNDEF)
4346 {
4347 // This could happen if the reg in regArgTab[argNum] is of the other register file -
4348 // for System V register passed structs where the first reg is GPR and the second an XMM reg.
4349 // The next register file processing will process it.
4350 regArgMaskLive &= ~genRegMask(regNum);
4351 continue;
4352 }
4353#endif // defined(UNIX_AMD64_ABI)
4354
4355 noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
4356#ifndef _TARGET_64BIT_
4357#ifndef _TARGET_ARM_
4358            // On 32-bit non-ARM targets we expect incoming register arguments to be pointer sized,
4359            // so assert that here. Once we fully understand the calling convention this may not always
4360            // hold, but for now it is a useful sanity check.
4361
4362            // On ARM, a long can be passed in registers, so this check is skipped there.
4363 noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == TARGET_POINTER_SIZE);
4364#endif
4365#endif //_TARGET_64BIT_
4366
4367 noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
4368
4369 /* Register argument - hopefully it stays in the same register */
4370 regNumber destRegNum = REG_NA;
4371 var_types destMemType = varDsc->TypeGet();
4372
4373 if (regArgTab[argNum].slot == 1)
4374 {
4375 destRegNum = varDsc->lvRegNum;
4376
4377#ifdef _TARGET_ARM_
4378 if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
4379 {
4380 // The second half of the double has already been processed! Treat this as a single.
4381 destMemType = TYP_FLOAT;
4382 }
4383#endif // _TARGET_ARM_
4384 }
4385#ifndef _TARGET_64BIT_
4386 else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
4387 {
4388 assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
4389 if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
4390 {
4391 destRegNum = regNum;
4392 }
4393 else
4394 {
4395 destRegNum = varDsc->lvOtherReg;
4396 }
4397
4398 assert(destRegNum != REG_STK);
4399 }
4400 else
4401 {
4402 assert(regArgTab[argNum].slot == 2);
4403 assert(destMemType == TYP_DOUBLE);
4404
4405 // For doubles, we move the entire double using the argNum representing
4406 // the first half of the double. There are two things we won't do:
4407 // (1) move the double when the 1st half of the destination is free but the
4408 // 2nd half is occupied, and (2) move the double when the 2nd half of the
4409 // destination is free but the 1st half is occupied. Here we consider the
4410 // case where the first half can't be moved initially because its target is
4411 // still busy, but the second half can be moved. We wait until the entire
4412 // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
4413 // and F2 single moving to F16. When we process F0, its target F2 is busy,
4414 // so we skip it on the first pass. When we process F1, its target F3 is
4415 // available. However, we want to move F0/F1 all at once, so we skip it here.
4416 // We process F2, which frees up F2. The next pass through, we process F0 and
4417 // F2/F3 are empty, so we move it. Note that if half of a double is involved
4418 // in a circularity with a single, then we will have already moved that half
4419 // above, so we go ahead and move the remaining half as a single.
4420 // Because there are no circularities left, we are guaranteed to terminate.
4421
4422 assert(argNum > 0);
4423 assert(regArgTab[argNum - 1].slot == 1);
4424
4425 if (!regArgTab[argNum - 1].processed)
4426 {
4427                    // The first half of the double hasn't been processed yet; wait so both halves can be processed together
4428 continue;
4429 }
4430
4431 // The first half of the double has been processed but the second half hasn't!
4432 // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
4433 // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
4434 // dependency logic above will move them as singles, leaving just F3 to move. Treat
4435 // it as a single to finish the shuffling.
4436
4437 destMemType = TYP_FLOAT;
4438 destRegNum = REG_NEXT(varDsc->lvRegNum);
4439 }
4440#endif // !_TARGET_64BIT_
4441#if (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4442 else
4443 {
4444 assert(regArgTab[argNum].slot == 2);
4445 assert(argNum > 0);
4446 assert(regArgTab[argNum - 1].slot == 1);
4447 assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
4448 destRegNum = varDsc->lvRegNum;
4449 noway_assert(regNum != destRegNum);
4450 continue;
4451 }
4452#endif // (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
4453 noway_assert(destRegNum != REG_NA);
4454 if (destRegNum != regNum)
4455 {
4456 /* Cannot trash a currently live register argument.
4457 * Skip this one until its target will be free
4458 * which is guaranteed to happen since we have no circular dependencies. */
4459
4460 regMaskTP destMask = genRegMask(destRegNum);
4461#ifdef _TARGET_ARM_
4462 // Don't process the double until both halves of the destination are clear.
4463 if (genActualType(destMemType) == TYP_DOUBLE)
4464 {
4465 assert((destMask & RBM_DBL_REGS) != 0);
4466 destMask |= genRegMask(REG_NEXT(destRegNum));
4467 }
4468#endif
4469
4470 if (destMask & regArgMaskLive)
4471 {
4472 continue;
4473 }
4474
4475 /* Move it to the new register */
4476
4477 emitAttr size = emitActualTypeSize(destMemType);
4478
4479#if defined(_TARGET_ARM64_)
4480 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4481 {
4482                    // For a SIMD type that is passed in two integer registers, limit the copy
4483                    // below to the first 8 bytes from the first integer register. The remaining
4484                    // 8 bytes from the second slot are handled by the code further below.
4485 assert(EA_SIZE(size) >= 8);
4486 size = EA_8BYTE;
4487 }
4488#endif
4489
4490 getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
4491
4492 psiMoveToReg(varNum);
4493 }
4494
4495 /* mark the argument as processed */
4496
4497 assert(!regArgTab[argNum].processed);
4498 regArgTab[argNum].processed = true;
4499 regArgMaskLive &= ~genRegMask(regNum);
4500#if FEATURE_MULTIREG_ARGS
4501 int argRegCount = 1;
4502#ifdef _TARGET_ARM_
4503 if (genActualType(destMemType) == TYP_DOUBLE)
4504 {
4505 argRegCount = 2;
4506 }
4507#endif
4508#if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4509 if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4510 {
4511 argRegCount = 2;
4512 int nextArgNum = argNum + 1;
4513 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4514 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4515 // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
4516 // and moves the 0th element of the src reg into the 1st element of the dest reg.
4517 getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
4518 // Set destRegNum to regNum so that we skip the setting of the register below,
4519 // but mark argNum as processed and clear regNum from the live mask.
4520 destRegNum = regNum;
4521 }
4522#endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
4523#if defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4524 if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
4525 {
4526                // For a SIMD type that is passed in two integer registers, the code above copies
4527                // the first integer argument register into the lower 8 bytes
4528                // of the target register. Here we must handle the second 8 bytes of the slot pair by
4529                // inserting the second integer register into the upper 8 bytes of the target
4530                // SIMD floating point register.
4531 argRegCount = 2;
4532 int nextArgNum = argNum + 1;
4533 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4534 noway_assert(regArgTab[nextArgNum].varNum == varNum);
4535 noway_assert(genIsValidIntReg(nextRegNum));
4536 noway_assert(genIsValidFloatReg(destRegNum));
4537 getEmitter()->emitIns_R_R_I(INS_mov, EA_8BYTE, destRegNum, nextRegNum, 1);
4538 }
4539#endif // defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
4540
4541 // Mark the rest of the argument registers corresponding to this multi-reg type as
4542 // being processed and no longer live.
4543 for (int regSlot = 1; regSlot < argRegCount; regSlot++)
4544 {
4545 int nextArgNum = argNum + regSlot;
4546 assert(!regArgTab[nextArgNum].processed);
4547 regArgTab[nextArgNum].processed = true;
4548 regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
4549 regArgMaskLive &= ~genRegMask(nextRegNum);
4550 }
4551#endif // FEATURE_MULTIREG_ARGS
4552 }
4553
4554 noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
4555 }
4556}
4557#ifdef _PREFAST_
4558#pragma warning(pop)
4559#endif
4560
4561/*****************************************************************************
4562 * If any incoming stack arguments live in registers, load them.
4563 */
4564void CodeGen::genEnregisterIncomingStackArgs()
4565{
4566#ifdef DEBUG
4567 if (verbose)
4568 {
4569 printf("*************** In genEnregisterIncomingStackArgs()\n");
4570 }
4571#endif
4572
4573 assert(compiler->compGeneratingProlog);
4574
4575 unsigned varNum = 0;
4576
4577 for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4578 {
4579 /* Is this variable a parameter? */
4580
4581 if (!varDsc->lvIsParam)
4582 {
4583 continue;
4584 }
4585
4586        /* If it's a register argument then it's already been taken care of.
4587           But on ARM, when running under a profiler, we would have pre-spilled a register argument
4588           and hence we need to load it from its pre-spilled location here.
4589 */
4590 bool isPrespilledForProfiling = false;
4591#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
4592 isPrespilledForProfiling =
4593 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
4594#endif
4595
4596 if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
4597 {
4598 continue;
4599 }
4600
4601 /* Has the parameter been assigned to a register? */
4602
4603 if (!varDsc->lvIsInReg())
4604 {
4605 continue;
4606 }
4607
4608 var_types type = genActualType(varDsc->TypeGet());
4609
4610 /* Is the variable dead on entry */
4611
4612 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4613 {
4614 continue;
4615 }
4616
4617 /* Load the incoming parameter into the register */
4618
4619 /* Figure out the home offset of the incoming argument */
4620
4621 regNumber regNum = varDsc->lvArgInitReg;
4622 assert(regNum != REG_STK);
4623
4624 getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0);
4625 regSet.verifyRegUsed(regNum);
4626
4627 psiMoveToReg(varNum);
4628 }
4629}
4630
4631/*-------------------------------------------------------------------------
4632 *
4633 * We have to decide whether we're going to use block initialization
4634 * in the prolog before we assign final stack offsets. This is because
4635 * when using block initialization we may need additional callee-saved
4636 * registers which need to be saved on the frame, thus increasing the
4637 * frame size.
4638 *
4639 * We'll count the number of locals we have to initialize,
4640 * and if there are lots of them we'll use block initialization.
4641 * Thus, the local variable table must have accurate register location
4642 * information for enregistered locals for their register state on entry
4643 * to the function.
4644 *
4645 * At the same time we set lvMustInit for locals (enregistered or on stack)
4646 * that must be initialized (e.g. when compInitMem is set, for untracked
4647 * GC pointers, or when DFA is disabled)
4648 */
4649void CodeGen::genCheckUseBlockInit()
4650{
4651 assert(!compiler->compGeneratingProlog);
4652
4653 unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
4654 // larger than int count for more than 1).
4655 unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to
4656 // determine whether to use block init.
4657
4658 unsigned varNum;
4659 LclVarDsc* varDsc;
4660
4661 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
4662 {
4663 if (varDsc->lvIsParam)
4664 {
4665 continue;
4666 }
4667
4668 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
4669 {
4670 noway_assert(varDsc->lvRefCnt() == 0);
4671 continue;
4672 }
4673
4674 if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar)
4675 {
4676 continue;
4677 }
4678
4679#if FEATURE_FIXED_OUT_ARGS
4680 if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
4681 {
4682 continue;
4683 }
4684 if (varNum == compiler->lvaOutgoingArgSpaceVar)
4685 {
4686 continue;
4687 }
4688#endif
4689
4690#if FEATURE_EH_FUNCLETS
4691 // There's no need to force 0-initialization of the PSPSym, it will be
4692 // initialized with a real value in the prolog
4693 if (varNum == compiler->lvaPSPSym)
4694 {
4695 continue;
4696 }
4697#endif
4698
4699 if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
4700 {
4701 // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
4702 // initialized by the parent struct. No need to set the lvMustInit bit in the
4703 // field locals.
4704 continue;
4705 }
4706
4707 if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) ||
4708 varDsc->lvMustInit)
4709 {
4710 if (varDsc->lvTracked)
4711 {
4712 /* For uninitialized use of tracked variables, the liveness
4713 * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
4714 */
4715 if (varDsc->lvMustInit ||
4716 VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
4717 {
4718 /* This var must be initialized */
4719
4720 varDsc->lvMustInit = 1;
4721
4722                    /* If the variable is on the stack and will be initialized
4723                     * using rep stos, compute the total size to be zeroed */
4724
4725 if (varDsc->lvOnFrame)
4726 {
4727 if (!varDsc->lvRegister)
4728 {
4729 if (!varDsc->lvIsInReg())
4730 {
4731 // Var is on the stack at entry.
4732 initStkLclCnt +=
4733 roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4734 }
4735 }
4736 else
4737 {
4738 // Var is partially enregistered
4739 noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
4740 initStkLclCnt += genTypeStSz(TYP_INT);
4741 }
4742 }
4743 }
4744 }
4745
4746 /* With compInitMem, all untracked vars will have to be init'ed */
4747 /* VSW 102460 - Do not force initialization of compiler generated temps,
4748 unless they are untracked GC type or structs that contain GC pointers */
4749 CLANG_FORMAT_COMMENT_ANCHOR;
4750
4751#if FEATURE_SIMD
4752 // TODO-1stClassStructs
4753 // This is here to duplicate previous behavior, where TYP_SIMD8 locals
4754 // were not being re-typed correctly.
4755 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
4756#else // !FEATURE_SIMD
4757 if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
4758#endif // !FEATURE_SIMD
4759 varDsc->lvOnFrame &&
4760 (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
4761 {
4762 varDsc->lvMustInit = true;
4763
4764 initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
4765 }
4766
4767 continue;
4768 }
4769
4770 /* Ignore if not a pointer variable or value class with a GC field */
4771
4772 if (!compiler->lvaTypeIsGC(varNum))
4773 {
4774 continue;
4775 }
4776
4777 /* If we don't know lifetimes of variables, must be conservative */
4778 if (!compiler->backendRequiresLocalVarLifetimes())
4779 {
4780 varDsc->lvMustInit = true;
4781 noway_assert(!varDsc->lvRegister);
4782 }
4783 else
4784 {
4785 if (!varDsc->lvTracked)
4786 {
4787 varDsc->lvMustInit = true;
4788 }
4789 }
4790
4791 /* Is this a 'must-init' stack pointer local? */
4792
4793 if (varDsc->lvMustInit && varDsc->lvOnFrame)
4794 {
4795 initStkLclCnt += varDsc->lvStructGcCount;
4796 }
4797
4798 if ((compiler->lvaLclSize(varNum) > (3 * TARGET_POINTER_SIZE)) && (largeGcStructs <= 4))
4799 {
4800 largeGcStructs++;
4801 }
4802 }
4803
4804 /* Don't forget about spill temps that hold pointers */
4805
4806 if (!TRACK_GC_TEMP_LIFETIMES)
4807 {
4808 assert(regSet.tmpAllFree());
4809 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
4810 {
4811 if (varTypeIsGC(tempThis->tdTempType()))
4812 {
4813 initStkLclCnt++;
4814 }
4815 }
4816 }
4817
4818 // After debugging this further it was found that this logic is incorrect:
4819 // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case)
4820 // and this also double counts variables (we saw this in the debugger) around line 4829.
4821    // Even though this doesn't pose a problem with correctness, it may improperly decide to
4822    // zero-init the stack using a block operation instead of on a 'case by case' basis.
4823 genInitStkLclCnt = initStkLclCnt;
4824
4825 /* If we have more than 4 untracked locals, use block initialization */
4826 /* TODO-Review: If we have large structs, bias toward not using block initialization since
4827       we waste all the other slots. Really need to compute the correct cost
4828 and compare that against zeroing the slots individually */
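    // For example, with no large GC structs, five or more int-sized slots to initialize will trigger
    // block initialization; with two large GC structs the threshold rises to seven slots.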
4829
4830 genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
4831
4832 if (genUseBlockInit)
4833 {
4834 regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
4835
4836 // If there is a secret stub param, don't count it, as it will no longer
4837 // be live when we do block init.
4838 if (compiler->info.compPublishStubParam)
4839 {
4840 maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
4841 }
4842
4843#ifdef _TARGET_XARCH_
4844        // If we're going to use "REP STOS", remember that we will trash EDI.
4845        // For fastcall we will have to save ECX and EAX,
4846        // so reserve two extra callee-saved registers.
4847        // This is better than pushing eax and ecx, because in the latter case
4848        // we would mess up already computed offsets on the stack (for ESP frames).
4849 regSet.rsSetRegsModified(RBM_EDI);
4850
4851#ifdef UNIX_AMD64_ABI
4852        // For register arguments we may have to save RCX and RDI (on AMD64 System V OSes).
4853        // In such cases, use the R12 and R13 registers instead.
4854 if (maskCalleeRegArgMask & RBM_RCX)
4855 {
4856 regSet.rsSetRegsModified(RBM_R12);
4857 }
4858
4859 if (maskCalleeRegArgMask & RBM_RDI)
4860 {
4861 regSet.rsSetRegsModified(RBM_R13);
4862 }
4863#else // !UNIX_AMD64_ABI
4864 if (maskCalleeRegArgMask & RBM_ECX)
4865 {
4866 regSet.rsSetRegsModified(RBM_ESI);
4867 }
4868#endif // !UNIX_AMD64_ABI
4869
4870 if (maskCalleeRegArgMask & RBM_EAX)
4871 {
4872 regSet.rsSetRegsModified(RBM_EBX);
4873 }
4874
4875#endif // _TARGET_XARCH_
4876#ifdef _TARGET_ARM_
4877 //
4878        // On ARM, if we are using block init, then we must
4879        // force-spill R4/R5/R6 so that we can use them during the
4880        // zero-initialization process.
4881 //
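        // For example, if three incoming argument registers (beyond any pre-spilled ones) are live,
        // forceSpillRegCount below is 2, so R4 and R5 are marked as modified and will be spilled.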
4882 int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
4883 if (forceSpillRegCount > 0)
4884 regSet.rsSetRegsModified(RBM_R4);
4885 if (forceSpillRegCount > 1)
4886 regSet.rsSetRegsModified(RBM_R5);
4887 if (forceSpillRegCount > 2)
4888 regSet.rsSetRegsModified(RBM_R6);
4889#endif // _TARGET_ARM_
4890 }
4891}
4892
4893/*-----------------------------------------------------------------------------
4894 *
4895 * Push any callee-saved registers we have used
4896 */
4897
4898#if defined(_TARGET_ARM64_)
4899void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
4900#else
4901void CodeGen::genPushCalleeSavedRegisters()
4902#endif
4903{
4904 assert(compiler->compGeneratingProlog);
4905
4906#if defined(_TARGET_XARCH_)
4907    // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto the
4908    // stack here. Space for float registers to be preserved is stack-allocated and saved as part of the prolog
4909    // sequence, not here.
4910 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
4911#else // !defined(_TARGET_XARCH_)
4912 regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
4913#endif
4914
4915#if ETW_EBP_FRAMED
4916 if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
4917 {
4918 noway_assert(!"Used register RBM_FPBASE as a scratch register!");
4919 }
4920#endif
4921
4922#ifdef _TARGET_XARCH_
4923 // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
4924 if (isFramePointerUsed())
4925 {
4926 rsPushRegs &= ~RBM_FPBASE;
4927 }
4928#endif
4929
4930#ifdef _TARGET_ARMARCH_
4931 // On ARM we push the FP (frame-pointer) here along with all other callee saved registers
4932 if (isFramePointerUsed())
4933 rsPushRegs |= RBM_FPBASE;
4934
4935 //
4936 // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
4937 // changes in GC suspension architecture.
4938 //
4939 // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
4940 // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
4941 // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
4942 // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
4943 // be saved on the stack and the GC suspension would time out.
4944 //
4945    // So if we wanted to skip pushing/popping lr for leaf frames, we would also need to do one of
4946 // the following to make GC suspension work in the above scenario:
4947 // - Make return address hijacking work even when lr is not saved on the stack.
4948 // - Generate fully interruptible code for loops that contains calls
4949 // - Generate fully interruptible code for leaf methods
4950 //
4951 // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
4952 // is not worth it.
4953 //
4954 rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
4955
4956 regSet.rsMaskCalleeSaved = rsPushRegs;
4957#endif // _TARGET_ARMARCH_
4958
4959#ifdef DEBUG
4960 if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
4961 {
4962 printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
4963 compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
4964 dspRegMask(rsPushRegs);
4965 printf("\n");
4966 assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
4967 }
4968#endif // DEBUG
4969
4970#if defined(_TARGET_ARM_)
4971 regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
4972 regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
4973
4974 maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
4975
4976 assert(FitsIn<int>(maskPushRegsInt));
4977 inst_IV(INS_push, (int)maskPushRegsInt);
4978 compiler->unwindPushMaskInt(maskPushRegsInt);
4979
4980 if (maskPushRegsFloat != 0)
4981 {
4982 genPushFltRegs(maskPushRegsFloat);
4983 compiler->unwindPushMaskFloat(maskPushRegsFloat);
4984 }
4985#elif defined(_TARGET_ARM64_)
4986 // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
4987 // options. Case numbers in comments here refer to this document.
4988 //
4989 // For most frames, generate, e.g.:
4990 // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
4991 // // ensures stack stays aligned.
4992 // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
4993 // // at top of frame (highest addresses).
4994 // stp r21, r22, [sp, 0x70]
4995 //
4996 // Notes:
4997 // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
4998 // at the top of the frame.
4999 // 2. If we save FP, then the first store is FP, LR.
5000 // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
5001 // preserve their lower 8 bytes, by calling convention.
5002 // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
5003 // consecutive.
5004 // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
5005
5006 int totalFrameSize = genTotalFrameSize();
5007
5008 int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
5009
5010 regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
5011 regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
5012
5013 int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
5014 // generate based on various sizes.
5015 int calleeSaveSPDelta = 0;
5016 int calleeSaveSPDeltaUnaligned = 0;
5017
5018 if (isFramePointerUsed())
5019 {
5020 // We need to save both FP and LR.
5021
5022 assert((maskSaveRegsInt & RBM_FP) != 0);
5023 assert((maskSaveRegsInt & RBM_LR) != 0);
5024
5025 if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
5026 {
5027 // Case #1.
5028 //
5029 // Generate:
5030 // stp fp,lr,[sp,#-framesz]!
5031 //
5032 // The (totalFrameSize < 512) condition ensures that both the predecrement
5033 // and the postincrement of SP can occur with STP.
5034 //
5035 // After saving callee-saved registers, we establish the frame pointer with:
5036 // mov fp,sp
5037 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
5038
5039 frameType = 1;
5040
5041 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
5042 INS_OPTS_PRE_INDEX);
5043 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
5044
5045 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
5046 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
5047 }
5048 else if (totalFrameSize <= 512)
5049 {
5050 // Case #2.
5051 //
5052 // Generate:
5053 // sub sp,sp,#framesz
5054 // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
5055 //
5056 // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
5057 // signed offset encoding.
5058 //
5059 // After saving callee-saved registers, we establish the frame pointer with:
5060 // add fp,sp,#outsz
5061 // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
5062
5063 frameType = 2;
5064
5065 assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
5066
5067 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
5068 compiler->unwindAllocStack(totalFrameSize);
5069
5070 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
5071 compiler->lvaOutgoingArgSpaceSize);
5072 compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
5073
5074 maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
5075 offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
5076 }
5077 else
5078 {
5079 // Case 5 or 6.
5080 //
5081 // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
5082 // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
5083 // stored. For example:
5084 // stp r19,r20,[sp,#-96]!
5085 // stp d8,d9,[sp,#16]
5086 // ... save varargs incoming integer registers ...
5087 // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
5088 // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
5089 // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
5090 // register):
5091 // sub sp,sp,#16
5092 // str r19,[sp,#8]
5093 // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
5094 // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
5095 // above them. If that is preferable, we could implement it.
5096 // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
5097 //
5098 // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
5099 // padding from above).
5100            // Note that #remainingFrameSz must not be zero, since we still need to save FP,LR.
5101 //
5102 // Generate:
5103 // sub sp,sp,#remainingFrameSz
5104 // or, for large frames:
5105 // mov rX, #remainingFrameSz // maybe multiple instructions
5106 // sub sp,sp,rX
5107 //
5108 // followed by:
5109 // stp fp,lr,[sp,#outsz]
5110 // add fp,sp,#outsz
5111 //
5112 // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
5113 // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
5114 // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
5115 // following sequences:
5116 //
5117 // Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
5118 //
5119 // sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned
5120 // stp fp,lr,[sp]
5121 // mov fp,sp
5122 // sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
5123 //
5124 // Or:
5125 //
5126 // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
5127 // // always guaranteed to be 8 byte aligned).
5128 // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
5129 // add fp,sp,#8
5130 // sub sp,sp,#outsz - #8
5131 //
5132 // (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
5133 // mov rX, #outsz - #8 // maybe multiple instructions
5134 // sub sp,sp,rX
5135 // )
5136
5137 frameType = 3;
5138
5139 calleeSaveSPDeltaUnaligned =
5140 totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
5141 assert(calleeSaveSPDeltaUnaligned >= 0);
5142 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
5143 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5144
5145 offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5146 assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
5147 // store the callee-saved registers.
5148
5149 // We'll take care of these later, but callee-saved regs code shouldn't see them.
5150 maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
5151 }
5152 }
5153 else
5154 {
5155 // No frame pointer (no chaining).
5156 assert((maskSaveRegsInt & RBM_FP) == 0);
5157 assert((maskSaveRegsInt & RBM_LR) != 0);
5158
5159 // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
5160 // if we only have one callee-saved register plus LR to save.
5161
5162 NYI("Frame without frame pointer");
5163 offset = 0;
5164 }
5165
5166 assert(frameType != 0);
5167
5168 genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
5169
5170 offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
5171
5172 // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
5173 // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
5174 // need to add codes at all.
5175
5176 if (compiler->info.compIsVarArgs)
5177 {
5178 // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
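        // For example, x0/x1 are stored at [sp,#offset], x2/x3 at [sp,#offset+16],
        // x4/x5 at [sp,#offset+32], and x6/x7 at [sp,#offset+48].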
5179 assert((offset % 16) == 0);
5180 for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
5181 {
5182 regNumber reg2 = REG_NEXT(reg1);
5183 // stp REG, REG + 1, [SP, #offset]
5184 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
5185 compiler->unwindNop();
5186 offset += 2 * REGSIZE_BYTES;
5187 }
5188 }
5189
5190 if (frameType == 1)
5191 {
5192 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
5193 compiler->unwindSetFrameReg(REG_FPBASE, 0);
5194 }
5195 else if (frameType == 2)
5196 {
5197 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
5198 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5199 }
5200 else if (frameType == 3)
5201 {
5202 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5203 assert(remainingFrameSz > 0);
5204 assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
5205 // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
5206
5207 if (compiler->lvaOutgoingArgSpaceSize >= 504)
5208 {
5209 // We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
5210 // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
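            // (An stp/ldp of two 64-bit registers encodes a signed 7-bit immediate scaled by 8, so the
            // largest positive offset it can encode is 63 * 8 = 504 bytes.)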
5211 assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
5212 int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
5213 int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
5214 int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
5215 assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
5216
5217 genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
5218 offset += spAdjustment2;
5219
5220 // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
5221 // some of it)
5222
5223 int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
5224 assert(spAdjustment3 > 0);
5225 assert((spAdjustment3 % 16) == 0);
5226
5227 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
5228 compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
5229
5230 genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
5231 offset += spAdjustment3;
5232 }
5233 else
5234 {
5235 genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
5236 pInitRegZeroed);
5237 offset += remainingFrameSz;
5238
5239 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
5240 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5241 }
5242 }
5243
5244 assert(offset == totalFrameSize);
5245
5246#elif defined(_TARGET_XARCH_)
5247 // Push backwards so we match the order we will pop them in the epilog
5248 // and all the other code that expects it to be in this order.
5249 for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
5250 {
5251 regMaskTP regBit = genRegMask(reg);
5252
5253 if ((regBit & rsPushRegs) != 0)
5254 {
5255 inst_RV(INS_push, reg, TYP_REF);
5256 compiler->unwindPush(reg);
5257
5258 if (!doubleAlignOrFramePointerUsed())
5259 {
5260 psiAdjustStackLevel(REGSIZE_BYTES);
5261 }
5262
5263 rsPushRegs &= ~regBit;
5264 }
5265 }
5266
5267#else
5268 assert(!"Unknown TARGET");
5269#endif // _TARGET_*
5270}
5271
5272#if defined(_TARGET_ARM_)
5273
5274void CodeGen::genPushFltRegs(regMaskTP regMask)
5275{
    assert(regMask != 0);                        // Don't call unless we have some registers to push
    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
5278
5279 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5280 int slots = genCountBits(regMask);
5281 // regMask should be contiguously set
5282 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
5283 assert((tmpMask & (tmpMask - 1)) == 0);
5284 assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
5285
5286 // Our calling convention requires that we only use vpush for TYP_DOUBLE registers
5287 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5288 noway_assert((slots % 2) == 0);
5289
5290 getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
5291}
5292
5293void CodeGen::genPopFltRegs(regMaskTP regMask)
5294{
    assert(regMask != 0);                        // Don't call unless we have some registers to pop
    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
5297
5298 regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
5299 int slots = genCountBits(regMask);
5300 // regMask should be contiguously set
5301 regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
5302 assert((tmpMask & (tmpMask - 1)) == 0);
5303
5304 // Our calling convention requires that we only use vpop for TYP_DOUBLE registers
5305 noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
5306 noway_assert((slots % 2) == 0);
5307
5308 getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2);
5309}
5310
5311/*-----------------------------------------------------------------------------
5312 *
5313 * If we have a jmp call, then the argument registers cannot be used in the
5314 * epilog. So return the current call's argument registers as the argument
5315 * registers for the jmp call.
5316 */
5317regMaskTP CodeGen::genJmpCallArgMask()
5318{
5319 assert(compiler->compGeneratingEpilog);
5320
5321 regMaskTP argMask = RBM_NONE;
5322 for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum)
5323 {
5324 const LclVarDsc& desc = compiler->lvaTable[varNum];
5325 if (desc.lvIsRegArg)
5326 {
5327 argMask |= genRegMask(desc.lvArgReg);
5328 }
5329 }
5330 return argMask;
5331}
5332
5333/*-----------------------------------------------------------------------------
5334 *
5335 * Free the local stack frame: add to SP.
5336 * If epilog unwind hasn't been started, and we generate code, we start unwind
5337 * and set *pUnwindStarted = true.
5338 */
5339
5340void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog)
5341{
5342 assert(compiler->compGeneratingEpilog);
5343
5344 if (frameSize == 0)
5345 return;
5346
5347 // Add 'frameSize' to SP.
5348 //
5349 // Unfortunately, we can't just use:
5350 //
5351 // inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
5352 //
5353 // because we need to generate proper unwind codes for each instruction generated,
5354 // and large frame sizes might generate a temp register load which might
5355 // need an unwind code. We don't want to generate a "NOP" code for this
5356 // temp register load; we want the unwind codes to start after that.
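    // For example (illustrative), a frame size too large for an immediate "add" expands to roughly:
    //      movw rTmp, #lo16(frameSize)   ; the immediate load gets no unwind codes
    //      movt rTmp, #hi16(frameSize)
    //      add  sp, sp, rTmp             ; reported via unwindAllocStack(frameSize)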
5357
5358 if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
5359 {
5360 if (!*pUnwindStarted)
5361 {
5362 compiler->unwindBegEpilog();
5363 *pUnwindStarted = true;
5364 }
5365
5366 getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);
5367 }
5368 else
5369 {
5370 regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
5371 if (jmpEpilog)
5372 {
5373 // Do not use argument registers as scratch registers in the jmp epilog.
5374 grabMask &= ~genJmpCallArgMask();
5375 }
5376 regNumber tmpReg = REG_TMP_0;
5377 instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
5378 if (*pUnwindStarted)
5379 {
5380 compiler->unwindPadding();
5381 }
5382
5383 // We're going to generate an unwindable instruction, so check again if
5384 // we need to start the unwind codes.
5385
5386 if (!*pUnwindStarted)
5387 {
5388 compiler->unwindBegEpilog();
5389 *pUnwindStarted = true;
5390 }
5391
5392 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);
5393 }
5394
5395 compiler->unwindAllocStack(frameSize);
5396}
5397
5398/*-----------------------------------------------------------------------------
5399 *
5400 * Move of relocatable displacement value to register
5401 */
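// The emitted sequence is roughly:
//      movw reg, #:lower16:<disp to block>
//      movt reg, #:upper16:<disp to block>
//      add  reg, reg, pc        ; only when JIT_FLAG_RELATIVE_CODE_RELOCS is set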
5402void CodeGen::genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg)
5403{
5404 getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block, reg);
5405 getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block, reg);
5406
5407 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5408 {
5409 getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE_DSP_RELOC, reg, reg, REG_PC);
5410 }
5411}
5412
5413/*-----------------------------------------------------------------------------
5414 *
5415 * Move of relocatable data-label to register
5416 */
5417void CodeGen::genMov32RelocatableDataLabel(unsigned value, regNumber reg)
5418{
5419 getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, value, reg);
5420 getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, value, reg);
5421
5422 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5423 {
5424 getEmitter()->emitIns_R_R_R(INS_add, EA_HANDLE_CNS_RELOC, reg, reg, REG_PC);
5425 }
5426}
5427
5428/*-----------------------------------------------------------------------------
5429 *
5430 * Move of relocatable immediate to register
5431 */
5432void CodeGen::genMov32RelocatableImmediate(emitAttr size, BYTE* addr, regNumber reg)
5433{
5434 _ASSERTE(EA_IS_RELOC(size));
5435
5436 getEmitter()->emitIns_MovRelocatableImmediate(INS_movw, size, reg, addr);
5437 getEmitter()->emitIns_MovRelocatableImmediate(INS_movt, size, reg, addr);
5438
5439 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS))
5440 {
5441 getEmitter()->emitIns_R_R_R(INS_add, size, reg, reg, REG_PC);
5442 }
5443}
5444
5445/*-----------------------------------------------------------------------------
5446 *
5447 * Returns register mask to push/pop to allocate a small stack frame,
5448 * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
5449 * is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
5450 */
5451regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
5452{
5453 assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);
5454
    // We can't do this optimization with callee saved floating point registers because
    // the stack would be allocated in the wrong spot.
5457 if (maskCalleeSavedFloat != RBM_NONE)
5458 return RBM_NONE;
5459
    // Allocate space for small frames by pushing extra registers. It generates smaller and faster code
    // than an extra "sub sp,XXX" / "add sp,XXX".
    // R0 and R1 may be used by the return value. Keep things simple and just skip the optimization
    // for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and they have more
    // significant negative side-effects (more memory bus traffic).
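    // For example, a one-register-sized frame is allocated by simply adding R3 to the prolog's
    // "push {...}" mask and freed by adding it to the epilog's "pop {...}" mask; the value popped
    // into R3 is just discarded.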
5465 switch (frameSize)
5466 {
5467 case REGSIZE_BYTES:
5468 return RBM_R3;
5469 case 2 * REGSIZE_BYTES:
5470 return RBM_R2 | RBM_R3;
5471 default:
5472 return RBM_NONE;
5473 }
5474}
5475
5476#endif // _TARGET_ARM_
5477
5478/*****************************************************************************
5479 *
5480 * initFltRegs -- The mask of float regs to be zeroed.
5481 * initDblRegs -- The mask of double regs to be zeroed.
5482 * initReg -- A zero initialized integer reg to copy from.
5483 *
 *  Does a best effort to copy between VFP/XMM regs if one is already
 *  initialized to 0 (ARM only); otherwise copies from the integer register,
 *  which is slower.
5487 */
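// For example (illustrative), on ARM once some double register (say d8) has been zeroed, a later
// float register in initFltRegs is zeroed via a double-to-float vcvt from d8 rather than another,
// slower integer-to-VFP move from initReg.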
5488void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
5489{
5490 assert(compiler->compGeneratingProlog);
5491
5492 // The first float/double reg that is initialized to 0. So they can be used to
5493 // initialize the remaining registers.
5494 regNumber fltInitReg = REG_NA;
5495 regNumber dblInitReg = REG_NA;
5496
5497 // Iterate through float/double registers and initialize them to 0 or
5498 // copy from already initialized register of the same type.
5499 regMaskTP regMask = genRegMask(REG_FP_FIRST);
5500 for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
5501 {
5502 if (regMask & initFltRegs)
5503 {
5504 // Do we have a float register already set to 0?
5505 if (fltInitReg != REG_NA)
5506 {
5507 // Copy from float.
5508 inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
5509 }
5510 else
5511 {
5512#ifdef _TARGET_ARM_
5513 // Do we have a double register initialized to 0?
5514 if (dblInitReg != REG_NA)
5515 {
5516 // Copy from double.
5517 inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
5518 }
5519 else
5520 {
5521 // Copy from int.
5522 inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
5523 }
5524#elif defined(_TARGET_XARCH_)
                // XORPS is the fastest and smallest way to initialize an XMM register to zero.
5526 inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
5527 dblInitReg = reg;
5528#elif defined(_TARGET_ARM64_)
5529 // We will just zero out the entire vector register. This sets it to a double/float zero value
5530 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
5531#else // _TARGET_*
5532#error Unsupported or unset target architecture
5533#endif
5534 fltInitReg = reg;
5535 }
5536 }
5537 else if (regMask & initDblRegs)
5538 {
5539 // Do we have a double register already set to 0?
5540 if (dblInitReg != REG_NA)
5541 {
5542 // Copy from double.
5543 inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
5544 }
5545 else
5546 {
5547#ifdef _TARGET_ARM_
5548 // Do we have a float register initialized to 0?
5549 if (fltInitReg != REG_NA)
5550 {
5551 // Copy from float.
5552 inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
5553 }
5554 else
5555 {
5556 // Copy from int.
5557 inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
5558 }
5559#elif defined(_TARGET_XARCH_)
                // XORPS is the fastest and smallest way to initialize an XMM register to zero.
5561 inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
5562 fltInitReg = reg;
5563#elif defined(_TARGET_ARM64_)
5564 // We will just zero out the entire vector register. This sets it to a double/float zero value
5565 getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
5566#else // _TARGET_*
5567#error Unsupported or unset target architecture
5568#endif
5569 dblInitReg = reg;
5570 }
5571 }
5572 }
5573}
5574
5575/*-----------------------------------------------------------------------------
5576 *
5577 * Restore any callee-saved registers we have used
5578 */
5579
5580#if defined(_TARGET_ARM_)
5581
5582bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
5583{
5584 assert(compiler->compGeneratingEpilog);
5585
5586 if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
5587 return true;
5588 else
5589 return false;
5590}
5591
5592void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5593{
5594 assert(compiler->compGeneratingEpilog);
5595
5596 regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5597 regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
5598 regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
5599
5600 // First, pop float registers
5601
5602 if (maskPopRegsFloat != RBM_NONE)
5603 {
5604 genPopFltRegs(maskPopRegsFloat);
5605 compiler->unwindPopMaskFloat(maskPopRegsFloat);
5606 }
5607
5608 // Next, pop integer registers
5609
5610 if (!jmpEpilog)
5611 {
5612 regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
5613 maskPopRegsInt |= maskStackAlloc;
5614 }
5615
5616 if (isFramePointerUsed())
5617 {
5618 assert(!regSet.rsRegsModified(RBM_FPBASE));
5619 maskPopRegsInt |= RBM_FPBASE;
5620 }
5621
5622 if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
5623 {
5624 maskPopRegsInt |= RBM_PC;
5625 // Record the fact that we use a pop to the PC to perform the return
5626 genUsedPopToReturn = true;
5627 }
5628 else
5629 {
5630 maskPopRegsInt |= RBM_LR;
5631 // Record the fact that we did not use a pop to the PC to perform the return
5632 genUsedPopToReturn = false;
5633 }
5634
5635 assert(FitsIn<int>(maskPopRegsInt));
5636 inst_IV(INS_pop, (int)maskPopRegsInt);
5637 compiler->unwindPopMaskInt(maskPopRegsInt);
5638}
5639
5640#elif defined(_TARGET_ARM64_)
5641
5642void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
5643{
5644 assert(compiler->compGeneratingEpilog);
5645
5646 regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
5647
5648 if (isFramePointerUsed())
5649 {
5650 rsRestoreRegs |= RBM_FPBASE;
5651 }
5652
5653 rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)
5654
5655 regMaskTP regsToRestoreMask = rsRestoreRegs;
5656
5657 int totalFrameSize = genTotalFrameSize();
5658
5659 int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
5660 // order.
5661 int frameType = 0; // An indicator of what type of frame we are popping.
5662 int calleeSaveSPDelta = 0;
5663 int calleeSaveSPDeltaUnaligned = 0;
5664
5665 if (isFramePointerUsed())
5666 {
5667 if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
5668 {
5669 frameType = 1;
5670 if (compiler->compLocallocUsed)
5671 {
5672 // Restore sp from fp
5673 // mov sp, fp
5674 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
5675 compiler->unwindSetFrameReg(REG_FPBASE, 0);
5676 }
5677
5678 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
5679
5680 // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
5681 // of stack.
5682 calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
5683 }
5684 else if (totalFrameSize <= 512)
5685 {
5686 frameType = 2;
5687 if (compiler->compLocallocUsed)
5688 {
5689 // Restore sp from fp
5690 // sub sp, fp, #outsz
5691 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
5692 compiler->lvaOutgoingArgSpaceSize);
5693 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5694 }
5695
5696 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
5697
5698 // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
5699 // of stack.
5700 calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
5701 }
5702 else
5703 {
5704 frameType = 3;
5705
5706 calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
5707 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
5708 assert(calleeSaveSPDeltaUnaligned >= 0);
5709 assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
5710 calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
5711
5712 regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
5713
5714 int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
5715 assert(remainingFrameSz > 0);
5716
5717 if (compiler->lvaOutgoingArgSpaceSize >= 504)
5718 {
5719 // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
5720 // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
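                // (Same encoding limit as in the prolog: the ldp immediate is a signed 7-bit value
                // scaled by 8, so offsets above 504 bytes can't be encoded directly.)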
5721 assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
5722 int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
5723 int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN);
5724 int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
5725 assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES));
5726
5727 if (compiler->compLocallocUsed)
5728 {
5729 // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
5730 // prolog.
5731 // sub sp, fp, #alignmentAdjustment2
5732 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
5733 compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
5734 }
5735 else
5736 {
5737 // Generate:
5738 // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more
5739 // ; careful
5740 int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
5741 assert(spAdjustment3 > 0);
5742 assert((spAdjustment3 % 16) == 0);
5743 genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);
5744 }
5745
5746 // Generate:
                //      ldp fp,lr,[sp,#alignmentAdjustment2]
                //      add sp,sp,#spAdjustment2
5749 genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, REG_IP1, nullptr);
5750 }
5751 else
5752 {
5753 if (compiler->compLocallocUsed)
5754 {
5755 // Restore sp from fp
5756 // sub sp, fp, #outsz
5757 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
5758 compiler->lvaOutgoingArgSpaceSize);
5759 compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
5760 }
5761
5762 // Generate:
5763 // ldp fp,lr,[sp,#outsz]
5764 // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
5765 // ; it's large
5766
5767 genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP1,
5768 nullptr);
5769 }
5770
5771 // Unlike frameType=1 or frameType=2 that restore SP at the end,
5772 // frameType=3 already adjusted SP above to delete local frame.
5773 // There is at most one alignment slot between SP and where we store the callee-saved registers.
5774 calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
5775 assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
5776 }
5777 }
5778 else
5779 {
5780 // No frame pointer (no chaining).
5781 NYI("Frame without frame pointer");
5782 calleeSaveSPOffset = 0;
5783 }
5784
5785 genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
5786
5787 if (frameType == 1)
5788 {
5789 // Generate:
5790 // ldp fp,lr,[sp],#framesz
5791
5792 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
5793 INS_OPTS_POST_INDEX);
5794 compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
5795 }
5796 else if (frameType == 2)
5797 {
5798 // Generate:
        //      ldp fp,lr,[sp,#outsz]
5800 // add sp,sp,#framesz
5801
5802 getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
5803 compiler->lvaOutgoingArgSpaceSize);
5804 compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
5805
5806 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
5807 compiler->unwindAllocStack(totalFrameSize);
5808 }
5809 else if (frameType == 3)
5810 {
5811 // Nothing to do after restoring callee-saved registers.
5812 }
5813 else
5814 {
5815 unreached();
5816 }
5817}
5818
5819#elif defined(_TARGET_XARCH_)
5820
5821void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5822{
5823 assert(compiler->compGeneratingEpilog);
5824
5825 unsigned popCount = 0;
5826 if (regSet.rsRegsModified(RBM_EBX))
5827 {
5828 popCount++;
5829 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
5830 }
5831 if (regSet.rsRegsModified(RBM_FPBASE))
5832 {
5833 // EBP cannot be directly modified for EBP frame and double-aligned frames
5834 assert(!doubleAlignOrFramePointerUsed());
5835
5836 popCount++;
5837 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
5838 }
5839
5840#ifndef UNIX_AMD64_ABI
    // For the System V AMD64 calling convention, ESI and EDI are volatile (caller-saved) registers,
    // so they are not restored here.
5842 if (regSet.rsRegsModified(RBM_ESI))
5843 {
5844 popCount++;
5845 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
5846 }
5847 if (regSet.rsRegsModified(RBM_EDI))
5848 {
5849 popCount++;
5850 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
5851 }
5852#endif // !defined(UNIX_AMD64_ABI)
5853
5854#ifdef _TARGET_AMD64_
5855 if (regSet.rsRegsModified(RBM_R12))
5856 {
5857 popCount++;
5858 inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
5859 }
5860 if (regSet.rsRegsModified(RBM_R13))
5861 {
5862 popCount++;
5863 inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
5864 }
5865 if (regSet.rsRegsModified(RBM_R14))
5866 {
5867 popCount++;
5868 inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
5869 }
5870 if (regSet.rsRegsModified(RBM_R15))
5871 {
5872 popCount++;
5873 inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
5874 }
5875#endif // _TARGET_AMD64_
5876
5877 // Amd64/x86 doesn't support push/pop of xmm registers.
5878 // These will get saved to stack separately after allocating
    // space on the stack in the prolog sequence. popCount is essentially
    // tracking the count of integer registers pushed.
5881
5882 noway_assert(compiler->compCalleeRegsPushed == popCount);
5883}
5884
5885#elif defined(_TARGET_X86_)
5886
5887void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
5888{
5889 assert(compiler->compGeneratingEpilog);
5890
5891 unsigned popCount = 0;
5892
5893 /* NOTE: The EBP-less frame code below depends on the fact that
5894 all of the pops are generated right at the start and
5895 each takes one byte of machine code.
5896 */
5897
5898 if (regSet.rsRegsModified(RBM_FPBASE))
5899 {
5900 // EBP cannot be directly modified for EBP frame and double-aligned frames
5901 noway_assert(!doubleAlignOrFramePointerUsed());
5902
5903 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
5904 popCount++;
5905 }
5906 if (regSet.rsRegsModified(RBM_EBX))
5907 {
5908 popCount++;
5909 inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
5910 }
5911 if (regSet.rsRegsModified(RBM_ESI))
5912 {
5913 popCount++;
5914 inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
5915 }
5916 if (regSet.rsRegsModified(RBM_EDI))
5917 {
5918 popCount++;
5919 inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
5920 }
5921 noway_assert(compiler->compCalleeRegsPushed == popCount);
5922}
5923
5924#endif // _TARGET_*
5925
5926// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
5927// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
5928regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
5929{
5930#ifdef _TARGET_ARM64_
5931 return REG_ZR;
5932#else // !_TARGET_ARM64_
5933 if (*pInitRegZeroed == false)
5934 {
5935 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
5936 *pInitRegZeroed = true;
5937 }
5938 return initReg;
5939#endif // !_TARGET_ARM64_
5940}
5941
5942/*-----------------------------------------------------------------------------
5943 *
5944 * Do we have any untracked pointer locals at all,
5945 * or do we need to initialize memory for locspace?
5946 *
5947 * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end
5948 * initializing memory (not inclusive).
5949 * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero
5950 * initializing memory.
5951 * initReg - A scratch register (that gets set to zero on some platforms).
5952 * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed.
5953 */
5954void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
5955{
5956 assert(compiler->compGeneratingProlog);
5957
5958 if (genUseBlockInit)
5959 {
5960 assert(untrLclHi > untrLclLo);
5961#ifdef _TARGET_ARMARCH_
5962 /*
5963 Generate the following code:
5964
5965 For cnt less than 10
5966
5967 mov rZero1, 0
5968 mov rZero2, 0
5969 mov rCnt, <cnt>
5970 stm <rZero1,rZero2>,[rAddr!]
5971 <optional> stm <rZero1,rZero2>,[rAddr!]
5972 <optional> stm <rZero1,rZero2>,[rAddr!]
5973 <optional> stm <rZero1,rZero2>,[rAddr!]
5974 <optional> str rZero1,[rAddr]
5975
5976 For rCnt greater than or equal to 10
5977
5978 mov rZero1, 0
5979 mov rZero2, 0
5980 mov rCnt, <cnt/2>
5981 sub rAddr, sp, OFFS
5982
5983 loop:
5984 stm <rZero1,rZero2>,[rAddr!]
5985 sub rCnt,rCnt,1
5986 jnz loop
5987
5988 <optional> str rZero1,[rAddr] // When cnt is odd
5989
5990 NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
5991 */
5992
5993 regNumber rAddr;
5994 regNumber rCnt = REG_NA; // Invalid
5995 regMaskTP regMask;
5996
5997 regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
5998 availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
5999 // currently live
6000 availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
6001 // a large constant.
6002
6003#if defined(_TARGET_ARM_)
6004
6005 if (compiler->compLocallocUsed)
6006 {
6007 availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
6008 }
6009
6010 regNumber rZero1; // We're going to use initReg for rZero1
6011 regNumber rZero2;
6012
6013 // We pick the next lowest register number for rZero2
6014 noway_assert(availMask != RBM_NONE);
6015 regMask = genFindLowestBit(availMask);
6016 rZero2 = genRegNumFromMask(regMask);
6017 availMask &= ~regMask;
6018 assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6019 0); // rZero2 is not a live incoming argument reg
6020
6021 // We pick the next lowest register number for rAddr
6022 noway_assert(availMask != RBM_NONE);
6023 regMask = genFindLowestBit(availMask);
6024 rAddr = genRegNumFromMask(regMask);
6025 availMask &= ~regMask;
6026
#else // !defined(_TARGET_ARM_)
6028
6029 regNumber rZero1 = REG_ZR;
6030 rAddr = initReg;
6031 *pInitRegZeroed = false;
6032
6033#endif // !defined(_TARGET_ARM_)
6034
6035 bool useLoop = false;
6036 unsigned uCntBytes = untrLclHi - untrLclLo;
6037 assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes.
6038 unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.
6039
6040 // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
6041 // When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
6042 // In both of these cases the stm/stp instruction will write two zeros to memory
6043 // and we will use a single str instruction at the end whenever we have an odd count.
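        // For example (hypothetical), 88 bytes of untracked locals on ARM64 gives uCntSlots = 11, so we
        // use the loop with rCnt = 5 (zeroing 80 bytes), followed by a single trailing str of ZR for the
        // remaining 8 bytes.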
6044 if (uCntSlots >= 10)
6045 useLoop = true;
6046
6047 if (useLoop)
6048 {
6049 // We pick the next lowest register number for rCnt
6050 noway_assert(availMask != RBM_NONE);
6051 regMask = genFindLowestBit(availMask);
6052 rCnt = genRegNumFromMask(regMask);
6053 availMask &= ~regMask;
6054 }
6055
6056 assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6057 0); // rAddr is not a live incoming argument reg
6058#if defined(_TARGET_ARM_)
6059 if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
6060#else // !_TARGET_ARM_
6061 if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
6062#endif // !_TARGET_ARM_
6063 {
6064 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
6065 }
6066 else
6067 {
6068 // Load immediate into the InitReg register
6069 instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
6070 getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
6071 *pInitRegZeroed = false;
6072 }
6073
6074 if (useLoop)
6075 {
6076 noway_assert(uCntSlots >= 2);
6077 assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6078 0); // rCnt is not a live incoming argument reg
6079 instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
6080 }
6081
6082#if defined(_TARGET_ARM_)
6083 rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
6084 instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
6085 target_ssize_t stmImm = (target_ssize_t)(genRegMask(rZero1) | genRegMask(rZero2));
6086#endif // _TARGET_ARM_
6087
6088 if (!useLoop)
6089 {
6090 while (uCntBytes >= REGSIZE_BYTES * 2)
6091 {
6092#ifdef _TARGET_ARM_
6093 getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
6094#else // !_TARGET_ARM_
6095 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
6096 INS_OPTS_POST_INDEX);
6097#endif // !_TARGET_ARM_
6098 uCntBytes -= REGSIZE_BYTES * 2;
6099 }
6100 }
6101 else // useLoop is true
6102 {
6103#ifdef _TARGET_ARM_
6104 getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
6105 getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
6106#else // !_TARGET_ARM_
6107 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
6108 INS_OPTS_POST_INDEX); // zero stack slots
6109 getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1);
6110#endif // !_TARGET_ARM_
6111 getEmitter()->emitIns_J(INS_bhi, NULL, -3);
6112 uCntBytes %= REGSIZE_BYTES * 2;
6113 }
6114
6115 if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
6116 {
6117#ifdef _TARGET_ARM_
6118 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0);
6119#else // _TARGET_ARM_
6120 if ((uCntBytes - REGSIZE_BYTES) == 0)
6121 {
6122 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0);
6123 }
6124 else
6125 {
6126 getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
6127 }
6128#endif // !_TARGET_ARM_
6129 uCntBytes -= REGSIZE_BYTES;
6130 }
6131#ifdef _TARGET_ARM64_
6132 if (uCntBytes > 0)
6133 {
6134 assert(uCntBytes == sizeof(int));
6135 getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0);
6136 uCntBytes -= sizeof(int);
6137 }
6138#endif // _TARGET_ARM64_
6139 noway_assert(uCntBytes == 0);
6140
6141#elif defined(_TARGET_XARCH_)
6142 /*
6143 Generate the following code:
6144
6145 lea edi, [ebp/esp-OFFS]
6146 mov ecx, <size>
6147 xor eax, eax
6148 rep stosd
6149 */
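        // For example (illustrative), zeroing 64 bytes of untracked locals at [ebp-0x40] becomes:
        //      lea edi, [ebp-0x40]
        //      mov ecx, 16
        //      xor eax, eax
        //      rep stosd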
6150
6151 noway_assert(regSet.rsRegsModified(RBM_EDI));
6152
6153#ifdef UNIX_AMD64_ABI
    // For register arguments we may have to save RCX and RDI on Amd64 System V OSes
6155 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
6156 {
6157 noway_assert(regSet.rsRegsModified(RBM_R12));
6158 inst_RV_RV(INS_mov, REG_R12, REG_RCX);
6159 regSet.verifyRegUsed(REG_R12);
6160 }
6161
6162 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
6163 {
6164 noway_assert(regSet.rsRegsModified(RBM_R13));
6165 inst_RV_RV(INS_mov, REG_R13, REG_RDI);
6166 regSet.verifyRegUsed(REG_R13);
6167 }
6168#else // !UNIX_AMD64_ABI
6169 // For register arguments we may have to save ECX
6170 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
6171 {
6172 noway_assert(regSet.rsRegsModified(RBM_ESI));
6173 inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
6174 regSet.verifyRegUsed(REG_ESI);
6175 }
6176#endif // !UNIX_AMD64_ABI
6177
6178 noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0);
6179
6180 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
6181 regSet.verifyRegUsed(REG_EDI);
6182
6183 inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
6184 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
6185 instGen(INS_r_stosd);
6186
6187#ifdef UNIX_AMD64_ABI
6188 // Move back the argument registers
6189 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
6190 {
6191 inst_RV_RV(INS_mov, REG_RCX, REG_R12);
6192 }
6193
6194 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
6195 {
6196 inst_RV_RV(INS_mov, REG_RDI, REG_R13);
6197 }
6198#else // !UNIX_AMD64_ABI
6199 // Move back the argument registers
6200 if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
6201 {
6202 inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
6203 }
6204#endif // !UNIX_AMD64_ABI
6205
6206#else // _TARGET_*
6207#error Unsupported or unset target architecture
6208#endif // _TARGET_*
6209 }
6210 else if (genInitStkLclCnt > 0)
6211 {
6212 assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
6213 0); // initReg is not a live incoming argument reg
6214
6215 /* Initialize any lvMustInit vars on the stack */
6216
6217 LclVarDsc* varDsc;
6218 unsigned varNum;
6219
6220 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
6221 {
6222 if (!varDsc->lvMustInit)
6223 {
6224 continue;
6225 }
6226
6227 // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
6228 // partially-enregistered vars in the case where we don't use a block init.
6229 noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);
6230
6231 // lvMustInit can only be set for GC types or TYP_STRUCT types
6232 // or when compInitMem is true
6233 // or when in debug code
6234
6235 noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
6236 compiler->info.compInitMem || compiler->opts.compDbgCode);
6237
6238 if (!varDsc->lvOnFrame)
6239 {
6240 continue;
6241 }
6242
6243 if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
6244 (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
6245 {
6246 // We only initialize the GC variables in the TYP_STRUCT
6247 const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
6248 const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
6249
6250 for (unsigned i = 0; i < slots; i++)
6251 {
6252 if (gcPtrs[i] != TYPE_GC_NONE)
6253 {
6254 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
6255 genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
6256 }
6257 }
6258 }
6259 else
6260 {
6261 regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
6262
6263 // zero out the whole thing rounded up to a single stack slot size
6264 unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int));
6265 unsigned i;
6266 for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
6267 {
6268 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
6269 }
6270
6271#ifdef _TARGET_64BIT_
6272 assert(i == lclSize || (i + sizeof(int) == lclSize));
6273 if (i != lclSize)
6274 {
6275 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
6276 i += sizeof(int);
6277 }
6278#endif // _TARGET_64BIT_
6279 assert(i == lclSize);
6280 }
6281 }
6282
6283 if (!TRACK_GC_TEMP_LIFETIMES)
6284 {
6285 assert(regSet.tmpAllFree());
6286 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
6287 {
6288 if (!varTypeIsGC(tempThis->tdTempType()))
6289 {
6290 continue;
6291 }
6292
6293 // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
6294
6295 inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
6296 }
6297 }
6298 }
6299}
6300
6301/*-----------------------------------------------------------------------------
6302 *
6303 * Save the generic context argument.
6304 *
6305 * We need to do this within the "prolog" in case anyone tries to inspect
6306 * the param-type-arg/this (which can be done after the prolog) using
6307 * ICodeManager::GetParamTypeArg().
6308 */
6309
6310void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
6311{
6312 assert(compiler->compGeneratingProlog);
6313
6314 bool reportArg = compiler->lvaReportParamTypeArg();
6315
6316 // We should report either generic context arg or "this" when used so.
6317 if (!reportArg)
6318 {
6319#ifndef JIT32_GCENCODER
6320 if (!compiler->lvaKeepAliveAndReportThis())
6321#endif
6322 {
6323 return;
6324 }
6325 }
6326
6327 // For JIT32_GCENCODER, we won't be here if reportArg is false.
6328 unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
6329
6330 noway_assert(contextArg != BAD_VAR_NUM);
6331 LclVarDsc* varDsc = &compiler->lvaTable[contextArg];
6332
6333 // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
6334 // moved to its final home location. So we need to use it from the
6335 // incoming location.
6336
6337 regNumber reg;
6338
6339 bool isPrespilledForProfiling = false;
6340#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
6341 isPrespilledForProfiling =
6342 compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
6343#endif
6344
6345 // Load from the argument register only if it is not prespilled.
6346 if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
6347 {
6348 reg = varDsc->lvArgReg;
6349 }
6350 else
6351 {
6352 if (isFramePointerUsed())
6353 {
6354#if defined(_TARGET_ARM_)
6355 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
6356 // will become enregistered.
6357 // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
6358 noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
6359 (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES));
6360#else
6361 // lvStkOffs is always valid for incoming stack-arguments, even if the argument
6362 // will become enregistered.
6363 noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
6364#endif
6365 }
6366
6367 // We will just use the initReg since it is an available register
6368 // and we are probably done using it anyway...
6369 reg = initReg;
6370 *pInitRegZeroed = false;
6371
6372 // mov reg, [compiler->info.compTypeCtxtArg]
6373 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
6374 regSet.verifyRegUsed(reg);
6375 }
6376
6377#if CPU_LOAD_STORE_ARCH
6378 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
6379 compiler->lvaCachedGenericContextArgOffset());
6380#else // CPU_LOAD_STORE_ARCH
6381 // mov [ebp-lvaCachedGenericContextArgOffset()], reg
6382 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
6383 compiler->lvaCachedGenericContextArgOffset());
6384#endif // !CPU_LOAD_STORE_ARCH
6385}
6386
6387/*-----------------------------------------------------------------------------
6388 *
6389 * Set the "GS" security cookie in the prolog.
6390 */
6391
6392void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
6393{
6394 assert(compiler->compGeneratingProlog);
6395
6396 if (!compiler->getNeedsGSSecurityCookie())
6397 {
6398 return;
6399 }
6400
6401 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
6402
6403 if (compiler->gsGlobalSecurityCookieAddr == nullptr)
6404 {
6405#ifdef _TARGET_AMD64_
6406 // eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
6407 getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
6408 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0);
6409#else
6410 // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
6411 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
6412 compiler->lvaGSSecurityCookie, 0, initReg);
6413#endif
6414 }
6415 else
6416 {
6417 regNumber reg;
6418#ifdef _TARGET_XARCH_
6419 // Always use EAX on x86 and x64
6420 // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
6421 reg = REG_EAX;
6422#else
6423 // We will just use the initReg since it is an available register
6424 reg = initReg;
6425#endif
6426
6427 *pInitRegZeroed = false;
6428
6429#if CPU_LOAD_STORE_ARCH
6430 instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
6431 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
6432 regSet.verifyRegUsed(reg);
6433#else
6434 // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
6435 // mov dword ptr [frame.GSSecurityCookie], reg
6436 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
6437 regSet.verifyRegUsed(reg);
6438#endif
6439 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0);
6440 }
6441}
6442
6443#ifdef PROFILING_SUPPORTED
6444
6445//-----------------------------------------------------------------------------------
6446// genProfilingEnterCallback: Generate the profiling function enter callback.
6447//
6448// Arguments:
6449// initReg - register to use as scratch register
6450// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
6451// not zero after this call.
6452//
6453// Return Value:
6454// None
6455//
6456// Notes:
6457// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
6458// VM\i386\asmhelpers.asm for details):
6459// 1. The calling sequence for calling the helper is:
6460// push FunctionIDOrClientID
6461// call ProfileEnterHelper
6462// 2. The calling function has an EBP frame.
6463// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
6464// the following prolog is assumed:
6465// push ESP
6466// mov EBP, ESP
6467// 4. All registers are preserved.
6468// 5. The helper pops the FunctionIDOrClientID argument from the stack.
6469//
6470void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
6471{
6472 assert(compiler->compGeneratingProlog);
6473
6474 // Give profiler a chance to back out of hooking this method
6475 if (!compiler->compIsProfilerHookNeeded())
6476 {
6477 return;
6478 }
6479
6480#if defined(_TARGET_AMD64_)
6481#if !defined(UNIX_AMD64_ABI)
6482
6483 unsigned varNum;
6484 LclVarDsc* varDsc;
6485
6486 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6487 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6488 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
6489
6490 // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
6491 // In case of vararg methods, arg regs are already homed.
6492 //
    // Note: Here we don't need to worry about updating GC info, since the enter
    // callback is generated as part of the prolog, which is non-GC interruptible.
    // Moreover, the GC cannot kick in while executing inside the profiler callback, which is a
    // profiler requirement, so that it can examine arguments which could be obj refs.
6497 if (!compiler->info.compIsVarArgs)
6498 {
6499 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6500 {
6501 noway_assert(varDsc->lvIsParam);
6502
6503 if (!varDsc->lvIsRegArg)
6504 {
6505 continue;
6506 }
6507
6508 var_types storeType = varDsc->lvaArgType();
6509 regNumber argReg = varDsc->lvArgReg;
6510
6511 instruction store_ins = ins_Store(storeType);
6512
6513#ifdef FEATURE_SIMD
6514 if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
6515 {
6516 store_ins = INS_mov;
6517 }
6518#endif // FEATURE_SIMD
6519
6520 getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
6521 }
6522 }
6523
6524 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6525 // RCX = ProfilerMethHnd
6526 if (compiler->compProfilerMethHndIndirected)
6527 {
6528 // Profiler hooks enabled during Ngen time.
6529 // Profiler handle needs to be accessed through an indirection of a pointer.
6530 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6531 }
6532 else
6533 {
6534 // No need to record relocations, if we are generating ELT hooks under the influence
6535 // of COMPlus_JitELTHookEnabled=1
6536 if (compiler->opts.compJitELTHookEnabled)
6537 {
6538 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6539 }
6540 else
6541 {
6542 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6543 }
6544 }
6545
6546 // RDX = caller's SP
6547 // Notes
6548 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6549 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6550 // of that offset to FramePointer to obtain caller's SP value.
6551 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6552 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6553 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6554
6555 // Can't have a call until we have enough padding for rejit
6556 genPrologPadForReJit();
6557
6558 // This will emit either
6559 // "call ip-relative 32-bit offset" or
6560 // "mov rax, helper addr; call rax"
6561 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
6562
6563 // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
6564 // generation logic that moves args around as required by first BB entry point conditions
6565 // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
6566 // and genEnregisterIncomingStackArgs().
6567 //
6568 // Now reload arg registers from home locations.
6569 // Vararg methods:
6570 // - we need to reload only known (i.e. fixed) reg args.
6571 // - if floating point type, also reload it into corresponding integer reg
6572 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
6573 {
6574 noway_assert(varDsc->lvIsParam);
6575
6576 if (!varDsc->lvIsRegArg)
6577 {
6578 continue;
6579 }
6580
6581 var_types loadType = varDsc->lvaArgType();
6582 regNumber argReg = varDsc->lvArgReg;
6583
6584 instruction load_ins = ins_Load(loadType);
6585
6586#ifdef FEATURE_SIMD
6587 if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
6588 {
6589 load_ins = INS_mov;
6590 }
6591#endif // FEATURE_SIMD
6592
6593 getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
6594
6595#if FEATURE_VARARG
6596 if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
6597 {
6598 regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
6599 instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
6600 inst_RV_RV(ins, argReg, intArgReg, loadType);
6601 }
6602#endif // FEATURE_VARARG
6603 }
6604
6605 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6606 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
6607 {
6608 *pInitRegZeroed = false;
6609 }
6610
6611#else // !defined(UNIX_AMD64_ABI)
6612
6613 // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
6614 // R14 = ProfilerMethHnd
6615 if (compiler->compProfilerMethHndIndirected)
6616 {
6617 // Profiler hooks enabled during Ngen time.
6618 // Profiler handle needs to be accessed through an indirection of a pointer.
6619 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
6620 (ssize_t)compiler->compProfilerMethHnd);
6621 }
6622 else
6623 {
6624 // No need to record relocations, if we are generating ELT hooks under the influence
6625 // of COMPlus_JitELTHookEnabled=1
6626 if (compiler->opts.compJitELTHookEnabled)
6627 {
6628 genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6629 }
6630 else
6631 {
6632 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6633 }
6634 }
6635
6636 // R15 = caller's SP
6637 // Notes
6638 // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
6639 // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
6640 // of that offset to FramePointer to obtain caller's SP value.
6641 assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6642 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6643 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
6644
6645 // Can't have a call until we have enough padding for rejit
6646 genPrologPadForReJit();
6647
6648 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6649 // We use R11 here. This will emit either
6650 // "call ip-relative 32-bit offset" or
6651 // "mov r11, helper addr; call r11"
6652 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6653
6654 // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
6655 if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
6656 {
6657 *pInitRegZeroed = false;
6658 }
6659
6660#endif // !defined(UNIX_AMD64_ABI)
6661
6662#elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
6663
6664 unsigned saveStackLvl2 = genStackLevel;
6665
6666#if defined(_TARGET_X86_)
6667// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
6668// for x86 stack unwinding
6669
6670#if defined(UNIX_X86_ABI)
6671 // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6672 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
6673#endif // UNIX_X86_ABI
6674
6675 // Push the profilerHandle
6676 if (compiler->compProfilerMethHndIndirected)
6677 {
6678 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6679 }
6680 else
6681 {
6682 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6683 }
6684
6685#elif defined(_TARGET_ARM_)
    // On ARM, arguments are prespilled on the stack, which frees r0-r3.
    // For generating the Enter callout we need two registers, and one of them has to be r0 to pass the profiler
    // handle. The call target register could be any free register.
6689 regNumber argReg = REG_PROFILER_ENTER_ARG;
6690 regMaskTP argRegMask = genRegMask(argReg);
6691 assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0);
6692
6693 if (compiler->compProfilerMethHndIndirected)
6694 {
6695 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
6696 regSet.verifyRegUsed(argReg);
6697 }
6698 else
6699 {
6700 instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
6701 }
6702#else // _TARGET_*
6703 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6704#endif // _TARGET_*
6705
6706 //
6707 // Can't have a call until we have enough padding for rejit
6708 //
6709 genPrologPadForReJit();
6710
6711 // This will emit either
6712 // "call ip-relative 32-bit offset" or
6713 // "mov rax, helper addr; call rax"
6714 genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
6715 0, // argSize. Again, we have to lie about it
6716 EA_UNKNOWN); // retSize
6717
6718#if defined(_TARGET_X86_)
6719 // Check that we have place for the push.
6720 assert(compiler->fgPtrArgCntMax >= 1);
6721
6722#if defined(UNIX_X86_ABI)
6723 // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6724 getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
6725#endif // UNIX_X86_ABI
6726
6727#elif defined(_TARGET_ARM_)
6728 if (initReg == argReg)
6729 {
6730 *pInitRegZeroed = false;
6731 }
6732#else // _TARGET_*
6733 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
6734#endif // _TARGET_*
6735
6736 /* Restore the stack level */
6737
6738 SetStackLevel(saveStackLvl2);
6739
6740#else // target
6741 NYI("Emit Profiler Enter callback");
6742#endif // target
6743}
6744
6745//-----------------------------------------------------------------------------------
6746// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
6747// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
6748//
6749// Arguments:
6750// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
6751//
6752// Return Value:
6753// None
6754//
6755// Notes:
6756// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
6757// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
6758// 1. The calling sequence for calling the helper is:
6759// push FunctionIDOrClientID
6760// call ProfileLeaveHelper or ProfileTailcallHelper
6761// 2. The calling function has an EBP frame.
6762// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
6763// the following prolog is assumed:
6764// push ESP
6765// mov EBP, ESP
6766// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
6767// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
6768// 5. The helper pops the FunctionIDOrClientID argument from the stack.
6769//
6770void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
6771{
6772 assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
6773
6774 // Only hook if profiler says it's okay.
6775 if (!compiler->compIsProfilerHookNeeded())
6776 {
6777 return;
6778 }
6779
6780 compiler->info.compProfilerCallback = true;
6781
6782 // Need to save on to the stack level, since the helper call will pop the argument
6783 unsigned saveStackLvl2 = genStackLevel;
6784
6785#if defined(_TARGET_AMD64_)
6786#if !defined(UNIX_AMD64_ABI)
6787
6788 // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
6789 noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
6790 noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
6791
6792 // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
6793 // registers that profiler callback kills.
6794 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
6795 {
6796 regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
6797 noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
6798 }
6799
    // At this point the return value is computed and stored in RAX or XMM0.
    // On Amd64, the Leave callback preserves the return register. We keep
    // RAX alive by not reporting it as trashed by the helper call. Also note
    // that a GC cannot kick in while executing inside the profiler callback,
    // which is a requirement of the profiler as well, since it needs to examine
    // the return value, which could be an obj ref.
6806
6807 // RCX = ProfilerMethHnd
6808 if (compiler->compProfilerMethHndIndirected)
6809 {
        // Profiler hooks were enabled at NGen time.
        // The profiler handle needs to be accessed through an indirection of an address.
6812 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6813 }
6814 else
6815 {
6816 // Don't record relocations, if we are generating ELT hooks under the influence
6817 // of COMPlus_JitELTHookEnabled=1
6818 if (compiler->opts.compJitELTHookEnabled)
6819 {
6820 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6821 }
6822 else
6823 {
6824 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6825 }
6826 }
6827
6828 // RDX = caller's SP
    // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
    // of the statements to execute unconditionally and clean up the rest.
6831 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6832 {
6833 // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
6834 // value of that offset to FramePointer to obtain caller's SP value.
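        // For example, with an RBP-based frame where the caller's SP sits 0x28 bytes above RBP, this emits
        // "lea rdx, [rbp+28H]" (the offset here is purely illustrative).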
6835 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6836 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6837 }
6838 else
6839 {
        // If we are here, it means this is a tentative frame layout, during which we
        // cannot use the caller's SP offset since it is only an estimate. For now we require
        // the method to have at least a single arg so that we can use it to obtain the
        // caller's SP.
6844 LclVarDsc* varDsc = compiler->lvaTable;
6845 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
6846
6847 // lea rdx, [FramePointer + Arg0's offset]
6848 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
6849 }
6850
6851 // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
6852 // We use R8 here. This will emit either
6853 // "call ip-relative 32-bit offset" or
6854 // "mov r8, helper addr; call r8"
6855 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
6856
6857#else // !defined(UNIX_AMD64_ABI)
6858
6859 // RDI = ProfilerMethHnd
6860 if (compiler->compProfilerMethHndIndirected)
6861 {
6862 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6863 }
6864 else
6865 {
6866 if (compiler->opts.compJitELTHookEnabled)
6867 {
6868 genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
6869 }
6870 else
6871 {
6872 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6873 }
6874 }
6875
6876 // RSI = caller's SP
6877 if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
6878 {
6879 int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
6880 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
6881 }
6882 else
6883 {
6884 LclVarDsc* varDsc = compiler->lvaTable;
6885 NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
6886
        // lea rsi, [FramePointer + Arg0's offset]
6888 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
6889 }
6890
6891 // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
6892 // We use R11 here. This will emit either
6893 // "call ip-relative 32-bit offset" or
6894 // "mov r11, helper addr; call r11"
6895 genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
6896
6897#endif // !defined(UNIX_AMD64_ABI)
6898
6899#elif defined(_TARGET_X86_)
6900
6901#if defined(UNIX_X86_ABI)
6902 // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
6903 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
6904 AddStackLevel(0xC);
6905 AddNestedAlignment(0xC);
6906#endif // UNIX_X86_ABI
6907
6908 //
6909 // Push the profilerHandle
6910 //
6911
6912 if (compiler->compProfilerMethHndIndirected)
6913 {
6914 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
6915 }
6916 else
6917 {
6918 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
6919 }
6920 genSinglePush();
6921
6922#if defined(UNIX_X86_ABI)
6923 int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl)
6924#else
6925 int argSize = REGSIZE_BYTES;
6926#endif
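    // A negative argSize tells the emitter that the caller (not the helper) pops the pushed argument, so the
    // stack-level bookkeeping for the call is adjusted accordingly.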
6927 genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */);
6928
    // Check that we have a place for the push.
6930 assert(compiler->fgPtrArgCntMax >= 1);
6931
6932#if defined(UNIX_X86_ABI)
6933 // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
6934 getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
6935 SubtractStackLevel(0x10);
6936 SubtractNestedAlignment(0xC);
6937#endif // UNIX_X86_ABI
6938
6939#elif defined(_TARGET_ARM_)
6940 //
6941 // Push the profilerHandle
6942 //
6943
6944 // Contract between JIT and Profiler Leave callout on arm:
6945 // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
6946 // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
6947 // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
    // It is assumed that profiler Leave callback doesn't trash registers r1, REG_PROFILER_RET_SCRATCH, and s0-s15.
6949 //
6950 // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
6951 // callback.
6952 bool r0Trashed;
6953 emitAttr attr = EA_UNKNOWN;
6954
6955 if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
6956 (varTypeIsFloating(compiler->info.compRetType) ||
6957 compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
6958 {
6959 r0Trashed = false;
6960 }
6961 else
6962 {
        // Has a return value and r0 is in use. For emitting the Leave profiler callout we need r0 for passing the
        // profiler handle. Therefore, r0 is moved to REG_PROFILER_RET_SCRATCH as per the contract.
6965 if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
6966 {
6967 attr = EA_GCREF;
6968 gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
6969 }
6970 else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
6971 {
6972 attr = EA_BYREF;
6973 gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
6974 }
6975 else
6976 {
6977 attr = EA_4BYTE;
6978 }
6979
6980 getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
6981 regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
6982 gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
6983 r0Trashed = true;
6984 }
6985
6986 if (compiler->compProfilerMethHndIndirected)
6987 {
6988 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6989 regSet.verifyRegUsed(REG_ARG_0);
6990 }
6991 else
6992 {
6993 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
6994 }
6995
6996 genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
6997 0, // argSize
6998 EA_UNKNOWN); // retSize
6999
7000 // Restore state that existed before profiler callback
7001 if (r0Trashed)
7002 {
7003 getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
7004 regSet.verifyRegUsed(REG_ARG_0);
7005 gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
7006 }
7007
7008#else // target
7009 NYI("Emit Profiler Leave callback");
7010#endif // target
7011
7012 /* Restore the stack level */
7013 SetStackLevel(saveStackLvl2);
7014}
7015
7016#endif // PROFILING_SUPPORTED
7017
7018/*****************************************************************************
7019
7020Esp frames :
7021----------
7022
7023These instructions are just a reordering of the instructions used today.
7024
7025push ebp
7026push esi
7027push edi
7028push ebx
7029sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7030...
7031add esp, LOCALS_SIZE / pop dummyReg
7032pop ebx
7033pop edi
7034pop esi
7035pop ebp
7036ret
7037
7038Ebp frames :
7039----------
7040
The epilog does "add esp, LOCALS_SIZE" instead of "mov esp, ebp".
7042Everything else is similar, though in a different order.
7043
7044The security object will no longer be at a fixed offset. However, the
7045offset can still be determined by looking up the GC-info and determining
7046how many callee-saved registers are pushed.
7047
7048push ebp
7049mov ebp, esp
7050push esi
7051push edi
7052push ebx
7053sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7054...
7055add esp, LOCALS_SIZE / pop dummyReg
7056pop ebx
7057pop edi
7058pop esi
7059(mov esp, ebp if there are no callee-saved registers)
7060pop ebp
7061ret
7062
7063Double-aligned frame :
7064--------------------
7065
7066LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
7067of callee-saved registers are pushed on the stack so that the locals
7068themselves are qword-aligned. The instructions are the same as today,
7069just in a different order.
7070
7071push ebp
7072mov ebp, esp
7073and esp, 0xFFFFFFFC
7074push esi
7075push edi
7076push ebx
7077sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
7078...
7079add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
7080pop ebx
7081pop edi
7082pop esi
mov esp, ebp
pop ebp
7086ret
7087
7088localloc (with ebp) frames :
7089--------------------------
7090
7091The instructions are the same as today, just in a different order.
7092Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
7093which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
7094
7095push ebp
7096mov ebp, esp
7097push esi
7098push edi
7099push ebx
7100sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
7101...
7102lea esp, [ebp-calleeSavedRegsPushedSize]
7103pop ebx
7104pop edi
7105pop esi
7106(mov esp, ebp if there are no callee-saved registers)
7107pop ebp
7108ret
7109
7110*****************************************************************************/
7111
7112/*****************************************************************************
7113 *
7114 * Generates appropriate NOP padding for a function prolog to support ReJIT.
7115 */
7116
7117void CodeGen::genPrologPadForReJit()
7118{
7119 assert(compiler->compGeneratingProlog);
7120
7121#ifdef _TARGET_XARCH_
7122 if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS))
7123 {
7124 return;
7125 }
7126
7127#if FEATURE_EH_FUNCLETS
7128
7129 // No need to generate pad (nops) for funclets.
7130 // When compiling the main function (and not a funclet)
    // the value of funCurrentFunc()->funKind is equal to FUNC_ROOT.
7132 if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
7133 {
7134 return;
7135 }
7136
7137#endif // FEATURE_EH_FUNCLETS
7138
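    // The ReJIT mechanism patches the start of the method with a jump, which on x86/x64 needs 5 bytes
    // (the size of a rel32 call/jmp). If the estimated prolog is smaller than that, pad it with NOPs.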
7139 unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
7140 if (size < 5)
7141 {
7142 instNop(5 - size);
7143 }
7144#endif
7145}
7146
7147/*****************************************************************************
7148 *
7149 * Reserve space for a function prolog.
7150 */
7151
7152void CodeGen::genReserveProlog(BasicBlock* block)
7153{
7154 assert(block != nullptr);
7155
7156 JITDUMP("Reserving prolog IG for block " FMT_BB "\n", block->bbNum);
7157
7158 /* Nothing is live on entry to the prolog */
7159
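    // The placeholder IG just reserves a spot in the instruction group list; the actual prolog instructions
    // are generated later (see genFnProlog), once the final frame layout is known.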
7160 getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
7161}
7162
7163/*****************************************************************************
7164 *
7165 * Reserve space for a function epilog.
7166 */
7167
7168void CodeGen::genReserveEpilog(BasicBlock* block)
7169{
7170 regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
7171 regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
7172
7173 /* The return value is special-cased: make sure it goes live for the epilog */
7174
7175 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
7176
7177 if (genFullPtrRegMap && !jmpEpilog)
7178 {
7179 if (varTypeIsGC(compiler->info.compRetNativeType))
7180 {
7181 noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
7182
7183 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
7184
7185 switch (compiler->info.compRetNativeType)
7186 {
7187 case TYP_REF:
7188 gcrefRegsArg |= RBM_INTRET;
7189 break;
7190 case TYP_BYREF:
7191 byrefRegsArg |= RBM_INTRET;
7192 break;
7193 default:
7194 break;
7195 }
7196 }
7197 }
7198
7199 JITDUMP("Reserving epilog IG for block " FMT_BB "\n", block->bbNum);
7200
7201 assert(block != nullptr);
7202 const VARSET_TP& gcrefVarsArg(getEmitter()->emitThisGCrefVars);
7203 bool last = (block->bbNext == nullptr);
7204 getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
7205}
7206
7207#if FEATURE_EH_FUNCLETS
7208
7209/*****************************************************************************
7210 *
7211 * Reserve space for a funclet prolog.
7212 */
7213
7214void CodeGen::genReserveFuncletProlog(BasicBlock* block)
7215{
7216 assert(block != nullptr);
7217
7218 /* Currently, no registers are live on entry to the prolog, except maybe
7219 the exception object. There might be some live stack vars, but they
7220 cannot be accessed until after the frame pointer is re-established.
7221 In order to potentially prevent emitting a death before the prolog
7222 and a birth right after it, we just report it as live during the
7223 prolog, and rely on the prolog being non-interruptible. Trust
7224 genCodeForBBlist to correctly initialize all the sets.
7225
7226 We might need to relax these asserts if the VM ever starts
7227 restoring any registers, then we could have live-in reg vars...
7228 */
7229
7230 noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
7231 noway_assert(gcInfo.gcRegByrefSetCur == 0);
7232
7233 JITDUMP("Reserving funclet prolog IG for block " FMT_BB "\n", block->bbNum);
7234
7235 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7236 gcInfo.gcRegByrefSetCur, false);
7237}
7238
7239/*****************************************************************************
7240 *
7241 * Reserve space for a funclet epilog.
7242 */
7243
7244void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
7245{
7246 assert(block != nullptr);
7247
7248 JITDUMP("Reserving funclet epilog IG for block " FMT_BB "\n", block->bbNum);
7249
7250 bool last = (block->bbNext == nullptr);
7251 getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
7252 gcInfo.gcRegByrefSetCur, last);
7253}
7254
7255#endif // FEATURE_EH_FUNCLETS
7256
7257/*****************************************************************************
7258 * Finalize the frame size and offset assignments.
7259 *
7260 * No changes can be made to the modified register set after this, since that can affect how many
7261 * callee-saved registers get saved.
7262 */
7263void CodeGen::genFinalizeFrame()
7264{
7265 JITDUMP("Finalizing stack frame\n");
7266
7267 // Initializations need to happen based on the var locations at the start
7268 // of the first basic block, so load those up. In particular, the determination
7269 // of whether or not to use block init in the prolog is dependent on the variable
7270 // locations on entry to the function.
7271 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
7272
7273 genCheckUseBlockInit();
7274
7275 // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
7276 CLANG_FORMAT_COMMENT_ANCHOR;
7277
7278#if defined(_TARGET_X86_)
7279
7280 if (compiler->compTailCallUsed)
7281 {
7282 // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
7283 // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
7284 // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
7285 // actually get saved.
7286
7287 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
7288 }
7289#endif // _TARGET_X86_
7290
7291#if defined(_TARGET_ARMARCH_)
    // We need to determine whether we will change SP by more than a specific amount in order to decide whether
    // we want to use a loop to touch stack pages; that loop will require multiple registers. See
    // genAllocLclFrame() for details.
7294 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7295 {
7296 regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
7297 }
7298#endif // defined(_TARGET_ARMARCH_)
7299
7300#if defined(_TARGET_ARM_)
    // If there are any reserved registers, add them to the modified set.
7302 if (regSet.rsMaskResvd != RBM_NONE)
7303 {
7304 regSet.rsSetRegsModified(regSet.rsMaskResvd);
7305 }
7306#endif // _TARGET_ARM_
7307
7308#ifdef DEBUG
7309 if (verbose)
7310 {
7311 printf("Modified regs: ");
7312 dspRegMask(regSet.rsGetModifiedRegsMask());
7313 printf("\n");
7314 }
7315#endif // DEBUG
7316
7318 if (compiler->opts.compDbgEnC)
7319 {
7320 // We always save FP.
7321 noway_assert(isFramePointerUsed());
7322#ifdef _TARGET_AMD64_
7323 // On x64 we always save exactly RBP, RSI and RDI for EnC.
7324 regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI);
7325 regSet.rsSetRegsModified(RBM_RSI | RBM_RDI);
7326 noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
7327#else // !_TARGET_AMD64_
7328 // On x86 we save all callee saved regs so the saved reg area size is consistent
7329 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7330#endif // !_TARGET_AMD64_
7331 }
7332
7333 /* If we have any pinvoke calls, we might potentially trash everything */
7334 if (compiler->info.compCallUnmanaged)
7335 {
7336 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
7337 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
7338 }
7339
7340#ifdef UNIX_AMD64_ABI
7341 // On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
7342 if (compiler->compIsProfilerHookNeeded())
7343 {
7344 regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
7345 }
7346#endif
7347
7348 /* Count how many callee-saved registers will actually be saved (pushed) */
7349
7350 // EBP cannot be (directly) modified for EBP frame and double-aligned frames
7351 noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
7352
7353#if ETW_EBP_FRAMED
7354 // EBP cannot be (directly) modified
7355 noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
7356#endif
7357
7358 regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
7359
7360#ifdef _TARGET_ARMARCH_
7361 if (isFramePointerUsed())
7362 {
7363 // For a FP based frame we have to push/pop the FP register
7364 //
7365 maskCalleeRegsPushed |= RBM_FPBASE;
7366
        // This assert checks that we are not using REG_FP
7368 // as both the frame pointer and as a codegen register
7369 //
7370 assert(!regSet.rsRegsModified(RBM_FPBASE));
7371 }
7372
7373 // we always push LR. See genPushCalleeSavedRegisters
7374 //
7375 maskCalleeRegsPushed |= RBM_LR;
7376
7377#if defined(_TARGET_ARM_)
7378 // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
7379 regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
7380 regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
7381
7382 if ((maskPushRegsFloat != RBM_NONE) ||
7383 (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
7384 {
7385 // Here we try to keep stack double-aligned before the vpush
7386 if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
7387 {
7388 regNumber extraPushedReg = REG_R4;
7389 while (maskPushRegsInt & genRegMask(extraPushedReg))
7390 {
7391 extraPushedReg = REG_NEXT(extraPushedReg);
7392 }
7393 if (extraPushedReg < REG_R11)
7394 {
7395 maskPushRegsInt |= genRegMask(extraPushedReg);
7396 regSet.rsSetRegsModified(genRegMask(extraPushedReg));
7397 }
7398 }
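        // For example, if r4, r5, and r6 were the only integer registers being pushed (an odd count with no
        // pre-spill registers), r7 would be added to the push list purely to keep SP 8-byte aligned before
        // the vpush. (The registers named here are illustrative.)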
7399 maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
7400 }
7401
7402 // We currently only expect to push/pop consecutive FP registers
7403 // and these have to be double-sized registers as well.
    // Here we will ensure that maskPushRegsFloat obeys these requirements.
7405 //
7406 if (maskPushRegsFloat != RBM_NONE)
7407 {
7408 regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
7409 while (maskPushRegsFloat > contiguousMask)
7410 {
7411 contiguousMask <<= 2;
7412 contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE);
7413 }
7414 if (maskPushRegsFloat != contiguousMask)
7415 {
7416 regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
7417 maskPushRegsFloat |= maskExtraRegs;
7418 regSet.rsSetRegsModified(maskExtraRegs);
7419 maskCalleeRegsPushed |= maskExtraRegs;
7420 }
7421 }
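    // For example, if only d8 and d10 were modified, d9 is added to the mask as well so that a single
    // contiguous "vpush {d8-d10}" / "vpop {d8-d10}" pair can be used. (The registers named here are
    // illustrative.)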
7422#endif // _TARGET_ARM_
7423#endif // _TARGET_ARMARCH_
7424
7425#if defined(_TARGET_XARCH_)
7426 // Compute the count of callee saved float regs saved on stack.
7427 // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
7428 // regs are stack allocated and preserved in their stack locations.
7429 compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
7430 maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
7431#endif // defined(_TARGET_XARCH_)
7432
7433 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
7434
7435#ifdef DEBUG
7436 if (verbose)
7437 {
7438 printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
7439 dspRegMask(maskCalleeRegsPushed);
7440 printf("\n");
7441 }
7442#endif // DEBUG
7443
7444 /* Assign the final offsets to things living on the stack frame */
7445
7446 compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
7447
    /* We want to make sure that the prolog size calculated here is accurate
       (that is, instructions will not shrink because of conservative stack
       frame approximations). We do this by filling in the correct size
       here (where we have committed to the final numbers for the frame offsets).
       This will ensure that the prolog size is always correct.
    */
7454 getEmitter()->emitMaxTmpSize = regSet.tmpGetTotalSize();
7455
7456#ifdef DEBUG
7457 if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
7458 {
7459 compiler->lvaTableDump();
7460 }
7461#endif
7462}
7463
7464//------------------------------------------------------------------------
7465// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
7466//
7467// Arguments:
7468// delta - the offset to add to the current stack pointer to establish the frame pointer
7469// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
7470
7471void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
7472{
7473 assert(compiler->compGeneratingProlog);
7474
7475#if defined(_TARGET_XARCH_)
7476
7477 if (delta == 0)
7478 {
7479 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
7480 psiMoveESPtoEBP();
7481 }
7482 else
7483 {
7484 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7485 // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
7486 // anyway.
7487 }
7488
7489 if (reportUnwindData)
7490 {
7491 compiler->unwindSetFrameReg(REG_FPBASE, delta);
7492 }
7493
7494#elif defined(_TARGET_ARM_)
7495
7496 assert(arm_Valid_Imm_For_Add_SP(delta));
7497 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
7498
7499 if (reportUnwindData)
7500 {
7501 compiler->unwindPadding();
7502 }
7503
7504#else
7505 NYI("establish frame pointer");
7506#endif
7507}
7508
7509/*****************************************************************************
7510 *
7511 * Generates code for a function prolog.
7512 *
7513 * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
7514 *
7515 * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
7516 * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
7517 * only instructions which result in control not going to the next instruction. Basically, any time execution would
7518 * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
7519 * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
7520 * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
7521 *
7522 * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
7523 * debugger team to ensure that stepping still works.
7524 *
7525 * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
7526 */
7527
7528#ifdef _PREFAST_
7529#pragma warning(push)
7530#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
7531#endif
7532void CodeGen::genFnProlog()
7533{
7534 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
7535
7536 compiler->funSetCurrentFunc(0);
7537
7538#ifdef DEBUG
7539 if (verbose)
7540 {
7541 printf("*************** In genFnProlog()\n");
7542 }
7543#endif
7544
7545#ifdef DEBUG
7546 genInterruptibleUsed = true;
7547#endif
7548
7549 assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
7550
7551 /* Ready to start on the prolog proper */
7552
7553 getEmitter()->emitBegProlog();
7554 compiler->unwindBegProlog();
7555
7556 // Do this so we can put the prolog instruction group ahead of
7557 // other instruction groups
7558 genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
7559
7560#ifdef DEBUG
7561 if (compiler->opts.dspCode)
7562 {
7563 printf("\n__prolog:\n");
7564 }
7565#endif
7566
7567 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
7568 {
7569 // Create new scopes for the method-parameters for the prolog-block.
7570 psiBegProlog();
7571 }
7572
7573#ifdef DEBUG
7574
7575 if (compiler->compJitHaltMethod())
7576 {
        /* put a nop first because the debugger and other tools are likely to
           put an int3 at the beginning and we don't want to confuse them */
7579
7580 instGen(INS_nop);
7581 instGen(INS_BREAKPOINT);
7582
7583#ifdef _TARGET_ARMARCH_
7584 // Avoid asserts in the unwind info because these instructions aren't accounted for.
7585 compiler->unwindPadding();
7586#endif // _TARGET_ARMARCH_
7587 }
7588#endif // DEBUG
7589
7590#if FEATURE_EH_FUNCLETS && defined(DEBUG)
7591
7592 // We cannot force 0-initialization of the PSPSym
7593 // as it will overwrite the real value
7594 if (compiler->lvaPSPSym != BAD_VAR_NUM)
7595 {
7596 LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
7597 assert(!varDsc->lvMustInit);
7598 }
7599
7600#endif // FEATURE_EH_FUNCLETS && DEBUG
7601
7602 /*-------------------------------------------------------------------------
7603 *
7604 * Record the stack frame ranges that will cover all of the tracked
7605 * and untracked pointer variables.
7606 * Also find which registers will need to be zero-initialized.
7607 *
7608 * 'initRegs': - Generally, enregistered variables should not need to be
7609 * zero-inited. They only need to be zero-inited when they
7610 * have a possibly uninitialized read on some control
7611 * flow path. Apparently some of the IL_STUBs that we
7612 * generate have this property.
7613 */
7614
7615 int untrLclLo = +INT_MAX;
7616 int untrLclHi = -INT_MAX;
7617 // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
7618 // Note that they may be tracked, but simply not allocated to a register.
7619 bool hasUntrLcl = false;
7620
7621 int GCrefLo = +INT_MAX;
7622 int GCrefHi = -INT_MAX;
7623 bool hasGCRef = false;
7624
7625 regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
7626 regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
7627 regMaskTP initDblRegs = RBM_NONE;
7628
7629 unsigned varNum;
7630 LclVarDsc* varDsc;
7631
7632 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
7633 {
7634 if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
7635 {
7636 continue;
7637 }
7638
7639 if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
7640 {
7641 noway_assert(varDsc->lvRefCnt() == 0);
7642 continue;
7643 }
7644
7645 signed int loOffs = varDsc->lvStkOffs;
7646 signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
7647
7648 /* We need to know the offset range of tracked stack GC refs */
7649 /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
7650
7651 if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
7652 {
            // Fields of dependently promoted structs (PROMOTION_TYPE_DEPENDENT) should have been
            // taken care of by the parent struct.
7655 if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
7656 {
7657 hasGCRef = true;
7658
7659 if (loOffs < GCrefLo)
7660 {
7661 GCrefLo = loOffs;
7662 }
7663 if (hiOffs > GCrefHi)
7664 {
7665 GCrefHi = hiOffs;
7666 }
7667 }
7668 }
7669
7670 /* For lvMustInit vars, gather pertinent info */
7671
7672 if (!varDsc->lvMustInit)
7673 {
7674 continue;
7675 }
7676
7677 if (varDsc->lvIsInReg())
7678 {
7679 regMaskTP regMask = genRegMask(varDsc->lvRegNum);
7680 if (!varDsc->IsFloatRegType())
7681 {
7682 initRegs |= regMask;
7683
7684 if (varTypeIsMultiReg(varDsc))
7685 {
7686 if (varDsc->lvOtherReg != REG_STK)
7687 {
7688 initRegs |= genRegMask(varDsc->lvOtherReg);
7689 }
7690 else
7691 {
7692 /* Upper DWORD is on the stack, and needs to be inited */
7693
7694 loOffs += sizeof(int);
7695 goto INIT_STK;
7696 }
7697 }
7698 }
7699 else if (varDsc->TypeGet() == TYP_DOUBLE)
7700 {
7701 initDblRegs |= regMask;
7702 }
7703 else
7704 {
7705 initFltRegs |= regMask;
7706 }
7707 }
7708 else
7709 {
7710 INIT_STK:
7711
7712 hasUntrLcl = true;
7713
7714 if (loOffs < untrLclLo)
7715 {
7716 untrLclLo = loOffs;
7717 }
7718 if (hiOffs > untrLclHi)
7719 {
7720 untrLclHi = hiOffs;
7721 }
7722 }
7723 }
7724
7725 /* Don't forget about spill temps that hold pointers */
7726
7727 if (!TRACK_GC_TEMP_LIFETIMES)
7728 {
7729 assert(regSet.tmpAllFree());
7730 for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis))
7731 {
7732 if (!varTypeIsGC(tempThis->tdTempType()))
7733 {
7734 continue;
7735 }
7736
7737 signed int loOffs = tempThis->tdTempOffs();
7738 signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
7739
7740 // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
7741 // previous frame pointer. Thus, stkOffs can't be zero.
7742 CLANG_FORMAT_COMMENT_ANCHOR;
7743
7744#if !defined(_TARGET_AMD64_)
7745 // However, on amd64 there is no requirement to chain frame pointers.
7746
7747 noway_assert(!isFramePointerUsed() || loOffs != 0);
7748#endif // !defined(_TARGET_AMD64_)
7749
7750 // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
7751
7752 hasUntrLcl = true;
7753
7754 if (loOffs < untrLclLo)
7755 {
7756 untrLclLo = loOffs;
7757 }
7758 if (hiOffs > untrLclHi)
7759 {
7760 untrLclHi = hiOffs;
7761 }
7762 }
7763 }
7764
7765 assert((genInitStkLclCnt > 0) == hasUntrLcl);
7766
7767#ifdef DEBUG
7768 if (verbose)
7769 {
7770 if (genInitStkLclCnt > 0)
7771 {
7772 printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
7773 -untrLclHi);
7774 }
7775 }
7776#endif
7777
7778#ifdef _TARGET_ARM_
7779 // On the ARM we will spill any incoming struct args in the first instruction in the prolog
7780 // Ditto for all enregistered user arguments in a varargs method.
7781 // These registers will be available to use for the initReg. We just remove
7782 // all of these registers from the rsCalleeRegArgMaskLiveIn.
7783 //
7784 intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
7785#endif
7786
7787 /* Choose the register to use for zero initialization */
7788
7789 regNumber initReg = REG_SCRATCH; // Unless we find a better register below
7790 bool initRegZeroed = false;
7791 regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
7792 regMaskTP tempMask;
7793
    // We should not use the special PINVOKE registers as the initReg
    // since they are trashed by the jithelper call to set up the PINVOKE frame
7796 if (compiler->info.compCallUnmanaged)
7797 {
7798 excludeMask |= RBM_PINVOKE_FRAME;
7799
7800 assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
7801 if (!compiler->opts.ShouldUsePInvokeHelpers())
7802 {
7803 noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
7804
7805 excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
7806
7807 // We also must exclude the register used by compLvFrameListRoot when it is enregistered
7808 //
7809 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
7810 if (varDsc->lvRegister)
7811 {
7812 excludeMask |= genRegMask(varDsc->lvRegNum);
7813 }
7814 }
7815 }
7816
7817#ifdef _TARGET_ARM_
7818 // If we have a variable sized frame (compLocallocUsed is true)
7819 // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
7820 if (compiler->compLocallocUsed)
7821 {
7822 excludeMask |= RBM_SAVED_LOCALLOC_SP;
7823 }
7824#endif // _TARGET_ARM_
7825
7826#if defined(_TARGET_XARCH_)
7827 if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
7828 {
7829 // We currently must use REG_EAX on x86 here
7830 // because the loop's backwards branch depends upon the size of EAX encodings
7831 assert(initReg == REG_EAX);
7832 }
7833 else
7834#endif // _TARGET_XARCH_
7835 {
7836 tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
7837
7838 if (tempMask != RBM_NONE)
7839 {
7840 // We will use one of the registers that we were planning to zero init anyway.
7841 // We pick the lowest register number.
7842 tempMask = genFindLowestBit(tempMask);
7843 initReg = genRegNumFromMask(tempMask);
7844 }
7845 // Next we prefer to use one of the unused argument registers.
7846 // If they aren't available we use one of the caller-saved integer registers.
7847 else
7848 {
7849 tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
7850 if (tempMask != RBM_NONE)
7851 {
7852 // We pick the lowest register number
7853 tempMask = genFindLowestBit(tempMask);
7854 initReg = genRegNumFromMask(tempMask);
7855 }
7856 }
7857 }
7858
7859 noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME));
7860
7861#if defined(_TARGET_AMD64_)
7862 // If we are a varargs call, in order to set up the arguments correctly this
7863 // must be done in a 2 step process. As per the x64 ABI:
7864 // a) The caller sets up the argument shadow space (just before the return
7865 // address, 4 pointer sized slots).
7866 // b) The callee is responsible to home the arguments on the shadow space
7867 // provided by the caller.
7868 // This way, the varargs iterator will be able to retrieve the
7869 // call arguments properly since both the arg regs and the stack allocated
7870 // args will be contiguous.
7871 if (compiler->info.compIsVarArgs)
7872 {
7873 getEmitter()->spillIntArgRegsToShadowSlots();
7874 }
7875
7876#endif // _TARGET_AMD64_
7877
7878#ifdef _TARGET_ARM_
7879 /*-------------------------------------------------------------------------
7880 *
7881 * Now start emitting the part of the prolog which sets up the frame
7882 */
7883
7884 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
7885 {
7886 inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
7887 compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
7888 }
7889#endif // _TARGET_ARM_
7890
7891#ifdef _TARGET_XARCH_
7892 if (doubleAlignOrFramePointerUsed())
7893 {
7894 inst_RV(INS_push, REG_FPBASE, TYP_REF);
7895 compiler->unwindPush(REG_FPBASE);
7896 psiAdjustStackLevel(REGSIZE_BYTES);
7897
7898#ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
7899 genEstablishFramePointer(0, /*reportUnwindData*/ true);
7900#endif // !_TARGET_AMD64_
7901
7902#if DOUBLE_ALIGN
7903 if (compiler->genDoubleAlign())
7904 {
7905 noway_assert(isFramePointerUsed() == false);
7906 noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
7907
7908 inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
7909 }
7910#endif // DOUBLE_ALIGN
7911 }
7912#endif // _TARGET_XARCH_
7913
7914#ifdef _TARGET_ARM64_
7915 // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame.
7916 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
7917 genPushCalleeSavedRegisters(initReg, &initRegZeroed);
7918#else // !_TARGET_ARM64_
7919 genPushCalleeSavedRegisters();
7920#endif // !_TARGET_ARM64_
7921
7922#ifdef _TARGET_ARM_
7923 bool needToEstablishFP = false;
7924 int afterLclFrameSPtoFPdelta = 0;
7925 if (doubleAlignOrFramePointerUsed())
7926 {
7927 needToEstablishFP = true;
7928
7929 // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
7930 // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
7931 // too big, we go ahead and do it here.
7932
7933 int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
7934 afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
7935 if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
7936 {
7937 // Oh well, it looks too big. Go ahead and establish the frame pointer here.
7938 genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
7939 needToEstablishFP = false;
7940 }
7941 }
7942#endif // _TARGET_ARM_
7943
7944 //-------------------------------------------------------------------------
7945 //
7946 // Subtract the local frame size from SP.
7947 //
7948 //-------------------------------------------------------------------------
7949 CLANG_FORMAT_COMMENT_ANCHOR;
7950
7951#ifndef _TARGET_ARM64_
7952 regMaskTP maskStackAlloc = RBM_NONE;
7953
7954#ifdef _TARGET_ARM_
7955 maskStackAlloc =
7956 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
7957#endif // _TARGET_ARM_
7958
7959 if (maskStackAlloc == RBM_NONE)
7960 {
7961 genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
7962 }
7963#endif // !_TARGET_ARM64_
7964
7965//-------------------------------------------------------------------------
7966
7967#ifdef _TARGET_ARM_
7968 if (compiler->compLocallocUsed)
7969 {
7970 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
7971 regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP);
7972 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
7973 }
#endif // _TARGET_ARM_
7975
7976#if defined(_TARGET_XARCH_)
7977 // Preserve callee saved float regs to stack.
7978 genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
7979#endif // defined(_TARGET_XARCH_)
7980
7981#ifdef _TARGET_AMD64_
7982 // Establish the AMD64 frame pointer after the OS-reported prolog.
7983 if (doubleAlignOrFramePointerUsed())
7984 {
7985 bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
7986 genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
7987 }
7988#endif //_TARGET_AMD64_
7989
7990//-------------------------------------------------------------------------
7991//
7992// This is the end of the OS-reported prolog for purposes of unwinding
7993//
7994//-------------------------------------------------------------------------
7995
7996#ifdef _TARGET_ARM_
7997 if (needToEstablishFP)
7998 {
7999 genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
8000 needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
8001 }
8002#endif // _TARGET_ARM_
8003
8004 if (compiler->info.compPublishStubParam)
8005 {
8006#if CPU_LOAD_STORE_ARCH
8007 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
8008 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
8009#else
8010 // mov [lvaStubArgumentVar], EAX
8011 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
8012 compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
8013#endif
8014 assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
8015
8016 // It's no longer live; clear it out so it can be used after this in the prolog
8017 intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
8018 }
8019
8020#if STACK_PROBES
8021 // We could probably fold this into the loop for the FrameSize >= 0x3000 probing
8022 // when creating the stack frame. Don't think it's worth it, though.
8023 if (genNeedPrologStackProbe)
8024 {
8025 //
8026 // Can't have a call until we have enough padding for rejit
8027 //
8028 genPrologPadForReJit();
8029 noway_assert(compiler->opts.compNeedStackProbes);
8030 genGenerateStackProbe();
8031 compiler->compStackProbePrologDone = true;
8032 }
8033#endif // STACK_PROBES
8034
8035 //
8036 // Zero out the frame as needed
8037 //
8038
8039 genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
8040
8041#if FEATURE_EH_FUNCLETS
8042
8043 genSetPSPSym(initReg, &initRegZeroed);
8044
8045#else // !FEATURE_EH_FUNCLETS
8046
8047 // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
8048 if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
8049 {
8050 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
8051 unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
8052
8053 // Zero out the slot for nesting level 0
8054 unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
8055
8056 if (!initRegZeroed)
8057 {
8058 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8059 initRegZeroed = true;
8060 }
8061
8062 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
8063 firstSlotOffs);
8064 }
8065
8066#endif // !FEATURE_EH_FUNCLETS
8067
8068 genReportGenericContextArg(initReg, &initRegZeroed);
8069
8070 // The local variable representing the security object must be on the stack frame
8071 // and must be 0 initialized.
8072 noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) ||
8073 (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
8074 compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
8075
8076#ifdef JIT32_GCENCODER
8077 // Initialize the LocalAllocSP slot if there is localloc in the function.
8078 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
8079 {
8080 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
8081 }
8082#endif // JIT32_GCENCODER
8083
8084 // Set up the GS security cookie
8085
8086 genSetGSSecurityCookie(initReg, &initRegZeroed);
8087
8088#ifdef PROFILING_SUPPORTED
8089
8090 // Insert a function entry callback for profiling, if requested.
8091 genProfilingEnterCallback(initReg, &initRegZeroed);
8092
8093#endif // PROFILING_SUPPORTED
8094
8095 if (!genInterruptible)
8096 {
8097 /*-------------------------------------------------------------------------
8098 *
8099 * The 'real' prolog ends here for non-interruptible methods.
8100 * For fully-interruptible methods, we extend the prolog so that
         * we do not need to track GC information while shuffling the
8102 * arguments.
8103 *
8104 * Make sure there's enough padding for ReJIT.
8105 *
8106 */
8107 genPrologPadForReJit();
8108 getEmitter()->emitMarkPrologEnd();
8109 }
8110
8111#if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
8112 // The unused bits of Vector3 arguments must be cleared
    // since the native compiler doesn't initialize the upper bits to zeros.
8114 //
8115 // TODO-Cleanup: This logic can be implemented in
8116 // genFnPrologCalleeRegArgs() for argument registers and
8117 // genEnregisterIncomingStackArgs() for stack arguments.
8118 genClearStackVec3ArgUpperBits();
8119#endif // UNIX_AMD64_ABI && FEATURE_SIMD
8120
8121 /*-----------------------------------------------------------------------------
8122 * Take care of register arguments first
8123 */
8124
8125 RegState* regState;
8126
8127 // Update the arg initial register locations.
8128 compiler->lvaUpdateArgsWithInitialReg();
8129
8130 FOREACH_REGISTER_FILE(regState)
8131 {
8132 if (regState->rsCalleeRegArgMaskLiveIn)
8133 {
            // If we need an extra register to shuffle around the incoming registers,
            // we will use xtraReg (initReg) and set the xtraRegClobbered flag;
            // if we don't need to use the xtraReg then this flag will stay false.
8137 //
8138 regNumber xtraReg;
8139 bool xtraRegClobbered = false;
8140
8141 if (genRegMask(initReg) & RBM_ARG_REGS)
8142 {
8143 xtraReg = initReg;
8144 }
8145 else
8146 {
8147 xtraReg = REG_SCRATCH;
8148 initRegZeroed = false;
8149 }
8150
8151 genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
8152
8153 if (xtraRegClobbered)
8154 {
8155 initRegZeroed = false;
8156 }
8157 }
8158 }
8159
8160 // Home the incoming arguments
8161 genEnregisterIncomingStackArgs();
8162
8163 /* Initialize any must-init registers variables now */
8164
8165 if (initRegs)
8166 {
8167 regMaskTP regMask = 0x1;
8168
8169 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
8170 {
8171 if (regMask & initRegs)
8172 {
8173 // Check if we have already zeroed this register
8174 if ((reg == initReg) && initRegZeroed)
8175 {
8176 continue;
8177 }
8178 else
8179 {
8180 instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
8181 if (reg == initReg)
8182 {
8183 initRegZeroed = true;
8184 }
8185 }
8186 }
8187 }
8188 }
8189
8190 if (initFltRegs | initDblRegs)
8191 {
8192 // If initReg is not in initRegs then we will use REG_SCRATCH
8193 if ((genRegMask(initReg) & initRegs) == 0)
8194 {
8195 initReg = REG_SCRATCH;
8196 initRegZeroed = false;
8197 }
8198
8199#ifdef _TARGET_ARM_
8200 // This is needed only for Arm since it can use a zero initialized int register
8201 // to initialize vfp registers.
8202 if (!initRegZeroed)
8203 {
8204 instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
8205 initRegZeroed = true;
8206 }
8207#endif // _TARGET_ARM_
8208
8209 genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
8210 }
8211
8212 //-----------------------------------------------------------------------------
8213
8214 //
8215 // Increase the prolog size here only if fully interruptible.
8216 // And again make sure it's big enough for ReJIT
8217 //
8218
8219 if (genInterruptible)
8220 {
8221 genPrologPadForReJit();
8222 getEmitter()->emitMarkPrologEnd();
8223 }
8224
8225 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
8226 {
8227 psiEndProlog();
8228 }
8229
8230 if (hasGCRef)
8231 {
8232 getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
8233 }
8234 else
8235 {
8236 noway_assert(GCrefLo == +INT_MAX);
8237 noway_assert(GCrefHi == -INT_MAX);
8238 }
8239
8240#ifdef DEBUG
8241 if (compiler->opts.dspCode)
8242 {
8243 printf("\n");
8244 }
8245#endif
8246
8247#ifdef _TARGET_X86_
8248 // On non-x86 the VARARG cookie does not need any special treatment.
8249
8250 // Load up the VARARG argument pointer register so it doesn't get clobbered.
    // Only do this if we actually access any statically declared args
8252 // (our argument pointer register has a refcount > 0).
8253 unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
8254
8255 if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt() > 0)
8256 {
8257 varDsc = &compiler->lvaTable[argsStartVar];
8258
8259 noway_assert(compiler->info.compArgsCount > 0);
8260
8261 // MOV EAX, <VARARGS HANDLE>
8262 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
8263 regSet.verifyRegUsed(REG_EAX);
8264
8265 // MOV EAX, [EAX]
8266 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
8267
8268 // EDX might actually be holding something here. So make sure to only use EAX for this code
8269 // sequence.
8270
8271 LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1];
8272 noway_assert(!lastArg->lvRegister);
8273 signed offset = lastArg->lvStkOffs;
8274 assert(offset != BAD_STK_OFFS);
8275 noway_assert(lastArg->lvFramePointerBased);
8276
8277 // LEA EAX, &<VARARGS HANDLE> + EAX
8278 getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
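        // At this point EAX holds the computed base address for the varargs stack arguments (which is what
        // lvaVarargsBaseOfStkArgs represents); below it is moved to its home, either the enregistered copy
        // or the variable's frame slot.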
8279
8280 if (varDsc->lvIsInReg())
8281 {
8282 if (varDsc->lvRegNum != REG_EAX)
8283 {
8284 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
8285 regSet.verifyRegUsed(varDsc->lvRegNum);
8286 }
8287 }
8288 else
8289 {
8290 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
8291 }
8292 }
8293
8294#endif // _TARGET_X86_
8295
8296#if defined(DEBUG) && defined(_TARGET_XARCH_)
8297 if (compiler->opts.compStackCheckOnRet)
8298 {
8299 noway_assert(compiler->lvaReturnSpCheck != 0xCCCCCCCC &&
8300 compiler->lvaTable[compiler->lvaReturnSpCheck].lvDoNotEnregister &&
8301 compiler->lvaTable[compiler->lvaReturnSpCheck].lvOnFrame);
8302 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0);
8303 }
8304#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
8305
8306 getEmitter()->emitEndProlog();
8307 compiler->unwindEndProlog();
8308
8309 noway_assert(getEmitter()->emitMaxTmpSize == regSet.tmpGetTotalSize());
8310}
8311#ifdef _PREFAST_
8312#pragma warning(pop)
8313#endif
8314
8315/*****************************************************************************
8316 *
8317 * Generates code for a function epilog.
8318 *
8319 * Please consult the "debugger team notification" comment in genFnProlog().
8320 */
8321
8322#if defined(_TARGET_ARMARCH_)
8323
8324void CodeGen::genFnEpilog(BasicBlock* block)
8325{
8326#ifdef DEBUG
8327 if (verbose)
8328 printf("*************** In genFnEpilog()\n");
8329#endif // DEBUG
8330
8331 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
8332
8333 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8334 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8335 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8336
8337#ifdef DEBUG
8338 if (compiler->opts.dspCode)
8339 printf("\n__epilog:\n");
8340
8341 if (verbose)
8342 {
8343 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8344 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8345 printf(", gcRegGCrefSetCur=");
8346 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8347 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8348 printf(", gcRegByrefSetCur=");
8349 printRegMaskInt(gcInfo.gcRegByrefSetCur);
8350 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
8351 printf("\n");
8352 }
8353#endif // DEBUG
8354
8355 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
8356
8357 GenTree* lastNode = block->lastNode();
8358
8359 // Method handle and address info used in case of jump epilog
8360 CORINFO_METHOD_HANDLE methHnd = nullptr;
8361 CORINFO_CONST_LOOKUP addrInfo;
8362 addrInfo.addr = nullptr;
8363 addrInfo.accessType = IAT_VALUE;
8364
8365 if (jmpEpilog && lastNode->gtOper == GT_JMP)
8366 {
8367 methHnd = (CORINFO_METHOD_HANDLE)lastNode->gtVal.gtVal1;
8368 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8369 }
8370
8371#ifdef _TARGET_ARM_
8372 // We delay starting the unwind codes until we have an instruction which we know
8373 // needs an unwind code. In particular, for large stack frames in methods without
8374 // localloc, the sequence might look something like this:
8375 // movw r3, 0x38e0
8376 // add sp, r3
8377 // pop {r4,r5,r6,r10,r11,pc}
8378 // In this case, the "movw" should not be part of the unwind codes, since it will
8379 // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
8380 // also sets the current location as the beginning offset of the epilog, so every
8381 // instruction afterwards needs an unwind code. In the case above, if you call
8382 // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
8383
8384 bool unwindStarted = false;
8385
8386 // Tear down the stack frame
8387
8388 if (compiler->compLocallocUsed)
8389 {
8390 if (!unwindStarted)
8391 {
8392 compiler->unwindBegEpilog();
8393 unwindStarted = true;
8394 }
8395
8396 // mov R9 into SP
8397 inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
8398 compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
8399 }
8400
8401 if (jmpEpilog ||
8402 genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
8403 RBM_NONE)
8404 {
8405 genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
8406 }
8407
8408 if (!unwindStarted)
8409 {
8410 // If we haven't generated anything yet, we're certainly going to generate a "pop" next.
8411 compiler->unwindBegEpilog();
8412 unwindStarted = true;
8413 }
8414
8415 if (jmpEpilog && lastNode->gtOper == GT_JMP && addrInfo.accessType == IAT_RELPVALUE)
8416 {
8417 // IAT_RELPVALUE jump at the end is done using relative indirection, so,
8418 // additional helper register is required.
8419 // We use LR just before it is going to be restored from stack, i.e.
8420 //
8421 // movw r12, laddr
8422 // movt r12, haddr
8423 // mov lr, r12
8424 // ldr r12, [r12]
8425 // add r12, r12, lr
8426 // pop {lr}
8427 // ...
8428 // bx r12
8429
8430 regNumber indCallReg = REG_R12;
8431 regNumber vptrReg1 = REG_LR;
8432
8433 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
8434 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, vptrReg1, indCallReg);
8435 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
8436 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, indCallReg, vptrReg1);
8437 }
8438
8439 genPopCalleeSavedRegisters(jmpEpilog);
8440
8441 if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
8442 {
        // We had better not have used a pop PC to return; otherwise this will be unreachable code
8444 noway_assert(!genUsedPopToReturn);
8445
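        // The pre-spill area holds incoming argument registers that were pushed as the very first prolog
        // instruction; their values don't need to be restored here, so just move SP past them and record
        // the adjustment for unwinding.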
8446 int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
8447 inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
8448 compiler->unwindAllocStack(preSpillRegArgSize);
8449 }
8450
8451 if (jmpEpilog)
8452 {
        // We had better not have used a pop PC to return; otherwise this will be unreachable code
8454 noway_assert(!genUsedPopToReturn);
8455 }
8456
8457#else // _TARGET_ARM64_
8458 compiler->unwindBegEpilog();
8459
8460 genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
8461#endif // _TARGET_ARM64_
8462
8463 if (jmpEpilog)
8464 {
8465 hasTailCalls = true;
8466
8467 noway_assert(block->bbJumpKind == BBJ_RETURN);
8468 noway_assert(block->bbTreeList != nullptr);
8469
8470 /* figure out what jump we have */
8471 GenTree* jmpNode = lastNode;
8472#if !FEATURE_FASTTAILCALL
8473 noway_assert(jmpNode->gtOper == GT_JMP);
8474#else // FEATURE_FASTTAILCALL
8475 // armarch
8476 // If jmpNode is GT_JMP then gtNext must be null.
8477 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8478 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
8479
8480 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8481 noway_assert((jmpNode->gtOper == GT_JMP) ||
8482 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8483
        // The code block that follows is the body of this "if" statement
8485 if (jmpNode->gtOper == GT_JMP)
8486#endif // FEATURE_FASTTAILCALL
8487 {
8488 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8489 // the same descriptor with some minor adjustments.
8490 assert(methHnd != nullptr);
8491 assert(addrInfo.addr != nullptr);
8492
8493#ifdef _TARGET_ARMARCH_
8494 emitter::EmitCallType callType;
8495 void* addr;
8496 regNumber indCallReg;
8497 switch (addrInfo.accessType)
8498 {
8499 case IAT_VALUE:
8500 if (validImmForBL((ssize_t)addrInfo.addr))
8501 {
8502 // Simple direct call
8503 callType = emitter::EC_FUNC_TOKEN;
8504 addr = addrInfo.addr;
8505 indCallReg = REG_NA;
8506 break;
8507 }
8508
8509 // otherwise the target address doesn't fit in an immediate
8510 // so we have to burn a register...
8511 __fallthrough;
8512
8513 case IAT_PVALUE:
8514 // Load the address into a register, load indirect and call through a register
8515 // We have to use R12 since we assume the argument registers are in use
8516 callType = emitter::EC_INDIR_R;
8517 indCallReg = REG_INDIRECT_CALL_TARGET_REG;
8518 addr = NULL;
8519 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
8520 if (addrInfo.accessType == IAT_PVALUE)
8521 {
8522 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
8523 regSet.verifyRegUsed(indCallReg);
8524 }
8525 break;
8526
8527 case IAT_RELPVALUE:
8528 {
8529 // Load the address into a register, load relative indirect and call through a register
8530 // We have to use R12 since we assume the argument registers are in use
                    // LR is used as a helper register right before it is restored from the stack; thus,
                    // all relative address calculations are performed before LR is restored.
8533 callType = emitter::EC_INDIR_R;
8534 indCallReg = REG_R12;
8535 addr = NULL;
8536
8537 regSet.verifyRegUsed(indCallReg);
8538 break;
8539 }
8540
8541 case IAT_PPVALUE:
8542 default:
8543 NO_WAY("Unsupported JMP indirection");
8544 }
8545
8550 // clang-format off
8551 getEmitter()->emitIns_Call(callType,
8552 methHnd,
8553 INDEBUG_LDISASM_COMMA(nullptr)
8554 addr,
8555 0, // argSize
8556 EA_UNKNOWN, // retSize
8557#if defined(_TARGET_ARM64_)
8558 EA_UNKNOWN, // secondRetSize
8559#endif
8560 gcInfo.gcVarPtrSetCur,
8561 gcInfo.gcRegGCrefSetCur,
8562 gcInfo.gcRegByrefSetCur,
8563 BAD_IL_OFFSET, // IL offset
8564 indCallReg, // ireg
8565 REG_NA, // xreg
8566 0, // xmul
8567 0, // disp
8568 true); // isJump
8569 // clang-format on
8570 CLANG_FORMAT_COMMENT_ANCHOR;
8571#endif //_TARGET_ARMARCH_
8572 }
8573#if FEATURE_FASTTAILCALL
8574 else
8575 {
8576 // Fast tail call.
8577 // Call target = REG_FASTTAILCALL_TARGET
8578 // https://github.com/dotnet/coreclr/issues/4827
8579 // Do we need a special encoding for stack walker like rex.w prefix for x64?
8580 getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET);
8581 }
8582#endif // FEATURE_FASTTAILCALL
8583 }
8584 else
8585 {
8586#ifdef _TARGET_ARM_
8587 if (!genUsedPopToReturn)
8588 {
8589 // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
8590 // so we need a "bx lr" instruction to return from the function.
8591 inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
8592 compiler->unwindBranch16();
8593 }
8594#else // _TARGET_ARM64_
8595 inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
8596 compiler->unwindReturn(REG_LR);
8597#endif // _TARGET_ARM64_
8598 }
8599
8600 compiler->unwindEndEpilog();
8601}
8602
8603#elif defined(_TARGET_XARCH_)
8604
8605void CodeGen::genFnEpilog(BasicBlock* block)
8606{
8607#ifdef DEBUG
8608 if (verbose)
8609 {
8610 printf("*************** In genFnEpilog()\n");
8611 }
8612#endif
8613
8614 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
8615
8616 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
8617 gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
8618 gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
8619
8620 noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts
8621
8622#ifdef DEBUG
8623 genInterruptibleUsed = true;
8624#endif
8625
8626 bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
8627
8628#ifdef DEBUG
8629 if (compiler->opts.dspCode)
8630 {
8631 printf("\n__epilog:\n");
8632 }
8633
8634 if (verbose)
8635 {
8636 printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
8637 dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
8638 printf(", gcRegGCrefSetCur=");
8639 printRegMaskInt(gcInfo.gcRegGCrefSetCur);
8640 getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
8641 printf(", gcRegByrefSetCur=");
8642 printRegMaskInt(gcInfo.gcRegByrefSetCur);
8643 getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
8644 printf("\n");
8645 }
8646#endif
8647
8648 // Restore float registers that were saved to stack before SP is modified.
8649 genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
8650
8651#ifdef JIT32_GCENCODER
8652 // When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after
8653 // the above call to `genRestoreCalleeSavedFltRegs` because that function
8654 // a) does not actually restore any registers: there are none when targeting the Windows x86 ABI,
8655 // which is the only target that uses the JIT32 GC encoder
8656 // b) may issue a `vzeroupper` instruction to eliminate AVX -> SSE transition penalties.
8657 // Because the `vzeroupper` instruction is not recognized by the VM's unwinder and there are no
8658 // callee-save FP restores that the unwinder would need to see, we can avoid the need to change the
8659 // unwinder (and break binary compat with older versions of the runtime) by starting the epilog
8660 // after any `vzeroupper` instruction has been emitted. If either of the above conditions changes,
8661 // we will need to rethink this.
8662 getEmitter()->emitStartEpilog();
8663#endif
8664
8665 /* Compute the size in bytes we've pushed/popped */
8666
8667 if (!doubleAlignOrFramePointerUsed())
8668 {
        // We have an ESP frame
8670
8671 noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
8672
8673 /* Get rid of our local variables */
8674
8675 if (compiler->compLclFrameSize)
8676 {
8677#ifdef _TARGET_X86_
8678 /* Add 'compiler->compLclFrameSize' to ESP */
8679 /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
8680
8681 if ((compiler->compLclFrameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed)
8682 {
8683 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8684 regSet.verifyRegUsed(REG_ECX);
8685 }
8686 else
#endif // _TARGET_X86_
8688 {
8689 /* Add 'compiler->compLclFrameSize' to ESP */
8690 /* Generate "add esp, <stack-size>" */
8691 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8692 }
8693 }
8694
8695 genPopCalleeSavedRegisters();
8696 }
8697 else
8698 {
8699 noway_assert(doubleAlignOrFramePointerUsed());
8700
8701 /* Tear down the stack frame */
8702
8703 bool needMovEspEbp = false;
8704
8705#if DOUBLE_ALIGN
8706 if (compiler->genDoubleAlign())
8707 {
8708 //
8709 // add esp, compLclFrameSize
8710 //
8711 // We need not do anything (except the "mov esp, ebp") if
8712 // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
8713 // also complicates the code manager. Hence, we ignore that case.
8714
8715 noway_assert(compiler->compLclFrameSize != 0);
8716 inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
8717
8718 needMovEspEbp = true;
8719 }
8720 else
8721#endif // DOUBLE_ALIGN
8722 {
8723 bool needLea = false;
8724
8725 if (compiler->compLocallocUsed)
8726 {
8727 // ESP may be variable if a localloc was actually executed. Reset it.
8728 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
8729
8730 needLea = true;
8731 }
8732 else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
8733 {
8734 if (compiler->compLclFrameSize != 0)
8735 {
8736#ifdef _TARGET_AMD64_
8737 // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
8738 // do an LEA to "pop off" the frame allocation.
8739 needLea = true;
8740#else // !_TARGET_AMD64_
8741 // We will just generate "mov esp, ebp" and be done with it.
8742 needMovEspEbp = true;
8743#endif // !_TARGET_AMD64_
8744 }
8745 }
8746 else if (compiler->compLclFrameSize == 0)
8747 {
8748 // do nothing before popping the callee-saved registers
8749 }
8750#ifdef _TARGET_X86_
8751 else if (compiler->compLclFrameSize == REGSIZE_BYTES)
8752 {
8753 // "pop ecx" will make ESP point to the callee-saved registers
8754 inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
8755 regSet.verifyRegUsed(REG_ECX);
8756 }
#endif // _TARGET_X86_
8758 else
8759 {
8760 // We need to make ESP point to the callee-saved registers
8761 needLea = true;
8762 }
8763
8764 if (needLea)
8765 {
8766 int offset;
8767
8768#ifdef _TARGET_AMD64_
8769 // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
8770 //
                // Case 1: localloc not used.
                //   genSPtoFPdelta() = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize
                //   offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES
                //   This is the amount to be subtracted from RBP to point at the callee-saved int regs.
                //
                // Case 2: localloc used
                //   genSPtoFPdelta() = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
                //   offset = the amount to be added to RBP to point at the callee-saved int regs.
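                //
                // Purely illustrative (the numbers are hypothetical): with no localloc and
                // compCalleeRegsPushed == 3, offset == 3 * 8 == 0x18, so we emit "lea rsp, [rbp-18h]",
                // which points RSP at the callee-saved int regs so the pops that follow restore them.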
8779 offset = genSPtoFPdelta() - compiler->compLclFrameSize;
8780
8781 // Offset should fit within a byte if localloc is not used.
8782 if (!compiler->compLocallocUsed)
8783 {
8784 noway_assert(offset < UCHAR_MAX);
8785 }
8786#else
8787 // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
8788 offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
8789 noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
8790#endif
8791
8792 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
8793 }
8794 }
8795
8796 //
8797 // Pop the callee-saved registers (if any)
8798 //
8799
8800 genPopCalleeSavedRegisters();
8801
8802#ifdef _TARGET_AMD64_
8803 assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
8804#else // !_TARGET_AMD64_
8805 if (needMovEspEbp)
8806 {
8807 // mov esp, ebp
8808 inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
8809 }
8810#endif // !_TARGET_AMD64_
8811
8812 // pop ebp
8813 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
8814 }
8815
8816 getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
8817
    /* Check if this is a special return block, i.e.
     * one ending in a CEE_JMP instruction */
8820
8821 if (jmpEpilog)
8822 {
8823 noway_assert(block->bbJumpKind == BBJ_RETURN);
8824 noway_assert(block->bbTreeList);
8825
8826 // figure out what jump we have
8827 GenTree* jmpNode = block->lastNode();
8828#if !FEATURE_FASTTAILCALL
8829 // x86
8830 noway_assert(jmpNode->gtOper == GT_JMP);
8831#else
8832 // amd64
8833 // If jmpNode is GT_JMP then gtNext must be null.
8834 // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
8835 noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
8836
8837 // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
8838 noway_assert((jmpNode->gtOper == GT_JMP) ||
8839 ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
8840
        // The code block that follows is the body of this "if" statement
8842 if (jmpNode->gtOper == GT_JMP)
8843#endif
8844 {
8845 // Simply emit a jump to the methodHnd. This is similar to a call so we can use
8846 // the same descriptor with some minor adjustments.
8847 CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
8848
8849 CORINFO_CONST_LOOKUP addrInfo;
8850 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
8851 if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
8852 {
8853 NO_WAY("Unsupported JMP indirection");
8854 }
8855
8856 const emitter::EmitCallType callType =
8857 (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
8858
8862 // clang-format off
8863 getEmitter()->emitIns_Call(callType,
8864 methHnd,
8865 INDEBUG_LDISASM_COMMA(nullptr)
8866 addrInfo.addr,
8867 0, // argSize
8868 EA_UNKNOWN // retSize
8869 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize
8870 gcInfo.gcVarPtrSetCur,
8871 gcInfo.gcRegGCrefSetCur,
8872 gcInfo.gcRegByrefSetCur,
8873 BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
8874 true /* isJump */
8875 );
8876 // clang-format on
8877 }
8878#if FEATURE_FASTTAILCALL
8879 else
8880 {
8881#ifdef _TARGET_AMD64_
8882 // Fast tail call.
8883 // Call target = RAX.
8884 // Stack walker requires that a register indirect tail call be rex.w prefixed.
8885 getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
8886#else
8887 assert(!"Fast tail call as epilog+jmp");
8888 unreached();
8889#endif //_TARGET_AMD64_
8890 }
8891#endif // FEATURE_FASTTAILCALL
8892 }
8893 else
8894 {
8895 unsigned stkArgSize = 0; // Zero on all platforms except x86
8896
8897#if defined(_TARGET_X86_)
8898 bool fCalleePop = true;
8899
8900 // varargs has caller pop
8901 if (compiler->info.compIsVarArgs)
8902 fCalleePop = false;
8903
8904#ifdef UNIX_X86_ABI
8905 if (IsCallerPop(compiler->info.compMethodInfo->args.callConv))
8906 fCalleePop = false;
8907#endif // UNIX_X86_ABI
8908
8909 if (fCalleePop)
8910 {
8911 noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * REGSIZE_BYTES);
8912 stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
8913
8914 noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand
8915 }
8916#endif // _TARGET_X86_
8917
8918 /* Return, popping our arguments (if any) */
8919 instGen_Return(stkArgSize);
8920 }
8921}
8922
8923#else // _TARGET_*
8924#error Unsupported or unset target architecture
8925#endif // _TARGET_*
8926
8927#if FEATURE_EH_FUNCLETS
8928
8929#ifdef _TARGET_ARM_
8930
8931/*****************************************************************************
8932 *
8933 * Generates code for an EH funclet prolog.
8934 *
8935 * Funclets have the following incoming arguments:
8936 *
8937 * catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
8938 * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
8939 * finally/fault: none
8940 *
8941 * Funclets set the following registers on exit:
8942 *
8943 * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
8944 * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
8945 * finally/fault: none
8946 *
8947 * The ARM funclet prolog sequence is:
8948 *
8949 * push {regs,lr} ; We push the callee-saved regs and 'lr'.
8950 * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
8951 * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
8952 * ; calculated for the entire function.
8953 * sub sp, XXX ; Establish the rest of the frame.
8954 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
8955 * ; up to preserve stack alignment. If we push an odd number of registers, we also
8956 * ; generate this, to keep the stack aligned.
8957 *
8958 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
8959 * ; filters.
8960 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
8961 * ; epilog.
8962 *
8963 * if (this is a filter funclet)
8964 * {
8965 * // r1 on entry to a filter funclet is CallerSP of the containing function:
8966 * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
8967 * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
8968 * // a funclet. Consider:
8969 * //
8970 * // try {
8971 * // try {
8972 * // throw new Exception();
8973 * // } catch(Exception) {
8974 * // throw new Exception(); // The exception thrown here ...
8975 * // }
8976 * // } filter { // ... will be processed here, while the "catch" funclet frame is
8977 * // // still on the stack
8978 * // } filter-handler {
8979 * // }
8980 * //
8981 * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
8982 * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
8983 * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
8984 *
8985 * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
8986 * ; the dynamically containing funclet or function)
8987 * str r1, [sp + PSP_slot_SP_offset] ; store the PSP
8988 * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
8989 * }
8990 * else
8991 * {
8992 * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
8993 * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
8994 *
8995 * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
8996 * str r3, [sp + PSP_slot_SP_offset] ; store the PSP
8997 * }
8998 *
8999 * The epilog sequence is then:
9000 *
9001 * add sp, XXX ; if necessary
9002 * pop {regs,pc}
9003 *
9004 * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
9005 * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
9006 *
9007 * The funclet frame is thus:
9008 *
9009 * | |
9010 * |-----------------------|
9011 * | incoming |
9012 * | arguments |
9013 * +=======================+ <---- Caller's SP
9014 * |Callee saved registers |
9015 * |-----------------------|
9016 * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
9017 * | | // in function and funclet
9018 * |-----------------------|
9019 * | PSP slot | // Omitted in CoreRT ABI
9020 * |-----------------------|
9021 * ~ possible 4 byte pad ~
9022 * ~ for alignment ~
9023 * |-----------------------|
9024 * | Outgoing arg space |
9025 * |-----------------------| <---- Ambient SP
9026 * | | |
9027 * ~ | Stack grows ~
9028 * | | downward |
9029 * V
9030 */
9031
9032void CodeGen::genFuncletProlog(BasicBlock* block)
9033{
9034#ifdef DEBUG
9035 if (verbose)
9036 printf("*************** In genFuncletProlog()\n");
9037#endif
9038
9039 assert(block != NULL);
9040 assert(block->bbFlags & BBF_FUNCLET_BEG);
9041
9042 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9043
9044 gcInfo.gcResetForBB();
9045
9046 compiler->unwindBegProlog();
9047
9048 regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9049 regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
9050
9051 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
9052 maskPushRegsInt |= maskStackAlloc;
9053
9054 assert(FitsIn<int>(maskPushRegsInt));
9055 inst_IV(INS_push, (int)maskPushRegsInt);
9056 compiler->unwindPushMaskInt(maskPushRegsInt);
9057
9058 if (maskPushRegsFloat != RBM_NONE)
9059 {
9060 genPushFltRegs(maskPushRegsFloat);
9061 compiler->unwindPushMaskFloat(maskPushRegsFloat);
9062 }
9063
9064 bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
9065
9066 regMaskTP maskArgRegsLiveIn;
9067 if (isFilter)
9068 {
9069 maskArgRegsLiveIn = RBM_R0 | RBM_R1;
9070 }
9071 else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
9072 {
9073 maskArgRegsLiveIn = RBM_NONE;
9074 }
9075 else
9076 {
9077 maskArgRegsLiveIn = RBM_R0;
9078 }
9079
9080 regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
9081 bool initRegZeroed = false;
9082
9083 if (maskStackAlloc == RBM_NONE)
9084 {
9085 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9086 }
9087
9088 // This is the end of the OS-reported prolog for purposes of unwinding
9089 compiler->unwindEndProlog();
9090
9091 if (isFilter)
9092 {
9093 // This is the first block of a filter
9094
9095 getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
9096 genFuncletInfo.fiPSP_slot_CallerSP_offset);
9097 regSet.verifyRegUsed(REG_R1);
9098 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
9099 genFuncletInfo.fiPSP_slot_SP_offset);
9100 getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
9101 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9102 }
9103 else
9104 {
9105 // This is a non-filter funclet
9106 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
9107 genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9108 regSet.verifyRegUsed(REG_R3);
9109 getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
9110 genFuncletInfo.fiPSP_slot_SP_offset);
9111 }
9112}
9113
9114/*****************************************************************************
9115 *
9116 * Generates code for an EH funclet epilog.
9117 */
9118
9119void CodeGen::genFuncletEpilog()
9120{
9121#ifdef DEBUG
9122 if (verbose)
9123 printf("*************** In genFuncletEpilog()\n");
9124#endif
9125
9126 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9127
9128 // Just as for the main function, we delay starting the unwind codes until we have
9129 // an instruction which we know needs an unwind code. This is to support code like
9130 // this:
9131 // movw r3, 0x38e0
9132 // add sp, r3
9133 // pop {r4,r5,r6,r10,r11,pc}
9134 // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
9135
9136 bool unwindStarted = false;
9137
    /* The saved regs info includes the LR register. We need to pop into the PC register to return */
9139 assert(genFuncletInfo.fiSaveRegs & RBM_LR);
9140
9141 regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
9142 regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
9143
9144 regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
9145 maskPopRegsInt |= maskStackAlloc;
9146
9147 if (maskStackAlloc == RBM_NONE)
9148 {
9149 genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
9150 }
9151
9152 if (!unwindStarted)
9153 {
9154 // We'll definitely generate an unwindable instruction next
9155 compiler->unwindBegEpilog();
9156 unwindStarted = true;
9157 }
9158
9159 maskPopRegsInt &= ~RBM_LR;
9160 maskPopRegsInt |= RBM_PC;
9161
9162 if (maskPopRegsFloat != RBM_NONE)
9163 {
9164 genPopFltRegs(maskPopRegsFloat);
9165 compiler->unwindPopMaskFloat(maskPopRegsFloat);
9166 }
9167
9168 assert(FitsIn<int>(maskPopRegsInt));
9169 inst_IV(INS_pop, (int)maskPopRegsInt);
9170 compiler->unwindPopMaskInt(maskPopRegsInt);
9171
9172 compiler->unwindEndEpilog();
9173}
9174
9175/*****************************************************************************
9176 *
9177 * Capture the information used to generate the funclet prologs and epilogs.
9178 * Note that all funclet prologs are identical, and all funclet epilogs are
9179 * identical (per type: filters are identical, and non-filters are identical).
9180 * Thus, we compute the data used for these just once.
9181 *
9182 * See genFuncletProlog() for more information about the prolog/epilog sequences.
9183 */
9184
9185void CodeGen::genCaptureFuncletPrologEpilogInfo()
9186{
9187 if (compiler->ehAnyFunclets())
9188 {
9189 assert(isFramePointerUsed());
9190 assert(compiler->lvaDoneFrameLayout ==
9191 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
9192
        // The frame pointer doesn't point at the end of the pushed callee-saved register area; it points at
        // the pushed r11. So, instead of adding the number of callee-saved regs to CallerSP, we add 1 for lr
        // and 1 for r11 (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
        // (also assumed in genFnProlog()).
9197 assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
9198 unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
9199 genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
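        // For example (illustrative only): with no pre-spilled registers this is simply
        // 2 * REGSIZE_BYTES == 8, i.e. the saved lr and r11 that sit between Caller-SP and the frame pointer.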
9200
9201 regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
9202 unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
9203 unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
9204 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
9205 unsigned funcletFrameSize =
9206 preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize;
9207
9208 unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
9209 unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
9210 unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
9211
9212 unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
9213 int PSP_slot_CallerSP_offset =
9214 -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
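
        // A purely illustrative example (hypothetical numbers): with no pre-spill regs, 5 saved regs
        // (saveRegsSize == 20), and lvaOutgoingArgSpaceSize == 8, funcletFrameSize == 20 + 4 + 8 == 32.
        // Assuming STACK_ALIGN is 8, that is already aligned, so the pad is 0, spDelta == 32 - 20 == 12,
        // PSP_slot_SP_offset == 8, and PSP_slot_CallerSP_offset == -(32 - 8) == -24.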
9215
9216 /* Now save it for future use */
9217
9218 genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
9219 genFuncletInfo.fiSpDelta = spDelta;
9220 genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
9221 genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
9222
9223#ifdef DEBUG
9224 if (verbose)
9225 {
9226 printf("\n");
9227 printf("Funclet prolog / epilog info\n");
9228 printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
9229 printf(" Save regs: ");
9230 dspRegMask(rsMaskSaveRegs);
9231 printf("\n");
9232 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9233 printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
9234 printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
9235
9236 if (PSP_slot_CallerSP_offset !=
9237 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
9238 printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
9239 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
9240 }
9241#endif // DEBUG
9242
9243 assert(PSP_slot_CallerSP_offset < 0);
9244 if (compiler->lvaPSPSym != BAD_VAR_NUM)
9245 {
9246 assert(PSP_slot_CallerSP_offset ==
9247 compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main
9248 // function and funclet!
9249 }
9250 }
9251}
9252
9253#elif defined(_TARGET_AMD64_)
9254
9255/*****************************************************************************
9256 *
9257 * Generates code for an EH funclet prolog.
9258 *
9259 * Funclets have the following incoming arguments:
9260 *
9261 * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
9262 * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
9263 * finally/fault: rcx = InitialSP
9264 *
9265 * Funclets set the following registers on exit:
9266 *
9267 * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
9268 * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9269 * finally/fault: none
9270 *
9271 * The AMD64 funclet prolog sequence is:
9272 *
9273 * push ebp
9274 * push callee-saved regs
9275 * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
9276 * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
9277 * ; the entire function.
9278 * sub sp, XXX ; Establish the rest of the frame.
9279 * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
9280 * ; up to preserve stack alignment. If we push an odd number of registers, we also
9281 * ; generate this, to keep the stack aligned.
9282 *
9283 * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
9284 * ; filters.
9285 * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
9286 * ; epilog.
9287 * ; Also, re-establish the frame pointer from the PSP.
9288 *
9289 * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
9290 * ; PSP of the dynamically containing funclet or function)
9291 * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
 *     lea rbp, [rbp + Function_InitialSP_to_FP_delta]  ; re-establish the frame pointer of the parent frame. If
9293 * ; Function_InitialSP_to_FP_delta==0, we don't need this
9294 * ; instruction.
9295 *
9296 * The epilog sequence is then:
9297 *
9298 * add rsp, XXX
9299 * pop callee-saved regs ; if necessary
9300 * pop rbp
9301 * ret
9302 *
9303 * The funclet frame is thus:
9304 *
9305 * | |
9306 * |-----------------------|
9307 * | incoming |
9308 * | arguments |
9309 * +=======================+ <---- Caller's SP
9310 * | Return address |
9311 * |-----------------------|
9312 * | Saved EBP |
9313 * |-----------------------|
9314 * |Callee saved registers |
9315 * |-----------------------|
9316 * ~ possible 8 byte pad ~
9317 * ~ for alignment ~
9318 * |-----------------------|
9319 * | PSP slot | // Omitted in CoreRT ABI
9320 * |-----------------------|
9321 * | Outgoing arg space | // this only exists if the function makes a call
9322 * |-----------------------| <---- Initial SP
9323 * | | |
9324 * ~ | Stack grows ~
9325 * | | downward |
9326 * V
9327 *
9328 * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
9329 * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
9330 * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
9331 * "FRAMEPTR OFFSETS" for details.
9332 */
9333
9334void CodeGen::genFuncletProlog(BasicBlock* block)
9335{
9336#ifdef DEBUG
9337 if (verbose)
9338 {
9339 printf("*************** In genFuncletProlog()\n");
9340 }
9341#endif
9342
9343 assert(!regSet.rsRegsModified(RBM_FPBASE));
9344 assert(block != nullptr);
9345 assert(block->bbFlags & BBF_FUNCLET_BEG);
9346 assert(isFramePointerUsed());
9347
9348 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9349
9350 gcInfo.gcResetForBB();
9351
9352 compiler->unwindBegProlog();
9353
9354 // We need to push ebp, since it's callee-saved.
9355 // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
9356 // keep track of that on a per-funclet basis, so we push the same set as in the main function.
9357 // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
9358 // is stored here (all temps are allocated in the parent frame).
9359 // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
9360 // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
9361
9362 inst_RV(INS_push, REG_FPBASE, TYP_REF);
9363 compiler->unwindPush(REG_FPBASE);
9364
9365 // Callee saved int registers are pushed to stack.
9366 genPushCalleeSavedRegisters();
9367
9368 regMaskTP maskArgRegsLiveIn;
9369 if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
9370 {
9371 maskArgRegsLiveIn = RBM_ARG_0;
9372 }
9373 else
9374 {
9375 maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2;
9376 }
9377
9378 regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
9379 bool initRegZeroed = false;
9380
9381 genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
9382
9383 // Callee saved float registers are copied to stack in their assigned stack slots
9384 // after allocating space for them as part of funclet frame.
9385 genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9386
9387 // This is the end of the OS-reported prolog for purposes of unwinding
9388 compiler->unwindEndProlog();
9389
9390 // If there is no PSPSym (CoreRT ABI), we are done.
9391 if (compiler->lvaPSPSym == BAD_VAR_NUM)
9392 {
9393 return;
9394 }
9395
9396 getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
9397
9398 regSet.verifyRegUsed(REG_FPBASE);
9399
9400 getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
9401
9402 if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0)
9403 {
9404 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
9405 genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9406 }
9407
9408 // We've modified EBP, but not really. Say that we haven't...
9409 regSet.rsRemoveRegsModified(RBM_FPBASE);
9410}
9411
9412/*****************************************************************************
9413 *
9414 * Generates code for an EH funclet epilog.
9415 *
9416 * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
9417 */
9418
9419void CodeGen::genFuncletEpilog()
9420{
9421#ifdef DEBUG
9422 if (verbose)
9423 {
9424 printf("*************** In genFuncletEpilog()\n");
9425 }
9426#endif
9427
9428 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9429
9430 // Restore callee saved XMM regs from their stack slots before modifying SP
9431 // to position at callee saved int regs.
9432 genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
9433 inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
9434 genPopCalleeSavedRegisters();
9435 inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
9436 instGen_Return(0);
9437}
9438
9439/*****************************************************************************
9440 *
9441 * Capture the information used to generate the funclet prologs and epilogs.
9442 */
9443
9444void CodeGen::genCaptureFuncletPrologEpilogInfo()
9445{
9446 if (!compiler->ehAnyFunclets())
9447 {
9448 return;
9449 }
9450
    // Note that compLclFrameSize can't be used (nor can we call functions that depend on it),
9452 // because we're not going to allocate the same size frame as the parent.
9453
9454 assert(isFramePointerUsed());
9455 assert(compiler->lvaDoneFrameLayout ==
9456 Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
9457 assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized
9458
9459 // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
9460 // that's ok, because we're figuring out an offset in the parent frame.
9461 genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
9462 compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame
9463 // pointer.
9464
9465 assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
9466#ifndef UNIX_AMD64_ABI
9467 // No 4 slots for outgoing params on the stack for System V systems.
9468 assert((compiler->lvaOutgoingArgSpaceSize == 0) ||
9469 (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES))); // On AMD64, we always have 4 outgoing argument
9470// slots if there are any calls in the function.
9471#endif // UNIX_AMD64_ABI
9472 unsigned offset = compiler->lvaOutgoingArgSpaceSize;
9473
9474 genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
9475
9476 // How much stack do we allocate in the funclet?
9477 // We need to 16-byte align the stack.
9478
9479 unsigned totalFrameSize =
9480 REGSIZE_BYTES // return address
9481 + REGSIZE_BYTES // pushed EBP
9482 + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
9483
    // The entire 128 bits of each XMM register are saved to the stack due to ABI encoding requirements.
    // Copying an entire XMM register to/from memory is performant when SP is aligned on an XMM_REGSIZE_BYTES boundary.
9486 unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
9487 unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
9488
9489 unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
9490
9491 totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
9492 + calleeFPRegsSavedSize // pushed callee-saved float regs
9493 // below calculated 'pad' will go here
9494 + PSPSymSize // PSPSym
9495 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9496 ;
9497
9498 unsigned pad = AlignmentPad(totalFrameSize, 16);
9499
9500 genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
9501 + calleeFPRegsSavedSize // Callee saved xmm regs
9502 + pad + PSPSymSize // PSPSym
9503 + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
9504 ;
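
    // A purely illustrative example (hypothetical numbers): with 2 callee-saved int regs pushed besides RBP,
    // one callee-saved xmm reg, a PSPSym, and lvaOutgoingArgSpaceSize == 32:
    //   totalFrameSize starts at 8 (return address) + 8 (RBP) + 16 (int regs) == 32; FPRegsPad == 0 since 32 is
    //   already 16-byte aligned; after adding 16 (xmm) + 8 (PSPSym) + 32 (outgoing args) it is 88, so pad == 8;
    //   fiSpDelta == 0 + 16 + 8 + 8 + 32 == 64, making the whole funclet frame (32 + 64 == 96) 16-byte aligned.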
9505
9506#ifdef DEBUG
9507 if (verbose)
9508 {
9509 printf("\n");
9510 printf("Funclet prolog / epilog info\n");
9511 printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
9512 printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
9513 printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
9514 }
9515
9516 if (compiler->lvaPSPSym != BAD_VAR_NUM)
9517 {
9518 assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
9519 compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
9520 // funclet!
9521 }
9522#endif // DEBUG
9523}
9524
9525#elif defined(_TARGET_ARM64_)
9526
9527// Look in CodeGenArm64.cpp
9528
9529#elif defined(_TARGET_X86_)
9530
9531/*****************************************************************************
9532 *
9533 * Generates code for an EH funclet prolog.
9534 *
9535 *
9536 * Funclets have the following incoming arguments:
9537 *
9538 * catch/filter-handler: eax = the exception object that was caught (see GT_CATCH_ARG)
9539 * filter: eax = the exception object that was caught (see GT_CATCH_ARG)
9540 * finally/fault: none
9541 *
9542 * Funclets set the following registers on exit:
9543 *
9544 * catch/filter-handler: eax = the address at which execution should resume (see BBJ_EHCATCHRET)
9545 * filter: eax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
9546 * finally/fault: none
9547 *
9548 * Funclet prolog/epilog sequence and funclet frame layout are TBD.
9549 *
9550 */
9551
9552void CodeGen::genFuncletProlog(BasicBlock* block)
9553{
9554#ifdef DEBUG
9555 if (verbose)
9556 {
9557 printf("*************** In genFuncletProlog()\n");
9558 }
9559#endif
9560
9561 ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
9562
9563 gcInfo.gcResetForBB();
9564
9565 compiler->unwindBegProlog();
9566
9567 // This is the end of the OS-reported prolog for purposes of unwinding
9568 compiler->unwindEndProlog();
9569
    // TODO: We may need an EBP restore sequence here if we introduce a PSPSym
9571
    // Add padding for 16-byte alignment
9573 inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
9574}
9575
9576/*****************************************************************************
9577 *
9578 * Generates code for an EH funclet epilog.
9579 */
9580
9581void CodeGen::genFuncletEpilog()
9582{
9583#ifdef DEBUG
9584 if (verbose)
9585 {
9586 printf("*************** In genFuncletEpilog()\n");
9587 }
9588#endif
9589
9590 ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
9591
    // Remove the padding that was added for 16-byte alignment
9593 inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE);
9594
9595 instGen_Return(0);
9596}
9597
9598/*****************************************************************************
9599 *
9600 * Capture the information used to generate the funclet prologs and epilogs.
9601 */
9602
9603void CodeGen::genCaptureFuncletPrologEpilogInfo()
9604{
9605 if (!compiler->ehAnyFunclets())
9606 {
9607 return;
9608 }
9609}
9610
9611#else // _TARGET_*
9612
9613/*****************************************************************************
9614 *
9615 * Generates code for an EH funclet prolog.
9616 */
9617
9618void CodeGen::genFuncletProlog(BasicBlock* block)
9619{
9620 NYI("Funclet prolog");
9621}
9622
9623/*****************************************************************************
9624 *
9625 * Generates code for an EH funclet epilog.
9626 */
9627
9628void CodeGen::genFuncletEpilog()
9629{
9630 NYI("Funclet epilog");
9631}
9632
9633/*****************************************************************************
9634 *
9635 * Capture the information used to generate the funclet prologs and epilogs.
9636 */
9637
9638void CodeGen::genCaptureFuncletPrologEpilogInfo()
9639{
9640 if (compiler->ehAnyFunclets())
9641 {
9642 NYI("genCaptureFuncletPrologEpilogInfo()");
9643 }
9644}
9645
9646#endif // _TARGET_*
9647
9648/*-----------------------------------------------------------------------------
9649 *
9650 * Set the main function PSPSym value in the frame.
9651 * Funclets use different code to load the PSP sym and save it in their frame.
9652 * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
9653 * The PSPSym section of that document is copied here.
9654 *
9655 ***********************************
9656 * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
9657 * accesses locals from the main function body.
9658 *
9659 * First, two definitions.
9660 *
9661 * Caller-SP is the value of the stack pointer in a function's caller before the call
9662 * instruction is executed. That is, when function A calls function B, Caller-SP for B
9663 * is the value of the stack pointer immediately before the call instruction in A
9664 * (calling B) was executed. Note that this definition holds for both AMD64, which
 * pushes the return address when a call instruction is executed, and for ARM, which
9666 * doesn't. For AMD64, Caller-SP is the address above the call return address.
9667 *
9668 * Initial-SP is the initial value of the stack pointer after the fixed-size portion of
9669 * the frame has been allocated. That is, before any "alloca"-type allocations.
9670 *
9671 * The PSPSym is a pointer-sized local variable in the frame of the main function and
9672 * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
9673 * for the main function. The stack offset of the PSPSym is reported to the VM in the
9674 * GC information header. The value reported in the GC information is the offset of the
9675 * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
9676 * value is reported to the VM, differs between architectures. In particular, note that
9677 * most things in the GC information header are reported as offsets relative to Caller-SP,
9678 * but PSPSym on AMD64 is one (maybe the only) exception.)
9679 *
9680 * The VM uses the PSPSym to find other locals it cares about (such as the generics context
9681 * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
9682 * the frame pointer is the same value in a funclet as it is in the main function body.
9683 *
9684 * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
9685 * true for all funclets and it is passed as the first argument in RCX, but for ARM this is
9686 * only true for first pass funclets (currently just filters) and it is passed as the second
9687 * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
9688 * frame in the exception processing system. For the CLR, it points either to the main function
9689 * frame or a dynamically enclosing funclet frame from the same function, for the funclet being
9690 * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
9691 *
9692 * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
9693 * don't know if the Establisher Frame is from the main function or a funclet, we design the
9694 * main function and funclet frame layouts to place the PSPSym at an identical, small, constant
9695 * offset from the Establisher Frame in each case. (This is also required because we only report
9696 * a single offset to the PSPSym in the GC information, and that offset must be valid for the main
9697 * function and all of its funclets). Then, the funclet uses this known offset to compute the
9698 * PSPSym address and read its value. From this, it can compute the value of the frame pointer
9699 * (which is a constant offset from the PSPSym value) and set the frame register to be the same
9700 * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
9701 * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
9702 * for every nested funclet invocation.
9703 *
9704 * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
9705 * restores all non-volatile registers to their values within the parent frame. This includes
9706 * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
9707 * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
9708 *
9709 * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
9710 * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
9711 * ARM this is the first argument and passed in R0.
9712 *
9713 * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
9714 * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
9715 * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
9716 * is required in all funclets as well as the main function, whereas if the establisher frame was
9717 * correctly reported, the PSPSym could be omitted in some cases.)
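 *
 * As a purely illustrative sketch (the offset is hypothetical): if the PSPSym lives at offset 0x20 from
 * Initial-SP on AMD64, and rcx holds the Establisher Frame on entry, a funclet prolog reads and copies it
 * roughly as follows:
 *
 *     mov rbp, [rcx + 20h]   ; load the main function's Initial-SP from the parent frame's PSPSym
 *     mov [rsp + 20h], rbp   ; copy it into this funclet's own PSPSym slot
 *     lea rbp, [rbp + delta] ; re-derive the frame pointer (delta == Function_InitialSP_to_FP_delta)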
9718 ***********************************
9719 */
9720void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
9721{
9722 assert(compiler->compGeneratingProlog);
9723
9724 if (compiler->lvaPSPSym == BAD_VAR_NUM)
9725 {
9726 return;
9727 }
9728
9729 noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
9730
9731#if defined(_TARGET_ARM_)
9732
9733 // We either generate:
9734 // add r1, r11, 8
9735 // str r1, [reg + PSPSymOffset]
9736 // or:
9737 // add r1, sp, 76
9738 // str r1, [reg + PSPSymOffset]
9739 // depending on the smallest encoding
9740
9741 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9742
9743 int callerSPOffs;
9744 regNumber regBase;
9745
9746 if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
9747 {
9748 // use the "add <reg>, sp, imm" form
9749
9750 callerSPOffs = SPtoCallerSPdelta;
9751 regBase = REG_SPBASE;
9752 }
9753 else
9754 {
9755 // use the "add <reg>, r11, imm" form
9756
9757 int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
9758 noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
9759
9760 callerSPOffs = FPtoCallerSPdelta;
9761 regBase = REG_FPBASE;
9762 }
9763
9764 // We will just use the initReg since it is an available register
9765 // and we are probably done using it anyway...
9766 regNumber regTmp = initReg;
9767 *pInitRegZeroed = false;
9768
9769 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
9770 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
9771
9772#elif defined(_TARGET_ARM64_)
9773
9774 int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
9775
9776 // We will just use the initReg since it is an available register
9777 // and we are probably done using it anyway...
9778 regNumber regTmp = initReg;
9779 *pInitRegZeroed = false;
9780
9781 getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
9782 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
9783
9784#elif defined(_TARGET_AMD64_)
9785
9786 // The PSP sym value is Initial-SP, not Caller-SP!
9787 // We assume that RSP is Initial-SP when this function is called. That is, the stack frame
9788 // has been established.
9789 //
9790 // We generate:
9791 // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
9792
9793 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0);
9794
9795#else // _TARGET_*
9796
9797 NYI("Set function PSP sym");
9798
9799#endif // _TARGET_*
9800}
9801
9802#endif // FEATURE_EH_FUNCLETS
9803
9804/*****************************************************************************
9805 *
9806 * Generates code for all the function and funclet prologs and epilogs.
9807 */
9808
9809void CodeGen::genGeneratePrologsAndEpilogs()
9810{
9811#ifdef DEBUG
9812 if (verbose)
9813 {
9814 printf("*************** Before prolog / epilog generation\n");
9815 getEmitter()->emitDispIGlist(false);
9816 }
9817#endif
9818
9819 // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
9820 // This affects our code that determines which untracked locals need to be zero initialized.
9821 compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
9822
9823 // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
9824
9825 getEmitter()->emitStartPrologEpilogGeneration();
9826
9827 gcInfo.gcResetForBB();
9828 genFnProlog();
9829
9830 // Generate all the prologs and epilogs.
9831 CLANG_FORMAT_COMMENT_ANCHOR;
9832
9833#if FEATURE_EH_FUNCLETS
9834
9835 // Capture the data we're going to use in the funclet prolog and epilog generation. This is
9836 // information computed during codegen, or during function prolog generation, like
9837 // frame offsets. It must run after main function prolog generation.
9838
9839 genCaptureFuncletPrologEpilogInfo();
9840
9841#endif // FEATURE_EH_FUNCLETS
9842
9843 // Walk the list of prologs and epilogs and generate them.
9844 // We maintain a list of prolog and epilog basic blocks in
9845 // the insGroup structure in the emitter. This list was created
9846 // during code generation by the genReserve*() functions.
9847 //
9848 // TODO: it seems like better design would be to create a list of prologs/epilogs
9849 // in the code generator (not the emitter), and then walk that list. But we already
9850 // have the insGroup list, which serves well, so we don't need the extra allocations
9851 // for a prolog/epilog list in the code generator.
9852
9853 getEmitter()->emitGeneratePrologEpilog();
9854
9855 // Tell the emitter we're done with all prolog and epilog generation.
9856
9857 getEmitter()->emitFinishPrologEpilogGeneration();
9858
9859#ifdef DEBUG
9860 if (verbose)
9861 {
9862 printf("*************** After prolog / epilog generation\n");
9863 getEmitter()->emitDispIGlist(false);
9864 }
9865#endif
9866}
9867
9868/*
9869XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9870XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9871XX XX
9872XX End Prolog / Epilog XX
9873XX XX
9874XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9875XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
9876*/
9877
9878#if STACK_PROBES
9879void CodeGen::genGenerateStackProbe()
9880{
9881 noway_assert(compiler->opts.compNeedStackProbes);
9882
    // If this assert fires, it means somebody has changed the value of
    // CORINFO_STACKPROBE_DEPTH.
    // Why does the EE need such a deep probe? It should just need a couple
    // of bytes to set up a frame in the unmanaged code.
9887
9888 static_assert_no_msg(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize());
9889
9890 JITDUMP("Emitting stack probe:\n");
9891 getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE,
9892 -(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK));
9893}
9894#endif // STACK_PROBES
9895
9896#if defined(_TARGET_XARCH_)
// Save the registers in compCalleeFPRegsSavedMask, with the smallest register number saved at [RSP+offset],
// working down the stack to the largest register number stored at
// [RSP+offset-(genCountBits(regMask)-1)*XMM_REGSIZE_BYTES].
// Here offset is the 16-byte aligned offset after pushing the integer registers.
//
// Params
//   lclFrameSize - Fixed frame size excluding callee pushed int regs.
//                  non-funclet: this will be compLclFrameSize.
//                  funclet frames: this will be genFuncletInfo.fiSpDelta.
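//
// As a purely illustrative example (hypothetical numbers): on AMD64 with xmm6 and xmm7 to save, an even count
// of pushed callee-saved int regs, and lclFrameSize == 0x48, firstFPRegPadding == 8 and
// offset == 0x48 - 8 - 16 == 0x30, so xmm6 is stored at [rsp+30h] and xmm7 at [rsp+20h], both 16-byte aligned
// as required by the movaps-based copy.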
9905void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
9906{
9907 genVzeroupperIfNeeded(false);
9908 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
9909
9910 // Only callee saved floating point registers should be in regMask
9911 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
9912
9913 // fast path return
9914 if (regMask == RBM_NONE)
9915 {
9916 return;
9917 }
9918
9919#ifdef _TARGET_AMD64_
9920 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
9921 unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
9922
9923 // Offset is 16-byte aligned since we use movaps for preserving xmm regs.
9924 assert((offset % 16) == 0);
9925 instruction copyIns = ins_Copy(TYP_FLOAT);
9926#else // !_TARGET_AMD64_
9927 unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
9928 instruction copyIns = INS_movupd;
9929#endif // !_TARGET_AMD64_
9930
9931 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
9932 {
9933 regMaskTP regBit = genRegMask(reg);
9934 if ((regBit & regMask) != 0)
9935 {
            // The ABI requires us to preserve the lower 128 bits of YMM registers.
9937 getEmitter()->emitIns_AR_R(copyIns,
9938 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
9939 // EA_16BYTE
9940 reg, REG_SPBASE, offset);
9941 compiler->unwindSaveReg(reg, offset);
9942 regMask &= ~regBit;
9943 offset -= XMM_REGSIZE_BYTES;
9944 }
9945 }
9946}
9947
// Restore the registers in compCalleeFPRegsSavedMask, with the smallest register number saved at [RSP+offset],
// working down the stack to the largest register number stored at
// [RSP+offset-(genCountBits(regMask)-1)*XMM_REGSIZE_BYTES].
// Here offset is the 16-byte aligned offset after pushing the integer registers.
//
// Params
//   lclFrameSize - Fixed frame size excluding callee pushed int regs.
//                  non-funclet: this will be compLclFrameSize.
//                  funclet frames: this will be genFuncletInfo.fiSpDelta.
9956void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
9957{
9958 regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
9959
9960 // Only callee saved floating point registers should be in regMask
9961 assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
9962
9963 // fast path return
9964 if (regMask == RBM_NONE)
9965 {
9966 genVzeroupperIfNeeded();
9967 return;
9968 }
9969
9970#ifdef _TARGET_AMD64_
9971 unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
9972 instruction copyIns = ins_Copy(TYP_FLOAT);
9973#else // !_TARGET_AMD64_
9974 unsigned firstFPRegPadding = 0;
9975 instruction copyIns = INS_movupd;
9976#endif // !_TARGET_AMD64_
9977
9978 unsigned offset;
9979 regNumber regBase;
9980 if (compiler->compLocallocUsed)
9981 {
9982 // localloc frame: use frame pointer relative offset
9983 assert(isFramePointerUsed());
9984 regBase = REG_FPBASE;
9985 offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
9986 }
9987 else
9988 {
9989 regBase = REG_SPBASE;
9990 offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
9991 }
9992
9993#ifdef _TARGET_AMD64_
9994 // Offset is 16-byte aligned since we use movaps for restoring xmm regs
9995 assert((offset % 16) == 0);
9996#endif // _TARGET_AMD64_
9997
9998 for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
9999 {
10000 regMaskTP regBit = genRegMask(reg);
10001 if ((regBit & regMask) != 0)
10002 {
            // The ABI requires us to restore the lower 128 bits of YMM registers.
10004 getEmitter()->emitIns_R_AR(copyIns,
10005 EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
10006 // EA_16BYTE
10007 reg, regBase, offset);
10008 regMask &= ~regBit;
10009 offset -= XMM_REGSIZE_BYTES;
10010 }
10011 }
10012 genVzeroupperIfNeeded();
10013}
10014
10015// Generate a vzeroupper instruction as needed to zero out the upper 128 bits of all YMM registers so that
10016// AVX/legacy SSE transition penalties can be avoided. This function is used by genPreserveCalleeSavedFltRegs
10017// (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in the prolog if the method contains
10018// 128-bit or 256-bit AVX code, to avoid the legacy SSE to AVX transition penalty, which could happen when native
10019// code containing legacy SSE calls into JITted AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in the epilog
10020// if the method contains 256-bit AVX code, to avoid the AVX to legacy SSE transition penalty.
10021//
10022// Params
10023//    check256bitOnly  - true to check whether the function contains 256-bit AVX instructions and generate a
10024//                       vzeroupper instruction; false to check whether it contains any AVX instructions (either 128-bit or 256-bit).
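// For example, genPreserveCalleeSavedFltRegs calls genVzeroupperIfNeeded(false) so that any AVX code in the
// method triggers a vzeroupper in the prolog, while genRestoreCalleeSavedFltRegs uses the default of true so
// that only 256-bit AVX code triggers one in the epilog.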
10025//
10026void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
10027{
10028 bool emitVzeroUpper = false;
10029 if (check256bitOnly)
10030 {
10031 emitVzeroUpper = getEmitter()->Contains256bitAVX();
10032 }
10033 else
10034 {
10035 emitVzeroUpper = getEmitter()->ContainsAVX();
10036 }
10037
10038 if (emitVzeroUpper)
10039 {
10040 assert(compiler->canUseVexEncoding());
10041 instGen(INS_vzeroupper);
10042 }
10043}
10044
10045#endif // defined(_TARGET_XARCH_)
10046
10047//-----------------------------------------------------------------------------------
10048// IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
10049//
10050// Arguments:
10051// hClass - type handle
10052//
10053// Return Value:
10054// true if type is returned in multiple registers, false otherwise.
10055//
10056bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
10057{
10058 if (hClass == NO_CLASS_HANDLE)
10059 {
10060 return false;
10061 }
10062
10063 structPassingKind howToReturnStruct;
10064 var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
10065
10066 return (varTypeIsStruct(returnType));
10067}
10068
10069//----------------------------------------------
10070// Methods that support HFAs for ARM32/ARM64
10071//----------------------------------------------
10072
10073bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
10074{
10075#ifdef FEATURE_HFA
10076 return varTypeIsFloating(GetHfaType(hClass));
10077#else
10078 return false;
10079#endif
10080}
10081
10082bool Compiler::IsHfa(GenTree* tree)
10083{
10084#ifdef FEATURE_HFA
10085 return IsHfa(gtGetStructHandleIfPresent(tree));
10086#else
10087 return false;
10088#endif
10089}
10090
10091var_types Compiler::GetHfaType(GenTree* tree)
10092{
10093#ifdef FEATURE_HFA
10094 return GetHfaType(gtGetStructHandleIfPresent(tree));
10095#else
10096 return TYP_UNDEF;
10097#endif
10098}
10099
10100unsigned Compiler::GetHfaCount(GenTree* tree)
10101{
10102 return GetHfaCount(gtGetStructHandleIfPresent(tree));
10103}
10104
10105var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
10106{
10107 var_types result = TYP_UNDEF;
10108 if (hClass != NO_CLASS_HANDLE)
10109 {
10110#ifdef FEATURE_HFA
10111 CorInfoType corType = info.compCompHnd->getHFAType(hClass);
10112 if (corType != CORINFO_TYPE_UNDEF)
10113 {
10114 result = JITtype2varType(corType);
10115 }
10116#endif // FEATURE_HFA
10117 }
10118 return result;
10119}
10120
10121//------------------------------------------------------------------------
10122// GetHfaCount: Given a class handle for an HFA struct,
10123//              return the number of registers needed to hold the HFA.
10124//
10125//    Note that on ARM32 the single precision registers overlap with
10126//        the double precision registers, and for that reason each
10127//        double register is considered to be two single registers.
10128//        Thus for an HFA of 4 doubles this function will return 8 on ARM32.
10129//        On ARM64, given an HFA of 4 singles or 4 doubles, this function
10130//        will return 4 for both.
10131// Arguments:
10132// hClass: the class handle of a HFA struct
10133//
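// For example (illustrative sizes): an HFA of 4 doubles has a class size of 32 bytes, so ARM32 reports
// 32 / 4 = 8 single-precision registers while ARM64 reports 32 / 8 = 4 double registers; an HFA of
// 4 floats reports 16 / 4 = 4 registers on both.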
10134unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
10135{
10136 assert(IsHfa(hClass));
10137#ifdef _TARGET_ARM_
10138    // An HFA of doubles is twice as large as an HFA of singles for ARM32
10139    // (i.e. it uses twice the number of single precision registers)
10140 return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
10141#else // _TARGET_ARM64_
10142 var_types hfaType = GetHfaType(hClass);
10143 unsigned classSize = info.compCompHnd->getClassSize(hClass);
10144    // Note that the retail build issues a warning about a potential division by zero without the Max function
10145 unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
10146 return classSize / elemSize;
10147#endif // _TARGET_ARM64_
10148}
10149
10150#ifdef _TARGET_XARCH_
10151
10152//------------------------------------------------------------------------
10153// genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
10154// map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
10155// X86/x64 has a special encoding for shift/rotate-by-constant-1.
10156//
10157// Arguments:
10158// ins: the base shift/rotate instruction
10159// shiftByValue: the constant value by which we are shifting/rotating
10160//
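// For example, genMapShiftInsToShiftByConstantIns(INS_shl, 1) returns INS_shl_1 (the shift-by-one encoding),
// while genMapShiftInsToShiftByConstantIns(INS_shl, 5) returns INS_shl_N (the shift-by-immediate encoding).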
10161instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
10162{
10163 assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr ||
10164 ins == INS_sar);
10165
10166 // Which format should we use?
10167
10168 instruction shiftByConstantIns;
10169
10170 if (shiftByValue == 1)
10171 {
10172 // Use the shift-by-one format.
10173
10174 assert(INS_rcl + 1 == INS_rcl_1);
10175 assert(INS_rcr + 1 == INS_rcr_1);
10176 assert(INS_rol + 1 == INS_rol_1);
10177 assert(INS_ror + 1 == INS_ror_1);
10178 assert(INS_shl + 1 == INS_shl_1);
10179 assert(INS_shr + 1 == INS_shr_1);
10180 assert(INS_sar + 1 == INS_sar_1);
10181
10182 shiftByConstantIns = (instruction)(ins + 1);
10183 }
10184 else
10185 {
10186 // Use the shift-by-NNN format.
10187
10188 assert(INS_rcl + 2 == INS_rcl_N);
10189 assert(INS_rcr + 2 == INS_rcr_N);
10190 assert(INS_rol + 2 == INS_rol_N);
10191 assert(INS_ror + 2 == INS_ror_N);
10192 assert(INS_shl + 2 == INS_shl_N);
10193 assert(INS_shr + 2 == INS_shr_N);
10194 assert(INS_sar + 2 == INS_sar_N);
10195
10196 shiftByConstantIns = (instruction)(ins + 2);
10197 }
10198
10199 return shiftByConstantIns;
10200}
10201
10202#endif // _TARGET_XARCH_
10203
10204//------------------------------------------------------------------------------------------------ //
10205// getFirstArgWithStackSlot - returns the first argument with a stack slot on the caller's frame.
10206//
10207// Return value:
10208//    The lclNum of the first argument with a stack slot on the caller's frame.
10209//
10210// Note:
10211//    On x64 Windows the caller always creates slots (homing space) in its frame for the
10212//    first 4 arguments of a callee (register passed args). So, the variable number
10213//    (lclNum) for the first argument with a stack slot is always 0.
10214//    For System V systems or ARM architectures, there is no such calling convention requirement, and the code
10215//    needs to find the first stack-passed argument from the caller. This is done by iterating over
10216//    all the parameter variables (lvIsParam) and finding the first one whose lvArgReg equals REG_STK.
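// For example (hypothetical signature), on a System V AMD64 target a method taking seven integer arguments
// passes the first six in registers, so this routine would return 6, the lclNum of the seventh argument.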
10217//
10218unsigned CodeGen::getFirstArgWithStackSlot()
10219{
10220#if defined(UNIX_AMD64_ABI) || defined(_TARGET_ARMARCH_)
10221 unsigned baseVarNum = 0;
10222 // Iterate over all the lvParam variables in the Lcl var table until we find the first one
10223 // that's passed on the stack.
10224 LclVarDsc* varDsc = nullptr;
10225 for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
10226 {
10227 varDsc = &(compiler->lvaTable[i]);
10228
10229 // We should have found a stack parameter (and broken out of this loop) before
10230 // we find any non-parameters.
10231 assert(varDsc->lvIsParam);
10232
10233 if (varDsc->lvArgReg == REG_STK)
10234 {
10235 baseVarNum = i;
10236 break;
10237 }
10238 }
10239 assert(varDsc != nullptr);
10240
10241 return baseVarNum;
10242#elif defined(_TARGET_AMD64_)
10243 return 0;
10244#else // _TARGET_X86
10245 // Not implemented for x86.
10246 NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
10247 return BAD_VAR_NUM;
10248#endif // _TARGET_X86_
10249}
10250
10251//------------------------------------------------------------------------
10252// genSinglePush: Report a change in stack level caused by a single word-sized push instruction
10253//
10254void CodeGen::genSinglePush()
10255{
10256 AddStackLevel(REGSIZE_BYTES);
10257}
10258
10259//------------------------------------------------------------------------
10260// genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
10261//
10262void CodeGen::genSinglePop()
10263{
10264 SubtractStackLevel(REGSIZE_BYTES);
10265}
10266
10267//------------------------------------------------------------------------
10268// genPushRegs: Push the given registers.
10269//
10270// Arguments:
10271//    regs - mask of registers to push
10272// byrefRegs - OUT arg. Set to byref registers that were pushed.
10273// noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
10274//
10275// Return Value:
10276// Mask of registers pushed.
10277//
10278// Notes:
10279// This function does not check if the register is marked as used, etc.
10280//
10281regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
10282{
10283 *byrefRegs = RBM_NONE;
10284 *noRefRegs = RBM_NONE;
10285
10286 if (regs == RBM_NONE)
10287 {
10288 return RBM_NONE;
10289 }
10290
10291#if FEATURE_FIXED_OUT_ARGS
10292
10293 NYI("Don't call genPushRegs with real regs!");
10294 return RBM_NONE;
10295
10296#else // FEATURE_FIXED_OUT_ARGS
10297
10298 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
10299 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
10300
10301 regMaskTP pushedRegs = regs;
10302
10303 for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
10304 {
10305 regMaskTP regBit = regMaskTP(1) << reg;
10306
10307 if ((regBit & regs) == RBM_NONE)
10308 continue;
10309
10310 var_types type;
10311 if (regBit & gcInfo.gcRegGCrefSetCur)
10312 {
10313 type = TYP_REF;
10314 }
10315 else if (regBit & gcInfo.gcRegByrefSetCur)
10316 {
10317 *byrefRegs |= regBit;
10318 type = TYP_BYREF;
10319 }
10320 else if (noRefRegs != NULL)
10321 {
10322 *noRefRegs |= regBit;
10323 type = TYP_I_IMPL;
10324 }
10325 else
10326 {
10327 continue;
10328 }
10329
10330 inst_RV(INS_push, reg, type);
10331
10332 genSinglePush();
10333 gcInfo.gcMarkRegSetNpt(regBit);
10334
10335 regs &= ~regBit;
10336 }
10337
10338 return pushedRegs;
10339
10340#endif // FEATURE_FIXED_OUT_ARGS
10341}
10342
10343//------------------------------------------------------------------------
10344// genPopRegs: Pop the registers that were pushed by genPushRegs().
10345//
10346// Arguments:
10347// regs - mask of registers to pop
10348// byrefRegs - The byref registers that were pushed by genPushRegs().
10349// noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
10350//
10351// Return Value:
10352// None
10353//
10354void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
10355{
10356 if (regs == RBM_NONE)
10357 {
10358 return;
10359 }
10360
10361#if FEATURE_FIXED_OUT_ARGS
10362
10363 NYI("Don't call genPopRegs with real regs!");
10364
10365#else // FEATURE_FIXED_OUT_ARGS
10366
10367 noway_assert((regs & byrefRegs) == byrefRegs);
10368 noway_assert((regs & noRefRegs) == noRefRegs);
10369 noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
10370
10371 noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
10372 noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
10373
10374    // Walk the registers in the reverse order of genPushRegs()
10375 for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
10376 {
10377 regMaskTP regBit = regMaskTP(1) << reg;
10378
10379 if ((regBit & regs) == RBM_NONE)
10380 continue;
10381
10382 var_types type;
10383 if (regBit & byrefRegs)
10384 {
10385 type = TYP_BYREF;
10386 }
10387 else if (regBit & noRefRegs)
10388 {
10389 type = TYP_INT;
10390 }
10391 else
10392 {
10393 type = TYP_REF;
10394 }
10395
10396 inst_RV(INS_pop, reg, type);
10397 genSinglePop();
10398
10399 if (type != TYP_INT)
10400 gcInfo.gcMarkRegPtrVal(reg, type);
10401
10402 regs &= ~regBit;
10403 }
10404
10405#endif // FEATURE_FIXED_OUT_ARGS
10406}
10407
10408/*****************************************************************************
10409 * genSetScopeInfo
10410 *
10411 * This function should be called only after the sizes of the emitter blocks
10412 * have been finalized.
10413 */
10414
10415void CodeGen::genSetScopeInfo()
10416{
10417 if (!compiler->opts.compScopeInfo)
10418 {
10419 return;
10420 }
10421
10422#ifdef DEBUG
10423 if (verbose)
10424 {
10425 printf("*************** In genSetScopeInfo()\n");
10426 }
10427#endif
10428
10429 if (compiler->info.compVarScopesCount == 0)
10430 {
10431 compiler->eeSetLVcount(0);
10432 compiler->eeSetLVdone();
10433 return;
10434 }
10435
10436 noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
10437 noway_assert(psiOpenScopeList.scNext == nullptr);
10438
10439 unsigned i;
10440 unsigned scopeCnt = siScopeCnt + psiScopeCnt;
10441
10442 compiler->eeSetLVcount(scopeCnt);
10443
10444#ifdef DEBUG
10445 genTrnslLocalVarCount = scopeCnt;
10446 if (scopeCnt)
10447 {
10448 genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt];
10449 }
10450#endif
10451
10452 // Record the scopes found for the parameters over the prolog.
10453 // The prolog needs to be treated differently as a variable may not
10454 // have the same info in the prolog block as is given by compiler->lvaTable.
10455    // e.g., a register parameter is actually on the stack, before it is loaded to its register.
10456
10457 CodeGen::psiScope* scopeP;
10458
10459 for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
10460 {
10461 noway_assert(scopeP != nullptr);
10462 noway_assert(scopeP->scStartLoc.Valid());
10463 noway_assert(scopeP->scEndLoc.Valid());
10464
10465 UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
10466 UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
10467
10468 unsigned varNum = scopeP->scSlotNum;
10469 noway_assert(startOffs <= endOffs);
10470
10471 // The range may be 0 if the prolog is empty. For such a case,
10472 // report the liveness of arguments to span at least the first
10473 // instruction in the method. This will be incorrect (except on
10474 // entry to the method) if the very first instruction of the method
10475 // is part of a loop. However, this should happen
10476 // very rarely, and the incorrectness is worth being able to look
10477 // at the argument on entry to the method.
10478 if (startOffs == endOffs)
10479 {
10480 noway_assert(startOffs == 0);
10481 endOffs++;
10482 }
10483
10484 Compiler::siVarLoc varLoc;
10485
10486 if (scopeP->scRegister)
10487 {
10488 varLoc.vlType = Compiler::VLT_REG;
10489 varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum;
10490 }
10491 else
10492 {
10493 varLoc.vlType = Compiler::VLT_STK;
10494 varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg;
10495 varLoc.vlStk.vlsOffset = scopeP->u2.scOffset;
10496 }
10497
10498 genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc);
10499 }
10500
10501 // Record the scopes for the rest of the method.
10502 // Check that the LocalVarInfo scopes look OK
10503 noway_assert(siOpenScopeList.scNext == nullptr);
10504
10505 CodeGen::siScope* scopeL;
10506
10507 for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
10508 {
10509 noway_assert(scopeL != nullptr);
10510 noway_assert(scopeL->scStartLoc.Valid());
10511 noway_assert(scopeL->scEndLoc.Valid());
10512
10513 // Find the start and end IP
10514
10515 UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
10516 UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
10517
10518 noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
10519
10520 // For stack vars, find the base register, and offset
10521
10522 regNumber baseReg;
10523 signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs;
10524
10525 if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased)
10526 {
10527 baseReg = REG_SPBASE;
10528 offset += scopeL->scStackLevel;
10529 }
10530 else
10531 {
10532 baseReg = REG_FPBASE;
10533 }
10534
10535 // Now fill in the varLoc
10536
10537 Compiler::siVarLoc varLoc;
10538
10539 // TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register
10540 // for part of its lifetime, or in different registers for different parts of its lifetime.
10541 // This should only matter for non-debug code, where we do variable enregistration.
10542 // We should store the ranges of variable enregistration in the scope table.
10543 if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg())
10544 {
10545 var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet());
10546 switch (type)
10547 {
10548 case TYP_INT:
10549 case TYP_REF:
10550 case TYP_BYREF:
10551#ifdef _TARGET_64BIT_
10552 case TYP_LONG:
10553#endif // _TARGET_64BIT_
10554
10555 varLoc.vlType = Compiler::VLT_REG;
10556 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10557 break;
10558
10559#ifndef _TARGET_64BIT_
10560 case TYP_LONG:
10561#if !CPU_HAS_FP_SUPPORT
10562 case TYP_DOUBLE:
10563#endif
10564
10565 if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK)
10566 {
10567 varLoc.vlType = Compiler::VLT_REG_REG;
10568 varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10569 varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg;
10570 }
10571 else
10572 {
10573 varLoc.vlType = Compiler::VLT_REG_STK;
10574 varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10575 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg;
10576 if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE)
10577 {
10578 varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10579 }
10580 varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int);
10581 }
10582 break;
10583#endif // !_TARGET_64BIT_
10584
10585#ifdef _TARGET_64BIT_
10586
10587 case TYP_FLOAT:
10588 case TYP_DOUBLE:
10589 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
10590 // so no XMM registers can get debug information.
10591 varLoc.vlType = Compiler::VLT_REG_FP;
10592 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10593 break;
10594
10595#else // !_TARGET_64BIT_
10596
10597#if CPU_HAS_FP_SUPPORT
10598 case TYP_FLOAT:
10599 case TYP_DOUBLE:
10600 if (isFloatRegType(type))
10601 {
10602 varLoc.vlType = Compiler::VLT_FPSTK;
10603 varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10604 }
10605 break;
10606#endif // CPU_HAS_FP_SUPPORT
10607
10608#endif // !_TARGET_64BIT_
10609
10610#ifdef FEATURE_SIMD
10611 case TYP_SIMD8:
10612 case TYP_SIMD12:
10613 case TYP_SIMD16:
10614 case TYP_SIMD32:
10615 varLoc.vlType = Compiler::VLT_REG_FP;
10616
10617 // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
10618 // so no XMM registers can get debug information.
10619 //
10620 // Note: Need to initialize vlrReg field, otherwise during jit dump hitting an assert
10621 // in eeDispVar() --> getRegName() that regNumber is valid.
10622 varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
10623 break;
10624#endif // FEATURE_SIMD
10625
10626 default:
10627 noway_assert(!"Invalid type");
10628 }
10629 }
10630 else
10631 {
10632 assert(offset != BAD_STK_OFFS);
10633 LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum;
10634 switch (genActualType(varDsc->TypeGet()))
10635 {
10636 case TYP_INT:
10637 case TYP_REF:
10638 case TYP_BYREF:
10639 case TYP_FLOAT:
10640 case TYP_STRUCT:
10641 case TYP_BLK: // Needed because of the TYP_BLK stress mode
10642#ifdef FEATURE_SIMD
10643 case TYP_SIMD8:
10644 case TYP_SIMD12:
10645 case TYP_SIMD16:
10646 case TYP_SIMD32:
10647#endif
10648#ifdef _TARGET_64BIT_
10649 case TYP_LONG:
10650 case TYP_DOUBLE:
10651#endif // _TARGET_64BIT_
10652#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
10653                    // In the AMD64 ABI we are supposed to pass a struct by reference when its
10654                    // size is not 1, 2, 4 or 8 bytes. During fgMorph, the compiler modifies
10655                    // the IR to comply with the ABI and therefore changes the type of the lclVar
10656                    // that holds the struct from TYP_STRUCT to TYP_BYREF, but it gives us a hint that
10657                    // this is still a struct by setting the lvIsTemp flag.
10658                    // The same is true for ARM64 and structs > 16 bytes.
10659                    // (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail.)
10660                    // Now, the VM expects a special enum for these types of local vars: VLT_STK_BYREF,
10661                    // to accommodate this situation.
10662 if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp)
10663 {
10664 assert(varDsc->lvIsParam);
10665 varLoc.vlType = Compiler::VLT_STK_BYREF;
10666 }
10667 else
10668#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
10669 {
10670 varLoc.vlType = Compiler::VLT_STK;
10671 }
10672 varLoc.vlStk.vlsBaseReg = baseReg;
10673 varLoc.vlStk.vlsOffset = offset;
10674 if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE)
10675 {
10676 varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10677 }
10678 break;
10679
10680#ifndef _TARGET_64BIT_
10681 case TYP_LONG:
10682 case TYP_DOUBLE:
10683 varLoc.vlType = Compiler::VLT_STK2;
10684 varLoc.vlStk2.vls2BaseReg = baseReg;
10685 varLoc.vlStk2.vls2Offset = offset;
10686 if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE)
10687 {
10688 varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
10689 }
10690 break;
10691#endif // !_TARGET_64BIT_
10692
10693 default:
10694 noway_assert(!"Invalid type");
10695 }
10696 }
10697
10698 genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
10699 scopeL->scAvailable, varLoc);
10700 }
10701
10702 compiler->eeSetLVdone();
10703}
10704
10705//------------------------------------------------------------------------
10706// genSetScopeInfo: Record scope information for debug info
10707//
10708// Arguments:
10709//    which     - the index of this scope info entry in the table being reported
10710//    startOffs - the starting offset for this scope
10711//    length    - the length of this scope
10712//    varNum    - the lclVar for this scope info
10713//    LVnum     - the LocalVarInfo number (vsdLVnum) for this scope
10714//    avail     - whether the variable is available (its value can be inspected) over this scope
10715//    varLoc    - the location of the variable over this scope
10716//
10717// Notes:
10718//    Called by the main genSetScopeInfo() for every scope info piece to be recorded
10719
10720void CodeGen::genSetScopeInfo(unsigned which,
10721 UNATIVE_OFFSET startOffs,
10722 UNATIVE_OFFSET length,
10723 unsigned varNum,
10724 unsigned LVnum,
10725 bool avail,
10726 Compiler::siVarLoc& varLoc)
10727{
10728 // We need to do some mapping while reporting back these variables.
10729
10730 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
10731 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
10732
10733#ifdef _TARGET_X86_
10734 // Non-x86 platforms are allowed to access all arguments directly
10735 // so we don't need this code.
10736
10737 // Is this a varargs function?
10738
10739 if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
10740 varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
10741 {
10742 noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
10743
10744 // All stack arguments (except the varargs handle) have to be
10745 // accessed via the varargs cookie. Discard generated info,
10746 // and just find its position relative to the varargs handle
10747
10748 PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
10749 if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
10750 {
10751 noway_assert(!compiler->opts.compDbgCode);
10752 return;
10753 }
10754
10755 // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
10756 // arguments of vararg functions to avoid reporting them to GC.
10757 noway_assert(!compiler->lvaTable[varNum].lvRegister);
10758 unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
10759 unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
10760
10761 noway_assert(cookieOffset < varOffset);
10762 unsigned offset = varOffset - cookieOffset;
10763 unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
10764 noway_assert(offset < stkArgSize);
10765 offset = stkArgSize - offset;
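        // For example (illustrative numbers): if the varargs cookie is at stack offset 4, the argument at
        // offset 12, and stkArgSize is 16, the reported offset is 16 - (12 - 4) = 8.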
10766
10767 varLoc.vlType = Compiler::VLT_FIXED_VA;
10768 varLoc.vlFixedVarArg.vlfvOffset = offset;
10769 }
10770
10771#endif // _TARGET_X86_
10772
10773 VarName name = nullptr;
10774
10775#ifdef DEBUG
10776
10777 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
10778 {
10779 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
10780 {
10781 name = compiler->info.compVarScopes[scopeNum].vsdName;
10782 }
10783 }
10784
10785    // Hang on to this info.
10786
10787 TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
10788
10789 tlvi.tlviVarNum = ilVarNum;
10790 tlvi.tlviLVnum = LVnum;
10791 tlvi.tlviName = name;
10792 tlvi.tlviStartPC = startOffs;
10793 tlvi.tlviLength = length;
10794 tlvi.tlviAvailable = avail;
10795 tlvi.tlviVarLoc = varLoc;
10796
10797#endif // DEBUG
10798
10799 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
10800}
10801
10802/*****************************************************************************/
10803#ifdef LATE_DISASM
10804#if defined(DEBUG)
10805/*****************************************************************************
10806 * CompilerRegName
10807 *
10808 * Can be called only after lviSetLocalVarInfo() has been called
10809 */
10810
10811/* virtual */
10812const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
10813{
10814 if (!compiler->opts.compScopeInfo)
10815 return nullptr;
10816
10817 if (compiler->info.compVarScopesCount == 0)
10818 return nullptr;
10819
10820 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
10821
10822 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
10823 {
10824 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
10825 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10826 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10827 {
10828 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10829 }
10830 }
10831
10832 return NULL;
10833}
10834
10835/*****************************************************************************
10836 * CompilerStkName
10837 *
10838 * Can be called only after lviSetLocalVarInfo() has been called
10839 */
10840
10841/* virtual */
10842const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
10843{
10844 if (!compiler->opts.compScopeInfo)
10845 return nullptr;
10846
10847 if (compiler->info.compVarScopesCount == 0)
10848 return nullptr;
10849
10850 noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
10851
10852 for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
10853 {
10854 if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
10855 (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
10856 (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
10857 {
10858 return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
10859 }
10860 }
10861
10862 return NULL;
10863}
10864
10865/*****************************************************************************/
10866#endif // defined(DEBUG)
10867#endif // LATE_DISASM
10868
10869#ifdef DEBUG
10870
10871/*****************************************************************************
10872 * Display an IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
10873 */
10874
10875void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
10876{
10877 if (mappingNum != unsigned(-1))
10878 {
10879 printf("%d: ", mappingNum);
10880 }
10881
10882 IL_OFFSETX offsx = ipMapping->ipmdILoffsx;
10883
10884 if (offsx == BAD_IL_OFFSET)
10885 {
10886 printf("???");
10887 }
10888 else
10889 {
10890 Compiler::eeDispILOffs(jitGetILoffsAny(offsx));
10891
10892 if (jitIsStackEmpty(offsx))
10893 {
10894 printf(" STACK_EMPTY");
10895 }
10896
10897 if (jitIsCallInstruction(offsx))
10898 {
10899 printf(" CALL_INSTRUCTION");
10900 }
10901 }
10902
10903 printf(" ");
10904 ipMapping->ipmdNativeLoc.Print();
10905 // We can only call this after code generation. Is there any way to tell when it's legal to call?
10906 // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
10907
10908 if (ipMapping->ipmdIsLabel)
10909 {
10910 printf(" label");
10911 }
10912
10913 printf("\n");
10914}
10915
10916void CodeGen::genIPmappingListDisp()
10917{
10918 unsigned mappingNum = 0;
10919 Compiler::IPmappingDsc* ipMapping;
10920
10921 for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
10922 {
10923 genIPmappingDisp(mappingNum, ipMapping);
10924 ++mappingNum;
10925 }
10926}
10927
10928#endif // DEBUG
10929
10930/*****************************************************************************
10931 *
10932 * Append an IPmappingDsc struct to the list that we're maintaining
10933 * for the debugger.
10934 * Record the instr offset as being at the current code gen position.
10935 */
10936
10937void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
10938{
10939 if (!compiler->opts.compDbgInfo)
10940 {
10941 return;
10942 }
10943
10944 assert(offsx != BAD_IL_OFFSET);
10945
10946 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
10947 {
10948 case ICorDebugInfo::PROLOG:
10949 case ICorDebugInfo::EPILOG:
10950 break;
10951
10952 default:
10953
10954 if (offsx != ICorDebugInfo::NO_MAPPING)
10955 {
10956 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
10957 }
10958
10959 // Ignore this one if it's the same IL offset as the last one we saw.
10960 // Note that we'll let through two identical IL offsets if the flag bits
10961 // differ, or two identical "special" mappings (e.g., PROLOG).
10962 if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
10963 {
10964 JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
10965 return;
10966 }
10967 break;
10968 }
10969
10970 /* Create a mapping entry and append it to the list */
10971
10972 Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1);
10973 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
10974 addMapping->ipmdILoffsx = offsx;
10975 addMapping->ipmdIsLabel = isLabel;
10976 addMapping->ipmdNext = nullptr;
10977
10978 if (compiler->genIPmappingList != nullptr)
10979 {
10980 assert(compiler->genIPmappingLast != nullptr);
10981 assert(compiler->genIPmappingLast->ipmdNext == nullptr);
10982 compiler->genIPmappingLast->ipmdNext = addMapping;
10983 }
10984 else
10985 {
10986 assert(compiler->genIPmappingLast == nullptr);
10987 compiler->genIPmappingList = addMapping;
10988 }
10989
10990 compiler->genIPmappingLast = addMapping;
10991
10992#ifdef DEBUG
10993 if (verbose)
10994 {
10995 printf("Added IP mapping: ");
10996 genIPmappingDisp(unsigned(-1), addMapping);
10997 }
10998#endif // DEBUG
10999}
11000
11001/*****************************************************************************
11002 *
11003 * Prepend an IPmappingDsc struct to the list that we're maintaining
11004 * for the debugger.
11005 * Record the instr offset as being at the current code gen position.
11006 */
11007void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
11008{
11009 if (!compiler->opts.compDbgInfo)
11010 {
11011 return;
11012 }
11013
11014 assert(offsx != BAD_IL_OFFSET);
11015 assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
11016
11017 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11018 {
11019 case ICorDebugInfo::NO_MAPPING:
11020 case ICorDebugInfo::PROLOG:
11021 case ICorDebugInfo::EPILOG:
11022 break;
11023
11024 default:
11025 noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
11026 break;
11027 }
11028
11029 /* Create a mapping entry and prepend it to the list */
11030
11031 Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1);
11032 addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
11033 addMapping->ipmdILoffsx = offsx;
11034 addMapping->ipmdIsLabel = true;
11035 addMapping->ipmdNext = nullptr;
11036
11037 addMapping->ipmdNext = compiler->genIPmappingList;
11038 compiler->genIPmappingList = addMapping;
11039
11040 if (compiler->genIPmappingLast == nullptr)
11041 {
11042 compiler->genIPmappingLast = addMapping;
11043 }
11044
11045#ifdef DEBUG
11046 if (verbose)
11047 {
11048 printf("Added IP mapping to front: ");
11049 genIPmappingDisp(unsigned(-1), addMapping);
11050 }
11051#endif // DEBUG
11052}
11053
11054/*****************************************************************************/
11055
11056C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
11057C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
11058C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));
11059
11060C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
11061C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
11062C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
11063C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
11064
11065//------------------------------------------------------------------------
11066// jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
11067// Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
11068// is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
11069//
11070// Arguments:
11071// offsx - the IL_OFFSETX value with the IL offset to extract.
11072//
11073// Return Value:
11074// The IL offset.
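// For example, given an offsx whose low bits encode IL offset 0x20 and which additionally carries flag bits
// from IL_OFFSETX_BITS, this function returns 0x20; the flag bits themselves are queried separately via
// jitIsStackEmpty() and jitIsCallInstruction().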
11075
11076IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
11077{
11078 assert(offsx != BAD_IL_OFFSET);
11079
11080 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11081 {
11082 case ICorDebugInfo::NO_MAPPING:
11083 case ICorDebugInfo::PROLOG:
11084 case ICorDebugInfo::EPILOG:
11085 unreached();
11086
11087 default:
11088 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
11089 }
11090}
11091
11092//------------------------------------------------------------------------
11093// jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
11094// distinguished values. Asserts if passed BAD_IL_OFFSET.
11095//
11096// Arguments:
11097// offsx - the IL_OFFSETX value with the IL offset to extract.
11098//
11099// Return Value:
11100// The IL offset.
11101
11102IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
11103{
11104 assert(offsx != BAD_IL_OFFSET);
11105
11106 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11107 {
11108 case ICorDebugInfo::NO_MAPPING:
11109 case ICorDebugInfo::PROLOG:
11110 case ICorDebugInfo::EPILOG:
11111 return IL_OFFSET(offsx);
11112
11113 default:
11114 return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
11115 }
11116}
11117
11118//------------------------------------------------------------------------
11119// jitIsStackEmpty: Is the IL evaluation stack empty at the given IL offset?
11120// Asserts if passed BAD_IL_OFFSET.
11121//
11122// Arguments:
11123// offsx - the IL_OFFSETX value to check
11124//
11125// Return Value:
11126//    'true' if the stack is empty at this offset (note that IL_OFFSETX_STKBIT is set when the stack is NOT empty); 'false' otherwise.
11127
11128bool jitIsStackEmpty(IL_OFFSETX offsx)
11129{
11130 assert(offsx != BAD_IL_OFFSET);
11131
11132 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11133 {
11134 case ICorDebugInfo::NO_MAPPING:
11135 case ICorDebugInfo::PROLOG:
11136 case ICorDebugInfo::EPILOG:
11137 return true;
11138
11139 default:
11140 return (offsx & IL_OFFSETX_STKBIT) == 0;
11141 }
11142}
11143
11144//------------------------------------------------------------------------
11145// jitIsCallInstruction: Does the IL offset have the call instruction bit set?
11146// Asserts if passed BAD_IL_OFFSET.
11147//
11148// Arguments:
11149// offsx - the IL_OFFSETX value to check
11150//
11151// Return Value:
11152// 'true' if the call instruction bit is set; 'false' otherwise.
11153
11154bool jitIsCallInstruction(IL_OFFSETX offsx)
11155{
11156 assert(offsx != BAD_IL_OFFSET);
11157
11158 switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
11159 {
11160 case ICorDebugInfo::NO_MAPPING:
11161 case ICorDebugInfo::PROLOG:
11162 case ICorDebugInfo::EPILOG:
11163 return false;
11164
11165 default:
11166 return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0;
11167 }
11168}
11169
11170/*****************************************************************************/
11171
11172void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
11173{
11174 if (!compiler->opts.compDbgCode)
11175 {
11176 return;
11177 }
11178
11179 if (offsx == BAD_IL_OFFSET)
11180 {
11181 return;
11182 }
11183
11184    /* If other IL offsets were reported, skip */
11185
11186 if (compiler->genIPmappingLast == nullptr)
11187 {
11188 return;
11189 }
11190
11191 if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
11192 {
11193 return;
11194 }
11195
11196 /* offsx was the last reported offset. Make sure that we generated native code */
11197
11198 if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
11199 {
11200 instGen(INS_nop);
11201 }
11202}
11203
11204/*****************************************************************************
11205 *
11206 * Shut down the IP-mapping logic, report the info to the EE.
11207 */
11208
11209void CodeGen::genIPmappingGen()
11210{
11211 if (!compiler->opts.compDbgInfo)
11212 {
11213 return;
11214 }
11215
11216#ifdef DEBUG
11217 if (verbose)
11218 {
11219 printf("*************** In genIPmappingGen()\n");
11220 }
11221#endif
11222
11223 if (compiler->genIPmappingList == nullptr)
11224 {
11225 compiler->eeSetLIcount(0);
11226 compiler->eeSetLIdone();
11227 return;
11228 }
11229
11230 Compiler::IPmappingDsc* tmpMapping;
11231 Compiler::IPmappingDsc* prevMapping;
11232 unsigned mappingCnt;
11233 UNATIVE_OFFSET lastNativeOfs;
11234
11235 /* First count the number of distinct mapping records */
11236
11237 mappingCnt = 0;
11238 lastNativeOfs = UNATIVE_OFFSET(~0);
11239
11240 for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
11241 tmpMapping = tmpMapping->ipmdNext)
11242 {
11243 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
11244
11245 // Managed RetVal - since new sequence points are emitted to identify IL calls,
11246 // make sure that those are not filtered and do not interfere with filtering of
11247 // other sequence points.
11248 if (jitIsCallInstruction(srcIP))
11249 {
11250 mappingCnt++;
11251 continue;
11252 }
11253
11254 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
11255
11256 if (nextNativeOfs != lastNativeOfs)
11257 {
11258 mappingCnt++;
11259 lastNativeOfs = nextNativeOfs;
11260 prevMapping = tmpMapping;
11261 continue;
11262 }
11263
11264 /* If there are mappings with the same native offset, then:
11265 o If one of them is NO_MAPPING, ignore it
11266 o If one of them is a label, report that and ignore the other one
11267 o Else report the higher IL offset
11268 */
11269
11270 PREFIX_ASSUME(prevMapping != nullptr); // We would exit before if this was true
11271 if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
11272 {
11273 // If the previous entry was NO_MAPPING, ignore it
11274 prevMapping->ipmdNativeLoc.Init();
11275 prevMapping = tmpMapping;
11276 }
11277 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
11278 {
11279 // If the current entry is NO_MAPPING, ignore it
11280 // Leave prevMapping unchanged as tmpMapping is no longer valid
11281 tmpMapping->ipmdNativeLoc.Init();
11282 }
11283 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
11284 {
11285 // counting for special cases: see below
11286 mappingCnt++;
11287 prevMapping = tmpMapping;
11288 }
11289 else
11290 {
11291 noway_assert(prevMapping != nullptr);
11292 noway_assert(!prevMapping->ipmdNativeLoc.Valid() ||
11293 lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
11294
11295 /* The previous block had the same native offset. We have to
11296 discard one of the mappings. Simply reinitialize ipmdNativeLoc
11297 and prevMapping will be ignored later. */
11298
11299 if (prevMapping->ipmdIsLabel)
11300 {
11301 // Leave prevMapping unchanged as tmpMapping is no longer valid
11302 tmpMapping->ipmdNativeLoc.Init();
11303 }
11304 else
11305 {
11306 prevMapping->ipmdNativeLoc.Init();
11307 prevMapping = tmpMapping;
11308 }
11309 }
11310 }
11311
11312 /* Tell them how many mapping records we've got */
11313
11314 compiler->eeSetLIcount(mappingCnt);
11315
11316 /* Now tell them about the mappings */
11317
11318 mappingCnt = 0;
11319 lastNativeOfs = UNATIVE_OFFSET(~0);
11320
11321 for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
11322 {
11323 // Do we have to skip this record ?
11324 if (!tmpMapping->ipmdNativeLoc.Valid())
11325 {
11326 continue;
11327 }
11328
11329 UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
11330 IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
11331
11332 if (jitIsCallInstruction(srcIP))
11333 {
11334 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
11335 }
11336 else if (nextNativeOfs != lastNativeOfs)
11337 {
11338 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
11339 lastNativeOfs = nextNativeOfs;
11340 }
11341 else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
11342 {
11343 // For the special case of an IL instruction with no body
11344 // followed by the epilog (say ret void immediately preceding
11345 // the method end), we put two entries in, so that we'll stop
11346 // at the (empty) ret statement if the user tries to put a
11347 // breakpoint there, and then have the option of seeing the
11348 // epilog or not based on SetUnmappedStopMask for the stepper.
11349 compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
11350 }
11351 }
11352
11353#if 0
11354 // TODO-Review:
11355 //This check is disabled. It is always true that any time this check asserts, the debugger would have a
11356 //problem with IL source level debugging. However, for a C# file, it only matters if things are on
11357 //different source lines. As a result, we have all sorts of latent problems with how we emit debug
11358 //info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
11359 //assert back on.
11360 if (compiler->opts.compDbgCode)
11361 {
11362 //Assert that the first instruction of every basic block with more than one incoming edge has a
11363 //different sequence point from each incoming block.
11364 //
11365 //It turns out that the only thing we really have to assert is that the first statement in each basic
11366 //block has an IL offset and appears in eeBoundaries.
11367 for (BasicBlock * block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
11368 {
11369 if ((block->bbRefs > 1) && (block->bbTreeList != nullptr))
11370 {
11371 noway_assert(block->bbTreeList->gtOper == GT_STMT);
11372 bool found = false;
11373 if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
11374 {
11375 IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
11376 for (unsigned i = 0; i < eeBoundariesCount; ++i)
11377 {
11378 if (eeBoundaries[i].ilOffset == ilOffs)
11379 {
11380 found = true;
11381 break;
11382 }
11383 }
11384 }
11385 noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
11386 }
11387 }
11388 }
11389#endif // 0
11390
11391 compiler->eeSetLIdone();
11392}
11393
11394/*============================================================================
11395 *
11396 * These are empty stubs to help the late disassembler to compile
11397 * if the late disassembler is being built into a non-DEBUG build.
11398 *
11399 *============================================================================
11400 */
11401
11402#if defined(LATE_DISASM)
11403#if !defined(DEBUG)
11404
11405/* virtual */
11406const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
11407{
11408 return NULL;
11409}
11410
11411/* virtual */
11412const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
11413{
11414 return NULL;
11415}
11416
11417/*****************************************************************************/
11418#endif // !defined(DEBUG)
11419#endif // defined(LATE_DISASM)
11420/*****************************************************************************/
11421
11422//------------------------------------------------------------------------
11423// indirForm: Make a temporary indir we can feed to pattern matching routines
11424// in cases where we don't want to instantiate all the indirs that happen.
11425//
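// A minimal usage sketch (illustrative only; 'addrNode' is a placeholder for an existing address tree):
//     GenTreeIndir load = indirForm(TYP_INT, addrNode);
// The caller can then pass &load to a pattern-matching (e.g. containment or emission) routine that expects a
// GT_IND, without allocating a new node.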
11426GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
11427{
11428 GenTreeIndir i(GT_IND, type, base, nullptr);
11429 i.gtRegNum = REG_NA;
11430 i.SetContained();
11431 return i;
11432}
11433
11434//------------------------------------------------------------------------
11435// intForm: Make a temporary int constant we can feed to pattern matching routines
11436//    in cases where we don't want to instantiate an actual GenTreeIntCon node.
11437//
11438GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
11439{
11440 GenTreeIntCon i(type, value);
11441 i.gtRegNum = REG_NA;
11442 return i;
11443}
11444
11445#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
11446//------------------------------------------------------------------------
11447// genLongReturn: Generates code for a long return statement for x86 and arm.
11448//
11449// Note: treeNode's and op1's registers are already consumed.
11450//
11451// Arguments:
11452// treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type.
11453//
11454// Return Value:
11455// None
11456//
11457void CodeGen::genLongReturn(GenTree* treeNode)
11458{
11459 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
11460 assert(treeNode->TypeGet() == TYP_LONG);
11461 GenTree* op1 = treeNode->gtGetOp1();
11462 var_types targetType = treeNode->TypeGet();
11463
11464 assert(op1 != nullptr);
11465 assert(op1->OperGet() == GT_LONG);
11466 GenTree* loRetVal = op1->gtGetOp1();
11467 GenTree* hiRetVal = op1->gtGetOp2();
11468 assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
11469
11470 genConsumeReg(loRetVal);
11471 genConsumeReg(hiRetVal);
11472 if (loRetVal->gtRegNum != REG_LNGRET_LO)
11473 {
11474 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
11475 }
11476 if (hiRetVal->gtRegNum != REG_LNGRET_HI)
11477 {
11478 inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
11479 }
11480}
11481#endif // _TARGET_X86_ || _TARGET_ARM_
11482
11483//------------------------------------------------------------------------
11484// genReturn: Generates code for a return statement.
11485//            In the case of a struct return, delegates to the genStructReturn method.
11486//
11487// Arguments:
11488// treeNode - The GT_RETURN or GT_RETFILT tree node.
11489//
11490// Return Value:
11491// None
11492//
11493void CodeGen::genReturn(GenTree* treeNode)
11494{
11495 assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
11496 GenTree* op1 = treeNode->gtGetOp1();
11497 var_types targetType = treeNode->TypeGet();
11498
11499 // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
11500 // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
11501 // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
11502 assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));
11503
11504#ifdef DEBUG
11505 if (targetType == TYP_VOID)
11506 {
11507 assert(op1 == nullptr);
11508 }
11509#endif // DEBUG
11510
11511#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
11512 if (targetType == TYP_LONG)
11513 {
11514 genLongReturn(treeNode);
11515 }
11516 else
11517#endif // _TARGET_X86_ || _TARGET_ARM_
11518 {
11519 if (isStructReturn(treeNode))
11520 {
11521 genStructReturn(treeNode);
11522 }
11523 else if (targetType != TYP_VOID)
11524 {
11525 assert(op1 != nullptr);
11526 noway_assert(op1->gtRegNum != REG_NA);
11527
11528 // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
11529 // consumed a reg for the operand. This is because the variable
11530 // is dead after return. But we are issuing more instructions
11531 // like "profiler leave callback" after this consumption. So
11532 // if you are issuing more instructions after this point,
11533 // remember to keep the variable live up until the new method
11534 // exit point where it is actually dead.
11535 genConsumeReg(op1);
11536
11537#if defined(_TARGET_ARM64_)
11538 genSimpleReturn(treeNode);
11539#else // !_TARGET_ARM64_
11540#if defined(_TARGET_X86_)
11541 if (varTypeIsFloating(treeNode))
11542 {
11543 genFloatReturn(treeNode);
11544 }
11545 else
11546#elif defined(_TARGET_ARM_)
11547 if (varTypeIsFloating(treeNode) && (compiler->opts.compUseSoftFP || compiler->info.compIsVarArgs))
11548 {
11549 if (targetType == TYP_FLOAT)
11550 {
11551 getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
11552 }
11553 else
11554 {
11555 assert(targetType == TYP_DOUBLE);
11556 getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, REG_INTRET, REG_NEXT(REG_INTRET),
11557 op1->gtRegNum);
11558 }
11559 }
11560 else
11561#endif // _TARGET_ARM_
11562 {
11563 regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
11564 if (op1->gtRegNum != retReg)
11565 {
11566 inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
11567 }
11568 }
11569#endif // !_TARGET_ARM64_
11570 }
11571 }
11572
11573#ifdef PROFILING_SUPPORTED
11574 // !! Note !!
11575    // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure that for structs returned in
11576    // two registers, RAX and RDX need to be kept alive. Make the necessary changes in lowerxarch.cpp
11577 // in the handling of the GT_RETURN statement.
11578 // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
11579 // for the return registers containing GC refs.
11580
11581 // There will be a single return block while generating profiler ELT callbacks.
11582 //
11583 // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
11584    // The flowgraph and other places assert that the last node of a block marked as
11585    // BBJ_RETURN is either a GT_RETURN, a GT_JMP, or a tail call. It would be nice to
11586    // maintain such an invariant irrespective of whether a profiler hook is needed or not.
11587 // Also, there is not much to be gained by materializing it as an explicit node.
11588 if (compiler->compCurBB == compiler->genReturnBB)
11589 {
11590 // !! NOTE !!
11591 // Since we are invalidating the assumption that we would slip into the epilog
11592 // right after the "return", we need to preserve the return reg's GC state
11593 // across the call until actual method return.
11594 ReturnTypeDesc retTypeDesc;
11595 unsigned regCount = 0;
11596 if (compiler->compMethodReturnsMultiRegRetType())
11597 {
11598 if (varTypeIsLong(compiler->info.compRetNativeType))
11599 {
11600 retTypeDesc.InitializeLongReturnType(compiler);
11601 }
11602 else // we must have a struct return type
11603 {
11604 retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
11605 }
11606 regCount = retTypeDesc.GetReturnRegCount();
11607 }
11608
11609 if (varTypeIsGC(compiler->info.compRetType))
11610 {
11611 gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
11612 }
11613 else if (compiler->compMethodReturnsMultiRegRetType())
11614 {
11615 for (unsigned i = 0; i < regCount; ++i)
11616 {
11617 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
11618 {
11619 gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
11620 }
11621 }
11622 }
11623
11624 genProfilingLeaveCallback();
11625
11626 if (varTypeIsGC(compiler->info.compRetType))
11627 {
11628 gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
11629 }
11630 else if (compiler->compMethodReturnsMultiRegRetType())
11631 {
11632 for (unsigned i = 0; i < regCount; ++i)
11633 {
11634 if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
11635 {
11636 gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
11637 }
11638 }
11639 }
11640 }
11641#endif // PROFILING_SUPPORTED
11642
11643#if defined(DEBUG) && defined(_TARGET_XARCH_)
11644 bool doStackPointerCheck = compiler->opts.compStackCheckOnRet;
11645
11646#if FEATURE_EH_FUNCLETS
11647 // Don't do stack pointer check at the return from a funclet; only for the main function.
11648 if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
11649 {
11650 doStackPointerCheck = false;
11651 }
11652#else // !FEATURE_EH_FUNCLETS
11653 // Don't generate stack checks for x86 finally/filter EH returns: these are not invoked
11654 // with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet().
11655 if ((compiler->compCurBB->bbJumpKind == BBJ_EHFINALLYRET) || (compiler->compCurBB->bbJumpKind == BBJ_EHFILTERRET))
11656 {
11657 doStackPointerCheck = false;
11658 }
11659#endif // !FEATURE_EH_FUNCLETS
11660
11661 genStackPointerCheck(doStackPointerCheck, compiler->lvaReturnSpCheck);
11662#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
11663}
11664
11665#if defined(DEBUG) && defined(_TARGET_XARCH_)
11666
11667//------------------------------------------------------------------------
11668// genStackPointerCheck: Generate code to check the stack pointer against a saved value.
11669// This is a debug check.
11670//
11671// Arguments:
11672// doStackPointerCheck - If true, do the stack pointer check, otherwise do nothing.
11673// lvaStackPointerVar - The local variable number that holds the value of the stack pointer
11674// we are comparing against.
11675//
11676// Return Value:
11677// None
11678//
11679void CodeGen::genStackPointerCheck(bool doStackPointerCheck, unsigned lvaStackPointerVar)
11680{
11681 if (doStackPointerCheck)
11682 {
11683 noway_assert(lvaStackPointerVar != 0xCCCCCCCC && compiler->lvaTable[lvaStackPointerVar].lvDoNotEnregister &&
11684 compiler->lvaTable[lvaStackPointerVar].lvOnFrame);
11685 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, lvaStackPointerVar, 0);
11686
11687 BasicBlock* sp_check = genCreateTempLabel();
11688 getEmitter()->emitIns_J(INS_je, sp_check);
11689 instGen(INS_BREAKPOINT);
11690 genDefineTempLabel(sp_check);
11691 }
11692}
11693
11694#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
11695