// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

                           Linear Scan Register Allocation

                                  a.k.a. LSRA

 Preconditions
    - All register requirements are expressed in the code stream, either as destination
      registers of tree nodes, or as internal registers.  These requirements are
      expressed in the RefPositions built for each node by BuildNode(), which include:
      - The register uses and definitions.
      - The register restrictions (candidates) of the target register, both from itself,
        as producer of the value (dstCandidates), and from its consuming node (srcCandidates).
        Note that when we talk about srcCandidates we are referring to the destination register
        (not any of its sources).
      - The number (internalCount) of registers required, and their register restrictions (internalCandidates).
        These are neither inputs nor outputs of the node, but used in the sequence of code generated for the tree.
        "Internal registers" are registers used during the code sequence generated for the node.
    The register lifetimes must obey the following lifetime model:
      - First, any internal registers are defined.
      - Next, any source registers are used (and are then freed if they are last use and are not identified as
        "delayRegFree").
      - Next, the internal registers are used (and are then freed).
      - Next, any registers in the kill set for the instruction are killed.
      - Next, the destination register(s) are defined (multiple destination registers are only supported on ARM).
      - Finally, any "delayRegFree" source registers are freed.
    There are several things to note about this order:
      - The internal registers will never overlap any use, but they may overlap a destination register.
      - Internal registers are never live beyond the node.
      - The "delayRegFree" annotation is used for instructions that are only available in a Read-Modify-Write form.
        That is, the destination register is one of the sources.  In this case, we must not use the same register for
        the non-RMW operand as for the destination.
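        For example (illustrative): for a GT_ADD that is generated as an x86 "add reg1, reg2", the
        use of the non-RMW operand (reg2) is marked "delayRegFree", so its register remains live
        until after the destination is defined; this prevents the allocator from assigning reg2's
        register as the destination, where the instruction would overwrite the source before its
        value is consumed.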

 Overview (doLinearScan):
    - Walk all blocks, building intervals and RefPositions (buildIntervals)
    - Allocate registers (allocateRegisters)
    - Annotate nodes with register assignments (resolveRegisters)
    - Add move nodes as needed to resolve conflicting register
      assignments across non-adjacent edges. (resolveEdges, called from resolveRegisters)

 Postconditions:

 Tree nodes (GenTree):
    - GenTree::gtRegNum (and gtRegPair for ARM) is annotated with the register
      assignment for a node.  If the node does not require a register, it is
      annotated as such (gtRegNum = REG_NA).  For a variable definition or interior
      tree node (an "implicit" definition), this is the register in which to put the result.
      For an expression use, this is the place to find the value that has previously
      been computed.
      - In most cases, this register must satisfy the constraints specified for the RefPosition.
      - In some cases, this is difficult:
        - If a lclVar node currently lives in some register, it may not be desirable to move it
          (i.e. its current location may be desirable for future uses, e.g. if it's a callee save register,
          but needs to be in a specific arg register for a call).
        - In other cases there may be conflicts on the restrictions placed by the defining node and the node which
          consumes it.
      - If such a node is constrained to a single fixed register (e.g. an arg register, or a return from a call),
        then LSRA is free to annotate the node with a different register.  The code generator must issue the
        appropriate move.
      - However, if such a node is constrained to a set of registers, and its current location does not satisfy that
        requirement, LSRA must insert a GT_COPY node between the node and its parent.  The gtRegNum on the GT_COPY
        node must satisfy the register requirement of the parent.
    - GenTree::gtRsvdRegs has a set of registers used for internal temps.
    - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been
      evaluated.
      - LSRA currently does not set GTF_SPILLED on such nodes, because it caused problems in the old code generator.
        In the new backend perhaps this should change (see also the note below under CodeGen).
    - A tree node is marked GTF_SPILLED if it is a lclVar that must be reloaded prior to use.
      - The register (gtRegNum) on the node indicates the register to which it must be reloaded.
      - For lclVar nodes, since the uses and defs are distinct tree nodes, it is always possible to annotate the node
        with the register to which the variable must be reloaded.
      - For other nodes, since they represent both the def and use, if the value must be reloaded to a different
        register, LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded.
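        For example (illustrative): if a tree temp is defined into one register, spilled, and its
        consuming node requires the value in a different register, a GT_RELOAD node annotated with
        the required register is inserted between the def and the use.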

 Local variable table (LclVarDsc):
    - LclVarDsc::lvRegister is set to true if a local variable has the
      same register assignment for its entire lifetime.
    - LclVarDsc::lvRegNum / lvOtherReg: these are initialized to their
      first value at the end of LSRA (it looks like lvOtherReg isn't?
      This is probably a bug (ARM)).  Codegen will set them to their current value
      as it processes the trees, since a variable can (now) be assigned different
      registers over its lifetimes.

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#include "lsra.h"

#ifdef DEBUG
const char* LinearScan::resolveTypeName[] = {"Split", "Join", "Critical", "SharedCritical"};
#endif // DEBUG

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                          Small Helper functions                           XX
XX                                                                           XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

//--------------------------------------------------------------
// lsraAssignRegToTree: Assign the given reg to tree node.
//
// Arguments:
//    tree    - GenTree node
//    reg     - register to be assigned
//    regIdx  - register index, if tree is a multi-reg node;
//              regIdx will be zero for single-reg result producing tree nodes.
//
// Return Value:
//    None
//
void lsraAssignRegToTree(GenTree* tree, regNumber reg, unsigned regIdx)
{
    if (regIdx == 0)
    {
        tree->gtRegNum = reg;
    }
#if !defined(_TARGET_64BIT_)
    else if (tree->OperIsMultiRegOp())
    {
        assert(regIdx == 1);
        GenTreeMultiRegOp* mul = tree->AsMultiRegOp();
        mul->gtOtherReg        = reg;
    }
#endif // !defined(_TARGET_64BIT_)
#if FEATURE_MULTIREG_RET
    else if (tree->OperGet() == GT_COPY)
    {
        assert(regIdx == 1);
        GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
        copy->gtOtherRegs[0]      = (regNumberSmall)reg;
    }
#endif // FEATURE_MULTIREG_RET
#if FEATURE_ARG_SPLIT
    else if (tree->OperIsPutArgSplit())
    {
        GenTreePutArgSplit* putArg = tree->AsPutArgSplit();
        putArg->SetRegNumByIdx(reg, regIdx);
    }
#endif // FEATURE_ARG_SPLIT
    else
    {
        assert(tree->IsMultiRegCall());
        GenTreeCall* call = tree->AsCall();
        call->SetRegNumByIdx(reg, regIdx);
    }
}
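
// For example (illustrative): a multi-reg call that returns its value in two registers has
// lsraAssignRegToTree called once per result register; regIdx 0 sets gtRegNum directly, and
// regIdx 1 is recorded on the GenTreeCall via SetRegNumByIdx.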

//-------------------------------------------------------------
// getWeight: Returns the weight of the RefPosition.
//
// Arguments:
//    refPos   -   ref position
//
// Returns:
//    Weight of ref position.
unsigned LinearScan::getWeight(RefPosition* refPos)
{
    unsigned weight;
    GenTree* treeNode = refPos->treeNode;

    if (treeNode != nullptr)
    {
        if (isCandidateLocalRef(treeNode))
        {
            // Tracked locals: use weighted ref cnt as the weight of the
            // ref position.
            GenTreeLclVarCommon* lclCommon = treeNode->AsLclVarCommon();
            LclVarDsc*           varDsc    = &(compiler->lvaTable[lclCommon->gtLclNum]);
            weight                         = varDsc->lvRefCntWtd();
            if (refPos->getInterval()->isSpilled)
            {
                // Decrease the weight if the interval has already been spilled.
                weight -= BB_UNITY_WEIGHT;
            }
        }
        else
        {
            // Non-candidate local ref or non-lcl tree node.
            // These are considered to have two references in the basic block:
            // a def and a use and hence weighted ref count would be 2 times
            // the basic block weight in which they appear.
            // However, it is generally more harmful to spill tree temps, so we
            // double that.
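            // For example, a tree temp in a block of weight BB_UNITY_WEIGHT gets a weight of
            // 2 (refs) * 2 (boost factor) * BB_UNITY_WEIGHT = 4 * BB_UNITY_WEIGHT.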
            const unsigned TREE_TEMP_REF_COUNT    = 2;
            const unsigned TREE_TEMP_BOOST_FACTOR = 2;
            weight = TREE_TEMP_REF_COUNT * TREE_TEMP_BOOST_FACTOR * blockInfo[refPos->bbNum].weight;
        }
    }
    else
    {
        // Non-tree node ref positions.  These will have a single
        // reference in the basic block and hence their weighted
        // refcount is equal to the block weight in which they
        // appear.
        weight = blockInfo[refPos->bbNum].weight;
    }

    return weight;
}

// allRegs represents a set of registers that can
// be used to allocate the specified type in any point
// in time (more of a 'bank' of registers).
regMaskTP LinearScan::allRegs(RegisterType rt)
{
    if (rt == TYP_FLOAT)
    {
        return availableFloatRegs;
    }
    else if (rt == TYP_DOUBLE)
    {
        return availableDoubleRegs;
    }
#ifdef FEATURE_SIMD
    // TODO-Cleanup: Add an RBM_ALLSIMD
    else if (varTypeIsSIMD(rt))
    {
        return availableDoubleRegs;
    }
#endif // FEATURE_SIMD
    else
    {
        return availableIntRegs;
    }
}

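//------------------------------------------------------------------------
// allByteRegs: Return the set of integer registers usable for byte-sized operands.
//
// Notes:
//    On x86, only a subset of the integer registers is byte-addressable
//    (RBM_BYTE_REGS); on other targets, all available integer registers qualify.
//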
regMaskTP LinearScan::allByteRegs()
{
#ifdef _TARGET_X86_
    return availableIntRegs & RBM_BYTE_REGS;
#else
    return availableIntRegs;
#endif
}

regMaskTP LinearScan::allSIMDRegs()
{
    return availableFloatRegs;
}

//------------------------------------------------------------------------
// internalFloatRegCandidates: Return the set of registers that are appropriate
//                             for use as internal float registers.
//
// Return Value:
//    The set of registers (as a regMaskTP).
//
// Notes:
//    compFloatingPointUsed is only required to be set if it is possible that we
//    will use floating point callee-save registers.
//    It is unlikely, if an internal register is the only use of floating point,
//    that it will select a callee-save register.  But to be safe, we restrict
//    the set of candidates if compFloatingPointUsed is not already set.

regMaskTP LinearScan::internalFloatRegCandidates()
{
    if (compiler->compFloatingPointUsed)
    {
        return allRegs(TYP_FLOAT);
    }
    else
    {
        return RBM_FLT_CALLEE_TRASH;
    }
}

/*****************************************************************************
 * Inline functions for RegRecord
 *****************************************************************************/

bool RegRecord::isFree()
{
    return ((assignedInterval == nullptr || !assignedInterval->isActive) && !isBusyUntilNextKill);
}

/*****************************************************************************
 * Inline functions for LinearScan
 *****************************************************************************/
RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
{
    assert((unsigned)regNum < ArrLen(physRegs));
    return &physRegs[regNum];
}

#ifdef DEBUG

//----------------------------------------------------------------------------
// getConstrainedRegMask: Returns a new regMask which is the intersection of
// regMaskActual and regMaskConstraint, if that new regMask has at least
// minRegCount registers; otherwise returns regMaskActual.
//
// Arguments:
//     regMaskActual      -  regMask that needs to be constrained
//     regMaskConstraint  -  regMask constraint that needs to be
//                           applied to regMaskActual
//     minRegCount        -  minimum number of regs that should be
//                           present in the new regMask
//
// Return Value:
//     New regMask that has at least minRegCount registers after intersection.
//     Otherwise returns regMaskActual.
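//
// Notes:
//     For example (illustrative): constraining an all-integer mask by RBM_CALLEE_SAVED with a
//     minRegCount of 1 yields the callee-saved subset; if the intersection contained fewer than
//     minRegCount registers, regMaskActual would be returned unchanged.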
regMaskTP LinearScan::getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstraint, unsigned minRegCount)
{
    regMaskTP newMask = regMaskActual & regMaskConstraint;
    if (genCountBits(newMask) >= minRegCount)
    {
        return newMask;
    }

    return regMaskActual;
}

//------------------------------------------------------------------------
// stressLimitRegs: Given a set of registers, expressed as a register mask, reduce
//            them based on the current stress options.
//
// Arguments:
//    refPosition - the RefPosition for which candidates are being limited
//                  (may be null, e.g. when called by getTempRegForResolution())
//    mask        - The current mask of register candidates for a node
//
// Return Value:
//    A possibly-modified mask, based on the value of COMPlus_JitStressRegs.
//
// Notes:
//    This is the method used to implement the stress options that limit
//    the set of registers considered for allocation.

regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
{
    if (getStressLimitRegs() != LSRA_LIMIT_NONE)
    {
        // The refPosition could be null, for example when called
        // by getTempRegForResolution().
        int minRegCount = (refPosition != nullptr) ? refPosition->minRegCandidateCount : 1;

        switch (getStressLimitRegs())
        {
            case LSRA_LIMIT_CALLEE:
                if (!compiler->opts.compDbgEnC)
                {
                    mask = getConstrainedRegMask(mask, RBM_CALLEE_SAVED, minRegCount);
                }
                break;

            case LSRA_LIMIT_CALLER:
            {
                mask = getConstrainedRegMask(mask, RBM_CALLEE_TRASH, minRegCount);
            }
            break;

            case LSRA_LIMIT_SMALL_SET:
                if ((mask & LsraLimitSmallIntSet) != RBM_NONE)
                {
                    mask = getConstrainedRegMask(mask, LsraLimitSmallIntSet, minRegCount);
                }
                else if ((mask & LsraLimitSmallFPSet) != RBM_NONE)
                {
                    mask = getConstrainedRegMask(mask, LsraLimitSmallFPSet, minRegCount);
                }
                break;

            default:
                unreached();
        }

        if (refPosition != nullptr && refPosition->isFixedRegRef)
        {
            mask |= refPosition->registerAssignment;
        }
    }

    return mask;
}
#endif // DEBUG

//------------------------------------------------------------------------
// conflictingFixedRegReference: Determine whether the current RegRecord has a
//                               fixed register use that conflicts with 'refPosition'
//
// Arguments:
//    refPosition - The RefPosition of interest
//
// Return Value:
//    Returns true iff the given RefPosition is NOT a fixed use of this register,
//    AND either:
//    - there is a RefPosition on this RegRecord at the nodeLocation of the given RefPosition, or
//    - the given RefPosition has a delayRegFree, and there is a RefPosition on this RegRecord at
//      the nodeLocation just past the given RefPosition.
//
// Assumptions:
//    'refPosition' is non-null.

bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition)
{
    // Is this a fixed reference of this register? If so, there is no conflict.
    if (refPosition->isFixedRefOfRegMask(genRegMask(regNum)))
    {
        return false;
    }
    // Otherwise, check for conflicts.
    // There is a conflict if:
    // 1. There is a recent RefPosition on this RegRecord that is at this location,
    //    except in the case where it is a special "putarg" that is associated with this interval, OR
    // 2. There is an upcoming RefPosition at this location, or at the next location
    //    if refPosition is a delayed use (i.e. must be kept live through the next/def location).
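    //
    // For example (illustrative): on x86/x64 a variable shift count must be in RCX, which is
    // modeled as a fixed RefPosition on the RCX RegRecord; a RefPosition for a different interval
    // at that same location (or at the previous location, if it is delayRegFree) would conflict
    // with that fixed reference.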

    LsraLocation refLocation = refPosition->nodeLocation;
    if (recentRefPosition != nullptr && recentRefPosition->refType != RefTypeKill &&
        recentRefPosition->nodeLocation == refLocation &&
        (!isBusyUntilNextKill || assignedInterval != refPosition->getInterval()))
    {
        return true;
    }
    LsraLocation nextPhysRefLocation = getNextRefLocation();
    if (nextPhysRefLocation == refLocation || (refPosition->delayRegFree && nextPhysRefLocation == (refLocation + 1)))
    {
        return true;
    }
    return false;
}

/*****************************************************************************
 * Inline functions for Interval
 *****************************************************************************/
RefPosition* Referenceable::getNextRefPosition()
{
    if (recentRefPosition == nullptr)
    {
        return firstRefPosition;
    }
    else
    {
        return recentRefPosition->nextRefPosition;
    }
}

LsraLocation Referenceable::getNextRefLocation()
{
    RefPosition* nextRefPosition = getNextRefPosition();
    if (nextRefPosition == nullptr)
    {
        return MaxLocation;
    }
    else
    {
        return nextRefPosition->nodeLocation;
    }
}

// Iterate through all the registers of the given type
class RegisterIterator
{
    friend class Registers;

public:
    RegisterIterator(RegisterType type) : regType(type)
    {
        if (useFloatReg(regType))
        {
            currentRegNum = REG_FP_FIRST;
        }
        else
        {
            currentRegNum = REG_INT_FIRST;
        }
    }

protected:
    static RegisterIterator Begin(RegisterType regType)
    {
        return RegisterIterator(regType);
    }
    static RegisterIterator End(RegisterType regType)
    {
        RegisterIterator endIter = RegisterIterator(regType);
        // This assumes only integer and floating point register types;
        // if we target a processor with additional register types,
        // this would have to change.
        if (useFloatReg(regType))
        {
            // This just happens to work for both double & float
            endIter.currentRegNum = REG_NEXT(REG_FP_LAST);
        }
        else
        {
            endIter.currentRegNum = REG_NEXT(REG_INT_LAST);
        }
        return endIter;
    }

public:
    void operator++(int dummy) // int dummy is c++ for "this is postfix ++"
    {
        currentRegNum = REG_NEXT(currentRegNum);
#ifdef _TARGET_ARM_
        if (regType == TYP_DOUBLE)
            currentRegNum = REG_NEXT(currentRegNum);
#endif
    }
    void operator++() // prefix operator++
    {
        currentRegNum = REG_NEXT(currentRegNum);
#ifdef _TARGET_ARM_
        if (regType == TYP_DOUBLE)
            currentRegNum = REG_NEXT(currentRegNum);
#endif
    }
    regNumber operator*()
    {
        return currentRegNum;
    }
    bool operator!=(const RegisterIterator& other)
    {
        return other.currentRegNum != currentRegNum;
    }

private:
    regNumber    currentRegNum;
    RegisterType regType;
};

class Registers
{
public:
    friend class RegisterIterator;
    RegisterType type;
    Registers(RegisterType t)
    {
        type = t;
    }
    RegisterIterator begin()
    {
        return RegisterIterator::Begin(type);
    }
    RegisterIterator end()
    {
        return RegisterIterator::End(type);
    }
};
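
// Example usage (illustrative):
//     for (regNumber reg : Registers(TYP_FLOAT)) { ... }
// iterates over all the floating-point registers.  On ARM, iterating with TYP_DOUBLE advances
// two float registers at a time, since a double occupies an even/odd float register pair.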

#ifdef DEBUG
void LinearScan::dumpVarToRegMap(VarToRegMap map)
{
    bool anyPrinted = false;
    for (unsigned varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
    {
        unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
        if (map[varIndex] != REG_STK)
        {
            printf("V%02u=%s ", varNum, getRegName(map[varIndex]));
            anyPrinted = true;
        }
    }
    if (!anyPrinted)
    {
        printf("none");
    }
    printf("\n");
}

void LinearScan::dumpInVarToRegMap(BasicBlock* block)
{
    printf("Var=Reg beg of " FMT_BB ": ", block->bbNum);
    VarToRegMap map = getInVarToRegMap(block->bbNum);
    dumpVarToRegMap(map);
}

void LinearScan::dumpOutVarToRegMap(BasicBlock* block)
{
    printf("Var=Reg end of " FMT_BB ": ", block->bbNum);
    VarToRegMap map = getOutVarToRegMap(block->bbNum);
    dumpVarToRegMap(map);
}

#endif // DEBUG

LinearScanInterface* getLinearScanAllocator(Compiler* comp)
{
    return new (comp, CMK_LSRA) LinearScan(comp);
}

//------------------------------------------------------------------------
// LSRA constructor
//
// Arguments:
//    theCompiler
//
// Notes:
//    The constructor takes care of initializing the data structures that are used
//    during Lowering, including (in DEBUG) getting the stress environment variables,
//    as they may affect the block ordering.

LinearScan::LinearScan(Compiler* theCompiler)
    : compiler(theCompiler)
    , intervals(theCompiler->getAllocator(CMK_LSRA_Interval))
    , refPositions(theCompiler->getAllocator(CMK_LSRA_RefPosition))
    , listNodePool(theCompiler)
{
#ifdef DEBUG
    maxNodeLocation   = 0;
    activeRefPosition = nullptr;

    // Get the value of the environment variable that controls stress for register allocation
    lsraStressMask = JitConfig.JitStressRegs();
#if 0
    if (lsraStressMask != 0)
    {
        // The code in this #if can be used to debug JitStressRegs issues according to
        // method hash.  To use, simply set environment variables JitStressRegsHashLo and JitStressRegsHashHi
        unsigned methHash = compiler->info.compMethodHash();
        char* lostr = getenv("JitStressRegsHashLo");
        unsigned methHashLo = 0;
        bool dump = false;
        if (lostr != nullptr)
        {
            sscanf_s(lostr, "%x", &methHashLo);
            dump = true;
        }
        char* histr = getenv("JitStressRegsHashHi");
        unsigned methHashHi = UINT32_MAX;
        if (histr != nullptr)
        {
            sscanf_s(histr, "%x", &methHashHi);
            dump = true;
        }
        if (methHash < methHashLo || methHash > methHashHi)
        {
            lsraStressMask = 0;
        }
        else if (dump == true)
        {
            printf("JitStressRegs = %x for method %s, hash = 0x%x.\n",
                   lsraStressMask, compiler->info.compFullName, compiler->info.compMethodHash());
            printf(""); // in our logic this causes a flush
        }
    }
#endif // 0
#endif // DEBUG

    // Assume that we will enregister local variables if it's not disabled. We'll reset it if we
    // have no tracked locals when we start allocating. Note that new tracked lclVars may be added
    // after the first liveness analysis - either by optimizations or by Lowering, and the tracked
    // set won't be recomputed until after Lowering (and this constructor is called prior to Lowering),
    // so we don't want to check that yet.
    enregisterLocalVars = ((compiler->opts.compFlags & CLFLG_REGVAR) != 0);
#ifdef _TARGET_ARM64_
    availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd);
#else
    availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd);
#endif

#if ETW_EBP_FRAMED
    availableIntRegs &= ~RBM_FPBASE;
#endif // ETW_EBP_FRAMED

    availableFloatRegs  = RBM_ALLFLOAT;
    availableDoubleRegs = RBM_ALLDOUBLE;

#ifdef _TARGET_AMD64_
    if (compiler->opts.compDbgEnC)
    {
        // On x64 when the EnC option is set, we always save exactly RBP, RSI and RDI.
        // RBP is not available to the register allocator, so RSI and RDI are the only
        // callee-save registers available.
        availableIntRegs &= ~RBM_CALLEE_SAVED | RBM_RSI | RBM_RDI;
        availableFloatRegs &= ~RBM_CALLEE_SAVED;
        availableDoubleRegs &= ~RBM_CALLEE_SAVED;
    }
#endif // _TARGET_AMD64_
    compiler->rpFrameType           = FT_NOT_SET;
    compiler->rpMustCreateEBPCalled = false;

    compiler->codeGen->intRegState.rsIsFloat   = false;
    compiler->codeGen->floatRegState.rsIsFloat = true;

    // Block sequencing (the order in which we schedule).
    // Note that we don't initialize the bbVisitedSet until we do the first traversal
    // This is so that any blocks that are added during the first traversal
    // are accounted for (and we don't have BasicBlockEpoch issues).
    blockSequencingDone   = false;
    blockSequence         = nullptr;
    blockSequenceWorkList = nullptr;
    curBBSeqNum           = 0;
    bbSeqCount            = 0;

    // Information about each block, including predecessor blocks used for variable locations at block entry.
    blockInfo = nullptr;

    pendingDelayFree = false;
    tgtPrefUse       = nullptr;
}

//------------------------------------------------------------------------
// getNextCandidateFromWorkList: Get the next candidate for block sequencing
//
// Arguments:
//    None.
//
// Return Value:
//    The next block to be placed in the sequence.
//
// Notes:
//    This method currently always returns the next block in the list, and relies on having
//    blocks added to the list only when they are "ready", and on the
//    addToBlockSequenceWorkList() method to insert them in the proper order.
//    However, a block may be in the list and already selected, if it was subsequently
//    encountered as both a flow and layout successor of the most recently selected
//    block.

BasicBlock* LinearScan::getNextCandidateFromWorkList()
{
    BasicBlockList* nextWorkList = nullptr;
    for (BasicBlockList* workList = blockSequenceWorkList; workList != nullptr; workList = nextWorkList)
    {
        nextWorkList          = workList->next;
        BasicBlock* candBlock = workList->block;
        removeFromBlockSequenceWorkList(workList, nullptr);
        if (!isBlockVisited(candBlock))
        {
            return candBlock;
        }
    }
    return nullptr;
}

//------------------------------------------------------------------------
// setBlockSequence: Determine the block order for register allocation.
//
// Arguments:
//    None
//
// Return Value:
//    None
//
// Notes:
//    On return, the blockSequence array contains the blocks, in the order in which they
//    will be allocated.
//    This method clears the bbVisitedSet on LinearScan, and when it returns the set
//    contains all the bbNums for the blocks.

void LinearScan::setBlockSequence()
{
    // Reset the "visited" flag on each block.
    compiler->EnsureBasicBlockEpoch();
    bbVisitedSet = BlockSetOps::MakeEmpty(compiler);
    BlockSet readySet(BlockSetOps::MakeEmpty(compiler));
    BlockSet predSet(BlockSetOps::MakeEmpty(compiler));

    assert(blockSequence == nullptr && bbSeqCount == 0);
    blockSequence            = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
    bbNumMaxBeforeResolution = compiler->fgBBNumMax;
    blockInfo                = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];

    assert(blockSequenceWorkList == nullptr);

    bool addedInternalBlocks = false;
    verifiedAllBBs           = false;
    hasCriticalEdges         = false;
    BasicBlock* nextBlock;
    // We use a bbNum of 0 for entry RefPositions.
    // The other information in blockInfo[0] will never be used.
    blockInfo[0].weight = BB_UNITY_WEIGHT;
    for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock)
    {
        blockSequence[bbSeqCount] = block;
        markBlockVisited(block);
        bbSeqCount++;
        nextBlock = nullptr;

        // Initialize the blockInfo.
        // predBBNum will be set later.  0 is never used as a bbNum.
        assert(block->bbNum != 0);
        blockInfo[block->bbNum].predBBNum = 0;
        // We check for critical edges below, but initialize to false.
        blockInfo[block->bbNum].hasCriticalInEdge  = false;
        blockInfo[block->bbNum].hasCriticalOutEdge = false;
        blockInfo[block->bbNum].weight             = block->getBBWeight(compiler);

#if TRACK_LSRA_STATS
        blockInfo[block->bbNum].spillCount         = 0;
        blockInfo[block->bbNum].copyRegCount       = 0;
        blockInfo[block->bbNum].resolutionMovCount = 0;
        blockInfo[block->bbNum].splitEdgeCount     = 0;
#endif // TRACK_LSRA_STATS

        if (block->GetUniquePred(compiler) == nullptr)
        {
            for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
            {
                BasicBlock* predBlock = pred->flBlock;
                if (predBlock->NumSucc(compiler) > 1)
                {
                    blockInfo[block->bbNum].hasCriticalInEdge = true;
                    hasCriticalEdges                          = true;
                    break;
                }
                else if (predBlock->bbJumpKind == BBJ_SWITCH)
                {
                    assert(!"Switch with single successor");
                }
            }
        }

        // Determine which block to schedule next.

        // First, update the NORMAL successors of the current block, adding them to the worklist
        // according to the desired order.  We will handle the EH successors below.
        bool checkForCriticalOutEdge = (block->NumSucc(compiler) > 1);
        if (!checkForCriticalOutEdge && block->bbJumpKind == BBJ_SWITCH)
        {
            assert(!"Switch with single successor");
        }

        const unsigned numSuccs = block->NumSucc(compiler);
        for (unsigned succIndex = 0; succIndex < numSuccs; succIndex++)
        {
            BasicBlock* succ = block->GetSucc(succIndex, compiler);
            if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr)
            {
                blockInfo[block->bbNum].hasCriticalOutEdge = true;
                hasCriticalEdges                           = true;
                // We can stop checking now.
                checkForCriticalOutEdge = false;
            }

            if (isTraversalLayoutOrder() || isBlockVisited(succ))
            {
                continue;
            }

            // We've now seen a predecessor, so add it to the work list and the "readySet".
            // It will be inserted in the worklist according to the specified traversal order
            // (i.e. pred-first or random, since layout order is handled above).
            if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
            {
                addToBlockSequenceWorkList(readySet, succ, predSet);
                BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
            }
        }

        // For layout order, simply use bbNext
        if (isTraversalLayoutOrder())
        {
            nextBlock = block->bbNext;
            continue;
        }

        while (nextBlock == nullptr)
        {
            nextBlock = getNextCandidateFromWorkList();

            // TODO-Throughput: We would like to bypass this traversal if we know we've handled all
            // the blocks - but fgBBcount does not appear to be updated when blocks are removed.
            if (nextBlock == nullptr /* && bbSeqCount != compiler->fgBBcount*/ && !verifiedAllBBs)
            {
                // If we don't encounter all blocks by traversing the regular successor links, do a full
                // traversal of all the blocks, and add them in layout order.
                // This may include:
                //   - internal-only blocks (in the fgAddCodeList) which may not be in the flow graph
                //     (these are not even in the bbNext links).
                //   - blocks that have become unreachable due to optimizations, but that are strongly
                //     connected (these are not removed)
                //   - EH blocks

                for (Compiler::AddCodeDsc* desc = compiler->fgAddCodeList; desc != nullptr; desc = desc->acdNext)
                {
                    if (!isBlockVisited(desc->acdDstBlk))
                    {
                        addToBlockSequenceWorkList(readySet, desc->acdDstBlk, predSet);
                        BlockSetOps::AddElemD(compiler, readySet, desc->acdDstBlk->bbNum);
                    }
                }

                for (BasicBlock* block = compiler->fgFirstBB; block; block = block->bbNext)
                {
                    if (!isBlockVisited(block))
                    {
                        addToBlockSequenceWorkList(readySet, block, predSet);
                        BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
                    }
                }
                verifiedAllBBs = true;
            }
            else
            {
                break;
            }
        }
    }
    blockSequencingDone = true;

#ifdef DEBUG
    // Make sure that we've visited all the blocks.
    for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
    {
        assert(isBlockVisited(block));
    }

    JITDUMP("LSRA Block Sequence: ");
    int i = 1;
    for (BasicBlock* block = startBlockSequence(); block != nullptr; ++i, block = moveToNextBlock())
    {
        JITDUMP(FMT_BB, block->bbNum);

        if (block->isMaxBBWeight())
        {
            JITDUMP("(MAX) ");
        }
        else
        {
            JITDUMP("(%6s) ", refCntWtd2str(block->getBBWeight(compiler)));
        }

        if (i % 10 == 0)
        {
            JITDUMP("\n                     ");
        }
    }
    JITDUMP("\n\n");
#endif
}

//------------------------------------------------------------------------
// compareBlocksForSequencing: Compare two basic blocks for sequencing order.
//
// Arguments:
//    block1            - the first block for comparison
//    block2            - the second block for comparison
//    useBlockWeights   - whether to use block weights for comparison
//
// Return Value:
//    -1 if block1 is preferred.
//     0 if the blocks are equivalent.
//     1 if block2 is preferred.
//
// Notes:
//    See addToBlockSequenceWorkList.
int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights)
{
    if (useBlockWeights)
    {
        unsigned weight1 = block1->getBBWeight(compiler);
        unsigned weight2 = block2->getBBWeight(compiler);

        if (weight1 > weight2)
        {
            return -1;
        }
        else if (weight1 < weight2)
        {
            return 1;
        }
    }

    // If weights are the same prefer LOWER bbnum
    if (block1->bbNum < block2->bbNum)
    {
        return -1;
    }
    else if (block1->bbNum == block2->bbNum)
    {
        return 0;
    }
    else
    {
        return 1;
    }
}

//------------------------------------------------------------------------
// addToBlockSequenceWorkList: Add a BasicBlock to the work list for sequencing.
//
// Arguments:
//    sequencedBlockSet - the set of blocks that are already sequenced
//    block             - the new block to be added
//    predSet           - a block set allocated by the caller, used here as a temporary
//                        set for constructing the block's predecessor set; allocated by
//                        the caller to avoid reallocating a new block set on every call
//
// Return Value:
//    None.
//
// Notes:
//    The first block in the list will be the next one to be sequenced, as soon
//    as we encounter a block whose successors have all been sequenced, in pred-first
//    order, or the very next block if we are traversing in random order (once implemented).
//    This method uses a comparison method to determine the order in which to place
//    the blocks in the list.  This method queries whether all predecessors of the
//    block are sequenced at the time it is added to the list, and if so uses block weights
//    for inserting the block.  A block is never inserted ahead of its predecessors.
//    A block at the time of insertion may not have all its predecessors sequenced, in
//    which case it will be sequenced based on its block number.  Once a block is inserted,
//    its priority/order will not be changed later once its remaining predecessors are
//    sequenced.  This means that the work list may not be sorted entirely based on
//    block weights alone.  A worked example follows this comment.
//
//    Note also that, when random traversal order is implemented, this method
//    should insert the blocks into the list in random order, so that we can always
//    simply select the first block in the list.
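//
//    For example (illustrative): if the work list holds [BB04 (weight 2), BB07 (weight 1)], and
//    we add BB06 with weight 2 whose predecessors are all sequenced, BB06 compares after BB04
//    (equal weight, lower bbNum first) but before BB07 (higher weight first), so the resulting
//    list is [BB04, BB06, BB07].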
void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet)
{
    // The block that is being added is not already sequenced
    assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum));

    // Get predSet of block
    BlockSetOps::ClearD(compiler, predSet);
    flowList* pred;
    for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
    {
        BlockSetOps::AddElemD(compiler, predSet, pred->flBlock->bbNum);
    }

    // If either a rarely run block or all its preds are already sequenced, use block's weight to sequence
    bool useBlockWeight = block->isRunRarely() || BlockSetOps::IsSubset(compiler, sequencedBlockSet, predSet);

    BasicBlockList* prevNode = nullptr;
    BasicBlockList* nextNode = blockSequenceWorkList;

    while (nextNode != nullptr)
    {
        int seqResult;

        if (nextNode->block->isRunRarely())
        {
            // If the block that is yet to be sequenced is a rarely run block, always use block weights for
            // sequencing
            seqResult = compareBlocksForSequencing(nextNode->block, block, true);
        }
        else if (BlockSetOps::IsMember(compiler, predSet, nextNode->block->bbNum))
        {
            // always prefer unsequenced pred blocks
            seqResult = -1;
        }
        else
        {
            seqResult = compareBlocksForSequencing(nextNode->block, block, useBlockWeight);
        }

        if (seqResult > 0)
        {
            break;
        }

        prevNode = nextNode;
        nextNode = nextNode->next;
    }

    BasicBlockList* newListNode = new (compiler, CMK_LSRA) BasicBlockList(block, nextNode);
    if (prevNode == nullptr)
    {
        blockSequenceWorkList = newListNode;
    }
    else
    {
        prevNode->next = newListNode;
    }
}

void LinearScan::removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode)
{
    if (listNode == blockSequenceWorkList)
    {
        assert(prevNode == nullptr);
        blockSequenceWorkList = listNode->next;
    }
    else
    {
        assert(prevNode != nullptr && prevNode->next == listNode);
        prevNode->next = listNode->next;
    }
    // TODO-Cleanup: consider merging Compiler::BlockListNode and BasicBlockList
    // compiler->FreeBlockListNode(listNode);
}

// Initialize the block order for allocation (called each time a new traversal begins).
BasicBlock* LinearScan::startBlockSequence()
{
    if (!blockSequencingDone)
    {
        setBlockSequence();
    }
    BasicBlock* curBB = compiler->fgFirstBB;
    curBBSeqNum       = 0;
    curBBNum          = curBB->bbNum;
    clearVisitedBlocks();
    assert(blockSequence[0] == compiler->fgFirstBB);
    markBlockVisited(curBB);
    return curBB;
}

//------------------------------------------------------------------------
// moveToNextBlock: Move to the next block in order for allocation or resolution.
//
// Arguments:
//    None
//
// Return Value:
//    The next block.
//
// Notes:
//    This method is used when the next block is actually going to be handled.
//    It changes curBBNum.

BasicBlock* LinearScan::moveToNextBlock()
{
    BasicBlock* nextBlock = getNextBlock();
    curBBSeqNum++;
    if (nextBlock != nullptr)
    {
        curBBNum = nextBlock->bbNum;
    }
    return nextBlock;
}

//------------------------------------------------------------------------
// getNextBlock: Get the next block in order for allocation or resolution.
//
// Arguments:
//    None
//
// Return Value:
//    The next block.
//
// Notes:
//    This method does not actually change the current block - it is used simply
//    to determine which block will be next.

BasicBlock* LinearScan::getNextBlock()
{
    assert(blockSequencingDone);
    unsigned int nextBBSeqNum = curBBSeqNum + 1;
    if (nextBBSeqNum < bbSeqCount)
    {
        return blockSequence[nextBBSeqNum];
    }
    return nullptr;
}

//------------------------------------------------------------------------
// doLinearScan: The main method for register allocation.
//
// Arguments:
//    None
//
// Return Value:
//    None.
//

void LinearScan::doLinearScan()
{
    // Check to see whether we have any local variables to enregister.
    // We initialize this in the constructor based on opt settings,
    // but we don't want to spend time on the lclVar parts of LinearScan
    // if we have no tracked locals.
    if (enregisterLocalVars && (compiler->lvaTrackedCount == 0))
    {
        enregisterLocalVars = false;
    }

    unsigned lsraBlockEpoch = compiler->GetCurBasicBlockEpoch();

    splitBBNumToTargetBBNumMap = nullptr;

    // This is complicated by the fact that physical registers have refs associated
    // with locations where they are killed (e.g. calls), but we don't want to
    // count these as being touched.

    compiler->codeGen->regSet.rsClearRegsModified();

    initMaxSpill();
    buildIntervals();
    DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS));
    compiler->EndPhase(PHASE_LINEAR_SCAN_BUILD);

    DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals"));

    clearVisitedBlocks();
    initVarRegMaps();
    allocateRegisters();
    compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC);
    resolveRegisters();
    compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE);

#if TRACK_LSRA_STATS
    if ((JitConfig.DisplayLsraStats() != 0)
#ifdef DEBUG
        || VERBOSE
#endif
        )
    {
        dumpLsraStats(jitstdout);
    }
#endif // TRACK_LSRA_STATS

    DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));

    compiler->compLSRADone = true;
    noway_assert(lsraBlockEpoch == compiler->GetCurBasicBlockEpoch());
}

//------------------------------------------------------------------------
// recordVarLocationsAtStartOfBB: Update live-in LclVarDscs with the appropriate
//                                register location at the start of a block, during codegen.
//
// Arguments:
//    bb - the block for which code is about to be generated.
//
// Return Value:
//    None.
//
// Assumptions:
//    CodeGen will take care of updating the reg masks and the current var liveness,
//    after calling this method.
//    This is because we need to kill off the dead registers before setting the newly live ones.

void LinearScan::recordVarLocationsAtStartOfBB(BasicBlock* bb)
{
    if (!enregisterLocalVars)
    {
        return;
    }
    JITDUMP("Recording Var Locations at start of " FMT_BB "\n", bb->bbNum);
    VarToRegMap map   = getInVarToRegMap(bb->bbNum);
    unsigned    count = 0;

    VarSetOps::AssignNoCopy(compiler, currentLiveVars,
                            VarSetOps::Intersection(compiler, registerCandidateVars, bb->bbLiveIn));
    VarSetOps::Iter iter(compiler, currentLiveVars);
    unsigned        varIndex = 0;
    while (iter.NextElem(&varIndex))
    {
        unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
        LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
        regNumber  regNum = getVarReg(map, varIndex);

        regNumber oldRegNum = varDsc->lvRegNum;
        regNumber newRegNum = regNum;

        if (oldRegNum != newRegNum)
        {
            JITDUMP(" V%02u(%s->%s)", varNum, compiler->compRegVarName(oldRegNum),
                    compiler->compRegVarName(newRegNum));
            varDsc->lvRegNum = newRegNum;
            count++;
        }
        else if (newRegNum != REG_STK)
        {
            JITDUMP(" V%02u(%s)", varNum, compiler->compRegVarName(newRegNum));
            count++;
        }
    }

    if (count == 0)
    {
        JITDUMP(" <none>\n");
    }

    JITDUMP("\n");
}

void Interval::setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* linScan)
{
    LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
    assert(varDsc->lvTracked);
    assert(varDsc->lvVarIndex < compiler->lvaTrackedCount);

    linScan->localVarIntervals[varDsc->lvVarIndex] = this;

    assert(linScan->getIntervalForLocalVar(varDsc->lvVarIndex) == this);
    this->isLocalVar = true;
    this->varNum     = lclNum;
}

// Identify the candidates which we are not going to enregister, due to
// being used in EH in a way we don't want to deal with.
// This logic is cloned from fgInterBlockLocalVarLiveness.
void LinearScan::identifyCandidatesExceptionDataflow()
{
    VARSET_TP   exceptVars(VarSetOps::MakeEmpty(compiler));
    VARSET_TP   filterVars(VarSetOps::MakeEmpty(compiler));
    VARSET_TP   finallyVars(VarSetOps::MakeEmpty(compiler));
    BasicBlock* block;

    foreach_block(compiler, block)
    {
        if (block->bbCatchTyp != BBCT_NONE)
        {
            // live on entry to handler
            VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
        }

        if (block->bbJumpKind == BBJ_EHFILTERRET)
        {
            // live on exit from filter
            VarSetOps::UnionD(compiler, filterVars, block->bbLiveOut);
        }
        else if (block->bbJumpKind == BBJ_EHFINALLYRET)
        {
            // live on exit from finally
            VarSetOps::UnionD(compiler, finallyVars, block->bbLiveOut);
        }
#if FEATURE_EH_FUNCLETS
        // Funclets are called and returned from, as such we can only count on the frame
        // pointer being restored, and thus everything live in or live out must be on the
        // stack
        if (block->bbFlags & BBF_FUNCLET_BEG)
        {
            VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
        }
        if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET) ||
            (block->bbJumpKind == BBJ_EHCATCHRET))
        {
            VarSetOps::UnionD(compiler, exceptVars, block->bbLiveOut);
        }
#endif // FEATURE_EH_FUNCLETS
    }

    // slam them all together (there was really no need to use more than 2 bitvectors here)
    VarSetOps::UnionD(compiler, exceptVars, filterVars);
    VarSetOps::UnionD(compiler, exceptVars, finallyVars);

    /* Mark all pointer variables live on exit from a 'finally'
       block as either volatile for non-GC ref types or as
       'explicitly initialized' (volatile and must-init) for GC-ref types */

    VarSetOps::Iter iter(compiler, exceptVars);
    unsigned        varIndex = 0;
    while (iter.NextElem(&varIndex))
    {
        unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
        LclVarDsc* varDsc = compiler->lvaTable + varNum;

        compiler->lvaSetVarDoNotEnregister(varNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));

        if (varTypeIsGC(varDsc))
        {
            if (VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam)
            {
                varDsc->lvMustInit = true;
            }
        }
    }
}

bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
{
    if (!enregisterLocalVars)
    {
        return false;
    }
    assert((compiler->opts.compFlags & CLFLG_REGVAR) != 0);

    if (!varDsc->lvTracked)
    {
        return false;
    }

#if !defined(_TARGET_64BIT_)
    if (varDsc->lvType == TYP_LONG)
    {
        // Long variables should not be register candidates.
        // Lowering will have split any candidate lclVars into lo/hi vars.
        return false;
    }
#endif // !defined(_TARGET_64BIT_)

    // If we have JMP, reg args must be put on the stack

    if (compiler->compJmpOpUsed && varDsc->lvIsRegArg)
    {
        return false;
    }

    // Don't allocate registers for dependently promoted struct fields
    if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
    {
        return false;
    }

    // Don't enregister if the ref count is zero.
    if (varDsc->lvRefCnt() == 0)
    {
        varDsc->setLvRefCntWtd(0);
        return false;
    }

    // Variables that are address-exposed are never enregistered, or tracked.
    // A struct may be promoted, and a struct that fits in a register may be fully enregistered.
    // Pinned variables may not be tracked (a condition of the GCInfo representation)
    // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
    // references when using the general GC encoding.
    unsigned lclNum = (unsigned)(varDsc - compiler->lvaTable);
    if (varDsc->lvAddrExposed || !varTypeIsEnregisterableStruct(varDsc))
    {
#ifdef DEBUG
        Compiler::DoNotEnregisterReason dner = Compiler::DNER_AddrExposed;
        if (!varDsc->lvAddrExposed)
        {
            dner = Compiler::DNER_IsStruct;
        }
#endif // DEBUG
        compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(dner));
        return false;
    }
    else if (varDsc->lvPinned)
    {
        varDsc->lvTracked = 0;
#ifdef JIT32_GCENCODER
        compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_PinningRef));
#endif // JIT32_GCENCODER
        return false;
    }

    // Are we not optimizing and we have exception handlers?
    //   if so mark all args and locals as volatile, so that they
    //   won't ever get enregistered.
    //
    if (compiler->opts.MinOpts() && compiler->compHndBBtabCount > 0)
    {
        compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));
    }

    if (varDsc->lvDoNotEnregister)
    {
        return false;
    }

    switch (genActualType(varDsc->TypeGet()))
    {
#if CPU_HAS_FP_SUPPORT
        case TYP_FLOAT:
        case TYP_DOUBLE:
            return !compiler->opts.compDbgCode;

#endif // CPU_HAS_FP_SUPPORT

        case TYP_INT:
        case TYP_LONG:
        case TYP_REF:
        case TYP_BYREF:
            break;

#ifdef FEATURE_SIMD
        case TYP_SIMD12:
        case TYP_SIMD16:
        case TYP_SIMD32:
            return !varDsc->lvPromoted;

        // TODO-1stClassStructs: Move TYP_SIMD8 up with the other SIMD types, after handling the param issue
        // (passing & returning as TYP_LONG).
        case TYP_SIMD8:
            return false;
#endif // FEATURE_SIMD

        case TYP_STRUCT:
            return false;

        case TYP_UNDEF:
        case TYP_UNKNOWN:
            noway_assert(!"lvType not set correctly");
            varDsc->lvType = TYP_INT;
            return false;

        default:
            return false;
    }

    return true;
}

// Identify locals & compiler temps that are register candidates
// TODO-Cleanup: This was cloned from Compiler::lvaSortByRefCount() in lclvars.cpp in order
// to avoid perturbation, but should be merged.

void LinearScan::identifyCandidates()
{
    if (enregisterLocalVars)
    {
        // Initialize the set of lclVars that are candidates for register allocation.
        VarSetOps::AssignNoCopy(compiler, registerCandidateVars, VarSetOps::MakeEmpty(compiler));

        // Initialize the sets of lclVars that are used to determine whether, and for which lclVars,
        // we need to perform resolution across basic blocks.
        // Note that we can't do this in the constructor because the number of tracked lclVars may
        // change between the constructor and the actual allocation.
        VarSetOps::AssignNoCopy(compiler, resolutionCandidateVars, VarSetOps::MakeEmpty(compiler));
        VarSetOps::AssignNoCopy(compiler, splitOrSpilledVars, VarSetOps::MakeEmpty(compiler));

        // We set enregisterLocalVars to true only if there are tracked lclVars
        assert(compiler->lvaCount != 0);
    }
    else if (compiler->lvaCount == 0)
    {
        // Nothing to do. Note that even if enregisterLocalVars is false, we still need to set the
        // lvLRACandidate field on all the lclVars to false if we have any.
        return;
    }

    if (compiler->compHndBBtabCount > 0)
    {
        identifyCandidatesExceptionDataflow();
    }

    unsigned   lclNum;
    LclVarDsc* varDsc;

    // While we build intervals for the candidate lclVars, we will determine the floating point
    // lclVars, if any, to consider for callee-save register preferencing.
    // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
    // and those that meet the second.
    // The first threshold is used for methods that are heuristically deemed either to have light
    // fp usage, or other factors that encourage conservative use of callee-save registers, such
    // as multiple exits (where there might be an early exit that would be excessively penalized by
    // lots of prolog/epilog saves & restores).
    // The second threshold is used where there are factors deemed to make it more likely that fp
    // callee-save registers will be needed, such as loops or many fp vars.
    // We keep two sets of vars, since we collect some of the information to determine which set to
    // use as we iterate over the vars.
    // When we are generating AVX code on non-Unix (FEATURE_PARTIAL_SIMD_CALLEE_SAVE), we maintain an
    // additional set of LargeVectorType vars, and there is a separate threshold defined for those.
    // It is assumed that if we encounter these, we should consider this a "high use" scenario,
    // so we don't maintain two sets of these vars.
    // This is defined as thresholdLargeVectorRefCntWtd, as we are likely to use the same mechanism
    // for vectors on Arm64, though the actual value may differ.

    unsigned int floatVarCount        = 0;
    unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
    unsigned int maybeFPRefCntWtd     = 2 * BB_UNITY_WEIGHT;
    VARSET_TP    fpMaybeCandidateVars(VarSetOps::UninitVal());
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
    unsigned int largeVectorVarCount           = 0;
    unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
    if (enregisterLocalVars)
    {
        VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
        VarSetOps::AssignNoCopy(compiler, fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler));
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
        VarSetOps::AssignNoCopy(compiler, largeVectorVars, VarSetOps::MakeEmpty(compiler));
        VarSetOps::AssignNoCopy(compiler, largeVectorCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
    }
#if DOUBLE_ALIGN
    unsigned refCntStk       = 0;
    unsigned refCntReg       = 0;
    unsigned refCntWtdReg    = 0;
    unsigned refCntStkParam  = 0; // sum of ref counts for all stack based parameters
    unsigned refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles
    doDoubleAlign            = false;
    bool checkDoubleAlign    = true;
    if (compiler->codeGen->isFramePointerRequired() || compiler->opts.MinOpts())
    {
        checkDoubleAlign = false;
    }
    else
    {
        switch (compiler->getCanDoubleAlign())
        {
            case MUST_DOUBLE_ALIGN:
                doDoubleAlign    = true;
                checkDoubleAlign = false;
                break;
            case CAN_DOUBLE_ALIGN:
                break;
            case CANT_DOUBLE_ALIGN:
                doDoubleAlign    = false;
                checkDoubleAlign = false;
                break;
            default:
                unreached();
        }
    }
#endif // DOUBLE_ALIGN

    // Check whether register variables are permitted.
    if (!enregisterLocalVars)
    {
        localVarIntervals = nullptr;
    }
    else if (compiler->lvaTrackedCount > 0)
    {
        // initialize mapping from tracked local to interval
        localVarIntervals = new (compiler, CMK_LSRA) Interval*[compiler->lvaTrackedCount];
    }

    INTRACK_STATS(regCandidateVarCount = 0);
    for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
    {
        // Initialize all variables to REG_STK
        varDsc->lvRegNum = REG_STK;
#ifndef _TARGET_64BIT_
        varDsc->lvOtherReg = REG_STK;
#endif // !_TARGET_64BIT_

        if (!enregisterLocalVars)
        {
            varDsc->lvLRACandidate = false;
            continue;
        }

#if DOUBLE_ALIGN
        if (checkDoubleAlign)
        {
            if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
            {
                refCntStkParam += varDsc->lvRefCnt();
            }
            else if (!isRegCandidate(varDsc) || varDsc->lvDoNotEnregister)
            {
                refCntStk += varDsc->lvRefCnt();
                if ((varDsc->lvType == TYP_DOUBLE) ||
                    ((varTypeIsStruct(varDsc) && varDsc->lvStructDoubleAlign &&
                      (compiler->lvaGetPromotionType(varDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT))))
                {
                    refCntWtdStkDbl += varDsc->lvRefCntWtd();
                }
            }
            else
            {
                refCntReg += varDsc->lvRefCnt();
                refCntWtdReg += varDsc->lvRefCntWtd();
            }
        }
#endif // DOUBLE_ALIGN

        // Start with the assumption that it's a candidate.

        varDsc->lvLRACandidate = 1;

        // Start with lvRegister as false - set it true only if the variable gets
        // the same register assignment throughout
        varDsc->lvRegister = false;

        if (!isRegCandidate(varDsc))
        {
            varDsc->lvLRACandidate = 0;
            if (varDsc->lvTracked)
            {
                localVarIntervals[varDsc->lvVarIndex] = nullptr;
            }
            continue;
        }

        if (varDsc->lvLRACandidate)
        {
            var_types type   = genActualType(varDsc->TypeGet());
            Interval* newInt = newInterval(type);
            newInt->setLocalNumber(compiler, lclNum, this);
            VarSetOps::AddElemD(compiler, registerCandidateVars, varDsc->lvVarIndex);

            // we will set this later when we have determined liveness
            varDsc->lvMustInit = false;

            if (varDsc->lvIsStructField)
            {
                newInt->isStructField = true;
            }

            INTRACK_STATS(regCandidateVarCount++);

            // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
            // and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
            // above).
            CLANG_FORMAT_COMMENT_ANCHOR;

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
            // Additionally, when we are generating AVX on non-UNIX amd64, we keep a separate set of the
            // LargeVectorType vars.
            if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
            {
                largeVectorVarCount++;
                VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
                unsigned refCntWtd = varDsc->lvRefCntWtd();
                if (refCntWtd >= thresholdLargeVectorRefCntWtd)
                {
                    VarSetOps::AddElemD(compiler, largeVectorCalleeSaveCandidateVars, varDsc->lvVarIndex);
                }
            }
            else
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                if (regType(type) == FloatRegisterType)
            {
                floatVarCount++;
                unsigned refCntWtd = varDsc->lvRefCntWtd();
                if (varDsc->lvIsRegArg)
                {
                    // Don't count the initial reference for register params.  In those cases,
                    // using a callee-save causes an extra copy.
                    refCntWtd -= BB_UNITY_WEIGHT;
                }
                if (refCntWtd >= thresholdFPRefCntWtd)
                {
                    VarSetOps::AddElemD(compiler, fpCalleeSaveCandidateVars, varDsc->lvVarIndex);
                }
                else if (refCntWtd >= maybeFPRefCntWtd)
                {
                    VarSetOps::AddElemD(compiler, fpMaybeCandidateVars, varDsc->lvVarIndex);
                }
            }
        }
        else
        {
            localVarIntervals[varDsc->lvVarIndex] = nullptr;
        }
    }
1720
1721#if DOUBLE_ALIGN
1722 if (checkDoubleAlign)
1723 {
1724 // TODO-CQ: Fine-tune this:
1725 // In the legacy reg predictor, this runs after allocation, and then demotes any lclVars
1726 // allocated to the frame pointer, which is probably the wrong order.
1727 // However, because it runs after allocation, it can determine the impact of demoting
1728 // the lclVars allocated to the frame pointer.
1729 // => Here, the estimates of the EBP refCnt and weighted refCnt are a wild guess.
1730 //
1731 unsigned refCntEBP = refCntReg / 8;
1732 unsigned refCntWtdEBP = refCntWtdReg / 8;
1733
1734 doDoubleAlign =
1735 compiler->shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl);
1736 }
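 // As a concrete (purely illustrative) instance of the guess above: with refCntReg = 80
 // and refCntWtdReg = 160, we would pass refCntEBP = 10 and refCntWtdEBP = 20 to
 // shouldDoubleAlign, i.e. we assume roughly 1/8 of the register-allocated references
 // would end up needing EBP if it were used as a frame pointer.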
1737#endif // DOUBLE_ALIGN
1738
1739 // The factors we consider to determine which set of fp vars to use as candidates for callee save
1740 // registers currently include the number of fp vars, whether there are loops, and whether there are
1741 // multiple exits. These have been selected somewhat empirically, but there is probably room for
1742 // more tuning.
1743 CLANG_FORMAT_COMMENT_ANCHOR;
1744
1745#ifdef DEBUG
1746 if (VERBOSE)
1747 {
1748 printf("\nFP callee save candidate vars: ");
1749 if (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, fpCalleeSaveCandidateVars))
1750 {
1751 dumpConvertedVarSet(compiler, fpCalleeSaveCandidateVars);
1752 printf("\n");
1753 }
1754 else
1755 {
1756 printf("None\n\n");
1757 }
1758 }
1759#endif
1760
1761 JITDUMP("floatVarCount = %d; hasLoops = %d, singleExit = %d\n", floatVarCount, compiler->fgHasLoops,
1762 (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr));
1763
1764 // Determine whether to use the 2nd, more aggressive, threshold for fp callee saves.
1765 if (floatVarCount > 6 && compiler->fgHasLoops &&
1766 (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr))
1767 {
1768 assert(enregisterLocalVars);
1769#ifdef DEBUG
1770 if (VERBOSE)
1771 {
1772 printf("Adding additional fp callee save candidates: \n");
1773 if (!VarSetOps::IsEmpty(compiler, fpMaybeCandidateVars))
1774 {
1775 dumpConvertedVarSet(compiler, fpMaybeCandidateVars);
1776 printf("\n");
1777 }
1778 else
1779 {
1780 printf("None\n\n");
1781 }
1782 }
1783#endif
1784 VarSetOps::UnionD(compiler, fpCalleeSaveCandidateVars, fpMaybeCandidateVars);
1785 }
1786
1787#ifdef _TARGET_ARM_
1788#ifdef DEBUG
1789 if (VERBOSE)
1790 {
1791 // Frame layout is only pre-computed for ARM
1792 printf("\nlvaTable after IdentifyCandidates\n");
1793 compiler->lvaTableDump(Compiler::FrameLayoutState::PRE_REGALLOC_FRAME_LAYOUT);
1794 }
1795#endif // DEBUG
1796#endif // _TARGET_ARM_
1797}
1798
1799 // TODO-Throughput: This mapping can surely be done more efficiently
1800void LinearScan::initVarRegMaps()
1801{
1802 if (!enregisterLocalVars)
1803 {
1804 inVarToRegMaps = nullptr;
1805 outVarToRegMaps = nullptr;
1806 return;
1807 }
1808 assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked
1809 // variables.
1810
1811 // The compiler memory allocator requires that the allocation be an
1812 // even multiple of int-sized objects
1813 unsigned int varCount = compiler->lvaTrackedCount;
1814 regMapCount = roundUp(varCount, (unsigned)sizeof(int));
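 // For example (illustrative numbers): with 4-byte ints and lvaTrackedCount == 10,
 // regMapCount becomes roundUp(10, 4) == 12, so each map of 1-byte regNumberSmall
 // entries occupies 12 bytes - an exact multiple of sizeof(int) - with the final two
 // entries simply unused.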
1815
1816 // Not sure why blocks aren't numbered from zero, but they don't appear to be.
1817 // So, if we want to index by bbNum we have to know the maximum value.
1818 unsigned int bbCount = compiler->fgBBNumMax + 1;
1819
1820 inVarToRegMaps = new (compiler, CMK_LSRA) regNumberSmall*[bbCount];
1821 outVarToRegMaps = new (compiler, CMK_LSRA) regNumberSmall*[bbCount];
1822
1823 if (varCount > 0)
1824 {
1825 // This VarToRegMap is used during the resolution of critical edges.
1826 sharedCriticalVarToRegMap = new (compiler, CMK_LSRA) regNumberSmall[regMapCount];
1827
1828 for (unsigned int i = 0; i < bbCount; i++)
1829 {
1830 VarToRegMap inVarToRegMap = new (compiler, CMK_LSRA) regNumberSmall[regMapCount];
1831 VarToRegMap outVarToRegMap = new (compiler, CMK_LSRA) regNumberSmall[regMapCount];
1832
1833 for (unsigned int j = 0; j < regMapCount; j++)
1834 {
1835 inVarToRegMap[j] = REG_STK;
1836 outVarToRegMap[j] = REG_STK;
1837 }
1838 inVarToRegMaps[i] = inVarToRegMap;
1839 outVarToRegMaps[i] = outVarToRegMap;
1840 }
1841 }
1842 else
1843 {
1844 sharedCriticalVarToRegMap = nullptr;
1845 for (unsigned int i = 0; i < bbCount; i++)
1846 {
1847 inVarToRegMaps[i] = nullptr;
1848 outVarToRegMaps[i] = nullptr;
1849 }
1850 }
1851}
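// A sketch of the state this establishes, assuming there are tracked locals: for every
// block number i in [0, fgBBNumMax] and every tracked variable index j,
// inVarToRegMaps[i][j] == outVarToRegMaps[i][j] == REG_STK until allocation and
// resolution fill in actual register assignments.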
1852
1853void LinearScan::setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
1854{
1855 assert(enregisterLocalVars);
1856 assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
1857 inVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = (regNumberSmall)reg;
1858}
1859
1860void LinearScan::setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
1861{
1862 assert(enregisterLocalVars);
1863 assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
1864 outVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = (regNumberSmall)reg;
1865}
1866
1867LinearScan::SplitEdgeInfo LinearScan::getSplitEdgeInfo(unsigned int bbNum)
1868{
1869 assert(enregisterLocalVars);
1870 SplitEdgeInfo splitEdgeInfo;
1871 assert(bbNum <= compiler->fgBBNumMax);
1872 assert(bbNum > bbNumMaxBeforeResolution);
1873 assert(splitBBNumToTargetBBNumMap != nullptr);
1874 splitBBNumToTargetBBNumMap->Lookup(bbNum, &splitEdgeInfo);
1875 assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
1876 assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
1877 return splitEdgeInfo;
1878}
1879
1880VarToRegMap LinearScan::getInVarToRegMap(unsigned int bbNum)
1881{
1882 assert(enregisterLocalVars);
1883 assert(bbNum <= compiler->fgBBNumMax);
1884 // For the blocks inserted to split critical edges, the inVarToRegMap is
1885 // equal to the outVarToRegMap at the "from" block.
1886 if (bbNum > bbNumMaxBeforeResolution)
1887 {
1888 SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
1889 unsigned fromBBNum = splitEdgeInfo.fromBBNum;
1890 if (fromBBNum == 0)
1891 {
1892 assert(splitEdgeInfo.toBBNum != 0);
1893 return inVarToRegMaps[splitEdgeInfo.toBBNum];
1894 }
1895 else
1896 {
1897 return outVarToRegMaps[fromBBNum];
1898 }
1899 }
1900
1901 return inVarToRegMaps[bbNum];
1902}
1903
1904VarToRegMap LinearScan::getOutVarToRegMap(unsigned int bbNum)
1905{
1906 assert(enregisterLocalVars);
1907 assert(bbNum <= compiler->fgBBNumMax);
1908 // For the blocks inserted to split critical edges, the outVarToRegMap is
1909 // equal to the inVarToRegMap at the target.
1910 if (bbNum > bbNumMaxBeforeResolution)
1911 {
1912 // If this is an empty block, its in and out maps are both the same.
1913 // We identify this case by setting fromBBNum or toBBNum to 0, and using only the other.
1914 SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
1915 unsigned toBBNum = splitEdgeInfo.toBBNum;
1916 if (toBBNum == 0)
1917 {
1918 assert(splitEdgeInfo.fromBBNum != 0);
1919 return outVarToRegMaps[splitEdgeInfo.fromBBNum];
1920 }
1921 else
1922 {
1923 return inVarToRegMaps[toBBNum];
1924 }
1925 }
1926 return outVarToRegMaps[bbNum];
1927}
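// For example (hypothetical block numbers): suppose resolution inserts a block B, with
// bbNum > bbNumMaxBeforeResolution, to split a critical edge from BB03 to BB07. Its
// SplitEdgeInfo records fromBBNum == 3 and toBBNum == 7, so getInVarToRegMap(B) returns
// the out-map of BB03 and getOutVarToRegMap(B) returns the in-map of BB07; the split
// block needs no maps of its own.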
1928
1929//------------------------------------------------------------------------
1930// setVarReg: Set the register associated with a variable in the given 'bbVarToRegMap'.
1931//
1932// Arguments:
1933// bbVarToRegMap - the map of interest
1934// trackedVarIndex - the lvVarIndex for the variable
1935// reg - the register to which it is being mapped
1936//
1937// Return Value:
1938// None
1939//
1940void LinearScan::setVarReg(VarToRegMap bbVarToRegMap, unsigned int trackedVarIndex, regNumber reg)
1941{
1942 assert(trackedVarIndex < compiler->lvaTrackedCount);
1943 regNumberSmall regSmall = (regNumberSmall)reg;
1944 assert((regNumber)regSmall == reg);
1945 bbVarToRegMap[trackedVarIndex] = regSmall;
1946}
1947
1948//------------------------------------------------------------------------
1949// getVarReg: Get the register associated with a variable in the given 'bbVarToRegMap'.
1950//
1951// Arguments:
1952// bbVarToRegMap - the map of interest
1953// trackedVarIndex - the lvVarIndex for the variable
1954//
1955// Return Value:
1956// The register to which 'trackedVarIndex' is mapped
1957//
1958regNumber LinearScan::getVarReg(VarToRegMap bbVarToRegMap, unsigned int trackedVarIndex)
1959{
1960 assert(enregisterLocalVars);
1961 assert(trackedVarIndex < compiler->lvaTrackedCount);
1962 return (regNumber)bbVarToRegMap[trackedVarIndex];
1963}
1964
1965// Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
1966// the block)
1967VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap)
1968{
1969 assert(enregisterLocalVars);
1970 VarToRegMap inVarToRegMap = inVarToRegMaps[bbNum];
1971 memcpy(inVarToRegMap, srcVarToRegMap, (regMapCount * sizeof(regNumberSmall)));
1972 return inVarToRegMap;
1973}
1974
1975//------------------------------------------------------------------------
1976// checkLastUses: Check correctness of last use flags
1977//
1978// Arguments:
1979 //    block - The block for which we are checking last uses.
1980//
1981// Notes:
1982// This does a backward walk of the RefPositions, starting from the liveOut set.
1983// This method was previously used to set the last uses, which were computed by
1984 // liveness, but were not created in some cases of multiple lclVar references in the
1985 // same tree. However, now that last uses are computed as RefPositions are created,
1986 // that is no longer necessary, and this method is simply retained as a check.
1987 // The exception to the check-only behavior is when LSRA_EXTEND_LIFETIMES is set via
1988 // COMPlus_JitStressRegs. In that case, this method is required, because even though
1989 // the RefPositions will not be marked lastUse in that case, we still need to correctly
1990// mark the last uses on the tree nodes, which is done by this method.
1991//
1992#ifdef DEBUG
1993void LinearScan::checkLastUses(BasicBlock* block)
1994{
1995 if (VERBOSE)
1996 {
1997 JITDUMP("\n\nCHECKING LAST USES for " FMT_BB ", liveout=", block->bbNum);
1998 dumpConvertedVarSet(compiler, block->bbLiveOut);
1999 JITDUMP("\n==============================\n");
2000 }
2001
2002 unsigned keepAliveVarNum = BAD_VAR_NUM;
2003 if (compiler->lvaKeepAliveAndReportThis())
2004 {
2005 keepAliveVarNum = compiler->info.compThisArg;
2006 assert(compiler->info.compIsStatic == false);
2007 }
2008
2009 // find which uses are lastUses
2010
2011 // Work backwards starting with live out.
2012 // 'computedLive' is updated to include any exposed use (including those in this
2013 // block that we've already seen). When we encounter a use, if it's
2014 // not in that set, then it's a last use.
2015
2016 VARSET_TP computedLive(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
2017
2018 bool foundDiff = false;
2019 RefPositionReverseIterator reverseIterator = refPositions.rbegin();
2020 RefPosition* currentRefPosition;
2021 for (currentRefPosition = &reverseIterator; currentRefPosition->refType != RefTypeBB;
2022 reverseIterator++, currentRefPosition = &reverseIterator)
2023 {
2024 // We should never see ParamDefs or ZeroInits within a basic block.
2025 assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit);
2026 if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar)
2027 {
2028 unsigned varNum = currentRefPosition->getInterval()->varNum;
2029 unsigned varIndex = currentRefPosition->getInterval()->getVarIndex(compiler);
2030
2031 LsraLocation loc = currentRefPosition->nodeLocation;
2032
2033 // We should always have a tree node for a localVar, except for the "special" RefPositions.
2034 GenTree* tree = currentRefPosition->treeNode;
2035 assert(tree != nullptr || currentRefPosition->refType == RefTypeExpUse ||
2036 currentRefPosition->refType == RefTypeDummyDef);
2037
2038 if (!VarSetOps::IsMember(compiler, computedLive, varIndex) && varNum != keepAliveVarNum)
2039 {
2040 // There was no exposed use, so this is a "last use" (and we mark it thus even if it's a def)
2041
2042 if (extendLifetimes())
2043 {
2044 // NOTE: this is a bit of a hack. When extending lifetimes, the "last use" bit will be clear.
2045 // This bit, however, would normally be used during resolveLocalRef to set the value of
2046 // GTF_VAR_DEATH on the node for a ref position. If this bit is not set correctly even when
2047 // extending lifetimes, the code generator will assert as it expects to have accurate last
2048 // use information. To avoid these asserts, set the GTF_VAR_DEATH bit here.
2049 // Note also that extendLifetimes() is an LSRA stress mode, so it will only be true for
2050 // Checked or Debug builds, for which this method will be executed.
2051 if (tree != nullptr)
2052 {
2053 tree->gtFlags |= GTF_VAR_DEATH;
2054 }
2055 }
2056 else if (!currentRefPosition->lastUse)
2057 {
2058 JITDUMP("missing expected last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
2059 foundDiff = true;
2060 }
2061 VarSetOps::AddElemD(compiler, computedLive, varIndex);
2062 }
2063 else if (currentRefPosition->lastUse)
2064 {
2065 JITDUMP("unexpected last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
2066 foundDiff = true;
2067 }
2068 else if (extendLifetimes() && tree != nullptr)
2069 {
2070 // NOTE: see the comment above re: the extendLifetimes hack.
2071 tree->gtFlags &= ~GTF_VAR_DEATH;
2072 }
2073
2074 if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef)
2075 {
2076 VarSetOps::RemoveElemD(compiler, computedLive, varIndex);
2077 }
2078 }
2079
2080 assert(reverseIterator != refPositions.rend());
2081 }
2082
2083 VARSET_TP liveInNotComputedLive(VarSetOps::Diff(compiler, block->bbLiveIn, computedLive));
2084
2085 VarSetOps::Iter liveInNotComputedLiveIter(compiler, liveInNotComputedLive);
2086 unsigned liveInNotComputedLiveIndex = 0;
2087 while (liveInNotComputedLiveIter.NextElem(&liveInNotComputedLiveIndex))
2088 {
2089 unsigned varNum = compiler->lvaTrackedToVarNum[liveInNotComputedLiveIndex];
2090 if (compiler->lvaTable[varNum].lvLRACandidate)
2091 {
2092 JITDUMP(FMT_BB ": V%02u is in LiveIn set, but not computed live.\n", block->bbNum, varNum);
2093 foundDiff = true;
2094 }
2095 }
2096
2097 VarSetOps::DiffD(compiler, computedLive, block->bbLiveIn);
2098 const VARSET_TP& computedLiveNotLiveIn(computedLive); // reuse the buffer.
2099 VarSetOps::Iter computedLiveNotLiveInIter(compiler, computedLiveNotLiveIn);
2100 unsigned computedLiveNotLiveInIndex = 0;
2101 while (computedLiveNotLiveInIter.NextElem(&computedLiveNotLiveInIndex))
2102 {
2103 unsigned varNum = compiler->lvaTrackedToVarNum[computedLiveNotLiveInIndex];
2104 if (compiler->lvaTable[varNum].lvLRACandidate)
2105 {
2106 JITDUMP(FMT_BB ": V%02u is computed live, but not in LiveIn set.\n", block->bbNum, varNum);
2107 foundDiff = true;
2108 }
2109 }
2110
2111 assert(!foundDiff);
2112}
2113#endif // DEBUG
2114
2115//------------------------------------------------------------------------
2116// findPredBlockForLiveIn: Determine which block should be used for the register locations of the live-in variables.
2117//
2118// Arguments:
2119 // block - The block for which we're selecting a predecessor.
2120 // prevBlock - The previous block in allocation order.
2121// pPredBlockIsAllocated - A debug-only argument that indicates whether any of the predecessors have been seen
2122// in allocation order.
2123//
2124// Return Value:
2125// The selected predecessor.
2126//
2127// Assumptions:
2128 // In DEBUG, the caller initializes *pPredBlockIsAllocated to false, and it will be set to true if the block
2129// returned is in fact a predecessor.
2130//
2131// Notes:
2132// This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be
2133// one of:
2134// LSRA_BLOCK_BOUNDARY_PRED - Use the register locations of a predecessor block (default)
2135// LSRA_BLOCK_BOUNDARY_LAYOUT - Use the register locations of the previous block in layout order.
2136// This is the only case where this actually returns a different block.
2137// LSRA_BLOCK_BOUNDARY_ROTATE - Rotate the register locations from a predecessor.
2138// For this case, the block returned is the same as for LSRA_BLOCK_BOUNDARY_PRED, but
2139// the register locations will be "rotated" to stress the resolution and allocation
2140// code.
2141
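// For example, if 'block' has no unique predecessor but two already-visited predecessors
// with weights 100 and 50, the loop below selects the weight-100 predecessor, on the
// theory that matching the hotter predecessor's register locations avoids the more
// expensive resolution moves.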
2142BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block,
2143 BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated))
2144{
2145 BasicBlock* predBlock = nullptr;
2146#ifdef DEBUG
2147 assert(*pPredBlockIsAllocated == false);
2148 if (getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_LAYOUT)
2149 {
2150 if (prevBlock != nullptr)
2151 {
2152 predBlock = prevBlock;
2153 }
2154 }
2155 else
2156#endif // DEBUG
2157 if (block != compiler->fgFirstBB)
2158 {
2159 predBlock = block->GetUniquePred(compiler);
2160 if (predBlock != nullptr)
2161 {
2162 if (isBlockVisited(predBlock))
2163 {
2164 if (predBlock->bbJumpKind == BBJ_COND)
2165 {
2166 // Special handling to improve matching on backedges.
2167 BasicBlock* otherBlock = (block == predBlock->bbNext) ? predBlock->bbJumpDest : predBlock->bbNext;
2168 noway_assert(otherBlock != nullptr);
2169 if (isBlockVisited(otherBlock))
2170 {
2171 // This is the case when we have a conditional branch where one target has already
2172 // been visited. It would be best to use the same incoming regs as that block,
2173 // so that we have less likelihood of having to move registers.
2174 // For example, in determining the block to use for the starting register locations for
2175 // "block" in the following example, we'd like to use the same predecessor for "block"
2176 // as for "otherBlock", so that both successors of predBlock have the same locations, reducing
2177 // the likelihood of needing a split block on a backedge:
2178 //
2179 // otherPred
2180 // |
2181 // otherBlock <-+
2182 // . . . |
2183 // |
2184 // predBlock----+
2185 // |
2186 // block
2187 //
2188 for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext)
2189 {
2190 BasicBlock* otherPred = pred->flBlock;
2191 if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum)
2192 {
2193 predBlock = otherPred;
2194 break;
2195 }
2196 }
2197 }
2198 }
2199 }
2200 else
2201 {
2202 predBlock = nullptr;
2203 }
2204 }
2205 else
2206 {
2207 for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
2208 {
2209 BasicBlock* candidatePredBlock = pred->flBlock;
2210 if (isBlockVisited(candidatePredBlock))
2211 {
2212 if (predBlock == nullptr || predBlock->bbWeight < candidatePredBlock->bbWeight)
2213 {
2214 predBlock = candidatePredBlock;
2215 INDEBUG(*pPredBlockIsAllocated = true;)
2216 }
2217 }
2218 }
2219 }
2220 if (predBlock == nullptr)
2221 {
2222 predBlock = prevBlock;
2223 assert(predBlock != nullptr);
2224 JITDUMP("\n\nNo allocated predecessor; ");
2225 }
2226 }
2227 return predBlock;
2228}
2229
2230#ifdef DEBUG
2231void LinearScan::dumpVarRefPositions(const char* title)
2232{
2233 if (enregisterLocalVars)
2234 {
2235 printf("\nVAR REFPOSITIONS %s\n", title);
2236
2237 for (unsigned i = 0; i < compiler->lvaCount; i++)
2238 {
2239 printf("--- V%02u\n", i);
2240
2241 LclVarDsc* varDsc = compiler->lvaTable + i;
2242 if (varDsc->lvIsRegCandidate())
2243 {
2244 Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
2245 for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
2246 {
2247 ref->dump();
2248 }
2249 }
2250 }
2251 printf("\n");
2252 }
2253}
2254
2255#endif // DEBUG
2256
2257// Set the default rpFrameType based upon codeGen->isFramePointerRequired()
2258// This was lifted from the register predictor
2259//
2260void LinearScan::setFrameType()
2261{
2262 FrameType frameType = FT_NOT_SET;
2263#if DOUBLE_ALIGN
2264 compiler->codeGen->setDoubleAlign(false);
2265 if (doDoubleAlign)
2266 {
2267 frameType = FT_DOUBLE_ALIGN_FRAME;
2268 compiler->codeGen->setDoubleAlign(true);
2269 }
2270 else
2271#endif // DOUBLE_ALIGN
2272 if (compiler->codeGen->isFramePointerRequired())
2273 {
2274 frameType = FT_EBP_FRAME;
2275 }
2276 else
2277 {
2278 if (compiler->rpMustCreateEBPCalled == false)
2279 {
2280#ifdef DEBUG
2281 const char* reason;
2282#endif // DEBUG
2283 compiler->rpMustCreateEBPCalled = true;
2284 if (compiler->rpMustCreateEBPFrame(INDEBUG(&reason)))
2285 {
2286 JITDUMP("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
2287 compiler->codeGen->setFrameRequired(true);
2288 }
2289 }
2290
2291 if (compiler->codeGen->isFrameRequired())
2292 {
2293 frameType = FT_EBP_FRAME;
2294 }
2295 else
2296 {
2297 frameType = FT_ESP_FRAME;
2298 }
2299 }
2300
2301 switch (frameType)
2302 {
2303 case FT_ESP_FRAME:
2304 noway_assert(!compiler->codeGen->isFramePointerRequired());
2305 noway_assert(!compiler->codeGen->isFrameRequired());
2306 compiler->codeGen->setFramePointerUsed(false);
2307 break;
2308 case FT_EBP_FRAME:
2309 compiler->codeGen->setFramePointerUsed(true);
2310 break;
2311#if DOUBLE_ALIGN
2312 case FT_DOUBLE_ALIGN_FRAME:
2313 noway_assert(!compiler->codeGen->isFramePointerRequired());
2314 compiler->codeGen->setFramePointerUsed(false);
2315 break;
2316#endif // DOUBLE_ALIGN
2317 default:
2318 noway_assert(!"rpFrameType not set correctly!");
2319 break;
2320 }
2321
2322 // If we are using FPBASE as the frame register, we cannot also use it for
2323 // a local var.
2324 regMaskTP removeMask = RBM_NONE;
2325 if (frameType == FT_EBP_FRAME)
2326 {
2327 removeMask |= RBM_FPBASE;
2328 }
2329
2330 compiler->rpFrameType = frameType;
2331
2332#ifdef _TARGET_ARMARCH_
2333 // Determine whether we need to reserve a register for large lclVar offsets.
2334 if (compiler->compRsvdRegCheck(Compiler::REGALLOC_FRAME_LAYOUT))
2335 {
2336 // We reserve R10/IP1 in this case to hold the offsets in load/store instructions
2337 compiler->codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
2338 assert(REG_OPT_RSVD != REG_FP);
2339 JITDUMP(" Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD));
2340 removeMask |= RBM_OPT_RSVD;
2341 }
2342#endif // _TARGET_ARMARCH_
2343
2344 if ((removeMask != RBM_NONE) && ((availableIntRegs & removeMask) != 0))
2345 {
2346 // We know that we're already in "read mode" for availableIntRegs. However,
2347 // we need to remove these registers, so subsequent users (like callers
2348 // to allRegs()) get the right thing. The RemoveRegistersFromMasks() code
2349 // fixes up everything that already took a dependency on the value that was
2350 // previously read, so this completes the picture.
2351 availableIntRegs.OverrideAssign(availableIntRegs & ~removeMask);
2352 }
2353}
2354
2355//------------------------------------------------------------------------
2356// copyOrMoveRegInUse: Is 'ref' a copyReg/moveReg that is still busy at the given location?
2357//
2358// Arguments:
2359// ref: The RefPosition of interest
2360// loc: The LsraLocation at which we're determining whether it's busy.
2361//
2362// Return Value:
2363// true iff 'ref' is active at the given location
2364//
2365bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc)
2366{
2367 if (!ref->copyReg && !ref->moveReg)
2368 {
2369 return false;
2370 }
2371 if (ref->getRefEndLocation() >= loc)
2372 {
2373 return true;
2374 }
2375 Interval* interval = ref->getInterval();
2376 RefPosition* nextRef = interval->getNextRefPosition();
2377 if (nextRef != nullptr && nextRef->treeNode == ref->treeNode && nextRef->getRefEndLocation() >= loc)
2378 {
2379 return true;
2380 }
2381 return false;
2382}
2383
2384// Determine whether the register represented by "physRegRecord" is available at least
2385// at the "currentLoc", and if so, return the next location at which it is in use in
2386// "nextRefLocationPtr"
2387//
2388bool LinearScan::registerIsAvailable(RegRecord* physRegRecord,
2389 LsraLocation currentLoc,
2390 LsraLocation* nextRefLocationPtr,
2391 RegisterType regType)
2392{
2393 *nextRefLocationPtr = MaxLocation;
2394 LsraLocation nextRefLocation = MaxLocation;
2395 regMaskTP regMask = genRegMask(physRegRecord->regNum);
2396 if (physRegRecord->isBusyUntilNextKill)
2397 {
2398 return false;
2399 }
2400
2401 RefPosition* nextPhysReference = physRegRecord->getNextRefPosition();
2402 if (nextPhysReference != nullptr)
2403 {
2404 nextRefLocation = nextPhysReference->nodeLocation;
2405 // if (nextPhysReference->refType == RefTypeFixedReg) nextRefLocation--;
2406 }
2407 else if (!physRegRecord->isCalleeSave)
2408 {
2409 nextRefLocation = MaxLocation - 1;
2410 }
2411
2412 Interval* assignedInterval = physRegRecord->assignedInterval;
2413
2414 if (assignedInterval != nullptr)
2415 {
2416 RefPosition* recentReference = assignedInterval->recentRefPosition;
2417
2418 // The only case where we have an assignedInterval, but recentReference is null
2419 // is where this interval is live at procedure entry (i.e. an arg register), in which
2420 // case it's still live and its assigned register is not available
2421 // (Note that the ParamDef will be recorded as a recentReference when we encounter
2422 // it, but we will be allocating registers, potentially to other incoming parameters,
2423 // as we process the ParamDefs.)
2424
2425 if (recentReference == nullptr)
2426 {
2427 return false;
2428 }
2429
2430 // Is this a copyReg/moveReg? It is if the register assignment doesn't match.
2431 // (the recentReference may not be a copyReg/moveReg, because we could have seen another
2432 // reference since the copyReg/moveReg)
2433
2434 if (!assignedInterval->isAssignedTo(physRegRecord->regNum))
2435 {
2436 // If the recentReference is for a different register, it can be reassigned, but
2437 // otherwise don't reassign it if it's still in use.
2438 // (Note that it is unlikely that we have a recent copy or move to a different register,
2439 // where this physRegRecord is still pointing at an earlier copy or move, but it is possible,
2440 // especially in stress modes.)
2441 if ((recentReference->registerAssignment == regMask) && copyOrMoveRegInUse(recentReference, currentLoc))
2442 {
2443 return false;
2444 }
2445 }
2446 else if (!assignedInterval->isActive && assignedInterval->isConstant)
2447 {
2448 // Treat this as unassigned, i.e. do nothing.
2449 // TODO-CQ: Consider adjusting the heuristics (probably in the caller of this method)
2450 // to avoid reusing these registers.
2451 }
2452 // If this interval isn't active, it's available if it isn't referenced
2453 // at this location (or the previous location, if the recent RefPosition
2454 // is a delayRegFree).
2455 else if (!assignedInterval->isActive &&
2456 (recentReference->refType == RefTypeExpUse || recentReference->getRefEndLocation() < currentLoc))
2457 {
2458 // This interval must have a next reference (otherwise it wouldn't be assigned to this register)
2459 RefPosition* nextReference = recentReference->nextRefPosition;
2460 if (nextReference != nullptr)
2461 {
2462 if (nextReference->nodeLocation < nextRefLocation)
2463 {
2464 nextRefLocation = nextReference->nodeLocation;
2465 }
2466 }
2467 else
2468 {
2469 assert(recentReference->copyReg && recentReference->registerAssignment != regMask);
2470 }
2471 }
2472 else
2473 {
2474 return false;
2475 }
2476 }
2477 if (nextRefLocation < *nextRefLocationPtr)
2478 {
2479 *nextRefLocationPtr = nextRefLocation;
2480 }
2481
2482#ifdef _TARGET_ARM_
2483 if (regType == TYP_DOUBLE)
2484 {
2485 // Recurse, but check the other half this time (TYP_FLOAT)
2486 if (!registerIsAvailable(findAnotherHalfRegRec(physRegRecord), currentLoc, nextRefLocationPtr, TYP_FLOAT))
2487 return false;
2488 nextRefLocation = *nextRefLocationPtr;
2489 }
2490#endif // _TARGET_ARM_
2491
2492 return (nextRefLocation >= currentLoc);
2493}
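// For example (illustrative locations): if this register's next RefPosition is at
// location 25, it has no inactive assigned interval, and currentLoc is 20, this returns
// true with *nextRefLocationPtr == 25. A caller can then prefer a different register
// whose next reference is farther away, since this one must be vacated by location 25.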
2494
2495//------------------------------------------------------------------------
2496// getRegisterType: Get the RegisterType to use for the given RefPosition
2497//
2498// Arguments:
2499// currentInterval: The interval for the current allocation
2500// refPosition: The RefPosition of the current Interval for which a register is being allocated
2501//
2502// Return Value:
2503// The RegisterType that should be allocated for this RefPosition
2504//
2505// Notes:
2506// This will nearly always be identical to the registerType of the interval, except in the case
2507// of SIMD types of 8 bytes (currently only Vector2) when they are passed and returned in integer
2508// registers, or copied to a return temp.
2509// This method need only be called in situations where we may be dealing with the register requirements
2510// of a RefTypeUse RefPosition (i.e. not when we are only looking at the type of an interval, nor when
2511// we are interested in the "defining" type of the interval). This is because the situation of interest
2512// only happens at the use (where it must be copied to an integer register).
2513
2514RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* refPosition)
2515{
2516 assert(refPosition->getInterval() == currentInterval);
2517 RegisterType regType = currentInterval->registerType;
2518 regMaskTP candidates = refPosition->registerAssignment;
2519
2520 assert((candidates & allRegs(regType)) != RBM_NONE);
2521 return regType;
2522}
2523
2524//------------------------------------------------------------------------
2525// isMatchingConstant: Check to see whether a given register contains the constant referenced
2526// by the given RefPosition
2527//
2528// Arguments:
2529// physRegRecord: The RegRecord for the register we're interested in.
2530// refPosition: The RefPosition for a constant interval.
2531//
2532// Return Value:
2533// True iff the register was defined by an identical constant node as the current interval.
2534//
2535bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition)
2536{
2537 if ((physRegRecord->assignedInterval == nullptr) || !physRegRecord->assignedInterval->isConstant)
2538 {
2539 return false;
2540 }
2541 noway_assert(refPosition->treeNode != nullptr);
2542 GenTree* otherTreeNode = physRegRecord->assignedInterval->firstRefPosition->treeNode;
2543 noway_assert(otherTreeNode != nullptr);
2544
2545 if (refPosition->treeNode->OperGet() == otherTreeNode->OperGet())
2546 {
2547 switch (otherTreeNode->OperGet())
2548 {
2549 case GT_CNS_INT:
2550 if ((refPosition->treeNode->AsIntCon()->IconValue() == otherTreeNode->AsIntCon()->IconValue()) &&
2551 (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode)))
2552 {
2553#ifdef _TARGET_64BIT_
2554 // If the constant is negative, only reuse registers of the same type.
2555 // This is because, on a 64-bit system, we do not sign-extend immediates in registers to
2556 // 64-bits unless they are actually longs, as this requires a longer instruction.
2557 // This doesn't apply to a 32-bit system, on which long values occupy multiple registers.
2558 // (We could sign-extend, but we would have to always sign-extend, because if we reuse more
2559 // than once, we won't have access to the instruction that originally defines the constant).
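 // For example (typical x64 encodings): a TYP_INT -1 can be materialized with a
 // 32-bit 'mov eax, -1', which zeroes bits 63:32 and leaves 0x00000000FFFFFFFF in
 // the register, whereas a TYP_LONG -1 requires all 64 bits set
 // (0xFFFFFFFFFFFFFFFF), so the int register's contents cannot simply be reused.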
2560 if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) ||
2561 (refPosition->treeNode->AsIntCon()->IconValue() >= 0))
2562#endif // _TARGET_64BIT_
2563 {
2564 return true;
2565 }
2566 }
2567 break;
2568 case GT_CNS_DBL:
2569 {
2570 // For floating point constants, the values must be identical, not simply compare
2571 // equal. So we compare the bits.
2572 if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) &&
2573 (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()))
2574 {
2575 return true;
2576 }
2577 break;
2578 }
2579 default:
2580 break;
2581 }
2582 }
2583 return false;
2584}
2585
2586//------------------------------------------------------------------------
2587// tryAllocateFreeReg: Find a free register that satisfies the requirements for refPosition,
2588// and takes into account the preferences for the given Interval
2589//
2590// Arguments:
2591// currentInterval: The interval for the current allocation
2592// refPosition: The RefPosition of the current Interval for which a register is being allocated
2593//
2594// Return Value:
2595 // The regNumber, if any, allocated to the RefPosition. Returns REG_NA if no free register is found.
2596//
2597// Notes:
2598// TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as
2599// reg predict does
2600
2601static const regNumber lsraRegOrder[] = {REG_VAR_ORDER};
2602const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder);
2603static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT};
2604const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt);
2605
2606regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* refPosition)
2607{
2608 regNumber foundReg = REG_NA;
2609
2610 RegisterType regType = getRegisterType(currentInterval, refPosition);
2611 const regNumber* regOrder;
2612 unsigned regOrderSize;
2613 if (useFloatReg(regType))
2614 {
2615 regOrder = lsraRegOrderFlt;
2616 regOrderSize = lsraRegOrderFltSize;
2617 }
2618 else
2619 {
2620 regOrder = lsraRegOrder;
2621 regOrderSize = lsraRegOrderSize;
2622 }
2623
2624 LsraLocation currentLocation = refPosition->nodeLocation;
2625 RefPosition* nextRefPos = refPosition->nextRefPosition;
2626 LsraLocation nextLocation = (nextRefPos == nullptr) ? currentLocation : nextRefPos->nodeLocation;
2627 regMaskTP candidates = refPosition->registerAssignment;
2628 regMaskTP preferences = currentInterval->registerPreferences;
2629
2630 if (RefTypeIsDef(refPosition->refType))
2631 {
2632 if (currentInterval->hasConflictingDefUse)
2633 {
2634 resolveConflictingDefAndUse(currentInterval, refPosition);
2635 candidates = refPosition->registerAssignment;
2636 }
2637 // Otherwise, check for the case of a fixed-reg def of a reg that will be killed before the
2638 // use, or interferes at the point of use (which shouldn't happen, but Lower doesn't mark
2639 // the contained nodes as interfering).
2640 // Note that we may have a ParamDef RefPosition that is marked isFixedRegRef, but which
2641 // has had its registerAssignment changed to no longer be a single register.
2642 else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) &&
2643 !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment))
2644 {
2645 regNumber defReg = refPosition->assignedReg();
2646 RegRecord* defRegRecord = getRegisterRecord(defReg);
2647
2648 RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
2649 assert(currFixedRegRefPosition != nullptr &&
2650 currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation);
2651
2652 // If there is another fixed reference to this register before the use, change the candidates
2653 // on this RefPosition to include that of nextRefPos.
2654 if (currFixedRegRefPosition->nextRefPosition != nullptr &&
2655 currFixedRegRefPosition->nextRefPosition->nodeLocation <= nextRefPos->getRefEndLocation())
2656 {
2657 candidates |= nextRefPos->registerAssignment;
2658 if (preferences == refPosition->registerAssignment)
2659 {
2660 preferences = candidates;
2661 }
2662 }
2663 }
2664 }
2665
2666 preferences &= candidates;
2667 if (preferences == RBM_NONE)
2668 {
2669 preferences = candidates;
2670 }
2671 regMaskTP relatedPreferences = RBM_NONE;
2672
2673#ifdef DEBUG
2674 candidates = stressLimitRegs(refPosition, candidates);
2675#endif
2676 assert(candidates != RBM_NONE);
2677
2678 // If the related interval has no further references, it is possible that it is a source of the
2679 // node that produces this interval. However, we don't want to use the relatedInterval for preferencing
2680 // if its next reference is not a new definition (as it either is or will become live).
2681 Interval* relatedInterval = currentInterval->relatedInterval;
2682 if (relatedInterval != nullptr)
2683 {
2684 RefPosition* nextRelatedRefPosition = relatedInterval->getNextRefPosition();
2685 if (nextRelatedRefPosition != nullptr)
2686 {
2687 // Don't use the relatedInterval for preferencing if its next reference is not a new definition,
2688 // or if it is only related because they are multi-reg targets of the same node.
2689 if (!RefTypeIsDef(nextRelatedRefPosition->refType))
2690 {
2691 relatedInterval = nullptr;
2692 }
2693 // Is the relatedInterval not assigned and simply a copy to another relatedInterval?
2694 else if ((relatedInterval->assignedReg == nullptr) && (relatedInterval->relatedInterval != nullptr) &&
2695 (nextRelatedRefPosition->nextRefPosition != nullptr) &&
2696 (nextRelatedRefPosition->nextRefPosition->nextRefPosition == nullptr) &&
2697 (nextRelatedRefPosition->nextRefPosition->nodeLocation <
2698 relatedInterval->relatedInterval->getNextRefLocation()))
2699 {
2700 // The current relatedInterval has only two remaining RefPositions, both of which
2701 // occur prior to the next RefPosition for its relatedInterval.
2702 // It is likely a copy.
2703 relatedInterval = relatedInterval->relatedInterval;
2704 }
2705 }
2706 }
2707
2708 if (relatedInterval != nullptr)
2709 {
2710 // If the related interval already has an assigned register, then use that
2711 // as the related preference. We'll take the related
2712 // interval preferences into account in the loop over all the registers.
2713
2714 if (relatedInterval->assignedReg != nullptr)
2715 {
2716 relatedPreferences = genRegMask(relatedInterval->assignedReg->regNum);
2717 }
2718 else
2719 {
2720 relatedPreferences = relatedInterval->registerPreferences;
2721 }
2722 }
2723
2724 bool preferCalleeSave = currentInterval->preferCalleeSave;
2725
2726 // For floating point, we want to be less aggressive about using callee-save registers.
2727 // So in that case, we just need to ensure that the current RefPosition is covered.
2728 RefPosition* rangeEndRefPosition;
2729 RefPosition* lastRefPosition = currentInterval->lastRefPosition;
2730 if (useFloatReg(currentInterval->registerType))
2731 {
2732 rangeEndRefPosition = refPosition;
2733 }
2734 else
2735 {
2736 rangeEndRefPosition = currentInterval->lastRefPosition;
2737 // If we have a relatedInterval that is not currently occupying a register,
2738 // and whose lifetime begins after this one ends,
2739 // we want to try to select a register that will cover its lifetime.
2740 if ((relatedInterval != nullptr) && (relatedInterval->assignedReg == nullptr) &&
2741 (relatedInterval->getNextRefLocation() >= rangeEndRefPosition->nodeLocation))
2742 {
2743 lastRefPosition = relatedInterval->lastRefPosition;
2744 preferCalleeSave = relatedInterval->preferCalleeSave;
2745 }
2746 }
2747
2748 // If this has a delayed use (due to being used in a rmw position of a
2749 // non-commutative operator), its endLocation is delayed until the "def"
2750 // position, which is one location past the use (getRefEndLocation() takes care of this).
2751 LsraLocation rangeEndLocation = rangeEndRefPosition->getRefEndLocation();
2752 LsraLocation lastLocation = lastRefPosition->getRefEndLocation();
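 // For example, a delayRegFree use at location 12 has getRefEndLocation() == 13, so a
 // candidate register must remain available through the def at 13, not merely at 12.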
2753 regNumber prevReg = REG_NA;
2754
2755 if (currentInterval->assignedReg)
2756 {
2757 bool useAssignedReg = false;
2758 // This was an interval that was previously allocated to the given
2759 // physical register, and we should try to allocate it to that register
2760 // again, if possible and reasonable.
2761 // Use it preemptively (i.e. before checking other available regs)
2762 // only if it is preferred and available.
2763
2764 RegRecord* regRec = currentInterval->assignedReg;
2765 prevReg = regRec->regNum;
2766 regMaskTP prevRegBit = genRegMask(prevReg);
2767
2768 // Is it in the preferred set of regs?
2769 if ((prevRegBit & preferences) != RBM_NONE)
2770 {
2771 // Is it currently available?
2772 LsraLocation nextPhysRefLoc;
2773 if (registerIsAvailable(regRec, currentLocation, &nextPhysRefLoc, currentInterval->registerType))
2774 {
2775 // If the register is next referenced at this location, only use it if
2776 // this has a fixed reg requirement (i.e. this is the reference that caused
2777 // the FixedReg ref to be created)
2778
2779 if (!regRec->conflictingFixedRegReference(refPosition))
2780 {
2781 useAssignedReg = true;
2782 }
2783 }
2784 }
2785 if (useAssignedReg)
2786 {
2787 foundReg = prevReg;
2788 assignPhysReg(regRec, currentInterval);
2789 refPosition->registerAssignment = genRegMask(foundReg);
2790 return foundReg;
2791 }
2792 else
2793 {
2794 // Don't keep trying to allocate to this register
2795 currentInterval->assignedReg = nullptr;
2796 }
2797 }
2798
2799 //-------------------------------------------------------------------------
2800 // Register Selection
2801
2802 RegRecord* availablePhysRegInterval = nullptr;
2803 bool unassignInterval = false;
2804
2805 // Each register will receive a score which is the sum of the scoring criteria below.
2806 // These were selected on the assumption that they will have an impact on the "goodness"
2807 // of a register selection, and have been tuned to a certain extent by observing the impact
2808 // of the ordering on asmDiffs. However, there is probably much more room for tuning,
2809 // and perhaps additional criteria.
2810 //
2811 // These are FLAGS (bits) so that we can easily order them and add them together.
2812 // If the scores are equal, but one covers more of the current interval's range,
2813 // then it wins. Otherwise, the one encountered earlier in the regOrder wins.
2814
2815 enum RegisterScore
2816 {
2817 VALUE_AVAILABLE = 0x40, // It is a constant value that is already in an acceptable register.
2818 COVERS = 0x20, // It is in the interval's preference set and it covers the entire lifetime.
2819 OWN_PREFERENCE = 0x10, // It is in the preference set of this interval.
2820 COVERS_RELATED = 0x08, // It is in the preference set of the related interval and covers the entire lifetime.
2821 RELATED_PREFERENCE = 0x04, // It is in the preference set of the related interval.
2822 CALLER_CALLEE = 0x02, // It is in the right "set" for the interval (caller or callee-save).
2823 UNASSIGNED = 0x01, // It is not currently assigned to an inactive interval.
2824 };
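 // Because each criterion is a distinct bit, any single higher-order criterion outweighs
 // all lower-order ones combined: e.g. a register scoring COVERS | OWN_PREFERENCE (0x30)
 // always beats one scoring COVERS_RELATED | RELATED_PREFERENCE | CALLER_CALLEE |
 // UNASSIGNED (0x0F), and VALUE_AVAILABLE (0x40) dominates everything else (0x3F total).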
2825
2826 int bestScore = 0;
2827
2828 // Compute the best possible score so we can stop looping early if we find it.
2829 // TODO-Throughput: At some point we may want to short-circuit the computation of each score, but
2830 // probably not until we've tuned the order of these criteria. At that point,
2831 // we'll need to avoid the short-circuit if we've got a stress option to reverse
2832 // the selection.
2833 int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE;
2834 if (relatedPreferences != RBM_NONE)
2835 {
2836 bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED;
2837 }
2838
2839 LsraLocation bestLocation = MinLocation;
2840
2841 // In non-debug builds, this will simply get optimized away
2842 bool reverseSelect = false;
2843#ifdef DEBUG
2844 reverseSelect = doReverseSelect();
2845#endif // DEBUG
2846
2847 // An optimization for the common case where there is only one candidate -
2848 // avoid looping over all the other registers
2849
2850 regNumber singleReg = REG_NA;
2851
2852 if (genMaxOneBit(candidates))
2853 {
2854 regOrderSize = 1;
2855 singleReg = genRegNumFromMask(candidates);
2856 regOrder = &singleReg;
2857 }
2858
2859 for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++)
2860 {
2861 regNumber regNum = regOrder[i];
2862 regMaskTP candidateBit = genRegMask(regNum);
2863
2864 if (!(candidates & candidateBit))
2865 {
2866 continue;
2867 }
2868
2869 candidates &= ~candidateBit;
2870
2871 RegRecord* physRegRecord = getRegisterRecord(regNum);
2872
2873 int score = 0;
2874 LsraLocation nextPhysRefLocation = MaxLocation;
2875
2876 // By chance, is this register already holding this interval, as a copyReg or having
2877 // been restored as inactive after a kill?
2878 if (physRegRecord->assignedInterval == currentInterval)
2879 {
2880 availablePhysRegInterval = physRegRecord;
2881 unassignInterval = false;
2882 break;
2883 }
2884
2885 // Find the next RefPosition of the physical register
2886 if (!registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType))
2887 {
2888 continue;
2889 }
2890
2891 // If the register is next referenced at this location, only use it if
2892 // this has a fixed reg requirement (i.e. this is the reference that caused
2893 // the FixedReg ref to be created)
2894
2895 if (physRegRecord->conflictingFixedRegReference(refPosition))
2896 {
2897 continue;
2898 }
2899
2900 // If this is a definition of a constant interval, check to see if its value is already in this register.
2901 if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) &&
2902 isMatchingConstant(physRegRecord, refPosition))
2903 {
2904 score |= VALUE_AVAILABLE;
2905 }
2906
2907 // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that
2908 // we don't mistakenly conclude that it fails to cover the live range.
2909 // This doesn't handle the case where earlier RefPositions for this Interval are also
2910 // FixedRefs of this regNum, but at least those are only interesting in the case where those
2911 // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg.
2912 if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum))
2913 {
2914 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval, regNum));
2915 nextPhysRefLocation++;
2916 }
2917
2918 if ((candidateBit & preferences) != RBM_NONE)
2919 {
2920 score |= OWN_PREFERENCE;
2921 if (nextPhysRefLocation > rangeEndLocation)
2922 {
2923 score |= COVERS;
2924 }
2925 }
2926 if (relatedInterval != nullptr && (candidateBit & relatedPreferences) != RBM_NONE)
2927 {
2928 score |= RELATED_PREFERENCE;
2929 if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation)
2930 {
2931 score |= COVERS_RELATED;
2932 }
2933 }
2934
2935 // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other registers
2936 // with the same score. (Note that we haven't changed the original registerAssignment on the RefPosition).
2937 // Overload the RELATED_PREFERENCE value.
2938 else if (candidateBit == refPosition->registerAssignment)
2939 {
2940 score |= RELATED_PREFERENCE;
2941 }
2942
2943 if ((preferCalleeSave && physRegRecord->isCalleeSave) || (!preferCalleeSave && !physRegRecord->isCalleeSave))
2944 {
2945 score |= CALLER_CALLEE;
2946 }
2947
2948 // The register is considered unassigned if it has no assignedInterval, OR
2949 // if its next reference is beyond the range of this interval.
2950 if (!isAssigned(physRegRecord, lastLocation ARM_ARG(currentInterval->registerType)))
2951 {
2952 score |= UNASSIGNED;
2953 }
2954
2955 bool foundBetterCandidate = false;
2956
2957 if (score > bestScore)
2958 {
2959 foundBetterCandidate = true;
2960 }
2961 else if (score == bestScore)
2962 {
2963 // Prefer a register that covers the range.
2964 if (bestLocation <= lastLocation)
2965 {
2966 if (nextPhysRefLocation > bestLocation)
2967 {
2968 foundBetterCandidate = true;
2969 }
2970 }
2971 // If both cover the range, prefer a register that is killed sooner (leaving the longer range register
2972 // available). If both cover the range and are also killed at the same location, prefer the one that
2973 // matches the previous assignment.
2974 else if (nextPhysRefLocation > lastLocation)
2975 {
2976 if (nextPhysRefLocation < bestLocation)
2977 {
2978 foundBetterCandidate = true;
2979 }
2980 else if (nextPhysRefLocation == bestLocation && prevReg == regNum)
2981 {
2982 foundBetterCandidate = true;
2983 }
2984 }
2985 }
2986
2987#ifdef DEBUG
2988 if (doReverseSelect() && bestScore != 0)
2989 {
2990 foundBetterCandidate = !foundBetterCandidate;
2991 }
2992#endif // DEBUG
2993
2994 if (foundBetterCandidate)
2995 {
2996 bestLocation = nextPhysRefLocation;
2997 availablePhysRegInterval = physRegRecord;
2998 unassignInterval = true;
2999 bestScore = score;
3000 }
3001
3002 // There is no way we can get a better score, so break out.
3003 if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1)
3004 {
3005 break;
3006 }
3007 }
3008
3009 if (availablePhysRegInterval != nullptr)
3010 {
3011 if (unassignInterval && isAssigned(availablePhysRegInterval ARM_ARG(currentInterval->registerType)))
3012 {
3013 Interval* const intervalToUnassign = availablePhysRegInterval->assignedInterval;
3014 unassignPhysReg(availablePhysRegInterval ARM_ARG(currentInterval->registerType));
3015
3016 if ((bestScore & VALUE_AVAILABLE) != 0 && intervalToUnassign != nullptr)
3017 {
3018 assert(intervalToUnassign->isConstant);
3019 refPosition->treeNode->SetReuseRegVal();
3020 }
3021 // If we considered this "unassigned" because this interval's lifetime ends before
3022 // the next ref, remember it.
3023 else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr)
3024 {
3025 updatePreviousInterval(availablePhysRegInterval, intervalToUnassign, intervalToUnassign->registerType);
3026 }
3027 }
3028 else
3029 {
3030 assert((bestScore & VALUE_AVAILABLE) == 0);
3031 }
3032 assignPhysReg(availablePhysRegInterval, currentInterval);
3033 foundReg = availablePhysRegInterval->regNum;
3034 regMaskTP foundRegMask = genRegMask(foundReg);
3035 refPosition->registerAssignment = foundRegMask;
3036 if (relatedInterval != nullptr)
3037 {
3038 relatedInterval->updateRegisterPreferences(foundRegMask);
3039 }
3040 }
3041
3042 return foundReg;
3043}
3044
3045//------------------------------------------------------------------------
3046// canSpillReg: Determine whether we can spill physRegRecord
3047//
3048// Arguments:
3049// physRegRecord - reg to spill
3050// refLocation - Location of RefPosition where this register will be spilled
3051// recentAssignedRefWeight - Weight of recent assigned RefPosition which will be determined in this function
3052// farthestRefPosWeight - Current farthestRefPosWeight at allocateBusyReg()
3053//
3054// Return Value:
3055// True - if we can spill physRegRecord
3056// False - otherwise
3057//
3058// Note: This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg()
3059//
3060bool LinearScan::canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight)
3061{
3062 assert(physRegRecord->assignedInterval != nullptr);
3063 RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition;
3064
3065 if (recentAssignedRef != nullptr)
3066 {
3067 if (isRefPositionActive(recentAssignedRef, refLocation))
3068 {
3069 // We can't spill a register that's active at the current location
3070 return false;
3071 }
3072
3073 // We don't prefer to spill a register if the weight of recentAssignedRef > weight
3074 // of the spill candidate found so far. We would consider spilling a greater-weight
3075 // ref position only if the refPosition being allocated must have a register.
3076 *recentAssignedRefWeight = getWeight(recentAssignedRef);
3077 }
3078 return true;
3079}
3080
3081#ifdef _TARGET_ARM_
3082//------------------------------------------------------------------------
3083// canSpillDoubleReg: Determine whether we can spill physRegRecord
3084//
3085// Arguments:
3086// physRegRecord - reg to spill (must be a valid double register)
3087// refLocation - Location of RefPosition where this register will be spilled
3088// recentAssignedRefWeight - Weight of recent assigned RefPosition which will be determined in this function
3089//
3090// Return Value:
3091// True - if we can spill physRegRecord
3092// False - otherwise
3093//
3094// Notes:
3095 // This helper is designed to be used only from allocateBusyReg().
3096// The recentAssignedRefWeight is not updated if either register cannot be spilled.
3097//
3098bool LinearScan::canSpillDoubleReg(RegRecord* physRegRecord,
3099 LsraLocation refLocation,
3100 unsigned* recentAssignedRefWeight)
3101{
3102 assert(genIsValidDoubleReg(physRegRecord->regNum));
3103 bool retVal = true;
3104 unsigned weight = BB_ZERO_WEIGHT;
3105 unsigned weight2 = BB_ZERO_WEIGHT;
3106
3107 RegRecord* physRegRecord2 = findAnotherHalfRegRec(physRegRecord);
3108
3109 if ((physRegRecord->assignedInterval != nullptr) && !canSpillReg(physRegRecord, refLocation, &weight))
3110 {
3111 return false;
3112 }
3113 if (physRegRecord2->assignedInterval != nullptr)
3114 {
3115 if (!canSpillReg(physRegRecord2, refLocation, &weight2))
3116 {
3117 return false;
3118 }
3119 if (weight2 > weight)
3120 {
3121 weight = weight2;
3122 }
3123 }
3124 *recentAssignedRefWeight = weight;
3125 return true;
3126}
3127#endif
3128
3129#ifdef _TARGET_ARM_
3130//------------------------------------------------------------------------
3131// unassignDoublePhysReg: unassign a double register (pair)
3132//
3133// Arguments:
3134// doubleRegRecord - reg to unassign
3135//
3136// Note:
3137// The given RegRecord must be a valid (even numbered) double register.
3138//
3139void LinearScan::unassignDoublePhysReg(RegRecord* doubleRegRecord)
3140{
3141 assert(genIsValidDoubleReg(doubleRegRecord->regNum));
3142
3143 RegRecord* doubleRegRecordLo = doubleRegRecord;
3144 RegRecord* doubleRegRecordHi = findAnotherHalfRegRec(doubleRegRecordLo);
3145 // For a double register, we have the following four cases.
3146 // Case 1: doubleRegRecordLo is assigned to a TYP_DOUBLE interval
3147 // Case 2: doubleRegRecordLo and doubleRegRecordHi are assigned to different TYP_FLOAT intervals
3148 // Case 3: doubleRegRecordLo is assigned to a TYP_FLOAT interval and doubleRegRecordHi is nullptr
3149 // Case 4: doubleRegRecordLo is nullptr, and doubleRegRecordHi is assigned to a TYP_FLOAT interval
3150 if (doubleRegRecordLo->assignedInterval != nullptr)
3151 {
3152 if (doubleRegRecordLo->assignedInterval->registerType == TYP_DOUBLE)
3153 {
3154 // Case 1: doubleRegRecordLo is assigned to a TYP_DOUBLE interval
3155 unassignPhysReg(doubleRegRecordLo, doubleRegRecordLo->assignedInterval->recentRefPosition);
3156 }
3157 else
3158 {
3159 // Case 2: doubleRegRecordLo and doubleRegRecordHi are assigned to different TYP_FLOAT intervals
3160 // Case 3: doubleRegRecordLo is assigned to a TYP_FLOAT interval and doubleRegRecordHi is nullptr
3161 assert(doubleRegRecordLo->assignedInterval->registerType == TYP_FLOAT);
3162 unassignPhysReg(doubleRegRecordLo, doubleRegRecordLo->assignedInterval->recentRefPosition);
3163
3164 if (doubleRegRecordHi != nullptr)
3165 {
3166 if (doubleRegRecordHi->assignedInterval != nullptr)
3167 {
3168 assert(doubleRegRecordHi->assignedInterval->registerType == TYP_FLOAT);
3169 unassignPhysReg(doubleRegRecordHi, doubleRegRecordHi->assignedInterval->recentRefPosition);
3170 }
3171 }
3172 }
3173 }
3174 else
3175 {
3176 // Case 4: doubleRegRecordLo is nullptr, and doubleRegRecordHi is assigned to a TYP_FLOAT interval
3177 assert(doubleRegRecordHi->assignedInterval != nullptr);
3178 assert(doubleRegRecordHi->assignedInterval->registerType == TYP_FLOAT);
3179 unassignPhysReg(doubleRegRecordHi, doubleRegRecordHi->assignedInterval->recentRefPosition);
3180 }
3181}
3182
3183#endif // _TARGET_ARM_
3184
3185//------------------------------------------------------------------------
3186// isRefPositionActive: Determine whether a given RefPosition is active at the given location
3187//
3188// Arguments:
3189// refPosition - the RefPosition of interest
3190// refLocation - the LsraLocation at which we want to know if it is active
3191//
3192// Return Value:
3193// True - if this RefPosition occurs at the given location, OR
3194// if it occurs at the previous location and is marked delayRegFree.
3195// False - otherwise
3196//
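// For example, a RefPosition at location 10 that is marked delayRegFree is
// considered active at both location 10 and location 11.
//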
3197bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation)
3198{
3199 return (refPosition->nodeLocation == refLocation ||
3200 ((refPosition->nodeLocation + 1 == refLocation) && refPosition->delayRegFree));
3201}
3202
3203//----------------------------------------------------------------------------------------
3204// isRegInUse: Test whether regRec is being used at the refPosition
3205//
3206// Arguments:
3207// regRec - A register to be tested
3208// refPosition - RefPosition where regRec is tested
3209//
3210// Return Value:
3211// True - if regRec is being used
3212// False - otherwise
3213//
3214// Notes:
3215// This helper is designed to be used only from allocateBusyReg(), where:
3216// - This register was *not* found when looking for a free register, and
3217// - The caller must have already checked for the case where 'refPosition' is a fixed ref
3218// (asserted at the beginning of this method).
3219//
3220bool LinearScan::isRegInUse(RegRecord* regRec, RefPosition* refPosition)
3221{
3222 // We shouldn't reach this check if 'refPosition' is a FixedReg of this register.
3223 assert(!refPosition->isFixedRefOfReg(regRec->regNum));
3224 Interval* assignedInterval = regRec->assignedInterval;
3225 if (assignedInterval != nullptr)
3226 {
3227 if (!assignedInterval->isActive)
3228 {
3229 // This can only happen if we have a recentRefPosition active at this location that hasn't yet been freed.
3230 CLANG_FORMAT_COMMENT_ANCHOR;
3231
3232 if (isRefPositionActive(assignedInterval->recentRefPosition, refPosition->nodeLocation))
3233 {
3234 return true;
3235 }
3236 else
3237 {
3238#ifdef _TARGET_ARM_
                // In the case of TYP_DOUBLE, we may have the case where 'assignedInterval' is inactive,
                // but the other half register is active. If so, it must have an active recentRefPosition,
                // as above.
3242 if (refPosition->getInterval()->registerType == TYP_DOUBLE)
3243 {
3244 RegRecord* otherHalfRegRec = findAnotherHalfRegRec(regRec);
3245 if (!otherHalfRegRec->assignedInterval->isActive)
3246 {
3247 if (isRefPositionActive(otherHalfRegRec->assignedInterval->recentRefPosition,
3248 refPosition->nodeLocation))
3249 {
3250 return true;
3251 }
3252 else
3253 {
3254 assert(!"Unexpected inactive assigned interval in isRegInUse");
3255 return true;
3256 }
3257 }
3258 }
3259 else
3260#endif
3261 {
3262 assert(!"Unexpected inactive assigned interval in isRegInUse");
3263 return true;
3264 }
3265 }
3266 }
3267 RefPosition* nextAssignedRef = assignedInterval->getNextRefPosition();
3268
3269 // We should never spill a register that's occupied by an Interval with its next use at the current
3270 // location.
3271 // Normally this won't occur (unless we actually had more uses in a single node than there are registers),
3272 // because we'll always find something with a later nextLocation, but it can happen in stress when
3273 // we have LSRA_SELECT_NEAREST.
3274 if ((nextAssignedRef != nullptr) && isRefPositionActive(nextAssignedRef, refPosition->nodeLocation) &&
3275 nextAssignedRef->RequiresRegister())
3276 {
3277 return true;
3278 }
3279 }
3280 return false;
3281}
3282
3283//------------------------------------------------------------------------
3284// isSpillCandidate: Determine if a register is a spill candidate for a given RefPosition.
3285//
3286// Arguments:
3287// current The interval for the current allocation
3288// refPosition The RefPosition of the current Interval for which a register is being allocated
3289// physRegRecord The RegRecord for the register we're considering for spill
3290// nextLocation An out (reference) parameter in which the next use location of the
3291// given RegRecord will be returned.
3292//
3293// Return Value:
3294// True iff the given register can be spilled to accommodate the given RefPosition.
3295//
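// Notes:
//    As a summary of the checks below: the register is rejected if it is marked
//    isBusyUntilNextKill, has a conflicting fixed reference at this location, or
//    is actively in use at this location; it is accepted unconditionally if it is
//    the only (fixed) candidate for this RefPosition.
//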
3296bool LinearScan::isSpillCandidate(Interval* current,
3297 RefPosition* refPosition,
3298 RegRecord* physRegRecord,
3299 LsraLocation& nextLocation)
3300{
3301 regMaskTP candidateBit = genRegMask(physRegRecord->regNum);
3302 LsraLocation refLocation = refPosition->nodeLocation;
3303 if (physRegRecord->isBusyUntilNextKill)
3304 {
3305 return false;
3306 }
3307 Interval* assignedInterval = physRegRecord->assignedInterval;
3308 if (assignedInterval != nullptr)
3309 {
3310 nextLocation = assignedInterval->getNextRefLocation();
3311 }
3312#ifdef _TARGET_ARM_
3313 RegRecord* physRegRecord2 = nullptr;
3314 Interval* assignedInterval2 = nullptr;
3315
3316 // For ARM32, a double occupies a consecutive even/odd pair of float registers.
3317 if (current->registerType == TYP_DOUBLE)
3318 {
3319 assert(genIsValidDoubleReg(physRegRecord->regNum));
3320 physRegRecord2 = findAnotherHalfRegRec(physRegRecord);
3321 if (physRegRecord2->isBusyUntilNextKill)
3322 {
3323 return false;
3324 }
3325 assignedInterval2 = physRegRecord2->assignedInterval;
3326 if ((assignedInterval2 != nullptr) && (assignedInterval2->getNextRefLocation() > nextLocation))
3327 {
3328 nextLocation = assignedInterval2->getNextRefLocation();
3329 }
3330 }
3331#endif
3332
3333 // If there is a fixed reference at the same location (and it's not due to this reference),
3334 // don't use it.
3335 if (physRegRecord->conflictingFixedRegReference(refPosition))
3336 {
3337 return false;
3338 }
3339
3340 if (refPosition->isFixedRefOfRegMask(candidateBit))
3341 {
        // Either:
        // - there is a fixed reference due to this node, OR
        // - there is a fixed use fed by a def at this node, OR
        // - we have restricted the set of registers for stress.
        // In any case, we must use this register as it's the only candidate.
3347 // TODO-CQ: At the time we allocate a register to a fixed-reg def, if it's not going
3348 // to remain live until the use, we should set the candidates to allRegs(regType)
3349 // to avoid a spill - codegen can then insert the copy.
3350 // If this is marked as allocateIfProfitable, the caller will compare the weights
3351 // of this RefPosition and the RefPosition to which it is currently assigned.
3352 assert(refPosition->isFixedRegRef ||
3353 (refPosition->nextRefPosition != nullptr && refPosition->nextRefPosition->isFixedRegRef) ||
3354 candidatesAreStressLimited());
3355 return true;
3356 }
3357
3358 // If this register is not assigned to an interval, either
3359 // - it has a FixedReg reference at the current location that is not this reference, OR
3360 // - this is the special case of a fixed loReg, where this interval has a use at the same location
3361 // In either case, we cannot use it
3362 CLANG_FORMAT_COMMENT_ANCHOR;
3363
3364#ifdef _TARGET_ARM_
3365 if (assignedInterval == nullptr && assignedInterval2 == nullptr)
3366#else
3367 if (assignedInterval == nullptr)
3368#endif
3369 {
3370 RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition();
3371 assert((nextPhysRegPosition != nullptr) && (nextPhysRegPosition->nodeLocation == refLocation) &&
3372 (candidateBit != refPosition->registerAssignment));
3373 return false;
3374 }
3375
3376 if (isRegInUse(physRegRecord, refPosition))
3377 {
3378 return false;
3379 }
3380
3381#ifdef _TARGET_ARM_
3382 if (current->registerType == TYP_DOUBLE)
3383 {
3384 if (isRegInUse(physRegRecord2, refPosition))
3385 {
3386 return false;
3387 }
3388 }
3389#endif
3390 return true;
3391}
3392
3393//------------------------------------------------------------------------
3394// allocateBusyReg: Find a busy register that satisfies the requirements for refPosition,
3395// and that can be spilled.
3396//
3397// Arguments:
3398// current The interval for the current allocation
3399// refPosition The RefPosition of the current Interval for which a register is being allocated
3400// allocateIfProfitable If true, a reg may not be allocated if all other ref positions currently
3401// occupying registers are more important than the 'refPosition'.
3402//
// Return Value:
//    The regNumber allocated to the RefPosition. Returns REG_NA if no register could be
//    found that is profitable to spill (possible only when allocateIfProfitable is true).
3405//
// Note: Currently this routine uses weight and farthest distance of next reference
//       to select a ref position for spilling.
//       a) if allocateIfProfitable = false
//          The ref position chosen for spilling will be the one with the lowest
//          weight of all, and if there is more than one ref position with the
//          same lowest weight, among them it chooses the one with the farthest
//          distance to its next reference.
//
//       b) if allocateIfProfitable = true
//          The ref position chosen for spilling will not only have the lowest
//          weight of all, but also a weight lower than 'refPosition'. If there
//          is no such ref position, no reg will be allocated.
3418//
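// For example (illustrative weights/locations): if r1's assigned interval has
// weight 2 with its next reference at location 40, and r2's also has weight 2
// but with its next reference at location 60, then r2 is preferred as the spill
// candidate because, with equal weights, its next reference is farther away.
//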
3419regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable)
3420{
3421 regNumber foundReg = REG_NA;
3422
3423 RegisterType regType = getRegisterType(current, refPosition);
3424 regMaskTP candidates = refPosition->registerAssignment;
3425 regMaskTP preferences = (current->registerPreferences & candidates);
3426 if (preferences == RBM_NONE)
3427 {
3428 preferences = candidates;
3429 }
3430 if (candidates == RBM_NONE)
3431 {
3432 // This assumes only integer and floating point register types
3433 // if we target a processor with additional register types,
3434 // this would have to change
3435 candidates = allRegs(regType);
3436 }
3437
3438#ifdef DEBUG
3439 candidates = stressLimitRegs(refPosition, candidates);
3440#endif // DEBUG
3441
    // TODO-CQ: Determine whether/how to take preferences into account, in addition to
    // preferring the one with the farthest ref position, when considering
    // a candidate to spill.
3445 RegRecord* farthestRefPhysRegRecord = nullptr;
3446#ifdef _TARGET_ARM_
3447 RegRecord* farthestRefPhysRegRecord2 = nullptr;
3448#endif
3449 LsraLocation farthestLocation = MinLocation;
3450 LsraLocation refLocation = refPosition->nodeLocation;
3451 unsigned farthestRefPosWeight;
3452 if (allocateIfProfitable)
3453 {
3454 // If allocating a reg is optional, we will consider those ref positions
3455 // whose weight is less than 'refPosition' for spilling.
3456 farthestRefPosWeight = getWeight(refPosition);
3457 }
3458 else
3459 {
3460 // If allocating a reg is a must, we start off with max weight so
3461 // that the first spill candidate will be selected based on
3462 // farthest distance alone. Since we start off with farthestLocation
3463 // initialized to MinLocation, the first available ref position
3464 // will be selected as spill candidate and its weight as the
        // farthestRefPosWeight.
3466 farthestRefPosWeight = BB_MAX_WEIGHT;
3467 }
3468
3469 for (regNumber regNum : Registers(regType))
3470 {
3471 regMaskTP candidateBit = genRegMask(regNum);
3472 if (!(candidates & candidateBit))
3473 {
3474 continue;
3475 }
3476 RegRecord* physRegRecord = getRegisterRecord(regNum);
3477 RegRecord* physRegRecord2 = nullptr; // only used for _TARGET_ARM_
3478 LsraLocation nextLocation = MinLocation;
3479 LsraLocation physRegNextLocation;
3480 if (!isSpillCandidate(current, refPosition, physRegRecord, nextLocation))
3481 {
3482 assert(candidates != candidateBit);
3483 continue;
3484 }
3485
3486 // We've passed the preliminary checks for a spill candidate.
3487 // Now, if we have a recentAssignedRef, check that it is going to be OK to spill it.
3488 Interval* assignedInterval = physRegRecord->assignedInterval;
3489 unsigned recentAssignedRefWeight = BB_ZERO_WEIGHT;
3490 RefPosition* recentAssignedRef = nullptr;
3491 RefPosition* recentAssignedRef2 = nullptr;
3492#ifdef _TARGET_ARM_
3493 if (current->registerType == TYP_DOUBLE)
3494 {
3495 recentAssignedRef = (assignedInterval == nullptr) ? nullptr : assignedInterval->recentRefPosition;
3496 physRegRecord2 = findAnotherHalfRegRec(physRegRecord);
3497 Interval* assignedInterval2 = physRegRecord2->assignedInterval;
3498 recentAssignedRef2 = (assignedInterval2 == nullptr) ? nullptr : assignedInterval2->recentRefPosition;
3499 if (!canSpillDoubleReg(physRegRecord, refLocation, &recentAssignedRefWeight))
3500 {
3501 continue;
3502 }
3503 }
3504 else
3505#endif
3506 {
3507 recentAssignedRef = assignedInterval->recentRefPosition;
3508 if (!canSpillReg(physRegRecord, refLocation, &recentAssignedRefWeight))
3509 {
3510 continue;
3511 }
3512 }
3513 if (recentAssignedRefWeight > farthestRefPosWeight)
3514 {
3515 continue;
3516 }
3517
3518 physRegNextLocation = physRegRecord->getNextRefLocation();
3519 if (nextLocation > physRegNextLocation)
3520 {
3521 nextLocation = physRegNextLocation;
3522 }
3523
3524 bool isBetterLocation;
3525
3526#ifdef DEBUG
3527 if (doSelectNearest() && farthestRefPhysRegRecord != nullptr)
3528 {
3529 isBetterLocation = (nextLocation <= farthestLocation);
3530 }
3531 else
3532#endif
        // This 'if' statement is associated with the 'else' above (under #ifdef DEBUG).
3534 if (recentAssignedRefWeight < farthestRefPosWeight)
3535 {
3536 isBetterLocation = true;
3537 }
3538 else
3539 {
            // This means the weight of the spill ref position we have found so far is
            // equal to the weight of the ref position that is being evaluated. In this
            // case we prefer to spill the ref position whose distance to its next
            // reference is the farthest.
3544 assert(recentAssignedRefWeight == farthestRefPosWeight);
3545
3546 // If allocateIfProfitable=true, the first spill candidate selected
3547 // will be based on weight alone. After we have found a spill
3548 // candidate whose weight is less than the 'refPosition', we will
3549 // consider farthest distance when there is a tie in weights.
3550 // This is to ensure that we don't spill a ref position whose
            // weight is equal to the weight of 'refPosition'.
3552 if (allocateIfProfitable && farthestRefPhysRegRecord == nullptr)
3553 {
3554 isBetterLocation = false;
3555 }
3556 else
3557 {
                if (nextLocation > farthestLocation)
                {
                    isBetterLocation = true;
                }
3564 else if (nextLocation == farthestLocation)
3565 {
                    // Both weight and distance are equal.
                    // Prefer the ref position that is marked both reload and
                    // allocate-if-profitable. Such ref positions don't need
                    // to be spilled, as they are already in memory and
                    // codegen considers them as contained memory operands.
3571 CLANG_FORMAT_COMMENT_ANCHOR;
3572#ifdef _TARGET_ARM_
                    // TODO-CQ-ARM: We just conservatively "and" the two conditions. We may implement a better
                    // condition later.
3574 isBetterLocation = true;
3575 if (recentAssignedRef != nullptr)
3576 isBetterLocation &= (recentAssignedRef->reload && recentAssignedRef->AllocateIfProfitable());
3577
3578 if (recentAssignedRef2 != nullptr)
3579 isBetterLocation &= (recentAssignedRef2->reload && recentAssignedRef2->AllocateIfProfitable());
3580#else
3581 isBetterLocation = (recentAssignedRef != nullptr) && recentAssignedRef->reload &&
3582 recentAssignedRef->AllocateIfProfitable();
3583#endif
3584 }
3585 else
3586 {
3587 isBetterLocation = false;
3588 }
3589 }
3590 }
3591
3592 if (isBetterLocation)
3593 {
3594 farthestLocation = nextLocation;
3595 farthestRefPhysRegRecord = physRegRecord;
3596#ifdef _TARGET_ARM_
3597 farthestRefPhysRegRecord2 = physRegRecord2;
3598#endif
3599 farthestRefPosWeight = recentAssignedRefWeight;
3600 }
3601 }
3602
#ifdef DEBUG
3604 if (allocateIfProfitable)
3605 {
        // There may not be a spill candidate; if one is found,
        // its weight must be less than the weight of 'refPosition'.
3608 assert((farthestRefPhysRegRecord == nullptr) || (farthestRefPosWeight < getWeight(refPosition)));
3609 }
3610 else
3611 {
3612 // Must have found a spill candidate.
3613 assert(farthestRefPhysRegRecord != nullptr);
3614
3615 if (farthestLocation == refLocation)
3616 {
3617 // This must be a RefPosition that is constrained to use a single register, either directly,
3618 // or at the use, or by stress.
3619 bool isConstrained = (refPosition->isFixedRegRef || (refPosition->nextRefPosition != nullptr &&
3620 refPosition->nextRefPosition->isFixedRegRef) ||
3621 candidatesAreStressLimited());
3622 if (!isConstrained)
3623 {
3624#ifdef _TARGET_ARM_
3625 Interval* assignedInterval =
3626 (farthestRefPhysRegRecord == nullptr) ? nullptr : farthestRefPhysRegRecord->assignedInterval;
3627 Interval* assignedInterval2 =
3628 (farthestRefPhysRegRecord2 == nullptr) ? nullptr : farthestRefPhysRegRecord2->assignedInterval;
3629 RefPosition* nextRefPosition =
3630 (assignedInterval == nullptr) ? nullptr : assignedInterval->getNextRefPosition();
3631 RefPosition* nextRefPosition2 =
3632 (assignedInterval2 == nullptr) ? nullptr : assignedInterval2->getNextRefPosition();
3633 if (nextRefPosition != nullptr)
3634 {
3635 if (nextRefPosition2 != nullptr)
3636 {
3637 assert(!nextRefPosition->RequiresRegister() || !nextRefPosition2->RequiresRegister());
3638 }
3639 else
3640 {
3641 assert(!nextRefPosition->RequiresRegister());
3642 }
3643 }
3644 else
3645 {
3646 assert(nextRefPosition2 != nullptr && !nextRefPosition2->RequiresRegister());
3647 }
3648#else // !_TARGET_ARM_
3649 Interval* assignedInterval = farthestRefPhysRegRecord->assignedInterval;
3650 RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
3651 assert(!nextRefPosition->RequiresRegister());
3652#endif // !_TARGET_ARM_
3653 }
3654 }
3655 else
3656 {
3657 assert(farthestLocation > refLocation);
3658 }
3659 }
3660#endif // DEBUG
3661
3662 if (farthestRefPhysRegRecord != nullptr)
3663 {
3664 foundReg = farthestRefPhysRegRecord->regNum;
3665
3666#ifdef _TARGET_ARM_
3667 if (current->registerType == TYP_DOUBLE)
3668 {
3669 assert(genIsValidDoubleReg(foundReg));
3670 unassignDoublePhysReg(farthestRefPhysRegRecord);
3671 }
3672 else
3673#endif
3674 {
3675 unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
3676 }
3677
3678 assignPhysReg(farthestRefPhysRegRecord, current);
3679 refPosition->registerAssignment = genRegMask(foundReg);
3680 }
3681 else
3682 {
3683 foundReg = REG_NA;
3684 refPosition->registerAssignment = RBM_NONE;
3685 }
3686
3687 return foundReg;
3688}
3689
//------------------------------------------------------------------------
// assignCopyReg: Grab a register to use to copy, and then immediately use it.
//
// Arguments:
//    refPosition - The RefPosition requiring a copy register
//
// Return Value:
//    The register assigned for the copy.
//
// Notes:
//    This is called only for localVar intervals that already have a register
//    assignment that is not compatible with the current RefPosition.
//    This is not like regular assignment, because we don't want to change
//    any preferences or existing register assignments.
//    We prefer a free register that has the earliest next use;
//    otherwise, we spill something with the farthest next use.
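//    Setting refPosition->copyReg (below) causes a copy to be generated from the
//    interval's current (home) register to the returned register; the home register
//    assignment itself is left unchanged.
//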
3698regNumber LinearScan::assignCopyReg(RefPosition* refPosition)
3699{
3700 Interval* currentInterval = refPosition->getInterval();
3701 assert(currentInterval != nullptr);
3702 assert(currentInterval->isActive);
3703
3704 bool foundFreeReg = false;
3705 RegRecord* bestPhysReg = nullptr;
3706 LsraLocation bestLocation = MinLocation;
3707 regMaskTP candidates = refPosition->registerAssignment;
3708
3709 // Save the relatedInterval, if any, so that it doesn't get modified during allocation.
3710 Interval* savedRelatedInterval = currentInterval->relatedInterval;
3711 currentInterval->relatedInterval = nullptr;
3712
    // We don't really want to change the default assignment,
    // so 1) pretend this isn't active, and 2) remember the old reg
3715 regNumber oldPhysReg = currentInterval->physReg;
3716 RegRecord* oldRegRecord = currentInterval->assignedReg;
3717 assert(oldRegRecord->regNum == oldPhysReg);
3718 currentInterval->isActive = false;
3719
3720 regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition);
3721 if (allocatedReg == REG_NA)
3722 {
3723 allocatedReg = allocateBusyReg(currentInterval, refPosition, false);
3724 }
3725
3726 // Now restore the old info
3727 currentInterval->relatedInterval = savedRelatedInterval;
3728 currentInterval->physReg = oldPhysReg;
3729 currentInterval->assignedReg = oldRegRecord;
3730 currentInterval->isActive = true;
3731
3732 refPosition->copyReg = true;
3733 return allocatedReg;
3734}
3735
//------------------------------------------------------------------------
// isAssigned: Check whether the given RegRecord has an assignedInterval,
//             regardless of lastLocation.
//             This is equivalent to calling the overload below with MaxLocation.
//
// Arguments:
//    regRec       - The RegRecord to check for an assignment.
//    newRegType   - The RegisterType of the interval that would be assigned
//                   (used on ARM to check the other half of a TYP_DOUBLE RegRecord).
//
// Return Value:
//    Returns true if the given RegRecord has an assignedInterval.
//
3751bool LinearScan::isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType))
3752{
3753 return isAssigned(regRec, MaxLocation ARM_ARG(newRegType));
3754}
3755
3756//------------------------------------------------------------------------
3757// isAssigned: Check whether the given RegRecord has an assignedInterval
3758// that has a reference prior to the given location.
3759//
3760// Arguments:
3761// regRec - The RegRecord of interest
3762// lastLocation - The LsraLocation up to which we want to check
3763// newRegType - The `RegisterType` of interval we want to check
3764// (this is for the purposes of checking the other half of a TYP_DOUBLE RegRecord)
3765//
3766// Return value:
3767// Returns true if the given RegRecord (and its other half, if TYP_DOUBLE) has an assignedInterval
3768// that is referenced prior to the given location
3769//
3770// Notes:
3771// The register is not considered to be assigned if it has no assignedInterval, or that Interval's
3772// next reference is beyond lastLocation
3773//
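// For example, if regRec's assignedInterval has its next reference at location 20,
// isAssigned(regRec, 15) returns false, while isAssigned(regRec, 25) returns true.
//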
3774bool LinearScan::isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType))
3775{
3776 Interval* assignedInterval = regRec->assignedInterval;
3777
3778 if ((assignedInterval == nullptr) || assignedInterval->getNextRefLocation() > lastLocation)
3779 {
3780#ifdef _TARGET_ARM_
3781 if (newRegType == TYP_DOUBLE)
3782 {
3783 RegRecord* anotherRegRec = findAnotherHalfRegRec(regRec);
3784
3785 if ((anotherRegRec->assignedInterval == nullptr) ||
3786 (anotherRegRec->assignedInterval->getNextRefLocation() > lastLocation))
3787 {
                // If newRegType is TYP_DOUBLE, the register is considered unassigned
                // only if the other half register is also unassigned (or its next
                // reference is beyond lastLocation).
3790 return false;
3791 }
3792 }
3793 else
3794#endif
3795 {
3796 return false;
3797 }
3798 }
3799
3800 return true;
3801}
3802
//------------------------------------------------------------------------
// checkAndAssignInterval: If the given RegRecord is currently assigned to a different
//                         interval, unassign it; then set its assignedInterval to
//                         'interval'.
//
3806void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval)
3807{
3808 Interval* assignedInterval = regRec->assignedInterval;
3809 if (assignedInterval != nullptr && assignedInterval != interval)
3810 {
3811 // This is allocated to another interval. Either it is inactive, or it was allocated as a
3812 // copyReg and is therefore not the "assignedReg" of the other interval. In the latter case,
3813 // we simply unassign it - in the former case we need to set the physReg on the interval to
3814 // REG_NA to indicate that it is no longer in that register.
3815 // The lack of checking for this case resulted in an assert in the retail version of System.dll,
3816 // in method SerialStream.GetDcbFlag.
3817 // Note that we can't check for the copyReg case, because we may have seen a more recent
3818 // RefPosition for the Interval that was NOT a copyReg.
3819 if (assignedInterval->assignedReg == regRec)
3820 {
3821 assert(assignedInterval->isActive == false);
3822 assignedInterval->physReg = REG_NA;
3823 }
3824 unassignPhysReg(regRec->regNum);
3825 }
3826#ifdef _TARGET_ARM_
3827 // If 'interval' and 'assignedInterval' were both TYP_DOUBLE, then we have unassigned 'assignedInterval'
3828 // from both halves. Otherwise, if 'interval' is TYP_DOUBLE, we now need to unassign the other half.
3829 if ((interval->registerType == TYP_DOUBLE) &&
3830 ((assignedInterval == nullptr) || (assignedInterval->registerType == TYP_FLOAT)))
3831 {
3832 RegRecord* otherRegRecord = getSecondHalfRegRec(regRec);
3833 assignedInterval = otherRegRecord->assignedInterval;
3834 if (assignedInterval != nullptr && assignedInterval != interval)
3835 {
3836 if (assignedInterval->assignedReg == otherRegRecord)
3837 {
3838 assert(assignedInterval->isActive == false);
3839 assignedInterval->physReg = REG_NA;
3840 }
3841 unassignPhysReg(otherRegRecord->regNum);
3842 }
3843 }
3844#endif
3845
3846 updateAssignedInterval(regRec, interval, interval->registerType);
3847}
3848
// Assign the given physical register record to the given interval
3850void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
3851{
3852 regMaskTP assignedRegMask = genRegMask(regRec->regNum);
3853 compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true));
3854
3855 checkAndAssignInterval(regRec, interval);
3856 interval->assignedReg = regRec;
3857
3858 interval->physReg = regRec->regNum;
3859 interval->isActive = true;
3860 if (interval->isLocalVar)
3861 {
3862 // Prefer this register for future references
3863 interval->updateRegisterPreferences(assignedRegMask);
3864 }
3865}
3866
3867//------------------------------------------------------------------------
3868// setIntervalAsSplit: Set this Interval as being split
3869//
3870// Arguments:
3871// interval - The Interval which is being split
3872//
3873// Return Value:
3874// None.
3875//
3876// Notes:
3877// The given Interval will be marked as split, and it will be added to the
3878// set of splitOrSpilledVars.
3879//
3880// Assumptions:
3881// "interval" must be a lclVar interval, as tree temps are never split.
3882// This is asserted in the call to getVarIndex().
3883//
3884void LinearScan::setIntervalAsSplit(Interval* interval)
3885{
3886 if (interval->isLocalVar)
3887 {
3888 unsigned varIndex = interval->getVarIndex(compiler);
3889 if (!interval->isSplit)
3890 {
3891 VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex);
3892 }
3893 else
3894 {
3895 assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex));
3896 }
3897 }
3898 interval->isSplit = true;
3899}
3900
3901//------------------------------------------------------------------------
3902// setIntervalAsSpilled: Set this Interval as being spilled
3903//
3904// Arguments:
3905// interval - The Interval which is being spilled
3906//
3907// Return Value:
3908// None.
3909//
3910// Notes:
3911// The given Interval will be marked as spilled, and it will be added
3912// to the set of splitOrSpilledVars.
3913//
3914void LinearScan::setIntervalAsSpilled(Interval* interval)
3915{
3916 if (interval->isLocalVar)
3917 {
3918 unsigned varIndex = interval->getVarIndex(compiler);
3919 if (!interval->isSpilled)
3920 {
3921 VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex);
3922 }
3923 else
3924 {
3925 assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex));
3926 }
3927 }
3928 interval->isSpilled = true;
3929}
3930
3931//------------------------------------------------------------------------
// spillInterval: Spill the given Interval between "fromRefPosition" and "toRefPosition"
//
// Arguments:
//    interval        - The Interval to be spilled
//    fromRefPosition - The RefPosition at which the Interval is to be spilled
//    toRefPosition   - The RefPosition at which it must be reloaded
3937//
3938// Return Value:
3939// None.
3940//
3941// Assumptions:
3942// fromRefPosition and toRefPosition must not be null
3943//
3944void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition)
3945{
3946 assert(fromRefPosition != nullptr && toRefPosition != nullptr);
3947 assert(fromRefPosition->getInterval() == interval && toRefPosition->getInterval() == interval);
3948 assert(fromRefPosition->nextRefPosition == toRefPosition);
3949
3950 if (!fromRefPosition->lastUse)
3951 {
        // A lclVar def/use ref position should be marked as spillAfter, even if it is
        // reg-optional, when it has not been allocated a register.
3954 if (!fromRefPosition->RequiresRegister() && !(interval->isLocalVar && fromRefPosition->IsActualRef()))
3955 {
3956 fromRefPosition->registerAssignment = RBM_NONE;
3957 }
3958 else
3959 {
3960 fromRefPosition->spillAfter = true;
3961 }
3962 }
3963 assert(toRefPosition != nullptr);
3964
3965#ifdef DEBUG
3966 if (VERBOSE)
3967 {
3968 dumpLsraAllocationEvent(LSRA_EVENT_SPILL, interval);
3969 }
3970#endif // DEBUG
3971
3972 INTRACK_STATS(updateLsraStat(LSRA_STAT_SPILL, fromRefPosition->bbNum));
3973
3974 interval->isActive = false;
3975 setIntervalAsSpilled(interval);
3976
3977 // If fromRefPosition occurs before the beginning of this block, mark this as living in the stack
3978 // on entry to this block.
3979 if (fromRefPosition->nodeLocation <= curBBStartLocation)
3980 {
3981 // This must be a lclVar interval
3982 assert(interval->isLocalVar);
3983 setInVarRegForBB(curBBNum, interval->varNum, REG_STK);
3984 }
3985}
3986
3987//------------------------------------------------------------------------
3988// unassignPhysRegNoSpill: Unassign the given physical register record from
3989// an active interval, without spilling.
3990//
3991// Arguments:
//    regRec - the RegRecord to be unassigned
3993//
3994// Return Value:
3995// None.
3996//
3997// Assumptions:
3998// The assignedInterval must not be null, and must be active.
3999//
4000// Notes:
4001// This method is used to unassign a register when an interval needs to be moved to a
4002// different register, but not (yet) spilled.
4003
4004void LinearScan::unassignPhysRegNoSpill(RegRecord* regRec)
4005{
4006 Interval* assignedInterval = regRec->assignedInterval;
4007 assert(assignedInterval != nullptr && assignedInterval->isActive);
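    // Temporarily mark the interval inactive so that the unassignPhysReg() call below
    // does not spill it (only active intervals are spilled).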
4008 assignedInterval->isActive = false;
4009 unassignPhysReg(regRec, nullptr);
4010 assignedInterval->isActive = true;
4011}
4012
4013//------------------------------------------------------------------------
4014// checkAndClearInterval: Clear the assignedInterval for the given
4015// physical register record
4016//
4017// Arguments:
//    regRec - the physical RegRecord to be unassigned
4019// spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
4020// or nullptr if we aren't spilling
4021//
4022// Return Value:
4023// None.
4024//
4025// Assumptions:
4026// see unassignPhysReg
4027//
4028void LinearScan::checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition)
4029{
4030 Interval* assignedInterval = regRec->assignedInterval;
4031 assert(assignedInterval != nullptr);
4032 regNumber thisRegNum = regRec->regNum;
4033
4034 if (spillRefPosition == nullptr)
4035 {
4036 // Note that we can't assert for the copyReg case
4037 //
4038 if (assignedInterval->physReg == thisRegNum)
4039 {
4040 assert(assignedInterval->isActive == false);
4041 }
4042 }
4043 else
4044 {
4045 assert(spillRefPosition->getInterval() == assignedInterval);
4046 }
4047
4048 updateAssignedInterval(regRec, nullptr, assignedInterval->registerType);
4049}
4050
4051//------------------------------------------------------------------------
// unassignPhysReg: Unassign the given physical register record, spilling its
//                  assignedInterval at its recentRefPosition, if any.
//
// Arguments:
//    regRec     - The RegRecord to be unassigned
//    newRegType - The RegisterType of the interval that would be assigned
4058//
4059// Return Value:
4060// None.
4061//
4062// Notes:
//    On ARM, intervals must be unassigned taking into account the register type
//    of the interval that will be assigned next.
4065//
4066void LinearScan::unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType))
4067{
4068 RegRecord* regRecToUnassign = regRec;
4069#ifdef _TARGET_ARM_
4070 RegRecord* anotherRegRec = nullptr;
4071
4072 if ((regRecToUnassign->assignedInterval != nullptr) &&
4073 (regRecToUnassign->assignedInterval->registerType == TYP_DOUBLE))
4074 {
        // If the register type of the interval being unassigned is TYP_DOUBLE,
        // it must be unassigned from a valid double register (i.e. an even-numbered one).
4077 if (!genIsValidDoubleReg(regRecToUnassign->regNum))
4078 {
4079 regRecToUnassign = findAnotherHalfRegRec(regRec);
4080 }
4081 }
4082 else
4083 {
4084 if (newRegType == TYP_DOUBLE)
4085 {
4086 anotherRegRec = findAnotherHalfRegRec(regRecToUnassign);
4087 }
4088 }
4089#endif
4090
4091 if (regRecToUnassign->assignedInterval != nullptr)
4092 {
4093 unassignPhysReg(regRecToUnassign, regRecToUnassign->assignedInterval->recentRefPosition);
4094 }
4095#ifdef _TARGET_ARM_
4096 if ((anotherRegRec != nullptr) && (anotherRegRec->assignedInterval != nullptr))
4097 {
4098 unassignPhysReg(anotherRegRec, anotherRegRec->assignedInterval->recentRefPosition);
4099 }
4100#endif
4101}
4102
4103//------------------------------------------------------------------------
4104// unassignPhysReg: Unassign the given physical register record, and spill the
4105// assignedInterval at the given spillRefPosition, if any.
4106//
4107// Arguments:
//    regRec           - the RegRecord to be unassigned
4109// spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
4110//
4111// Return Value:
4112// None.
4113//
4114// Assumptions:
4115// The assignedInterval must not be null.
4116// If spillRefPosition is null, the assignedInterval must be inactive, or not currently
4117// assigned to this register (e.g. this is a copyReg for that Interval).
4118// Otherwise, spillRefPosition must be associated with the assignedInterval.
4119//
4120void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition)
4121{
4122 Interval* assignedInterval = regRec->assignedInterval;
4123 assert(assignedInterval != nullptr);
4124 regNumber thisRegNum = regRec->regNum;
4125
4126 // Is assignedInterval actually still assigned to this register?
4127 bool intervalIsAssigned = (assignedInterval->physReg == thisRegNum);
4128
4129#ifdef _TARGET_ARM_
4130 RegRecord* anotherRegRec = nullptr;
4131
4132 // Prepare second half RegRecord of a double register for TYP_DOUBLE
4133 if (assignedInterval->registerType == TYP_DOUBLE)
4134 {
4135 assert(isFloatRegType(regRec->registerType));
4136
4137 anotherRegRec = findAnotherHalfRegRec(regRec);
4138
        // Both RegRecords should have been assigned to the same interval.
4140 assert(assignedInterval == anotherRegRec->assignedInterval);
4141 if (!intervalIsAssigned && (assignedInterval->physReg == anotherRegRec->regNum))
4142 {
4143 intervalIsAssigned = true;
4144 }
4145 }
4146#endif // _TARGET_ARM_
4147
4148 checkAndClearInterval(regRec, spillRefPosition);
4149
4150#ifdef _TARGET_ARM_
4151 if (assignedInterval->registerType == TYP_DOUBLE)
4152 {
        // Both RegRecords should have been unassigned together.
4154 assert(regRec->assignedInterval == nullptr);
4155 assert(anotherRegRec->assignedInterval == nullptr);
4156 }
4157#endif // _TARGET_ARM_
4158
4159 RefPosition* nextRefPosition = nullptr;
4160 if (spillRefPosition != nullptr)
4161 {
4162 nextRefPosition = spillRefPosition->nextRefPosition;
4163 }
4164
4165 if (!intervalIsAssigned && assignedInterval->physReg != REG_NA)
4166 {
4167 // This must have been a temporary copy reg, but we can't assert that because there
4168 // may have been intervening RefPositions that were not copyRegs.
4169
4170 // reg->assignedInterval has already been set to nullptr by checkAndClearInterval()
4171 assert(regRec->assignedInterval == nullptr);
4172 return;
4173 }
4174
4175 regNumber victimAssignedReg = assignedInterval->physReg;
4176 assignedInterval->physReg = REG_NA;
4177
4178 bool spill = assignedInterval->isActive && nextRefPosition != nullptr;
4179 if (spill)
4180 {
4181 // If this is an active interval, it must have a recentRefPosition,
4182 // otherwise it would not be active
4183 assert(spillRefPosition != nullptr);
4184
4185#if 0
4186 // TODO-CQ: Enable this and insert an explicit GT_COPY (otherwise there's no way to communicate
4187 // to codegen that we want the copyReg to be the new home location).
4188 // If the last reference was a copyReg, and we're spilling the register
4189 // it was copied from, then make the copyReg the new primary location
4190 // if possible
4191 if (spillRefPosition->copyReg)
4192 {
4193 regNumber copyFromRegNum = victimAssignedReg;
4194 regNumber copyRegNum = genRegNumFromMask(spillRefPosition->registerAssignment);
4195 if (copyFromRegNum == thisRegNum &&
4196 getRegisterRecord(copyRegNum)->assignedInterval == assignedInterval)
4197 {
4198 assert(copyRegNum != thisRegNum);
4199 assignedInterval->physReg = copyRegNum;
4200 assignedInterval->assignedReg = this->getRegisterRecord(copyRegNum);
4201 return;
4202 }
4203 }
4204#endif // 0
4205#ifdef DEBUG
4206 // With JitStressRegs == 0x80 (LSRA_EXTEND_LIFETIMES), we may have a RefPosition
4207 // that is not marked lastUse even though the treeNode is a lastUse. In that case
4208 // we must not mark it for spill because the register will have been immediately freed
4209 // after use. While we could conceivably add special handling for this case in codegen,
4210 // it would be messy and undesirably cause the "bleeding" of LSRA stress modes outside
4211 // of LSRA.
4212 if (extendLifetimes() && assignedInterval->isLocalVar && RefTypeIsUse(spillRefPosition->refType) &&
4213 spillRefPosition->treeNode != nullptr && (spillRefPosition->treeNode->gtFlags & GTF_VAR_DEATH) != 0)
4214 {
4215 dumpLsraAllocationEvent(LSRA_EVENT_SPILL_EXTENDED_LIFETIME, assignedInterval);
4216 assignedInterval->isActive = false;
4217 spill = false;
4218 // If the spillRefPosition occurs before the beginning of this block, it will have
4219 // been marked as living in this register on entry to this block, but we now need
4220 // to mark this as living on the stack.
4221 if (spillRefPosition->nodeLocation <= curBBStartLocation)
4222 {
4223 setInVarRegForBB(curBBNum, assignedInterval->varNum, REG_STK);
4224 if (spillRefPosition->nextRefPosition != nullptr)
4225 {
4226 setIntervalAsSpilled(assignedInterval);
4227 }
4228 }
4229 else
4230 {
4231 // Otherwise, we need to mark spillRefPosition as lastUse, or the interval
4232 // will remain active beyond its allocated range during the resolution phase.
4233 spillRefPosition->lastUse = true;
4234 }
4235 }
4236 else
4237#endif // DEBUG
4238 {
4239 spillInterval(assignedInterval, spillRefPosition, nextRefPosition);
4240 }
4241 }
4242 // Maintain the association with the interval, if it has more references.
4243 // Or, if we "remembered" an interval assigned to this register, restore it.
4244 if (nextRefPosition != nullptr)
4245 {
4246 assignedInterval->assignedReg = regRec;
4247 }
4248 else if (canRestorePreviousInterval(regRec, assignedInterval))
4249 {
4250 regRec->assignedInterval = regRec->previousInterval;
4251 regRec->previousInterval = nullptr;
4252
4253#ifdef _TARGET_ARM_
        // Note:
        //   We cannot use updateAssignedInterval() and updatePreviousInterval() here,
        //   because regRec may not be an even-numbered float register.
4257
4258 // Update second half RegRecord of a double register for TYP_DOUBLE
4259 if (regRec->assignedInterval->registerType == TYP_DOUBLE)
4260 {
4261 RegRecord* anotherHalfRegRec = findAnotherHalfRegRec(regRec);
4262
4263 anotherHalfRegRec->assignedInterval = regRec->assignedInterval;
4264 anotherHalfRegRec->previousInterval = nullptr;
4265 }
4266#endif // _TARGET_ARM_
4267
4268#ifdef DEBUG
4269 if (spill)
4270 {
4271 dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, regRec->assignedInterval,
4272 thisRegNum);
4273 }
4274 else
4275 {
4276 dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL, regRec->assignedInterval, thisRegNum);
4277 }
4278#endif // DEBUG
4279 }
4280 else
4281 {
4282 updateAssignedInterval(regRec, nullptr, assignedInterval->registerType);
4283 updatePreviousInterval(regRec, nullptr, assignedInterval->registerType);
4284 }
4285}
4286
4287//------------------------------------------------------------------------
// spillGCRefs: Spill any GC-type intervals that are currently in registers.
4289//
4290// Arguments:
4291// killRefPosition - The RefPosition for the kill
4292//
4293// Return Value:
4294// None.
4295//
4296void LinearScan::spillGCRefs(RefPosition* killRefPosition)
4297{
4298 // For each physical register that can hold a GC type,
4299 // if it is occupied by an interval of a GC type, spill that interval.
4300 regMaskTP candidateRegs = killRefPosition->registerAssignment;
4301 while (candidateRegs != RBM_NONE)
4302 {
4303 regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
4304 candidateRegs &= ~nextRegBit;
4305 regNumber nextReg = genRegNumFromMask(nextRegBit);
4306 RegRecord* regRecord = getRegisterRecord(nextReg);
4307 Interval* assignedInterval = regRecord->assignedInterval;
4308 if (assignedInterval == nullptr || (assignedInterval->isActive == false) ||
4309 !varTypeIsGC(assignedInterval->registerType))
4310 {
4311 continue;
4312 }
4313 unassignPhysReg(regRecord, assignedInterval->recentRefPosition);
4314 }
4315 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DONE_KILL_GC_REFS, nullptr, REG_NA, nullptr));
4316}
4317
4318//------------------------------------------------------------------------
4319// processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated
4320//
4321// Arguments:
4322// currentBlock - the BasicBlock we have just finished allocating registers for
4323//
4324// Return Value:
4325// None
4326//
4327// Notes:
4328// Calls processBlockEndLocations() to set the outVarToRegMap, then gets the next block,
4329// and sets the inVarToRegMap appropriately.
4330
4331void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock)
4332{
4333 assert(currentBlock != nullptr);
4334 if (enregisterLocalVars)
4335 {
4336 processBlockEndLocations(currentBlock);
4337 }
4338 markBlockVisited(currentBlock);
4339
4340 // Get the next block to allocate.
4341 // When the last block in the method has successors, there will be a final "RefTypeBB" to
4342 // ensure that we get the varToRegMap set appropriately, but in that case we don't need
4343 // to worry about "nextBlock".
4344 BasicBlock* nextBlock = getNextBlock();
4345 if (nextBlock != nullptr)
4346 {
4347 processBlockStartLocations(nextBlock, true);
4348 }
4349}
4350
4351//------------------------------------------------------------------------
4352// rotateBlockStartLocation: When in the LSRA_BLOCK_BOUNDARY_ROTATE stress mode, attempt to
4353// "rotate" the register assignment for a localVar to the next higher
4354// register that is available.
4355//
4356// Arguments:
4357// interval - the Interval for the variable whose register is getting rotated
4358// targetReg - its register assignment from the predecessor block being used for live-in
4359// availableRegs - registers available for use
4360//
4361// Return Value:
4362// The new register to use.
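//
// For example, the rotation selects the next register in availableRegs that is
// higher than targetReg; if there is no higher available register, it wraps
// around to the lowest available one.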
4363
4364#ifdef DEBUG
4365regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs)
4366{
4367 if (targetReg != REG_STK && getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE)
4368 {
4369 // If we're rotating the register locations at block boundaries, try to use
4370 // the next higher register number of the appropriate register type.
4371 regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs;
4372 regNumber firstReg = REG_NA;
4373 regNumber newReg = REG_NA;
4374 while (candidateRegs != RBM_NONE)
4375 {
4376 regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
4377 candidateRegs &= ~nextRegBit;
4378 regNumber nextReg = genRegNumFromMask(nextRegBit);
4379 if (nextReg > targetReg)
4380 {
4381 newReg = nextReg;
4382 break;
4383 }
4384 else if (firstReg == REG_NA)
4385 {
4386 firstReg = nextReg;
4387 }
4388 }
4389 if (newReg == REG_NA)
4390 {
4391 assert(firstReg != REG_NA);
4392 newReg = firstReg;
4393 }
4394 targetReg = newReg;
4395 }
4396 return targetReg;
4397}
4398#endif // DEBUG
4399
4400#ifdef _TARGET_ARM_
4401//--------------------------------------------------------------------------------------
// isSecondHalfReg: Test whether regRec is the second half of a double register
//                  that is assigned to an interval.
4404//
4405// Arguments:
4406// regRec - a register to be tested
4407// interval - an interval which is assigned to some register
4408//
4409// Assumptions:
4410// None
4411//
4412// Return Value:
//    True only if regRec is the second half of interval's assignedReg
4414//
4415bool LinearScan::isSecondHalfReg(RegRecord* regRec, Interval* interval)
4416{
4417 RegRecord* assignedReg = interval->assignedReg;
4418
4419 if (assignedReg != nullptr && interval->registerType == TYP_DOUBLE)
4420 {
4421 // interval should have been allocated to a valid double register
4422 assert(genIsValidDoubleReg(assignedReg->regNum));
4423
4424 // Find a second half RegRecord of double register
4425 regNumber firstRegNum = assignedReg->regNum;
4426 regNumber secondRegNum = REG_NEXT(firstRegNum);
4427
4428 assert(genIsValidFloatReg(secondRegNum) && !genIsValidDoubleReg(secondRegNum));
4429
4430 RegRecord* secondRegRec = getRegisterRecord(secondRegNum);
4431
4432 return secondRegRec == regRec;
4433 }
4434
4435 return false;
4436}
4437
4438//------------------------------------------------------------------------------------------
4439// getSecondHalfRegRec: Get the second (odd) half of an ARM32 double register
4440//
4441// Arguments:
4442// regRec - A float RegRecord
4443//
4444// Assumptions:
4445// regRec must be a valid double register (i.e. even)
4446//
4447// Return Value:
4448// The RegRecord for the second half of the double register
4449//
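// For example, given the RegRecord for REG_F0 (a valid double register), this
// returns the RegRecord for REG_F1.
//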
4450RegRecord* LinearScan::getSecondHalfRegRec(RegRecord* regRec)
4451{
4452 regNumber secondHalfRegNum;
4453 RegRecord* secondHalfRegRec;
4454
4455 assert(genIsValidDoubleReg(regRec->regNum));
4456
4457 secondHalfRegNum = REG_NEXT(regRec->regNum);
4458 secondHalfRegRec = getRegisterRecord(secondHalfRegNum);
4459
4460 return secondHalfRegRec;
4461}
4462//------------------------------------------------------------------------------------------
// findAnotherHalfRegRec: Find the other half RegRecord that forms the same ARM32 double register
4464//
4465// Arguments:
4466// regRec - A float RegRecord
4467//
4468// Assumptions:
4469// None
4470//
4471// Return Value:
//    The RegRecord that forms the same double register as regRec
4473//
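// For example, given REG_F0 this returns the RegRecord for REG_F1, and given
// REG_F1 it returns the RegRecord for REG_F0.
//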
4474RegRecord* LinearScan::findAnotherHalfRegRec(RegRecord* regRec)
4475{
4476 regNumber anotherHalfRegNum;
4477 RegRecord* anotherHalfRegRec;
4478
4479 assert(genIsValidFloatReg(regRec->regNum));
4480
4481 // Find another half register for TYP_DOUBLE interval,
4482 // following same logic in canRestorePreviousInterval().
4483 if (genIsValidDoubleReg(regRec->regNum))
4484 {
4485 anotherHalfRegNum = REG_NEXT(regRec->regNum);
4486 assert(!genIsValidDoubleReg(anotherHalfRegNum));
4487 }
4488 else
4489 {
4490 anotherHalfRegNum = REG_PREV(regRec->regNum);
4491 assert(genIsValidDoubleReg(anotherHalfRegNum));
4492 }
4493 anotherHalfRegRec = getRegisterRecord(anotherHalfRegNum);
4494
4495 return anotherHalfRegRec;
4496}
4497#endif
4498
4499//--------------------------------------------------------------------------------------
// canRestorePreviousInterval: Test whether we can restore the previous interval
4501//
4502// Arguments:
4503// regRec - a register which contains previous interval to be restored
4504// assignedInterval - an interval just unassigned
4505//
4506// Assumptions:
4507// None
4508//
4509// Return Value:
4510// True only if previous interval of regRec can be restored
4511//
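// Notes:
//    The previous interval can be restored only if it is still linked to this register
//    (its assignedReg is regRec), it is not the interval that was just unassigned, and
//    it still has upcoming references. On ARM, for a TYP_DOUBLE previous interval, the
//    other half register must also be unassigned.
//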
4512bool LinearScan::canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval)
4513{
4514 bool retVal =
4515 (regRec->previousInterval != nullptr && regRec->previousInterval != assignedInterval &&
4516 regRec->previousInterval->assignedReg == regRec && regRec->previousInterval->getNextRefPosition() != nullptr);
4517
4518#ifdef _TARGET_ARM_
4519 if (retVal && regRec->previousInterval->registerType == TYP_DOUBLE)
4520 {
4521 RegRecord* anotherHalfRegRec = findAnotherHalfRegRec(regRec);
4522
4523 retVal = retVal && anotherHalfRegRec->assignedInterval == nullptr;
4524 }
4525#endif
4526
4527 return retVal;
4528}
4529
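//------------------------------------------------------------------------
// isAssignedToInterval: Check whether regRec is the register (or, on ARM, one of the
//                       registers) to which the given interval is assigned.
//
// Arguments:
//    interval - The Interval of interest
//    regRec   - The RegRecord to be tested
//
// Return Value:
//    True if interval->assignedReg is regRec, or (on ARM) if regRec is the second
//    half of the double register assigned to interval.
//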
4530bool LinearScan::isAssignedToInterval(Interval* interval, RegRecord* regRec)
4531{
4532 bool isAssigned = (interval->assignedReg == regRec);
4533#ifdef _TARGET_ARM_
4534 isAssigned |= isSecondHalfReg(regRec, interval);
4535#endif
4536 return isAssigned;
4537}
4538
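//------------------------------------------------------------------------
// unassignIntervalBlockStart: At a block boundary, if the given RegRecord has an
//                             assignedInterval, unassign it, and update the
//                             inVarToRegMap if it was a live-in variable.
//
// Arguments:
//    regRecord     - The RegRecord of interest
//    inVarToRegMap - The VarToRegMap for the block being entered, or nullptr if it
//                    does not need to be updated
//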
4539void LinearScan::unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap)
4540{
4541 // Is there another interval currently assigned to this register? If so unassign it.
4542 Interval* assignedInterval = regRecord->assignedInterval;
4543 if (assignedInterval != nullptr)
4544 {
4545 if (isAssignedToInterval(assignedInterval, regRecord))
4546 {
4547 // Only localVars or constants should be assigned to registers at block boundaries.
4548 if (!assignedInterval->isLocalVar)
4549 {
4550 assert(assignedInterval->isConstant);
4551 // Don't need to update the VarToRegMap.
4552 inVarToRegMap = nullptr;
4553 }
4554
4555 regNumber assignedRegNum = assignedInterval->assignedReg->regNum;
4556
4557 // If the interval is active, it will be set to active when we reach its new
4558 // register assignment (which we must not yet have done, or it wouldn't still be
4559 // assigned to this register).
4560 assignedInterval->isActive = false;
4561 unassignPhysReg(assignedInterval->assignedReg, nullptr);
4562 if ((inVarToRegMap != nullptr) && inVarToRegMap[assignedInterval->getVarIndex(compiler)] == assignedRegNum)
4563 {
4564 inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
4565 }
4566 }
4567 else
4568 {
4569 // This interval is no longer assigned to this register.
4570 updateAssignedInterval(regRecord, nullptr, assignedInterval->registerType);
4571 }
4572 }
4573}
4574
4575//------------------------------------------------------------------------
4576// processBlockStartLocations: Update var locations on entry to 'currentBlock' and clear constant
4577// registers.
4578//
4579// Arguments:
4580// currentBlock - the BasicBlock we are about to allocate registers for
4581// allocationPass - true if we are currently allocating registers (versus writing them back)
4582//
4583// Return Value:
4584// None
4585//
4586// Notes:
4587// During the allocation pass, we use the outVarToRegMap of the selected predecessor to
4588// determine the lclVar locations for the inVarToRegMap.
4589// During the resolution (write-back) pass, we only modify the inVarToRegMap in cases where
4590// a lclVar was spilled after the block had been completed.
4591void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool allocationPass)
4592{
4593 // If we have no register candidates we should only call this method during allocation.
4594
4595 assert(enregisterLocalVars || allocationPass);
4596
4597 if (!enregisterLocalVars)
4598 {
4599 // Just clear any constant registers and return.
4600 for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
4601 {
4602 RegRecord* physRegRecord = getRegisterRecord(reg);
4603 Interval* assignedInterval = physRegRecord->assignedInterval;
4604
4605 if (assignedInterval != nullptr)
4606 {
4607 assert(assignedInterval->isConstant);
4608 physRegRecord->assignedInterval = nullptr;
4609 }
4610 }
4611 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock));
4612 return;
4613 }
4614
4615 unsigned predBBNum = blockInfo[currentBlock->bbNum].predBBNum;
4616 VarToRegMap predVarToRegMap = getOutVarToRegMap(predBBNum);
4617 VarToRegMap inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
4618 bool hasCriticalInEdge = blockInfo[currentBlock->bbNum].hasCriticalInEdge;
4619
4620 VarSetOps::AssignNoCopy(compiler, currentLiveVars,
4621 VarSetOps::Intersection(compiler, registerCandidateVars, currentBlock->bbLiveIn));
4622#ifdef DEBUG
4623 if (getLsraExtendLifeTimes())
4624 {
4625 VarSetOps::AssignNoCopy(compiler, currentLiveVars, registerCandidateVars);
4626 }
4627 // If we are rotating register assignments at block boundaries, we want to make the
4628 // inactive registers available for the rotation.
4629 regMaskTP inactiveRegs = RBM_NONE;
4630#endif // DEBUG
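    // 'liveRegs' accumulates the registers assigned to live-in variables in the loop
    // below; any register not in this set at the end is unassigned (see "Unassign any
    // registers that are no longer live" below).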
4631 regMaskTP liveRegs = RBM_NONE;
4632 VarSetOps::Iter iter(compiler, currentLiveVars);
4633 unsigned varIndex = 0;
4634 while (iter.NextElem(&varIndex))
4635 {
4636 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
4637 if (!compiler->lvaTable[varNum].lvLRACandidate)
4638 {
4639 continue;
4640 }
4641 regNumber targetReg;
4642 Interval* interval = getIntervalForLocalVar(varIndex);
4643 RefPosition* nextRefPosition = interval->getNextRefPosition();
4644 assert(nextRefPosition != nullptr);
4645
4646 if (allocationPass)
4647 {
4648 targetReg = getVarReg(predVarToRegMap, varIndex);
4649#ifdef DEBUG
4650 regNumber newTargetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs));
4651 if (newTargetReg != targetReg)
4652 {
4653 targetReg = newTargetReg;
4654 setIntervalAsSplit(interval);
4655 }
4656#endif // DEBUG
4657 setVarReg(inVarToRegMap, varIndex, targetReg);
4658 }
4659 else // !allocationPass (i.e. resolution/write-back pass)
4660 {
4661 targetReg = getVarReg(inVarToRegMap, varIndex);
4662 // There are four cases that we need to consider during the resolution pass:
4663 // 1. This variable had a register allocated initially, and it was not spilled in the RefPosition
4664 // that feeds this block. In this case, both targetReg and predVarToRegMap[varIndex] will be targetReg.
4665 // 2. This variable had not been spilled prior to the end of predBB, but was later spilled, so
4666 // predVarToRegMap[varIndex] will be REG_STK, but targetReg is its former allocated value.
4667 // In this case, we will normally change it to REG_STK. We will update its "spilled" status when we
4668 // encounter it in resolveLocalRef().
4669 // 2a. If the next RefPosition is marked as a copyReg, we need to retain the allocated register. This is
4670 // because the copyReg RefPosition will not have recorded the "home" register, yet downstream
4671 // RefPositions rely on the correct "home" register.
4672 // 3. This variable was spilled before we reached the end of predBB. In this case, both targetReg and
4673 // predVarToRegMap[varIndex] will be REG_STK, and the next RefPosition will have been marked
4674 // as reload during allocation time if necessary (note that by the time we actually reach the next
            //    RefPosition, we may be using a different predecessor, in which it is still in a register).
4676 // 4. This variable was spilled during the allocation of this block, so targetReg is REG_STK
4677 // (because we set inVarToRegMap at the time we spilled it), but predVarToRegMap[varIndex]
4678 // is not REG_STK. We retain the REG_STK value in the inVarToRegMap.
4679 if (targetReg != REG_STK)
4680 {
4681 if (getVarReg(predVarToRegMap, varIndex) != REG_STK)
4682 {
4683 // Case #1 above.
4684 assert(getVarReg(predVarToRegMap, varIndex) == targetReg ||
4685 getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE);
4686 }
4687 else if (!nextRefPosition->copyReg)
4688 {
4689 // case #2 above.
4690 setVarReg(inVarToRegMap, varIndex, REG_STK);
4691 targetReg = REG_STK;
4692 }
4693 // Else case 2a. - retain targetReg.
4694 }
4695 // Else case #3 or #4, we retain targetReg and nothing further to do or assert.
4696 }
4697 if (interval->physReg == targetReg)
4698 {
4699 if (interval->isActive)
4700 {
4701 assert(targetReg != REG_STK);
4702 assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg &&
4703 interval->assignedReg->assignedInterval == interval);
4704 liveRegs |= genRegMask(targetReg);
4705 continue;
4706 }
4707 }
4708 else if (interval->physReg != REG_NA)
4709 {
4710 // This can happen if we are using the locations from a basic block other than the
4711 // immediately preceding one - where the variable was in a different location.
4712 if (targetReg != REG_STK)
4713 {
4714 // Unassign it from the register (it will get a new register below).
4715 if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
4716 {
4717 interval->isActive = false;
4718 unassignPhysReg(getRegisterRecord(interval->physReg), nullptr);
4719 }
4720 else
4721 {
4722 // This interval was live in this register the last time we saw a reference to it,
4723 // but has since been displaced.
4724 interval->physReg = REG_NA;
4725 }
4726 }
4727 else if (allocationPass)
4728 {
4729 // Keep the register assignment - if another var has it, it will get unassigned.
4730 // Otherwise, resolution will fix it up later, and it will be more
4731 // likely to match other assignments this way.
4732 interval->isActive = true;
4733 liveRegs |= genRegMask(interval->physReg);
4734 INDEBUG(inactiveRegs |= genRegMask(interval->physReg));
4735 setVarReg(inVarToRegMap, varIndex, interval->physReg);
4736 }
4737 else
4738 {
4739 interval->physReg = REG_NA;
4740 }
4741 }
4742 if (targetReg != REG_STK)
4743 {
4744 RegRecord* targetRegRecord = getRegisterRecord(targetReg);
4745 liveRegs |= genRegMask(targetReg);
4746 if (!interval->isActive)
4747 {
4748 interval->isActive = true;
4749 interval->physReg = targetReg;
4750 interval->assignedReg = targetRegRecord;
4751 }
4752 if (targetRegRecord->assignedInterval != interval)
4753 {
4754#ifdef _TARGET_ARM_
4755 // If this is a TYP_DOUBLE interval, and the assigned interval is either null or is TYP_FLOAT,
4756 // we also need to unassign the other half of the register.
4757 // Note that if the assigned interval is TYP_DOUBLE, it will be unassigned below.
4758 if ((interval->registerType == TYP_DOUBLE) &&
4759 ((targetRegRecord->assignedInterval == nullptr) ||
4760 (targetRegRecord->assignedInterval->registerType == TYP_FLOAT)))
4761 {
4762 assert(genIsValidDoubleReg(targetReg));
4763 unassignIntervalBlockStart(findAnotherHalfRegRec(targetRegRecord),
4764 allocationPass ? inVarToRegMap : nullptr);
4765 }
4766#endif // _TARGET_ARM_
4767 unassignIntervalBlockStart(targetRegRecord, allocationPass ? inVarToRegMap : nullptr);
4768 assignPhysReg(targetRegRecord, interval);
4769 }
4770 if (interval->recentRefPosition != nullptr && !interval->recentRefPosition->copyReg &&
4771 interval->recentRefPosition->registerAssignment != genRegMask(targetReg))
4772 {
4773 interval->getNextRefPosition()->outOfOrder = true;
4774 }
4775 }
4776 }
4777
4778 // Unassign any registers that are no longer live.
4779 for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
4780 {
4781 if ((liveRegs & genRegMask(reg)) == 0)
4782 {
4783 RegRecord* physRegRecord = getRegisterRecord(reg);
4784 Interval* assignedInterval = physRegRecord->assignedInterval;
4785
4786 if (assignedInterval != nullptr)
4787 {
4788 assert(assignedInterval->isLocalVar || assignedInterval->isConstant);
4789
4790 if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
4791 {
4792 assignedInterval->isActive = false;
4793 if (assignedInterval->getNextRefPosition() == nullptr)
4794 {
4795 unassignPhysReg(physRegRecord, nullptr);
4796 }
4797 inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
4798 }
4799 else
4800 {
4801 // This interval may still be active, but was in another register in an
4802 // intervening block.
4803 updateAssignedInterval(physRegRecord, nullptr, assignedInterval->registerType);
4804 }
4805
4806#ifdef _TARGET_ARM_
4807 // unassignPhysReg, above, may have restored a 'previousInterval', in which case we need to
4808 // get the value of 'physRegRecord->assignedInterval' rather than using 'assignedInterval'.
4809 if (physRegRecord->assignedInterval != nullptr)
4810 {
4811 assignedInterval = physRegRecord->assignedInterval;
4812 }
4813 if (assignedInterval->registerType == TYP_DOUBLE)
4814 {
4815 // Skip next float register, because we already addressed a double register
4816 assert(genIsValidDoubleReg(reg));
4817 reg = REG_NEXT(reg);
4818 }
4819#endif // _TARGET_ARM_
4820 }
4821 }
4822#ifdef _TARGET_ARM_
4823 else
4824 {
4825 RegRecord* physRegRecord = getRegisterRecord(reg);
4826 Interval* assignedInterval = physRegRecord->assignedInterval;
4827
4828 if (assignedInterval != nullptr && assignedInterval->registerType == TYP_DOUBLE)
4829 {
4830 // Skip next float register, because we already addressed a double register
4831 assert(genIsValidDoubleReg(reg));
4832 reg = REG_NEXT(reg);
4833 }
4834 }
4835#endif // _TARGET_ARM_
4836 }
4837 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock));
4838}
4839
4840//------------------------------------------------------------------------
4841// processBlockEndLocations: Record the variables occupying registers after completing the current block.
4842//
4843// Arguments:
4844// currentBlock - the block we have just completed.
4845//
4846// Return Value:
4847// None
4848//
4849// Notes:
4850//    This must be called during both the allocation and resolution (write-back) phases.
4851// This is because we need to have the outVarToRegMap locations in order to set the locations
4852// at successor blocks during allocation time, but if lclVars are spilled after a block has been
4853// completed, we need to record the REG_STK location for those variables at resolution time.
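//
//    For example (illustrative): if V01 is spilled by a later RefPosition after this
//    block has been allocated, re-running this method during the write-back phase
//    records outVarToRegMap[V01] == REG_STK instead of the register that was captured
//    during allocation.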
4854
4855void LinearScan::processBlockEndLocations(BasicBlock* currentBlock)
4856{
4857 assert(currentBlock != nullptr && currentBlock->bbNum == curBBNum);
4858 VarToRegMap outVarToRegMap = getOutVarToRegMap(curBBNum);
4859
4860 VarSetOps::AssignNoCopy(compiler, currentLiveVars,
4861 VarSetOps::Intersection(compiler, registerCandidateVars, currentBlock->bbLiveOut));
4862#ifdef DEBUG
4863 if (getLsraExtendLifeTimes())
4864 {
4865 VarSetOps::Assign(compiler, currentLiveVars, registerCandidateVars);
4866 }
4867#endif // DEBUG
4868 regMaskTP liveRegs = RBM_NONE;
4869 VarSetOps::Iter iter(compiler, currentLiveVars);
4870 unsigned varIndex = 0;
4871 while (iter.NextElem(&varIndex))
4872 {
4873 Interval* interval = getIntervalForLocalVar(varIndex);
4874 if (interval->isActive)
4875 {
4876 assert(interval->physReg != REG_NA && interval->physReg != REG_STK);
4877 setVarReg(outVarToRegMap, varIndex, interval->physReg);
4878 }
4879 else
4880 {
4881 outVarToRegMap[varIndex] = REG_STK;
4882 }
4883 }
4884 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
4885}
4886
4887#ifdef DEBUG
4888void LinearScan::dumpRefPositions(const char* str)
4889{
4890 printf("------------\n");
4891 printf("REFPOSITIONS %s: \n", str);
4892 printf("------------\n");
4893 for (RefPosition& refPos : refPositions)
4894 {
4895 refPos.dump();
4896 }
4897}
4898#endif // DEBUG
4899
4900bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType)
4901{
4902 RegRecord* physRegRecord = getRegisterRecord(regNum);
4903
4904 bool isFree = physRegRecord->isFree();
4905
4906#ifdef _TARGET_ARM_
4907 if (isFree && regType == TYP_DOUBLE)
4908 {
4909 isFree = getSecondHalfRegRec(physRegRecord)->isFree();
4910 }
4911#endif // _TARGET_ARM_
4912
4913 return isFree;
4914}
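
// For example (ARM32, illustrative): a TYP_DOUBLE value in d0 occupies the float
// registers f0 and f1, so a double is allocatable there only if both halves are free:
//
//     // Hypothetical query; d0 is modeled as the pair (f0, f1).
//     bool canUseD0 = registerIsFree(REG_F0, TYP_DOUBLE);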
4915
4916//------------------------------------------------------------------------
4917// LinearScan::freeRegister: Make a register available for use
4918//
4919// Arguments:
4920// physRegRecord - the RegRecord for the register to be freed.
4921//
4922// Return Value:
4923// None.
4924//
4925// Assumptions:
4927//    It may be that the RegRecord has already been freed, e.g. due to a kill,
4928//    in which case this method has no effect.
4929//
4930// Notes:
4931// If there is currently an Interval assigned to this register, and it has
4932// more references (i.e. this is a local last-use, but more uses and/or
4933// defs remain), it will remain assigned to the physRegRecord. However, since
4934// it is marked inactive, the register will be available, albeit less desirable
4935// to allocate.
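//
//    For example (x64 names, illustrative): suppose V01 is assigned rax and this is
//    the last use of its current live range. If V01's next RefPosition is a def, the
//    value in rax can never be needed again, so the assignment is severed; if it is
//    a use, the now-inactive assignment is retained so that a later reference might
//    find the value still in rax and avoid a reload.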
4936void LinearScan::freeRegister(RegRecord* physRegRecord)
4937{
4938 Interval* assignedInterval = physRegRecord->assignedInterval;
4939 // It may have already been freed by a "Kill"
4940 if (assignedInterval != nullptr)
4941 {
4942 assignedInterval->isActive = false;
4943        // If this interval holds a constant that we may encounter (and reuse) again,
4944 // don't unassign it until we need the register.
4945 if (!assignedInterval->isConstant)
4946 {
4947 RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
4948 // Unassign the register only if there are no more RefPositions, or the next
4949 // one is a def. Note that the latter condition doesn't actually ensure that
4950 // there aren't subsequent uses that could be reached by a def in the assigned
4951 // register, but is merely a heuristic to avoid tying up the register (or using
4952 // it when it's non-optimal). A better alternative would be to use SSA, so that
4953 // we wouldn't unnecessarily link separate live ranges to the same register.
4954 if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))
4955 {
4956#ifdef _TARGET_ARM_
4957 assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum));
4958#endif // _TARGET_ARM_
4959 unassignPhysReg(physRegRecord, nullptr);
4960 }
4961 }
4962 }
4963}
4964
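// For example (x64 names, illustrative):
//
//     freeRegisters(RBM_RAX | RBM_RCX); // peels the lowest bit each iteration,
//                                       // freeing rax and then rcx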
4965void LinearScan::freeRegisters(regMaskTP regsToFree)
4966{
4967 if (regsToFree == RBM_NONE)
4968 {
4969 return;
4970 }
4971
4972 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS));
4973 while (regsToFree != RBM_NONE)
4974 {
4975 regMaskTP nextRegBit = genFindLowestBit(regsToFree);
4976 regsToFree &= ~nextRegBit;
4977 regNumber nextReg = genRegNumFromMask(nextRegBit);
4978 freeRegister(getRegisterRecord(nextReg));
4979 }
4980}
4981
4982// Actual register allocation, accomplished by iterating over all of the previously
4983// constructed Intervals
4984// Loosely based on raAssignVars()
4985//
4986void LinearScan::allocateRegisters()
4987{
4988 JITDUMP("*************** In LinearScan::allocateRegisters()\n");
4989 DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegisters"));
4990
4991 // at start, nothing is active except for register args
4992 for (Interval& interval : intervals)
4993 {
4994 Interval* currentInterval = &interval;
4995 currentInterval->recentRefPosition = nullptr;
4996 currentInterval->isActive = false;
4997 if (currentInterval->isLocalVar)
4998 {
4999 LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
5000 if (varDsc->lvIsRegArg && currentInterval->firstRefPosition != nullptr)
5001 {
5002 currentInterval->isActive = true;
5003 }
5004 }
5005 }
5006
5007 for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
5008 {
5009 getRegisterRecord(reg)->recentRefPosition = nullptr;
5010 getRegisterRecord(reg)->isActive = false;
5011 }
5012
5013#ifdef DEBUG
5014 regNumber lastAllocatedReg = REG_NA;
5015 if (VERBOSE)
5016 {
5017 dumpRefPositions("BEFORE ALLOCATION");
5018 dumpVarRefPositions("BEFORE ALLOCATION");
5019
5020 printf("\n\nAllocating Registers\n"
5021 "--------------------\n");
5022 // Start with a small set of commonly used registers, so that we don't keep having to print a new title.
5023 registersToDump = LsraLimitSmallIntSet | LsraLimitSmallFPSet;
5024 dumpRegRecordHeader();
5025 // Now print an empty "RefPosition", since we complete the dump of the regs at the beginning of the loop.
5026 printf(indentFormat, "");
5027 }
5028#endif // DEBUG
5029
5030 BasicBlock* currentBlock = nullptr;
5031
5032 LsraLocation prevLocation = MinLocation;
5033 regMaskTP regsToFree = RBM_NONE;
5034 regMaskTP delayRegsToFree = RBM_NONE;
5035
5036 // This is the most recent RefPosition for which a register was allocated
5037 // - currently only used for DEBUG but maintained in non-debug, for clarity of code
5038 // (and will be optimized away because in non-debug spillAlways() unconditionally returns false)
5039 RefPosition* lastAllocatedRefPosition = nullptr;
5040
5041 bool handledBlockEnd = false;
5042
5043 for (RefPosition& refPositionIterator : refPositions)
5044 {
5045 RefPosition* currentRefPosition = &refPositionIterator;
5046
5047#ifdef DEBUG
5048 // Set the activeRefPosition to null until we're done with any boundary handling.
5049 activeRefPosition = nullptr;
5050 if (VERBOSE)
5051 {
5052 // We're really dumping the RegRecords "after" the previous RefPosition, but it's more convenient
5053 // to do this here, since there are a number of "continue"s in this loop.
5054 dumpRegRecords();
5055 }
5056#endif // DEBUG
5057
5058 // This is the previousRefPosition of the current Referent, if any
5059 RefPosition* previousRefPosition = nullptr;
5060
5061 Interval* currentInterval = nullptr;
5062 Referenceable* currentReferent = nullptr;
5063 bool isInternalRef = false;
5064 RefType refType = currentRefPosition->refType;
5065
5066 currentReferent = currentRefPosition->referent;
5067
5068 if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->isPhysRegRef &&
5069 !lastAllocatedRefPosition->getInterval()->isInternal &&
5070 (RefTypeIsDef(lastAllocatedRefPosition->refType) || lastAllocatedRefPosition->getInterval()->isLocalVar))
5071 {
5072 assert(lastAllocatedRefPosition->registerAssignment != RBM_NONE);
5073 RegRecord* regRecord = lastAllocatedRefPosition->getInterval()->assignedReg;
5074 unassignPhysReg(regRecord, lastAllocatedRefPosition);
5075 // Now set lastAllocatedRefPosition to null, so that we don't try to spill it again
5076 lastAllocatedRefPosition = nullptr;
5077 }
5078
5079 // We wait to free any registers until we've completed all the
5080 // uses for the current node.
5081 // This avoids reusing registers too soon.
5082 // We free before the last true def (after all the uses & internal
5083 // registers), and then again at the beginning of the next node.
5084 // This is made easier by assigning two LsraLocations per node - one
5085 // for all the uses, internal registers & all but the last def, and
5086 // another for the final def (if any).
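        //
        // For example (illustrative): a node occupying locations (10, 11) has its uses,
        // internal registers, and all but its last def at location 10, and its final def
        // at location 11. Registers freed at location 10 are available for the def, while
        // "delayRegFree" registers are parked in delayRegsToFree and are not freed until
        // we advance past location 11, so they still interfere with the def.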
5087
5088 LsraLocation currentLocation = currentRefPosition->nodeLocation;
5089
5090 if ((regsToFree | delayRegsToFree) != RBM_NONE)
5091 {
5092 // Free at a new location, or at a basic block boundary
5093 if (refType == RefTypeBB)
5094 {
5095 assert(currentLocation > prevLocation);
5096 }
5097 if (currentLocation > prevLocation)
5098 {
5099 freeRegisters(regsToFree);
5100 if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE))
5101 {
5102 // We should never see a delayReg that is delayed until a Location that has no RefPosition
5103 // (that would be the RefPosition that it was supposed to interfere with).
5104 assert(!"Found a delayRegFree associated with Location with no reference");
5105 // However, to be cautious for the Release build case, we will free them.
5106 freeRegisters(delayRegsToFree);
5107 delayRegsToFree = RBM_NONE;
5108 }
5109 regsToFree = delayRegsToFree;
5110 delayRegsToFree = RBM_NONE;
5111 }
5112 }
5113 prevLocation = currentLocation;
5114
5115        // Get the previous RefPosition for this referent, then make the current one the most recent.
5116 if (currentReferent != nullptr)
5117 {
5118 previousRefPosition = currentReferent->recentRefPosition;
5119 currentReferent->recentRefPosition = currentRefPosition;
5120 }
5121 else
5122 {
5123 assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs));
5124 }
5125
5126#ifdef DEBUG
5127 activeRefPosition = currentRefPosition;
5128#endif // DEBUG
5129
5130 // For the purposes of register resolution, we handle the DummyDefs before
5131 // the block boundary - so the RefTypeBB is after all the DummyDefs.
5132 // However, for the purposes of allocation, we want to handle the block
5133 // boundary first, so that we can free any registers occupied by lclVars
5134 // that aren't live in the next block and make them available for the
5135 // DummyDefs.
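        //
        // For example (illustrative): at a block boundary the RefPositions appear in the
        // order [DummyDef V01][DummyDef V02][RefTypeBB], but the first DummyDef triggers
        // the boundary processing below (handledBlockEnd), so registers held by lclVars
        // that are dead out of the previous block are freed before the DummyDefs are
        // allocated.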
5136
5137 if (!handledBlockEnd && (refType == RefTypeBB || refType == RefTypeDummyDef))
5138 {
5139 // Free any delayed regs (now in regsToFree) before processing the block boundary
5140 freeRegisters(regsToFree);
5141 regsToFree = RBM_NONE;
5142 handledBlockEnd = true;
5143 curBBStartLocation = currentRefPosition->nodeLocation;
5144 if (currentBlock == nullptr)
5145 {
5146 currentBlock = startBlockSequence();
5147 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, compiler->fgFirstBB));
5148 }
5149 else
5150 {
5151 processBlockEndAllocation(currentBlock);
5152 currentBlock = moveToNextBlock();
5153 }
5154 }
5155
5156 if (refType == RefTypeBB)
5157 {
5158 handledBlockEnd = false;
5159 continue;
5160 }
5161
5162 if (refType == RefTypeKillGCRefs)
5163 {
5164 spillGCRefs(currentRefPosition);
5165 continue;
5166 }
5167
5168 // If this is a FixedReg, disassociate any inactive constant interval from this register.
5169 // Otherwise, do nothing.
5170 if (refType == RefTypeFixedReg)
5171 {
5172 RegRecord* regRecord = currentRefPosition->getReg();
5173 Interval* assignedInterval = regRecord->assignedInterval;
5174
5175 if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant)
5176 {
5177 regRecord->assignedInterval = nullptr;
5178
5179#ifdef _TARGET_ARM_
5180 // Update overlapping floating point register for TYP_DOUBLE
5181 if (assignedInterval->registerType == TYP_DOUBLE)
5182 {
5183 regRecord = findAnotherHalfRegRec(regRecord);
5184 assert(regRecord->assignedInterval == assignedInterval);
5185 regRecord->assignedInterval = nullptr;
5186 }
5187#endif
5188 }
5189 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg()));
5190 continue;
5191 }
5192
5193 // If this is an exposed use, do nothing - this is merely a placeholder to attempt to
5194 // ensure that a register is allocated for the full lifetime. The resolution logic
5195 // will take care of moving to the appropriate register if needed.
5196
5197 if (refType == RefTypeExpUse)
5198 {
5199 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_EXP_USE));
5200 continue;
5201 }
5202
5203 regNumber assignedRegister = REG_NA;
5204
5205 if (currentRefPosition->isIntervalRef())
5206 {
5207 currentInterval = currentRefPosition->getInterval();
5208 assignedRegister = currentInterval->physReg;
5209
5210 // Identify the special cases where we decide up-front not to allocate
5211 bool allocate = true;
5212 bool didDump = false;
5213
5214 if (refType == RefTypeParamDef || refType == RefTypeZeroInit)
5215 {
5216                // For a ParamDef with a weighted refCount no greater than unity, don't enregister it at entry.
5217 // TODO-CQ: Consider doing this only for stack parameters, since otherwise we may be needlessly
5218 // inserting a store.
5219 LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
5220 assert(varDsc != nullptr);
5221 if (refType == RefTypeParamDef && varDsc->lvRefCntWtd() <= BB_UNITY_WEIGHT)
5222 {
5223 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval));
5224 didDump = true;
5225 allocate = false;
5226 setIntervalAsSpilled(currentInterval);
5227 }
5228                // If it has no actual references, mark this RefPosition as "lastUse"; since the
5229                // variable is not actually part of any flow, it won't have been marked during
5230                // dataflow. Otherwise, if we allocated a register, we would never unassign it.
5231 else if (currentRefPosition->nextRefPosition == nullptr)
5232 {
5233 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval));
5234 currentRefPosition->lastUse = true;
5235 }
5236 }
5237#ifdef FEATURE_SIMD
5238 else if (refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse)
5239 {
5240 Interval* lclVarInterval = currentInterval->relatedInterval;
5241 if (lclVarInterval->physReg == REG_NA)
5242 {
5243 allocate = false;
5244 }
5245 }
5246#endif // FEATURE_SIMD
5247
5248 if (allocate == false)
5249 {
5250 if (assignedRegister != REG_NA)
5251 {
5252 unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
5253 }
5254 else if (!didDump)
5255 {
5256 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5257 didDump = true;
5258 }
5259 currentRefPosition->registerAssignment = RBM_NONE;
5260 continue;
5261 }
5262
5263 if (currentInterval->isSpecialPutArg)
5264 {
5265 assert(!currentInterval->isLocalVar);
5266 Interval* srcInterval = currentInterval->relatedInterval;
5267 assert(srcInterval != nullptr && srcInterval->isLocalVar);
5268 if (refType == RefTypeDef)
5269 {
5270 assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1);
5271 RegRecord* physRegRecord = srcInterval->assignedReg;
5272
5273                    // For a putarg_reg to be special, its next use location has to be the same
5274                    // as the fixed reg's next kill location. Otherwise, if the source lcl var's
5275                    // next use came after the kill of the fixed reg but before putarg_reg's next
5276                    // use, the fixed reg's kill would spill the source but not the putarg_reg,
5277                    // if it were treated as special.
5278 if (srcInterval->isActive &&
5279 genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment &&
5280 currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation())
5281 {
5282 assert(physRegRecord->regNum == srcInterval->physReg);
5283
5284                        // A special putarg_reg acts as a pass-thru, since the source lcl var and
5285                        // the putarg_reg are allocated the same register. The physical RegRecord
5286                        // continues to point to the source lcl var's interval rather than to
5287                        // putarg_reg's interval. So if the register allocated to the source lcl
5288                        // var were spilled and reallocated to another tree node before its use
5289                        // at the call node, it would be the lcl var that got spilled, not the
5290                        // putarg_reg, since the physical RegRecord points to the lcl var's
5291                        // interval. As a result, the arg register would be trashed, leading to
5292                        // bad codegen. The assumption here is that the source lcl var of a
5293                        // special putarg_reg does not get spilled and reallocated prior to its
5294                        // use at the call node. This is ensured by marking the physical reg
5295                        // record as busy until the next kill.
5296 physRegRecord->isBusyUntilNextKill = true;
5297 }
5298 else
5299 {
5300 currentInterval->isSpecialPutArg = false;
5301 }
5302 }
5303 // If this is still a SpecialPutArg, continue;
5304 if (currentInterval->isSpecialPutArg)
5305 {
5306 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval,
5307 currentRefPosition->assignedReg()));
5308 continue;
5309 }
5310 }
5311
5312 if (assignedRegister == REG_NA && RefTypeIsUse(refType))
5313 {
5314 currentRefPosition->reload = true;
5315 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister));
5316 }
5317 }
5318
5319 regMaskTP assignedRegBit = RBM_NONE;
5320 bool isInRegister = false;
5321 if (assignedRegister != REG_NA)
5322 {
5323 isInRegister = true;
5324 assignedRegBit = genRegMask(assignedRegister);
5325 if (!currentInterval->isActive)
5326 {
5327 // If this is a use, it must have started the block on the stack, but the register
5328 // was available for use so we kept the association.
5329 if (RefTypeIsUse(refType))
5330 {
5331 assert(enregisterLocalVars);
5332 assert(inVarToRegMaps[curBBNum][currentInterval->getVarIndex(compiler)] == REG_STK &&
5333 previousRefPosition->nodeLocation <= curBBStartLocation);
5334 isInRegister = false;
5335 }
5336 else
5337 {
5338 currentInterval->isActive = true;
5339 }
5340 }
5341 assert(currentInterval->assignedReg != nullptr &&
5342 currentInterval->assignedReg->regNum == assignedRegister &&
5343 currentInterval->assignedReg->assignedInterval == currentInterval);
5344 }
5345
5346 // If this is a physical register, we unconditionally assign it to itself!
5347 if (currentRefPosition->isPhysRegRef)
5348 {
5349 RegRecord* currentReg = currentRefPosition->getReg();
5350 Interval* assignedInterval = currentReg->assignedInterval;
5351
5352 if (assignedInterval != nullptr)
5353 {
5354 unassignPhysReg(currentReg, assignedInterval->recentRefPosition);
5355 }
5356 currentReg->isActive = true;
5357 assignedRegister = currentReg->regNum;
5358 assignedRegBit = genRegMask(assignedRegister);
5359 if (refType == RefTypeKill)
5360 {
5361 currentReg->isBusyUntilNextKill = false;
5362 }
5363 }
5364 else if (previousRefPosition != nullptr)
5365 {
5366 assert(previousRefPosition->nextRefPosition == currentRefPosition);
5367 assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment ||
5368 currentRefPosition->outOfOrder || previousRefPosition->copyReg ||
5369 previousRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef);
5370 }
5371 else if (assignedRegister != REG_NA)
5372 {
5373 // Handle the case where this is a preassigned register (i.e. parameter).
5374 // We don't want to actually use the preassigned register if it's not
5375 // going to cover the lifetime - but we had to preallocate it to ensure
5376 // that it remained live.
5377 // TODO-CQ: At some point we may want to refine the analysis here, in case
5378 // it might be beneficial to keep it in this reg for PART of the lifetime
5379 if (currentInterval->isLocalVar)
5380 {
5381 regMaskTP preferences = currentInterval->registerPreferences;
5382 bool keepAssignment = true;
5383 bool matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE;
5384
5385 // Will the assigned register cover the lifetime? If not, does it at least
5386 // meet the preferences for the next RefPosition?
5387 RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg);
5388 RefPosition* nextPhysRegRefPos = physRegRecord->getNextRefPosition();
5389 if (nextPhysRegRefPos != nullptr &&
5390 nextPhysRegRefPos->nodeLocation <= currentInterval->lastRefPosition->nodeLocation)
5391 {
5392 // Check to see if the existing assignment matches the preferences (e.g. callee save registers)
5393                    // and ensure that the next use of this localVar does not occur after the nextPhysRegRefPos.
5394 // There must be a next RefPosition, because we know that the Interval extends beyond the
5395 // nextPhysRegRefPos.
5396 RefPosition* nextLclVarRefPos = currentRefPosition->nextRefPosition;
5397 assert(nextLclVarRefPos != nullptr);
5398 if (!matchesPreferences || nextPhysRegRefPos->nodeLocation < nextLclVarRefPos->nodeLocation ||
5399 physRegRecord->conflictingFixedRegReference(nextLclVarRefPos))
5400 {
5401 keepAssignment = false;
5402 }
5403 }
5404 else if (refType == RefTypeParamDef && !matchesPreferences)
5405 {
5406 // Don't use the register, even if available, if it doesn't match the preferences.
5407 // Note that this case is only for ParamDefs, for which we haven't yet taken preferences
5408 // into account (we've just automatically got the initial location). In other cases,
5409 // we would already have put it in a preferenced register, if it was available.
5410 // TODO-CQ: Consider expanding this to check availability - that would duplicate
5411 // code here, but otherwise we may wind up in this register anyway.
5412 keepAssignment = false;
5413 }
5414
5415 if (keepAssignment == false)
5416 {
5417 currentRefPosition->registerAssignment = allRegs(currentInterval->registerType);
5418 unassignPhysRegNoSpill(physRegRecord);
5419
5420 // If the preferences are currently set to just this register, reset them to allRegs
5421                    // of the appropriate type (just as we reset the registerAssignment for this
5422                    // RefPosition, above).
5423 // Otherwise, simply remove this register from the preferences, if it's there.
5424
5425 if (currentInterval->registerPreferences == assignedRegBit)
5426 {
5427 currentInterval->registerPreferences = currentRefPosition->registerAssignment;
5428 }
5429 else
5430 {
5431 currentInterval->registerPreferences &= ~assignedRegBit;
5432 }
5433
5434 assignedRegister = REG_NA;
5435 assignedRegBit = RBM_NONE;
5436 }
5437 }
5438 }
5439
5440 if (assignedRegister != REG_NA)
5441 {
5442 // If there is a conflicting fixed reference, insert a copy.
5443 RegRecord* physRegRecord = getRegisterRecord(assignedRegister);
5444 if (physRegRecord->conflictingFixedRegReference(currentRefPosition))
5445 {
5446 // We may have already reassigned the register to the conflicting reference.
5447 // If not, we need to unassign this interval.
5448 if (physRegRecord->assignedInterval == currentInterval)
5449 {
5450 unassignPhysRegNoSpill(physRegRecord);
5451 }
5452 currentRefPosition->moveReg = true;
5453 assignedRegister = REG_NA;
5454 setIntervalAsSplit(currentInterval);
5455 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister));
5456 }
5457 else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0)
5458 {
5459 currentRefPosition->registerAssignment = assignedRegBit;
5460 if (!currentReferent->isActive)
5461 {
5462 // If we've got an exposed use at the top of a block, the
5463 // interval might not have been active. Otherwise if it's a use,
5464 // the interval must be active.
5465 if (refType == RefTypeDummyDef)
5466 {
5467 currentReferent->isActive = true;
5468 assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval);
5469 }
5470 else
5471 {
5472 currentRefPosition->reload = true;
5473 }
5474 }
5475 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister));
5476 }
5477 else
5478 {
5479 assert(currentInterval != nullptr);
5480
5481 // It's already in a register, but not one we need.
5482 if (!RefTypeIsDef(currentRefPosition->refType))
5483 {
5484 regNumber copyReg = assignCopyReg(currentRefPosition);
5485 assert(copyReg != REG_NA);
5486 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg));
5487 lastAllocatedRefPosition = currentRefPosition;
5488 if (currentRefPosition->lastUse)
5489 {
5490 if (currentRefPosition->delayRegFree)
5491 {
5492 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval,
5493 assignedRegister));
5494 delayRegsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
5495 }
5496 else
5497 {
5498 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister));
5499 regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
5500 }
5501 }
5502 // If this is a tree temp (non-localVar) interval, we will need an explicit move.
5503 if (!currentInterval->isLocalVar)
5504 {
5505 currentRefPosition->moveReg = true;
5506 currentRefPosition->copyReg = false;
5507 }
5508 continue;
5509 }
5510 else
5511 {
5512 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
5513 regsToFree |= genRegMask(assignedRegister);
5514 // We want a new register, but we don't want this to be considered a spill.
5515 assignedRegister = REG_NA;
5516 if (physRegRecord->assignedInterval == currentInterval)
5517 {
5518 unassignPhysRegNoSpill(physRegRecord);
5519 }
5520 }
5521 }
5522 }
5523
5524 if (assignedRegister == REG_NA)
5525 {
5526 bool allocateReg = true;
5527
5528 if (currentRefPosition->AllocateIfProfitable())
5529 {
5530                // We can avoid allocating a register if this is the last use and it requires a reload.
5531 if (currentRefPosition->lastUse && currentRefPosition->reload)
5532 {
5533 allocateReg = false;
5534 }
5535
5536#ifdef DEBUG
5537                // Under stress mode, don't attempt to allocate a register for
5538                // a reg-optional RefPosition.
5539 if (allocateReg && regOptionalNoAlloc())
5540 {
5541 allocateReg = false;
5542 }
5543#endif
5544 }
5545
5546 if (allocateReg)
5547 {
5548 // Try to allocate a register
5549 assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
5550 }
5551
5552 // If no register was found, and if the currentRefPosition must have a register,
5553 // then find a register to spill
5554 if (assignedRegister == REG_NA)
5555 {
5556#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5557 if (refType == RefTypeUpperVectorSaveDef)
5558 {
5559 // TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable.
5560                    // TODO-CQ: Save the value directly to memory, #18144.
5561 // TODO-ARM64-CQ: Determine whether copying to one integer callee-save registers would be
5562 // profitable.
5563
5564 // SaveDef position occurs after the Use of args and at the same location as Kill/Def
5565                    // positions of a call node. But the SaveDef position cannot use any of the arg regs,
5566                    // as they are needed for the call node.
5567 currentRefPosition->registerAssignment =
5568 (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH & ~RBM_FLTARG_REGS);
5569 assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
5570
5571 // There MUST be caller-save registers available, because they have all just been killed.
5572 // Amd64 Windows: xmm4-xmm5 are guaranteed to be available as xmm0-xmm3 are used for passing args.
5573 // Amd64 Unix: xmm8-xmm15 are guaranteed to be available as xmm0-xmm7 are used for passing args.
5574                    // X86 RyuJIT Windows: xmm4-xmm7 are guaranteed to be available.
5575 assert(assignedRegister != REG_NA);
5576
5577 // Now, spill it.
5578 // Note:
5579                    //  i) The reason we have to spill is that the SaveDef position is allocated after the Kill
5580                    //     positions of the call node are processed. Since callee-trash registers are killed by
5581                    //     the call node, we explicitly spill and unassign the register.
5582                    //  ii) These will look a bit backward in the dump, but it's a pain to dump the alloc before
5583                    //      the spill.
5584 unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
5585 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister));
5586
5587 // Now set assignedRegister to REG_NA again so that we don't re-activate it.
5588 assignedRegister = REG_NA;
5589 }
5590 else
5591#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5592 if (currentRefPosition->RequiresRegister() || currentRefPosition->AllocateIfProfitable())
5593 {
5594 if (allocateReg)
5595 {
5596 assignedRegister = allocateBusyReg(currentInterval, currentRefPosition,
5597 currentRefPosition->AllocateIfProfitable());
5598 }
5599
5600 if (assignedRegister != REG_NA)
5601 {
5602 INDEBUG(
5603 dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister));
5604 }
5605 else
5606 {
5607                        // This can happen only for RefPositions that are marked as
5608                        // allocate-if-profitable (reg-optional).
5609 noway_assert(currentRefPosition->AllocateIfProfitable());
5610
5611 currentRefPosition->registerAssignment = RBM_NONE;
5612 currentRefPosition->reload = false;
5613 setIntervalAsSpilled(currentInterval);
5614
5615 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5616 }
5617 }
5618 else
5619 {
5620 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5621 currentRefPosition->registerAssignment = RBM_NONE;
5622 currentInterval->isActive = false;
5623 setIntervalAsSpilled(currentInterval);
5624 }
5625 }
5626#ifdef DEBUG
5627 else
5628 {
5629 if (VERBOSE)
5630 {
5631 if (currentInterval->isConstant && (currentRefPosition->treeNode != nullptr) &&
5632 currentRefPosition->treeNode->IsReuseRegVal())
5633 {
5634 dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, currentInterval, assignedRegister, currentBlock);
5635 }
5636 else
5637 {
5638 dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister, currentBlock);
5639 }
5640 }
5641 }
5642#endif // DEBUG
5643
5644 if (refType == RefTypeDummyDef && assignedRegister != REG_NA)
5645 {
5646 setInVarRegForBB(curBBNum, currentInterval->varNum, assignedRegister);
5647 }
5648
5649 // If we allocated a register, and this is a use of a spilled value,
5650 // it should have been marked for reload above.
5651 if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister)
5652 {
5653 assert(currentRefPosition->reload);
5654 }
5655 }
5656
5657 // If we allocated a register, record it
5658 if (currentInterval != nullptr && assignedRegister != REG_NA)
5659 {
5660 assignedRegBit = genRegMask(assignedRegister);
5661 currentRefPosition->registerAssignment = assignedRegBit;
5662 currentInterval->physReg = assignedRegister;
5663 regsToFree &= ~assignedRegBit; // we'll set it again later if it's dead
5664
5665 // If this interval is dead, free the register.
5666 // The interval could be dead if this is a user variable, or if the
5667 // node is being evaluated for side effects, or a call whose result
5668 // is not used, etc.
5669 if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr)
5670 {
5671 assert(currentRefPosition->isIntervalRef());
5672
5673 if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr)
5674 {
5675 if (currentRefPosition->delayRegFree)
5676 {
5677 delayRegsToFree |= assignedRegBit;
5678
5679 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED));
5680 }
5681 else
5682 {
5683 regsToFree |= assignedRegBit;
5684
5685 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE));
5686 }
5687 }
5688 else
5689 {
5690 currentInterval->isActive = false;
5691 }
5692 }
5693
5694 lastAllocatedRefPosition = currentRefPosition;
5695 }
5696 }
5697
5698 // Free registers to clear associated intervals for resolution phase
5699 CLANG_FORMAT_COMMENT_ANCHOR;
5700
5701#ifdef DEBUG
5702 if (getLsraExtendLifeTimes())
5703 {
5704 // If we have extended lifetimes, we need to make sure all the registers are freed.
5705 for (int regNumIndex = 0; regNumIndex <= REG_FP_LAST; regNumIndex++)
5706 {
5707 RegRecord& regRecord = physRegs[regNumIndex];
5708 Interval* interval = regRecord.assignedInterval;
5709 if (interval != nullptr)
5710 {
5711 interval->isActive = false;
5712 unassignPhysReg(&regRecord, nullptr);
5713 }
5714 }
5715 }
5716 else
5717#endif // DEBUG
5718 {
5719 freeRegisters(regsToFree | delayRegsToFree);
5720 }
5721
5722#ifdef DEBUG
5723 if (VERBOSE)
5724 {
5725 // Dump the RegRecords after the last RefPosition is handled.
5726 dumpRegRecords();
5727 printf("\n");
5728
5729 dumpRefPositions("AFTER ALLOCATION");
5730 dumpVarRefPositions("AFTER ALLOCATION");
5731
5732 // Dump the intervals that remain active
5733 printf("Active intervals at end of allocation:\n");
5734
5735        // We COULD just reuse the iteration logic from above, but ArrayListIterator doesn't
5736        // provide a Reset function (!) - we'll probably replace this, so don't bother
5737        // adding one.
5738
5739 for (Interval& interval : intervals)
5740 {
5741 if (interval.isActive)
5742 {
5743 printf("Active ");
5744 interval.dump();
5745 }
5746 }
5747
5748 printf("\n");
5749 }
5750#endif // DEBUG
5751}
5752
5753//-----------------------------------------------------------------------------
5754// updateAssignedInterval: Update assigned interval of register.
5755//
5756// Arguments:
5757// reg - register to be updated
5758// interval - interval to be assigned
5759// regType - register type
5760//
5761// Return Value:
5762// None
5763//
5764// Assumptions:
5765//    For ARM32, when "regType" is TYP_DOUBLE, "reg" should be an even-numbered
5766//    float register, i.e. the lower half of a double register.
5767//
5768// Note:
5769//    For ARM32, the two float registers constituting a double register are
5770//    updated together when "regType" is TYP_DOUBLE.
5771//
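// For example (ARM32, illustrative): d0 overlaps f0 and f1. Assigning a TYP_DOUBLE
// interval to f0 records the interval on f1 as well; conversely, assigning a
// TYP_FLOAT interval to f0 when f0's previous assignment was TYP_DOUBLE must clear
// the stale assignment from f1. Both cases are handled below via findAnotherHalfRegRec.
//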
5772void LinearScan::updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType)
5773{
5774#ifdef _TARGET_ARM_
5775 // Update overlapping floating point register for TYP_DOUBLE.
5776 Interval* oldAssignedInterval = reg->assignedInterval;
5777 if (regType == TYP_DOUBLE)
5778 {
5779 RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg);
5780
5781 anotherHalfReg->assignedInterval = interval;
5782 }
5783 else if ((oldAssignedInterval != nullptr) && (oldAssignedInterval->registerType == TYP_DOUBLE))
5784 {
5785 RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg);
5786
5787 anotherHalfReg->assignedInterval = nullptr;
5788 }
5789#endif
5790 reg->assignedInterval = interval;
5791}
5792
5793//-----------------------------------------------------------------------------
5794// updatePreviousInterval: Update previous interval of register.
5795//
5796// Arguments:
5797// reg - register to be updated
5798// interval - interval to be assigned
5799// regType - register type
5800//
5801// Return Value:
5802// None
5803//
5804// Assumptions:
5805//    For ARM32, when "regType" is TYP_DOUBLE, "reg" should be an even-numbered
5806//    float register, i.e. the lower half of a double register.
5807//
5808// Note:
5809//    For ARM32, the two float registers constituting a double register are
5810//    updated together when "regType" is TYP_DOUBLE.
5811//
5812void LinearScan::updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType)
5813{
5814 reg->previousInterval = interval;
5815
5816#ifdef _TARGET_ARM_
5817 // Update overlapping floating point register for TYP_DOUBLE
5818 if (regType == TYP_DOUBLE)
5819 {
5820 RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg);
5821
5822 anotherHalfReg->previousInterval = interval;
5823 }
5824#endif
5825}
5826
5827//------------------------------------------------------------------------
5828// resolveLocalRef: Update the graph for a local reference.
5829//      Also, track the register (if any) that is currently occupied.
5830//
5831// Arguments:
5832//      block              - The BasicBlock containing the reference
5833//      treeNode           - The lclVar that's being resolved
5834//      currentRefPosition - The RefPosition associated with the treeNode
5835// Details:
5836// This method is called for each local reference, during the resolveRegisters
5837// phase of LSRA. It is responsible for keeping the following in sync:
5838// - varDsc->lvRegNum (and lvOtherReg) contain the unique register location.
5839// If it is not in the same register through its lifetime, it is set to REG_STK.
5840// - interval->physReg is set to the assigned register
5841// (i.e. at the code location which is currently being handled by resolveRegisters())
5842// - interval->isActive is true iff the interval is live and occupying a register
5843// - interval->isSpilled should have already been set to true if the interval is EVER spilled
5844// - interval->isSplit is set to true if the interval does not occupy the same
5845// register throughout the method
5846// - RegRecord->assignedInterval points to the interval which currently occupies
5847// the register
5848// - For each lclVar node:
5849// - gtRegNum/gtRegPair is set to the currently allocated register(s).
5850// - GTF_SPILLED is set on a use if it must be reloaded prior to use.
5851// - GTF_SPILL is set if it must be spilled after use.
5852//
5853// A copyReg is an ugly case where the variable must be in a specific (fixed) register,
5854// but it currently resides elsewhere. The register allocator must track the use of the
5855// fixed register, but it marks the lclVar node with the register it currently lives in
5856// and the code generator does the necessary move.
5857//
5858// Before beginning, the varDsc for each parameter must be set to its initial location.
5859//
5860// NICE: Consider tracking whether an Interval is always in the same location (register/stack)
5861// in which case it will require no resolution.
5862//
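// For example (x64 names, illustrative): suppose V01 currently lives in rsi but a
// call requires it in rcx. A copyReg is recorded: the lclVar node is annotated with
// rsi (where the value currently lives), the move to rcx is generated by codegen,
// and V01's home remains rsi afterwards.
//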
5863void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPosition* currentRefPosition)
5864{
5865 assert((block == nullptr) == (treeNode == nullptr));
5866 assert(enregisterLocalVars);
5867
5868 // Is this a tracked local? Or just a register allocated for loading
5869 // a non-tracked one?
5870 Interval* interval = currentRefPosition->getInterval();
5871 if (!interval->isLocalVar)
5872 {
5873 return;
5874 }
5875 interval->recentRefPosition = currentRefPosition;
5876 LclVarDsc* varDsc = interval->getLocalVar(compiler);
5877
5878 // NOTE: we set the GTF_VAR_DEATH flag here unless we are extending lifetimes, in which case we write
5879 // this bit in checkLastUses. This is a bit of a hack, but is necessary because codegen requires
5880 // accurate last use info that is not reflected in the lastUse bit on ref positions when we are extending
5881 // lifetimes. See also the comments in checkLastUses.
5882 if ((treeNode != nullptr) && !extendLifetimes())
5883 {
5884 if (currentRefPosition->lastUse)
5885 {
5886 treeNode->gtFlags |= GTF_VAR_DEATH;
5887 }
5888 else
5889 {
5890 treeNode->gtFlags &= ~GTF_VAR_DEATH;
5891 }
5892 }
5893
5894 if (currentRefPosition->registerAssignment == RBM_NONE)
5895 {
5896 assert(!currentRefPosition->RequiresRegister());
5897 assert(interval->isSpilled);
5898
5899 varDsc->lvRegNum = REG_STK;
5900 if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
5901 {
5902 updateAssignedInterval(interval->assignedReg, nullptr, interval->registerType);
5903 }
5904 interval->assignedReg = nullptr;
5905 interval->physReg = REG_NA;
5906 if (treeNode != nullptr)
5907 {
5908 treeNode->SetContained();
5909 }
5910
5911 return;
5912 }
5913
5914 // In most cases, assigned and home registers will be the same
5915 // The exception is the copyReg case, where we've assigned a register
5916 // for a specific purpose, but will be keeping the register assignment
5917 regNumber assignedReg = currentRefPosition->assignedReg();
5918 regNumber homeReg = assignedReg;
5919
5920 // Undo any previous association with a physical register, UNLESS this
5921 // is a copyReg
5922 if (!currentRefPosition->copyReg)
5923 {
5924 regNumber oldAssignedReg = interval->physReg;
5925 if (oldAssignedReg != REG_NA && assignedReg != oldAssignedReg)
5926 {
5927 RegRecord* oldRegRecord = getRegisterRecord(oldAssignedReg);
5928 if (oldRegRecord->assignedInterval == interval)
5929 {
5930 updateAssignedInterval(oldRegRecord, nullptr, interval->registerType);
5931 }
5932 }
5933 }
5934
5935 if (currentRefPosition->refType == RefTypeUse && !currentRefPosition->reload)
5936 {
5937 // Was this spilled after our predecessor was scheduled?
5938 if (interval->physReg == REG_NA)
5939 {
5940 assert(inVarToRegMaps[curBBNum][varDsc->lvVarIndex] == REG_STK);
5941 currentRefPosition->reload = true;
5942 }
5943 }
5944
5945 bool reload = currentRefPosition->reload;
5946 bool spillAfter = currentRefPosition->spillAfter;
5947
5948 // In the reload case we either:
5949 // - Set the register to REG_STK if it will be referenced only from the home location, or
5950 // - Set the register to the assigned register and set GTF_SPILLED if it must be loaded into a register.
5951 if (reload)
5952 {
5953 assert(currentRefPosition->refType != RefTypeDef);
5954 assert(interval->isSpilled);
5955 varDsc->lvRegNum = REG_STK;
5956 if (!spillAfter)
5957 {
5958 interval->physReg = assignedReg;
5959 }
5960
5961 // If there is no treeNode, this must be a RefTypeExpUse, in
5962 // which case we did the reload already
5963 if (treeNode != nullptr)
5964 {
5965 treeNode->gtFlags |= GTF_SPILLED;
5966 if (spillAfter)
5967 {
5968 if (currentRefPosition->AllocateIfProfitable())
5969 {
5970 // This is a use of lclVar that is flagged as reg-optional
5971 // by lower/codegen and marked for both reload and spillAfter.
5972 // In this case we can avoid unnecessary reload and spill
5973 // by setting reg on lclVar to REG_STK and reg on tree node
5974 // to REG_NA. Codegen will generate the code by considering
5975 // it as a contained memory operand.
5976 //
5977                    // Note that varDsc->lvRegNum was already set to REG_STK above.
5978 interval->physReg = REG_NA;
5979 treeNode->gtRegNum = REG_NA;
5980 treeNode->gtFlags &= ~GTF_SPILLED;
5981 treeNode->SetContained();
5982 }
5983 else
5984 {
5985 treeNode->gtFlags |= GTF_SPILL;
5986 }
5987 }
5988 }
5989 else
5990 {
5991 assert(currentRefPosition->refType == RefTypeExpUse);
5992 }
5993 }
5994 else if (spillAfter && !RefTypeIsUse(currentRefPosition->refType))
5995 {
5996 // In the case of a pure def, don't bother spilling - just assign it to the
5997 // stack. However, we need to remember that it was spilled.
5998
5999 assert(interval->isSpilled);
6000 varDsc->lvRegNum = REG_STK;
6001 interval->physReg = REG_NA;
6002 if (treeNode != nullptr)
6003 {
6004 treeNode->gtRegNum = REG_NA;
6005 }
6006 }
6007 else
6008 {
6009        // Neither a reload nor a pure def that is spilled afterward ("spillAfter").
6010
6011 if (currentRefPosition->copyReg || currentRefPosition->moveReg)
6012 {
6013 // For a copyReg or moveReg, we have two cases:
6014 // - In the first case, we have a fixedReg - i.e. a register which the code
6015 // generator is constrained to use.
6016 // The code generator will generate the appropriate move to meet the requirement.
6017 // - In the second case, we were forced to use a different register because of
6018 // interference (or JitStressRegs).
6019 // In this case, we generate a GT_COPY.
6020 // In either case, we annotate the treeNode with the register in which the value
6021 // currently lives. For moveReg, the homeReg is the new register (as assigned above).
6022 // But for copyReg, the homeReg remains unchanged.
6023
6024 assert(treeNode != nullptr);
6025 treeNode->gtRegNum = interval->physReg;
6026
6027 if (currentRefPosition->copyReg)
6028 {
6029 homeReg = interval->physReg;
6030 }
6031 else
6032 {
6033 assert(interval->isSplit);
6034 interval->physReg = assignedReg;
6035 }
6036
6037 if (!currentRefPosition->isFixedRegRef || currentRefPosition->moveReg)
6038 {
6039 // This is the second case, where we need to generate a copy
6040 insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(), currentRefPosition);
6041 }
6042 }
6043 else
6044 {
6045 interval->physReg = assignedReg;
6046
6047 if (!interval->isSpilled && !interval->isSplit)
6048 {
6049 if (varDsc->lvRegNum != REG_STK)
6050 {
6051 // If the register assignments don't match, then this interval is split.
6052 if (varDsc->lvRegNum != assignedReg)
6053 {
6054 setIntervalAsSplit(interval);
6055 varDsc->lvRegNum = REG_STK;
6056 }
6057 }
6058 else
6059 {
6060 varDsc->lvRegNum = assignedReg;
6061 }
6062 }
6063 }
6064 if (spillAfter)
6065 {
6066 if (treeNode != nullptr)
6067 {
6068 treeNode->gtFlags |= GTF_SPILL;
6069 }
6070 assert(interval->isSpilled);
6071 interval->physReg = REG_NA;
6072 varDsc->lvRegNum = REG_STK;
6073 }
6074 }
6075
6076 // Update the physRegRecord for the register, so that we know what vars are in
6077 // regs at the block boundaries
6078 RegRecord* physRegRecord = getRegisterRecord(homeReg);
6079 if (spillAfter || currentRefPosition->lastUse)
6080 {
6081 interval->isActive = false;
6082 interval->assignedReg = nullptr;
6083 interval->physReg = REG_NA;
6084
6085 updateAssignedInterval(physRegRecord, nullptr, interval->registerType);
6086 }
6087 else
6088 {
6089 interval->isActive = true;
6090 interval->assignedReg = physRegRecord;
6091
6092 updateAssignedInterval(physRegRecord, interval, interval->registerType);
6093 }
6094}
6095
6096void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree)
6097{
6098 lsraAssignRegToTree(tree, currentRefPosition->assignedReg(), currentRefPosition->getMultiRegIdx());
6099}
6100
6101//------------------------------------------------------------------------
6102// insertCopyOrReload: Insert a copy in the case where a tree node value must be moved
6103// to a different register at the point of use (GT_COPY), or it is reloaded to a different register
6104// than the one it was spilled from (GT_RELOAD).
6105//
6106// Arguments:
6107// block - basic block in which GT_COPY/GT_RELOAD is inserted.
6108// tree - This is the node to copy or reload.
6109// Insert copy or reload node between this node and its parent.
6110// multiRegIdx - register position of tree node for which copy or reload is needed.
6111// refPosition - The RefPosition at which copy or reload will take place.
6112//
6113// Notes:
6114// The GT_COPY or GT_RELOAD will be inserted in the proper spot in execution order where the reload is to occur.
6115//
6116// For example, for this tree (numbers are execution order, lower is earlier and higher is later):
6117//
6118// +---------+----------+
6119// | GT_ADD (3) |
6120// +---------+----------+
6121// |
6122// / \
6123// / \
6124// / \
6125// +-------------------+ +----------------------+
6126// | x (1) | "tree" | y (2) |
6127// +-------------------+ +----------------------+
6128//
6129// generate this tree:
6130//
6131// +---------+----------+
6132// | GT_ADD (4) |
6133// +---------+----------+
6134// |
6135// / \
6136// / \
6137// / \
6138// +-------------------+ +----------------------+
6139// | GT_RELOAD (3) | | y (2) |
6140// +-------------------+ +----------------------+
6141// |
6142// +-------------------+
6143// | x (1) | "tree"
6144// +-------------------+
6145//
6146// Note in particular that the GT_RELOAD node gets inserted in execution order immediately before the parent of "tree",
6147// which seems a bit weird since normally a node's parent (in this case, the parent of "x", GT_RELOAD in the "after"
6148// picture) immediately follows all of its children (that is, normally the execution ordering is postorder).
6149// The ordering must be this weird "out of normal order" way because the "x" node is being spilled, probably
6150// because the expression in the tree represented above by "y" has high register requirements. We don't want
6151// to reload immediately, of course. So we put GT_RELOAD where the reload should actually happen.
6152//
6153// Note that GT_RELOAD is required when we reload to a different register than the one we spilled to. It can also be
6154// used if we reload to the same register. Normally, though, in that case we just mark the node with GTF_SPILLED,
6155// and the unspilling code automatically reuses the same register, and does the reload when it notices that flag
6156// when considering a node's operands.
6157//
6158void LinearScan::insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition)
6159{
6160 LIR::Range& blockRange = LIR::AsRange(block);
6161
6162 LIR::Use treeUse;
6163 bool foundUse = blockRange.TryGetUse(tree, &treeUse);
6164 assert(foundUse);
6165
6166 GenTree* parent = treeUse.User();
6167
6168 genTreeOps oper;
6169 if (refPosition->reload)
6170 {
6171 oper = GT_RELOAD;
6172 }
6173 else
6174 {
6175 oper = GT_COPY;
6176
6177#if TRACK_LSRA_STATS
6178 updateLsraStat(LSRA_STAT_COPY_REG, block->bbNum);
6179#endif
6180 }
6181
6182    // If the parent is a reload/copy node, then tree must be a multi-reg node
6183    // that has already had one of its registers spilled.
6184    // It is possible that one of its RefTypeDef positions got spilled, and the next
6185    // use of it requires it to be in a different register.
6186    //
6187    // In this case, set the i'th position register of the reload/copy node to the
6188    // register allocated for the copy/reload refPosition. Essentially, a copy/reload
6189    // node has a register for each multi-reg position of its child. If there is a
6190    // valid register in the i'th position of the GT_COPY or GT_RELOAD node, then the
6191    // corresponding result of its child needs to be copied or reloaded to that register.
6192 if (parent->IsCopyOrReload())
6193 {
6194 noway_assert(parent->OperGet() == oper);
6195 noway_assert(tree->IsMultiRegNode());
6196 GenTreeCopyOrReload* copyOrReload = parent->AsCopyOrReload();
6197 noway_assert(copyOrReload->GetRegNumByIdx(multiRegIdx) == REG_NA);
6198 copyOrReload->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
6199 }
6200 else
6201 {
6202 // Create the new node, with "tree" as its only child.
6203 var_types treeType = tree->TypeGet();
6204
6205 GenTreeCopyOrReload* newNode = new (compiler, oper) GenTreeCopyOrReload(oper, treeType, tree);
6206 assert(refPosition->registerAssignment != RBM_NONE);
6207 SetLsraAdded(newNode);
6208 newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
6209 if (refPosition->copyReg)
6210 {
6211 // This is a TEMPORARY copy
6212 assert(isCandidateLocalRef(tree));
6213 newNode->gtFlags |= GTF_VAR_DEATH;
6214 }
6215
6216 // Insert the copy/reload after the spilled node and replace the use of the original node with a use
6217 // of the copy/reload.
6218 blockRange.InsertAfter(tree, newNode);
6219 treeUse.ReplaceWith(compiler, newNode);
6220 }
6221}
6222
6223#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6224//------------------------------------------------------------------------
6225// insertUpperVectorSaveAndReload: Insert code to save and restore the upper half of a vector that lives
6226// in a callee-save register at the point of a kill (the upper half is
6227// not preserved).
6228//
6229// Arguments:
6230// tree - This is the node around which we will insert the Save & Reload.
6231// It will be a call or some node that turns into a call.
6232// refPosition - The RefTypeUpperVectorSaveDef RefPosition.
6233//
6234void LinearScan::insertUpperVectorSaveAndReload(GenTree* tree, RefPosition* refPosition, BasicBlock* block)
6235{
6236 Interval* lclVarInterval = refPosition->getInterval()->relatedInterval;
6237    assert(lclVarInterval->isLocalVar);
6238 LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
6239 assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
6240 regNumber lclVarReg = lclVarInterval->physReg;
6241 if (lclVarReg == REG_NA)
6242 {
6243 return;
6244 }
6245
6246 assert((genRegMask(lclVarReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE);
6247
6248 regNumber spillReg = refPosition->assignedReg();
6249 bool spillToMem = refPosition->spillAfter;
6250
6251 LIR::Range& blockRange = LIR::AsRange(block);
6252
6253 // First, insert the save before the call.
6254
6255 GenTree* saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
6256 saveLcl->gtRegNum = lclVarReg;
6257 SetLsraAdded(saveLcl);
6258
6259 GenTreeSIMD* simdNode =
6260 new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave,
6261 varDsc->lvBaseType, genTypeSize(varDsc->lvType));
6262 SetLsraAdded(simdNode);
6263 simdNode->gtRegNum = spillReg;
6264 if (spillToMem)
6265 {
6266 simdNode->gtFlags |= GTF_SPILL;
6267 }
6268
6269 blockRange.InsertBefore(tree, LIR::SeqTree(compiler, simdNode));
6270
6271 // Now insert the restore after the call.
6272
6273 GenTree* restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
6274 restoreLcl->gtRegNum = lclVarReg;
6275 SetLsraAdded(restoreLcl);
6276
6277 simdNode = new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore,
6278 varDsc->lvBaseType, genTypeSize(varDsc->lvType));
6279 simdNode->gtRegNum = spillReg;
6280 SetLsraAdded(simdNode);
6281 if (spillToMem)
6282 {
6283 simdNode->gtFlags |= GTF_SPILLED;
6284 }
6285
6286 blockRange.InsertAfter(tree, LIR::SeqTree(compiler, simdNode));
6287}
6288#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6289
6290//------------------------------------------------------------------------
6291// initMaxSpill: Initializes the LinearScan members used to track the max number
6292// of concurrent spills. This is needed so that we can set the
6293//               fields in Compiler, so that the code generator, in turn, can
6294// allocate the right number of spill locations.
6295//
6296// Arguments:
6297// None.
6298//
6299// Return Value:
6300// None.
6301//
6302// Assumptions:
6303// This is called before any calls to updateMaxSpill().
6304
6305void LinearScan::initMaxSpill()
6306{
6307 needDoubleTmpForFPCall = false;
6308 needFloatTmpForFPCall = false;
6309 for (int i = 0; i < TYP_COUNT; i++)
6310 {
6311 maxSpill[i] = 0;
6312 currentSpill[i] = 0;
6313 }
6314}
6315
6316//------------------------------------------------------------------------
6317// recordMaxSpill: Sets the fields in Compiler for the max number of concurrent spills.
6318// (See the comment on initMaxSpill.)
6319//
6320// Arguments:
6321// None.
6322//
6323// Return Value:
6324// None.
6325//
6326// Assumptions:
6327// This is called after updateMaxSpill() has been called for all "real"
6328// RefPositions.
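//
// Notes:
//    For example (illustrative): if the writeback phase saw at most two
//    concurrently-live TYP_INT spills and one TYP_DOUBLE spill, this method
//    calls tmpPreAllocateTemps(TYP_INT, 2) and tmpPreAllocateTemps(TYP_DOUBLE, 1).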
6329
6330void LinearScan::recordMaxSpill()
6331{
6332 // Note: due to the temp normalization process (see tmpNormalizeType)
6333 // only a few types should actually be seen here.
6334 JITDUMP("Recording the maximum number of concurrent spills:\n");
6335#ifdef _TARGET_X86_
6336 var_types returnType = RegSet::tmpNormalizeType(compiler->info.compRetType);
6337 if (needDoubleTmpForFPCall || (returnType == TYP_DOUBLE))
6338 {
6339 JITDUMP("Adding a spill temp for moving a double call/return value between xmm reg and x87 stack.\n");
6340 maxSpill[TYP_DOUBLE] += 1;
6341 }
6342 if (needFloatTmpForFPCall || (returnType == TYP_FLOAT))
6343 {
6344 JITDUMP("Adding a spill temp for moving a float call/return value between xmm reg and x87 stack.\n");
6345 maxSpill[TYP_FLOAT] += 1;
6346 }
6347#endif // _TARGET_X86_
6348 for (int i = 0; i < TYP_COUNT; i++)
6349 {
6350 if (var_types(i) != RegSet::tmpNormalizeType(var_types(i)))
6351 {
6352 // Only normalized types should have anything in the maxSpill array.
6353 // We assume here that if type 'i' does not normalize to itself, then
6354 // nothing else normalizes to 'i', either.
6355 assert(maxSpill[i] == 0);
6356 }
6357 if (maxSpill[i] != 0)
6358 {
6359 JITDUMP(" %s: %d\n", varTypeName(var_types(i)), maxSpill[i]);
6360 compiler->codeGen->regSet.tmpPreAllocateTemps(var_types(i), maxSpill[i]);
6361 }
6362 }
6363 JITDUMP("\n");
6364}
6365
6366//------------------------------------------------------------------------
6367// updateMaxSpill: Update the maximum number of concurrent spills
6368//
6369// Arguments:
6370// refPosition - the current RefPosition being handled
6371//
6372// Return Value:
6373// None.
6374//
6375// Assumptions:
6376// The RefPosition has an associated interval (getInterval() will
6377// otherwise assert).
6378//
6379// Notes:
6380// This is called for each "real" RefPosition during the writeback
6381// phase of LSRA. It keeps track of how many concurrently-live
6382// spills there are, and the largest number seen so far.
6383
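//
//    For example (illustrative): a tree temp of TYP_INT that is spilled at its def
//    and reloaded at its use increments currentSpill[TYP_INT] at the def (possibly
//    raising the maxSpill[TYP_INT] high-water mark), and decrements it again at the use.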
6384void LinearScan::updateMaxSpill(RefPosition* refPosition)
6385{
6386 RefType refType = refPosition->refType;
6387
6388 if (refPosition->spillAfter || refPosition->reload ||
6389 (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA))
6390 {
6391 Interval* interval = refPosition->getInterval();
6392 if (!interval->isLocalVar)
6393 {
6394 // The tmp allocation logic 'normalizes' types to a small number of
6395 // types that need distinct stack locations from each other.
6396 // Those types are currently gc refs, byrefs, <= 4 byte non-GC items,
6397 // 8-byte non-GC items, and 16-byte or 32-byte SIMD vectors.
6398 // LSRA is agnostic to those choices but needs
6399 // to know what they are here.
6400 var_types typ;
6401
6402#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6403 if ((refType == RefTypeUpperVectorSaveDef) || (refType == RefTypeUpperVectorSaveUse))
6404 {
6405 typ = LargeVectorSaveType;
6406 }
6407 else
6408#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6409 {
6410 GenTree* treeNode = refPosition->treeNode;
6411 if (treeNode == nullptr)
6412 {
6413 assert(RefTypeIsUse(refType));
6414 treeNode = interval->firstRefPosition->treeNode;
6415 }
6416 assert(treeNode != nullptr);
6417
6418 // In case of multi-reg call nodes, we need to use the type
6419                // of the return register given by multiRegIdx of the RefPosition.
6420 if (treeNode->IsMultiRegCall())
6421 {
6422 ReturnTypeDesc* retTypeDesc = treeNode->AsCall()->GetReturnTypeDesc();
6423 typ = retTypeDesc->GetReturnRegType(refPosition->getMultiRegIdx());
6424 }
6425#if FEATURE_ARG_SPLIT
6426 else if (treeNode->OperIsPutArgSplit())
6427 {
6428 typ = treeNode->AsPutArgSplit()->GetRegType(refPosition->getMultiRegIdx());
6429 }
6430#if !defined(_TARGET_64BIT_)
6431 else if (treeNode->OperIsPutArgReg())
6432 {
6433 // For double arg regs, the type is changed to long since they must be passed via `r0-r3`.
6434                    // However, when they get spilled, they should be treated as separate int registers.
6435 var_types typNode = treeNode->TypeGet();
6436 typ = (typNode == TYP_LONG) ? TYP_INT : typNode;
6437 }
6438#endif // !_TARGET_64BIT_
6439#endif // FEATURE_ARG_SPLIT
6440 else
6441 {
6442 typ = treeNode->TypeGet();
6443 }
6444 typ = RegSet::tmpNormalizeType(typ);
6445 }
6446
6447 if (refPosition->spillAfter && !refPosition->reload)
6448 {
6449 currentSpill[typ]++;
6450 if (currentSpill[typ] > maxSpill[typ])
6451 {
6452 maxSpill[typ] = currentSpill[typ];
6453 }
6454 }
6455 else if (refPosition->reload)
6456 {
6457 assert(currentSpill[typ] > 0);
6458 currentSpill[typ]--;
6459 }
6460 else if (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA)
6461 {
6462                // This spill temp is not being reloaded into a register because it is
6463                // marked allocate-if-profitable and is being used directly from its
6464                // memory location. To properly account for the max spill of 'typ',
6465                // we decrement the spill count.
6466 assert(RefTypeIsUse(refType));
6467 assert(currentSpill[typ] > 0);
6468 currentSpill[typ]--;
6469 }
6470 JITDUMP(" Max spill for %s is %d\n", varTypeName(typ), maxSpill[typ]);
6471 }
6472 }
6473}
6474
6475//------------------------------------------------------------------------
6476// resolveRegisters: Write the register assignments back to the tree, and perform
6477//                   resolution across joins and backedges. This is the final phase of register allocation.
//
6478void LinearScan::resolveRegisters()
6479{
6480 // Iterate over the tree and the RefPositions in lockstep
6481 // - annotate the tree with register assignments by setting gtRegNum or gtRegPair (for longs)
6482 // on the tree node
6483 // - track globally-live var locations
6484 // - add resolution points at split/merge/critical points as needed
6485
6486 // Need to use the same traversal order as the one that assigns the location numbers.
6487
6488 // Dummy RefPositions have been added at any split, join or critical edge, at the
6489 // point where resolution may be required. These are located:
6490 // - for a split, at the top of the non-adjacent block
6491 // - for a join, at the bottom of the non-adjacent joining block
6492 // - for a critical edge, at the top of the target block of each critical
6493 // edge.
6494    // Note that a target block may have multiple incoming critical or split edges.
6495 //
6496 // These RefPositions record the expected location of the Interval at that point.
6497 // At each branch, we identify the location of each liveOut interval, and check
6498 // against the RefPositions at the target.
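    //
    // For example (illustrative): if a lclVar is live across a non-adjacent edge and
    // was left in one register at the end of the source block, but the dummy RefPosition
    // at the target expects it in a different register (or on the stack), resolution
    // will insert the move(s) needed to reconcile the two locations.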
6499
6500 BasicBlock* block;
6501 LsraLocation currentLocation = MinLocation;
6502
6503 // Clear register assignments - these will be reestablished as lclVar defs (including RefTypeParamDefs)
6504 // are encountered.
6505 if (enregisterLocalVars)
6506 {
6507 for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
6508 {
6509 RegRecord* physRegRecord = getRegisterRecord(reg);
6510 Interval* assignedInterval = physRegRecord->assignedInterval;
6511 if (assignedInterval != nullptr)
6512 {
6513 assignedInterval->assignedReg = nullptr;
6514 assignedInterval->physReg = REG_NA;
6515 }
6516 physRegRecord->assignedInterval = nullptr;
6517 physRegRecord->recentRefPosition = nullptr;
6518 }
6519
6520 // Clear "recentRefPosition" for lclVar intervals
6521 for (unsigned varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
6522 {
6523 if (localVarIntervals[varIndex] != nullptr)
6524 {
6525 localVarIntervals[varIndex]->recentRefPosition = nullptr;
6526 localVarIntervals[varIndex]->isActive = false;
6527 }
6528 else
6529 {
6530 assert(compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate == false);
6531 }
6532 }
6533 }
6534
6535 // handle incoming arguments and special temps
6536 RefPositionIterator refPosIterator = refPositions.begin();
6537 RefPosition* currentRefPosition = &refPosIterator;
6538
6539 if (enregisterLocalVars)
6540 {
6541 VarToRegMap entryVarToRegMap = inVarToRegMaps[compiler->fgFirstBB->bbNum];
6542 for (; refPosIterator != refPositions.end() &&
6543 (currentRefPosition->refType == RefTypeParamDef || currentRefPosition->refType == RefTypeZeroInit);
6544 ++refPosIterator, currentRefPosition = &refPosIterator)
6545 {
6546 Interval* interval = currentRefPosition->getInterval();
6547 assert(interval != nullptr && interval->isLocalVar);
6548 resolveLocalRef(nullptr, nullptr, currentRefPosition);
6549 regNumber reg = REG_STK;
6550 int varIndex = interval->getVarIndex(compiler);
6551
6552 if (!currentRefPosition->spillAfter && currentRefPosition->registerAssignment != RBM_NONE)
6553 {
6554 reg = currentRefPosition->assignedReg();
6555 }
6556 else
6557 {
6558 reg = REG_STK;
6559 interval->isActive = false;
6560 }
6561 setVarReg(entryVarToRegMap, varIndex, reg);
6562 }
6563 }
6564 else
6565 {
6566 assert(refPosIterator == refPositions.end() ||
6567 (refPosIterator->refType != RefTypeParamDef && refPosIterator->refType != RefTypeZeroInit));
6568 }
6569
6570 BasicBlock* insertionBlock = compiler->fgFirstBB;
6571 GenTree* insertionPoint = LIR::AsRange(insertionBlock).FirstNonPhiNode();
6572
6573 // write back assignments
6574 for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
6575 {
6576 assert(curBBNum == block->bbNum);
6577
6578 if (enregisterLocalVars)
6579 {
6580 // Record the var locations at the start of this block.
6581 // (If it's fgFirstBB, we've already done that above, see entryVarToRegMap)
6582
6583 curBBStartLocation = currentRefPosition->nodeLocation;
6584 if (block != compiler->fgFirstBB)
6585 {
6586 processBlockStartLocations(block, false);
6587 }
6588
6589 // Handle the DummyDefs, updating the incoming var location.
6590 for (; refPosIterator != refPositions.end() && currentRefPosition->refType == RefTypeDummyDef;
6591 ++refPosIterator, currentRefPosition = &refPosIterator)
6592 {
6593 assert(currentRefPosition->isIntervalRef());
6594 // Don't mark dummy defs as reload
6595 currentRefPosition->reload = false;
6596 resolveLocalRef(nullptr, nullptr, currentRefPosition);
6597 regNumber reg;
6598 if (currentRefPosition->registerAssignment != RBM_NONE)
6599 {
6600 reg = currentRefPosition->assignedReg();
6601 }
6602 else
6603 {
6604 reg = REG_STK;
6605 currentRefPosition->getInterval()->isActive = false;
6606 }
6607 setInVarRegForBB(curBBNum, currentRefPosition->getInterval()->varNum, reg);
6608 }
6609 }
6610
6611 // The next RefPosition should be for the block. Move past it.
6612 assert(refPosIterator != refPositions.end());
6613 assert(currentRefPosition->refType == RefTypeBB);
6614 ++refPosIterator;
6615 currentRefPosition = &refPosIterator;
6616
6617 // Handle the RefPositions for the block
6618 for (; refPosIterator != refPositions.end() && currentRefPosition->refType != RefTypeBB &&
6619 currentRefPosition->refType != RefTypeDummyDef;
6620 ++refPosIterator, currentRefPosition = &refPosIterator)
6621 {
6622 currentLocation = currentRefPosition->nodeLocation;
6623
6624 // Ensure that the spill & copy info is valid.
6625 // First, if it's reload, it must not be copyReg or moveReg
6626 assert(!currentRefPosition->reload || (!currentRefPosition->copyReg && !currentRefPosition->moveReg));
6627 // If it's copyReg it must not be moveReg, and vice-versa
6628 assert(!currentRefPosition->copyReg || !currentRefPosition->moveReg);
6629
6630 switch (currentRefPosition->refType)
6631 {
6632#ifdef FEATURE_SIMD
6633 case RefTypeUpperVectorSaveUse:
6634 case RefTypeUpperVectorSaveDef:
6635#endif // FEATURE_SIMD
6636 case RefTypeUse:
6637 case RefTypeDef:
6638 // These are the ones we're interested in
6639 break;
6640 case RefTypeKill:
6641 case RefTypeFixedReg:
6642 // These require no handling at resolution time
6643 assert(currentRefPosition->referent != nullptr);
6644 currentRefPosition->referent->recentRefPosition = currentRefPosition;
6645 continue;
6646 case RefTypeExpUse:
6647 // Ignore the ExpUse cases - a RefTypeExpUse would only exist if the
6648 // variable is dead at the entry to the next block. So we'll mark
6649 // it as in its current location and resolution will take care of any
6650 // mismatch.
6651 assert(getNextBlock() == nullptr ||
6652 !VarSetOps::IsMember(compiler, getNextBlock()->bbLiveIn,
6653 currentRefPosition->getInterval()->getVarIndex(compiler)));
6654 currentRefPosition->referent->recentRefPosition = currentRefPosition;
6655 continue;
6656 case RefTypeKillGCRefs:
6657 // No action to take at resolution time, and no interval to update recentRefPosition for.
6658 continue;
6659 case RefTypeDummyDef:
6660 case RefTypeParamDef:
6661 case RefTypeZeroInit:
6662 // Should have handled all of these already
6663 default:
6664 unreached();
6665 break;
6666 }
6667 updateMaxSpill(currentRefPosition);
6668 GenTree* treeNode = currentRefPosition->treeNode;
6669
6670#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6671 if (currentRefPosition->refType == RefTypeUpperVectorSaveDef)
6672 {
6673 // The treeNode must be a call, and this must be a RefPosition for a LargeVectorType LocalVar.
6674 // If the LocalVar is in a callee-save register, we are going to spill its upper half around the call.
6675 // If we have allocated a register to spill it to, we will use that; otherwise, we will spill it
6676 // to the stack. We can use as a temp register any non-arg caller-save register.
6677 noway_assert(treeNode != nullptr);
6678 currentRefPosition->referent->recentRefPosition = currentRefPosition;
6679 insertUpperVectorSaveAndReload(treeNode, currentRefPosition, block);
6680 }
6681 else if (currentRefPosition->refType == RefTypeUpperVectorSaveUse)
6682 {
6683 continue;
6684 }
6685#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6686
6687 // Most uses won't actually need to be recorded (they're on the def).
6688 // In those cases, treeNode will be nullptr.
6689 if (treeNode == nullptr)
6690 {
6691 // This is either a use, a dead def, or a field of a struct
6692 Interval* interval = currentRefPosition->getInterval();
6693 assert(currentRefPosition->refType == RefTypeUse ||
6694 currentRefPosition->registerAssignment == RBM_NONE || interval->isStructField);
6695
6696 // TODO-Review: Need to handle the case where any of the struct fields
6697 // are reloaded/spilled at this use
6698 assert(!interval->isStructField ||
6699 (currentRefPosition->reload == false && currentRefPosition->spillAfter == false));
6700
6701 if (interval->isLocalVar && !interval->isStructField)
6702 {
6703 LclVarDsc* varDsc = interval->getLocalVar(compiler);
6704
6705 // This must be a dead definition. We need to mark the lclVar
6706 // so that it's not considered a candidate for lvRegister, as
6707 // this dead def will have to go to the stack.
6708 assert(currentRefPosition->refType == RefTypeDef);
6709 varDsc->lvRegNum = REG_STK;
6710 }
6711 continue;
6712 }
6713
6714 if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal)
6715 {
6716 treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
6717 }
6718 else
6719 {
6720 writeRegisters(currentRefPosition, treeNode);
6721
6722 if (treeNode->IsLocal() && currentRefPosition->getInterval()->isLocalVar)
6723 {
6724 resolveLocalRef(block, treeNode, currentRefPosition);
6725 }
6726
6727 // Mark spill locations on temps
6728 // (local vars are handled in resolveLocalRef, above)
6729 // Note that the tree node will be changed from GTF_SPILL to GTF_SPILLED
6730 // in codegen, taking care of the "reload" case for temps
6731 else if (currentRefPosition->spillAfter || (currentRefPosition->nextRefPosition != nullptr &&
6732 currentRefPosition->nextRefPosition->moveReg))
6733 {
6734 if (treeNode != nullptr && currentRefPosition->isIntervalRef())
6735 {
6736 if (currentRefPosition->spillAfter)
6737 {
6738 treeNode->gtFlags |= GTF_SPILL;
6739
6740 // If this is a constant interval that is reusing a pre-existing value, we actually need
6741 // to generate the value at this point in order to spill it.
6742 if (treeNode->IsReuseRegVal())
6743 {
6744 treeNode->ResetReuseRegVal();
6745 }
6746
6747 // In case of multi-reg call node, also set spill flag on the
6748 // register specified by multi-reg index of current RefPosition.
6749 // Note that the spill flag on treeNode indicates that one or
6750                        // more of its allocated registers are in that state.
6751 if (treeNode->IsMultiRegCall())
6752 {
6753 GenTreeCall* call = treeNode->AsCall();
6754 call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
6755 }
6756#if FEATURE_ARG_SPLIT
6757 else if (treeNode->OperIsPutArgSplit())
6758 {
6759 GenTreePutArgSplit* splitArg = treeNode->AsPutArgSplit();
6760 splitArg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
6761 }
6762#ifdef _TARGET_ARM_
6763 else if (treeNode->OperIsMultiRegOp())
6764 {
6765 GenTreeMultiRegOp* multiReg = treeNode->AsMultiRegOp();
6766 multiReg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
6767 }
6768#endif // _TARGET_ARM_
6769#endif // FEATURE_ARG_SPLIT
6770 }
6771
6772 // If the value is reloaded or moved to a different register, we need to insert
6773 // a node to hold the register to which it should be reloaded
6774 RefPosition* nextRefPosition = currentRefPosition->nextRefPosition;
6775 assert(nextRefPosition != nullptr);
6776 if (INDEBUG(alwaysInsertReload() ||)
6777 nextRefPosition->assignedReg() != currentRefPosition->assignedReg())
6778 {
6779 if (nextRefPosition->assignedReg() != REG_NA)
6780 {
6781 insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(),
6782 nextRefPosition);
6783 }
6784 else
6785 {
6786 assert(nextRefPosition->AllocateIfProfitable());
6787
6788 // In case of tree temps, if def is spilled and use didn't
6789 // get a register, set a flag on tree node to be treated as
6790 // contained at the point of its use.
6791 if (currentRefPosition->spillAfter && currentRefPosition->refType == RefTypeDef &&
6792 nextRefPosition->refType == RefTypeUse)
6793 {
6794 assert(nextRefPosition->treeNode == nullptr);
6795 treeNode->gtFlags |= GTF_NOREG_AT_USE;
6796 }
6797 }
6798 }
6799 }
6800
6801 // We should never have to "spill after" a temp use, since
6802 // they're single use
6803 else
6804 {
6805 unreached();
6806 }
6807 }
6808 }
6809 }
6810
6811 if (enregisterLocalVars)
6812 {
6813 processBlockEndLocations(block);
6814 }
6815 }
6816
6817 if (enregisterLocalVars)
6818 {
6819#ifdef DEBUG
6820 if (VERBOSE)
6821 {
6822 printf("-----------------------\n");
6823 printf("RESOLVING BB BOUNDARIES\n");
6824 printf("-----------------------\n");
6825
6826 printf("Resolution Candidates: ");
6827 dumpConvertedVarSet(compiler, resolutionCandidateVars);
6828 printf("\n");
6829 printf("Has %sCritical Edges\n\n", hasCriticalEdges ? "" : "No");
6830
6831 printf("Prior to Resolution\n");
6832 foreach_block(compiler, block)
6833 {
6834 printf("\n" FMT_BB " use def in out\n", block->bbNum);
6835 dumpConvertedVarSet(compiler, block->bbVarUse);
6836 printf("\n");
6837 dumpConvertedVarSet(compiler, block->bbVarDef);
6838 printf("\n");
6839 dumpConvertedVarSet(compiler, block->bbLiveIn);
6840 printf("\n");
6841 dumpConvertedVarSet(compiler, block->bbLiveOut);
6842 printf("\n");
6843
6844 dumpInVarToRegMap(block);
6845 dumpOutVarToRegMap(block);
6846 }
6847
6848 printf("\n\n");
6849 }
6850#endif // DEBUG
6851
6852 resolveEdges();
6853
6854 // Verify register assignments on variables
6855 unsigned lclNum;
6856 LclVarDsc* varDsc;
6857 for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
6858 {
6859 if (!isCandidateVar(varDsc))
6860 {
6861 varDsc->lvRegNum = REG_STK;
6862 }
6863 else
6864 {
6865 Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
6866
6867 // Determine initial position for parameters
6868
6869 if (varDsc->lvIsParam)
6870 {
6871 regMaskTP initialRegMask = interval->firstRefPosition->registerAssignment;
6872 regNumber initialReg = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter)
6873 ? REG_STK
6874 : genRegNumFromMask(initialRegMask);
6875 regNumber sourceReg = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
6876
6877#ifdef _TARGET_ARM_
6878 if (varTypeIsMultiReg(varDsc))
6879 {
6880                    // TODO-ARM-NYI: Map the hi/lo intervals back to lvRegNum and lvOtherReg
6881                    // (we should have hit an NYI for these before reaching this point)
6882 assert(!"Multi-reg types not yet supported");
6883 }
6884 else
6885#endif // _TARGET_ARM_
6886 {
6887 varDsc->lvArgInitReg = initialReg;
6888 JITDUMP(" Set V%02u argument initial register to %s\n", lclNum, getRegName(initialReg));
6889 }
6890
6891 // Stack args that are part of dependently-promoted structs should never be register candidates (see
6892 // LinearScan::isRegCandidate).
6893 assert(varDsc->lvIsRegArg || !compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc));
6894 }
6895
6896 // If lvRegNum is REG_STK, that means that either no register
6897 // was assigned, or (more likely) that the same register was not
6898 // used for all references. In that case, codegen gets the register
6899 // from the tree node.
6900 if (varDsc->lvRegNum == REG_STK || interval->isSpilled || interval->isSplit)
6901 {
6902 // For codegen purposes, we'll set lvRegNum to whatever register
6903 // it's currently in as we go.
6904 // However, we never mark an interval as lvRegister if it has either been spilled
6905 // or split.
6906 varDsc->lvRegister = false;
6907
6908 // Skip any dead defs or exposed uses
6909                // (an exposed first use will only occur when there is no explicit initialization)
6910 RefPosition* firstRefPosition = interval->firstRefPosition;
6911 while ((firstRefPosition != nullptr) && (firstRefPosition->refType == RefTypeExpUse))
6912 {
6913 firstRefPosition = firstRefPosition->nextRefPosition;
6914 }
6915 if (firstRefPosition == nullptr)
6916 {
6917 // Dead interval
6918 varDsc->lvLRACandidate = false;
6919 if (varDsc->lvRefCnt() == 0)
6920 {
6921 varDsc->lvOnFrame = false;
6922 }
6923 else
6924 {
6925 // We may encounter cases where a lclVar actually has no references, but
6926 // a non-zero refCnt. For safety (in case this is some "hidden" lclVar that we're
6927 // not correctly recognizing), we'll mark those as needing a stack location.
6928 // TODO-Cleanup: Make this an assert if/when we correct the refCnt
6929 // updating.
6930 varDsc->lvOnFrame = true;
6931 }
6932 }
6933 else
6934 {
6935 // If the interval was not spilled, it doesn't need a stack location.
6936 if (!interval->isSpilled)
6937 {
6938 varDsc->lvOnFrame = false;
6939 }
6940 if (firstRefPosition->registerAssignment == RBM_NONE || firstRefPosition->spillAfter)
6941 {
6942                        // Either this RefPosition is spilled, or it is regOptional, or it is not a "real" def or use
6943 assert(
6944 firstRefPosition->spillAfter || firstRefPosition->AllocateIfProfitable() ||
6945 (firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse));
6946 varDsc->lvRegNum = REG_STK;
6947 }
6948 else
6949 {
6950 varDsc->lvRegNum = firstRefPosition->assignedReg();
6951 }
6952 }
6953 }
6954 else
6955 {
6957                varDsc->lvRegister = true;
6958                varDsc->lvOnFrame  = false;
6960#ifdef DEBUG
6961 regMaskTP registerAssignment = genRegMask(varDsc->lvRegNum);
6962 assert(!interval->isSpilled && !interval->isSplit);
6963 RefPosition* refPosition = interval->firstRefPosition;
6964 assert(refPosition != nullptr);
6965
6966 while (refPosition != nullptr)
6967 {
6968 // All RefPositions must match, except for dead definitions,
6969 // copyReg/moveReg and RefTypeExpUse positions
6970 if (refPosition->registerAssignment != RBM_NONE && !refPosition->copyReg &&
6971 !refPosition->moveReg && refPosition->refType != RefTypeExpUse)
6972 {
6973 assert(refPosition->registerAssignment == registerAssignment);
6974 }
6975 refPosition = refPosition->nextRefPosition;
6976 }
6977#endif // DEBUG
6978 }
6979 }
6980 }
6981 }
6982
6983#ifdef DEBUG
6984 if (VERBOSE)
6985 {
6986 printf("Trees after linear scan register allocator (LSRA)\n");
6987 compiler->fgDispBasicBlocks(true);
6988 }
6989
6990 verifyFinalAllocation();
6991#endif // DEBUG
6992
6993 compiler->raMarkStkVars();
6994 recordMaxSpill();
6995
6996 // TODO-CQ: Review this comment and address as needed.
6997 // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
6998 // so that the gc tracking logic and lvMustInit logic will ignore them.
6999 // Extract the code that does this from raAssignVars, and call it here.
7000 // PRECONDITIONS: Ensure that lvPromoted is set on promoted structs, if and
7001 // only if it is promoted on all paths.
7002 // Call might be something like:
7003 // compiler->BashUnusedStructLocals();
7004}
7005
7007//------------------------------------------------------------------------
7008// insertMove: Insert a move of a lclVar with the given lclNum into the given block.
7009//
7010// Arguments:
7011// block - the BasicBlock into which the move will be inserted.
7012// insertionPoint - the instruction before which to insert the move
7013// lclNum - the lclNum of the var to be moved
7014// fromReg - the register from which the var is moving
7015// toReg - the register to which the var is moving
7016//
7017// Return Value:
7018// None.
7019//
7020// Notes:
7021// If insertionPoint is non-NULL, insert before that instruction;
7022// otherwise, insert "near" the end (prior to the branch, if any).
7023// If fromReg or toReg is REG_STK, then move from/to memory, respectively.
7024
7025void LinearScan::insertMove(
7026 BasicBlock* block, GenTree* insertionPoint, unsigned lclNum, regNumber fromReg, regNumber toReg)
7027{
7028 LclVarDsc* varDsc = compiler->lvaTable + lclNum;
7029 // the lclVar must be a register candidate
7030 assert(isRegCandidate(varDsc));
7031 // One or both MUST be a register
7032 assert(fromReg != REG_STK || toReg != REG_STK);
7033 // They must not be the same register.
7034 assert(fromReg != toReg);
7035
7036 // This var can't be marked lvRegister now
7037 varDsc->lvRegNum = REG_STK;
7038
7039 GenTree* src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
7040 SetLsraAdded(src);
7041
7042 // There are three cases we need to handle:
7043 // - We are loading a lclVar from the stack.
7044 // - We are storing a lclVar to the stack.
7045 // - We are copying a lclVar between registers.
7046 //
7047 // In the first and second cases, the lclVar node will be marked with GTF_SPILLED and GTF_SPILL, respectively.
7048 // It is up to the code generator to ensure that any necessary normalization is done when loading or storing the
7049 // lclVar's value.
7050 //
7051 // In the third case, we generate GT_COPY(GT_LCL_VAR) and type each node with the normalized type of the lclVar.
7052 // This is safe because a lclVar is always normalized once it is in a register.
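    //
    // For example (illustrative), a register-to-register move produces:
    //     t1 = GT_LCL_VAR V03  (gtRegNum == fromReg)
    //     t2 = GT_COPY    t1   (gtRegNum == toReg, marked as an unused value)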
7053
7054 GenTree* dst = src;
7055 if (fromReg == REG_STK)
7056 {
7057 src->gtFlags |= GTF_SPILLED;
7058 src->gtRegNum = toReg;
7059 }
7060 else if (toReg == REG_STK)
7061 {
7062 src->gtFlags |= GTF_SPILL;
7063 src->gtRegNum = fromReg;
7064 }
7065 else
7066 {
7067 var_types movType = genActualType(varDsc->TypeGet());
7068 src->gtType = movType;
7069
7070 dst = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, movType, src);
7071 // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag.
7072 // Note that if src is itself a lastUse, this will have no effect.
7073 dst->gtFlags &= ~(GTF_VAR_DEATH);
7074 src->gtRegNum = fromReg;
7075 dst->gtRegNum = toReg;
7076 SetLsraAdded(dst);
7077 }
7078 dst->SetUnusedValue();
7079
7080 LIR::Range treeRange = LIR::SeqTree(compiler, dst);
7081 LIR::Range& blockRange = LIR::AsRange(block);
7082
7083 if (insertionPoint != nullptr)
7084 {
7085 blockRange.InsertBefore(insertionPoint, std::move(treeRange));
7086 }
7087 else
7088 {
7089 // Put the copy at the bottom
7090 // If there's a branch, make an embedded statement that executes just prior to the branch
7091 if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
7092 {
7093 noway_assert(!blockRange.IsEmpty());
7094
7095 GenTree* branch = blockRange.LastNode();
7096 assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
7097 branch->OperGet() == GT_SWITCH);
7098
7099 blockRange.InsertBefore(branch, std::move(treeRange));
7100 }
7101 else
7102 {
7103 assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
7104 blockRange.InsertAtEnd(std::move(treeRange));
7105 }
7106 }
7107}
7108
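//------------------------------------------------------------------------
// insertSwap: Insert a GT_SWAP node to exchange the registers of two lclVars.
//
// Arguments:
//    block          - the BasicBlock into which the swap will be inserted.
//    insertionPoint - the instruction before which to insert the swap
//    lclNum1, reg1  - the first lclVar and the register it currently occupies
//    lclNum2, reg2  - the second lclVar and the register it currently occupies
//
// Notes:
//    As with insertMove, a null insertionPoint means "insert near the end of the
//    block, prior to the branch, if any". Both vars must be in actual registers
//    (neither REG_STK nor REG_NA).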
7109void LinearScan::insertSwap(
7110 BasicBlock* block, GenTree* insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2)
7111{
7112#ifdef DEBUG
7113 if (VERBOSE)
7114 {
7115 const char* insertionPointString = "top";
7116 if (insertionPoint == nullptr)
7117 {
7118 insertionPointString = "bottom";
7119 }
7120 printf(" " FMT_BB " %s: swap V%02u in %s with V%02u in %s\n", block->bbNum, insertionPointString, lclNum1,
7121 getRegName(reg1), lclNum2, getRegName(reg2));
7122 }
7123#endif // DEBUG
7124
7125 LclVarDsc* varDsc1 = compiler->lvaTable + lclNum1;
7126 LclVarDsc* varDsc2 = compiler->lvaTable + lclNum2;
7127 assert(reg1 != REG_STK && reg1 != REG_NA && reg2 != REG_STK && reg2 != REG_NA);
7128
7129 GenTree* lcl1 = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet());
7130 lcl1->gtRegNum = reg1;
7131 SetLsraAdded(lcl1);
7132
7133 GenTree* lcl2 = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet());
7134 lcl2->gtRegNum = reg2;
7135 SetLsraAdded(lcl2);
7136
7137 GenTree* swap = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2);
7138 swap->gtRegNum = REG_NA;
7139 SetLsraAdded(swap);
7140
7141 lcl1->gtNext = lcl2;
7142 lcl2->gtPrev = lcl1;
7143 lcl2->gtNext = swap;
7144 swap->gtPrev = lcl2;
7145
7146 LIR::Range swapRange = LIR::SeqTree(compiler, swap);
7147 LIR::Range& blockRange = LIR::AsRange(block);
7148
7149 if (insertionPoint != nullptr)
7150 {
7151 blockRange.InsertBefore(insertionPoint, std::move(swapRange));
7152 }
7153 else
7154 {
7155 // Put the copy at the bottom
7156 // If there's a branch, make an embedded statement that executes just prior to the branch
7157 if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
7158 {
7159 noway_assert(!blockRange.IsEmpty());
7160
7161 GenTree* branch = blockRange.LastNode();
7162 assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
7163 branch->OperGet() == GT_SWITCH);
7164
7165 blockRange.InsertBefore(branch, std::move(swapRange));
7166 }
7167 else
7168 {
7169 assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
7170 blockRange.InsertAtEnd(std::move(swapRange));
7171 }
7172 }
7173}
7174
7175//------------------------------------------------------------------------
7176// getTempRegForResolution: Get a free register to use for resolution code.
7177//
7178// Arguments:
7179// fromBlock - The "from" block on the edge being resolved.
7180//    toBlock - The "to" block on the edge.
7181// type - the type of register required
7182//
7183// Return Value:
7184// Returns a register that is free on the given edge, or REG_NA if none is available.
7185//
7186// Notes:
7187//    It is up to the caller to check the return value, determine whether a register
7188//    is actually available, and handle that case appropriately.
7189//    It is also up to the caller to cache the return value, as this is not cheap to compute.
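//
//    For example (illustrative): if V00 moves from R0 to R1 across the edge, and V01
//    moves from R2 to the stack, then R0, R1 and R2 are all excluded, and the lowest
//    remaining register in 'freeRegs' (if any) is returned.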
7190
7191regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type)
7192{
7193    // TODO-Throughput: This would be much more efficient if we used RegToVarMaps instead of
7194    // VarToRegMaps; they would be more space-efficient as well.
7195 VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
7196 VarToRegMap toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
7197
7198#ifdef _TARGET_ARM_
7199 regMaskTP freeRegs;
7200 if (type == TYP_DOUBLE)
7201 {
7202 // We have to consider all float registers for TYP_DOUBLE
7203 freeRegs = allRegs(TYP_FLOAT);
7204 }
7205 else
7206 {
7207 freeRegs = allRegs(type);
7208 }
7209#else // !_TARGET_ARM_
7210 regMaskTP freeRegs = allRegs(type);
7211#endif // !_TARGET_ARM_
7212
7213#ifdef DEBUG
7214 if (getStressLimitRegs() == LSRA_LIMIT_SMALL_SET)
7215 {
7216 return REG_NA;
7217 }
7218#endif // DEBUG
7219 INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs));
7220
7221 // We are only interested in the variables that are live-in to the "to" block.
7222 VarSetOps::Iter iter(compiler, toBlock->bbLiveIn);
7223 unsigned varIndex = 0;
7224 while (iter.NextElem(&varIndex) && freeRegs != RBM_NONE)
7225 {
7226 regNumber fromReg = getVarReg(fromVarToRegMap, varIndex);
7227 regNumber toReg = getVarReg(toVarToRegMap, varIndex);
7228 assert(fromReg != REG_NA && toReg != REG_NA);
7229 if (fromReg != REG_STK)
7230 {
7231 freeRegs &= ~genRegMask(fromReg, getIntervalForLocalVar(varIndex)->registerType);
7232 }
7233 if (toReg != REG_STK)
7234 {
7235 freeRegs &= ~genRegMask(toReg, getIntervalForLocalVar(varIndex)->registerType);
7236 }
7237 }
7238
7239#ifdef _TARGET_ARM_
7240 if (type == TYP_DOUBLE)
7241 {
7242 // Exclude any doubles for which the odd half isn't in freeRegs.
7243 freeRegs = freeRegs & ((freeRegs << 1) & RBM_ALLDOUBLE);
7244 }
7245#endif
7246
7247 if (freeRegs == RBM_NONE)
7248 {
7249 return REG_NA;
7250 }
7251 else
7252 {
7253 regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs));
7254 return tempReg;
7255 }
7256}
7257
7258#ifdef _TARGET_ARM_
7259//------------------------------------------------------------------------
7260// addResolutionForDouble: Add resolution move(s) for TYP_DOUBLE interval
7261// and update location.
7262//
7263// Arguments:
7264// block - the BasicBlock into which the move will be inserted.
7265// insertionPoint - the instruction before which to insert the move
7266//    sourceIntervals - sourceIntervals[reg] is the interval to be moved out of 'reg'
7267//    location - location[reg] is the current location of the var that was originally in 'reg'
7268// toReg - the register to which the var is moving
7269// fromReg - the register from which the var is moving
7270// resolveType - the type of resolution to be performed
7271//
7272// Return Value:
7273// None.
7274//
7275// Notes:
7276// It inserts at least one move and updates incoming parameter 'location'.
7277//
7278void LinearScan::addResolutionForDouble(BasicBlock* block,
7279 GenTree* insertionPoint,
7280 Interval** sourceIntervals,
7281 regNumberSmall* location,
7282 regNumber toReg,
7283 regNumber fromReg,
7284 ResolveType resolveType)
7285{
7286 regNumber secondHalfTargetReg = REG_NEXT(fromReg);
7287 Interval* intervalToBeMoved1 = sourceIntervals[fromReg];
7288 Interval* intervalToBeMoved2 = sourceIntervals[secondHalfTargetReg];
7289
7290 assert(!(intervalToBeMoved1 == nullptr && intervalToBeMoved2 == nullptr));
7291
7292 if (intervalToBeMoved1 != nullptr)
7293 {
7294 if (intervalToBeMoved1->registerType == TYP_DOUBLE)
7295 {
7296 // TYP_DOUBLE interval occupies a double register, i.e. two float registers.
7297 assert(intervalToBeMoved2 == nullptr);
7298 assert(genIsValidDoubleReg(toReg));
7299 }
7300 else
7301 {
7302 // TYP_FLOAT interval occupies 1st half of double register, i.e. 1st float register
7303 assert(genIsValidFloatReg(toReg));
7304 }
7305 addResolution(block, insertionPoint, intervalToBeMoved1, toReg, fromReg);
7306 JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
7307 location[fromReg] = (regNumberSmall)toReg;
7308 }
7309
7310 if (intervalToBeMoved2 != nullptr)
7311 {
7312 // TYP_FLOAT interval occupies 2nd half of double register.
7313 assert(intervalToBeMoved2->registerType == TYP_FLOAT);
7314 regNumber secondHalfTempReg = REG_NEXT(toReg);
7315
7316 addResolution(block, insertionPoint, intervalToBeMoved2, secondHalfTempReg, secondHalfTargetReg);
7317 JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
7318 location[secondHalfTargetReg] = (regNumberSmall)secondHalfTempReg;
7319 }
7320
7321 return;
7322}
7323#endif // _TARGET_ARM_
7324
7325//------------------------------------------------------------------------
7326// addResolution: Add a resolution move of the given interval
7327//
7328// Arguments:
7329// block - the BasicBlock into which the move will be inserted.
7330// insertionPoint - the instruction before which to insert the move
7331// interval - the interval of the var to be moved
7332// toReg - the register to which the var is moving
7333// fromReg - the register from which the var is moving
7334//
7335// Return Value:
7336// None.
7337//
7338// Notes:
7339// For joins, we insert at the bottom (indicated by an insertionPoint
7340// of nullptr), while for splits we insert at the top.
7341// This is because for joins 'block' is a pred of the join, while for splits it is a succ.
7342// For critical edges, this function may be called twice - once to move from
7343// the source (fromReg), if any, to the stack, in which case toReg will be
7344// REG_STK, and we insert at the bottom (leave insertionPoint as nullptr).
7345// The next time, we want to move from the stack to the destination (toReg),
7346// in which case fromReg will be REG_STK, and we insert at the top.
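//
//    For example (illustrative): resolving a lclVar across a critical edge from R3 to
//    R5 with no free temp register takes two calls: the first with toReg == REG_STK
//    (inserted at the bottom of the source block), and the second with fromReg == REG_STK
//    (inserted at the top of the target block).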
7347
7348void LinearScan::addResolution(
7349 BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber toReg, regNumber fromReg)
7350{
7351#ifdef DEBUG
7352 const char* insertionPointString = "top";
7353#endif // DEBUG
7354 if (insertionPoint == nullptr)
7355 {
7356#ifdef DEBUG
7357 insertionPointString = "bottom";
7358#endif // DEBUG
7359 }
7360
7361 JITDUMP(" " FMT_BB " %s: move V%02u from ", block->bbNum, insertionPointString, interval->varNum);
7362 JITDUMP("%s to %s", getRegName(fromReg), getRegName(toReg));
7363
7364 insertMove(block, insertionPoint, interval->varNum, fromReg, toReg);
7365 if (fromReg == REG_STK || toReg == REG_STK)
7366 {
7367 assert(interval->isSpilled);
7368 }
7369 else
7370 {
7371 // We should have already marked this as spilled or split.
7372 assert((interval->isSpilled) || (interval->isSplit));
7373 }
7374
7375 INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
7376}
7377
7378//------------------------------------------------------------------------
7379// handleOutgoingCriticalEdges: Performs the necessary resolution on all critical edges that feed out of 'block'
7380//
7381// Arguments:
7382// block - the block with outgoing critical edges.
7383//
7384// Return Value:
7385//    None.
7386//
7387// Notes:
7388// For all outgoing critical edges (i.e. any successor of this block which is
7389// a join edge), if there are any conflicts, split the edge by adding a new block,
7390// and generate the resolution code into that block.
7391
7392void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
7393{
7394 VARSET_TP outResolutionSet(VarSetOps::Intersection(compiler, block->bbLiveOut, resolutionCandidateVars));
7395 if (VarSetOps::IsEmpty(compiler, outResolutionSet))
7396 {
7397 return;
7398 }
7399 VARSET_TP sameResolutionSet(VarSetOps::MakeEmpty(compiler));
7400 VARSET_TP sameLivePathsSet(VarSetOps::MakeEmpty(compiler));
7401 VARSET_TP singleTargetSet(VarSetOps::MakeEmpty(compiler));
7402 VARSET_TP diffResolutionSet(VarSetOps::MakeEmpty(compiler));
7403
7404 // Get the outVarToRegMap for this block
7405 VarToRegMap outVarToRegMap = getOutVarToRegMap(block->bbNum);
7406 unsigned succCount = block->NumSucc(compiler);
7407 assert(succCount > 1);
7408 VarToRegMap firstSuccInVarToRegMap = nullptr;
7409 BasicBlock* firstSucc = nullptr;
7410
7411 // First, determine the live regs at the end of this block so that we know what regs are
7412 // available to copy into.
7413 // Note that for this purpose we use the full live-out set, because we must ensure that
7414 // even the registers that remain the same across the edge are preserved correctly.
7415 regMaskTP liveOutRegs = RBM_NONE;
7416 VarSetOps::Iter liveOutIter(compiler, block->bbLiveOut);
7417 unsigned liveOutVarIndex = 0;
7418 while (liveOutIter.NextElem(&liveOutVarIndex))
7419 {
7420 regNumber fromReg = getVarReg(outVarToRegMap, liveOutVarIndex);
7421 if (fromReg != REG_STK)
7422 {
7423 regMaskTP fromRegMask = genRegMask(fromReg, getIntervalForLocalVar(liveOutVarIndex)->registerType);
7424 liveOutRegs |= fromRegMask;
7425 }
7426 }
7427
7428    // Next, if this block ends with a switch table, we have to make sure not to copy
7429 // into the registers that it uses.
7430 regMaskTP switchRegs = RBM_NONE;
7431 if (block->bbJumpKind == BBJ_SWITCH)
7432 {
7433 // At this point, Lowering has transformed any non-switch-table blocks into
7434 // cascading ifs.
7435 GenTree* switchTable = LIR::AsRange(block).LastNode();
7436 assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE);
7437
7438 switchRegs = switchTable->gtRsvdRegs;
7439 GenTree* op1 = switchTable->gtGetOp1();
7440 GenTree* op2 = switchTable->gtGetOp2();
7441 noway_assert(op1 != nullptr && op2 != nullptr);
7442 assert(op1->gtRegNum != REG_NA && op2->gtRegNum != REG_NA);
7443 // No floating point values, so no need to worry about the register type
7444 // (i.e. for ARM32, where we used the genRegMask overload with a type).
7445 assert(varTypeIsIntegralOrI(op1) && varTypeIsIntegralOrI(op2));
7446 switchRegs |= genRegMask(op1->gtRegNum);
7447 switchRegs |= genRegMask(op2->gtRegNum);
7448 }
7449
7450#ifdef _TARGET_ARM64_
7451    // Next, if this block ends with a JCMP, we have to make sure not to copy
7452 // into the register that it uses or modify the local variable it must consume
7453 LclVarDsc* jcmpLocalVarDsc = nullptr;
7454 if (block->bbJumpKind == BBJ_COND)
7455 {
7456 GenTree* lastNode = LIR::AsRange(block).LastNode();
7457
7458 if (lastNode->OperIs(GT_JCMP))
7459 {
7460 GenTree* op1 = lastNode->gtGetOp1();
7461 switchRegs |= genRegMask(op1->gtRegNum);
7462
7463 if (op1->IsLocal())
7464 {
7465 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
7466 jcmpLocalVarDsc = &compiler->lvaTable[lcl->gtLclNum];
7467 }
7468 }
7469 }
7470#endif
7471
7472 VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap;
7473 regMaskTP sameWriteRegs = RBM_NONE;
7474 regMaskTP diffReadRegs = RBM_NONE;
7475
7476    // For each var that may require resolution, classify it as:
7477 // - in the same register at the end of this block and at each target (no resolution needed)
7478 // - in different registers at different targets (resolve separately):
7479 // diffResolutionSet
7480 // - in the same register at each target at which it's live, but different from the end of
7481 // this block. We may be able to resolve these as if it is "join", but only if they do not
7482 // write to any registers that are read by those in the diffResolutionSet:
7483 // sameResolutionSet
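    //
    // For example (illustrative): with successors S1 and S2, a var that is in R0 here
    // and expected in R0 at both S1 and S2 needs no resolution; one expected in R1 at
    // both goes into sameResolutionSet; one expected in R1 at S1 but R2 at S2 goes
    // into diffResolutionSet.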
7484
7485 VarSetOps::Iter outResolutionSetIter(compiler, outResolutionSet);
7486 unsigned outResolutionSetVarIndex = 0;
7487 while (outResolutionSetIter.NextElem(&outResolutionSetVarIndex))
7488 {
7489 regNumber fromReg = getVarReg(outVarToRegMap, outResolutionSetVarIndex);
7490 bool isMatch = true;
7491 bool isSame = false;
7492 bool maybeSingleTarget = false;
7493 bool maybeSameLivePaths = false;
7494 bool liveOnlyAtSplitEdge = true;
7495 regNumber sameToReg = REG_NA;
7496 for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
7497 {
7498 BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
7499 if (!VarSetOps::IsMember(compiler, succBlock->bbLiveIn, outResolutionSetVarIndex))
7500 {
7501 maybeSameLivePaths = true;
7502 continue;
7503 }
7504 else if (liveOnlyAtSplitEdge)
7505 {
7506 // Is the var live only at those target blocks which are connected by a split edge to this block
7507 liveOnlyAtSplitEdge = ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB));
7508 }
7509
7510 regNumber toReg = getVarReg(getInVarToRegMap(succBlock->bbNum), outResolutionSetVarIndex);
7511 if (sameToReg == REG_NA)
7512 {
7513 sameToReg = toReg;
7514 continue;
7515 }
7516 if (toReg == sameToReg)
7517 {
7518 continue;
7519 }
7520 sameToReg = REG_NA;
7521 break;
7522 }
7523
7524 // Check for the cases where we can't write to a register.
7525 // We only need to check for these cases if sameToReg is an actual register (not REG_STK).
7526 if (sameToReg != REG_NA && sameToReg != REG_STK)
7527 {
7528 // If there's a path on which this var isn't live, it may use the original value in sameToReg.
7529 // In this case, sameToReg will be in the liveOutRegs of this block.
7530 // Similarly, if sameToReg is in sameWriteRegs, it has already been used (i.e. for a lclVar that's
7531 // live only at another target), and we can't copy another lclVar into that reg in this block.
7532 regMaskTP sameToRegMask =
7533 genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
7534 if (maybeSameLivePaths &&
7535 (((sameToRegMask & liveOutRegs) != RBM_NONE) || ((sameToRegMask & sameWriteRegs) != RBM_NONE)))
7536 {
7537 sameToReg = REG_NA;
7538 }
7539 // If this register is used by a switch table at the end of the block, we can't do the copy
7540 // in this block (since we can't insert it after the switch).
7541 if ((sameToRegMask & switchRegs) != RBM_NONE)
7542 {
7543 sameToReg = REG_NA;
7544 }
7545
7546#ifdef _TARGET_ARM64_
7547 if (jcmpLocalVarDsc && (jcmpLocalVarDsc->lvVarIndex == outResolutionSetVarIndex))
7548 {
7549 sameToReg = REG_NA;
7550 }
7551#endif
7552
7553 // If the var is live only at those blocks connected by a split edge and not live-in at some of the
7554 // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution
7555 // will be deferred to the handling of split edges, which means copy will only be at those target(s).
7556 //
7557 // Another way to achieve similar resolution for vars live only at split edges is by removing them
7558            // from consideration up-front, but it requires that we traverse those edges anyway to account for
7559            // the registers that must not be overwritten.
7560 if (liveOnlyAtSplitEdge && maybeSameLivePaths)
7561 {
7562 sameToReg = REG_NA;
7563 }
7564 }
7565
7566 if (sameToReg == REG_NA)
7567 {
7568 VarSetOps::AddElemD(compiler, diffResolutionSet, outResolutionSetVarIndex);
7569 if (fromReg != REG_STK)
7570 {
7571 diffReadRegs |= genRegMask(fromReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
7572 }
7573 }
7574 else if (sameToReg != fromReg)
7575 {
7576 VarSetOps::AddElemD(compiler, sameResolutionSet, outResolutionSetVarIndex);
7577 setVarReg(sameVarToRegMap, outResolutionSetVarIndex, sameToReg);
7578 if (sameToReg != REG_STK)
7579 {
7580 sameWriteRegs |= genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
7581 }
7582 }
7583 }
7584
7585 if (!VarSetOps::IsEmpty(compiler, sameResolutionSet))
7586 {
7587 if ((sameWriteRegs & diffReadRegs) != RBM_NONE)
7588 {
7589 // We cannot split the "same" and "diff" regs if the "same" set writes registers
7590 // that must be read by the "diff" set. (Note that when these are done as a "batch"
7591 // we carefully order them to ensure all the input regs are read before they are
7592 // overwritten.)
7593 VarSetOps::UnionD(compiler, diffResolutionSet, sameResolutionSet);
7594 VarSetOps::ClearD(compiler, sameResolutionSet);
7595 }
7596 else
7597 {
7598 // For any vars in the sameResolutionSet, we can simply add the move at the end of "block".
7599 resolveEdge(block, nullptr, ResolveSharedCritical, sameResolutionSet);
7600 }
7601 }
7602 if (!VarSetOps::IsEmpty(compiler, diffResolutionSet))
7603 {
7604 for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
7605 {
7606 BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
7607
7608 // Any "diffResolutionSet" resolution for a block with no other predecessors will be handled later
7609 // as split resolution.
7610 if ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB))
7611 {
7612 continue;
7613 }
7614
7615 // Now collect the resolution set for just this edge, if any.
7616 // Check only the vars in diffResolutionSet that are live-in to this successor.
7617 bool needsResolution = false;
7618 VarToRegMap succInVarToRegMap = getInVarToRegMap(succBlock->bbNum);
7619 VARSET_TP edgeResolutionSet(VarSetOps::Intersection(compiler, diffResolutionSet, succBlock->bbLiveIn));
7620 VarSetOps::Iter iter(compiler, edgeResolutionSet);
7621 unsigned varIndex = 0;
7622 while (iter.NextElem(&varIndex))
7623 {
7624 regNumber fromReg = getVarReg(outVarToRegMap, varIndex);
7625 regNumber toReg = getVarReg(succInVarToRegMap, varIndex);
7626
7627 if (fromReg == toReg)
7628 {
7629 VarSetOps::RemoveElemD(compiler, edgeResolutionSet, varIndex);
7630 }
7631 }
7632 if (!VarSetOps::IsEmpty(compiler, edgeResolutionSet))
7633 {
7634 resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet);
7635 }
7636 }
7637 }
7638}
7639
7640//------------------------------------------------------------------------
7641// resolveEdges: Perform resolution across basic block edges
7642//
7643// Arguments:
7644// None.
7645//
7646// Return Value:
7647// None.
7648//
7649// Notes:
7650// Traverse the basic blocks.
7651// - If this block has a single predecessor that is not the immediately
7652// preceding block, perform any needed 'split' resolution at the beginning of this block
7653// - Otherwise if this block has critical incoming edges, handle them.
7654//    - If this block has a single successor that has multiple predecessors, perform any needed
7655// 'join' resolution at the end of this block.
7656// Note that a block may have both 'split' or 'critical' incoming edge(s) and 'join' outgoing
7657// edges.
7658
7659void LinearScan::resolveEdges()
7660{
7661 JITDUMP("RESOLVING EDGES\n");
7662
7663 // The resolutionCandidateVars set was initialized with all the lclVars that are live-in to
7664 // any block. We now intersect that set with any lclVars that ever spilled or split.
7665    // If there are no candidates for resolution, simply return.
7666
7667 VarSetOps::IntersectionD(compiler, resolutionCandidateVars, splitOrSpilledVars);
7668 if (VarSetOps::IsEmpty(compiler, resolutionCandidateVars))
7669 {
7670 return;
7671 }
7672
7673 BasicBlock *block, *prevBlock = nullptr;
7674
7675 // Handle all the critical edges first.
7676 // We will try to avoid resolution across critical edges in cases where all the critical-edge
7677 // targets of a block have the same home. We will then split the edges only for the
7678 // remaining mismatches. We visit the out-edges, as that allows us to share the moves that are
7679 // common among all the targets.
7680
7681 if (hasCriticalEdges)
7682 {
7683 foreach_block(compiler, block)
7684 {
7685 if (block->bbNum > bbNumMaxBeforeResolution)
7686 {
7687 // This is a new block added during resolution - we don't need to visit these now.
7688 continue;
7689 }
7690 if (blockInfo[block->bbNum].hasCriticalOutEdge)
7691 {
7692 handleOutgoingCriticalEdges(block);
7693 }
7694 prevBlock = block;
7695 }
7696 }
7697
7698 prevBlock = nullptr;
7699 foreach_block(compiler, block)
7700 {
7701 if (block->bbNum > bbNumMaxBeforeResolution)
7702 {
7703 // This is a new block added during resolution - we don't need to visit these now.
7704 continue;
7705 }
7706
7707 unsigned succCount = block->NumSucc(compiler);
7708 flowList* preds = block->bbPreds;
7709 BasicBlock* uniquePredBlock = block->GetUniquePred(compiler);
7710
7711 // First, if this block has a single predecessor,
7712 // we may need resolution at the beginning of this block.
7713 // This may be true even if it's the block we used for starting locations,
7714 // if a variable was spilled.
7715 VARSET_TP inResolutionSet(VarSetOps::Intersection(compiler, block->bbLiveIn, resolutionCandidateVars));
7716 if (!VarSetOps::IsEmpty(compiler, inResolutionSet))
7717 {
7718 if (uniquePredBlock != nullptr)
7719 {
7720 // We may have split edges during critical edge resolution, and in the process split
7721 // a non-critical edge as well.
7722 // It is unlikely that we would ever have more than one of these in sequence (indeed,
7723 // I don't think it's possible), but there's no need to assume that it can't.
7724 while (uniquePredBlock->bbNum > bbNumMaxBeforeResolution)
7725 {
7726 uniquePredBlock = uniquePredBlock->GetUniquePred(compiler);
7727 noway_assert(uniquePredBlock != nullptr);
7728 }
7729 resolveEdge(uniquePredBlock, block, ResolveSplit, inResolutionSet);
7730 }
7731 }
7732
7733 // Finally, if this block has a single successor:
7734 // - and that has at least one other predecessor (otherwise we will do the resolution at the
7735 // top of the successor),
7736 // - and that is not the target of a critical edge (otherwise we've already handled it)
7737 // we may need resolution at the end of this block.
7738
7739 if (succCount == 1)
7740 {
7741 BasicBlock* succBlock = block->GetSucc(0, compiler);
7742 if (succBlock->GetUniquePred(compiler) == nullptr)
7743 {
7744 VARSET_TP outResolutionSet(
7745 VarSetOps::Intersection(compiler, succBlock->bbLiveIn, resolutionCandidateVars));
7746 if (!VarSetOps::IsEmpty(compiler, outResolutionSet))
7747 {
7748 resolveEdge(block, succBlock, ResolveJoin, outResolutionSet);
7749 }
7750 }
7751 }
7752 }
7753
7754    // Now, fixup the mapping for any blocks that were added for edge splitting.
7755 // See the comment prior to the call to fgSplitEdge() in resolveEdge().
7756 // Note that we could fold this loop in with the checking code below, but that
7757 // would only improve the debug case, and would clutter up the code somewhat.
7758 if (compiler->fgBBNumMax > bbNumMaxBeforeResolution)
7759 {
7760 foreach_block(compiler, block)
7761 {
7762 if (block->bbNum > bbNumMaxBeforeResolution)
7763 {
7764 // There may be multiple blocks inserted when we split. But we must always have exactly
7765 // one path (i.e. all blocks must be single-successor and single-predecessor),
7766 // and only one block along the path may be non-empty.
7767 // Note that we may have a newly-inserted block that is empty, but which connects
7768 // two non-resolution blocks. This happens when an edge is split that requires it.
7769
7770 BasicBlock* succBlock = block;
7771 do
7772 {
7773 succBlock = succBlock->GetUniqueSucc();
7774 noway_assert(succBlock != nullptr);
7775 } while ((succBlock->bbNum > bbNumMaxBeforeResolution) && succBlock->isEmpty());
7776
7777 BasicBlock* predBlock = block;
7778 do
7779 {
7780 predBlock = predBlock->GetUniquePred(compiler);
7781 noway_assert(predBlock != nullptr);
7782 } while ((predBlock->bbNum > bbNumMaxBeforeResolution) && predBlock->isEmpty());
7783
7784 unsigned succBBNum = succBlock->bbNum;
7785 unsigned predBBNum = predBlock->bbNum;
7786 if (block->isEmpty())
7787 {
7788 // For the case of the empty block, find the non-resolution block (succ or pred).
7789 if (predBBNum > bbNumMaxBeforeResolution)
7790 {
7791 assert(succBBNum <= bbNumMaxBeforeResolution);
7792 predBBNum = 0;
7793 }
7794 else
7795 {
7796 succBBNum = 0;
7797 }
7798 }
7799 else
7800 {
7801 assert((succBBNum <= bbNumMaxBeforeResolution) && (predBBNum <= bbNumMaxBeforeResolution));
7802 }
7803 SplitEdgeInfo info = {predBBNum, succBBNum};
7804 getSplitBBNumToTargetBBNumMap()->Set(block->bbNum, info);
7805 }
7806 }
7807 }
7808
7809#ifdef DEBUG
7810 // Make sure the varToRegMaps match up on all edges.
7811 bool foundMismatch = false;
7812 foreach_block(compiler, block)
7813 {
7814 if (block->isEmpty() && block->bbNum > bbNumMaxBeforeResolution)
7815 {
7816 continue;
7817 }
7818 VarToRegMap toVarToRegMap = getInVarToRegMap(block->bbNum);
7819 for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
7820 {
7821 BasicBlock* predBlock = pred->flBlock;
7822 VarToRegMap fromVarToRegMap = getOutVarToRegMap(predBlock->bbNum);
7823 VarSetOps::Iter iter(compiler, block->bbLiveIn);
7824 unsigned varIndex = 0;
7825 while (iter.NextElem(&varIndex))
7826 {
7827 regNumber fromReg = getVarReg(fromVarToRegMap, varIndex);
7828 regNumber toReg = getVarReg(toVarToRegMap, varIndex);
7829 if (fromReg != toReg)
7830 {
7831 if (!foundMismatch)
7832 {
7833 foundMismatch = true;
7834 printf("Found mismatched var locations after resolution!\n");
7835 }
7836 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
7837 printf(" V%02u: " FMT_BB " to " FMT_BB ": %s to %s\n", varNum, predBlock->bbNum, block->bbNum,
7838 getRegName(fromReg), getRegName(toReg));
7839 }
7840 }
7841 }
7842 }
7843 assert(!foundMismatch);
7844#endif
7845 JITDUMP("\n");
7846}
7847
7848//------------------------------------------------------------------------
7849// resolveEdge: Perform the specified type of resolution between two blocks.
7850//
7851// Arguments:
7852// fromBlock - the block from which the edge originates
7853// toBlock - the block at which the edge terminates
7854// resolveType - the type of resolution to be performed
7855// liveSet - the set of tracked lclVar indices which may require resolution
7856//
7857// Return Value:
7858// None.
7859//
7860// Assumptions:
7861// The caller must have performed the analysis to determine the type of the edge.
7862//
7863// Notes:
7864// This method emits the correctly ordered moves necessary to place variables in the
7865// correct registers across a Split, Join or Critical edge.
7866// In order to avoid overwriting register values before they have been moved to their
7867// new home (register/stack), it first does the register-to-stack moves (to free those
7868// registers), then the register to register moves, ensuring that the target register
7869// is free before the move, and then finally the stack to register moves.
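//    As a hypothetical illustration (registers and vars made up), suppose the maps
//    disagree for three tracked vars:
//        V01: rax -> stack      V02: rcx -> rdx      V03: stack -> rax
//    The moves are then emitted, schematically, as:
//        mov [V01], rax    ; reg-to-stack first, which vacates rax
//        mov rdx, rcx      ; then reg-to-reg, into a known-free target
//        mov rax, [V03]    ; finally stack-to-reg, into the now-free rax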
7870
7871void LinearScan::resolveEdge(BasicBlock* fromBlock,
7872 BasicBlock* toBlock,
7873 ResolveType resolveType,
7874 VARSET_VALARG_TP liveSet)
7875{
7876 VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
7877 VarToRegMap toVarToRegMap;
7878 if (resolveType == ResolveSharedCritical)
7879 {
7880 toVarToRegMap = sharedCriticalVarToRegMap;
7881 }
7882 else
7883 {
7884 toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
7885 }
7886
7887 // The block to which we add the resolution moves depends on the resolveType
7888 BasicBlock* block;
7889 switch (resolveType)
7890 {
7891 case ResolveJoin:
7892 case ResolveSharedCritical:
7893 block = fromBlock;
7894 break;
7895 case ResolveSplit:
7896 block = toBlock;
7897 break;
7898 case ResolveCritical:
7899 // fgSplitEdge may add one or two BasicBlocks. It returns the block that splits
            // the edge from 'fromBlock' to 'toBlock', but if it inserts that block right after
7901 // a block with a fall-through it will have to create another block to handle that edge.
7902 // These new blocks can be mapped to existing blocks in order to correctly handle
7903 // the calls to recordVarLocationsAtStartOfBB() from codegen. That mapping is handled
7904 // in resolveEdges(), after all the edge resolution has been done (by calling this
7905 // method for each edge).
7906 block = compiler->fgSplitEdge(fromBlock, toBlock);
7907
7908 // Split edges are counted against fromBlock.
7909 INTRACK_STATS(updateLsraStat(LSRA_STAT_SPLIT_EDGE, fromBlock->bbNum));
7910 break;
7911 default:
7912 unreached();
7913 break;
7914 }
7915
7916#ifndef _TARGET_XARCH_
7917 // We record tempregs for beginning and end of each block.
7918 // For amd64/x86 we only need a tempReg for float - we'll use xchg for int.
7919 // TODO-Throughput: It would be better to determine the tempRegs on demand, but the code below
7920 // modifies the varToRegMaps so we don't have all the correct registers at the time
7921 // we need to get the tempReg.
7922 regNumber tempRegInt =
7923 (resolveType == ResolveSharedCritical) ? REG_NA : getTempRegForResolution(fromBlock, toBlock, TYP_INT);
7924#endif // !_TARGET_XARCH_
7925 regNumber tempRegFlt = REG_NA;
7926 regNumber tempRegDbl = REG_NA; // Used only for ARM
7927 if ((compiler->compFloatingPointUsed) && (resolveType != ResolveSharedCritical))
7928 {
7929#ifdef _TARGET_ARM_
7930 // Try to reserve a double register for TYP_DOUBLE and use it for TYP_FLOAT too if available.
7931 tempRegDbl = getTempRegForResolution(fromBlock, toBlock, TYP_DOUBLE);
7932 if (tempRegDbl != REG_NA)
7933 {
7934 tempRegFlt = tempRegDbl;
7935 }
7936 else
7937#endif // _TARGET_ARM_
7938 {
7939 tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT);
7940 }
7941 }
7942
7943 regMaskTP targetRegsToDo = RBM_NONE;
7944 regMaskTP targetRegsReady = RBM_NONE;
7945 regMaskTP targetRegsFromStack = RBM_NONE;
7946
7947 // The following arrays capture the location of the registers as they are moved:
7948 // - location[reg] gives the current location of the var that was originally in 'reg'.
7949 // (Note that a var may be moved more than once.)
7950 // - source[reg] gives the original location of the var that needs to be moved to 'reg'.
7951 // For example, if a var is in rax and needs to be moved to rsi, then we would start with:
7952 // location[rax] == rax
7953 // source[rsi] == rax -- this doesn't change
    // Then, if for some reason we need to move it temporarily to rbx, we would have:
7955 // location[rax] == rbx
7956 // Once we have completed the move, we will have:
7957 // location[rax] == REG_NA
7958 // This indicates that the var originally in rax is now in its target register.
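    // As a further hypothetical illustration, a two-register cycle rax <-> rsi starts as:
    //     location[rax] == rax, location[rsi] == rsi
    //     source[rsi] == rax,   source[rax] == rsi
    // Neither target starts out free, so on xarch an integer cycle is broken with a swap
    // (xchg), while otherwise one value is first moved through the temp register,
    // schematically:
    //     mov tmp, rax ; mov rax, rsi ; mov rsi, tmp
    // after which both location[rax] and location[rsi] are REG_NA (done).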
7959
7960 regNumberSmall location[REG_COUNT];
7961 C_ASSERT(sizeof(char) == sizeof(regNumberSmall)); // for memset to work
7962 memset(location, REG_NA, REG_COUNT);
7963 regNumberSmall source[REG_COUNT];
7964 memset(source, REG_NA, REG_COUNT);
7965
7966 // What interval is this register associated with?
7967 // (associated with incoming reg)
7968 Interval* sourceIntervals[REG_COUNT];
7969 memset(&sourceIntervals, 0, sizeof(sourceIntervals));
7970
7971 // Intervals for vars that need to be loaded from the stack
7972 Interval* stackToRegIntervals[REG_COUNT];
7973 memset(&stackToRegIntervals, 0, sizeof(stackToRegIntervals));
7974
7975 // Get the starting insertion point for the "to" resolution
7976 GenTree* insertionPoint = nullptr;
7977 if (resolveType == ResolveSplit || resolveType == ResolveCritical)
7978 {
7979 insertionPoint = LIR::AsRange(block).FirstNonPhiNode();
7980 }
7981
7982 // First:
7983 // - Perform all moves from reg to stack (no ordering needed on these)
7984 // - For reg to reg moves, record the current location, associating their
7985 // source location with the target register they need to go into
7986 // - For stack to reg moves (done last, no ordering needed between them)
7987 // record the interval associated with the target reg
7988 // TODO-Throughput: We should be looping over the liveIn and liveOut registers, since
7989 // that will scale better than the live variables
7990
7991 VarSetOps::Iter iter(compiler, liveSet);
7992 unsigned varIndex = 0;
7993 while (iter.NextElem(&varIndex))
7994 {
7995 regNumber fromReg = getVarReg(fromVarToRegMap, varIndex);
7996 regNumber toReg = getVarReg(toVarToRegMap, varIndex);
7997 if (fromReg == toReg)
7998 {
7999 continue;
8000 }
8001
8002 // For Critical edges, the location will not change on either side of the edge,
8003 // since we'll add a new block to do the move.
8004 if (resolveType == ResolveSplit)
8005 {
8006 setVarReg(toVarToRegMap, varIndex, fromReg);
8007 }
8008 else if (resolveType == ResolveJoin || resolveType == ResolveSharedCritical)
8009 {
8010 setVarReg(fromVarToRegMap, varIndex, toReg);
8011 }
8012
8013 assert(fromReg < UCHAR_MAX && toReg < UCHAR_MAX);
8014
8015 Interval* interval = getIntervalForLocalVar(varIndex);
8016
8017 if (fromReg == REG_STK)
8018 {
8019 stackToRegIntervals[toReg] = interval;
8020 targetRegsFromStack |= genRegMask(toReg);
8021 }
8022 else if (toReg == REG_STK)
8023 {
8024 // Do the reg to stack moves now
8025 addResolution(block, insertionPoint, interval, REG_STK, fromReg);
8026 JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8027 }
8028 else
8029 {
8030 location[fromReg] = (regNumberSmall)fromReg;
8031 source[toReg] = (regNumberSmall)fromReg;
8032 sourceIntervals[fromReg] = interval;
8033 targetRegsToDo |= genRegMask(toReg);
8034 }
8035 }
8036
8037 // REGISTER to REGISTER MOVES
8038
8039 // First, find all the ones that are ready to move now
8040 regMaskTP targetCandidates = targetRegsToDo;
8041 while (targetCandidates != RBM_NONE)
8042 {
8043 regMaskTP targetRegMask = genFindLowestBit(targetCandidates);
8044 targetCandidates &= ~targetRegMask;
8045 regNumber targetReg = genRegNumFromMask(targetRegMask);
8046 if (location[targetReg] == REG_NA)
8047 {
8048#ifdef _TARGET_ARM_
8049 regNumber sourceReg = (regNumber)source[targetReg];
8050 Interval* interval = sourceIntervals[sourceReg];
8051 if (interval->registerType == TYP_DOUBLE)
8052 {
8053 // For ARM32, make sure that both of the float halves of the double register are available.
8054 assert(genIsValidDoubleReg(targetReg));
8055 regNumber anotherHalfRegNum = REG_NEXT(targetReg);
8056 if (location[anotherHalfRegNum] == REG_NA)
8057 {
8058 targetRegsReady |= targetRegMask;
8059 }
8060 }
8061 else
8062#endif // _TARGET_ARM_
8063 {
8064 targetRegsReady |= targetRegMask;
8065 }
8066 }
8067 }
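
    // For example (hypothetical), with pending moves rax -> rbx and rbx -> rcx, where rcx
    // is not the source of any move: only rcx is ready initially (location[rcx] == REG_NA).
    // Emitting rbx -> rcx vacates rbx, which the loop below then adds to targetRegsReady,
    // so rax -> rbx can be emitted without needing a temporary.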
8068
8069 // Perform reg to reg moves
8070 while (targetRegsToDo != RBM_NONE)
8071 {
8072 while (targetRegsReady != RBM_NONE)
8073 {
8074 regMaskTP targetRegMask = genFindLowestBit(targetRegsReady);
8075 targetRegsToDo &= ~targetRegMask;
8076 targetRegsReady &= ~targetRegMask;
8077 regNumber targetReg = genRegNumFromMask(targetRegMask);
8078 assert(location[targetReg] != targetReg);
8079 regNumber sourceReg = (regNumber)source[targetReg];
8080 regNumber fromReg = (regNumber)location[sourceReg];
8081 assert(fromReg < UCHAR_MAX && sourceReg < UCHAR_MAX);
8082 Interval* interval = sourceIntervals[sourceReg];
8083 assert(interval != nullptr);
8084 addResolution(block, insertionPoint, interval, targetReg, fromReg);
8085 JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8086 sourceIntervals[sourceReg] = nullptr;
8087 location[sourceReg] = REG_NA;
8088
8089 // Do we have a free targetReg?
8090 if (fromReg == sourceReg)
8091 {
8092 if (source[fromReg] != REG_NA)
8093 {
8094 regMaskTP fromRegMask = genRegMask(fromReg);
8095 targetRegsReady |= fromRegMask;
8096#ifdef _TARGET_ARM_
8097 if (genIsValidDoubleReg(fromReg))
8098 {
8099 // Ensure that either:
8100 // - the Interval targeting fromReg is not double, or
8101 // - the other half of the double is free.
8102 Interval* otherInterval = sourceIntervals[source[fromReg]];
8103 regNumber upperHalfReg = REG_NEXT(fromReg);
8104 if ((otherInterval->registerType == TYP_DOUBLE) && (location[upperHalfReg] != REG_NA))
8105 {
8106 targetRegsReady &= ~fromRegMask;
8107 }
8108 }
8109 }
8110 else if (genIsValidFloatReg(fromReg) && !genIsValidDoubleReg(fromReg))
8111 {
8112 // We may have freed up the other half of a double where the lower half
8113 // was already free.
8114 regNumber lowerHalfReg = REG_PREV(fromReg);
8115 regNumber lowerHalfSrcReg = (regNumber)source[lowerHalfReg];
8116 regNumber lowerHalfSrcLoc = (regNumber)location[lowerHalfReg];
8117 // Necessary conditions:
8118 // - There is a source register for this reg (lowerHalfSrcReg != REG_NA)
8119 // - It is currently free (lowerHalfSrcLoc == REG_NA)
8120 // - The source interval isn't yet completed (sourceIntervals[lowerHalfSrcReg] != nullptr)
8121 // - It's not in the ready set ((targetRegsReady & genRegMask(lowerHalfReg)) ==
8122 // RBM_NONE)
8123 //
8124 if ((lowerHalfSrcReg != REG_NA) && (lowerHalfSrcLoc == REG_NA) &&
8125 (sourceIntervals[lowerHalfSrcReg] != nullptr) &&
8126 ((targetRegsReady & genRegMask(lowerHalfReg)) == RBM_NONE))
8127 {
8128 // This must be a double interval, otherwise it would be in targetRegsReady, or already
8129 // completed.
8130 assert(sourceIntervals[lowerHalfSrcReg]->registerType == TYP_DOUBLE);
8131 targetRegsReady |= genRegMask(lowerHalfReg);
8132 }
8133#endif // _TARGET_ARM_
8134 }
8135 }
8136 }
8137 if (targetRegsToDo != RBM_NONE)
8138 {
8139 regMaskTP targetRegMask = genFindLowestBit(targetRegsToDo);
8140 regNumber targetReg = genRegNumFromMask(targetRegMask);
8141
8142 // Is it already there due to other moves?
8143 // If not, move it to the temp reg, OR swap it with another register
8144 regNumber sourceReg = (regNumber)source[targetReg];
8145 regNumber fromReg = (regNumber)location[sourceReg];
8146 if (targetReg == fromReg)
8147 {
8148 targetRegsToDo &= ~targetRegMask;
8149 }
8150 else
8151 {
8152 regNumber tempReg = REG_NA;
8153 bool useSwap = false;
8154 if (emitter::isFloatReg(targetReg))
8155 {
8156#ifdef _TARGET_ARM_
8157 if (sourceIntervals[fromReg]->registerType == TYP_DOUBLE)
8158 {
8159 // ARM32 requires a double temp register for TYP_DOUBLE.
8160 tempReg = tempRegDbl;
8161 }
8162 else
8163#endif // _TARGET_ARM_
8164 tempReg = tempRegFlt;
8165 }
8166#ifdef _TARGET_XARCH_
8167 else
8168 {
8169 useSwap = true;
8170 }
8171#else // !_TARGET_XARCH_
8172
8173 else
8174 {
8175 tempReg = tempRegInt;
8176 }
8177
8178#endif // !_TARGET_XARCH_
8179 if (useSwap || tempReg == REG_NA)
8180 {
8181 // First, we have to figure out the destination register for what's currently in fromReg,
8182 // so that we can find its sourceInterval.
8183 regNumber otherTargetReg = REG_NA;
8184
8185 // By chance, is fromReg going where it belongs?
8186 if (location[source[fromReg]] == targetReg)
8187 {
8188 otherTargetReg = fromReg;
8189 // If we can swap, we will be done with otherTargetReg as well.
8190 // Otherwise, we'll spill it to the stack and reload it later.
8191 if (useSwap)
8192 {
8193 regMaskTP fromRegMask = genRegMask(fromReg);
8194 targetRegsToDo &= ~fromRegMask;
8195 }
8196 }
8197 else
8198 {
8199 // Look at the remaining registers from targetRegsToDo (which we expect to be relatively
8200 // small at this point) to find out what's currently in targetReg.
8201 regMaskTP mask = targetRegsToDo;
8202 while (mask != RBM_NONE && otherTargetReg == REG_NA)
8203 {
8204 regMaskTP nextRegMask = genFindLowestBit(mask);
8205 regNumber nextReg = genRegNumFromMask(nextRegMask);
8206 mask &= ~nextRegMask;
8207 if (location[source[nextReg]] == targetReg)
8208 {
8209 otherTargetReg = nextReg;
8210 }
8211 }
8212 }
8213 assert(otherTargetReg != REG_NA);
8214
8215 if (useSwap)
8216 {
8217 // Generate a "swap" of fromReg and targetReg
8218 insertSwap(block, insertionPoint, sourceIntervals[source[otherTargetReg]]->varNum, targetReg,
8219 sourceIntervals[sourceReg]->varNum, fromReg);
8220 location[sourceReg] = REG_NA;
8221 location[source[otherTargetReg]] = (regNumberSmall)fromReg;
8222
8223 INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
8224 }
8225 else
8226 {
8227 // Spill "targetReg" to the stack and add its eventual target (otherTargetReg)
8228 // to "targetRegsFromStack", which will be handled below.
8229 // NOTE: This condition is very rare. Setting COMPlus_JitStressRegs=0x203
8230 // has been known to trigger it in JIT SH.
8231
8232 // First, spill "otherInterval" from targetReg to the stack.
8233 Interval* otherInterval = sourceIntervals[source[otherTargetReg]];
8234 setIntervalAsSpilled(otherInterval);
8235 addResolution(block, insertionPoint, otherInterval, REG_STK, targetReg);
8236 JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8237 location[source[otherTargetReg]] = REG_STK;
8238
8239 // Now, move the interval that is going to targetReg, and add its "fromReg" to
8240 // "targetRegsReady".
8241 addResolution(block, insertionPoint, sourceIntervals[sourceReg], targetReg, fromReg);
8242 JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8243 location[sourceReg] = REG_NA;
8244 targetRegsReady |= genRegMask(fromReg);
8245 }
8246 targetRegsToDo &= ~targetRegMask;
8247 }
8248 else
8249 {
8250 compiler->codeGen->regSet.rsSetRegsModified(genRegMask(tempReg) DEBUGARG(true));
8251#ifdef _TARGET_ARM_
8252 if (sourceIntervals[fromReg]->registerType == TYP_DOUBLE)
8253 {
8254 assert(genIsValidDoubleReg(targetReg));
8255 assert(genIsValidDoubleReg(tempReg));
8256
8257 addResolutionForDouble(block, insertionPoint, sourceIntervals, location, tempReg, targetReg,
8258 resolveType);
8259 }
8260 else
8261#endif // _TARGET_ARM_
8262 {
8263 assert(sourceIntervals[targetReg] != nullptr);
8264
8265 addResolution(block, insertionPoint, sourceIntervals[targetReg], tempReg, targetReg);
8266 JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8267 location[targetReg] = (regNumberSmall)tempReg;
8268 }
8269 targetRegsReady |= targetRegMask;
8270 }
8271 }
8272 }
8273 }
8274
8275 // Finally, perform stack to reg moves
8276 // All the target regs will be empty at this point
8277 while (targetRegsFromStack != RBM_NONE)
8278 {
8279 regMaskTP targetRegMask = genFindLowestBit(targetRegsFromStack);
8280 targetRegsFromStack &= ~targetRegMask;
8281 regNumber targetReg = genRegNumFromMask(targetRegMask);
8282
8283 Interval* interval = stackToRegIntervals[targetReg];
8284 assert(interval != nullptr);
8285
8286 addResolution(block, insertionPoint, interval, targetReg, REG_STK);
8287 JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8288 }
8289}
8290
8291#if TRACK_LSRA_STATS
8292// ----------------------------------------------------------
8293// updateLsraStat: Increment LSRA stat counter.
8294//
8295// Arguments:
8296// stat - LSRA stat enum
//    bbNum - Number of the basic block with which the stat is to be associated.
8299//
8300void LinearScan::updateLsraStat(LsraStat stat, unsigned bbNum)
8301{
8302 if (bbNum > bbNumMaxBeforeResolution)
8303 {
8304 // This is a newly created basic block as part of resolution.
        // These blocks contain resolution moves that are already accounted for.
8306 return;
8307 }
8308
8309 switch (stat)
8310 {
8311 case LSRA_STAT_SPILL:
8312 ++(blockInfo[bbNum].spillCount);
8313 break;
8314
8315 case LSRA_STAT_COPY_REG:
8316 ++(blockInfo[bbNum].copyRegCount);
8317 break;
8318
8319 case LSRA_STAT_RESOLUTION_MOV:
8320 ++(blockInfo[bbNum].resolutionMovCount);
8321 break;
8322
8323 case LSRA_STAT_SPLIT_EDGE:
8324 ++(blockInfo[bbNum].splitEdgeCount);
8325 break;
8326
8327 default:
8328 break;
8329 }
8330}
8331
8332// -----------------------------------------------------------
8333// dumpLsraStats - dumps Lsra stats to given file.
8334//
8335// Arguments:
8336// file - file to which stats are to be written.
8337//
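// A typical dump looks like the following (illustrative only; the method name and the
// counts are made up, and per-block lines appear only for blocks with nonzero counts):
//     ----------
//     LSRA Stats : SomeNamespace:SomeMethod()
//     ----------
//     BB01 [     100]: SpillCount = 1, ResolutionMovs = 2, SplitEdges = 0, CopyReg = 3
//     Total Tracked Vars:  12
//     ...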
8338void LinearScan::dumpLsraStats(FILE* file)
8339{
8340 unsigned sumSpillCount = 0;
8341 unsigned sumCopyRegCount = 0;
8342 unsigned sumResolutionMovCount = 0;
8343 unsigned sumSplitEdgeCount = 0;
8344 UINT64 wtdSpillCount = 0;
8345 UINT64 wtdCopyRegCount = 0;
8346 UINT64 wtdResolutionMovCount = 0;
8347
8348 fprintf(file, "----------\n");
8349 fprintf(file, "LSRA Stats");
8350#ifdef DEBUG
8351 if (!VERBOSE)
8352 {
8353 fprintf(file, " : %s\n", compiler->info.compFullName);
8354 }
8355 else
8356 {
        // In verbose mode the full method name has already been dumped,
        // so there is no need to repeat it with the LSRA stats.
8359 fprintf(file, "\n");
8360 }
8361#else
8362 fprintf(file, " : %s\n", compiler->eeGetMethodFullName(compiler->info.compCompHnd));
8363#endif
8364
8365 fprintf(file, "----------\n");
8366
8367 for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
8368 {
8369 if (block->bbNum > bbNumMaxBeforeResolution)
8370 {
8371 continue;
8372 }
8373
8374 unsigned spillCount = blockInfo[block->bbNum].spillCount;
8375 unsigned copyRegCount = blockInfo[block->bbNum].copyRegCount;
8376 unsigned resolutionMovCount = blockInfo[block->bbNum].resolutionMovCount;
8377 unsigned splitEdgeCount = blockInfo[block->bbNum].splitEdgeCount;
8378
8379 if (spillCount != 0 || copyRegCount != 0 || resolutionMovCount != 0 || splitEdgeCount != 0)
8380 {
8381 fprintf(file, FMT_BB " [%8d]: ", block->bbNum, block->bbWeight);
8382 fprintf(file, "SpillCount = %d, ResolutionMovs = %d, SplitEdges = %d, CopyReg = %d\n", spillCount,
8383 resolutionMovCount, splitEdgeCount, copyRegCount);
8384 }
8385
8386 sumSpillCount += spillCount;
8387 sumCopyRegCount += copyRegCount;
8388 sumResolutionMovCount += resolutionMovCount;
8389 sumSplitEdgeCount += splitEdgeCount;
8390
8391 wtdSpillCount += (UINT64)spillCount * block->bbWeight;
8392 wtdCopyRegCount += (UINT64)copyRegCount * block->bbWeight;
8393 wtdResolutionMovCount += (UINT64)resolutionMovCount * block->bbWeight;
8394 }
8395
8396 fprintf(file, "Total Tracked Vars: %d\n", compiler->lvaTrackedCount);
8397 fprintf(file, "Total Reg Cand Vars: %d\n", regCandidateVarCount);
8398 fprintf(file, "Total number of Intervals: %d\n", static_cast<unsigned>(intervals.size() - 1));
8399 fprintf(file, "Total number of RefPositions: %d\n", static_cast<unsigned>(refPositions.size() - 1));
8400 fprintf(file, "Total Spill Count: %d Weighted: %I64u\n", sumSpillCount, wtdSpillCount);
8401 fprintf(file, "Total CopyReg Count: %d Weighted: %I64u\n", sumCopyRegCount, wtdCopyRegCount);
8402 fprintf(file, "Total ResolutionMov Count: %d Weighted: %I64u\n", sumResolutionMovCount, wtdResolutionMovCount);
8403 fprintf(file, "Total number of split edges: %d\n", sumSplitEdgeCount);
8404
8405 // compute total number of spill temps created
8406 unsigned numSpillTemps = 0;
8407 for (int i = 0; i < TYP_COUNT; i++)
8408 {
8409 numSpillTemps += maxSpill[i];
8410 }
8411 fprintf(file, "Total Number of spill temps created: %d\n\n", numSpillTemps);
8412}
8413#endif // TRACK_LSRA_STATS
8414
8415#ifdef DEBUG
8416void dumpRegMask(regMaskTP regs)
8417{
8418 if (regs == RBM_ALLINT)
8419 {
8420 printf("[allInt]");
8421 }
8422 else if (regs == (RBM_ALLINT & ~RBM_FPBASE))
8423 {
8424 printf("[allIntButFP]");
8425 }
8426 else if (regs == RBM_ALLFLOAT)
8427 {
8428 printf("[allFloat]");
8429 }
8430 else if (regs == RBM_ALLDOUBLE)
8431 {
8432 printf("[allDouble]");
8433 }
8434 else
8435 {
8436 dspRegMask(regs);
8437 }
8438}
8439
8440static const char* getRefTypeName(RefType refType)
8441{
8442 switch (refType)
8443 {
8444#define DEF_REFTYPE(memberName, memberValue, shortName) \
8445 case memberName: \
8446 return #memberName;
8447#include "lsra_reftypes.h"
8448#undef DEF_REFTYPE
8449 default:
8450 return nullptr;
8451 }
8452}
8453
8454static const char* getRefTypeShortName(RefType refType)
8455{
8456 switch (refType)
8457 {
8458#define DEF_REFTYPE(memberName, memberValue, shortName) \
8459 case memberName: \
8460 return shortName;
8461#include "lsra_reftypes.h"
8462#undef DEF_REFTYPE
8463 default:
8464 return nullptr;
8465 }
8466}
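
// For reference, each DEF_REFTYPE entry in lsra_reftypes.h expands to one 'case' in the
// switches above. For an illustrative (not necessarily literal) entry such as
//     DEF_REFTYPE(RefTypeUse, ..., "Use ")
// getRefTypeName() returns "RefTypeUse" (via #memberName), and getRefTypeShortName()
// returns "Use ".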
8467
8468void RefPosition::dump()
8469{
8470 printf("<RefPosition #%-3u @%-3u", rpNum, nodeLocation);
8471
8472 printf(" %s ", getRefTypeName(refType));
8473
8474 if (this->isPhysRegRef)
8475 {
8476 this->getReg()->tinyDump();
8477 }
8478 else if (getInterval())
8479 {
8480 this->getInterval()->tinyDump();
8481 }
8482
8483 if (this->treeNode)
8484 {
8485 printf("%s ", treeNode->OpName(treeNode->OperGet()));
8486 }
8487 printf(FMT_BB " ", this->bbNum);
8488
8489 printf("regmask=");
8490 dumpRegMask(registerAssignment);
8491
8492 printf(" minReg=%d", minRegCandidateCount);
8493
8494 if (this->lastUse)
8495 {
8496 printf(" last");
8497 }
8498 if (this->reload)
8499 {
8500 printf(" reload");
8501 }
8502 if (this->spillAfter)
8503 {
8504 printf(" spillAfter");
8505 }
8506 if (this->moveReg)
8507 {
8508 printf(" move");
8509 }
8510 if (this->copyReg)
8511 {
8512 printf(" copy");
8513 }
8514 if (this->isFixedRegRef)
8515 {
8516 printf(" fixed");
8517 }
8518 if (this->isLocalDefUse)
8519 {
8520 printf(" local");
8521 }
8522 if (this->delayRegFree)
8523 {
8524 printf(" delay");
8525 }
8526 if (this->outOfOrder)
8527 {
8528 printf(" outOfOrder");
8529 }
8530
8531 if (this->AllocateIfProfitable())
8532 {
8533 printf(" regOptional");
8534 }
8535 printf(">\n");
8536}
8537
8538void RegRecord::dump()
8539{
8540 tinyDump();
8541}
8542
8543void Interval::dump()
8544{
8545 printf("Interval %2u:", intervalIndex);
8546
8547 if (isLocalVar)
8548 {
8549 printf(" (V%02u)", varNum);
8550 }
8551 if (isInternal)
8552 {
8553 printf(" (INTERNAL)");
8554 }
8555 if (isSpilled)
8556 {
8557 printf(" (SPILLED)");
8558 }
8559 if (isSplit)
8560 {
8561 printf(" (SPLIT)");
8562 }
8563 if (isStructField)
8564 {
8565 printf(" (struct)");
8566 }
8567 if (isPromotedStruct)
8568 {
8569 printf(" (promoted struct)");
8570 }
8571 if (hasConflictingDefUse)
8572 {
8573 printf(" (def-use conflict)");
8574 }
8575 if (hasInterferingUses)
8576 {
8577 printf(" (interfering uses)");
8578 }
8579 if (isSpecialPutArg)
8580 {
8581 printf(" (specialPutArg)");
8582 }
8583 if (isConstant)
8584 {
8585 printf(" (constant)");
8586 }
8587
8588 printf(" RefPositions {");
8589 for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr;
8590 refPosition = refPosition->nextRefPosition)
8591 {
8592 printf("#%u@%u", refPosition->rpNum, refPosition->nodeLocation);
8593 if (refPosition->nextRefPosition)
8594 {
8595 printf(" ");
8596 }
8597 }
8598 printf("}");
8599
8600 // this is not used (yet?)
8601 // printf(" SpillOffset %d", this->spillOffset);
8602
8603 printf(" physReg:%s", getRegName(physReg));
8604
8605 printf(" Preferences=");
8606 dumpRegMask(this->registerPreferences);
8607
8608 if (relatedInterval)
8609 {
8610 printf(" RelatedInterval ");
8611 relatedInterval->microDump();
8612 printf("[%p]", dspPtr(relatedInterval));
8613 }
8614
8615 printf("\n");
8616}
8617
8618// print out very concise representation
8619void Interval::tinyDump()
8620{
8621 printf("<Ivl:%u", intervalIndex);
8622 if (isLocalVar)
8623 {
8624 printf(" V%02u", varNum);
8625 }
8626 if (isInternal)
8627 {
8628 printf(" internal");
8629 }
8630 printf("> ");
8631}
8632
8633// print out extremely concise representation
8634void Interval::microDump()
8635{
8636 char intervalTypeChar = 'I';
8637 if (isInternal)
8638 {
8639 intervalTypeChar = 'T';
8640 }
8641 else if (isLocalVar)
8642 {
8643 intervalTypeChar = 'L';
8644 }
8645
8646 printf("<%c%u>", intervalTypeChar, intervalIndex);
8647}
8648
8649void RegRecord::tinyDump()
8650{
8651 printf("<Reg:%-3s> ", getRegName(regNum));
8652}
8653
8654void LinearScan::dumpNodeInfo(GenTree* node, regMaskTP dstCandidates, int srcCount, int dstCount)
8655{
8656 if (!VERBOSE)
8657 {
8658 return;
8659 }
8660 // This is formatted like the old dump to make diffs easier. TODO-Cleanup: improve.
8661 int internalIntCount = 0;
8662 int internalFloatCount = 0;
8663 regMaskTP internalCandidates = RBM_NONE;
8664 for (int i = 0; i < internalCount; i++)
8665 {
8666 RefPosition* def = internalDefs[i];
8667 if (def->getInterval()->registerType == TYP_INT)
8668 {
8669 internalIntCount++;
8670 }
8671 else
8672 {
8673 internalFloatCount++;
8674 }
8675 internalCandidates |= def->registerAssignment;
8676 }
8677 if (dstCandidates == RBM_NONE)
8678 {
8679 dstCandidates = varTypeIsFloating(node) ? allRegs(TYP_FLOAT) : allRegs(TYP_INT);
8680 }
8681 if (internalCandidates == RBM_NONE)
8682 {
8683 internalCandidates = allRegs(TYP_INT);
8684 }
8685 printf(" +<TreeNodeInfo %d=%d %di %df", dstCount, srcCount, internalIntCount, internalFloatCount);
8686 printf(" src=");
8687 dumpRegMask(varTypeIsFloating(node) ? allRegs(TYP_FLOAT) : allRegs(TYP_INT));
8688 printf(" int=");
8689 dumpRegMask(internalCandidates);
8690 printf(" dst=");
8691 dumpRegMask(dstCandidates);
8692 if (node->IsUnusedValue())
8693 {
8694 printf(" L");
8695 }
8696 printf(" I");
8697 if (pendingDelayFree)
8698 {
8699 printf(" D");
8700 }
8701 if (setInternalRegsDelayFree)
8702 {
8703 printf(" ID");
8704 }
8705 printf(">");
8706 node->dumpLIRFlags();
8707 printf("\n consume= %d produce=%d\n", srcCount, dstCount);
8708}
8709
8710void LinearScan::dumpDefList()
8711{
8712 if (!VERBOSE)
8713 {
8714 return;
8715 }
8716 JITDUMP("DefList: { ");
8717 bool first = true;
8718 for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
8719 listNode = listNode->Next())
8720 {
8721 GenTree* node = listNode->treeNode;
8722 JITDUMP("%sN%03u.t%d. %s", first ? "" : "; ", node->gtSeqNum, node->gtTreeID, GenTree::OpName(node->OperGet()));
8723 first = false;
8724 }
8725 JITDUMP(" }\n");
8726}
8727
8728void LinearScan::lsraDumpIntervals(const char* msg)
8729{
8730 printf("\nLinear scan intervals %s:\n", msg);
8731 for (Interval& interval : intervals)
8732 {
8733 // only dump something if it has references
8734 // if (interval->firstRefPosition)
8735 interval.dump();
8736 }
8737
8738 printf("\n");
8739}
8740
8741// Dumps a tree node as a destination or source operand, with the style
8742// of dump dependent on the mode
8743void LinearScan::lsraGetOperandString(GenTree* tree,
8744 LsraTupleDumpMode mode,
8745 char* operandString,
8746 unsigned operandStringLength)
8747{
8748 const char* lastUseChar = "";
8749 if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
8750 {
8751 lastUseChar = "*";
8752 }
8753 switch (mode)
8754 {
        case LinearScan::LSRA_DUMP_PRE:
        case LinearScan::LSRA_DUMP_REFPOS:
            _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtTreeID, lastUseChar);
            break;
8761 case LinearScan::LSRA_DUMP_POST:
8762 {
8763 Compiler* compiler = JitTls::GetCompiler();
8764
8765 if (!tree->gtHasReg())
8766 {
8767 _snprintf_s(operandString, operandStringLength, operandStringLength, "STK%s", lastUseChar);
8768 }
8769 else
8770 {
8771 regNumber reg = tree->gtRegNum;
8772 int charCount = _snprintf_s(operandString, operandStringLength, operandStringLength, "%s%s",
8773 getRegName(reg, genIsValidFloatReg(reg)), lastUseChar);
8774 operandString += charCount;
8775 operandStringLength -= charCount;
8776
8777 if (tree->IsMultiRegNode())
8778 {
8779 unsigned regCount = tree->GetMultiRegCount();
8780 for (unsigned regIndex = 1; regIndex < regCount; regIndex++)
8781 {
8782 regNumber reg = tree->GetRegByIndex(regIndex);
8783 charCount = _snprintf_s(operandString, operandStringLength, operandStringLength, ",%s%s",
8784 getRegName(reg, genIsValidFloatReg(reg)), lastUseChar);
8785 operandString += charCount;
8786 operandStringLength -= charCount;
8787 }
8788 }
8789 }
8790 }
8791 break;
8792 default:
8793 printf("ERROR: INVALID TUPLE DUMP MODE\n");
8794 break;
8795 }
8796}
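
// Illustrative operand strings produced above (values made up): in the PRE and REFPOS
// modes a node prints as its tree ID, e.g. "t42", or "t42*" for a last use; in POST
// mode it prints its assigned location, e.g. "rax", "rax,rdx" for a multi-reg node,
// or "STK" if no register was assigned.
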
8797void LinearScan::lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDest)
8798{
8799 Compiler* compiler = JitTls::GetCompiler();
8800 const unsigned operandStringLength = 16;
8801 char operandString[operandStringLength];
8802 const char* emptyDestOperand = " ";
8803 char spillChar = ' ';
8804
8805 if (mode == LinearScan::LSRA_DUMP_POST)
8806 {
8807 if ((tree->gtFlags & GTF_SPILL) != 0)
8808 {
8809 spillChar = 'S';
8810 }
8811 if (!hasDest && tree->gtHasReg())
8812 {
8813 // A node can define a register, but not produce a value for a parent to consume,
8814 // i.e. in the "localDefUse" case.
8815 // There used to be an assert here that we wouldn't spill such a node.
8816 // However, we can have unused lclVars that wind up being the node at which
            // it is spilled. This probably indicates a bug, but we don't really want to
8818 // assert during a dump.
8819 if (spillChar == 'S')
8820 {
8821 spillChar = '$';
8822 }
8823 else
8824 {
8825 spillChar = '*';
8826 }
8827 hasDest = true;
8828 }
8829 }
8830 printf("%c N%03u. ", spillChar, tree->gtSeqNum);
8831
8832 LclVarDsc* varDsc = nullptr;
8833 unsigned varNum = UINT_MAX;
8834 if (tree->IsLocal())
8835 {
8836 varNum = tree->gtLclVarCommon.gtLclNum;
8837 varDsc = &(compiler->lvaTable[varNum]);
8838 if (varDsc->lvLRACandidate)
8839 {
8840 hasDest = false;
8841 }
8842 }
8843 if (hasDest)
8844 {
        if ((mode == LinearScan::LSRA_DUMP_POST) && ((tree->gtFlags & GTF_SPILLED) != 0))
8846 {
8847 assert(tree->gtHasReg());
8848 }
8849 lsraGetOperandString(tree, mode, operandString, operandStringLength);
8850 printf("%-15s =", operandString);
8851 }
8852 else
8853 {
8854 printf("%-15s ", emptyDestOperand);
8855 }
8856 if (varDsc != nullptr)
8857 {
8858 if (varDsc->lvLRACandidate)
8859 {
8860 if (mode == LSRA_DUMP_REFPOS)
8861 {
8862 printf(" V%02u(L%d)", varNum, getIntervalForLocalVar(varDsc->lvVarIndex)->intervalIndex);
8863 }
8864 else
8865 {
8866 lsraGetOperandString(tree, mode, operandString, operandStringLength);
8867 printf(" V%02u(%s)", varNum, operandString);
                if ((mode == LinearScan::LSRA_DUMP_POST) && ((tree->gtFlags & GTF_SPILLED) != 0))
8869 {
8870 printf("R");
8871 }
8872 }
8873 }
8874 else
8875 {
8876 printf(" V%02u MEM", varNum);
8877 }
8878 }
8879 else if (tree->OperIs(GT_ASG))
8880 {
8881 assert(!tree->gtHasReg());
8882 printf(" asg%s ", GenTree::OpName(tree->OperGet()));
8883 }
8884 else
8885 {
8886 compiler->gtDispNodeName(tree);
8887 if (tree->OperKind() & GTK_LEAF)
8888 {
8889 compiler->gtDispLeaf(tree, nullptr);
8890 }
8891 }
8892}
8893
8894//------------------------------------------------------------------------
// DumpOperandDefs: dumps the registers defined by a node's operand, recursing into
//                  contained operands.
//
// Arguments:
//    operand             - The operand whose defs are to be dumped.
//    first               - True if nothing has yet been printed for this node's defs;
//                          updated by this method.
//    mode                - The tuple dump mode.
//    operandString       - The buffer into which each operand string is formatted.
//    operandStringLength - The size of that buffer.
//
// Return Value:
//    None.
8902//
8903void LinearScan::DumpOperandDefs(
8904 GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength)
8905{
8906 assert(operand != nullptr);
8907 assert(operandString != nullptr);
8908 if (!operand->IsLIR())
8909 {
8910 return;
8911 }
8912
8913 int dstCount = ComputeOperandDstCount(operand);
8914
8915 if (dstCount != 0)
8916 {
8917 // This operand directly produces registers; print it.
8918 if (!first)
8919 {
8920 printf(",");
8921 }
8922 lsraGetOperandString(operand, mode, operandString, operandStringLength);
8923 printf("%s", operandString);
8924 first = false;
8925 }
8926 else if (operand->isContained())
8927 {
8928 // This is a contained node. Dump the defs produced by its operands.
8929 for (GenTree* op : operand->Operands())
8930 {
8931 DumpOperandDefs(op, first, mode, operandString, operandStringLength);
8932 }
8933 }
8934}
8935
8936void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
8937{
8938 BasicBlock* block;
8939 LsraLocation currentLoc = 1; // 0 is the entry
8940 const unsigned operandStringLength = 16;
8941 char operandString[operandStringLength];
8942
8943 // currentRefPosition is not used for LSRA_DUMP_PRE
8944 // We keep separate iterators for defs, so that we can print them
8945 // on the lhs of the dump
8946 RefPositionIterator refPosIterator = refPositions.begin();
8947 RefPosition* currentRefPosition = &refPosIterator;
8948
8949 switch (mode)
8950 {
8951 case LSRA_DUMP_PRE:
8952 printf("TUPLE STYLE DUMP BEFORE LSRA\n");
8953 break;
8954 case LSRA_DUMP_REFPOS:
8955 printf("TUPLE STYLE DUMP WITH REF POSITIONS\n");
8956 break;
8957 case LSRA_DUMP_POST:
8958 printf("TUPLE STYLE DUMP WITH REGISTER ASSIGNMENTS\n");
8959 break;
8960 default:
8961 printf("ERROR: INVALID TUPLE DUMP MODE\n");
8962 return;
8963 }
8964
8965 if (mode != LSRA_DUMP_PRE)
8966 {
8967 printf("Incoming Parameters: ");
8968 for (; refPosIterator != refPositions.end() && currentRefPosition->refType != RefTypeBB;
8969 ++refPosIterator, currentRefPosition = &refPosIterator)
8970 {
8971 Interval* interval = currentRefPosition->getInterval();
8972 assert(interval != nullptr && interval->isLocalVar);
8973 printf(" V%02d", interval->varNum);
8974 if (mode == LSRA_DUMP_POST)
8975 {
8976 regNumber reg;
8977 if (currentRefPosition->registerAssignment == RBM_NONE)
8978 {
8979 reg = REG_STK;
8980 }
8981 else
8982 {
8983 reg = currentRefPosition->assignedReg();
8984 }
8985 LclVarDsc* varDsc = &(compiler->lvaTable[interval->varNum]);
8986 printf("(");
8987 regNumber assignedReg = varDsc->lvRegNum;
8988 regNumber argReg = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
8989
8990 assert(reg == assignedReg || varDsc->lvRegister == false);
8991 if (reg != argReg)
8992 {
                    printf("%s=>", getRegName(argReg, isFloatRegType(interval->registerType)));
8995 }
8996 printf("%s)", getRegName(reg, isFloatRegType(interval->registerType)));
8997 }
8998 }
8999 printf("\n");
9000 }
9001
9002 for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
9003 {
9004 currentLoc += 2;
9005
9006 if (mode == LSRA_DUMP_REFPOS)
9007 {
9008 bool printedBlockHeader = false;
9009 // We should find the boundary RefPositions in the order of exposed uses, dummy defs, and the blocks
9010 for (; refPosIterator != refPositions.end() &&
9011 (currentRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef ||
9012 (currentRefPosition->refType == RefTypeBB && !printedBlockHeader));
9013 ++refPosIterator, currentRefPosition = &refPosIterator)
9014 {
9015 Interval* interval = nullptr;
9016 if (currentRefPosition->isIntervalRef())
9017 {
9018 interval = currentRefPosition->getInterval();
9019 }
9020 switch (currentRefPosition->refType)
9021 {
9022 case RefTypeExpUse:
9023 assert(interval != nullptr);
9024 assert(interval->isLocalVar);
9025 printf(" Exposed use of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
9026 break;
9027 case RefTypeDummyDef:
9028 assert(interval != nullptr);
9029 assert(interval->isLocalVar);
9030 printf(" Dummy def of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
9031 break;
9032 case RefTypeBB:
9033 block->dspBlockHeader(compiler);
9034 printedBlockHeader = true;
9035 printf("=====\n");
9036 break;
9037 default:
9038 printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
9039 break;
9040 }
9041 }
9042 }
9043 else
9044 {
9045 block->dspBlockHeader(compiler);
9046 printf("=====\n");
9047 }
9048 if (enregisterLocalVars && mode == LSRA_DUMP_POST && block != compiler->fgFirstBB &&
9049 block->bbNum <= bbNumMaxBeforeResolution)
9050 {
9051 printf("Predecessor for variable locations: " FMT_BB "\n", blockInfo[block->bbNum].predBBNum);
9052 dumpInVarToRegMap(block);
9053 }
9054 if (block->bbNum > bbNumMaxBeforeResolution)
9055 {
9056 SplitEdgeInfo splitEdgeInfo;
9057 splitBBNumToTargetBBNumMap->Lookup(block->bbNum, &splitEdgeInfo);
9058 assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
9059 assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
9060 printf("New block introduced for resolution from " FMT_BB " to " FMT_BB "\n", splitEdgeInfo.fromBBNum,
9061 splitEdgeInfo.toBBNum);
9062 }
9063
9064 for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
9065 {
9066 GenTree* tree = node;
9067
9068 genTreeOps oper = tree->OperGet();
9069 int produce = tree->IsValue() ? ComputeOperandDstCount(tree) : 0;
9070 int consume = ComputeAvailableSrcCount(tree);
9071 regMaskTP killMask = RBM_NONE;
9072 regMaskTP fixedMask = RBM_NONE;
9073
9074 lsraDispNode(tree, mode, produce != 0 && mode != LSRA_DUMP_REFPOS);
9075
9076 if (mode != LSRA_DUMP_REFPOS)
9077 {
9078 if (consume > 0)
9079 {
9080 printf("; ");
9081
9082 bool first = true;
9083 for (GenTree* operand : tree->Operands())
9084 {
9085 DumpOperandDefs(operand, first, mode, operandString, operandStringLength);
9086 }
9087 }
9088 }
9089 else
9090 {
9091 // Print each RefPosition on a new line, but
9092 // printing all the kills for each node on a single line
9093 // and combining the fixed regs with their associated def or use
9094 bool killPrinted = false;
9095 RefPosition* lastFixedRegRefPos = nullptr;
9096 for (; refPosIterator != refPositions.end() &&
9097 (currentRefPosition->refType == RefTypeUse || currentRefPosition->refType == RefTypeFixedReg ||
9098 currentRefPosition->refType == RefTypeKill || currentRefPosition->refType == RefTypeDef) &&
9099 (currentRefPosition->nodeLocation == tree->gtSeqNum ||
9100 currentRefPosition->nodeLocation == tree->gtSeqNum + 1);
9101 ++refPosIterator, currentRefPosition = &refPosIterator)
9102 {
9103 Interval* interval = nullptr;
9104 if (currentRefPosition->isIntervalRef())
9105 {
9106 interval = currentRefPosition->getInterval();
9107 }
9108 switch (currentRefPosition->refType)
9109 {
9110 case RefTypeUse:
9111 if (currentRefPosition->isPhysRegRef)
9112 {
9113 printf("\n Use:R%d(#%d)",
9114 currentRefPosition->getReg()->regNum, currentRefPosition->rpNum);
9115 }
9116 else
9117 {
9118 assert(interval != nullptr);
9119 printf("\n Use:");
9120 interval->microDump();
9121 printf("(#%d)", currentRefPosition->rpNum);
9122 if (currentRefPosition->isFixedRegRef && !interval->isInternal)
9123 {
9124 assert(genMaxOneBit(currentRefPosition->registerAssignment));
9125 assert(lastFixedRegRefPos != nullptr);
9126 printf(" Fixed:%s(#%d)", getRegName(currentRefPosition->assignedReg(),
9127 isFloatRegType(interval->registerType)),
9128 lastFixedRegRefPos->rpNum);
9129 lastFixedRegRefPos = nullptr;
9130 }
9131 if (currentRefPosition->isLocalDefUse)
9132 {
9133 printf(" LocalDefUse");
9134 }
9135 if (currentRefPosition->lastUse)
9136 {
9137 printf(" *");
9138 }
9139 }
9140 break;
9141 case RefTypeDef:
9142 {
9143 // Print each def on a new line
9144 assert(interval != nullptr);
9145 printf("\n Def:");
9146 interval->microDump();
9147 printf("(#%d)", currentRefPosition->rpNum);
9148 if (currentRefPosition->isFixedRegRef)
9149 {
9150 assert(genMaxOneBit(currentRefPosition->registerAssignment));
9151 printf(" %s", getRegName(currentRefPosition->assignedReg(),
9152 isFloatRegType(interval->registerType)));
9153 }
9154 if (currentRefPosition->isLocalDefUse)
9155 {
9156 printf(" LocalDefUse");
9157 }
9158 if (currentRefPosition->lastUse)
9159 {
9160 printf(" *");
9161 }
9162 if (interval->relatedInterval != nullptr)
9163 {
9164 printf(" Pref:");
9165 interval->relatedInterval->microDump();
9166 }
9167 }
9168 break;
9169 case RefTypeKill:
9170 if (!killPrinted)
9171 {
9172 printf("\n Kill: ");
9173 killPrinted = true;
9174 }
                            printf("%s ", getRegName(currentRefPosition->assignedReg(),
                                                     isFloatRegType(currentRefPosition->getReg()->registerType)));
9178 break;
9179 case RefTypeFixedReg:
9180 lastFixedRegRefPos = currentRefPosition;
9181 break;
9182 default:
9183 printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
9184 break;
9185 }
9186 }
9187 }
9188 printf("\n");
9189 }
9190 if (enregisterLocalVars && mode == LSRA_DUMP_POST)
9191 {
9192 dumpOutVarToRegMap(block);
9193 }
9194 printf("\n");
9195 }
9196 printf("\n\n");
9197}
9198
9199void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event,
9200 Interval* interval,
9201 regNumber reg,
9202 BasicBlock* currentBlock)
9203{
9204 if (!(VERBOSE))
9205 {
9206 return;
9207 }
9208 if ((interval != nullptr) && (reg != REG_NA) && (reg != REG_STK))
9209 {
9210 registersToDump |= genRegMask(reg);
9211 dumpRegRecordTitleIfNeeded();
9212 }
9213
9214 switch (event)
9215 {
9216 // Conflicting def/use
9217 case LSRA_EVENT_DEFUSE_CONFLICT:
9218 dumpRefPositionShort(activeRefPosition, currentBlock);
9219 printf("DUconflict ");
9220 dumpRegRecords();
9221 break;
9222 case LSRA_EVENT_DEFUSE_CASE1:
9223 printf(indentFormat, " Case #1 use defRegAssignment");
9224 dumpRegRecords();
9225 break;
9226 case LSRA_EVENT_DEFUSE_CASE2:
9227 printf(indentFormat, " Case #2 use useRegAssignment");
9228 dumpRegRecords();
9229 break;
9230 case LSRA_EVENT_DEFUSE_CASE3:
9231 printf(indentFormat, " Case #3 use useRegAssignment");
            dumpRegRecords();
9234 break;
9235 case LSRA_EVENT_DEFUSE_CASE4:
9236 printf(indentFormat, " Case #4 use defRegAssignment");
9237 dumpRegRecords();
9238 break;
9239 case LSRA_EVENT_DEFUSE_CASE5:
9240 printf(indentFormat, " Case #5 set def to all regs");
9241 dumpRegRecords();
9242 break;
9243 case LSRA_EVENT_DEFUSE_CASE6:
9244 printf(indentFormat, " Case #6 need a copy");
9245 dumpRegRecords();
9246 if (interval == nullptr)
9247 {
9248 printf(indentFormat, " NULL interval");
9249 dumpRegRecords();
9250 }
9251 else if (interval->firstRefPosition->multiRegIdx != 0)
9252 {
9253 printf(indentFormat, " (multiReg)");
9254 dumpRegRecords();
9255 }
9256 break;
9257
9258 case LSRA_EVENT_SPILL:
9259 dumpRefPositionShort(activeRefPosition, currentBlock);
9260 assert(interval != nullptr && interval->assignedReg != nullptr);
9261 printf("Spill %-4s ", getRegName(interval->assignedReg->regNum));
9262 dumpRegRecords();
9263 break;
9264
9265 // Restoring the previous register
9266 case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL:
9267 assert(interval != nullptr);
9268 dumpRefPositionShort(activeRefPosition, currentBlock);
9269 printf("SRstr %-4s ", getRegName(reg));
9270 dumpRegRecords();
9271 break;
9272
9273 case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL:
9274 assert(interval != nullptr);
9275 if (activeRefPosition == nullptr)
9276 {
9277 printf(emptyRefPositionFormat, "");
9278 }
9279 else
9280 {
9281 dumpRefPositionShort(activeRefPosition, currentBlock);
9282 }
9283 printf("Restr %-4s ", getRegName(reg));
9284 dumpRegRecords();
9285 if (activeRefPosition != nullptr)
9286 {
9287 printf(emptyRefPositionFormat, "");
9288 }
9289 break;
9290
9291 // Done with GC Kills
9292 case LSRA_EVENT_DONE_KILL_GC_REFS:
9293 printf(indentFormat, " DoneKillGC ");
9294 break;
9295
9296 // Block boundaries
9297 case LSRA_EVENT_START_BB:
9298 assert(currentBlock != nullptr);
9299 dumpRefPositionShort(activeRefPosition, currentBlock);
9300 break;
9301
9302 // Allocation decisions
9303 case LSRA_EVENT_NEEDS_NEW_REG:
9304 dumpRefPositionShort(activeRefPosition, currentBlock);
9305 printf("Free %-4s ", getRegName(reg));
9306 dumpRegRecords();
9307 break;
9308
9309 case LSRA_EVENT_ZERO_REF:
9310 assert(interval != nullptr && interval->isLocalVar);
9311 dumpRefPositionShort(activeRefPosition, currentBlock);
9312 printf("NoRef ");
9313 dumpRegRecords();
9314 break;
9315
9316 case LSRA_EVENT_FIXED_REG:
9317 case LSRA_EVENT_EXP_USE:
9318 case LSRA_EVENT_KEPT_ALLOCATION:
9319 dumpRefPositionShort(activeRefPosition, currentBlock);
9320 printf("Keep %-4s ", getRegName(reg));
9321 break;
9322
9323 case LSRA_EVENT_COPY_REG:
9324 assert(interval != nullptr && interval->recentRefPosition != nullptr);
9325 dumpRefPositionShort(activeRefPosition, currentBlock);
9326 printf("Copy %-4s ", getRegName(reg));
9327 break;
9328
9329 case LSRA_EVENT_MOVE_REG:
9330 assert(interval != nullptr && interval->recentRefPosition != nullptr);
9331 dumpRefPositionShort(activeRefPosition, currentBlock);
9332 printf("Move %-4s ", getRegName(reg));
9333 dumpRegRecords();
9334 break;
9335
9336 case LSRA_EVENT_ALLOC_REG:
9337 dumpRefPositionShort(activeRefPosition, currentBlock);
9338 printf("Alloc %-4s ", getRegName(reg));
9339 break;
9340
9341 case LSRA_EVENT_REUSE_REG:
9342 dumpRefPositionShort(activeRefPosition, currentBlock);
9343 printf("Reuse %-4s ", getRegName(reg));
9344 break;
9345
9346 case LSRA_EVENT_ALLOC_SPILLED_REG:
9347 dumpRefPositionShort(activeRefPosition, currentBlock);
9348 printf("Steal %-4s ", getRegName(reg));
9349 break;
9350
9351 case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED:
9352 assert(interval != nullptr && interval->isLocalVar);
9353 dumpRefPositionShort(activeRefPosition, currentBlock);
9354 printf("LoRef ");
9355 break;
9356
9357 case LSRA_EVENT_NO_REG_ALLOCATED:
9358 dumpRefPositionShort(activeRefPosition, currentBlock);
9359 printf("NoReg ");
9360 break;
9361
9362 case LSRA_EVENT_RELOAD:
9363 dumpRefPositionShort(activeRefPosition, currentBlock);
9364 printf("ReLod %-4s ", getRegName(reg));
9365 dumpRegRecords();
9366 break;
9367
9368 case LSRA_EVENT_SPECIAL_PUTARG:
9369 dumpRefPositionShort(activeRefPosition, currentBlock);
9370 printf("PtArg %-4s ", getRegName(reg));
9371 break;
9372
9373 // We currently don't dump anything for these events.
9374 case LSRA_EVENT_DEFUSE_FIXED_DELAY_USE:
9375 case LSRA_EVENT_SPILL_EXTENDED_LIFETIME:
9376 case LSRA_EVENT_END_BB:
9377 case LSRA_EVENT_FREE_REGS:
9378 case LSRA_EVENT_INCREMENT_RANGE_END:
9379 case LSRA_EVENT_LAST_USE:
9380 case LSRA_EVENT_LAST_USE_DELAYED:
9381 break;
9382
9383 default:
9384 unreached();
9385 }
9386}
9387
9388//------------------------------------------------------------------------
9389// dumpRegRecordHeader: Dump the header for a column-based dump of the register state.
9390//
9391// Arguments:
9392// None.
9393//
9394// Return Value:
9395// None.
9396//
9397// Assumptions:
9398// Reg names fit in 4 characters (minimum width of the columns)
9399//
9400// Notes:
9401// In order to make the table as dense as possible (for ease of reading the dumps),
9402// we determine the minimum regColumnWidth width required to represent:
9403// regs, by name (e.g. eax or xmm0) - this is fixed at 4 characters.
9404// intervals, as Vnn for lclVar intervals, or as I<num> for other intervals.
9405// The table is indented by the amount needed for dumpRefPositionShort, which is
9406// captured in shortRefPositionDumpWidth.
9407//
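// An illustrative fragment of the resulting table (registers, intervals and column
// widths are made up; the actual widths are computed below):
//     Loc RP#  Name Type   Action Reg |rax |rcx |rdx |
//      10.#3   V01  Use *  Alloc rax  |V01a|    |    |
//      12.#4   I2   Def    Alloc rcx  |V01a|I2 a|    |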
9408void LinearScan::dumpRegRecordHeader()
9409{
9410 printf("The following table has one or more rows for each RefPosition that is handled during allocation.\n"
9411 "The first column provides the basic information about the RefPosition, with its type (e.g. Def,\n"
9412 "Use, Fixd) followed by a '*' if it is a last use, and a 'D' if it is delayRegFree, and then the\n"
9413 "action taken during allocation (e.g. Alloc a new register, or Keep an existing one).\n"
9414 "The subsequent columns show the Interval occupying each register, if any, followed by 'a' if it is\n"
           "active, and 'i' if it is inactive. Columns are only printed up to the last modified register, which\n"
9416 "may increase during allocation, in which case additional columns will appear. Registers which are\n"
9417 "not marked modified have ---- in their column.\n\n");
9418
9419 // First, determine the width of each register column (which holds a reg name in the
9420 // header, and an interval name in each subsequent row).
9421 int intervalNumberWidth = (int)log10((double)intervals.size()) + 1;
9422 // The regColumnWidth includes the identifying character (I or V) and an 'i' or 'a' (inactive or active)
9423 regColumnWidth = intervalNumberWidth + 2;
9424 if (regColumnWidth < 4)
9425 {
9426 regColumnWidth = 4;
9427 }
9428 sprintf_s(intervalNameFormat, MAX_FORMAT_CHARS, "%%c%%-%dd", regColumnWidth - 2);
9429 sprintf_s(regNameFormat, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
9430
9431 // Next, determine the width of the short RefPosition (see dumpRefPositionShort()).
9432 // This is in the form:
9433 // nnn.#mmm NAME TYPEld
9434 // Where:
9435 // nnn is the Location, right-justified to the width needed for the highest location.
9436 // mmm is the RefPosition rpNum, left-justified to the width needed for the highest rpNum.
9437 // NAME is dumped by dumpReferentName(), and is "regColumnWidth".
9438 // TYPE is RefTypeNameShort, and is 4 characters
9439 // l is either '*' (if a last use) or ' ' (otherwise)
9440 // d is either 'D' (if a delayed use) or ' ' (otherwise)
9441
9442 maxNodeLocation = (maxNodeLocation == 0)
9443 ? 1
9444 : maxNodeLocation; // corner case of a method with an infinite loop without any gentree nodes
9445 assert(maxNodeLocation >= 1);
9446 assert(refPositions.size() >= 1);
9447 int nodeLocationWidth = (int)log10((double)maxNodeLocation) + 1;
9448 int refPositionWidth = (int)log10((double)refPositions.size()) + 1;
9449 int refTypeInfoWidth = 4 /*TYPE*/ + 2 /* last-use and delayed */ + 1 /* space */;
9450 int locationAndRPNumWidth = nodeLocationWidth + 2 /* .# */ + refPositionWidth + 1 /* space */;
9451 int shortRefPositionDumpWidth = locationAndRPNumWidth + regColumnWidth + 1 /* space */ + refTypeInfoWidth;
9452 sprintf_s(shortRefPositionFormat, MAX_FORMAT_CHARS, "%%%dd.#%%-%dd ", nodeLocationWidth, refPositionWidth);
9453 sprintf_s(emptyRefPositionFormat, MAX_FORMAT_CHARS, "%%-%ds", shortRefPositionDumpWidth);
9454
9455 // The width of the "allocation info"
9456 // - a 5-character allocation decision
9457 // - a space
9458 // - a 4-character register
9459 // - a space
9460 int allocationInfoWidth = 5 + 1 + 4 + 1;
9461
9462 // Next, determine the width of the legend for each row. This includes:
9463 // - a short RefPosition dump (shortRefPositionDumpWidth), which includes a space
9464 // - the allocation info (allocationInfoWidth), which also includes a space
9465
9466 regTableIndent = shortRefPositionDumpWidth + allocationInfoWidth;
9467
9468 // BBnn printed left-justified in the NAME Typeld and allocationInfo space.
9469 int bbDumpWidth = regColumnWidth + 1 + refTypeInfoWidth + allocationInfoWidth;
9470 int bbNumWidth = (int)log10((double)compiler->fgBBNumMax) + 1;
9471 // In the unlikely event that BB numbers overflow the space, we'll simply omit the predBB
9472 int predBBNumDumpSpace = regTableIndent - locationAndRPNumWidth - bbNumWidth - 9; // 'BB' + ' PredBB'
9473 if (predBBNumDumpSpace < bbNumWidth)
9474 {
9475 sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd", shortRefPositionDumpWidth - 2);
9476 }
9477 else
9478 {
9479 sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd PredBB%%-%dd", bbNumWidth, predBBNumDumpSpace);
9480 }
9481
9482 if (compiler->shouldDumpASCIITrees())
9483 {
9484 columnSeparator = "|";
9485 line = "-";
9486 leftBox = "+";
9487 middleBox = "+";
9488 rightBox = "+";
9489 }
9490 else
9491 {
9492 columnSeparator = "\xe2\x94\x82";
9493 line = "\xe2\x94\x80";
9494 leftBox = "\xe2\x94\x9c";
9495 middleBox = "\xe2\x94\xbc";
9496 rightBox = "\xe2\x94\xa4";
9497 }
9498 sprintf_s(indentFormat, MAX_FORMAT_CHARS, "%%-%ds", regTableIndent);
9499
9500 // Now, set up the legend format for the RefPosition info
9501 sprintf_s(legendFormat, MAX_LEGEND_FORMAT_CHARS, "%%-%d.%ds%%-%d.%ds%%-%ds%%s", nodeLocationWidth + 1,
9502 nodeLocationWidth + 1, refPositionWidth + 2, refPositionWidth + 2, regColumnWidth + 1);
9503
9504 // Print a "title row" including the legend and the reg names.
9505 lastDumpedRegisters = RBM_NONE;
9506 dumpRegRecordTitleIfNeeded();
9507}
9508
9509void LinearScan::dumpRegRecordTitleIfNeeded()
9510{
9511 if ((lastDumpedRegisters != registersToDump) || (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES))
9512 {
9513 lastUsedRegNumIndex = 0;
9514 int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_FP_LAST : REG_INT_LAST;
9515 for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++)
9516 {
9517 if ((registersToDump & genRegMask((regNumber)regNumIndex)) != 0)
9518 {
9519 lastUsedRegNumIndex = regNumIndex;
9520 }
9521 }
9522 dumpRegRecordTitle();
9523 lastDumpedRegisters = registersToDump;
9524 }
9525}
9526
9527void LinearScan::dumpRegRecordTitleLines()
9528{
9529 for (int i = 0; i < regTableIndent; i++)
9530 {
9531 printf("%s", line);
9532 }
9533 for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
9534 {
9535 regNumber regNum = (regNumber)regNumIndex;
9536 if (shouldDumpReg(regNum))
9537 {
9538 printf("%s", middleBox);
9539 for (int i = 0; i < regColumnWidth; i++)
9540 {
9541 printf("%s", line);
9542 }
9543 }
9544 }
9545 printf("%s\n", rightBox);
9546}
9547void LinearScan::dumpRegRecordTitle()
9548{
9549 dumpRegRecordTitleLines();
9550
9551 // Print out the legend for the RefPosition info
9552 printf(legendFormat, "Loc ", "RP# ", "Name ", "Type Action Reg ");
9553
9554 // Print out the register name column headers
9555 char columnFormatArray[MAX_FORMAT_CHARS];
9556 sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%s%%-%d.%ds", columnSeparator, regColumnWidth, regColumnWidth);
9557 for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
9558 {
9559 regNumber regNum = (regNumber)regNumIndex;
9560 if (shouldDumpReg(regNum))
9561 {
9562 const char* regName = getRegName(regNum);
9563 printf(columnFormatArray, regName);
9564 }
9565 }
9566 printf("%s\n", columnSeparator);
9567
9568 rowCountSinceLastTitle = 0;
9569
9570 dumpRegRecordTitleLines();
9571}
9572
9573void LinearScan::dumpRegRecords()
9574{
9575 static char columnFormatArray[18];
9576
9577 for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
9578 {
9579 if (shouldDumpReg((regNumber)regNumIndex))
9580 {
9581 printf("%s", columnSeparator);
9582 RegRecord& regRecord = physRegs[regNumIndex];
9583 Interval* interval = regRecord.assignedInterval;
9584 if (interval != nullptr)
9585 {
9586 dumpIntervalName(interval);
9587 char activeChar = interval->isActive ? 'a' : 'i';
9588 printf("%c", activeChar);
9589 }
            else
            {
                // Format the column before either use below; note that the "Busy" case
                // would otherwise print with an uninitialized (empty) format string on
                // its first use.
                sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
                printf(columnFormatArray, regRecord.isBusyUntilNextKill ? "Busy" : "");
            }
9599 }
9600 }
9601 printf("%s\n", columnSeparator);
9602 rowCountSinceLastTitle++;
9603}

void LinearScan::dumpIntervalName(Interval* interval)
{
    if (interval->isLocalVar)
    {
        printf(intervalNameFormat, 'V', interval->varNum);
    }
    else if (interval->isConstant)
    {
        printf(intervalNameFormat, 'C', interval->intervalIndex);
    }
    else
    {
        printf(intervalNameFormat, 'I', interval->intervalIndex);
    }
}
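
// For example, this prints a name such as "V02" for the interval of the local var
// with varNum 2, and "C05" or "I05" for the constant or other interval with
// intervalIndex 5 (the exact width is determined by intervalNameFormat).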

void LinearScan::dumpEmptyRefPosition()
{
    printf(emptyRefPositionFormat, "");
}

//------------------------------------------------------------------------
// LinearScan::dumpRefPositionShort: Dump a one-line description of a RefPosition.
//
// Arguments:
//    refPosition  - the RefPosition to dump.
//    currentBlock - the block containing the RefPosition, or nullptr for the final
//                   RefTypeBB (the end of the block list).
//
// Notes:
//    The size of this dump is computed in dumpRegRecordHeader().
//
void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock)
{
    BasicBlock*         block                  = currentBlock;
    static RefPosition* lastPrintedRefPosition = nullptr;
    if (refPosition == lastPrintedRefPosition)
    {
        dumpEmptyRefPosition();
        return;
    }
    lastPrintedRefPosition = refPosition;
    if (refPosition->refType == RefTypeBB)
    {
        // Always print a title row before a RefTypeBB (except for the first, because we
        // will already have printed it before the parameters)
        if ((block != nullptr) && (block != compiler->fgFirstBB))
        {
            dumpRegRecordTitle();
        }
    }
    printf(shortRefPositionFormat, refPosition->nodeLocation, refPosition->rpNum);
    if (refPosition->refType == RefTypeBB)
    {
        if (block == nullptr)
        {
            printf(regNameFormat, "END");
            printf(" ");
            printf(regNameFormat, "");
        }
        else
        {
            printf(bbRefPosFormat, block->bbNum, block == compiler->fgFirstBB ? 0 : blockInfo[block->bbNum].predBBNum);
        }
    }
    else if (refPosition->isIntervalRef())
    {
        Interval* interval = refPosition->getInterval();
        dumpIntervalName(interval);
        char lastUseChar = ' ';
        char delayChar   = ' ';
        if (refPosition->lastUse)
        {
            lastUseChar = '*';
            if (refPosition->delayRegFree)
            {
                delayChar = 'D';
            }
        }
        printf(" %s%c%c ", getRefTypeShortName(refPosition->refType), lastUseChar, delayChar);
    }
    else if (refPosition->isPhysRegRef)
    {
        RegRecord* regRecord = refPosition->getReg();
        printf(regNameFormat, getRegName(regRecord->regNum));
        printf(" %s ", getRefTypeShortName(refPosition->refType));
    }
    else
    {
        assert(refPosition->refType == RefTypeKillGCRefs);
        // There's no interval or reg name associated with this.
        printf(regNameFormat, " ");
        printf(" %s ", getRefTypeShortName(refPosition->refType));
    }
}
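
// For illustration only (the real widths come from the formats computed in
// dumpRegRecordHeader()), the rows printed here take roughly these shapes:
//
//     <loc> <rp#> V03  <type>*D   - an interval ref: interval name, short ref type,
//                                   '*' if last use, 'D' if delay-freed
//     <loc> <rp#> rax  <type>     - a physical register ref: reg name and ref type
//     <loc> <rp#> BB03 PredBB02   - a block boundary with its sequenced predecessor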

//------------------------------------------------------------------------
// LinearScan::IsResolutionMove:
//     Returns true if the given node is a move inserted by LSRA
//     resolution.
//
// Arguments:
//     node - the node to check.
//
bool LinearScan::IsResolutionMove(GenTree* node)
{
    if (!IsLsraAdded(node))
    {
        return false;
    }

    switch (node->OperGet())
    {
        case GT_LCL_VAR:
        case GT_COPY:
            return node->IsUnusedValue();

        case GT_SWAP:
            return true;

        default:
            return false;
    }
}
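
// For illustration, the LIR shapes accepted here, assuming each node was marked as
// LSRA-added when resolution created it:
//
//     t1 = GT_LCL_VAR V03     ; unused value - a load/reload inserted by resolution
//     t2 = GT_COPY    t1      ; unused value - a register-to-register move
//          GT_SWAP    t3, t4  ; exchanges the registers of two lclVars
//
// A GT_LCL_VAR or GT_COPY whose value is consumed is not a resolution move, even if
// it was added by LSRA (but see IsResolutionNode() below).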

//------------------------------------------------------------------------
// LinearScan::IsResolutionNode:
//     Returns true if the given node is either a move inserted by LSRA
//     resolution or an operand to such a move.
//
// Arguments:
//     containingRange - the range that contains the node to check.
//     node            - the node to check.
//
bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node)
{
    for (;;)
    {
        if (IsResolutionMove(node))
        {
            return true;
        }

        if (!IsLsraAdded(node) || (node->OperGet() != GT_LCL_VAR))
        {
            return false;
        }

        LIR::Use use;
        bool     foundUse = containingRange.TryGetUse(node, &use);
        assert(foundUse);

        node = use.User();
    }
}
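
// For example, given a resolution-inserted sequence such as:
//
//     t5 = GT_LCL_VAR V07  ; LSRA-added, consumed by the copy below
//     t6 = GT_COPY    t5   ; LSRA-added, unused value
//
// calling this on the GT_LCL_VAR finds that it is not itself a resolution move (its
// value is consumed), so it walks up to its user, the GT_COPY, which is one; both
// nodes are therefore resolution nodes.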

//------------------------------------------------------------------------
// verifyFinalAllocation: Traverse the RefPositions and verify various invariants.
//
// Arguments:
//    None.
//
// Return Value:
//    None.
//
// Notes:
//    If verbose is set, this will also dump a table of the final allocations.
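//
//    The verification works by replaying the allocation: all register and interval
//    assignments are cleared, then each RefPosition is processed in order,
//    re-establishing the interval-to-register bindings at defs (and reloads) and
//    clearing them at last uses and spills. At each block boundary the replayed
//    state is asserted to match the recorded inVarToRegMaps/outVarToRegMaps.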
void LinearScan::verifyFinalAllocation()
{
    if (VERBOSE)
    {
        printf("\nFinal allocation\n");
    }

    // Clear register assignments.
    for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
    {
        RegRecord* physRegRecord        = getRegisterRecord(reg);
        physRegRecord->assignedInterval = nullptr;
    }

    for (Interval& interval : intervals)
    {
        interval.assignedReg = nullptr;
        interval.physReg     = REG_NA;
    }

    DBEXEC(VERBOSE, dumpRegRecordTitle());

    BasicBlock*  currentBlock                = nullptr;
    GenTree*     firstBlockEndResolutionNode = nullptr;
    regMaskTP    regsToFree                  = RBM_NONE;
    regMaskTP    delayRegsToFree             = RBM_NONE;
    LsraLocation currentLocation             = MinLocation;
    for (RefPosition& refPosition : refPositions)
    {
        RefPosition* currentRefPosition = &refPosition;
        Interval*    interval           = nullptr;
        RegRecord*   regRecord          = nullptr;
        regNumber    regNum             = REG_NA;
        activeRefPosition               = currentRefPosition;

        if (currentRefPosition->refType == RefTypeBB)
        {
            regsToFree |= delayRegsToFree;
            delayRegsToFree = RBM_NONE;
        }
        else
        {
            if (currentRefPosition->isPhysRegRef)
            {
                regRecord                    = currentRefPosition->getReg();
                regRecord->recentRefPosition = currentRefPosition;
                regNum                       = regRecord->regNum;
            }
            else if (currentRefPosition->isIntervalRef())
            {
                interval                    = currentRefPosition->getInterval();
                interval->recentRefPosition = currentRefPosition;
                if (currentRefPosition->registerAssignment != RBM_NONE)
                {
                    if (!genMaxOneBit(currentRefPosition->registerAssignment))
                    {
                        assert(currentRefPosition->refType == RefTypeExpUse ||
                               currentRefPosition->refType == RefTypeDummyDef);
                    }
                    else
                    {
                        regNum    = currentRefPosition->assignedReg();
                        regRecord = getRegisterRecord(regNum);
                    }
                }
            }
        }

        LsraLocation newLocation = currentRefPosition->nodeLocation;

        if (newLocation > currentLocation)
        {
            // Free Registers.
            // We could use the freeRegisters() method, but we'd have to carefully manage the active intervals.
            for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
            {
                regMaskTP regMask = genRegMask(reg);
                if ((regsToFree & regMask) != RBM_NONE)
                {
                    RegRecord* physRegRecord        = getRegisterRecord(reg);
                    physRegRecord->assignedInterval = nullptr;
                }
            }
            regsToFree      = delayRegsToFree;
            delayRegsToFree = RBM_NONE;
        }
        currentLocation = newLocation;

        switch (currentRefPosition->refType)
        {
            case RefTypeBB:
            {
                if (currentBlock == nullptr)
                {
                    currentBlock = startBlockSequence();
                }
                else
                {
                    // Verify the resolution moves at the end of the previous block.
                    for (GenTree* node = firstBlockEndResolutionNode; node != nullptr; node = node->gtNext)
                    {
                        assert(enregisterLocalVars);
                        // Only verify nodes that are actually moves; don't bother with the nodes that are
                        // operands to moves.
                        if (IsResolutionMove(node))
                        {
                            verifyResolutionMove(node, currentLocation);
                        }
                    }

                    // Validate the locations at the end of the previous block.
                    if (enregisterLocalVars)
                    {
                        VarToRegMap     outVarToRegMap = outVarToRegMaps[currentBlock->bbNum];
                        VarSetOps::Iter iter(compiler, currentBlock->bbLiveOut);
                        unsigned        varIndex = 0;
                        while (iter.NextElem(&varIndex))
                        {
                            if (localVarIntervals[varIndex] == nullptr)
                            {
                                assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
                                continue;
                            }
                            regNumber regNum = getVarReg(outVarToRegMap, varIndex);
                            interval         = getIntervalForLocalVar(varIndex);
                            assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
                            interval->physReg     = REG_NA;
                            interval->assignedReg = nullptr;
                            interval->isActive    = false;
                        }
                    }

                    // Clear register assignments.
                    for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
                    {
                        RegRecord* physRegRecord        = getRegisterRecord(reg);
                        physRegRecord->assignedInterval = nullptr;
                    }

                    // Now, record the locations at the beginning of this block.
                    currentBlock = moveToNextBlock();
                }

                if (currentBlock != nullptr)
                {
                    if (enregisterLocalVars)
                    {
                        VarToRegMap     inVarToRegMap = inVarToRegMaps[currentBlock->bbNum];
                        VarSetOps::Iter iter(compiler, currentBlock->bbLiveIn);
                        unsigned        varIndex = 0;
                        while (iter.NextElem(&varIndex))
                        {
                            if (localVarIntervals[varIndex] == nullptr)
                            {
                                assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
                                continue;
                            }
                            regNumber regNum      = getVarReg(inVarToRegMap, varIndex);
                            interval              = getIntervalForLocalVar(varIndex);
                            interval->physReg     = regNum;
                            interval->assignedReg = &(physRegs[regNum]);
                            interval->isActive    = true;
                            physRegs[regNum].assignedInterval = interval;
                        }
                    }

                    if (VERBOSE)
                    {
                        dumpRefPositionShort(currentRefPosition, currentBlock);
                        dumpRegRecords();
                    }

                    // Finally, handle the resolution moves, if any, at the beginning of the next block.
                    firstBlockEndResolutionNode = nullptr;
                    bool foundNonResolutionNode = false;

                    LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
                    for (GenTree* node : currentBlockRange.NonPhiNodes())
                    {
                        if (IsResolutionNode(currentBlockRange, node))
                        {
                            assert(enregisterLocalVars);
                            if (foundNonResolutionNode)
                            {
                                firstBlockEndResolutionNode = node;
                                break;
                            }
                            else if (IsResolutionMove(node))
                            {
                                // Only verify nodes that are actually moves; don't bother with the nodes that are
                                // operands to moves.
                                verifyResolutionMove(node, currentLocation);
                            }
                        }
                        else
                        {
                            foundNonResolutionNode = true;
                        }
                    }
                }
            }
            break;

            case RefTypeKill:
                assert(regRecord != nullptr);
                assert(regRecord->assignedInterval == nullptr);
                dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
                break;

            case RefTypeFixedReg:
                assert(regRecord != nullptr);
                dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
                break;

            case RefTypeUpperVectorSaveDef:
            case RefTypeUpperVectorSaveUse:
            case RefTypeDef:
            case RefTypeUse:
            case RefTypeParamDef:
            case RefTypeZeroInit:
                assert(interval != nullptr);

                if (interval->isSpecialPutArg)
                {
                    dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, interval, regNum);
                    break;
                }
                if (currentRefPosition->reload)
                {
                    interval->isActive = true;
                    assert(regNum != REG_NA);
                    interval->physReg           = regNum;
                    interval->assignedReg       = regRecord;
                    regRecord->assignedInterval = interval;
                    dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, nullptr, regRecord->regNum, currentBlock);
                }
                if (regNum == REG_NA)
                {
                    dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, interval);
                }
                else if (RefTypeIsDef(currentRefPosition->refType))
                {
                    interval->isActive = true;
                    if (VERBOSE)
                    {
                        if (interval->isConstant && (currentRefPosition->treeNode != nullptr) &&
                            currentRefPosition->treeNode->IsReuseRegVal())
                        {
                            dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, nullptr, regRecord->regNum, currentBlock);
                        }
                        else
                        {
                            dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, nullptr, regRecord->regNum, currentBlock);
                        }
                    }
                }
                else if (currentRefPosition->copyReg)
                {
                    dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, interval, regRecord->regNum, currentBlock);
                }
                else if (currentRefPosition->moveReg)
                {
                    assert(interval->assignedReg != nullptr);
                    interval->assignedReg->assignedInterval = nullptr;
                    interval->physReg                       = regNum;
                    interval->assignedReg                   = regRecord;
                    regRecord->assignedInterval             = interval;
                    if (VERBOSE)
                    {
                        printf("Move %-4s ", getRegName(regRecord->regNum));
                    }
                }
                else
                {
                    dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
                }
                if (currentRefPosition->lastUse || currentRefPosition->spillAfter)
                {
                    interval->isActive = false;
                }
                if (regNum != REG_NA)
                {
                    if (currentRefPosition->spillAfter)
                    {
                        if (VERBOSE)
                        {
                            // If refPos is marked as copyReg, then the reg that is spilled
                            // is the homeReg of the interval, not the reg currently assigned
                            // to refPos.
                            regNumber spillReg = regNum;
                            if (currentRefPosition->copyReg)
                            {
                                assert(interval != nullptr);
                                spillReg = interval->physReg;
                            }
                            dumpRegRecords();
                            dumpEmptyRefPosition();
                            printf("Spill %-4s ", getRegName(spillReg));
                        }
                    }
                    else if (currentRefPosition->copyReg)
                    {
                        regRecord->assignedInterval = interval;
                    }
                    else
                    {
                        interval->physReg           = regNum;
                        interval->assignedReg       = regRecord;
                        regRecord->assignedInterval = interval;
                    }
                }
                break;

            case RefTypeKillGCRefs:
                // No action to take; however, we will assert that, at resolution time,
                // no registers contain GC refs.
                {
                    DBEXEC(VERBOSE, printf(" "));
                    regMaskTP candidateRegs = currentRefPosition->registerAssignment;
                    while (candidateRegs != RBM_NONE)
                    {
                        regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
                        candidateRegs &= ~nextRegBit;
                        regNumber  nextReg          = genRegNumFromMask(nextRegBit);
                        RegRecord* regRecord        = getRegisterRecord(nextReg);
                        Interval*  assignedInterval = regRecord->assignedInterval;
                        assert(assignedInterval == nullptr || !varTypeIsGC(assignedInterval->registerType));
                    }
                }
                break;

            case RefTypeExpUse:
            case RefTypeDummyDef:
                // Do nothing; these will be handled by the RefTypeBB.
                DBEXEC(VERBOSE, printf(" "));
                break;

            case RefTypeInvalid:
                // No action to take for this refType.
                break;
        }

        if (currentRefPosition->refType != RefTypeBB)
        {
            DBEXEC(VERBOSE, dumpRegRecords());
            if (interval != nullptr)
            {
                if (currentRefPosition->copyReg)
                {
                    assert(interval->physReg != regNum);
                    regRecord->assignedInterval = nullptr;
                    assert(interval->assignedReg != nullptr);
                    regRecord = interval->assignedReg;
                }
                if (currentRefPosition->spillAfter || currentRefPosition->lastUse)
                {
                    interval->physReg     = REG_NA;
                    interval->assignedReg = nullptr;

                    // regRecord could be null if the RefPosition does not require a register.
                    if (regRecord != nullptr)
                    {
                        regRecord->assignedInterval = nullptr;
                    }
                    else
                    {
                        assert(!currentRefPosition->RequiresRegister());
                    }
                }
            }
        }
    }

    // Now, verify the resolution blocks.
    // Currently these are nearly always at the end of the method, but that may not always be the case.
    // So, we'll go through all the BBs looking for blocks whose bbNum is greater than bbNumMaxBeforeResolution.
    for (BasicBlock* currentBlock = compiler->fgFirstBB; currentBlock != nullptr; currentBlock = currentBlock->bbNext)
    {
        if (currentBlock->bbNum > bbNumMaxBeforeResolution)
        {
            // If we haven't enregistered any lclVars, we have no resolution blocks.
            assert(enregisterLocalVars);

            if (VERBOSE)
            {
                dumpRegRecordTitle();
                printf(shortRefPositionFormat, 0, 0);
                assert(currentBlock->bbPreds != nullptr && currentBlock->bbPreds->flBlock != nullptr);
                printf(bbRefPosFormat, currentBlock->bbNum, currentBlock->bbPreds->flBlock->bbNum);
                dumpRegRecords();
            }

            // Clear register assignments.
            for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
            {
                RegRecord* physRegRecord        = getRegisterRecord(reg);
                physRegRecord->assignedInterval = nullptr;
            }

            // Set the incoming register assignments
            VarToRegMap     inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
            VarSetOps::Iter iter(compiler, currentBlock->bbLiveIn);
            unsigned        varIndex = 0;
            while (iter.NextElem(&varIndex))
            {
                if (localVarIntervals[varIndex] == nullptr)
                {
                    assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
                    continue;
                }
                regNumber regNum      = getVarReg(inVarToRegMap, varIndex);
                Interval* interval    = getIntervalForLocalVar(varIndex);
                interval->physReg     = regNum;
                interval->assignedReg = &(physRegs[regNum]);
                interval->isActive    = true;
                physRegs[regNum].assignedInterval = interval;
            }

            // Verify the moves in this block
            LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
            for (GenTree* node : currentBlockRange.NonPhiNodes())
            {
                assert(IsResolutionNode(currentBlockRange, node));
                if (IsResolutionMove(node))
                {
                    // Only verify nodes that are actually moves; don't bother with the nodes that are
                    // operands to moves.
                    verifyResolutionMove(node, currentLocation);
                }
            }

            // Verify the outgoing register assignments
            {
                VarToRegMap     outVarToRegMap = getOutVarToRegMap(currentBlock->bbNum);
                VarSetOps::Iter iter(compiler, currentBlock->bbLiveOut);
                unsigned        varIndex = 0;
                while (iter.NextElem(&varIndex))
                {
                    if (localVarIntervals[varIndex] == nullptr)
                    {
                        assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
                        continue;
                    }
                    regNumber regNum   = getVarReg(outVarToRegMap, varIndex);
                    Interval* interval = getIntervalForLocalVar(varIndex);
                    assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
                    interval->physReg     = REG_NA;
                    interval->assignedReg = nullptr;
                    interval->isActive    = false;
                }
            }
        }
    }

    DBEXEC(VERBOSE, printf("\n"));
}

//------------------------------------------------------------------------
// verifyResolutionMove: Verify a resolution move. Called by verifyFinalAllocation().
//
// Arguments:
//    resolutionMove  - A GenTree* that must be a resolution move.
//    currentLocation - The LsraLocation of the most recent RefPosition that has been verified.
//
// Return Value:
//    None.
//
// Notes:
//    If verbose is set, this will also dump the moves into the table of final allocations.
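//
//    For illustration, the move takes one of the following forms (see the handling below):
//      - GT_SWAP:                     exchange the registers of its two lclVar operands.
//      - GT_COPY of a GT_LCL_VAR:     register-to-register move (srcRegNum -> dstRegNum).
//      - GT_LCL_VAR with GTF_SPILLED: reload from the stack (REG_STK -> dstRegNum).
//      - GT_LCL_VAR with GTF_SPILL:   store to the stack (srcRegNum -> REG_STK).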
void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation currentLocation)
{
    GenTree* dst = resolutionMove;
    assert(IsResolutionMove(dst));

    if (dst->OperGet() == GT_SWAP)
    {
        GenTreeLclVarCommon* left          = dst->gtGetOp1()->AsLclVarCommon();
        GenTreeLclVarCommon* right         = dst->gtGetOp2()->AsLclVarCommon();
        regNumber            leftRegNum    = left->gtRegNum;
        regNumber            rightRegNum   = right->gtRegNum;
        LclVarDsc*           leftVarDsc    = compiler->lvaTable + left->gtLclNum;
        LclVarDsc*           rightVarDsc   = compiler->lvaTable + right->gtLclNum;
        Interval*            leftInterval  = getIntervalForLocalVar(leftVarDsc->lvVarIndex);
        Interval*            rightInterval = getIntervalForLocalVar(rightVarDsc->lvVarIndex);
        assert(leftInterval->physReg == leftRegNum && rightInterval->physReg == rightRegNum);
        leftInterval->physReg                  = rightRegNum;
        rightInterval->physReg                 = leftRegNum;
        leftInterval->assignedReg              = &physRegs[rightRegNum];
        rightInterval->assignedReg             = &physRegs[leftRegNum];
        physRegs[rightRegNum].assignedInterval = leftInterval;
        physRegs[leftRegNum].assignedInterval  = rightInterval;
        if (VERBOSE)
        {
            printf(shortRefPositionFormat, currentLocation, 0);
            dumpIntervalName(leftInterval);
            printf(" Swap ");
            printf(" %-4s ", getRegName(rightRegNum));
            dumpRegRecords();
            printf(shortRefPositionFormat, currentLocation, 0);
            dumpIntervalName(rightInterval);
            printf(" \" ");
            printf(" %-4s ", getRegName(leftRegNum));
            dumpRegRecords();
        }
        return;
    }
    regNumber            dstRegNum = dst->gtRegNum;
    regNumber            srcRegNum;
    GenTreeLclVarCommon* lcl;
    if (dst->OperGet() == GT_COPY)
    {
        lcl       = dst->gtGetOp1()->AsLclVarCommon();
        srcRegNum = lcl->gtRegNum;
    }
    else
    {
        lcl = dst->AsLclVarCommon();
        if ((lcl->gtFlags & GTF_SPILLED) != 0)
        {
            srcRegNum = REG_STK;
        }
        else
        {
            assert((lcl->gtFlags & GTF_SPILL) != 0);
            srcRegNum = dstRegNum;
            dstRegNum = REG_STK;
        }
    }

    Interval* interval = getIntervalForLocalVarNode(lcl);
    assert(interval->physReg == srcRegNum || (srcRegNum == REG_STK && interval->physReg == REG_NA));
    if (srcRegNum != REG_STK)
    {
        physRegs[srcRegNum].assignedInterval = nullptr;
    }
    if (dstRegNum != REG_STK)
    {
        interval->physReg                    = dstRegNum;
        interval->assignedReg                = &(physRegs[dstRegNum]);
        physRegs[dstRegNum].assignedInterval = interval;
        interval->isActive                   = true;
    }
    else
    {
        interval->physReg     = REG_NA;
        interval->assignedReg = nullptr;
        interval->isActive    = false;
    }
    if (VERBOSE)
    {
        printf(shortRefPositionFormat, currentLocation, 0);
        dumpIntervalName(interval);
        printf(" Move ");
        printf(" %-4s ", getRegName(dstRegNum));
        dumpRegRecords();
    }
}
#endif // DEBUG
