1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4/*****************************************************************************/
5
6#ifndef _LSRA_H_
7#define _LSRA_H_
8
9#include "arraylist.h"
10#include "smallhash.h"
11
12// Minor and forward-reference types
13class Interval;
14class RefPosition;
15class LinearScan;
16class RegRecord;
17
18template <class T>
19class ArrayStack;
20
21// LsraLocation tracks the linearized order of the nodes.
22// Each node is assigned two LsraLocations - one for all the uses and all but the last
// def, and a second location for the last def (if any).
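// For example (an illustrative sketch of the scheme described above): if a GT_ADD node is
// assigned location 10, the uses of its operands occur at location 10 and its (last) def at
// location 11; the next node in the linearized order is then assigned locations 12 and 13.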
24
25typedef unsigned int LsraLocation;
26const unsigned int MinLocation = 0;
27const unsigned int MaxLocation = UINT_MAX;
28// max number of registers an operation could require internally (in addition to uses and defs)
29const unsigned int MaxInternalRegisters = 8;
30const unsigned int RegisterTypeCount = 2;
31
32/*****************************************************************************
33* Register types
34*****************************************************************************/
35typedef var_types RegisterType;
36#define IntRegisterType TYP_INT
37#define FloatRegisterType TYP_FLOAT
38
39//------------------------------------------------------------------------
40// regType: Return the RegisterType to use for a given type
41//
42// Arguments:
43// type - the type of interest
44//
45template <class T>
46RegisterType regType(T type)
47{
48#ifdef FEATURE_SIMD
49 if (varTypeIsSIMD(type))
50 {
51 return FloatRegisterType;
52 }
53#endif // FEATURE_SIMD
54 return varTypeIsFloating(TypeGet(type)) ? FloatRegisterType : IntRegisterType;
55}
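// For example (illustrative): regType(TYP_INT) and regType(TYP_REF) yield IntRegisterType,
// while regType(TYP_FLOAT) and regType(TYP_DOUBLE) yield FloatRegisterType; under
// FEATURE_SIMD, SIMD types such as TYP_SIMD16 also yield FloatRegisterType.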
56
57//------------------------------------------------------------------------
58// useFloatReg: Check if the given var_type should be allocated to a FloatRegisterType
59//
60inline bool useFloatReg(var_types type)
61{
62 return (regType(type) == FloatRegisterType);
63}
64
65//------------------------------------------------------------------------
66// registerTypesEquivalent: Check to see if two RegisterTypes are equivalent
67//
68inline bool registerTypesEquivalent(RegisterType a, RegisterType b)
69{
70 return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b);
71}
72
73//------------------------------------------------------------------------
// calleeSaveRegs: Get the set of callee-save registers of the given RegisterType
75//
76inline regMaskTP calleeSaveRegs(RegisterType rt)
77{
78 return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
79}
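// For example (illustrative): useFloatReg(TYP_DOUBLE) is true, so a TYP_DOUBLE interval is
// allocated from the float register file, and calleeSaveRegs(FloatRegisterType) yields
// RBM_FLT_CALLEE_SAVED as its set of callee-save candidates.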
80
81//------------------------------------------------------------------------
82// RefInfo: Captures the necessary information for a definition that is "in-flight"
83// during `buildIntervals` (i.e. a tree-node definition has been encountered,
84// but not its use). This includes the RefPosition and its associated
85// GenTree node.
86//
87struct RefInfo
88{
89 RefPosition* ref;
90 GenTree* treeNode;
91
92 RefInfo(RefPosition* r, GenTree* t) : ref(r), treeNode(t)
93 {
94 }
95
96 // default constructor for data structures
97 RefInfo()
98 {
99 }
100};
101
102//------------------------------------------------------------------------
103// RefInfoListNode: used to store a single `RefInfo` value for a
104// node during `buildIntervals`.
105//
106// This is the node type for `RefInfoList` below.
107//
108class RefInfoListNode final : public RefInfo
109{
110 friend class RefInfoList;
111 friend class RefInfoListNodePool;
112
113 RefInfoListNode* m_next; // The next node in the list
114
115public:
116 RefInfoListNode(RefPosition* r, GenTree* t) : RefInfo(r, t)
117 {
118 }
119
120 //------------------------------------------------------------------------
121 // RefInfoListNode::Next: Returns the next node in the list.
122 RefInfoListNode* Next() const
123 {
124 return m_next;
125 }
126};
127
128//------------------------------------------------------------------------
129// RefInfoList: used to store a list of `RefInfo` values for a
130// node during `buildIntervals`.
131//
132// This list of 'RefInfoListNode's contains the source nodes consumed by
133// a node, and is created by 'BuildNode'.
134//
135class RefInfoList final
136{
137 friend class RefInfoListNodePool;
138
139 RefInfoListNode* m_head; // The head of the list
140 RefInfoListNode* m_tail; // The tail of the list
141
142public:
143 RefInfoList() : m_head(nullptr), m_tail(nullptr)
144 {
145 }
146
147 RefInfoList(RefInfoListNode* node) : m_head(node), m_tail(node)
148 {
149 assert(m_head->m_next == nullptr);
150 }
151
152 //------------------------------------------------------------------------
153 // RefInfoList::IsEmpty: Returns true if the list is empty.
154 //
155 bool IsEmpty() const
156 {
157 return m_head == nullptr;
158 }
159
160 //------------------------------------------------------------------------
161 // RefInfoList::Begin: Returns the first node in the list.
162 //
163 RefInfoListNode* Begin() const
164 {
165 return m_head;
166 }
167
168 //------------------------------------------------------------------------
169 // RefInfoList::End: Returns the position after the last node in the
170 // list. The returned value is suitable for use as
171 // a sentinel for iteration.
172 //
173 RefInfoListNode* End() const
174 {
175 return nullptr;
176 }
177
178 //------------------------------------------------------------------------
    // RefInfoList::Last: Returns the last node in the list.
182 //
183 RefInfoListNode* Last() const
184 {
185 return m_tail;
186 }
187
188 //------------------------------------------------------------------------
189 // RefInfoList::Append: Appends a node to the list.
190 //
191 // Arguments:
192 // node - The node to append. Must not be part of an existing list.
193 //
194 void Append(RefInfoListNode* node)
195 {
196 assert(node->m_next == nullptr);
197
198 if (m_tail == nullptr)
199 {
200 assert(m_head == nullptr);
201 m_head = node;
202 }
203 else
204 {
205 m_tail->m_next = node;
206 }
207
208 m_tail = node;
209 }
210 //------------------------------------------------------------------------
211 // RefInfoList::Append: Appends another list to this list.
212 //
213 // Arguments:
214 // other - The list to append.
215 //
216 void Append(RefInfoList other)
217 {
218 if (m_tail == nullptr)
219 {
220 assert(m_head == nullptr);
221 m_head = other.m_head;
222 }
223 else
224 {
225 m_tail->m_next = other.m_head;
226 }
227
228 m_tail = other.m_tail;
229 }
230
231 //------------------------------------------------------------------------
232 // RefInfoList::Prepend: Prepends a node to the list.
233 //
234 // Arguments:
235 // node - The node to prepend. Must not be part of an existing list.
236 //
237 void Prepend(RefInfoListNode* node)
238 {
239 assert(node->m_next == nullptr);
240
241 if (m_head == nullptr)
242 {
243 assert(m_tail == nullptr);
244 m_tail = node;
245 }
246 else
247 {
248 node->m_next = m_head;
249 }
250
251 m_head = node;
252 }
253
254 //------------------------------------------------------------------------
255 // RefInfoList::Add: Adds a node to the list.
256 //
257 // Arguments:
258 // node - The node to add. Must not be part of an existing list.
259 // prepend - True if it should be prepended (otherwise is appended)
260 //
261 void Add(RefInfoListNode* node, bool prepend)
262 {
263 if (prepend)
264 {
265 Prepend(node);
266 }
267 else
268 {
269 Append(node);
270 }
271 }
272
273 //------------------------------------------------------------------------
    // removeListNode - remove the given listNode from the list, and return it
275 //
276 // Notes:
277 // The BuildNode methods use this helper to retrieve the RefInfo for child nodes
278 // from the useList being constructed.
279 //
280 RefInfoListNode* removeListNode(RefInfoListNode* listNode, RefInfoListNode* prevListNode)
281 {
282 RefInfoListNode* nextNode = listNode->Next();
283 if (prevListNode == nullptr)
284 {
285 m_head = nextNode;
286 }
287 else
288 {
289 prevListNode->m_next = nextNode;
290 }
291 if (nextNode == nullptr)
292 {
293 m_tail = prevListNode;
294 }
295 listNode->m_next = nullptr;
296 return listNode;
297 }
298
299 // removeListNode - remove the RefInfoListNode for the given GenTree node from the defList
300 RefInfoListNode* removeListNode(GenTree* node);
301 // Same as above but takes a multiRegIdx to support multi-reg nodes.
302 RefInfoListNode* removeListNode(GenTree* node, unsigned multiRegIdx);
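
    // A use is typically built from an "in-flight" def roughly as follows (illustrative
    // sketch; the actual logic lives in LinearScan::BuildUse):
    //
    //    RefInfoListNode* refInfo        = defList.removeListNode(operand);
    //    RefPosition*     defRefPosition = refInfo->ref;
    //    // ... build the use RefPosition from 'defRefPosition' ...
    //    listNodePool.ReturnNode(refInfo); // return the node to the pool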
303
304 //------------------------------------------------------------------------
305 // GetRefPosition - retrieve the RefPosition for the given node
306 //
307 // Notes:
308 // The Build methods use this helper to retrieve the RefPosition for child nodes
309 // from the useList being constructed. Note that, if the user knows the order of the operands,
310 // it is expected that they should just retrieve them directly.
311
312 RefPosition* GetRefPosition(GenTree* node)
313 {
314 for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
315 {
316 if (listNode->treeNode == node)
317 {
318 return listNode->ref;
319 }
320 }
321 assert(!"GetRefPosition didn't find the node");
322 unreached();
323 }
324
325 //------------------------------------------------------------------------
326 // RefInfoList::GetSecond: Gets the second node in the list.
327 //
328 // Arguments:
329 // (DEBUG ONLY) treeNode - The GenTree* we expect to be in the second node.
330 //
331 RefInfoListNode* GetSecond(INDEBUG(GenTree* treeNode))
332 {
333 noway_assert((Begin() != nullptr) && (Begin()->Next() != nullptr));
334 RefInfoListNode* second = Begin()->Next();
335 assert(second->treeNode == treeNode);
336 return second;
337 }
338
339#ifdef DEBUG
340 // Count - return the number of nodes in the list (DEBUG only)
341 int Count()
342 {
343 int count = 0;
344 for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
345 {
346 count++;
347 }
348 return count;
349 }
350#endif // DEBUG
351};
352
353//------------------------------------------------------------------------
354// RefInfoListNodePool: manages a pool of `RefInfoListNode`
355// values to decrease overall memory usage
356// during `buildIntervals`.
357//
358// `buildIntervals` involves creating a list of RefInfo items per
359// node that either directly produces a set of registers or that is a
360// contained node with register-producing sources. However, these lists
361// are short-lived: they are destroyed once the use of the corresponding
362// node is processed. As such, there is typically only a small number of
363// `RefInfoListNode` values in use at any given time. Pooling these
364// values avoids otherwise frequent allocations.
365class RefInfoListNodePool final
366{
367 RefInfoListNode* m_freeList;
368 Compiler* m_compiler;
369 static const unsigned defaultPreallocation = 8;
370
371public:
372 RefInfoListNodePool(Compiler* compiler, unsigned preallocate = defaultPreallocation);
373 RefInfoListNode* GetNode(RefPosition* r, GenTree* t, unsigned regIdx = 0);
374 void ReturnNode(RefInfoListNode* listNode);
375};
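
// Typical usage during `buildIntervals` (illustrative sketch): when a def is built for a node,
// a RefInfoListNode is obtained from the pool and appended to the defList; once the consuming
// node has been processed, the node is returned to the pool:
//
//    RefInfoListNode* listNode = listNodePool.GetNode(defRefPosition, tree);
//    defList.Append(listNode);
//    // ... later, when the use of 'tree' is built ...
//    listNodePool.ReturnNode(listNode);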
376
377struct LsraBlockInfo
378{
379 // bbNum of the predecessor to use for the register location of live-in variables.
380 // 0 for fgFirstBB.
381 unsigned int predBBNum;
382 BasicBlock::weight_t weight;
383 bool hasCriticalInEdge;
384 bool hasCriticalOutEdge;
385
386#if TRACK_LSRA_STATS
387 // Per block maintained LSRA statistics.
388
389 // Number of spills of local vars or tree temps in this basic block.
390 unsigned spillCount;
391
392 // Number of GT_COPY nodes inserted in this basic block while allocating regs.
393 // Note that GT_COPY nodes are also inserted as part of basic block boundary
394 // resolution, which are accounted against resolutionMovCount but not
395 // against copyRegCount.
396 unsigned copyRegCount;
397
398 // Number of resolution moves inserted in this basic block.
399 unsigned resolutionMovCount;
400
401 // Number of critical edges from this block that are split.
402 unsigned splitEdgeCount;
403#endif // TRACK_LSRA_STATS
404};
405
// RefType is used in part as a bit mask:
// the low-order 2 bits will be 1 for defs, and 2 for uses.
408enum RefType : unsigned char
409{
410#define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
411#include "lsra_reftypes.h"
412#undef DEF_REFTYPE
413};
414
415// position in a block (for resolution)
416enum BlockStartOrEnd
417{
418 BlockPositionStart = 0,
419 BlockPositionEnd = 1,
420 PositionCount = 2
421};
422
423inline bool RefTypeIsUse(RefType refType)
424{
425 return ((refType & RefTypeUse) == RefTypeUse);
426}
427
428inline bool RefTypeIsDef(RefType refType)
429{
430 return ((refType & RefTypeDef) == RefTypeDef);
431}
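
// For example (illustrative; the concrete member values are defined in lsra_reftypes.h):
// RefTypeParamDef includes the RefTypeDef bit, so RefTypeIsDef(RefTypeParamDef) is true while
// RefTypeIsUse(RefTypeParamDef) is false; RefTypeKill sets neither bit, so both predicates
// return false for it.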
432
433typedef regNumberSmall* VarToRegMap;
434
435typedef jitstd::list<Interval> IntervalList;
436typedef jitstd::list<RefPosition> RefPositionList;
437typedef jitstd::list<RefPosition>::iterator RefPositionIterator;
438typedef jitstd::list<RefPosition>::reverse_iterator RefPositionReverseIterator;
439
440class Referenceable
441{
442public:
443 Referenceable()
444 {
445 firstRefPosition = nullptr;
446 recentRefPosition = nullptr;
447 lastRefPosition = nullptr;
448 isActive = false;
449 }
450
451 // A linked list of RefPositions. These are only traversed in the forward
452 // direction, and are not moved, so they don't need to be doubly linked
453 // (see RefPosition).
454
455 RefPosition* firstRefPosition;
456 RefPosition* recentRefPosition;
457 RefPosition* lastRefPosition;
458
459 bool isActive;
460
461 // Get the position of the next reference which is at or greater than
    // the current location (relies upon recentRefPosition being updated
463 // during traversal).
464 RefPosition* getNextRefPosition();
465 LsraLocation getNextRefLocation();
466};
467
468class RegRecord : public Referenceable
469{
470public:
471 RegRecord()
472 {
473 assignedInterval = nullptr;
474 previousInterval = nullptr;
475 regNum = REG_NA;
476 isCalleeSave = false;
477 registerType = IntRegisterType;
478 isBusyUntilNextKill = false;
479 }
480
481 void init(regNumber reg)
482 {
483#ifdef _TARGET_ARM64_
484 // The Zero register, or the SP
485 if ((reg == REG_ZR) || (reg == REG_SP))
486 {
487 // IsGeneralRegister returns false for REG_ZR and REG_SP
488 regNum = reg;
489 registerType = IntRegisterType;
490 }
491 else
492#endif
493 if (emitter::isFloatReg(reg))
494 {
495 registerType = FloatRegisterType;
496 }
497 else
498 {
499 // The constructor defaults to IntRegisterType
500 assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
501 }
502 regNum = reg;
503 isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
504 }
505
506#ifdef DEBUG
507 // print out representation
508 void dump();
509 // concise representation for embedding
510 void tinyDump();
511#endif // DEBUG
512
513 bool isFree();
514
515 // RefPosition * getNextRefPosition();
516 // LsraLocation getNextRefLocation();
517
518 // DATA
519
520 // interval to which this register is currently allocated.
521 // If the interval is inactive (isActive == false) then it is not currently live,
    // and the register can be unassigned (i.e. setting assignedInterval to nullptr)
523 // without spilling the register.
524 Interval* assignedInterval;
525 // Interval to which this register was previously allocated, and which was unassigned
526 // because it was inactive. This register will be reassigned to this Interval when
527 // assignedInterval becomes inactive.
528 Interval* previousInterval;
529
530 regNumber regNum;
531 bool isCalleeSave;
532 RegisterType registerType;
533 // This register must be considered busy until the next time it is explicitly killed.
534 // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
535 // the problem with the reg becoming free if the last-use is encountered before the call.
536 bool isBusyUntilNextKill;
537
538 bool conflictingFixedRegReference(RefPosition* refPosition);
539};
540
541inline bool leafInRange(GenTree* leaf, int lower, int upper)
542{
543 if (!leaf->IsIntCnsFitsInI32())
544 {
545 return false;
546 }
547 if (leaf->gtIntCon.gtIconVal < lower)
548 {
549 return false;
550 }
551 if (leaf->gtIntCon.gtIconVal > upper)
552 {
553 return false;
554 }
555
556 return true;
557}
558
559inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
560{
561 if (!leafInRange(leaf, lower, upper))
562 {
563 return false;
564 }
565 if (leaf->gtIntCon.gtIconVal % multiple)
566 {
567 return false;
568 }
569
570 return true;
571}
572
573inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
574{
575 if (leaf->OperGet() != GT_ADD)
576 {
577 return false;
578 }
579 return leafInRange(leaf->gtGetOp2(), lower, upper, multiple);
580}
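
// For example (illustrative): given a GT_ADD whose second operand is an integer constant 8,
// leafAddInRange(addNode, 0, 255) is true, and leafAddInRange(addNode, 0, 255, 4) is also true
// because 8 is a multiple of 4; a constant of 10 would fail the multiple-of-4 check.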
581
582inline bool isCandidateVar(LclVarDsc* varDsc)
583{
584 return varDsc->lvLRACandidate;
585}
586
587/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
588XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
589XX XX
590XX LinearScan XX
591XX XX
592XX This is the container for the Linear Scan data structures and methods. XX
593XX XX
594XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
595XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
596*/
597// OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
598// Linear Scan Register Allocator". It is driven by iterating over the Interval
599// lists. In this case, we need multiple IntervalLists, and Intervals will be
600// moved between them so they must be easily updated.
601
602// OPTION 2: The algorithm is driven by iterating over the RefPositions. In this
603// case, we only need a single IntervalList, and it won't be updated.
604// The RefPosition must refer to its Interval, and we need to be able to traverse
605// to the next RefPosition in code order
606// THIS IS THE OPTION CURRENTLY BEING PURSUED
607
608class LinearScan : public LinearScanInterface
609{
610 friend class RefPosition;
611 friend class Interval;
612 friend class Lowering;
613
614public:
615 // This could use further abstraction. From Compiler we need the tree,
616 // the flowgraph and the allocator.
617 LinearScan(Compiler* theCompiler);
618
619 // This is the main driver
620 virtual void doLinearScan();
621
622 static bool isSingleRegister(regMaskTP regMask)
623 {
624 return (genExactlyOneBit(regMask));
625 }
626
627 // Initialize the block traversal for LSRA.
628 // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
629 // which determines the order in which blocks will be allocated (currently called during Lowering).
630 BasicBlock* startBlockSequence();
631 // Move to the next block in sequence, updating the current block information.
632 BasicBlock* moveToNextBlock();
633 // Get the next block to be scheduled without changing the current block,
634 // but updating the blockSequence during the first iteration if it is not fully computed.
635 BasicBlock* getNextBlock();
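
    // The block sequence is typically traversed as follows (illustrative sketch):
    //
    //    for (BasicBlock* block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
    //    {
    //        // process (e.g. build intervals or resolve registers for) 'block'
    //    }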
636
637 // This is called during code generation to update the location of variables
638 virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);
639
640 // This does the dataflow analysis and builds the intervals
641 void buildIntervals();
642
643 // This is where the actual assignment is done
644 void allocateRegisters();
645
646 // This is the resolution phase, where cross-block mismatches are fixed up
647 void resolveRegisters();
648
649 void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);
650
651 // Insert a copy in the case where a tree node value must be moved to a different
652 // register at the point of use, or it is reloaded to a different register
653 // than the one it was spilled from
654 void insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition);
655
656#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
657 // Insert code to save and restore the upper half of a vector that lives
658 // in a callee-save register at the point of a call (the upper half is
659 // not preserved).
660 void insertUpperVectorSaveAndReload(GenTree* tree, RefPosition* refPosition, BasicBlock* block);
661#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
662
663 // resolve along one block-block edge
664 enum ResolveType
665 {
666 ResolveSplit,
667 ResolveJoin,
668 ResolveCritical,
669 ResolveSharedCritical,
670 ResolveTypeCount
671 };
672#ifdef DEBUG
673 static const char* resolveTypeName[ResolveTypeCount];
674#endif
675
676 enum WhereToInsert
677 {
678 InsertAtTop,
679 InsertAtBottom
680 };
681
682#ifdef _TARGET_ARM_
683 void addResolutionForDouble(BasicBlock* block,
684 GenTree* insertionPoint,
685 Interval** sourceIntervals,
686 regNumberSmall* location,
687 regNumber toReg,
688 regNumber fromReg,
689 ResolveType resolveType);
690#endif
691 void addResolution(
692 BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber outReg, regNumber inReg);
693
694 void handleOutgoingCriticalEdges(BasicBlock* block);
695
696 void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet);
697
698 void resolveEdges();
699
700 // Keep track of how many temp locations we'll need for spill
701 void initMaxSpill();
702 void updateMaxSpill(RefPosition* refPosition);
703 void recordMaxSpill();
704
705 // max simultaneous spill locations used of every type
706 unsigned int maxSpill[TYP_COUNT];
707 unsigned int currentSpill[TYP_COUNT];
708 bool needFloatTmpForFPCall;
709 bool needDoubleTmpForFPCall;
710
711#ifdef DEBUG
712private:
713 //------------------------------------------------------------------------
714 // Should we stress lsra? This uses the COMPlus_JitStressRegs variable.
715 //
716 // The mask bits are currently divided into fields in which each non-zero value
717 // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).
718 // However, subject to possible constraints (to be determined), the different
719 // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).
720 // Note that the field values are declared in a public enum, but the actual bits are
721 // only accessed via accessors.
722
723 unsigned lsraStressMask;
724
725 // This controls the registers available for allocation
726 enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,
727 LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};
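
    // For example (illustrative): with COMPlus_JitStressRegs=0x103, getStressLimitRegs() returns
    // LSRA_LIMIT_SMALL_SET (0x3) and getLsraBlockBoundaryLocations() below returns
    // LSRA_BLOCK_BOUNDARY_LAYOUT (0x100), exercising both stress modes at once.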
728
729 // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
730 // registers, so as to get different coverage than limiting to callee or caller.
    // At least for x86 and AMD64, and potentially other architectures that will support SIMD,
732 // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
733 // Hence the "SmallFPSet" has 5 elements.
734 CLANG_FORMAT_COMMENT_ANCHOR;
735
736#if defined(_TARGET_AMD64_)
737#ifdef UNIX_AMD64_ABI
    // On System V the RDI and RSI are not callee saved. Use R12 and R13 as callee saved registers.
739 static const regMaskTP LsraLimitSmallIntSet =
740 (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
741#else // !UNIX_AMD64_ABI
742 // On Windows Amd64 use the RDI and RSI as callee saved registers.
743 static const regMaskTP LsraLimitSmallIntSet =
744 (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
745#endif // !UNIX_AMD64_ABI
746 static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
747#elif defined(_TARGET_ARM_)
748 // On ARM, we may need two registers to set up the target register for a virtual call, so we need
749 // to have at least the maximum number of arg registers, plus 2.
750 static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5);
751 static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
752#elif defined(_TARGET_ARM64_)
753 static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
754 static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
755#elif defined(_TARGET_X86_)
756 static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
757 static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
758#else
759#error Unsupported or unset target architecture
760#endif // target
761
762 LsraStressLimitRegs getStressLimitRegs()
763 {
764 return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
765 }
766
767 regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);
768 regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
769
770 // This controls the heuristics used to select registers
771 // These can be combined.
772 enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
773 LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};
774 LsraSelect getSelectionHeuristics()
775 {
776 return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
777 }
778 bool doReverseSelect()
779 {
780 return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
781 }
782 bool doReverseCallerCallee()
783 {
784 return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
785 }
786 bool doSelectNearest()
787 {
788 return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
789 }
790
791 // This controls the order in which basic blocks are visited during allocation
792 enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,
793 LSRA_TRAVERSE_RANDOM = 0x60, // NYI
794 LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};
795 LsraTraversalOrder getLsraTraversalOrder()
796 {
797 if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
798 {
799 return LSRA_TRAVERSE_DEFAULT;
800 }
801 return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
802 }
803 bool isTraversalLayoutOrder()
804 {
805 return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
806 }
807 bool isTraversalPredFirstOrder()
808 {
809 return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
810 }
811
812 // This controls whether lifetimes should be extended to the entire method.
813 // Note that this has no effect under MinOpts
814 enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};
815 LsraExtendLifetimes getLsraExtendLifeTimes()
816 {
817 return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
818 }
819 bool extendLifetimes()
820 {
821 return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
822 }
823
    // This controls whether variable locations should be set to the previous block in layout order
825 // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
826 // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
827 enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
828 LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};
829 LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
830 {
831 return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
832 }
833 regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
834
835 // This controls whether we always insert a GT_RELOAD instruction after a spill
836 // Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
837 enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
838 LsraReload getLsraReload()
839 {
840 return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
841 }
842 bool alwaysInsertReload()
843 {
844 return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
845 }
846
847 // This controls whether we spill everywhere
848 enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};
849 LsraSpill getLsraSpill()
850 {
851 return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
852 }
853 bool spillAlways()
854 {
855 return getLsraSpill() == LSRA_SPILL_ALWAYS;
856 }
857
    // This controls whether RefPositions that Lowering/codegen indicated as reg-optional are
    // allocated a register at all.
860 enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
861 LSRA_REG_OPTIONAL_MASK = 0x1000};
862
863 LsraRegOptionalControl getLsraRegOptionalControl()
864 {
865 return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK);
866 }
867
868 bool regOptionalNoAlloc()
869 {
870 return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
871 }
872
873 bool candidatesAreStressLimited()
874 {
875 return ((lsraStressMask & (LSRA_LIMIT_MASK | LSRA_SELECT_MASK)) != 0);
876 }
877
878 // Dump support
879 void dumpNodeInfo(GenTree* node, regMaskTP dstCandidates, int srcCount, int dstCount);
880 void dumpDefList();
881 void lsraDumpIntervals(const char* msg);
882 void dumpRefPositions(const char* msg);
883 void dumpVarRefPositions(const char* msg);
884
885 // Checking code
886 static bool IsLsraAdded(GenTree* node)
887 {
888 return ((node->gtDebugFlags & GTF_DEBUG_NODE_LSRA_ADDED) != 0);
889 }
890 static void SetLsraAdded(GenTree* node)
891 {
892 node->gtDebugFlags |= GTF_DEBUG_NODE_LSRA_ADDED;
893 }
894 static bool IsResolutionMove(GenTree* node);
895 static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);
896
897 void verifyFinalAllocation();
898 void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
899#else // !DEBUG
900 bool doSelectNearest()
901 {
902 return false;
903 }
904 bool extendLifetimes()
905 {
906 return false;
907 }
908 bool spillAlways()
909 {
910 return false;
911 }
912 // In a retail build we support only the default traversal order
913 bool isTraversalLayoutOrder()
914 {
915 return false;
916 }
917 bool isTraversalPredFirstOrder()
918 {
919 return true;
920 }
921 bool getLsraExtendLifeTimes()
922 {
923 return false;
924 }
925 static void SetLsraAdded(GenTree* node)
926 {
927 // do nothing; checked only under #DEBUG
928 }
929 bool candidatesAreStressLimited()
930 {
931 return false;
932 }
933#endif // !DEBUG
934
935public:
936 // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().
937 bool isRegCandidate(LclVarDsc* varDsc);
938
939 bool isContainableMemoryOp(GenTree* node);
940
941private:
942 // Determine which locals are candidates for allocation
943 void identifyCandidates();
944
945 // determine which locals are used in EH constructs we don't want to deal with
946 void identifyCandidatesExceptionDataflow();
947
948 void buildPhysRegRecords();
949
950#ifdef DEBUG
951 void checkLastUses(BasicBlock* block);
952 static int ComputeOperandDstCount(GenTree* operand);
953 static int ComputeAvailableSrcCount(GenTree* node);
954#endif // DEBUG
955
956 void setFrameType();
957
958 // Update allocations at start/end of block
959 void unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap);
960 void processBlockEndAllocation(BasicBlock* current);
961
962 // Record variable locations at start/end of block
963 void processBlockStartLocations(BasicBlock* current, bool allocationPass);
964 void processBlockEndLocations(BasicBlock* current);
965
966#ifdef _TARGET_ARM_
967 bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
968 RegRecord* getSecondHalfRegRec(RegRecord* regRec);
969 RegRecord* findAnotherHalfRegRec(RegRecord* regRec);
970 bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
971 void unassignDoublePhysReg(RegRecord* doubleRegRecord);
972#endif
973 void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType);
974 void updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType);
975 bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
976 bool isAssignedToInterval(Interval* interval, RegRecord* regRec);
977 bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);
978 bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
979 bool isRegInUse(RegRecord* regRec, RefPosition* refPosition);
980
981 // insert refpositions representing prolog zero-inits which will be added later
982 void insertZeroInitRefPositions();
983
984 // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions
985 void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);
986
987 void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
988
989 void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
990
991#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
992 VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree,
993 LsraLocation currentLoc,
994 regMaskTP fpCalleeKillSet);
995 void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
996#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
997
998#if defined(UNIX_AMD64_ABI)
    // For AMD64 on SystemV machines. This method is called as a replacement for
    // raUpdateRegStateForArg, which is used on Windows. On System V systems a struct
    // can be passed partially using registers from the 2 register files.
1003 void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
1004#endif // defined(UNIX_AMD64_ABI)
1005
1006 // Update reg state for an incoming register argument
1007 void updateRegStateForArg(LclVarDsc* argDsc);
1008
1009 inline bool isCandidateLocalRef(GenTree* tree)
1010 {
1011 if (tree->IsLocal())
1012 {
1013 unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
1014 assert(lclNum < compiler->lvaCount);
1015 LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
1016
1017 return isCandidateVar(varDsc);
1018 }
1019 return false;
1020 }
1021
1022 // Helpers for getKillSetForNode().
1023 regMaskTP getKillSetForStoreInd(GenTreeStoreInd* tree);
1024 regMaskTP getKillSetForShiftRotate(GenTreeOp* tree);
1025 regMaskTP getKillSetForMul(GenTreeOp* tree);
1026 regMaskTP getKillSetForCall(GenTreeCall* call);
1027 regMaskTP getKillSetForModDiv(GenTreeOp* tree);
1028 regMaskTP getKillSetForBlockStore(GenTreeBlk* blkNode);
1029 regMaskTP getKillSetForReturn();
1030 regMaskTP getKillSetForProfilerHook();
1031#ifdef FEATURE_HW_INTRINSICS
1032 regMaskTP getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node);
1033#endif // FEATURE_HW_INTRINSICS
1034
1035// Return the registers killed by the given tree node.
1036// This is used only for an assert, and for stress, so it is only defined under DEBUG.
1037// Otherwise, the Build methods should obtain the killMask from the appropriate method above.
1038#ifdef DEBUG
1039 regMaskTP getKillSetForNode(GenTree* tree);
1040#endif
1041
1042 // Given some tree node add refpositions for all the registers this node kills
1043 bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask);
1044
1045 regMaskTP allRegs(RegisterType rt);
1046 regMaskTP allByteRegs();
1047 regMaskTP allSIMDRegs();
1048 regMaskTP internalFloatRegCandidates();
1049
1050 bool registerIsFree(regNumber regNum, RegisterType regType);
1051 bool registerIsAvailable(RegRecord* physRegRecord,
1052 LsraLocation currentLoc,
1053 LsraLocation* nextRefLocationPtr,
1054 RegisterType regType);
1055 void freeRegister(RegRecord* physRegRecord);
1056 void freeRegisters(regMaskTP regsToFree);
1057
1058 // Get the type that this tree defines.
1059 var_types getDefType(GenTree* tree)
1060 {
1061 return tree->TypeGet();
1062 }
1063
1064 // Managing internal registers during the BuildNode process.
1065 RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP candidates);
1066 RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE);
1067 RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE);
1068 void buildInternalRegisterUses();
1069
1070 void resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPosition* currentRefPosition);
1071
1072 void insertMove(BasicBlock* block, GenTree* insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg);
1073
1074 void insertSwap(
1075 BasicBlock* block, GenTree* insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2);
1076
1077public:
1078 // TODO-Cleanup: unused?
1079 class PhysRegIntervalIterator
1080 {
1081 public:
1082 PhysRegIntervalIterator(LinearScan* theLinearScan)
1083 {
1084 nextRegNumber = (regNumber)0;
1085 linearScan = theLinearScan;
1086 }
1087 RegRecord* GetNext()
1088 {
1089 return &linearScan->physRegs[nextRegNumber];
1090 }
1091
1092 private:
1093 // This assumes that the physical registers are contiguous, starting
1094 // with a register number of 0
1095 regNumber nextRegNumber;
1096 LinearScan* linearScan;
1097 };
1098
1099private:
1100 Interval* newInterval(RegisterType regType);
1101
1102 Interval* getIntervalForLocalVar(unsigned varIndex)
1103 {
1104 assert(varIndex < compiler->lvaTrackedCount);
1105 assert(localVarIntervals[varIndex] != nullptr);
1106 return localVarIntervals[varIndex];
1107 }
1108
1109 Interval* getIntervalForLocalVarNode(GenTreeLclVarCommon* tree)
1110 {
1111 LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclNum];
1112 assert(varDsc->lvTracked);
1113 return getIntervalForLocalVar(varDsc->lvVarIndex);
1114 }
1115
1116 RegRecord* getRegisterRecord(regNumber regNum);
1117
1118 RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);
1119
1120 RefPosition* newRefPosition(Interval* theInterval,
1121 LsraLocation theLocation,
1122 RefType theRefType,
1123 GenTree* theTreeNode,
1124 regMaskTP mask,
1125 unsigned multiRegIdx = 0);
1126
1127 // This creates a RefTypeUse at currentLoc. It sets the treeNode to nullptr if it is not a
1128 // lclVar interval.
1129 RefPosition* newUseRefPosition(Interval* theInterval,
1130 GenTree* theTreeNode,
1131 regMaskTP mask,
1132 unsigned multiRegIdx = 0);
1133
1134 RefPosition* newRefPosition(
1135 regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
1136
1137 void applyCalleeSaveHeuristics(RefPosition* rp);
1138
1139 void checkConflictingDefUse(RefPosition* rp);
1140
1141 void associateRefPosWithInterval(RefPosition* rp);
1142
1143 unsigned getWeight(RefPosition* refPos);
1144
1145 /*****************************************************************************
1146 * Register management
1147 ****************************************************************************/
1148 RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);
1149 regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition);
1150 regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable);
1151 regNumber assignCopyReg(RefPosition* refPosition);
1152
1153 bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition);
1154 bool isSpillCandidate(Interval* current,
1155 RefPosition* refPosition,
1156 RegRecord* physRegRecord,
1157 LsraLocation& nextLocation);
1158 void checkAndAssignInterval(RegRecord* regRec, Interval* interval);
1159 void assignPhysReg(RegRecord* regRec, Interval* interval);
1160 void assignPhysReg(regNumber reg, Interval* interval)
1161 {
1162 assignPhysReg(getRegisterRecord(reg), interval);
1163 }
1164
1165 bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType));
1166 bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType));
1167 void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);
1168 void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType));
1169 void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);
1170 void unassignPhysRegNoSpill(RegRecord* reg);
1171 void unassignPhysReg(regNumber reg)
1172 {
1173 unassignPhysReg(getRegisterRecord(reg), nullptr);
1174 }
1175
1176 void setIntervalAsSpilled(Interval* interval);
1177 void setIntervalAsSplit(Interval* interval);
1178 void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);
1179
1180 void spillGCRefs(RefPosition* killRefPosition);
1181
1182 /*****************************************************************************
1183 * For Resolution phase
1184 ****************************************************************************/
1185 // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling
1186 unsigned int regMapCount;
1187
1188 // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we
    // rely on the property that the new block's "in" map is the same as the "out" map of the "from"
    // block of the edge, and its "out" map is the same as the "in" map of the "to" block (by construction).
1191 // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the
1192 // splitBBNumToTargetBBNumMap.
1193 // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding
1194 // the arrays.
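    // For example (illustrative): if a critical edge BB03 -> BB07 is split by a new block BB12
    // (so that BB12's bbNum is greater than bbNumMaxBeforeResolution), getSplitEdgeInfo(12)
    // returns { fromBBNum = 3, toBBNum = 7 }; BB12's "in" map is then BB03's "out" map, and its
    // "out" map is BB07's "in" map.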
1195
1196 unsigned bbNumMaxBeforeResolution;
1197 struct SplitEdgeInfo
1198 {
1199 unsigned fromBBNum;
1200 unsigned toBBNum;
1201 };
1202 typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo> SplitBBNumToTargetBBNumMap;
1203 SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
1204 SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
1205 {
1206 if (splitBBNumToTargetBBNumMap == nullptr)
1207 {
1208 splitBBNumToTargetBBNumMap =
1209 new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));
1210 }
1211 return splitBBNumToTargetBBNumMap;
1212 }
1213 SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);
1214
1215 void initVarRegMaps();
1216 void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
1217 void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
1218 VarToRegMap getInVarToRegMap(unsigned int bbNum);
1219 VarToRegMap getOutVarToRegMap(unsigned int bbNum);
1220 void setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg);
1221 regNumber getVarReg(VarToRegMap map, unsigned int trackedVarIndex);
1222 // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
1223 // the block)
1224 VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);
1225
1226 regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);
1227
1228#ifdef DEBUG
1229 void dumpVarToRegMap(VarToRegMap map);
1230 void dumpInVarToRegMap(BasicBlock* block);
1231 void dumpOutVarToRegMap(BasicBlock* block);
1232
1233 // There are three points at which a tuple-style dump is produced, and each
1234 // differs slightly:
1235 // - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what
1236 // tree nodes are consumed.
1237 // - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before
    //     register allocation, each node is dumped, along with all of the RefPositions.
    //     The Intervals are identified as Lnnn for lclVar intervals, Innn for other
    //     intervals, and Tnnn for internal temps.
1241 // - In LSRA_DUMP_POST, which is after register allocation, the registers are
1242 // shown.
1243
1244 enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};
1245 void lsraGetOperandString(GenTree* tree, LsraTupleDumpMode mode, char* operandString, unsigned operandStringLength);
1246 void lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDest);
1247 void DumpOperandDefs(
1248 GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength);
1249 void TupleStyleDump(LsraTupleDumpMode mode);
1250
1251 LsraLocation maxNodeLocation;
1252
1253 // Width of various fields - used to create a streamlined dump during allocation that shows the
1254 // state of all the registers in columns.
1255 int regColumnWidth;
1256 int regTableIndent;
1257
1258 const char* columnSeparator;
1259 const char* line;
1260 const char* leftBox;
1261 const char* middleBox;
1262 const char* rightBox;
1263
1264 static const int MAX_FORMAT_CHARS = 12;
1265 char intervalNameFormat[MAX_FORMAT_CHARS];
1266 char regNameFormat[MAX_FORMAT_CHARS];
1267 char shortRefPositionFormat[MAX_FORMAT_CHARS];
1268 char emptyRefPositionFormat[MAX_FORMAT_CHARS];
1269 char indentFormat[MAX_FORMAT_CHARS];
1270 static const int MAX_LEGEND_FORMAT_CHARS = 25;
1271 char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS];
1272 char legendFormat[MAX_LEGEND_FORMAT_CHARS];
1273
1274 // How many rows have we printed since last printing a "title row"?
1275 static const int MAX_ROWS_BETWEEN_TITLES = 50;
1276 int rowCountSinceLastTitle;
1277 // Current mask of registers being printed in the dump.
1278 regMaskTP lastDumpedRegisters;
1279 regMaskTP registersToDump;
1280 int lastUsedRegNumIndex;
1281 bool shouldDumpReg(regNumber regNum)
1282 {
1283 return (registersToDump & genRegMask(regNum)) != 0;
1284 }
1285
1286 void dumpRegRecordHeader();
1287 void dumpRegRecordTitle();
1288 void dumpRegRecordTitleIfNeeded();
1289 void dumpRegRecordTitleLines();
1290 void dumpRegRecords();
1291 // An abbreviated RefPosition dump for printing with column-based register state
1292 void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock);
1293 // Print the number of spaces occupied by a dumpRefPositionShort()
1294 void dumpEmptyRefPosition();
1295 // A dump of Referent, in exactly regColumnWidth characters
1296 void dumpIntervalName(Interval* interval);
1297
1298 // Events during the allocation phase that cause some dump output
1299 enum LsraDumpEvent{
1300 // Conflicting def/use
1301 LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2,
1302 LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6,
1303
1304 // Spilling
1305 LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
1306 LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS,
1307
1308 // Block boundaries
1309 LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,
1310
1311 // Miscellaneous
1312 LSRA_EVENT_FREE_REGS,
1313
1314 // Characteristics of the current RefPosition
1315 LSRA_EVENT_INCREMENT_RANGE_END, // ???
1316 LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,
1317
1318 // Allocation decisions
1319 LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
1320 LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
1321 LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,
1322 LSRA_EVENT_REUSE_REG,
1323 };
1324 void dumpLsraAllocationEvent(LsraDumpEvent event,
1325 Interval* interval = nullptr,
1326 regNumber reg = REG_NA,
1327 BasicBlock* currentBlock = nullptr);
1328
1329 void validateIntervals();
1330#endif // DEBUG
1331
1332#if TRACK_LSRA_STATS
1333 enum LsraStat{
1334 LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE,
1335 };
1336
1337 unsigned regCandidateVarCount;
1338 void updateLsraStat(LsraStat stat, unsigned currentBBNum);
1339
1340 void dumpLsraStats(FILE* file);
1341
1342#define INTRACK_STATS(x) x
1343#else // !TRACK_LSRA_STATS
1344#define INTRACK_STATS(x)
1345#endif // !TRACK_LSRA_STATS
1346
1347 Compiler* compiler;
1348
1349private:
1350 CompAllocator getAllocator(Compiler* comp)
1351 {
1352 return comp->getAllocator(CMK_LSRA);
1353 }
1354
1355#ifdef DEBUG
1356 // This is used for dumping
1357 RefPosition* activeRefPosition;
1358#endif // DEBUG
1359
1360 IntervalList intervals;
1361
1362 RegRecord physRegs[REG_COUNT];
1363
1364 // Map from tracked variable index to Interval*.
1365 Interval** localVarIntervals;
1366
1367 // Set of blocks that have been visited.
1368 BlockSet bbVisitedSet;
1369 void markBlockVisited(BasicBlock* block)
1370 {
1371 BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
1372 }
1373 void clearVisitedBlocks()
1374 {
1375 BlockSetOps::ClearD(compiler, bbVisitedSet);
1376 }
1377 bool isBlockVisited(BasicBlock* block)
1378 {
1379 return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
1380 }
1381
1382#if DOUBLE_ALIGN
1383 bool doDoubleAlign;
1384#endif
1385
1386 // A map from bbNum to the block information used during register allocation.
1387 LsraBlockInfo* blockInfo;
1388 BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));
1389
1390 // The order in which the blocks will be allocated.
    // This is an array of BasicBlock*, in the order in which they should be traversed.
    BasicBlock** blockSequence;
    // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
    // included in the blockSequence above, during setBlockSequence().
1395 bool verifiedAllBBs;
1396 void setBlockSequence();
1397 int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
1398 BasicBlockList* blockSequenceWorkList;
1399 bool blockSequencingDone;
1400 void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
1401 void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
1402 BasicBlock* getNextCandidateFromWorkList();
1403
1404 // The bbNum of the block being currently allocated or resolved.
1405 unsigned int curBBNum;
1406 // The current location
1407 LsraLocation currentLoc;
1408 // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).
1409 unsigned int curBBSeqNum;
1410 // The number of blocks that we've sequenced.
1411 unsigned int bbSeqCount;
1412 // The Location of the start of the current block.
1413 LsraLocation curBBStartLocation;
1414 // True if the method contains any critical edges.
1415 bool hasCriticalEdges;
1416
1417 // True if there are any register candidate lclVars available for allocation.
1418 bool enregisterLocalVars;
1419
1420 virtual bool willEnregisterLocalVars() const
1421 {
1422 return enregisterLocalVars;
1423 }
1424
1425 // Ordered list of RefPositions
1426 RefPositionList refPositions;
1427
1428 // Per-block variable location mappings: an array indexed by block number that yields a
1429 // pointer to an array of regNumber, one per variable.
1430 VarToRegMap* inVarToRegMaps;
1431 VarToRegMap* outVarToRegMaps;
1432
1433 // A temporary VarToRegMap used during the resolution of critical edges.
1434 VarToRegMap sharedCriticalVarToRegMap;
1435
1436 PhasedVar<regMaskTP> availableIntRegs;
1437 PhasedVar<regMaskTP> availableFloatRegs;
1438 PhasedVar<regMaskTP> availableDoubleRegs;
1439
1440 // The set of all register candidates. Note that this may be a subset of tracked vars.
1441 VARSET_TP registerCandidateVars;
    // Current set of live register candidate vars, used during building of RefPositions to determine
    // whether to preference them to callee-save registers.
1444 VARSET_TP currentLiveVars;
1445 // Set of variables that may require resolution across an edge.
1446 // This is first constructed during interval building, to contain all the lclVars that are live at BB edges.
1447 // Then, any lclVar that is always in the same register is removed from the set.
1448 VARSET_TP resolutionCandidateVars;
1449 // This set contains all the lclVars that are ever spilled or split.
1450 VARSET_TP splitOrSpilledVars;
1451 // Set of floating point variables to consider for callee-save registers.
1452 VARSET_TP fpCalleeSaveCandidateVars;
1453#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1454#if defined(_TARGET_AMD64_)
1455 static bool varTypeNeedsPartialCalleeSave(var_types type)
1456 {
1457 return (emitTypeSize(type) == 32);
1458 }
1459 static const var_types LargeVectorSaveType = TYP_SIMD16;
1460#elif defined(_TARGET_ARM64_)
1461 static bool varTypeNeedsPartialCalleeSave(var_types type)
1462 {
        // The ARM64 ABI FP callee-save registers only require the callee to save the lower 8 bytes.
        // For SIMD types longer than 8 bytes, the caller is responsible for saving and restoring the upper bytes.
1465 return (emitTypeSize(type) == 16);
1466 }
1467 static const var_types LargeVectorSaveType = TYP_DOUBLE;
1468#else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
1469#error("Unknown target architecture for FEATURE_SIMD")
1470#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
1471
1472 // Set of large vector (TYP_SIMD32 on AVX) variables.
1473 VARSET_TP largeVectorVars;
1474 // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
1475 VARSET_TP largeVectorCalleeSaveCandidateVars;
1476#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1477
1478 //-----------------------------------------------------------------------
1479 // Build methods
1480 //-----------------------------------------------------------------------
1481
1482 // The listNodePool is used to maintain the RefInfo for nodes that are "in flight"
1483 // i.e. whose consuming node has not yet been handled.
1484 RefInfoListNodePool listNodePool;
1485
1486 // The defList is used for the transient RefInfo that is computed by
1487 // the Build methods, and used in building RefPositions.
    // When Def RefPositions are built for a node, their RefInfo is placed
    // in the defList. As the consuming node is handled, it moves the RefInfo
    // into an ordered useList corresponding to the uses for that node.
1491 RefInfoList defList;
1492
1493 // As we build uses, we may want to preference the next definition (i.e. the register produced
1494 // by the current node) to the same register as one of its uses. This is done by setting
1495 // 'tgtPrefUse' to that RefPosition.
1496 RefPosition* tgtPrefUse = nullptr;
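
    // For example, a simple binary node might be built roughly as follows (illustrative sketch;
    // see BuildBinaryUses/BuildUse/BuildDef declared below for the actual helpers):
    //
    //    RefPosition* op1Use = BuildUse(node->gtGetOp1(), candidates);
    //    tgtPrefUse = op1Use;                    // prefer the node's def to op1's register
    //    BuildUse(node->gtGetOp2(), candidates);
    //    BuildDef(node, dstCandidates);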
1497
1498 // The following keep track of information about internal (temporary register) intervals
1499 // during the building of a single node.
1500 static const int MaxInternalCount = 4;
1501 RefPosition* internalDefs[MaxInternalCount];
1502 int internalCount = 0;
1503 bool setInternalRegsDelayFree;
1504
1505 // When a RefTypeUse is marked as 'delayRegFree', we also want to mark the RefTypeDef
1506 // in the next Location as 'hasInterferingUses'. This is accomplished by setting this
1507 // 'pendingDelayFree' to true as they are created, and clearing it as a new node is
1508 // handled in 'BuildNode'.
1509 bool pendingDelayFree;
1510
1511 // This method clears the "build state" before starting to handle a new node.
1512 void clearBuildState()
1513 {
1514 tgtPrefUse = nullptr;
1515 internalCount = 0;
1516 setInternalRegsDelayFree = false;
1517 pendingDelayFree = false;
1518 }
1519
1520 RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0);
1521
1522 void setDelayFree(RefPosition* use);
1523 int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
1524#ifdef _TARGET_XARCH_
1525 int BuildRMWUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
#endif // _TARGET_XARCH_
1527 // This is the main entry point for building the RefPositions for a node.
1528 // These methods return the number of sources.
1529 int BuildNode(GenTree* stmt);
1530
1531 GenTree* getTgtPrefOperand(GenTreeOp* tree);
1532 bool supportsSpecialPutArg();
1533
1534 int BuildSimple(GenTree* tree);
1535 int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE);
1536 int BuildDelayFreeUses(GenTree* node, regMaskTP candidates = RBM_NONE);
1537 int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE);
1538 void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
1539 RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0);
1540 void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE);
1541 void BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask);
1542
1543 int BuildStoreLoc(GenTree* tree);
1544 int BuildReturn(GenTree* tree);
1545#ifdef _TARGET_XARCH_
    // Note that this method may be called even when 'tree' is contained;
    // like the others, it returns the number of sources.
1548 int BuildShiftRotate(GenTree* tree);
1549#endif // _TARGET_XARCH_
1550#ifdef _TARGET_ARM_
1551 int BuildShiftLongCarry(GenTree* tree);
1552#endif
1553 int BuildPutArgReg(GenTreeUnOp* node);
1554 int BuildCall(GenTreeCall* call);
1555 int BuildCmp(GenTree* tree);
1556 int BuildBlockStore(GenTreeBlk* blkNode);
1557 int BuildModDiv(GenTree* tree);
1558 int BuildIntrinsic(GenTree* tree);
1559 int BuildStoreLoc(GenTreeLclVarCommon* tree);
1560 int BuildIndir(GenTreeIndir* indirTree);
1561 int BuildGCWriteBarrier(GenTree* tree);
1562 int BuildCast(GenTreeCast* cast);
1563
1564#if defined(_TARGET_XARCH_)
1565 // returns true if the tree can use the read-modify-write memory instruction form
1566 bool isRMWRegOper(GenTree* tree);
1567 int BuildMul(GenTree* tree);
1568 void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
1569 // Move the last use bit, if any, from 'fromTree' to 'toTree'; 'fromTree' must be contained.
1570 void CheckAndMoveRMWLastUse(GenTree* fromTree, GenTree* toTree)
1571 {
1572 // If 'fromTree' is not a last-use lclVar, there's nothing to do.
1573 if ((fromTree == nullptr) || !fromTree->OperIs(GT_LCL_VAR) || ((fromTree->gtFlags & GTF_VAR_DEATH) == 0))
1574 {
1575 return;
1576 }
1577 // If 'fromTree' was a lclVar, it must be contained and 'toTree' must match.
        if (!fromTree->isContained() || (toTree == nullptr) || !toTree->OperIs(GT_LCL_VAR) ||
            (fromTree->AsLclVarCommon()->gtLclNum != toTree->AsLclVarCommon()->gtLclNum))
1580 {
1581 assert(!"Unmatched RMW indirections");
1582 return;
1583 }
        // This is probably not necessary, but keeps things consistent.
        fromTree->gtFlags &= ~GTF_VAR_DEATH;
        // 'toTree' was verified to be non-null above.
        toTree->gtFlags |= GTF_VAR_DEATH;
1590 }
1591#endif // defined(_TARGET_XARCH_)
1592
1593#ifdef FEATURE_SIMD
1594 int BuildSIMD(GenTreeSIMD* tree);
1595#endif // FEATURE_SIMD
1596
1597#ifdef FEATURE_HW_INTRINSICS
1598 int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree);
1599#endif // FEATURE_HW_INTRINSICS
1600
1601 int BuildPutArgStk(GenTreePutArgStk* argNode);
1602#if FEATURE_ARG_SPLIT
1603 int BuildPutArgSplit(GenTreePutArgSplit* tree);
1604#endif // FEATURE_ARG_SPLIT
1605 int BuildLclHeap(GenTree* tree);
1606};
1607
1608/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1609XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1610XX XX
1611XX Interval XX
1612XX XX
1613XX This is the fundamental data structure for linear scan register XX
1614XX allocation. It represents the live range(s) for a variable or temp. XX
1615XX XX
1616XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1617XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1618*/
1619
1620class Interval : public Referenceable
1621{
1622public:
1623 Interval(RegisterType registerType, regMaskTP registerPreferences)
1624 : registerPreferences(registerPreferences)
1625 , relatedInterval(nullptr)
1626 , assignedReg(nullptr)
1627 , registerType(registerType)
1628 , isLocalVar(false)
1629 , isSplit(false)
1630 , isSpilled(false)
1631 , isInternal(false)
1632 , isStructField(false)
1633 , isPromotedStruct(false)
1634 , hasConflictingDefUse(false)
1635 , hasInterferingUses(false)
1636 , isSpecialPutArg(false)
1637 , preferCalleeSave(false)
1638 , isConstant(false)
1639 , physReg(REG_COUNT)
1640#ifdef DEBUG
1641 , intervalIndex(0)
1642#endif
1643 , varNum(0)
1644 {
1645 }
1646
1647#ifdef DEBUG
1648 // print out representation
1649 void dump();
1650 // concise representation for embedding
1651 void tinyDump();
1652 // extremely concise representation
1653 void microDump();
1654#endif // DEBUG
1655
1656 void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l);
1657
1658 // Fixed registers for which this Interval has a preference
1659 regMaskTP registerPreferences;
1660
    // The relatedInterval is the interval to which this interval is currently
    // preferenced (e.g. because they are related by a copy).
    Interval* relatedInterval;
1665
    // The assignedReg is the RegRecord for the register to which this interval
1667 // has been assigned at some point - if the interval is active, this is the
1668 // register it currently occupies.
1669 RegRecord* assignedReg;
1670
    // DECIDE: put this in a union or do something w/ inheritance?
    // this is an interval for a physical register, not an allocatable entity
1673
1674 RegisterType registerType;
1675 bool isLocalVar : 1;
1676 // Indicates whether this interval has been assigned to different registers
1677 bool isSplit : 1;
1678 // Indicates whether this interval is ever spilled
1679 bool isSpilled : 1;
1680 // indicates an interval representing the internal requirements for
1681 // generating code for a node (temp registers internal to the node)
1682 // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
1683 // case (though never lives beyond a stmt)
1684 bool isInternal : 1;
1685 // true if this is a LocalVar for a struct field
1686 bool isStructField : 1;
1687 // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
1688 bool isPromotedStruct : 1;
1689 // true if this is an SDSU interval for which the def and use have conflicting register
1690 // requirements
1691 bool hasConflictingDefUse : 1;
1692 // true if this interval's defining node has "delayRegFree" uses, either due to it being an RMW instruction,
1693 // OR because it requires an internal register that differs from the target.
1694 bool hasInterferingUses : 1;
1695
1696 // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
1697 // During allocation, this flag will be cleared if the source is not already in the required register.
    // Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
1699 // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
1700 bool isSpecialPutArg : 1;
1701
1702 // True if this interval interferes with a call.
1703 bool preferCalleeSave : 1;
1704
1705 // True if this interval is defined by a constant node that may be reused and/or may be
1706 // able to reuse a constant that's already in a register.
1707 bool isConstant : 1;
1708
1709 // The register to which it is currently assigned.
1710 regNumber physReg;
1711
1712#ifdef DEBUG
1713 unsigned int intervalIndex;
1714#endif // DEBUG
1715
1716 unsigned int varNum; // This is the "variable number": the index into the lvaTable array
1717
1718 LclVarDsc* getLocalVar(Compiler* comp)
1719 {
1720 assert(isLocalVar);
1721 return &(comp->lvaTable[this->varNum]);
1722 }
1723
1724 // Get the local tracked variable "index" (lvVarIndex), used in bitmasks.
1725 unsigned getVarIndex(Compiler* comp)
1726 {
1727 LclVarDsc* varDsc = getLocalVar(comp);
1728 assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!
1729 return varDsc->lvVarIndex;
1730 }
1731
1732 bool isAssignedTo(regNumber regNum)
1733 {
1734 // This uses regMasks to handle the case where a double actually occupies two registers
1735 // TODO-Throughput: This could/should be done more cheaply.
1736 return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);
1737 }
1738
1739 // Assign the related interval.
1740 void assignRelatedInterval(Interval* newRelatedInterval)
1741 {
1742#ifdef DEBUG
1743 if (VERBOSE)
1744 {
1745 printf("Assigning related ");
1746 newRelatedInterval->microDump();
1747 printf(" to ");
1748 this->microDump();
1749 printf("\n");
1750 }
1751#endif // DEBUG
1752 relatedInterval = newRelatedInterval;
1753 }
1754
1755 // Assign the related interval, but only if it isn't already assigned.
1756 void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)
1757 {
1758 if (relatedInterval == nullptr)
1759 {
1760 assignRelatedInterval(newRelatedInterval);
1761 }
1762 else
1763 {
1764#ifdef DEBUG
1765 if (VERBOSE)
1766 {
1767 printf("Interval ");
1768 this->microDump();
1769 printf(" already has a related interval\n");
1770 }
1771#endif // DEBUG
1772 }
1773 }
1774
    // Update the registerPreferences on the interval.
    // If there are conflicting requirements on this interval, set the preferences to
    // the union of them. That way maybe we'll get at least one of them.
    // An exception is made when either the existing or the new preferences are entirely
    // callee-save, in which case we "prefer" the callee-save registers.
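    //
    // Illustrative example (assuming typical x64 register masks, hypothetical values): if the
    // existing registerPreferences is RBM_RCX, the new preferences are RBM_RDX, and
    // preferCalleeSave is true, there is no overlap and both are single-register sets, so the
    // union (RBM_RCX | RBM_RDX) is intersected with the callee-save set; neither rcx nor rdx is
    // callee-save, so the union itself is kept. If the new preferences were RBM_RBX instead, the
    // callee-save intersection would be non-empty and only RBM_RBX would be retained.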
1780
1781 void updateRegisterPreferences(regMaskTP preferences)
1782 {
1783 // We require registerPreferences to have been initialized.
1784 assert(registerPreferences != RBM_NONE);
1785 // It is invalid to update with empty preferences
1786 assert(preferences != RBM_NONE);
1787
1788 regMaskTP commonPreferences = (registerPreferences & preferences);
1789 if (commonPreferences != RBM_NONE)
1790 {
1791 registerPreferences = commonPreferences;
1792 return;
1793 }
1794
1795 // There are no preferences in common.
1796 // Preferences need to reflect both cases where a var must occupy a specific register,
1797 // as well as cases where a var is live when a register is killed.
1798 // In the former case, we would like to record all such registers, however we don't
1799 // really want to use any registers that will interfere.
1800 // To approximate this, we never "or" together multi-reg sets, which are generally kill sets.
1801
1802 if (!genMaxOneBit(preferences))
1803 {
1804 // The new preference value is a multi-reg set, so it's probably a kill.
1805 // Keep the new value.
1806 registerPreferences = preferences;
1807 return;
1808 }
1809
1810 if (!genMaxOneBit(registerPreferences))
1811 {
1812 // The old preference value is a multi-reg set.
1813 // Keep the existing preference set, as it probably reflects one or more kills.
1814 // It may have been a union of multiple individual registers, but we can't
1815 // distinguish that case without extra cost.
1816 return;
1817 }
1818
1819 // If we reach here, we have two disjoint single-reg sets.
1820 // Keep only the callee-save preferences, if not empty.
1821 // Otherwise, take the union of the preferences.
1822
1823 regMaskTP newPreferences = registerPreferences | preferences;
1824
1825 if (preferCalleeSave)
1826 {
1827 regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences));
1828 if (calleeSaveMask != RBM_NONE)
1829 {
1830 newPreferences = calleeSaveMask;
1831 }
1832 }
1833 registerPreferences = newPreferences;
1834 }
1835};
1836
1837class RefPosition
1838{
1839public:
1840 // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
1841 // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
1842 // refers to an Interval, then 'isPhysRegRef' is false.
1843 //
1844 // Q: can 'referent' be NULL?
1845
1846 Referenceable* referent;
1847
    // nextRefPosition is the next in code order.
    // Note that there is no need for these to be doubly linked, as they
    // are only traversed in the forward direction, and are not moved.
1851 RefPosition* nextRefPosition;
1852
1853 // The remaining fields are common to both options
1854 GenTree* treeNode;
1855 unsigned int bbNum;
1856
    LsraLocation nodeLocation;

    // Prior to the allocation pass, registerAssignment captures the valid registers
    // for this RefPosition. An empty set means that any register is valid. A non-empty
    // set means that it must be one of the given registers (may be the full set if the
    // only constraint is that it must reside in SOME register).
    // After the allocation pass, this contains the actual assignment.
    regMaskTP registerAssignment;
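
    // Illustrative example (hypothetical): a RefPosition whose registerAssignment is RBM_NONE may
    // be given any register of the appropriate type, while one whose mask is a single bit, such as
    // genRegMask(REG_RCX) on xarch, must be satisfied by that specific register.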
1864
1865 RefType refType;
1866
1867 // NOTE: C++ only packs bitfields if the base type is the same. So make all the base
1868 // NOTE: types of the logically "bool" types that follow 'unsigned char', so they match
1869 // NOTE: RefType that precedes this, and multiRegIdx can also match.
1870
    // Indicates whether this ref position is to be allocated a reg only if profitable. Currently these are
    // the ref positions that lower/codegen has indicated as reg-optional, and which are treated as contained
    // memory operands if no reg is allocated.
1874 unsigned char allocRegIfProfitable : 1;
1875
    // Used by RefTypeDef/Use positions of a multi-reg call node.
    // Indicates the position of the register that this ref position refers to.
    // The number of bits needed is determined by the max value of MAX_RET_REG_COUNT
    // across all targets, which happens to be 4 (on Arm). Hence the index value ranges
    // over 0..MAX_RET_REG_COUNT-1.
    unsigned char multiRegIdx : 2;
1882
1883 // Last Use - this may be true for multiple RefPositions in the same Interval
1884 unsigned char lastUse : 1;
1885
1886 // Spill and Copy info
1887 // reload indicates that the value was spilled, and must be reloaded here.
1888 // spillAfter indicates that the value is spilled here, so a spill must be added.
1889 // copyReg indicates that the value needs to be copied to a specific register,
1890 // but that it will also retain its current assigned register.
1891 // moveReg indicates that the value needs to be moved to a different register,
1892 // and that this will be its new assigned register.
1893 // A RefPosition may have any flag individually or the following combinations:
1894 // - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg
1895 // (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)
1896 // - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)
    // - spillAfter and moveReg (i.e. it must be both spilled and moved)
1898 // NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going
1899 // to be spilled, because the code generator will do the move to the fixed register, and doesn't need to
1900 // record the new register location as the new "home" location of the lclVar. However, if there is a conflicting
1901 // use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then
1902 // we need an explicit move.
1903 // - copyReg and moveReg must not exist with each other.
1904
1905 unsigned char reload : 1;
1906 unsigned char spillAfter : 1;
1907 unsigned char copyReg : 1;
1908 unsigned char moveReg : 1; // true if this var is moved to a new register
1909
    unsigned char isPhysRegRef : 1; // true if 'referent' points to a RegRecord, false if it points to an Interval
1911 unsigned char isFixedRegRef : 1;
1912 unsigned char isLocalDefUse : 1;
1913
1914 // delayRegFree indicates that the register should not be freed right away, but instead wait
1915 // until the next Location after it would normally be freed. This is used for the case of
1916 // non-commutative binary operators, where op2 must not be assigned the same register as
1917 // the target. We do this by not freeing it until after the target has been defined.
1918 // Another option would be to actually change the Location of the op2 use until the same
1919 // Location as the def, but then it could potentially reuse a register that has been freed
1920 // from the other source(s), e.g. if it's a lastUse or spilled.
1921 unsigned char delayRegFree : 1;
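
    // Illustrative example (hypothetical): for a non-commutative operation generated in two-operand
    // form ("mov target, op1" followed by "sub target, op2"), op2's use is marked delayRegFree so
    // that its register cannot be freed, and potentially re-assigned as the target, until the
    // Location after the def.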
1922
1923 // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the
1924 // register currently assigned to the Interval. This happens when we use the assigned
1925 // register from a predecessor that is not the most recently allocated BasicBlock.
1926 unsigned char outOfOrder : 1;
1927
1928#ifdef DEBUG
    // Minimum number of registers that need to be ensured while
    // constraining candidates for this ref position under
    // LSRA stress.
1932 unsigned minRegCandidateCount;
1933
1934 // The unique RefPosition number, equal to its index in the
1935 // refPositions list. Only used for debugging dumps.
1936 unsigned rpNum;
1937#endif // DEBUG
1938
1939 RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
1940 : referent(nullptr)
1941 , nextRefPosition(nullptr)
1942 , treeNode(treeNode)
1943 , bbNum(bbNum)
1944 , nodeLocation(nodeLocation)
1945 , registerAssignment(RBM_NONE)
        , refType(refType)
        , allocRegIfProfitable(false)
        , multiRegIdx(0)
1948 , lastUse(false)
1949 , reload(false)
1950 , spillAfter(false)
1951 , copyReg(false)
1952 , moveReg(false)
1953 , isPhysRegRef(false)
1954 , isFixedRegRef(false)
1955 , isLocalDefUse(false)
1956 , delayRegFree(false)
1957 , outOfOrder(false)
1958#ifdef DEBUG
1959 , minRegCandidateCount(1)
1960 , rpNum(0)
1961#endif
1962 {
1963 }
1964
1965 Interval* getInterval()
1966 {
1967 assert(!isPhysRegRef);
1968 return (Interval*)referent;
1969 }
1970 void setInterval(Interval* i)
1971 {
1972 referent = i;
1973 isPhysRegRef = false;
1974 }
1975
1976 RegRecord* getReg()
1977 {
1978 assert(isPhysRegRef);
1979 return (RegRecord*)referent;
1980 }
1981 void setReg(RegRecord* r)
1982 {
1983 referent = r;
1984 isPhysRegRef = true;
1985 registerAssignment = genRegMask(r->regNum);
1986 }
1987
1988 regNumber assignedReg()
1989 {
1990 if (registerAssignment == RBM_NONE)
1991 {
1992 return REG_NA;
1993 }
1994
1995 return genRegNumFromMask(registerAssignment);
1996 }
1997
    // Returns true if this is an actual (Def or Use) reference on a GenTree node.
1999 bool IsActualRef()
2000 {
2001 return (refType == RefTypeDef || refType == RefTypeUse);
2002 }
2003
2004 bool RequiresRegister()
2005 {
2006 return (IsActualRef()
2007#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2008 || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
2009#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2010 ) &&
2011 !AllocateIfProfitable();
2012 }
2013
2014 void setAllocateIfProfitable(bool val)
2015 {
2016 allocRegIfProfitable = val;
2017 }
2018
    // Returns true if this ref position is to be allocated
    // a reg only if it is profitable.
2021 bool AllocateIfProfitable()
2022 {
2023 // TODO-CQ: Right now if a ref position is marked as
2024 // copyreg or movereg, then it is not treated as
2025 // 'allocate if profitable'. This is an implementation
2026 // limitation that needs to be addressed.
2027 return allocRegIfProfitable && !copyReg && !moveReg;
2028 }
2029
2030 void setMultiRegIdx(unsigned idx)
2031 {
2032 multiRegIdx = idx;
2033 assert(multiRegIdx == idx);
2034 }
2035
2036 unsigned getMultiRegIdx()
2037 {
2038 return multiRegIdx;
2039 }
2040
2041 LsraLocation getRefEndLocation()
2042 {
2043 return delayRegFree ? nodeLocation + 1 : nodeLocation;
2044 }
2045
2046 bool isIntervalRef()
2047 {
2048 return (!isPhysRegRef && (referent != nullptr));
2049 }
2050
2051 // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
2052 // interval
2053 bool isTrueDef()
2054 {
2055 return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
2056 }
2057
2058 // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
2059 // specified by the given mask
2060 bool isFixedRefOfRegMask(regMaskTP regMask)
2061 {
2062 assert(genMaxOneBit(regMask));
2063 return (registerAssignment == regMask);
2064 }
2065
2066 // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
2067 bool isFixedRefOfReg(regNumber regNum)
2068 {
2069 return (isFixedRefOfRegMask(genRegMask(regNum)));
2070 }
2071
2072#ifdef DEBUG
2073 // operator= copies everything except 'rpNum', which must remain unique
2074 RefPosition& operator=(const RefPosition& rp)
2075 {
2076 unsigned rpNumSave = rpNum;
2077 memcpy(this, &rp, sizeof(rp));
2078 rpNum = rpNumSave;
2079 return *this;
2080 }
2081
2082 void dump();
2083#endif // DEBUG
2084};
2085
2086#ifdef DEBUG
2087void dumpRegMask(regMaskTP regs);
2088#endif // DEBUG
2089
2090/*****************************************************************************/
2091#endif //_LSRA_H_
2092/*****************************************************************************/
2093