1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7XX XX
8XX Interval and RefPosition Building XX
9XX XX
10XX This contains the logic for constructing Intervals and RefPositions that XX
11XX is common across architectures. See lsra{arch}.cpp for the architecture- XX
12XX specific methods for building. XX
13XX XX
14XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
16*/
17
18#include "jitpch.h"
19#ifdef _MSC_VER
20#pragma hdrstop
21#endif
22
23#include "lsra.h"
24
25//------------------------------------------------------------------------
26// RefInfoList
27//------------------------------------------------------------------------
28// removeListNode - retrieve the RefInfoListNode for the given GenTree node
29//
30// Notes:
31// The BuildNode methods use this helper to retrieve the RefPositions for child nodes
// from the useList being constructed. Note that a caller that knows the order of the
// operands is expected to simply retrieve them directly rather than search by node.
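//
// For illustration only (a hypothetical sketch, not code from this file), a BuildNode
// method that does not know the operand order might retrieve the use for a child like:
//
//    RefInfoListNode* listNode = useList.removeListNode(childNode);
//    RefPosition*     childUse = listNode->ref;
//
// where 'useList', 'childNode', and 'childUse' are placeholder names.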
34
35RefInfoListNode* RefInfoList::removeListNode(GenTree* node)
36{
37 RefInfoListNode* prevListNode = nullptr;
38 for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
39 {
40 if (listNode->treeNode == node)
41 {
42 assert(listNode->ref->getMultiRegIdx() == 0);
43 return removeListNode(listNode, prevListNode);
44 }
45 prevListNode = listNode;
46 }
47 assert(!"removeListNode didn't find the node");
48 unreached();
49}
50
51//------------------------------------------------------------------------
52// removeListNode - retrieve the RefInfoListNode for one reg of the given multireg GenTree node
53//
54// Notes:
55// The BuildNode methods use this helper to retrieve the RefPositions for child nodes
// from the useList being constructed. Note that a caller that knows the order of the
// operands is expected to simply retrieve them directly rather than search by node.
58
59RefInfoListNode* RefInfoList::removeListNode(GenTree* node, unsigned multiRegIdx)
60{
61 RefInfoListNode* prevListNode = nullptr;
62 for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
63 {
64 if ((listNode->treeNode == node) && (listNode->ref->getMultiRegIdx() == multiRegIdx))
65 {
66 return removeListNode(listNode, prevListNode);
67 }
68 prevListNode = listNode;
69 }
70 assert(!"removeListNode didn't find the node");
71 unreached();
72}
73
74//------------------------------------------------------------------------
75// RefInfoListNodePool::RefInfoListNodePool:
76// Creates a pool of `RefInfoListNode` values.
77//
78// Arguments:
79// compiler - The compiler context.
80// preallocate - The number of nodes to preallocate.
81//
82RefInfoListNodePool::RefInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler)
83{
84 if (preallocate > 0)
85 {
86 RefInfoListNode* preallocatedNodes = compiler->getAllocator(CMK_LSRA).allocate<RefInfoListNode>(preallocate);
87
88 RefInfoListNode* head = preallocatedNodes;
89 head->m_next = nullptr;
90
91 for (unsigned i = 1; i < preallocate; i++)
92 {
93 RefInfoListNode* node = &preallocatedNodes[i];
94 node->m_next = head;
95 head = node;
96 }
97
98 m_freeList = head;
99 }
100}
101
102//------------------------------------------------------------------------
103// RefInfoListNodePool::GetNode: Fetches an unused node from the
104// pool.
105//
106// Arguments:
//    r      - The `RefPosition` for the `RefInfo` value.
//    t      - The IR node for the `RefInfo` value.
//    regIdx - The register index for the `RefInfo` value.
111//
112// Returns:
113// A pooled or newly-allocated `RefInfoListNode`, depending on the
114// contents of the pool.
115RefInfoListNode* RefInfoListNodePool::GetNode(RefPosition* r, GenTree* t, unsigned regIdx)
116{
117 RefInfoListNode* head = m_freeList;
118 if (head == nullptr)
119 {
120 head = m_compiler->getAllocator(CMK_LSRA).allocate<RefInfoListNode>(1);
121 }
122 else
123 {
124 m_freeList = head->m_next;
125 }
126
127 head->ref = r;
128 head->treeNode = t;
129 head->m_next = nullptr;
130
131 return head;
132}
133
134//------------------------------------------------------------------------
// RefInfoListNodePool::ReturnNode: Returns a node to the pool.
//
// Arguments:
//    listNode - The node to return.
140//
141void RefInfoListNodePool::ReturnNode(RefInfoListNode* listNode)
142{
143 listNode->m_next = m_freeList;
144 m_freeList = listNode;
145}
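
// Example (illustrative only, not code from this file): the typical lifetime of a pooled
// node is
//
//    RefInfoListNode* node = pool.GetNode(defRefPos, defNode, 0); // reuse or allocate a node
//    useList.Append(node);                                        // hand it to a RefInfoList
//    ...
//    pool.ReturnNode(node);                                       // push it back on the free list
//
// where 'pool', 'useList', 'Append', 'defRefPos', and 'defNode' are placeholder names;
// only GetNode and ReturnNode are defined here.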
146
147//------------------------------------------------------------------------
148// newInterval: Create a new Interval of the given RegisterType.
149//
150// Arguments:
151// theRegisterType - The type of Interval to create.
152//
153// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately
154// set such fields as isStructField
155//
156Interval* LinearScan::newInterval(RegisterType theRegisterType)
157{
158 intervals.emplace_back(theRegisterType, allRegs(theRegisterType));
159 Interval* newInt = &intervals.back();
160
161#ifdef DEBUG
162 newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1);
163#endif // DEBUG
164
165 DBEXEC(VERBOSE, newInt->dump());
166 return newInt;
167}
168
169//------------------------------------------------------------------------
170// newRefPositionRaw: Create a new RefPosition
171//
172// Arguments:
173// nodeLocation - The location of the reference.
174// treeNode - The GenTree of the reference.
175// refType - The type of reference
176//
177// Notes:
178// This is used to create RefPositions for both RegRecords and Intervals,
179// so it does only the common initialization.
180//
181RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
182{
183 refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType);
184 RefPosition* newRP = &refPositions.back();
185#ifdef DEBUG
186 newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1);
187#endif // DEBUG
188 return newRP;
189}
190
191//------------------------------------------------------------------------
192// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use
193// register requirements on a single-def, single-use interval.
194//
195// Arguments:
//    interval       - The interval whose def and use have conflicting register requirements
//    defRefPosition - The interval definition; its sole use is defRefPosition->nextRefPosition
198//
199// Return Value:
200// None.
201//
202// Assumptions:
203// The two RefPositions are for the same interval, which is a tree-temp.
204//
205// Notes:
206// We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg.
207// In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that,
208// even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs
209// to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4
210// below.
// Here are the cases we consider (in this order); a small worked example follows the list:
// 1. If the defRefPosition specifies a single register, and there are no conflicting
213// FixedReg uses of it between the def and use, we use that register, and the code generator
214// will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def.
215// 2. If the useRefPosition specifies a single register, and it is not in use, and there are no
216// conflicting FixedReg uses of it between the def and use, we use that register, and the code generator
217// will insert the copy.
218// 3. If the defRefPosition specifies a single register (but there are conflicts, as determined
219// in 1.), and there are no conflicts with the useRefPosition register (if it's a single register),
// we set the register requirements on the defRefPosition to the use registers, and the
221// code generator will insert a copy on the def. We can't rely on the code generator to put a copy
222// on the use if it has multiple possible candidates, as it won't know which one has been allocated.
223// 4. If the useRefPosition specifies a single register, and there are no conflicts with the register
224// on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set
225// the useRefPosition to the def registers, for similar reasons to case #3.
226// 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts,
// we set the candidates on defRefPosition to be all regs of the appropriate type, and since they are
228// single registers, codegen can insert the copy.
229// 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but
230// has a conflict), we must insert a copy. The copy will be inserted before the use if the
231// use is not fixed (in the fixed case, the code generator will insert the use).
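//
// A small worked example (illustrative only, using x64 register names): suppose a tree
// temp is defined by an instruction that must target RAX (defRegAssignment == RBM_RAX),
// while its use accepts any integer register. If no other FixedReg reference to RAX
// occurs between the def and the use, case #1 applies and the use's registerAssignment
// is narrowed to RBM_RAX. If RAX is needed by another FixedReg reference in between,
// we fall through and instead widen the def's candidates to the use's candidates
// (case #3), relying on the code generator to insert a copy at the def.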
232//
233// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is
234// available for the lifetime. We end up allocating a register that must be spilled, and it probably
235// won't be the register that is actually defined by the target instruction. So, we have to copy it
236// and THEN spill it. In this case, we should be using the def requirement. But we need to change
237// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but
238// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway
// we can use the fixed-reg on the def).
240//
241
242void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition)
243{
244 assert(!interval->isLocalVar);
245
246 RefPosition* useRefPosition = defRefPosition->nextRefPosition;
247 regMaskTP defRegAssignment = defRefPosition->registerAssignment;
248 regMaskTP useRegAssignment = useRefPosition->registerAssignment;
249 RegRecord* defRegRecord = nullptr;
250 RegRecord* useRegRecord = nullptr;
251 regNumber defReg = REG_NA;
252 regNumber useReg = REG_NA;
253 bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE);
254 bool useRegConflict = defRegConflict;
255
256 // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment
257 // on it, or we will fail to ensure that the fixedReg is busy at the time the target
258 // (of the node that uses this interval) is allocated.
259 bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree;
260
261 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT));
262 if (!canChangeUseAssignment)
263 {
264 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE));
265 }
266 if (defRefPosition->isFixedRegRef && !defRegConflict)
267 {
268 defReg = defRefPosition->assignedReg();
269 defRegRecord = getRegisterRecord(defReg);
270 if (canChangeUseAssignment)
271 {
272 RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
273 assert(currFixedRegRefPosition != nullptr &&
274 currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation);
275
276 if (currFixedRegRefPosition->nextRefPosition == nullptr ||
277 currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation())
278 {
279 // This is case #1. Use the defRegAssignment
280 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1));
281 useRefPosition->registerAssignment = defRegAssignment;
282 return;
283 }
284 else
285 {
286 defRegConflict = true;
287 }
288 }
289 }
290 if (useRefPosition->isFixedRegRef && !useRegConflict)
291 {
292 useReg = useRefPosition->assignedReg();
293 useRegRecord = getRegisterRecord(useReg);
294 RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition;
295
296 // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null.
297 RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition();
298 assert(nextFixedRegRefPosition != nullptr &&
299 nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation);
300
301 // First, check to see if there are any conflicting FixedReg references between the def and use.
302 if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation)
303 {
304 // OK, no conflicting FixedReg references.
305 // Now, check to see whether it is currently in use.
306 if (useRegRecord->assignedInterval != nullptr)
307 {
308 RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition;
309 LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation();
310 if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation)
311 {
312 useRegConflict = true;
313 }
314 }
315 if (!useRegConflict)
316 {
317 // This is case #2. Use the useRegAssignment
318 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2, interval));
319 defRefPosition->registerAssignment = useRegAssignment;
320 return;
321 }
322 }
323 else
324 {
325 useRegConflict = true;
326 }
327 }
328 if (defRegRecord != nullptr && !useRegConflict)
329 {
330 // This is case #3.
331 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3, interval));
332 defRefPosition->registerAssignment = useRegAssignment;
333 return;
334 }
335 if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment)
336 {
337 // This is case #4.
338 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4, interval));
339 useRefPosition->registerAssignment = defRegAssignment;
340 return;
341 }
342 if (defRegRecord != nullptr && useRegRecord != nullptr)
343 {
344 // This is case #5.
345 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5, interval));
346 RegisterType regType = interval->registerType;
347 assert((getRegisterType(interval, defRefPosition) == regType) &&
348 (getRegisterType(interval, useRefPosition) == regType));
349 regMaskTP candidates = allRegs(regType);
350 defRefPosition->registerAssignment = candidates;
351 return;
352 }
353 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6, interval));
354 return;
355}
356
357//------------------------------------------------------------------------
358// applyCalleeSaveHeuristics: Set register preferences for an interval based on the given RefPosition
359//
360// Arguments:
361// rp - The RefPosition of interest
362//
363// Notes:
// This is slightly more general than its name implies, and updates preferences not just
365// for callee-save registers.
366//
367void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp)
368{
369#ifdef _TARGET_AMD64_
370 if (compiler->opts.compDbgEnC)
371 {
372 // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs.
373 return;
374 }
375#endif // _TARGET_AMD64_
376
377 Interval* theInterval = rp->getInterval();
378
379#ifdef DEBUG
380 if (!doReverseCallerCallee())
381#endif // DEBUG
382 {
383 // Set preferences so that this register set will be preferred for earlier refs
384 theInterval->updateRegisterPreferences(rp->registerAssignment);
385 }
386}
387
388//------------------------------------------------------------------------
389// checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps.
390//
391// Arguments:
392// useRP - The use RefPosition of a tree temp (SDSU Interval)
393//
394// Notes:
395// There are a couple of cases where this may over-constrain allocation:
396// 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or
397// 2. In the case where the defining node requires a temp distinct from the target (also a
398// delay-free case).
399// In those cases, if we propagate a single-register restriction from the consumer to the producer
400// the delayed uses will not see a fixed reference in the PhysReg at that position, and may
401// incorrectly allocate that register.
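//
// For example (illustrative only): if the def currently allows {RAX, RCX} and the use is
// fixed to RCX, the def is narrowed to RCX (unless the interval has interfering uses, in
// which case a single-register restriction is not propagated). If the def and use sets
// are disjoint, 'hasConflictingDefUse' is set so the conflict can be resolved later.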
402// TODO-CQ: This means that we may often require a copy at the use of this node's result.
403// This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is
404// created, causing a RefTypeFixedReg to be added at that location. This, however, results in
405// more PhysReg RefPositions (a throughput impact), and a large number of diffs that require
406// further analysis to determine benefit.
407// See Issue #11274.
408//
409void LinearScan::checkConflictingDefUse(RefPosition* useRP)
410{
411 assert(useRP->refType == RefTypeUse);
412 Interval* theInterval = useRP->getInterval();
413 assert(!theInterval->isLocalVar);
414
415 RefPosition* defRP = theInterval->firstRefPosition;
416
417 // All defs must have a valid treeNode, but we check it below to be conservative.
418 assert(defRP->treeNode != nullptr);
419 regMaskTP prevAssignment = defRP->registerAssignment;
420 regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment);
421 if (newAssignment != RBM_NONE)
422 {
423 if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses)
424 {
425 defRP->registerAssignment = newAssignment;
426 }
427 }
428 else
429 {
430 theInterval->hasConflictingDefUse = true;
431 }
432}
433
434//------------------------------------------------------------------------
435// associateRefPosWithInterval: Update the Interval based on the given RefPosition.
436//
437// Arguments:
438// rp - The RefPosition of interest
439//
440// Notes:
441// This is called at the time when 'rp' has just been created, so it becomes
442// the nextRefPosition of the recentRefPosition, and both the recentRefPosition
443// and lastRefPosition of its referent.
444//
445void LinearScan::associateRefPosWithInterval(RefPosition* rp)
446{
447 Referenceable* theReferent = rp->referent;
448
449 if (theReferent != nullptr)
450 {
        // All RefPositions except the dummy ones at the beginning of blocks have a referent.
452
453 if (rp->isIntervalRef())
454 {
455 Interval* theInterval = rp->getInterval();
456
457 applyCalleeSaveHeuristics(rp);
458
459 if (theInterval->isLocalVar)
460 {
461 if (RefTypeIsUse(rp->refType))
462 {
463 RefPosition* const prevRP = theInterval->recentRefPosition;
464 if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum))
465 {
466 prevRP->lastUse = false;
467 }
468 }
469
470 rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) &&
471 (rp->refType != RefTypeZeroInit) && !extendLifetimes();
472 }
473 else if (rp->refType == RefTypeUse)
474 {
475 checkConflictingDefUse(rp);
476 rp->lastUse = true;
477 }
478 }
479
480 RefPosition* prevRP = theReferent->recentRefPosition;
481 if (prevRP != nullptr)
482 {
483 prevRP->nextRefPosition = rp;
484 }
485 else
486 {
487 theReferent->firstRefPosition = rp;
488 }
489 theReferent->recentRefPosition = rp;
490 theReferent->lastRefPosition = rp;
491 }
492 else
493 {
494 assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs));
495 }
496}
497
498//---------------------------------------------------------------------------
499// newRefPosition: allocate and initialize a new RefPosition.
500//
501// Arguments:
502// reg - reg number that identifies RegRecord to be associated
503// with this RefPosition
504// theLocation - LSRA location of RefPosition
505// theRefType - RefPosition type
506// theTreeNode - GenTree node for which this RefPosition is created
507// mask - Set of valid registers for this RefPosition
510//
511// Return Value:
512// a new RefPosition
513//
514RefPosition* LinearScan::newRefPosition(
515 regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask)
516{
517 RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
518
519 RegRecord* regRecord = getRegisterRecord(reg);
520 newRP->setReg(regRecord);
521 newRP->registerAssignment = mask;
522
523 newRP->setMultiRegIdx(0);
524 newRP->setAllocateIfProfitable(false);
525
526 // We can't have two RefPositions on a RegRecord at the same location, unless they are different types.
527 assert((regRecord->lastRefPosition == nullptr) || (regRecord->lastRefPosition->nodeLocation < theLocation) ||
528 (regRecord->lastRefPosition->refType != theRefType));
529 associateRefPosWithInterval(newRP);
530
531 DBEXEC(VERBOSE, newRP->dump());
532 return newRP;
533}
534
535//---------------------------------------------------------------------------
536// newRefPosition: allocate and initialize a new RefPosition.
537//
538// Arguments:
//    theInterval - the interval with which this RefPosition is associated
540// theLocation - LSRA location of RefPosition
541// theRefType - RefPosition type
542// theTreeNode - GenTree node for which this RefPosition is created
543// mask - Set of valid registers for this RefPosition
544// multiRegIdx - register position if this RefPosition corresponds to a
545// multi-reg call node.
546//
547// Return Value:
548// a new RefPosition
549//
550RefPosition* LinearScan::newRefPosition(Interval* theInterval,
551 LsraLocation theLocation,
552 RefType theRefType,
553 GenTree* theTreeNode,
554 regMaskTP mask,
555 unsigned multiRegIdx /* = 0 */)
556{
557 if (theInterval != nullptr)
558 {
559 if (mask == RBM_NONE)
560 {
561 mask = allRegs(theInterval->registerType);
562 }
563 }
564 else
565 {
566 assert(theRefType == RefTypeBB || theRefType == RefTypeKillGCRefs);
567 }
568#ifdef DEBUG
569 if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
570 {
571 // In the case we're using floating point registers we must make sure
572 // this flag was set previously in the compiler since this will mandate
573 // whether LSRA will take into consideration FP reg killsets.
574 assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0));
575 }
576#endif // DEBUG
577
578 // If this reference is constrained to a single register (and it's not a dummy
579 // or Kill reftype already), add a RefTypeFixedReg at this location so that its
580 // availability can be more accurately determined
581
582 bool isFixedRegister = isSingleRegister(mask);
583 bool insertFixedRef = false;
584 if (isFixedRegister)
585 {
586 // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB),
587 // but not an internal use (it will already have a FixedRef for the def).
588 if ((theRefType == RefTypeDef) || ((theRefType == RefTypeUse) && !theInterval->isInternal))
589 {
590 insertFixedRef = true;
591 }
592 }
593
594 if (insertFixedRef)
595 {
596 regNumber physicalReg = genRegNumFromMask(mask);
597 RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask);
598 assert(theInterval != nullptr);
599 assert((allRegs(theInterval->registerType) & mask) != 0);
600 }
601
602 RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
603
604 newRP->setInterval(theInterval);
605
606 // Spill info
607 newRP->isFixedRegRef = isFixedRegister;
608
609#ifndef _TARGET_AMD64_
    // We don't need this for AMD64 because the PInvoke method epilog code is explicit
611 // at register allocation time.
612 if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged &&
613 theInterval->varNum == compiler->genReturnLocal)
614 {
615 mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
616 noway_assert(mask != RBM_NONE);
617 }
618#endif // !_TARGET_AMD64_
619 newRP->registerAssignment = mask;
620
621 newRP->setMultiRegIdx(multiRegIdx);
622 newRP->setAllocateIfProfitable(false);
623
624 associateRefPosWithInterval(newRP);
625
626 DBEXEC(VERBOSE, newRP->dump());
627 return newRP;
628}
629
630//---------------------------------------------------------------------------
631// newUseRefPosition: allocate and initialize a RefTypeUse RefPosition at currentLoc.
632//
633// Arguments:
//    theInterval - the interval with which this RefPosition is associated
635// theTreeNode - GenTree node for which this RefPosition is created
636// mask - Set of valid registers for this RefPosition
637// multiRegIdx - register position if this RefPosition corresponds to a
638// multi-reg call node.
642//
643// Return Value:
644// a new RefPosition
645//
646// Notes:
647// If the caller knows that 'theTreeNode' is NOT a candidate local, newRefPosition
648// can/should be called directly.
649//
650RefPosition* LinearScan::newUseRefPosition(Interval* theInterval,
651 GenTree* theTreeNode,
652 regMaskTP mask,
653 unsigned multiRegIdx)
654{
655 GenTree* treeNode = isCandidateLocalRef(theTreeNode) ? theTreeNode : nullptr;
656
657 RefPosition* pos = newRefPosition(theInterval, currentLoc, RefTypeUse, treeNode, mask, multiRegIdx);
658 if (theTreeNode->IsRegOptional())
659 {
660 pos->setAllocateIfProfitable(true);
661 }
662 return pos;
663}
664
665//------------------------------------------------------------------------
// isContainableMemoryOp: Checks whether this is a memory op that can be contained.
667//
668// Arguments:
669// node - the node of interest.
670//
671// Return value:
672// True if this will definitely be a memory reference that could be contained.
673//
674// Notes:
675// This differs from the isMemoryOp() method on GenTree because it checks for
// the case of a doNotEnregister local. This won't include locals that
677// for some other reason do not become register candidates, nor those that get
678// spilled.
679// Also, because we usually call this before we redo dataflow, any new lclVars
680// introduced after the last dataflow analysis will not yet be marked lvTracked,
681// so we don't use that.
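//
// For example (illustrative only): a GT_IND node is accepted via isMemoryOp(), a GT_LCL_VAR
// whose LclVarDsc has lvDoNotEnregister set is accepted via the local path, and a
// register-candidate local is rejected.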
682//
683bool LinearScan::isContainableMemoryOp(GenTree* node)
684{
685 if (node->isMemoryOp())
686 {
687 return true;
688 }
689 if (node->IsLocal())
690 {
691 if (!enregisterLocalVars)
692 {
693 return true;
694 }
695 LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum];
696 return varDsc->lvDoNotEnregister;
697 }
698 return false;
699}
700
701//------------------------------------------------------------------------
702// addRefsForPhysRegMask: Adds RefPositions of the given type for all the registers in 'mask'.
703//
704// Arguments:
705// mask - the mask (set) of registers.
706// currentLoc - the location at which they should be added
707// refType - the type of refposition
708// isLastUse - true IFF this is a last use of the register
709//
710void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
711{
712 for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
713 {
714 if (mask & 1)
715 {
716 // This assumes that these are all "special" RefTypes that
717 // don't need to be recorded on the tree (hence treeNode is nullptr)
718 RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
719 genRegMask(reg)); // This MUST occupy the physical register (obviously)
720
721 if (isLastUse)
722 {
723 pos->lastUse = true;
724 }
725 }
726 }
727}
728
729//------------------------------------------------------------------------
730// getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node.
731// If the GT_STOREIND will generate a write barrier, determine the specific kill
732// set required by the case-specific, platform-specific write barrier. If no
733// write barrier is required, the kill set will be RBM_NONE.
734//
735// Arguments:
736// tree - the GT_STOREIND node
737//
738// Return Value: a register mask of the registers killed
739//
740regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
741{
742 assert(tree->OperIs(GT_STOREIND));
743
744 regMaskTP killMask = RBM_NONE;
745
746 GenTree* data = tree->Data();
747
748 GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data);
749 if (writeBarrierForm != GCInfo::WBF_NoBarrier)
750 {
751 if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm))
752 {
753 // We can't determine the exact helper to be used at this point, because it depends on
754 // the allocated register for the `data` operand. However, all the (x86) optimized
755 // helpers have the same kill set: EDX. And note that currently, only x86 can return
756 // `true` for genUseOptimizedWriteBarriers().
757 killMask = RBM_CALLEE_TRASH_NOGC;
758 }
759 else
760 {
761 // Figure out which helper we're going to use, and then get the kill set for that helper.
762 CorInfoHelpFunc helper =
763 compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm);
764 killMask = compiler->compHelperCallKillSet(helper);
765 }
766 }
767 return killMask;
768}
769
770//------------------------------------------------------------------------
771// getKillSetForShiftRotate: Determine the liveness kill set for a shift or rotate node.
772//
773// Arguments:
774// shiftNode - the shift or rotate node
775//
776// Return Value: a register mask of the registers killed
777//
778regMaskTP LinearScan::getKillSetForShiftRotate(GenTreeOp* shiftNode)
779{
780 regMaskTP killMask = RBM_NONE;
781#ifdef _TARGET_XARCH_
782 assert(shiftNode->OperIsShiftOrRotate());
783 GenTree* shiftBy = shiftNode->gtGetOp2();
784 if (!shiftBy->isContained())
785 {
786 killMask = RBM_RCX;
787 }
788#endif // _TARGET_XARCH_
789 return killMask;
790}
791
792//------------------------------------------------------------------------
793// getKillSetForMul: Determine the liveness kill set for a multiply node.
794//
795// Arguments:
//    mulNode - the multiply node
797//
798// Return Value: a register mask of the registers killed
799//
800regMaskTP LinearScan::getKillSetForMul(GenTreeOp* mulNode)
801{
802 regMaskTP killMask = RBM_NONE;
803#ifdef _TARGET_XARCH_
804 assert(mulNode->OperIsMul());
805 if (!mulNode->OperIs(GT_MUL) || (((mulNode->gtFlags & GTF_UNSIGNED) != 0) && mulNode->gtOverflowEx()))
806 {
807 killMask = RBM_RAX | RBM_RDX;
808 }
809#endif // _TARGET_XARCH_
810 return killMask;
811}
812
813//------------------------------------------------------------------------
814// getKillSetForModDiv: Determine the liveness kill set for a mod or div node.
815//
816// Arguments:
//    node - the mod or div node as a GenTreeOp
818//
819// Return Value: a register mask of the registers killed
820//
821regMaskTP LinearScan::getKillSetForModDiv(GenTreeOp* node)
822{
823 regMaskTP killMask = RBM_NONE;
824#ifdef _TARGET_XARCH_
825 assert(node->OperIs(GT_MOD, GT_DIV, GT_UMOD, GT_UDIV));
826 if (!varTypeIsFloating(node->TypeGet()))
827 {
828 // Both RAX and RDX are killed by the operation
829 killMask = RBM_RAX | RBM_RDX;
830 }
831#endif // _TARGET_XARCH_
832 return killMask;
833}
834
835//------------------------------------------------------------------------
836// getKillSetForCall: Determine the liveness kill set for a call node.
837//
838// Arguments:
//    call - the GenTreeCall node
840//
841// Return Value: a register mask of the registers killed
842//
843regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call)
844{
845 regMaskTP killMask = RBM_NONE;
846#ifdef _TARGET_X86_
847 if (compiler->compFloatingPointUsed)
848 {
849 if (call->TypeGet() == TYP_DOUBLE)
850 {
851 needDoubleTmpForFPCall = true;
852 }
853 else if (call->TypeGet() == TYP_FLOAT)
854 {
855 needFloatTmpForFPCall = true;
856 }
857 }
858#endif // _TARGET_X86_
859#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
860 if (call->IsHelperCall())
861 {
862 CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
863 killMask = compiler->compHelperCallKillSet(helpFunc);
864 }
865 else
866#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
867 {
868 // if there is no FP used, we can ignore the FP kills
869 if (compiler->compFloatingPointUsed)
870 {
871 killMask = RBM_CALLEE_TRASH;
872 }
873 else
874 {
875 killMask = RBM_INT_CALLEE_TRASH;
876 }
877#ifdef _TARGET_ARM_
878 if (call->IsVirtualStub())
879 {
880 killMask |= compiler->virtualStubParamInfo->GetRegMask();
881 }
882#else // !_TARGET_ARM_
883 // Verify that the special virtual stub call registers are in the kill mask.
884 // We don't just add them unconditionally to the killMask because for most architectures
885 // they are already in the RBM_CALLEE_TRASH set,
886 // and we don't want to introduce extra checks and calls in this hot function.
887 assert(!call->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) ==
888 compiler->virtualStubParamInfo->GetRegMask()));
889#endif // !_TARGET_ARM_
890 }
891 return killMask;
892}
893
894//------------------------------------------------------------------------
895// getKillSetForBlockStore: Determine the liveness kill set for a block store node.
896//
897// Arguments:
//    blkNode - the block store node as a GenTreeBlk
899//
900// Return Value: a register mask of the registers killed
901//
902regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode)
903{
904 assert(blkNode->OperIsStore());
905 regMaskTP killMask = RBM_NONE;
906
907 if ((blkNode->OperGet() == GT_STORE_OBJ) && blkNode->OperIsCopyBlkOp())
908 {
909 assert(blkNode->AsObj()->gtGcPtrCount != 0);
910 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
911 }
912 else
913 {
914 bool isCopyBlk = varTypeIsStruct(blkNode->Data());
915 switch (blkNode->gtBlkOpKind)
916 {
917 case GenTreeBlk::BlkOpKindHelper:
918 if (isCopyBlk)
919 {
920 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
921 }
922 else
923 {
924 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
925 }
926 break;
927
928#ifdef _TARGET_XARCH_
929 case GenTreeBlk::BlkOpKindRepInstr:
930 if (isCopyBlk)
931 {
932 // rep movs kills RCX, RDI and RSI
933 killMask = RBM_RCX | RBM_RDI | RBM_RSI;
934 }
935 else
936 {
937 // rep stos kills RCX and RDI.
938 // (Note that the Data() node, if not constant, will be assigned to
                    // RCX, but it's fine that this kills it, as the value is not available
940 // after this node in any case.)
941 killMask = RBM_RDI | RBM_RCX;
942 }
943 break;
944#else
945 case GenTreeBlk::BlkOpKindRepInstr:
946#endif
947 case GenTreeBlk::BlkOpKindUnroll:
948 case GenTreeBlk::BlkOpKindInvalid:
949 // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
950 break;
951 }
952 }
953 return killMask;
954}
955
956#ifdef FEATURE_HW_INTRINSICS
957//------------------------------------------------------------------------
// getKillSetForHWIntrinsic: Determine the liveness kill set for a GT_HWIntrinsic node.
//
// Arguments:
//    node - the GT_HWIntrinsic node
965//
966// Return Value: a register mask of the registers killed
967//
968regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node)
969{
970 regMaskTP killMask = RBM_NONE;
971#ifdef _TARGET_XARCH_
972 switch (node->gtHWIntrinsicId)
973 {
974 case NI_SSE2_MaskMove:
975 // maskmovdqu uses edi as the implicit address register.
976 // Although it is set as the srcCandidate on the address, if there is also a fixed
977 // assignment for the definition of the address, resolveConflictingDefAndUse() may
978 // change the register assignment on the def or use of a tree temp (SDSU) when there
979 // is a conflict, and the FixedRef on edi won't be sufficient to ensure that another
980 // Interval will not be allocated there.
981 // Issue #17674 tracks this.
982 killMask = RBM_EDI;
983 break;
984
985 default:
986 // Leave killMask as RBM_NONE
987 break;
988 }
989#endif // _TARGET_XARCH_
990 return killMask;
991}
992#endif // FEATURE_HW_INTRINSICS
993
994//------------------------------------------------------------------------
995// getKillSetForReturn: Determine the liveness kill set for a return node.
996//
997// Arguments:
998// NONE (this kill set is independent of the details of the specific return.)
999//
1000// Return Value: a register mask of the registers killed
1001//
1002regMaskTP LinearScan::getKillSetForReturn()
1003{
1004 return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE)
1005 : RBM_NONE;
1006}
1007
1008//------------------------------------------------------------------------
1009// getKillSetForProfilerHook: Determine the liveness kill set for a profiler hook.
1010//
1011// Arguments:
1012// NONE (this kill set is independent of the details of the specific node.)
1013//
1014// Return Value: a register mask of the registers killed
1015//
1016regMaskTP LinearScan::getKillSetForProfilerHook()
1017{
1018 return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL)
1019 : RBM_NONE;
1020}
1021
1022#ifdef DEBUG
1023//------------------------------------------------------------------------
1024// getKillSetForNode: Return the registers killed by the given tree node.
1025//
1026// Arguments:
1027// tree - the tree for which the kill set is needed.
1028//
1029// Return Value: a register mask of the registers killed
1030//
1031regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
1032{
1033 regMaskTP killMask = RBM_NONE;
1034 switch (tree->OperGet())
1035 {
1036 case GT_LSH:
1037 case GT_RSH:
1038 case GT_RSZ:
1039 case GT_ROL:
1040 case GT_ROR:
1041#ifdef _TARGET_X86_
1042 case GT_LSH_HI:
1043 case GT_RSH_LO:
1044#endif
1045 killMask = getKillSetForShiftRotate(tree->AsOp());
1046 break;
1047
1048 case GT_MUL:
1049 case GT_MULHI:
1050#if !defined(_TARGET_64BIT_)
1051 case GT_MUL_LONG:
1052#endif
1053 killMask = getKillSetForMul(tree->AsOp());
1054 break;
1055
1056 case GT_MOD:
1057 case GT_DIV:
1058 case GT_UMOD:
1059 case GT_UDIV:
1060 killMask = getKillSetForModDiv(tree->AsOp());
1061 break;
1062
1063 case GT_STORE_OBJ:
1064 case GT_STORE_BLK:
1065 case GT_STORE_DYN_BLK:
1066 killMask = getKillSetForBlockStore(tree->AsBlk());
1067 break;
1068
1069 case GT_RETURNTRAP:
1070 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
1071 break;
1072
1073 case GT_CALL:
1074 killMask = getKillSetForCall(tree->AsCall());
1075
1076 break;
1077 case GT_STOREIND:
1078 killMask = getKillSetForStoreInd(tree->AsStoreInd());
1079 break;
1080
1081#if defined(PROFILING_SUPPORTED)
1082 // If this method requires profiler ELT hook then mark these nodes as killing
1083 // callee trash registers (excluding RAX and XMM0). The reason for this is that
1084 // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
1085 // more details.
1086 case GT_RETURN:
1087 killMask = getKillSetForReturn();
1088 break;
1089
1090 case GT_PROF_HOOK:
1091 killMask = getKillSetForProfilerHook();
1092 break;
1093#endif // PROFILING_SUPPORTED
1094
1095#ifdef FEATURE_HW_INTRINSICS
1096 case GT_HWIntrinsic:
1097 killMask = getKillSetForHWIntrinsic(tree->AsHWIntrinsic());
1098 break;
1099#endif // FEATURE_HW_INTRINSICS
1100
1101 default:
1102 // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
1103 break;
1104 }
1105 return killMask;
1106}
1107#endif // DEBUG
1108
1109//------------------------------------------------------------------------
1110// buildKillPositionsForNode:
1111// Given some tree node add refpositions for all the registers this node kills
1112//
1113// Arguments:
1114// tree - the tree for which kill positions should be generated
1115// currentLoc - the location at which the kills should be added
1116//
1117// Return Value:
1118// true - kills were inserted
1119// false - no kills were inserted
1120//
1121// Notes:
1122// The return value is needed because if we have any kills, we need to make sure that
1123// all defs are located AFTER the kills. On the other hand, if there aren't kills,
1124// the multiple defs for a regPair are in different locations.
1125// If we generate any kills, we will mark all currentLiveVars as being preferenced
1126// to avoid the killed registers. This is somewhat conservative.
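//
// For example (illustrative only): at a call site the kill set is typically RBM_CALLEE_TRASH,
// so 'isCallKill' is true; each live register-candidate lclVar that crosses the call gets
// 'preferCalleeSave' set and has its register preferences narrowed away from the killed
// (caller-saved) registers.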
1127
1128bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask)
1129{
1130 bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
1131 if (killMask != RBM_NONE)
1132 {
1133 // The killMask identifies a set of registers that will be used during codegen.
1134 // Mark these as modified here, so when we do final frame layout, we'll know about
1135 // all these registers. This is especially important if killMask contains
1136 // callee-saved registers, which affect the frame size since we need to save/restore them.
        // In the case where we have a copyBlk with GC pointers, we may need to call the
        // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI. If
1139 // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
1140 // which is too late.
1141 compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true));
1142
1143 addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
1144
1145 // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee
        // save regs on infrequently executed paths. However, it results in a large number of asmDiffs,
        // many of which appear to be regressions (because there is more spill on the infrequently executed path),
1148 // but are not really because the frequent path becomes smaller. Validating these diffs will need
1149 // to be done before making this change.
1150 // if (!blockSequence[curBBSeqNum]->isRunRarely())
1151 if (enregisterLocalVars)
1152 {
1153 VarSetOps::Iter iter(compiler, currentLiveVars);
1154 unsigned varIndex = 0;
1155 while (iter.NextElem(&varIndex))
1156 {
1157 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
1158 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1159#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1160 if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
1161 {
1162 if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
1163 {
1164 continue;
1165 }
1166 }
1167 else
1168#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1169 if (varTypeIsFloating(varDsc) &&
1170 !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
1171 {
1172 continue;
1173 }
1174 Interval* interval = getIntervalForLocalVar(varIndex);
1175 if (isCallKill)
1176 {
1177 interval->preferCalleeSave = true;
1178 }
1179 regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask);
1180
1181 if (newPreferences != RBM_NONE)
1182 {
1183 interval->updateRegisterPreferences(newPreferences);
1184 }
1185 else
1186 {
1187 // If there are no callee-saved registers, the call could kill all the registers.
1188 // This is a valid state, so in that case assert should not trigger. The RA will spill in order to
1189 // free a register later.
1190 assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE);
1191 }
1192 }
1193 }
1194
1195 if (compiler->killGCRefs(tree))
1196 {
1197 RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree,
1198 (allRegs(TYP_REF) & ~RBM_ARG_REGS));
1199 }
1200 return true;
1201 }
1202
1203 return false;
1204}
1205
1206//----------------------------------------------------------------------------
1207// defineNewInternalTemp: Defines a ref position for an internal temp.
1208//
1209// Arguments:
1210// tree - Gentree node requiring an internal register
1211// regType - Register type
1213// regMask - register mask of candidates for temp
1214//
1215RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask)
1216{
1217 Interval* current = newInterval(regType);
1218 current->isInternal = true;
1219 RefPosition* newDef = newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0);
1220 assert(internalCount < MaxInternalCount);
1221 internalDefs[internalCount++] = newDef;
1222 return newDef;
1223}
1224
1225//------------------------------------------------------------------------
// buildInternalIntRegisterDefForNode - Create an Interval for an internal int register, and a def RefPosition
1227//
1228// Arguments:
1229// tree - Gentree node that needs internal registers
1230// internalCands - The mask of valid registers
1231//
1232// Returns:
1233// The def RefPosition created for this internal temp.
1234//
1235RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands)
1236{
1237 bool fixedReg = false;
1238 // The candidate set should contain only integer registers.
1239 assert((internalCands & ~allRegs(TYP_INT)) == RBM_NONE);
1240 if (genMaxOneBit(internalCands))
1241 {
1242 fixedReg = true;
1243 }
1244
1245 RefPosition* defRefPosition = defineNewInternalTemp(tree, IntRegisterType, internalCands);
1246 return defRefPosition;
1247}
1248
1249//------------------------------------------------------------------------
1250// buildInternalFloatRegisterDefForNode - Create an Interval for an internal fp register, and a def RefPosition
1251//
1252// Arguments:
1253// tree - Gentree node that needs internal registers
1254// internalCands - The mask of valid registers
1255//
1256// Returns:
1257// The def RefPosition created for this internal temp.
1258//
1259RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands)
1260{
1261 bool fixedReg = false;
1262 // The candidate set should contain only float registers.
1263 assert((internalCands & ~allRegs(TYP_FLOAT)) == RBM_NONE);
1264 if (genMaxOneBit(internalCands))
1265 {
1266 fixedReg = true;
1267 }
1268
1269 RefPosition* defRefPosition = defineNewInternalTemp(tree, FloatRegisterType, internalCands);
1270 return defRefPosition;
1271}
1272
1273//------------------------------------------------------------------------
1274// buildInternalRegisterUses - adds use positions for internal
1275// registers required for tree node.
1276//
1277// Notes:
1278// During the BuildNode process, calls to buildInternalIntRegisterDefForNode and
1279// buildInternalFloatRegisterDefForNode put new RefPositions in the 'internalDefs'
1280// array, and increment 'internalCount'. This method must be called to add corresponding
1281// uses. It then resets the 'internalCount' for the handling of the next node.
1282//
1283// If the internal registers must differ from the target register, 'setInternalRegsDelayFree'
1284// must be set to true, so that the uses may be marked 'delayRegFree'.
// Note that if a node has both float and int temps, generally the target will either be
1286// int *or* float, and it is not really necessary to set this on the other type, but it does
1287// no harm as it won't restrict the register selection.
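//
// A typical pairing in a BuildNode method looks like this (illustrative sketch only;
// 'candidates' is a placeholder for whatever mask the caller computes):
//
//    buildInternalIntRegisterDefForNode(tree, candidates); // records a def in internalDefs[]
//    ... build the RefPositions for the node's operands ...
//    buildInternalRegisterUses();                          // adds the matching internal uses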
1288//
1289void LinearScan::buildInternalRegisterUses()
1290{
1291 assert(internalCount <= MaxInternalCount);
1292 for (int i = 0; i < internalCount; i++)
1293 {
1294 RefPosition* def = internalDefs[i];
1295 regMaskTP mask = def->registerAssignment;
1296 RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0);
1297 if (setInternalRegsDelayFree)
1298 {
1299 use->delayRegFree = true;
1300 pendingDelayFree = true;
1301 }
1302 }
1303 // internalCount = 0;
1304}
1305
1306#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1307//------------------------------------------------------------------------
1308// buildUpperVectorSaveRefPositions - Create special RefPositions for saving
// the upper half of a set of large vectors.
1310//
1311// Arguments:
1312// tree - The current node being handled
1313// currentLoc - The location of the current node
1314//
1315// Return Value: Returns the set of lclVars that are killed by this node, and therefore
//                require RefTypeUpperVectorSaveDef RefPositions.
1317//
1318// Notes: The returned set is used by buildUpperVectorRestoreRefPositions.
1319//
1320VARSET_VALRET_TP
1321LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet)
1322{
1323 assert(enregisterLocalVars);
1324 VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler));
1325 if (!VarSetOps::IsEmpty(compiler, largeVectorVars))
1326 {
1327 // We actually need to find any calls that kill the upper-half of the callee-save vector registers.
1328 // But we will use as a proxy any node that kills floating point registers.
1329 // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
1330 // This check should have been done by the caller.
1331 if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
1332 {
1333 VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
1334 VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
1335 VarSetOps::Iter iter(compiler, liveLargeVectors);
1336 unsigned varIndex = 0;
1337 while (iter.NextElem(&varIndex))
1338 {
1339 Interval* varInterval = getIntervalForLocalVar(varIndex);
1340 Interval* tempInterval = newInterval(varInterval->registerType);
1341 tempInterval->isInternal = true;
1342 RefPosition* pos =
1343 newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
1344 // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
1345 // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
1346 // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval,
1347 // and set varInterval as the relatedInterval of tempInterval.
1348 tempInterval->relatedInterval = varInterval->relatedInterval;
1349 varInterval->relatedInterval = tempInterval;
1350 }
1351 }
1352 }
1353 return liveLargeVectors;
1354}
1355
1356// buildUpperVectorRestoreRefPositions - Create special RefPositions for restoring
1357// the upper half of a set of large vectors.
1358//
1359// Arguments:
1360// tree - The current node being handled
1361// currentLoc - The location of the current node
1362// liveLargeVectors - The set of lclVars needing restores (returned by buildUpperVectorSaveRefPositions)
1363//
1364void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree,
1365 LsraLocation currentLoc,
1366 VARSET_VALARG_TP liveLargeVectors)
1367{
1368 assert(enregisterLocalVars);
1369 if (!VarSetOps::IsEmpty(compiler, liveLargeVectors))
1370 {
1371 VarSetOps::Iter iter(compiler, liveLargeVectors);
1372 unsigned varIndex = 0;
1373 while (iter.NextElem(&varIndex))
1374 {
1375 Interval* varInterval = getIntervalForLocalVar(varIndex);
1376 Interval* tempInterval = varInterval->relatedInterval;
1377 assert(tempInterval->isInternal == true);
1378 RefPosition* pos =
1379 newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED);
1380 // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
1381 // of tempInterval.
1382 varInterval->relatedInterval = tempInterval->relatedInterval;
1383 tempInterval->relatedInterval = varInterval;
1384 }
1385 }
1386}
1387#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1388
1389#ifdef DEBUG
1390//------------------------------------------------------------------------
1391// ComputeOperandDstCount: computes the number of registers defined by a
1392// node.
1393//
1394// For most nodes, this is simple:
1395// - Nodes that do not produce values (e.g. stores and other void-typed
1396// nodes) and nodes that immediately use the registers they define
1397// produce no registers
1398// - Nodes that are marked as defining N registers define N registers.
1399//
1400// For contained nodes, however, things are more complicated: for purposes
1401// of bookkeeping, a contained node is treated as producing the transitive
1402// closure of the registers produced by its sources.
1403//
1404// Arguments:
1405// operand - The operand for which to compute a register count.
1406//
1407// Returns:
1408// The number of registers defined by `operand`.
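//
// For example (illustrative only): a contained GT_LEA whose base and index each produce a
// register reports a count of 2: the contained LEA defines no register itself, but it is
// treated as producing the registers its sources define.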
1409//
1410// static
1411int LinearScan::ComputeOperandDstCount(GenTree* operand)
1412{
1413 // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though
1414 // note that it is not in the linear order. It seems best to check for !IsLIR() rather than
1415 // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method.
1416 if (!operand->IsLIR())
1417 {
1418 return 0;
1419 }
1420 if (operand->isContained())
1421 {
1422 int dstCount = 0;
1423 for (GenTree* op : operand->Operands())
1424 {
1425 dstCount += ComputeOperandDstCount(op);
1426 }
1427
1428 return dstCount;
1429 }
1430 if (operand->IsUnusedValue())
1431 {
1432 // Operands that define an unused value do not produce any registers.
1433 return 0;
1434 }
1435 if (operand->IsValue())
1436 {
1437 // Operands that are values and are not contained consume all of their operands
1438 // and produce one or more registers.
1439 return operand->GetRegisterDstCount();
1440 }
1441 else
1442 {
1443 // This must be one of the operand types that are neither contained nor produce a value.
1444 // Stores and void-typed operands may be encountered when processing call nodes, which contain
1445 // pointers to argument setup stores.
1446 assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
1447 operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() ||
1448 operand->TypeGet() == TYP_VOID);
1449 return 0;
1450 }
1451}
1452
1453//------------------------------------------------------------------------
1454// ComputeAvailableSrcCount: computes the number of registers available as
1455// sources for a node.
1456//
1457// This is simply the sum of the number of registers produced by each
1458// operand to the node.
1459//
1460// Arguments:
1461// node - The node for which to compute a source count.
1462//
1463// Return Value:
1464// The number of registers available as sources for `node`.
1465//
1466// static
1467int LinearScan::ComputeAvailableSrcCount(GenTree* node)
1468{
1469 int numSources = 0;
1470 for (GenTree* operand : node->Operands())
1471 {
1472 numSources += ComputeOperandDstCount(operand);
1473 }
1474
1475 return numSources;
1476}
1477#endif // DEBUG
1478
1479//------------------------------------------------------------------------
1480// buildRefPositionsForNode: The main entry point for building the RefPositions
1481// and "tree temp" Intervals for a given node.
1482//
1483// Arguments:
1484// tree - The node for which we are building RefPositions
1485// block - The BasicBlock in which the node resides
1486// currentLoc - The LsraLocation of the given node
1487//
1488void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc)
1489{
1490 // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes.
1491 // GT_CLS_VAR nodes should have been eliminated by rationalizer.
1492 assert(tree->OperGet() != GT_ARGPLACE);
1493 assert(tree->OperGet() != GT_LIST);
1494 assert(tree->OperGet() != GT_CLS_VAR);
1495
1496 // The LIR traversal visits only the first node in a GT_FIELD_LIST.
1497 assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead());
1498
1499 // The set of internal temporary registers used by this node are stored in the
1500 // gtRsvdRegs register mask. Clear it out.
1501 tree->gtRsvdRegs = RBM_NONE;
1502
1503#ifdef DEBUG
1504 if (VERBOSE)
1505 {
1506 dumpDefList();
1507 compiler->gtDispTree(tree, nullptr, nullptr, true);
1508 }
1509#endif // DEBUG
1510
1511 if (tree->isContained())
1512 {
1513#ifdef _TARGET_XARCH_
1514 // On XArch we can have contained candidate lclVars if they are part of a RMW
1515 // address computation. In this case we need to check whether it is a last use.
1516 if (tree->IsLocal() && ((tree->gtFlags & GTF_VAR_DEATH) != 0))
1517 {
1518 LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
1519 if (isCandidateVar(varDsc))
1520 {
1521 assert(varDsc->lvTracked);
1522 unsigned varIndex = varDsc->lvVarIndex;
1523 VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex);
1524 }
1525 }
1526#else // _TARGET_XARCH_
1527 assert(!isCandidateLocalRef(tree));
1528#endif // _TARGET_XARCH_
1529 JITDUMP("Contained\n");
1530 return;
1531 }
1532
1533#ifdef DEBUG
1534 // If we are constraining the registers for allocation, we will modify all the RefPositions
1535 // we've built for this node after we've created them. In order to do that, we'll remember
1536 // the last RefPosition prior to those created for this node.
1537 RefPositionIterator refPositionMark = refPositions.backPosition();
1538 int oldDefListCount = defList.Count();
1539#endif // DEBUG
1540
1541 int consume = BuildNode(tree);
1542
1543#ifdef DEBUG
1544 int newDefListCount = defList.Count();
1545 int produce = newDefListCount - oldDefListCount;
1546 assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume));
1547
1548#if defined(_TARGET_AMD64_)
    // A multi-reg call node is the only node that can produce a multi-reg value.
1550 assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT));
1551#endif // _TARGET_AMD64_
1552
1553#endif // DEBUG
1554
1555#ifdef DEBUG
1556 // If we are constraining registers, modify all the RefPositions we've just built to specify the
1557 // minimum reg count required.
1558 if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT))
1559 {
1560 // The number of registers required for a tree node is the sum of
1561 // { RefTypeUses } + { RefTypeDef for the node itself } + specialPutArgCount
1562 // This is the minimum set of registers that needs to be ensured in the candidate set of ref positions created.
1563 //
1564 // First, we count them.
1565 unsigned minRegCount = 0;
1566
1567 RefPositionIterator iter = refPositionMark;
1568 for (iter++; iter != refPositions.end(); iter++)
1569 {
1570 RefPosition* newRefPosition = &(*iter);
1571 if (newRefPosition->isIntervalRef())
1572 {
1573 if ((newRefPosition->refType == RefTypeUse) ||
1574 ((newRefPosition->refType == RefTypeDef) && !newRefPosition->getInterval()->isInternal))
1575 {
1576 minRegCount++;
1577 }
1578 if (newRefPosition->getInterval()->isSpecialPutArg)
1579 {
1580 minRegCount++;
1581 }
1582 }
1583 }
1584
1585 if (tree->OperIsPutArgSplit())
1586 {
1587 // While we have attempted to account for any "specialPutArg" defs above, we're only looking at RefPositions
1588 // created for this node. We must be defining at least one register in the PutArgSplit, so conservatively
            // add one less than the maximum number of register args to 'minRegCount'.
1590 minRegCount += MAX_REG_ARG - 1;
1591 }
1592 for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
1593 {
1594 RefPosition* newRefPosition = &(*refPositionMark);
1595 unsigned minRegCountForRef = minRegCount;
1596 if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree)
1597 {
1598 // If delayRegFree, then Use will interfere with the destination of the consuming node.
                // Therefore, we also need to add the kill set of the consuming node to minRegCount.
1600 //
1601 // For example consider the following IR on x86, where v01 and v02
1602 // are method args coming in ecx and edx respectively.
1603 // GT_DIV(v01, v02)
1604 //
                // For GT_DIV, the minRegCount will be 3 without adding the kill set of the GT_DIV node.
1606 //
1607 // Assume further JitStressRegs=2, which would constrain candidates to callee trashable
1608 // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01.
1609 // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and
1610 // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for
1611 // the use position of v02 also needs to take into account the kill set of its consuming node.
1612 regMaskTP killMask = getKillSetForNode(tree);
1613 if (killMask != RBM_NONE)
1614 {
1615 minRegCountForRef += genCountBits(killMask);
1616 }
1617 }
1618 else if ((newRefPosition->refType) == RefTypeDef && (newRefPosition->getInterval()->isSpecialPutArg))
1619 {
1620 minRegCountForRef++;
1621 }
1622#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1623 else if (newRefPosition->refType == RefTypeUpperVectorSaveDef)
1624 {
1625 // TODO-Cleanup: won't need a register once #18144 is fixed.
1626 minRegCountForRef += 1;
1627 }
1628#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1629
1630 newRefPosition->minRegCandidateCount = minRegCountForRef;
1631 if (newRefPosition->IsActualRef() && doReverseCallerCallee())
1632 {
1633 Interval* interval = newRefPosition->getInterval();
1634 regMaskTP oldAssignment = newRefPosition->registerAssignment;
1635 regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType);
1636 newRefPosition->registerAssignment =
1637 getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef);
1638 if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
1639 !interval->isLocalVar)
1640 {
1641 checkConflictingDefUse(newRefPosition);
1642 }
1643 }
1644 }
1645 }
1646#endif // DEBUG
1647 JITDUMP("\n");
1648}
1649
1650//------------------------------------------------------------------------
1651// buildPhysRegRecords: Make an interval for each physical register
1652//
1653void LinearScan::buildPhysRegRecords()
1654{
1656 for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
1657 {
1658 RegRecord* curr = &physRegs[reg];
1659 curr->init(reg);
1660 }
1661}
1662
1663//------------------------------------------------------------------------
1664// getNonEmptyBlock: Return the first non-empty block starting with 'block'
1665//
1666// Arguments:
1667// block - the BasicBlock from which we start looking
1668//
1669// Return Value:
1670// The first non-empty BasicBlock we find.
1671//
1672BasicBlock* getNonEmptyBlock(BasicBlock* block)
1673{
1674 while (block != nullptr && block->bbTreeList == nullptr)
1675 {
1676 BasicBlock* nextBlock = block->bbNext;
1677 // Note that here we use the version of NumSucc that does not take a compiler.
1678 // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan.
1679 // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which
1680 // case we don't need the version that takes a compiler.
1681 assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE)));
1682 // sometimes the first block is empty and ends with an uncond branch
1683 // assert( block->GetSucc(0) == nextBlock);
1684 block = nextBlock;
1685 }
1686 assert(block != nullptr && block->bbTreeList != nullptr);
1687 return block;
1688}
1689
1690//------------------------------------------------------------------------
1691// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block
1692//
1693// Notes:
1694// Prior to calling this method, 'currentLiveVars' must be set to the set of register
1695// candidate variables that are liveIn to the first block.
1696// For each register candidate that is live-in to the first block:
1697// - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created.
1698// - Otherwise, it will be marked as spilled, since it will not be assigned a register
1699// on entry and will be loaded from memory on the undefined path.
1700// Note that, when the compInitMem option is not set, we may encounter these on
1701// paths that are protected by the same condition as an earlier def. However, since
1702// we don't do the analysis to determine this - and couldn't rely on always identifying
1703// such cases even if we tried - we must conservatively treat the undefined path as
1704// being possible. This is a relatively rare case, so the introduced conservatism is
1705// not expected to warrant the analysis required to determine the best placement of
1706// an initialization.
1707//
1708void LinearScan::insertZeroInitRefPositions()
1709{
1710 assert(enregisterLocalVars);
1711#ifdef DEBUG
1712 VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn));
1713 assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars));
1714#endif // DEBUG
1715
1716 // insert defs for this, then a block boundary
1717
1718 VarSetOps::Iter iter(compiler, currentLiveVars);
1719 unsigned varIndex = 0;
1720 while (iter.NextElem(&varIndex))
1721 {
1722 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
1723 LclVarDsc* varDsc = compiler->lvaTable + varNum;
1724 if (!varDsc->lvIsParam && isCandidateVar(varDsc))
1725 {
1726 JITDUMP("V%02u was live in to first block:", varNum);
1727 Interval* interval = getIntervalForLocalVar(varIndex);
1728 if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))
1729 {
1730 JITDUMP(" creating ZeroInit\n");
1731 GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
1732 RefPosition* pos =
1733 newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
1734 varDsc->lvMustInit = true;
1735 }
1736 else
1737 {
1738 setIntervalAsSpilled(interval);
1739 JITDUMP(" marking as spilled\n");
1740 }
1741 }
1742 }
1743}
1744
1745#if defined(UNIX_AMD64_ABI)
1746//------------------------------------------------------------------------
1747// unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems.
1748//
1749// Arguments:
1750// argDsc - the LclVarDsc for the argument of interest
1751//
1752// Notes:
1753// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
//    for how the state for an argument is updated for Unix non-structs and Windows AMD64 structs.
1755//
1756void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
1757{
1758 assert(varTypeIsStruct(argDsc));
1759 RegState* intRegState = &compiler->codeGen->intRegState;
1760 RegState* floatRegState = &compiler->codeGen->floatRegState;
1761
1762 if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
1763 {
1764 if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
1765 {
1766 assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
1767 floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
1768 }
1769 else
1770 {
1771 assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
1772 intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
1773 }
1774 }
1775
1776 if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
1777 {
1778 if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
1779 {
1780 assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
1781 floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
1782 }
1783 else
1784 {
1785 assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
1786 intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
1787 }
1788 }
1789}
1790
1791#endif // defined(UNIX_AMD64_ABI)
1792
1793//------------------------------------------------------------------------
1794// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
1795// regState (either compiler->intRegState or compiler->floatRegState),
1796// with the lvArgReg on "argDsc"
1797//
1798// Arguments:
1799// argDsc - the argument for which the state is to be updated.
1800//
1801// Return Value: None
1802//
1803// Assumptions:
1804// The argument is live on entry to the function
1805// (or is untracked and therefore assumed live)
1806//
1807// Notes:
1808// This relies on a method in regAlloc.cpp that is shared between LSRA
1809// and regAlloc. It is further abstracted here because regState is updated
1810// separately for tracked and untracked variables in LSRA.
1811//
1812void LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
1813{
1814#if defined(UNIX_AMD64_ABI)
    // For System V AMD64 calls the argDsc can have 2 registers (for structs). Handle them here.
1816 // Handle them here.
1817 if (varTypeIsStruct(argDsc))
1818 {
1819 unixAmd64UpdateRegStateForArg(argDsc);
1820 }
1821 else
1822#endif // defined(UNIX_AMD64_ABI)
1823 {
1824 RegState* intRegState = &compiler->codeGen->intRegState;
1825 RegState* floatRegState = &compiler->codeGen->floatRegState;
        // In the case of AMD64 we'll still use the floating point registers
        // to model the register usage for arguments on vararg calls, so
        // we ignore the varargs condition when determining whether to use
        // XMM registers for setting up the call.
1830 bool isFloat = (isFloatRegType(argDsc->lvType)
1831#ifndef _TARGET_AMD64_
1832 && !compiler->info.compIsVarArgs
1833#endif
1834 && !compiler->opts.compUseSoftFP);
1835
1836 if (argDsc->lvIsHfaRegArg())
1837 {
1838 isFloat = true;
1839 }
1840
1841 if (isFloat)
1842 {
1843 JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
1844 compiler->raUpdateRegStateForArg(floatRegState, argDsc);
1845 }
1846 else
1847 {
1848 JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
1849#if FEATURE_MULTIREG_ARGS
1850 if (argDsc->lvOtherArgReg != REG_NA)
1851 {
1852 JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg));
1853 }
1854#endif // FEATURE_MULTIREG_ARGS
1855 compiler->raUpdateRegStateForArg(intRegState, argDsc);
1856 }
1857 }
1858}
1859
1860//------------------------------------------------------------------------
1861// buildIntervals: The main entry point for building the data structures over
1862// which we will do register allocation.
1863//
1864void LinearScan::buildIntervals()
1865{
1866 BasicBlock* block;
1867
1868 JITDUMP("\nbuildIntervals ========\n");
1869
1870 // Build (empty) records for all of the physical registers
1871 buildPhysRegRecords();
1872
1873#ifdef DEBUG
1874 if (VERBOSE)
1875 {
1876 printf("\n-----------------\n");
1877 printf("LIVENESS:\n");
1878 printf("-----------------\n");
1879 foreach_block(compiler, block)
1880 {
1881 printf(FMT_BB " use def in out\n", block->bbNum);
1882 dumpConvertedVarSet(compiler, block->bbVarUse);
1883 printf("\n");
1884 dumpConvertedVarSet(compiler, block->bbVarDef);
1885 printf("\n");
1886 dumpConvertedVarSet(compiler, block->bbLiveIn);
1887 printf("\n");
1888 dumpConvertedVarSet(compiler, block->bbLiveOut);
1889 printf("\n");
1890 }
1891 }
1892#endif // DEBUG
1893
1894#if DOUBLE_ALIGN
1895 // We will determine whether we should double align the frame during
1896 // identifyCandidates(), but we initially assume that we will not.
1897 doDoubleAlign = false;
1898#endif
1899
1900 identifyCandidates();
1901
1902 // Figure out if we're going to use a frame pointer. We need to do this before building
1903 // the ref positions, because those objects will embed the frame register in various register masks
1904 // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will
1905 // remove the frame pointer from the masks.
1906 setFrameType();
1907
1908 DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE));
1909
1910 // second part:
1911 JITDUMP("\nbuildIntervals second part ========\n");
1912 currentLoc = 0;
1913 // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were
1914 // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering.
1915 // Previously, the block sequencing was done for the (formerly separate) Build pass,
1916 // and the curBBNum was left as the last block sequenced. This block was then used to set the
1917 // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the
1918 // normalized entry weight (compiler->fgCalledCount), but that results in a net regression.
1919 if (!blockSequencingDone)
1920 {
1921 setBlockSequence();
1922 }
1923 curBBNum = blockSequence[bbSeqCount - 1]->bbNum;
1924
1925 // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex.
1926 // Assign these RefPositions to the (nonexistent) BB0.
1928
1929 LclVarDsc* argDsc;
1930 unsigned int lclNum;
1931
1932 RegState* intRegState = &compiler->codeGen->intRegState;
1933 RegState* floatRegState = &compiler->codeGen->floatRegState;
1934 intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
1935 floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
1936
1937 for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
1938 {
1939 lclNum = compiler->lvaTrackedToVarNum[varIndex];
1940 argDsc = &(compiler->lvaTable[lclNum]);
1941
1942 if (!argDsc->lvIsParam)
1943 {
1944 continue;
1945 }
1946
1947 // Only reserve a register if the argument is actually used.
1948 // Is it dead on entry? If compJmpOpUsed is true, then the arguments
1949 // have to be kept alive, so we have to consider it as live on entry.
1950 // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we
1951 // won't have done dataflow on it, but it needs to be marked as live-in so
1952 // it will get saved in the prolog.
1953 if (!compiler->compJmpOpUsed && argDsc->lvRefCnt() == 0 && !compiler->opts.compDbgCode)
1954 {
1955 continue;
1956 }
1957
1958 if (argDsc->lvIsRegArg)
1959 {
1960 updateRegStateForArg(argDsc);
1961 }
1962
1963 if (isCandidateVar(argDsc))
1964 {
1965 Interval* interval = getIntervalForLocalVar(varIndex);
1966 regMaskTP mask = allRegs(TypeGet(argDsc));
1967 if (argDsc->lvIsRegArg)
1968 {
1969 // Set this interval as currently assigned to that register
1970 regNumber inArgReg = argDsc->lvArgReg;
1971 assert(inArgReg < REG_COUNT);
1972 mask = genRegMask(inArgReg);
1973 assignPhysReg(inArgReg, interval);
1974 }
1975 RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask);
1976 }
1977 else if (varTypeIsStruct(argDsc->lvType))
1978 {
1979 for (unsigned fieldVarNum = argDsc->lvFieldLclStart;
1980 fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum)
1981 {
1982 LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]);
1983 if (fieldVarDsc->lvLRACandidate)
1984 {
1985 assert(fieldVarDsc->lvTracked);
1986 Interval* interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex);
1987 RefPosition* pos =
1988 newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc)));
1989 }
1990 }
1991 }
1992 else
1993 {
1994 // We can overwrite the register (i.e. codegen saves it on entry)
1995 assert(argDsc->lvRefCnt() == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister ||
1996 !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode));
1997 }
1998 }
1999
2000 // Now set up the reg state for the non-tracked args
2001 // (We do this here because we want to generate the ParamDef RefPositions in tracked
    // order, so that the loop above doesn't visit the non-tracked args.)
2003
2004 for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++)
2005 {
2006 argDsc = &(compiler->lvaTable[argNum]);
2007
2008 if (argDsc->lvPromotedStruct())
2009 {
2010 noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
2011
2012 unsigned fieldVarNum = argDsc->lvFieldLclStart;
2013 argDsc = &(compiler->lvaTable[fieldVarNum]);
2014 }
2015 noway_assert(argDsc->lvIsParam);
2016 if (!argDsc->lvTracked && argDsc->lvIsRegArg)
2017 {
2018 updateRegStateForArg(argDsc);
2019 }
2020 }
2021
2022 // If there is a secret stub param, it is also live in
2023 if (compiler->info.compPublishStubParam)
2024 {
2025 intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
2026 }
2027
2028 BasicBlock* predBlock = nullptr;
2029 BasicBlock* prevBlock = nullptr;
2030
2031 // Initialize currentLiveVars to the empty set. We will set it to the current
2032 // live-in at the entry to each block (this will include the incoming args on
2033 // the first block).
2034 VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
2035
2036 for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
2037 {
2038 JITDUMP("\nNEW BLOCK " FMT_BB "\n", block->bbNum);
2039
2040 bool predBlockIsAllocated = false;
2041 predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
2042 if (predBlock)
2043 {
2044 JITDUMP("\n\nSetting " FMT_BB " as the predecessor for determining incoming variable registers of " FMT_BB
2045 "\n",
2046 block->bbNum, predBlock->bbNum);
2047 assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
2048 blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
2049 }
2050
2051 if (enregisterLocalVars)
2052 {
2053 VarSetOps::AssignNoCopy(compiler, currentLiveVars,
2054 VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn));
2055
2056 if (block == compiler->fgFirstBB)
2057 {
2058 insertZeroInitRefPositions();
2059 // The first real location is at 1; 0 is for the entry.
2060 currentLoc = 1;
2061 }
2062
2063 // Any lclVars live-in to a block are resolution candidates.
2064 VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
2065
2066 // Determine if we need any DummyDefs.
2067 // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
            // Note that it's possible to have uses of uninitialized variables, in which case even the first
2069 // block may require DummyDefs, which we are not currently adding - this means that these variables
2070 // will always be considered to be in memory on entry (and reloaded when the use is encountered).
2071 // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
2072 // variables (which may actually be initialized along the dynamically executed paths, but not
2073 // on all static paths), we wind up with excessive liveranges for some of these variables.
2074 VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
2075 if (predBlock)
2076 {
2077 // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
2078 VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
2079 }
2080 bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
2081
2082 // Create dummy def RefPositions
2083
2084 if (needsDummyDefs)
2085 {
2086 // If we are using locations from a predecessor, we should never require DummyDefs.
2087 assert(!predBlockIsAllocated);
2088
2089 JITDUMP("Creating dummy definitions\n");
2090 VarSetOps::Iter iter(compiler, newLiveIn);
2091 unsigned varIndex = 0;
2092 while (iter.NextElem(&varIndex))
2093 {
2094 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
2095 LclVarDsc* varDsc = compiler->lvaTable + varNum;
2096 // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
2097 // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
2098 if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
2099 {
2100 Interval* interval = getIntervalForLocalVar(varIndex);
2101 RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
2102 allRegs(interval->registerType));
2103 }
2104 }
2105 JITDUMP("Finished creating dummy definitions\n\n");
2106 }
2107 }
2108
2109 // Add a dummy RefPosition to mark the block boundary.
2110 // Note that we do this AFTER adding the exposed uses above, because the
2111 // register positions for those exposed uses need to be recorded at
2112 // this point.
2113
2114 RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
2115 currentLoc += 2;
2116 JITDUMP("\n");
2117
2118 LIR::Range& blockRange = LIR::AsRange(block);
2119 for (GenTree* node : blockRange.NonPhiNodes())
2120 {
            // We increment the number position of each tree node by 2 so that a node that implicitly does a
            // dual definition of temps (the long case) has an idle spot reserved for the second def, rather
            // than requiring messy adjustments that would be needed if we incremented the position by only one.
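            // For example (illustratively), if the RefTypeBB for this block is at location 10, the first
            // node gets location 12, the next 14, and so on; within a node, uses are built at the node's
            // location and defs at location + 1 (see BuildUse and BuildDef below).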
2125 CLANG_FORMAT_COMMENT_ANCHOR;
2126
2127#ifdef DEBUG
2128 node->gtSeqNum = currentLoc;
            // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the
            // register value. Although the assignment below looks like a no-op, it sets the tag.
2132 node->gtRegNum = node->gtRegNum;
2133#endif
2134
2135 buildRefPositionsForNode(node, block, currentLoc);
2136
2137#ifdef DEBUG
2138 if (currentLoc > maxNodeLocation)
2139 {
2140 maxNodeLocation = currentLoc;
2141 }
2142#endif // DEBUG
2143 currentLoc += 2;
2144 }
2145
2146 // Note: the visited set is cleared in LinearScan::doLinearScan()
2147 markBlockVisited(block);
2148 if (!defList.IsEmpty())
2149 {
2150 INDEBUG(dumpDefList());
2151 assert(!"Expected empty defList at end of block");
2152 }
2153
2154 if (enregisterLocalVars)
2155 {
2156 // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
2157 // next block, or any unvisited successors.
2158 // This will address lclVars that are live on a backedge, as well as those that are kept
2159 // live at a GT_JMP.
2160 //
2161 // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP,
            // and the jmp call is represented by a GT_JMP node, which is a leaf node.
            // The liveness phase keeps all of the method's arguments live until the end of the
            // block by adding them to the live-out set of the block containing the GT_JMP.
2165 //
2166 // The target of a GT_JMP implicitly uses all the current method arguments, however
2167 // there are no actual references to them. This can cause LSRA to assert, because
2168 // the variables are live but it sees no references. In order to correctly model the
2169 // liveness of these arguments, we add dummy exposed uses, in the same manner as for
2170 // backward branches. This will happen automatically via expUseSet.
2171 //
2172 // Note that a block ending with GT_JMP has no successors and hence the variables
2173 // for which dummy use ref positions are added are arguments of the method.
2174
2175 VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
2176 VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars);
2177 BasicBlock* nextBlock = getNextBlock();
2178 if (nextBlock != nullptr)
2179 {
2180 VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
2181 }
2182 for (BasicBlock* succ : block->GetAllSuccs(compiler))
2183 {
2184 if (VarSetOps::IsEmpty(compiler, expUseSet))
2185 {
2186 break;
2187 }
2188
2189 if (isBlockVisited(succ))
2190 {
2191 continue;
2192 }
2193 VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
2194 }
2195
2196 if (!VarSetOps::IsEmpty(compiler, expUseSet))
2197 {
2198 JITDUMP("Exposed uses:");
2199 VarSetOps::Iter iter(compiler, expUseSet);
2200 unsigned varIndex = 0;
2201 while (iter.NextElem(&varIndex))
2202 {
2203 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
2204 LclVarDsc* varDsc = compiler->lvaTable + varNum;
2205 assert(isCandidateVar(varDsc));
2206 Interval* interval = getIntervalForLocalVar(varIndex);
2207 RefPosition* pos =
2208 newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
2209 JITDUMP(" V%02u", varNum);
2210 }
2211 JITDUMP("\n");
2212 }
2213
2214 // Clear the "last use" flag on any vars that are live-out from this block.
2215 {
2216 VARSET_TP bbLiveDefs(VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveOut));
2217 VarSetOps::Iter iter(compiler, bbLiveDefs);
2218 unsigned varIndex = 0;
2219 while (iter.NextElem(&varIndex))
2220 {
2221 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
2222 LclVarDsc* const varDsc = &compiler->lvaTable[varNum];
2223 assert(isCandidateVar(varDsc));
2224 RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition;
2225 // We should be able to assert that lastRP is non-null if it is live-out, but sometimes liveness
2226 // lies.
2227 if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum))
2228 {
2229 lastRP->lastUse = false;
2230 }
2231 }
2232 }
2233
2234#ifdef DEBUG
2235 checkLastUses(block);
2236
2237 if (VERBOSE)
2238 {
2239 printf("use: ");
2240 dumpConvertedVarSet(compiler, block->bbVarUse);
2241 printf("\ndef: ");
2242 dumpConvertedVarSet(compiler, block->bbVarDef);
2243 printf("\n");
2244 }
2245#endif // DEBUG
2246 }
2247
2248 prevBlock = block;
2249 }
2250
2251 if (enregisterLocalVars)
2252 {
2253 if (compiler->lvaKeepAliveAndReportThis())
2254 {
2255 // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end
2256 unsigned keepAliveVarNum = compiler->info.compThisArg;
2257 assert(compiler->info.compIsStatic == false);
2258 LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum;
2259 if (isCandidateVar(varDsc))
2260 {
2261 JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n");
2262 Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
2263 RefPosition* pos =
2264 newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
2265 }
2266 }
2267
2268#ifdef DEBUG
2269 if (getLsraExtendLifeTimes())
2270 {
2271 LclVarDsc* varDsc;
2272 for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
2273 {
2274 if (varDsc->lvLRACandidate)
2275 {
2276 JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum);
2277 Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
2278 RefPosition* pos =
2279 newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
2280 }
2281 }
2282 }
2283#endif // DEBUG
2284 }
2285
2286 // If the last block has successors, create a RefTypeBB to record
2287 // what's live
2288
2289 if (prevBlock->NumSucc(compiler) > 0)
2290 {
2291 RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
2292 }
2293
2294#ifdef DEBUG
2295 // Make sure we don't have any blocks that were not visited
2296 foreach_block(compiler, block)
2297 {
2298 assert(isBlockVisited(block));
2299 }
2300
2301 if (VERBOSE)
2302 {
2303 lsraDumpIntervals("BEFORE VALIDATING INTERVALS");
2304 dumpRefPositions("BEFORE VALIDATING INTERVALS");
2305 validateIntervals();
2306 }
2307#endif // DEBUG
2308}
2309
2310#ifdef DEBUG
2311//------------------------------------------------------------------------
2312// validateIntervals: A DEBUG-only method that checks that the lclVar RefPositions
2313// do not reflect uses of undefined values
2314//
2315// Notes: If an undefined use is encountered, it merely prints a message.
2316//
2317// TODO-Cleanup: This should probably assert, or at least print the message only
2318// when doing a JITDUMP.
2319//
2320void LinearScan::validateIntervals()
2321{
2322 if (enregisterLocalVars)
2323 {
2324 for (unsigned i = 0; i < compiler->lvaTrackedCount; i++)
2325 {
2326 if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate)
2327 {
2328 continue;
2329 }
2330 Interval* interval = getIntervalForLocalVar(i);
2331
2332 bool defined = false;
2333 printf("-----------------\n");
2334 for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
2335 {
2336 ref->dump();
2337 RefType refType = ref->refType;
2338 if (!defined && RefTypeIsUse(refType))
2339 {
2340 if (compiler->info.compMethodName != nullptr)
2341 {
2342 printf("%s: ", compiler->info.compMethodName);
2343 }
2344 printf("LocalVar V%02u: undefined use at %u\n", interval->varNum, ref->nodeLocation);
2345 }
2346 // Note that there can be multiple last uses if they are on disjoint paths,
2347 // so we can't really check the lastUse flag
2348 if (ref->lastUse)
2349 {
2350 defined = false;
2351 }
2352 if (RefTypeIsDef(refType))
2353 {
2354 defined = true;
2355 }
2356 }
2357 }
2358 }
2359}
2360#endif // DEBUG
2361
2362//------------------------------------------------------------------------
2363// BuildDef: Build a RefTypeDef RefPosition for the given node
2364//
2365// Arguments:
2366// tree - The node that defines a register
2367// dstCandidates - The candidate registers for the definition
2368// multiRegIdx - The index of the definition, defaults to zero.
2369// Only non-zero for multi-reg nodes.
2370//
2371// Return Value:
2372// The newly created RefPosition.
2373//
2374// Notes:
2375// Adds the RefInfo for the definition to the defList.
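//    As an illustrative sketch, a simple BuildNode handler (see BuildSimple below) pairs this with
//    BuildUse, building the uses at 'currentLoc' and the def at 'currentLoc + 1':
//
//        int srcCount = BuildBinaryUses(tree->AsOp());
//        BuildDef(tree);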
2376//
2377RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int multiRegIdx)
2378{
2379 assert(!tree->isContained());
2380 RegisterType type = getDefType(tree);
2381
2382 if (dstCandidates != RBM_NONE)
2383 {
2384 assert((tree->gtRegNum == REG_NA) || (dstCandidates == genRegMask(tree->GetRegByIndex(multiRegIdx))));
2385 }
2386
2387 if (tree->IsMultiRegNode())
2388 {
2389 type = tree->GetRegTypeByIndex(multiRegIdx);
2390 }
2391
2392 Interval* interval = newInterval(type);
2393 if (tree->gtRegNum != REG_NA)
2394 {
2395 if (!tree->IsMultiRegNode() || (multiRegIdx == 0))
2396 {
2397 assert((dstCandidates == RBM_NONE) || (dstCandidates == genRegMask(tree->gtRegNum)));
2398 dstCandidates = genRegMask(tree->gtRegNum);
2399 }
2400 else
2401 {
2402 assert(isSingleRegister(dstCandidates));
2403 }
2404 }
2405#ifdef _TARGET_X86_
2406 else if (varTypeIsByte(tree))
2407 {
2408 if (dstCandidates == RBM_NONE)
2409 {
2410 dstCandidates = allRegs(TYP_INT);
2411 }
2412 dstCandidates &= ~RBM_NON_BYTE_REGS;
2413 assert(dstCandidates != RBM_NONE);
2414 }
2415#endif // _TARGET_X86_
2416 if (pendingDelayFree)
2417 {
2418 interval->hasInterferingUses = true;
2419 // pendingDelayFree = false;
2420 }
2421 RefPosition* defRefPosition =
2422 newRefPosition(interval, currentLoc + 1, RefTypeDef, tree, dstCandidates, multiRegIdx);
2423 if (tree->IsUnusedValue())
2424 {
2425 defRefPosition->isLocalDefUse = true;
2426 defRefPosition->lastUse = true;
2427 }
2428 else
2429 {
2430 RefInfoListNode* refInfo = listNodePool.GetNode(defRefPosition, tree);
2431 defList.Append(refInfo);
2432 }
2433 if (tgtPrefUse != nullptr)
2434 {
2435 interval->assignRelatedIntervalIfUnassigned(tgtPrefUse->getInterval());
2436 }
2437 return defRefPosition;
2438}
2439
2440//------------------------------------------------------------------------
// BuildDefs: Build one or more RefTypeDef RefPositions for the given node
2442//
2443// Arguments:
2444// tree - The node that defines a register
2445// dstCount - The number of registers defined by the node
2446// dstCandidates - the candidate registers for the definition
2447//
2448// Notes:
2449// Adds the RefInfo for the definitions to the defList.
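//    For example (illustratively), a multi-reg call returning a value in two registers has dstCount == 2
//    and dstCandidates set to the mask of its ABI return registers; each def built below is then
//    constrained to the appropriate single return register via the ReturnTypeDesc.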
2450//
2451void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates)
2452{
2453 bool fixedReg = false;
2454 if ((dstCount > 1) && (dstCandidates != RBM_NONE) && ((int)genCountBits(dstCandidates) == dstCount))
2455 {
2456 fixedReg = true;
2457 }
2458 ReturnTypeDesc* retTypeDesc = nullptr;
2459 if (tree->IsMultiRegCall())
2460 {
2461 retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
2462 }
2463 for (int i = 0; i < dstCount; i++)
2464 {
2465 regMaskTP thisDstCandidates;
2466 if (fixedReg)
2467 {
            // In the case of a multi-reg call node, we have to query the i'th position return register.
2469 // For all other cases of multi-reg definitions, the registers must be in sequential order.
2470 if (retTypeDesc != nullptr)
2471 {
2472 thisDstCandidates = genRegMask(tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i));
2473 assert((dstCandidates & thisDstCandidates) != RBM_NONE);
2474 }
2475 else
2476 {
2477 thisDstCandidates = genFindLowestBit(dstCandidates);
2478 }
2479 dstCandidates &= ~thisDstCandidates;
2480 }
2481 else
2482 {
2483 thisDstCandidates = dstCandidates;
2484 }
2485 BuildDef(tree, thisDstCandidates, i);
2486 }
2487}
2488
2489//------------------------------------------------------------------------
// BuildDefsWithKills: Build one or more RefTypeDef RefPositions for the given node,
//                     as well as kills as specified by the given mask.
2492//
2493// Arguments:
2494// tree - The node that defines a register
2495// dstCount - The number of registers defined by the node
2496// dstCandidates - The candidate registers for the definition
2497// killMask - The mask of registers killed by this node
2498//
2499// Notes:
2500// Adds the RefInfo for the definitions to the defList.
2501// The def and kill functionality is folded into a single method so that the
2502// save and restores of upper vector registers can be bracketed around the def.
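//    The RefPositions are thus appended in the order: kills, upper vector saves (if any), the def(s),
//    and finally the upper vector restores.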
2503//
2504void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask)
2505{
2506 // Generate Kill RefPositions
2507 assert(killMask == getKillSetForNode(tree));
2508#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2509 VARSET_TP liveLargeVectors(VarSetOps::UninitVal());
2510 bool doLargeVectorRestore = false;
2511#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2512 if (killMask != RBM_NONE)
2513 {
2514 buildKillPositionsForNode(tree, currentLoc + 1, killMask);
2515#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2516 if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE))
2517 {
2518 // Build RefPositions for saving any live large vectors.
2519 // This must be done after the kills, so that we know which large vectors are still live.
2520 VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
2521 buildUpperVectorSaveRefPositions(tree, currentLoc + 1, killMask));
2522 doLargeVectorRestore = true;
2523 }
2524#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2525 }
2526
2527 // Now, create the Def(s)
2528 BuildDefs(tree, dstCount, dstCandidates);
2529
2530#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2531 // Finally, generate the UpperVectorRestores
2532 if (doLargeVectorRestore)
2533 {
2534 buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors);
2535 }
2536#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2537}
2538
2539//------------------------------------------------------------------------
2540// BuildUse: Remove the RefInfoListNode for the given multi-reg index of the given node from
2541// the defList, and build a use RefPosition for the associated Interval.
2542//
2543// Arguments:
2544// operand - The node of interest
2545// candidates - The register candidates for the use
2546// multiRegIdx - The index of the multireg def/use
2547//
2548// Return Value:
2549// The newly created use RefPosition
2550//
2551// Notes:
2552// The node must not be contained, and must have been processed by buildRefPositionsForNode().
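//    For a register-candidate lclVar the use is built on the lclVar's own Interval; otherwise the
//    RefInfoListNode for the operand's def is removed from the defList and the use is built on the
//    Interval of that def.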
2553//
2554RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx)
2555{
2556 assert(!operand->isContained());
2557 Interval* interval;
2558 bool regOptional = operand->IsRegOptional();
2559
2560 if (isCandidateLocalRef(operand))
2561 {
2562 interval = getIntervalForLocalVarNode(operand->AsLclVarCommon());
2563
2564 // We have only approximate last-use information at this point. This is because the
2565 // execution order doesn't actually reflect the true order in which the localVars
2566 // are referenced - but the order of the RefPositions will, so we recompute it after
2567 // RefPositions are built.
2568 // Use the old value for setting currentLiveVars - note that we do this with the
2569 // not-quite-correct setting of lastUse. However, this is OK because
2570 // 1) this is only for preferencing, which doesn't require strict correctness, and
2571 // 2) the cases where these out-of-order uses occur should not overlap a kill.
2572 // TODO-Throughput: clean this up once we have the execution order correct. At that point
2573 // we can update currentLiveVars at the same place that we create the RefPosition.
2574 if ((operand->gtFlags & GTF_VAR_DEATH) != 0)
2575 {
2576 unsigned varIndex = interval->getVarIndex(compiler);
2577 VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex);
2578 }
2579 }
2580 else
2581 {
2582 RefInfoListNode* refInfo = defList.removeListNode(operand, multiRegIdx);
2583 RefPosition* defRefPos = refInfo->ref;
2584 assert(defRefPos->multiRegIdx == multiRegIdx);
2585 interval = defRefPos->getInterval();
2586 listNodePool.ReturnNode(refInfo);
2587 operand = nullptr;
2588 }
2589 RefPosition* useRefPos = newRefPosition(interval, currentLoc, RefTypeUse, operand, candidates, multiRegIdx);
2590 useRefPos->setAllocateIfProfitable(regOptional);
2591 return useRefPos;
2592}
2593
2594//------------------------------------------------------------------------
2595// BuildIndirUses: Build Use RefPositions for an indirection that might be contained
2596//
2597// Arguments:
2598// indirTree - The indirection node of interest
2599//
2600// Return Value:
2601// The number of source registers used by the *parent* of this node.
2602//
2603// Notes:
2604// This method may only be used if the candidates are the same for all sources.
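//    For example (illustratively), for an indirection whose address is a contained GT_LEA of the form
//    [base + index*scale + offset], uses are built for 'base' and 'index' when they are present and not
//    themselves contained; the scale and offset require no registers.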
2605//
2606int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates)
2607{
2608 GenTree* const addr = indirTree->gtOp1;
2609 if (!addr->isContained())
2610 {
2611 BuildUse(addr, candidates);
2612 return 1;
2613 }
2614 if (!addr->OperIs(GT_LEA))
2615 {
2616 return 0;
2617 }
2618
2619 GenTreeAddrMode* const addrMode = addr->AsAddrMode();
2620
2621 unsigned srcCount = 0;
2622 if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
2623 {
2624 BuildUse(addrMode->Base(), candidates);
2625 srcCount++;
2626 }
2627 if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
2628 {
2629 BuildUse(addrMode->Index(), candidates);
2630 srcCount++;
2631 }
2632 return srcCount;
2633}
2634
2635//------------------------------------------------------------------------
2636// BuildOperandUses: Build Use RefPositions for an operand that might be contained.
2637//
2638// Arguments:
2639// node - The node of interest
2640//
2641// Return Value:
2642// The number of source registers used by the *parent* of this node.
2643//
2644int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates)
2645{
2646 if (!node->isContained())
2647 {
2648 BuildUse(node, candidates);
2649 return 1;
2650 }
2651
2652#if !defined(_TARGET_64BIT_)
2653 if (node->OperIs(GT_LONG))
2654 {
2655 return BuildBinaryUses(node->AsOp(), candidates);
2656 }
2657#endif // !defined(_TARGET_64BIT_)
2658 if (node->OperIsIndir())
2659 {
2660 return BuildIndirUses(node->AsIndir(), candidates);
2661 }
2662 if (node->OperIsHWIntrinsic())
2663 {
2664 BuildUse(node->gtGetOp1(), candidates);
2665 return 1;
2666 }
2667
2668 return 0;
2669}
2670
2671//------------------------------------------------------------------------
2672// setDelayFree: Mark a RefPosition as delayRegFree, and set pendingDelayFree
2673//
2674// Arguments:
2675// use - The use RefPosition to mark
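//
// Notes:
//    A delayRegFree use is not freed until after the def of its consuming node, so that def cannot be
//    assigned the same register as the use. 'pendingDelayFree' records that such a use has been built,
//    so that BuildDef can mark the defined Interval as having interfering uses.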
2676//
2677void LinearScan::setDelayFree(RefPosition* use)
2678{
2679 use->delayRegFree = true;
2680 pendingDelayFree = true;
2681}
2682
2683//------------------------------------------------------------------------
2684// BuildDelayFreeUses: Build Use RefPositions for an operand that might be contained,
2685// and which need to be marked delayRegFree
2686//
2687// Arguments:
2688// node - The node of interest
2689//
2690// Return Value:
2691// The number of source registers used by the *parent* of this node.
2692//
2693int LinearScan::BuildDelayFreeUses(GenTree* node, regMaskTP candidates)
2694{
2695 RefPosition* use;
2696 if (!node->isContained())
2697 {
2698 use = BuildUse(node, candidates);
2699 setDelayFree(use);
2700 return 1;
2701 }
2702 if (node->OperIsHWIntrinsic())
2703 {
2704 use = BuildUse(node->gtGetOp1(), candidates);
2705 setDelayFree(use);
2706 return 1;
2707 }
2708 if (!node->OperIsIndir())
2709 {
2710 return 0;
2711 }
2712 GenTreeIndir* indirTree = node->AsIndir();
2713 GenTree* addr = indirTree->gtOp1;
2714 if (!addr->isContained())
2715 {
2716 use = BuildUse(addr, candidates);
2717 setDelayFree(use);
2718 return 1;
2719 }
2720 if (!addr->OperIs(GT_LEA))
2721 {
2722 return 0;
2723 }
2724
2725 GenTreeAddrMode* const addrMode = addr->AsAddrMode();
2726
2727 unsigned srcCount = 0;
2728 if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
2729 {
2730 use = BuildUse(addrMode->Base(), candidates);
2731 setDelayFree(use);
2732 srcCount++;
2733 }
2734 if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
2735 {
2736 use = BuildUse(addrMode->Index(), candidates);
2737 setDelayFree(use);
2738 srcCount++;
2739 }
2740 return srcCount;
2741}
2742
2743//------------------------------------------------------------------------
2744// BuildBinaryUses: Get the RefInfoListNodes for the operands of the
2745// given node, and build uses for them.
2746//
2747// Arguments:
2748// node - a GenTreeOp
2749//
2750// Return Value:
2751// The number of actual register operands.
2752//
2753// Notes:
2754// The operands must already have been processed by buildRefPositionsForNode, and their
2755// RefInfoListNodes placed in the defList.
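//    If GTF_REVERSE_OPS is set on the node, op2 was evaluated before op1, so its use is built first,
//    keeping the use RefPositions in execution order.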
2756//
2757int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates)
2758{
2759#ifdef _TARGET_XARCH_
2761 if (node->OperIsBinary() && isRMWRegOper(node))
2762 {
2763 return BuildRMWUses(node, candidates);
2764 }
2765#endif // _TARGET_XARCH_
2766 int srcCount = 0;
2767 GenTree* op1 = node->gtOp1;
2768 GenTree* op2 = node->gtGetOp2IfPresent();
2769 if (node->IsReverseOp() && (op2 != nullptr))
2770 {
2771 srcCount += BuildOperandUses(op2, candidates);
2772 op2 = nullptr;
2773 }
2774 if (op1 != nullptr)
2775 {
2776 srcCount += BuildOperandUses(op1, candidates);
2777 }
2778 if (op2 != nullptr)
2779 {
2780 srcCount += BuildOperandUses(op2, candidates);
2781 }
2782 return srcCount;
2783}
2784
2785//------------------------------------------------------------------------
2786// BuildStoreLoc: Set register requirements for a store of a lclVar
2787//
2788// Arguments:
2789// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
2790//
2791// Notes:
2792// This involves:
2793// - Setting the appropriate candidates for a store of a multi-reg call return value.
2794// - Handling of contained immediates.
2795// - Requesting an internal register for SIMD12 stores.
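//    The RefPositions are built in this order: internal register defs, then source uses, then internal
//    register uses, and finally the def (when storing to a register candidate).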
2796//
2797int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
2798{
2799 GenTree* op1 = storeLoc->gtGetOp1();
2800 int srcCount;
2801 RefPosition* singleUseRef = nullptr;
2802 LclVarDsc* varDsc = &compiler->lvaTable[storeLoc->gtLclNum];
2803
2804// First, define internal registers.
2805#ifdef FEATURE_SIMD
2806 RefPosition* internalFloatDef = nullptr;
2807 if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12))
2808 {
2809 // Need an additional register to extract upper 4 bytes of Vector3.
2810 internalFloatDef = buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs());
2811 }
2812#endif // FEATURE_SIMD
2813
2814 // Second, use source registers.
2815
2816 if (op1->IsMultiRegCall())
2817 {
2818 // This is the case of var = call where call is returning
2819 // a value in multiple return registers.
2820 // Must be a store lclvar.
2821 assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
2822
2823 // srcCount = number of registers in which the value is returned by call
2824 GenTreeCall* call = op1->AsCall();
2825 ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
        srcCount = retTypeDesc->GetReturnRegCount();
2828
2829 for (int i = 0; i < srcCount; ++i)
2830 {
2831 BuildUse(op1, RBM_NONE, i);
2832 }
2833 }
2834#ifndef _TARGET_64BIT_
2835 else if (varTypeIsLong(op1))
2836 {
2837 if (op1->OperIs(GT_MUL_LONG))
2838 {
2839 srcCount = 2;
2840 BuildUse(op1, allRegs(TYP_INT), 0);
2841 BuildUse(op1, allRegs(TYP_INT), 1);
2842 }
2843 else
2844 {
2845 assert(op1->OperIs(GT_LONG));
2846 assert(op1->isContained() && !op1->gtGetOp1()->isContained() && !op1->gtGetOp2()->isContained());
2847 srcCount = BuildBinaryUses(op1->AsOp());
2848 assert(srcCount == 2);
2849 }
2850 }
2851#endif // !_TARGET_64BIT_
2852 else if (op1->isContained())
2853 {
2854 srcCount = 0;
2855 }
2856 else
2857 {
2858 srcCount = 1;
2859 regMaskTP srcCandidates = RBM_NONE;
2860#ifdef _TARGET_X86_
2861 if (varTypeIsByte(storeLoc))
2862 {
2863 srcCandidates = allByteRegs();
2864 }
2865#endif // _TARGET_X86_
2866 singleUseRef = BuildUse(op1, srcCandidates);
2867 }
2868
2869// Third, use internal registers.
2870#ifdef FEATURE_SIMD
2871 buildInternalRegisterUses();
2872#endif // FEATURE_SIMD
2873
2874 // Fourth, define destination registers.
2875
2876 // Add the lclVar to currentLiveVars (if it will remain live)
2877 if (isCandidateVar(varDsc))
2878 {
2879 assert(varDsc->lvTracked);
2880 unsigned varIndex = varDsc->lvVarIndex;
2881 Interval* varDefInterval = getIntervalForLocalVar(varIndex);
2882 if ((storeLoc->gtFlags & GTF_VAR_DEATH) == 0)
2883 {
2884 VarSetOps::AddElemD(compiler, currentLiveVars, varIndex);
2885 }
2886 if (singleUseRef != nullptr)
2887 {
2888 Interval* srcInterval = singleUseRef->getInterval();
2889 if (srcInterval->relatedInterval == nullptr)
2890 {
2891 // Preference the source to the dest, unless this is a non-last-use localVar.
2892 // Note that the last-use info is not correct, but it is a better approximation than preferencing
2893 // the source to the dest, if the source's lifetime extends beyond the dest.
2894 if (!srcInterval->isLocalVar || (singleUseRef->treeNode->gtFlags & GTF_VAR_DEATH) != 0)
2895 {
2896 srcInterval->assignRelatedInterval(varDefInterval);
2897 }
2898 }
2899 else if (!srcInterval->isLocalVar)
2900 {
2901 // Preference the source to dest, if src is not a local var.
2902 srcInterval->assignRelatedInterval(varDefInterval);
2903 }
2904 }
2905 newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, allRegs(storeLoc->TypeGet()));
2906 }
2907 else
2908 {
2909 if (storeLoc->gtOp1->OperIs(GT_BITCAST))
2910 {
2911 storeLoc->gtType = storeLoc->gtOp1->gtType = storeLoc->gtOp1->AsUnOp()->gtOp1->TypeGet();
2912 RegisterType registerType = regType(storeLoc->TypeGet());
2913 noway_assert(singleUseRef != nullptr);
2914
2915 Interval* srcInterval = singleUseRef->getInterval();
2916 srcInterval->registerType = registerType;
2917
2918 RefPosition* srcDefPosition = srcInterval->firstRefPosition;
2919 assert(srcDefPosition != nullptr);
2920 assert(srcDefPosition->refType == RefTypeDef);
2921 assert(srcDefPosition->treeNode == storeLoc->gtOp1);
2922
2923 srcDefPosition->registerAssignment = allRegs(registerType);
2924 singleUseRef->registerAssignment = allRegs(registerType);
2925 }
2926 }
2927
2928 return srcCount;
2929}
2930
2931//------------------------------------------------------------------------
2932// BuildSimple: Builds use RefPositions for trees requiring no special handling
2933//
2934// Arguments:
2935// tree - The node of interest
2936//
2937// Return Value:
2938// The number of use RefPositions created
2939//
2940int LinearScan::BuildSimple(GenTree* tree)
2941{
2942 unsigned kind = tree->OperKind();
2943 int srcCount = 0;
2944 if ((kind & (GTK_CONST | GTK_LEAF)) == 0)
2945 {
2946 assert((kind & GTK_SMPOP) != 0);
2947 srcCount = BuildBinaryUses(tree->AsOp());
2948 }
2949 if (tree->IsValue())
2950 {
2951 BuildDef(tree);
2952 }
2953 return srcCount;
2954}
2955
2956//------------------------------------------------------------------------
// BuildReturn: Build the use RefPositions for a GT_RETURN node.
2958//
2959// Arguments:
2960// tree - The node of interest
2961//
2962// Return Value:
2963// The number of sources consumed by this node.
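//
// Notes:
//    Only the use(s) of the return value are built here; a GT_RETURN creates no defs or kills.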
2964//
2965int LinearScan::BuildReturn(GenTree* tree)
2966{
2967 GenTree* op1 = tree->gtGetOp1();
2968
2969#if !defined(_TARGET_64BIT_)
2970 if (tree->TypeGet() == TYP_LONG)
2971 {
2972 assert((op1->OperGet() == GT_LONG) && op1->isContained());
2973 GenTree* loVal = op1->gtGetOp1();
2974 GenTree* hiVal = op1->gtGetOp2();
2975 BuildUse(loVal, RBM_LNGRET_LO);
2976 BuildUse(hiVal, RBM_LNGRET_HI);
2977 return 2;
2978 }
2979 else
2980#endif // !defined(_TARGET_64BIT_)
2981 if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
2982 {
2983 regMaskTP useCandidates = RBM_NONE;
2984
2985#if FEATURE_MULTIREG_RET
2986 if (varTypeIsStruct(tree))
2987 {
2988 // op1 has to be either an lclvar or a multi-reg returning call
2989 if (op1->OperGet() == GT_LCL_VAR)
2990 {
2991 BuildUse(op1, useCandidates);
2992 }
2993 else
2994 {
2995 noway_assert(op1->IsMultiRegCall());
2996
2997 ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
2998 int srcCount = retTypeDesc->GetReturnRegCount();
2999 useCandidates = retTypeDesc->GetABIReturnRegs();
3000 for (int i = 0; i < srcCount; i++)
3001 {
3002 BuildUse(op1, useCandidates, i);
3003 }
3004 return srcCount;
3005 }
3006 }
3007 else
3008#endif // FEATURE_MULTIREG_RET
3009 {
3010 // Non-struct type return - determine useCandidates
3011 switch (tree->TypeGet())
3012 {
3013 case TYP_VOID:
3014 useCandidates = RBM_NONE;
3015 break;
3016 case TYP_FLOAT:
3017 useCandidates = RBM_FLOATRET;
3018 break;
3019 case TYP_DOUBLE:
3020 // We ONLY want the valid double register in the RBM_DOUBLERET mask.
3021 useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE);
3022 break;
3023 case TYP_LONG:
3024 useCandidates = RBM_LNGRET;
3025 break;
3026 default:
3027 useCandidates = RBM_INTRET;
3028 break;
3029 }
3030 BuildUse(op1, useCandidates);
3031 return 1;
3032 }
3033 }
3034
3035 // No kills or defs.
3036 return 0;
3037}
3038
3039//------------------------------------------------------------------------
3040// supportsSpecialPutArg: Determine if we can support specialPutArgs
3041//
3042// Return Value:
3043// True iff specialPutArg intervals can be supported.
3044//
3045// Notes:
3046// See below.
3047//
3048
3049bool LinearScan::supportsSpecialPutArg()
3050{
3051#if defined(DEBUG) && defined(_TARGET_X86_)
3052 // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
    // leaves only three allocatable registers (eax, ecx, and edx), of which the latter two are also used for the
    // first two integral arguments to a call. This can leave us with too few registers to successfully allocate in
3055 // situations like the following:
3056 //
3057 // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d>
3058 //
3059 // /--* t1026 ref
3060 // t1352 = * putarg_reg ref REG NA
3061 //
3062 // t342 = lclVar int V14 loc6 u:4 REG NA $50c
3063 //
3064 // t343 = const int 1 REG NA $41
3065 //
3066 // /--* t342 int
3067 // +--* t343 int
3068 // t344 = * + int REG NA $495
3069 //
3070 // t345 = lclVar int V04 arg4 u:2 REG NA $100
3071 //
3072 // /--* t344 int
3073 // +--* t345 int
3074 // t346 = * % int REG NA $496
3075 //
3076 // /--* t346 int
3077 // t1353 = * putarg_reg int REG NA
3078 //
3079 // t1354 = lclVar ref V52 tmp35 (last use) REG NA
3080 //
3081 // /--* t1354 ref
3082 // t1355 = * lea(b+0) byref REG NA
3083 //
3084 // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the
3085 // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register
3086 // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to
3087 // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be
3088 // available due to the special put arg.
3089 return getStressLimitRegs() != LSRA_LIMIT_CALLER;
3090#else
3091 return true;
3092#endif
3093}
3094
3095//------------------------------------------------------------------------
// BuildPutArgReg: Build the RefPositions for a PUTARG_REG node.
//
// Arguments:
//    node - The PUTARG_REG node.
//
// Return Value:
//    The number of source registers used.
3107//
3108int LinearScan::BuildPutArgReg(GenTreeUnOp* node)
3109{
3110 assert(node != nullptr);
3111 assert(node->OperIsPutArgReg());
3112 regNumber argReg = node->gtRegNum;
3113 assert(argReg != REG_NA);
3114 bool isSpecialPutArg = false;
3115 int srcCount = 1;
3116 GenTree* op1 = node->gtGetOp1();
3117
3118 // First, handle the GT_OBJ case, which loads into the arg register
3119 // (so we don't set the use to prefer that register for the source address).
3120 if (op1->OperIs(GT_OBJ))
3121 {
3122 GenTreeObj* obj = op1->AsObj();
3123 GenTree* addr = obj->Addr();
3124 unsigned size = obj->gtBlkSize;
3125 assert(size <= TARGET_POINTER_SIZE);
3126 if (addr->OperIsLocalAddr())
3127 {
3128 // We don't need a source register.
3129 assert(addr->isContained());
3130 srcCount = 0;
3131 }
3132 else if (!isPow2(size))
3133 {
3134 // We'll need an internal register to do the odd-size load.
3135 // This can only happen with integer registers.
3136 assert(genIsValidIntReg(argReg));
3137 buildInternalIntRegisterDefForNode(node);
3138 BuildUse(addr);
3139 buildInternalRegisterUses();
3140 }
3141 return srcCount;
3142 }

    // To avoid redundant moves, have the argument operand computed in the
    // register in which the argument is passed to the call.
    regMaskTP    argMask = genRegMask(argReg);
    RefPosition* use     = BuildUse(op1, argMask);

    if (supportsSpecialPutArg() && isCandidateLocalRef(op1) && ((op1->gtFlags & GTF_VAR_DEATH) == 0))
    {
        // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
        // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
        // (which is actually what we hope will happen).
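        // For example (a hypothetical scenario, for illustration only): if the lclVar happens to be assigned
        // ecx and the argument is also passed in ecx, treating this node as a pass-through means its def does
        // not kill the lclVar's assignment to ecx, so no copy is needed to set up the argument.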
        JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");

        // Preference the destination to the interval of the first register defined by the first operand.
        assert(use->getInterval()->isLocalVar);
        isSpecialPutArg = true;
        tgtPrefUse      = use;
    }

#ifdef _TARGET_ARM_
    // If the type of the node is `long`, it is actually a `double` being passed in two integer registers;
    // actual `long` arguments will have been transformed into a field list with two fields.
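    // (For illustration, and as an editorial note inferred from the code below: such a value occupies a pair
    // of consecutive core registers, e.g. r0/r1, which is why a use and a def are built for both argReg and
    // REG_NEXT(argReg).)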
    if (node->TypeGet() == TYP_LONG)
    {
        srcCount++;
        regMaskTP argMaskHi = genRegMask(REG_NEXT(argReg));
        assert(genRegArgNext(argReg) == REG_NEXT(argReg));
        use = BuildUse(op1, argMaskHi, 1);
        BuildDef(node, argMask, 0);
        BuildDef(node, argMaskHi, 1);
    }
    else
#endif // _TARGET_ARM_
    {
        RefPosition* def = BuildDef(node, argMask);
        if (isSpecialPutArg)
        {
            def->getInterval()->isSpecialPutArg = true;
        }
    }
    return srcCount;
}

//------------------------------------------------------------------------
// HandleFloatVarArgs: Handle additional register requirements for a varargs call
//
// Arguments:
//    call                - The call node of interest
//    argNode             - The current argument
//    callHasFloatRegArgs - Out parameter; set to true if this argument is a floating-point register argument
//
// Return Value:
//    None.
//
// Notes:
//    In the case of a varargs call, the ABI dictates that if we have floating point args,
//    we must pass the enregistered arguments in both the integer and floating point registers.
//    Since the integer register is not associated with the arg node, we will reserve it as
//    an internal register on the call so that it is not used during the evaluation of the call node
//    (e.g. for the target).
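//    For example (illustrative; the exact registers depend on the target's varargs convention): under the
//    Windows x64 varargs ABI a floating-point argument passed in XMM1 must also be shadowed in RDX, so
//    reserving RDX here keeps it from being picked for the call target or other temporaries while the call
//    is being evaluated.
//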
void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs)
{
#if FEATURE_VARARG
    if (call->IsVarargs() && varTypeIsFloating(argNode))
    {
        *callHasFloatRegArgs = true;

        // We'll have to return the internal def and then later create a use for it.
        regNumber argReg    = argNode->gtRegNum;
        regNumber targetReg = compiler->getCallArgIntRegister(argReg);

        buildInternalIntRegisterDefForNode(call, genRegMask(targetReg));
    }
#endif // FEATURE_VARARG
}

//------------------------------------------------------------------------
// BuildGCWriteBarrier: Handle additional register requirements for a GC write barrier
//
// Arguments:
//    tree - The STORE_IND for which a write barrier is required
//
// Return Value:
//    The number of sources consumed by this node (always 2: the address and the source value).
//
int LinearScan::BuildGCWriteBarrier(GenTree* tree)
{
    GenTree* dst  = tree;
    GenTree* addr = tree->gtGetOp1();
    GenTree* src  = tree->gtGetOp2();

    // In the case where we are doing a helper assignment, even if the dst
    // is an indir through an lea, we need to actually instantiate the
    // lea in a register.
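    // (Illustration, based on the candidate masks chosen below: even if the destination address could normally
    // be folded into the store as an address mode, the write-barrier helper takes the address as an ordinary
    // argument, so the address computation must produce a value in the helper's expected register.)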
    assert(!addr->isContained() && !src->isContained());
    int       srcCount       = 2;
    regMaskTP addrCandidates = RBM_ARG_0;
    regMaskTP srcCandidates  = RBM_ARG_1;

#if defined(_TARGET_ARM64_)

    // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST)
    // the 'src' goes into x15 (REG_WRITE_BARRIER_SRC)
    //
    addrCandidates = RBM_WRITE_BARRIER_DST;
    srcCandidates  = RBM_WRITE_BARRIER_SRC;

#elif defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS

    bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src);
    if (useOptimizedWriteBarrierHelper)
    {
        // Special write barrier:
        // op1 (addr) goes into REG_WRITE_BARRIER (edx) and
        // op2 (src) goes into any int register.
        addrCandidates = RBM_WRITE_BARRIER;
        srcCandidates  = RBM_WRITE_BARRIER_SRC;
    }

#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS

    BuildUse(addr, addrCandidates);
    BuildUse(src, srcCandidates);

    regMaskTP killMask = getKillSetForStoreInd(tree->AsStoreInd());
    buildKillPositionsForNode(tree, currentLoc + 1, killMask);
    return 2;
}

//------------------------------------------------------------------------
// BuildCmp: Set the register requirements for a compare.
//
// Arguments:
//    tree - The node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
int LinearScan::BuildCmp(GenTree* tree)
{
    assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP));
    regMaskTP dstCandidates = RBM_NONE;
    regMaskTP op1Candidates = RBM_NONE;
    regMaskTP op2Candidates = RBM_NONE;
    GenTree*  op1           = tree->gtGetOp1();
    GenTree*  op2           = tree->gtGetOp2();

#ifdef _TARGET_X86_
    // If the compare is used by a jump, we just need to set the condition codes. If not, then we need
    // to store the result into the low byte of a register, which requires the dst to be a byteable register.
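    // (For example, a SETcc instruction such as `sete al` can only target one of the byte-addressable
    // registers, which is why the def below is restricted to allByteRegs().)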
    if (tree->TypeGet() != TYP_VOID)
    {
        dstCandidates = allByteRegs();
    }
    bool needByteRegs = false;
    if (varTypeIsByte(tree))
    {
        if (!varTypeIsFloating(op1))
        {
            needByteRegs = true;
        }
    }
    // Example1: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
    // ubyte as the result of comparison and if the result needs to be materialized into a reg
    // simply zero extend it to TYP_INT size. Here is an example of generated code:
    //         cmp dl, byte ptr[addr mode]
    //         movzx edx, dl
    else if (varTypeIsByte(op1) && varTypeIsByte(op2))
    {
        needByteRegs = true;
    }
    // Example2: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses
    // ubyte as the result of the comparison and if the result needs to be materialized into a reg
    // simply zero extend it to TYP_INT size.
    else if (varTypeIsByte(op1) && op2->IsCnsIntOrI())
    {
        needByteRegs = true;
    }
    // Example3: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses
    // ubyte as the result of the comparison and if the result needs to be materialized into a reg
    // simply zero extend it to TYP_INT size.
    else if (op1->IsCnsIntOrI() && varTypeIsByte(op2))
    {
        needByteRegs = true;
    }
    if (needByteRegs)
    {
        if (!op1->isContained())
        {
            op1Candidates = allByteRegs();
        }
        if (!op2->isContained())
        {
            op2Candidates = allByteRegs();
        }
    }
#endif // _TARGET_X86_

    int srcCount = BuildOperandUses(op1, op1Candidates);
    srcCount += BuildOperandUses(op2, op2Candidates);
    if (tree->TypeGet() != TYP_VOID)
    {
        BuildDef(tree, dstCandidates);
    }
    return srcCount;
}
