1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX Interval and RefPosition Building XX |
9 | XX XX |
10 | XX This contains the logic for constructing Intervals and RefPositions that XX |
11 | XX is common across architectures. See lsra{arch}.cpp for the architecture- XX |
12 | XX specific methods for building. XX |
13 | XX XX |
14 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
15 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
16 | */ |
17 | |
18 | #include "jitpch.h" |
19 | #ifdef _MSC_VER |
20 | #pragma hdrstop |
21 | #endif |
22 | |
23 | #include "lsra.h" |
24 | |
25 | //------------------------------------------------------------------------ |
26 | // RefInfoList |
27 | //------------------------------------------------------------------------ |
// removeListNode - remove and return the RefInfoListNode for the given GenTree node
29 | // |
30 | // Notes: |
31 | // The BuildNode methods use this helper to retrieve the RefPositions for child nodes |
// from the useList being constructed. Note that a caller that knows the order of the
// operands should simply retrieve them directly.
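//
// For illustration, a hypothetical caller might look like:
//     RefInfoListNode* listNode = useList.removeListNode(op1);
//     RefPosition* useRP = listNode->ref;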
34 | |
35 | RefInfoListNode* RefInfoList::removeListNode(GenTree* node) |
36 | { |
37 | RefInfoListNode* prevListNode = nullptr; |
38 | for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next()) |
39 | { |
40 | if (listNode->treeNode == node) |
41 | { |
42 | assert(listNode->ref->getMultiRegIdx() == 0); |
43 | return removeListNode(listNode, prevListNode); |
44 | } |
45 | prevListNode = listNode; |
46 | } |
assert(!"removeListNode didn't find the node");
48 | unreached(); |
49 | } |
50 | |
51 | //------------------------------------------------------------------------ |
// removeListNode - remove and return the RefInfoListNode for one reg of the given multi-reg GenTree node
53 | // |
54 | // Notes: |
55 | // The BuildNode methods use this helper to retrieve the RefPositions for child nodes |
// from the useList being constructed. Note that a caller that knows the order of the
// operands should simply retrieve them directly.
58 | |
59 | RefInfoListNode* RefInfoList::removeListNode(GenTree* node, unsigned multiRegIdx) |
60 | { |
61 | RefInfoListNode* prevListNode = nullptr; |
62 | for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next()) |
63 | { |
64 | if ((listNode->treeNode == node) && (listNode->ref->getMultiRegIdx() == multiRegIdx)) |
65 | { |
66 | return removeListNode(listNode, prevListNode); |
67 | } |
68 | prevListNode = listNode; |
69 | } |
assert(!"removeListNode didn't find the node");
71 | unreached(); |
72 | } |
73 | |
74 | //------------------------------------------------------------------------ |
75 | // RefInfoListNodePool::RefInfoListNodePool: |
76 | // Creates a pool of `RefInfoListNode` values. |
77 | // |
78 | // Arguments: |
79 | // compiler - The compiler context. |
80 | // preallocate - The number of nodes to preallocate. |
81 | // |
82 | RefInfoListNodePool::RefInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler) |
83 | { |
84 | if (preallocate > 0) |
85 | { |
86 | RefInfoListNode* preallocatedNodes = compiler->getAllocator(CMK_LSRA).allocate<RefInfoListNode>(preallocate); |
87 | |
88 | RefInfoListNode* head = preallocatedNodes; |
89 | head->m_next = nullptr; |
90 | |
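// Thread the remaining preallocated nodes onto the front of the list,
// building a LIFO free list.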
91 | for (unsigned i = 1; i < preallocate; i++) |
92 | { |
93 | RefInfoListNode* node = &preallocatedNodes[i]; |
94 | node->m_next = head; |
95 | head = node; |
96 | } |
97 | |
98 | m_freeList = head; |
99 | } |
100 | } |
101 | |
102 | //------------------------------------------------------------------------ |
103 | // RefInfoListNodePool::GetNode: Fetches an unused node from the |
104 | // pool. |
105 | // |
106 | // Arguments: |
// r - The `RefPosition` for the `RefInfo` value.
// t - The IR node for the `RefInfo` value.
// regIdx - The register index for the `RefInfo` value.
111 | // |
112 | // Returns: |
113 | // A pooled or newly-allocated `RefInfoListNode`, depending on the |
114 | // contents of the pool. |
115 | RefInfoListNode* RefInfoListNodePool::GetNode(RefPosition* r, GenTree* t, unsigned regIdx) |
116 | { |
117 | RefInfoListNode* head = m_freeList; |
118 | if (head == nullptr) |
119 | { |
120 | head = m_compiler->getAllocator(CMK_LSRA).allocate<RefInfoListNode>(1); |
121 | } |
122 | else |
123 | { |
124 | m_freeList = head->m_next; |
125 | } |
126 | |
127 | head->ref = r; |
128 | head->treeNode = t; |
129 | head->m_next = nullptr; |
130 | |
131 | return head; |
132 | } |
133 | |
134 | //------------------------------------------------------------------------ |
// RefInfoListNodePool::ReturnNode: Returns a node to the pool.
//
// Arguments:
//    listNode - The node to return.
140 | // |
141 | void RefInfoListNodePool::ReturnNode(RefInfoListNode* listNode) |
142 | { |
143 | listNode->m_next = m_freeList; |
144 | m_freeList = listNode; |
145 | } |
146 | |
147 | //------------------------------------------------------------------------ |
148 | // newInterval: Create a new Interval of the given RegisterType. |
149 | // |
150 | // Arguments: |
151 | // theRegisterType - The type of Interval to create. |
152 | // |
153 | // TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately |
154 | // set such fields as isStructField |
155 | // |
156 | Interval* LinearScan::newInterval(RegisterType theRegisterType) |
157 | { |
158 | intervals.emplace_back(theRegisterType, allRegs(theRegisterType)); |
159 | Interval* newInt = &intervals.back(); |
160 | |
161 | #ifdef DEBUG |
162 | newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1); |
163 | #endif // DEBUG |
164 | |
165 | DBEXEC(VERBOSE, newInt->dump()); |
166 | return newInt; |
167 | } |
168 | |
169 | //------------------------------------------------------------------------ |
170 | // newRefPositionRaw: Create a new RefPosition |
171 | // |
172 | // Arguments: |
173 | // nodeLocation - The location of the reference. |
174 | // treeNode - The GenTree of the reference. |
175 | // refType - The type of reference |
176 | // |
177 | // Notes: |
178 | // This is used to create RefPositions for both RegRecords and Intervals, |
179 | // so it does only the common initialization. |
180 | // |
181 | RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType) |
182 | { |
183 | refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType); |
184 | RefPosition* newRP = &refPositions.back(); |
185 | #ifdef DEBUG |
186 | newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1); |
187 | #endif // DEBUG |
188 | return newRP; |
189 | } |
190 | |
191 | //------------------------------------------------------------------------ |
192 | // resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use |
193 | // register requirements on a single-def, single-use interval. |
194 | // |
195 | // Arguments: |
// interval - The Interval with conflicting def/use requirements
// defRefPosition - The interval definition; its (sole) use is defRefPosition->nextRefPosition
198 | // |
199 | // Return Value: |
200 | // None. |
201 | // |
202 | // Assumptions: |
203 | // The two RefPositions are for the same interval, which is a tree-temp. |
204 | // |
205 | // Notes: |
206 | // We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg. |
207 | // In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that, |
208 | // even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs |
209 | // to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4 |
210 | // below. |
211 | // Here are the cases we consider (in this order): |
// 1. If the defRefPosition specifies a single register, and there are no conflicting
213 | // FixedReg uses of it between the def and use, we use that register, and the code generator |
214 | // will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def. |
215 | // 2. If the useRefPosition specifies a single register, and it is not in use, and there are no |
216 | // conflicting FixedReg uses of it between the def and use, we use that register, and the code generator |
217 | // will insert the copy. |
218 | // 3. If the defRefPosition specifies a single register (but there are conflicts, as determined |
219 | // in 1.), and there are no conflicts with the useRefPosition register (if it's a single register), |
// we set the register requirements on the defRefPosition to the use registers, and the
221 | // code generator will insert a copy on the def. We can't rely on the code generator to put a copy |
222 | // on the use if it has multiple possible candidates, as it won't know which one has been allocated. |
223 | // 4. If the useRefPosition specifies a single register, and there are no conflicts with the register |
224 | // on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set |
225 | // the useRefPosition to the def registers, for similar reasons to case #3. |
226 | // 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts, |
// we set the candidates on defRefPosition to be all regs of the appropriate type, and since they are
228 | // single registers, codegen can insert the copy. |
229 | // 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but |
230 | // has a conflict), we must insert a copy. The copy will be inserted before the use if the |
// use is not fixed (in the fixed case, the code generator will insert the copy).
232 | // |
233 | // TODO-CQ: We get bad register allocation in case #3 in the situation where no register is |
234 | // available for the lifetime. We end up allocating a register that must be spilled, and it probably |
235 | // won't be the register that is actually defined by the target instruction. So, we have to copy it |
236 | // and THEN spill it. In this case, we should be using the def requirement. But we need to change |
237 | // the interface to this method a bit to make that work (e.g. returning a candidate set to use, but |
238 | // leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway |
// we can use the fixed-reg on the def).
240 | // |
241 | |
242 | void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition) |
243 | { |
244 | assert(!interval->isLocalVar); |
245 | |
246 | RefPosition* useRefPosition = defRefPosition->nextRefPosition; |
247 | regMaskTP defRegAssignment = defRefPosition->registerAssignment; |
248 | regMaskTP useRegAssignment = useRefPosition->registerAssignment; |
249 | RegRecord* defRegRecord = nullptr; |
250 | RegRecord* useRegRecord = nullptr; |
251 | regNumber defReg = REG_NA; |
252 | regNumber useReg = REG_NA; |
253 | bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE); |
254 | bool useRegConflict = defRegConflict; |
255 | |
256 | // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment |
257 | // on it, or we will fail to ensure that the fixedReg is busy at the time the target |
258 | // (of the node that uses this interval) is allocated. |
259 | bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree; |
260 | |
261 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT)); |
262 | if (!canChangeUseAssignment) |
263 | { |
264 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE)); |
265 | } |
266 | if (defRefPosition->isFixedRegRef && !defRegConflict) |
267 | { |
268 | defReg = defRefPosition->assignedReg(); |
269 | defRegRecord = getRegisterRecord(defReg); |
270 | if (canChangeUseAssignment) |
271 | { |
272 | RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; |
273 | assert(currFixedRegRefPosition != nullptr && |
274 | currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation); |
275 | |
276 | if (currFixedRegRefPosition->nextRefPosition == nullptr || |
277 | currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation()) |
278 | { |
279 | // This is case #1. Use the defRegAssignment |
280 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1)); |
281 | useRefPosition->registerAssignment = defRegAssignment; |
282 | return; |
283 | } |
284 | else |
285 | { |
286 | defRegConflict = true; |
287 | } |
288 | } |
289 | } |
290 | if (useRefPosition->isFixedRegRef && !useRegConflict) |
291 | { |
292 | useReg = useRefPosition->assignedReg(); |
293 | useRegRecord = getRegisterRecord(useReg); |
295 | |
296 | // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null. |
297 | RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition(); |
298 | assert(nextFixedRegRefPosition != nullptr && |
299 | nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation); |
300 | |
301 | // First, check to see if there are any conflicting FixedReg references between the def and use. |
302 | if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation) |
303 | { |
304 | // OK, no conflicting FixedReg references. |
305 | // Now, check to see whether it is currently in use. |
306 | if (useRegRecord->assignedInterval != nullptr) |
307 | { |
308 | RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition; |
309 | LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation(); |
310 | if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation) |
311 | { |
312 | useRegConflict = true; |
313 | } |
314 | } |
315 | if (!useRegConflict) |
316 | { |
317 | // This is case #2. Use the useRegAssignment |
318 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2, interval)); |
319 | defRefPosition->registerAssignment = useRegAssignment; |
320 | return; |
321 | } |
322 | } |
323 | else |
324 | { |
325 | useRegConflict = true; |
326 | } |
327 | } |
328 | if (defRegRecord != nullptr && !useRegConflict) |
329 | { |
330 | // This is case #3. |
331 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3, interval)); |
332 | defRefPosition->registerAssignment = useRegAssignment; |
333 | return; |
334 | } |
335 | if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment) |
336 | { |
337 | // This is case #4. |
338 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4, interval)); |
339 | useRefPosition->registerAssignment = defRegAssignment; |
340 | return; |
341 | } |
342 | if (defRegRecord != nullptr && useRegRecord != nullptr) |
343 | { |
344 | // This is case #5. |
345 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5, interval)); |
346 | RegisterType regType = interval->registerType; |
347 | assert((getRegisterType(interval, defRefPosition) == regType) && |
348 | (getRegisterType(interval, useRefPosition) == regType)); |
349 | regMaskTP candidates = allRegs(regType); |
350 | defRefPosition->registerAssignment = candidates; |
351 | return; |
352 | } |
353 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6, interval)); |
354 | return; |
355 | } |
356 | |
357 | //------------------------------------------------------------------------ |
358 | // applyCalleeSaveHeuristics: Set register preferences for an interval based on the given RefPosition |
359 | // |
360 | // Arguments: |
361 | // rp - The RefPosition of interest |
362 | // |
363 | // Notes: |
// This is slightly more general than its name implies, and updates preferences not just
365 | // for callee-save registers. |
366 | // |
367 | void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp) |
368 | { |
369 | #ifdef _TARGET_AMD64_ |
370 | if (compiler->opts.compDbgEnC) |
371 | { |
372 | // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs. |
373 | return; |
374 | } |
375 | #endif // _TARGET_AMD64_ |
376 | |
377 | Interval* theInterval = rp->getInterval(); |
378 | |
379 | #ifdef DEBUG |
380 | if (!doReverseCallerCallee()) |
381 | #endif // DEBUG |
382 | { |
383 | // Set preferences so that this register set will be preferred for earlier refs |
384 | theInterval->updateRegisterPreferences(rp->registerAssignment); |
385 | } |
386 | } |
387 | |
388 | //------------------------------------------------------------------------ |
389 | // checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps. |
390 | // |
391 | // Arguments: |
392 | // useRP - The use RefPosition of a tree temp (SDSU Interval) |
393 | // |
394 | // Notes: |
395 | // There are a couple of cases where this may over-constrain allocation: |
396 | // 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or |
397 | // 2. In the case where the defining node requires a temp distinct from the target (also a |
398 | // delay-free case). |
399 | // In those cases, if we propagate a single-register restriction from the consumer to the producer |
400 | // the delayed uses will not see a fixed reference in the PhysReg at that position, and may |
401 | // incorrectly allocate that register. |
402 | // TODO-CQ: This means that we may often require a copy at the use of this node's result. |
403 | // This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is |
404 | // created, causing a RefTypeFixedReg to be added at that location. This, however, results in |
405 | // more PhysReg RefPositions (a throughput impact), and a large number of diffs that require |
406 | // further analysis to determine benefit. |
407 | // See Issue #11274. |
408 | // |
409 | void LinearScan::checkConflictingDefUse(RefPosition* useRP) |
410 | { |
411 | assert(useRP->refType == RefTypeUse); |
412 | Interval* theInterval = useRP->getInterval(); |
413 | assert(!theInterval->isLocalVar); |
414 | |
415 | RefPosition* defRP = theInterval->firstRefPosition; |
416 | |
// All defs must have a valid treeNode.
418 | assert(defRP->treeNode != nullptr); |
419 | regMaskTP prevAssignment = defRP->registerAssignment; |
420 | regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment); |
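// If the def and use constraints intersect, narrow the def to the intersection,
// unless that would pin a single register across interfering uses (see Notes above).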
421 | if (newAssignment != RBM_NONE) |
422 | { |
423 | if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) |
424 | { |
425 | defRP->registerAssignment = newAssignment; |
426 | } |
427 | } |
428 | else |
429 | { |
430 | theInterval->hasConflictingDefUse = true; |
431 | } |
432 | } |
433 | |
434 | //------------------------------------------------------------------------ |
435 | // associateRefPosWithInterval: Update the Interval based on the given RefPosition. |
436 | // |
437 | // Arguments: |
438 | // rp - The RefPosition of interest |
439 | // |
440 | // Notes: |
441 | // This is called at the time when 'rp' has just been created, so it becomes |
442 | // the nextRefPosition of the recentRefPosition, and both the recentRefPosition |
443 | // and lastRefPosition of its referent. |
444 | // |
445 | void LinearScan::associateRefPosWithInterval(RefPosition* rp) |
446 | { |
447 | Referenceable* theReferent = rp->referent; |
448 | |
449 | if (theReferent != nullptr) |
450 | { |
// All RefPositions except the dummy ones at the beginning of blocks have a referent.
452 | |
453 | if (rp->isIntervalRef()) |
454 | { |
455 | Interval* theInterval = rp->getInterval(); |
456 | |
457 | applyCalleeSaveHeuristics(rp); |
458 | |
459 | if (theInterval->isLocalVar) |
460 | { |
461 | if (RefTypeIsUse(rp->refType)) |
462 | { |
463 | RefPosition* const prevRP = theInterval->recentRefPosition; |
464 | if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum)) |
465 | { |
466 | prevRP->lastUse = false; |
467 | } |
468 | } |
469 | |
470 | rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) && |
471 | (rp->refType != RefTypeZeroInit) && !extendLifetimes(); |
472 | } |
473 | else if (rp->refType == RefTypeUse) |
474 | { |
475 | checkConflictingDefUse(rp); |
476 | rp->lastUse = true; |
477 | } |
478 | } |
479 | |
480 | RefPosition* prevRP = theReferent->recentRefPosition; |
481 | if (prevRP != nullptr) |
482 | { |
483 | prevRP->nextRefPosition = rp; |
484 | } |
485 | else |
486 | { |
487 | theReferent->firstRefPosition = rp; |
488 | } |
489 | theReferent->recentRefPosition = rp; |
490 | theReferent->lastRefPosition = rp; |
491 | } |
492 | else |
493 | { |
494 | assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs)); |
495 | } |
496 | } |
497 | |
498 | //--------------------------------------------------------------------------- |
499 | // newRefPosition: allocate and initialize a new RefPosition. |
500 | // |
501 | // Arguments: |
502 | // reg - reg number that identifies RegRecord to be associated |
503 | // with this RefPosition |
504 | // theLocation - LSRA location of RefPosition |
505 | // theRefType - RefPosition type |
506 | // theTreeNode - GenTree node for which this RefPosition is created |
507 | // mask - Set of valid registers for this RefPosition |
510 | // |
511 | // Return Value: |
512 | // a new RefPosition |
513 | // |
514 | RefPosition* LinearScan::newRefPosition( |
515 | regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask) |
516 | { |
517 | RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); |
518 | |
519 | RegRecord* regRecord = getRegisterRecord(reg); |
520 | newRP->setReg(regRecord); |
521 | newRP->registerAssignment = mask; |
522 | |
523 | newRP->setMultiRegIdx(0); |
524 | newRP->setAllocateIfProfitable(false); |
525 | |
526 | // We can't have two RefPositions on a RegRecord at the same location, unless they are different types. |
527 | assert((regRecord->lastRefPosition == nullptr) || (regRecord->lastRefPosition->nodeLocation < theLocation) || |
528 | (regRecord->lastRefPosition->refType != theRefType)); |
529 | associateRefPosWithInterval(newRP); |
530 | |
531 | DBEXEC(VERBOSE, newRP->dump()); |
532 | return newRP; |
533 | } |
534 | |
535 | //--------------------------------------------------------------------------- |
536 | // newRefPosition: allocate and initialize a new RefPosition. |
537 | // |
538 | // Arguments: |
539 | // theInterval - interval to which RefPosition is associated with. |
540 | // theLocation - LSRA location of RefPosition |
541 | // theRefType - RefPosition type |
542 | // theTreeNode - GenTree node for which this RefPosition is created |
543 | // mask - Set of valid registers for this RefPosition |
544 | // multiRegIdx - register position if this RefPosition corresponds to a |
545 | // multi-reg call node. |
546 | // |
547 | // Return Value: |
548 | // a new RefPosition |
549 | // |
550 | RefPosition* LinearScan::newRefPosition(Interval* theInterval, |
551 | LsraLocation theLocation, |
552 | RefType theRefType, |
553 | GenTree* theTreeNode, |
554 | regMaskTP mask, |
555 | unsigned multiRegIdx /* = 0 */) |
556 | { |
557 | if (theInterval != nullptr) |
558 | { |
559 | if (mask == RBM_NONE) |
560 | { |
561 | mask = allRegs(theInterval->registerType); |
562 | } |
563 | } |
564 | else |
565 | { |
566 | assert(theRefType == RefTypeBB || theRefType == RefTypeKillGCRefs); |
567 | } |
568 | #ifdef DEBUG |
569 | if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) |
570 | { |
571 | // In the case we're using floating point registers we must make sure |
572 | // this flag was set previously in the compiler since this will mandate |
573 | // whether LSRA will take into consideration FP reg killsets. |
574 | assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0)); |
575 | } |
576 | #endif // DEBUG |
577 | |
578 | // If this reference is constrained to a single register (and it's not a dummy |
579 | // or Kill reftype already), add a RefTypeFixedReg at this location so that its |
580 | // availability can be more accurately determined |
581 | |
582 | bool isFixedRegister = isSingleRegister(mask); |
583 | bool insertFixedRef = false; |
584 | if (isFixedRegister) |
585 | { |
586 | // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB), |
587 | // but not an internal use (it will already have a FixedRef for the def). |
588 | if ((theRefType == RefTypeDef) || ((theRefType == RefTypeUse) && !theInterval->isInternal)) |
589 | { |
590 | insertFixedRef = true; |
591 | } |
592 | } |
593 | |
594 | if (insertFixedRef) |
595 | { |
596 | regNumber physicalReg = genRegNumFromMask(mask); |
597 | RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); |
598 | assert(theInterval != nullptr); |
599 | assert((allRegs(theInterval->registerType) & mask) != 0); |
600 | } |
601 | |
602 | RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); |
603 | |
604 | newRP->setInterval(theInterval); |
605 | |
606 | // Spill info |
607 | newRP->isFixedRegRef = isFixedRegister; |
608 | |
609 | #ifndef _TARGET_AMD64_ |
// We don't need this for AMD64 because the PInvoke method epilog code is explicit
611 | // at register allocation time. |
612 | if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged && |
613 | theInterval->varNum == compiler->genReturnLocal) |
614 | { |
615 | mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME); |
616 | noway_assert(mask != RBM_NONE); |
617 | } |
618 | #endif // !_TARGET_AMD64_ |
619 | newRP->registerAssignment = mask; |
620 | |
621 | newRP->setMultiRegIdx(multiRegIdx); |
622 | newRP->setAllocateIfProfitable(false); |
623 | |
624 | associateRefPosWithInterval(newRP); |
625 | |
626 | DBEXEC(VERBOSE, newRP->dump()); |
627 | return newRP; |
628 | } |
629 | |
630 | //--------------------------------------------------------------------------- |
631 | // newUseRefPosition: allocate and initialize a RefTypeUse RefPosition at currentLoc. |
632 | // |
633 | // Arguments: |
634 | // theInterval - interval to which RefPosition is associated with. |
635 | // theTreeNode - GenTree node for which this RefPosition is created |
636 | // mask - Set of valid registers for this RefPosition |
637 | // multiRegIdx - register position if this RefPosition corresponds to a |
638 | // multi-reg call node. |
642 | // |
643 | // Return Value: |
644 | // a new RefPosition |
645 | // |
646 | // Notes: |
647 | // If the caller knows that 'theTreeNode' is NOT a candidate local, newRefPosition |
648 | // can/should be called directly. |
649 | // |
650 | RefPosition* LinearScan::newUseRefPosition(Interval* theInterval, |
651 | GenTree* theTreeNode, |
652 | regMaskTP mask, |
653 | unsigned multiRegIdx) |
654 | { |
655 | GenTree* treeNode = isCandidateLocalRef(theTreeNode) ? theTreeNode : nullptr; |
656 | |
657 | RefPosition* pos = newRefPosition(theInterval, currentLoc, RefTypeUse, treeNode, mask, multiRegIdx); |
658 | if (theTreeNode->IsRegOptional()) |
659 | { |
660 | pos->setAllocateIfProfitable(true); |
661 | } |
662 | return pos; |
663 | } |
664 | |
665 | //------------------------------------------------------------------------ |
// isContainableMemoryOp: Checks whether this is a memory op that can be contained.
667 | // |
668 | // Arguments: |
669 | // node - the node of interest. |
670 | // |
671 | // Return value: |
672 | // True if this will definitely be a memory reference that could be contained. |
673 | // |
674 | // Notes: |
675 | // This differs from the isMemoryOp() method on GenTree because it checks for |
// the case of a doNotEnregister local. This won't include locals that
677 | // for some other reason do not become register candidates, nor those that get |
678 | // spilled. |
679 | // Also, because we usually call this before we redo dataflow, any new lclVars |
680 | // introduced after the last dataflow analysis will not yet be marked lvTracked, |
681 | // so we don't use that. |
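// For example, a GT_LCL_VAR of a local marked 'lvDoNotEnregister' will remain on the
// stack, so it is treated as containable here.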
682 | // |
683 | bool LinearScan::isContainableMemoryOp(GenTree* node) |
684 | { |
685 | if (node->isMemoryOp()) |
686 | { |
687 | return true; |
688 | } |
689 | if (node->IsLocal()) |
690 | { |
691 | if (!enregisterLocalVars) |
692 | { |
693 | return true; |
694 | } |
695 | LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum]; |
696 | return varDsc->lvDoNotEnregister; |
697 | } |
698 | return false; |
699 | } |
700 | |
701 | //------------------------------------------------------------------------ |
702 | // addRefsForPhysRegMask: Adds RefPositions of the given type for all the registers in 'mask'. |
703 | // |
704 | // Arguments: |
705 | // mask - the mask (set) of registers. |
706 | // currentLoc - the location at which they should be added |
707 | // refType - the type of refposition |
708 | // isLastUse - true IFF this is a last use of the register |
709 | // |
710 | void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) |
711 | { |
712 | for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1) |
713 | { |
714 | if (mask & 1) |
715 | { |
716 | // This assumes that these are all "special" RefTypes that |
717 | // don't need to be recorded on the tree (hence treeNode is nullptr) |
718 | RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, |
719 | genRegMask(reg)); // This MUST occupy the physical register (obviously) |
720 | |
721 | if (isLastUse) |
722 | { |
723 | pos->lastUse = true; |
724 | } |
725 | } |
726 | } |
727 | } |
728 | |
729 | //------------------------------------------------------------------------ |
730 | // getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node. |
731 | // If the GT_STOREIND will generate a write barrier, determine the specific kill |
732 | // set required by the case-specific, platform-specific write barrier. If no |
733 | // write barrier is required, the kill set will be RBM_NONE. |
734 | // |
735 | // Arguments: |
736 | // tree - the GT_STOREIND node |
737 | // |
738 | // Return Value: a register mask of the registers killed |
739 | // |
740 | regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) |
741 | { |
742 | assert(tree->OperIs(GT_STOREIND)); |
743 | |
744 | regMaskTP killMask = RBM_NONE; |
745 | |
746 | GenTree* data = tree->Data(); |
747 | |
748 | GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data); |
749 | if (writeBarrierForm != GCInfo::WBF_NoBarrier) |
750 | { |
751 | if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm)) |
752 | { |
753 | // We can't determine the exact helper to be used at this point, because it depends on |
754 | // the allocated register for the `data` operand. However, all the (x86) optimized |
755 | // helpers have the same kill set: EDX. And note that currently, only x86 can return |
756 | // `true` for genUseOptimizedWriteBarriers(). |
757 | killMask = RBM_CALLEE_TRASH_NOGC; |
758 | } |
759 | else |
760 | { |
761 | // Figure out which helper we're going to use, and then get the kill set for that helper. |
762 | CorInfoHelpFunc helper = |
763 | compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm); |
764 | killMask = compiler->compHelperCallKillSet(helper); |
765 | } |
766 | } |
767 | return killMask; |
768 | } |
769 | |
770 | //------------------------------------------------------------------------ |
771 | // getKillSetForShiftRotate: Determine the liveness kill set for a shift or rotate node. |
772 | // |
773 | // Arguments: |
774 | // shiftNode - the shift or rotate node |
775 | // |
776 | // Return Value: a register mask of the registers killed |
777 | // |
778 | regMaskTP LinearScan::getKillSetForShiftRotate(GenTreeOp* shiftNode) |
779 | { |
780 | regMaskTP killMask = RBM_NONE; |
781 | #ifdef _TARGET_XARCH_ |
782 | assert(shiftNode->OperIsShiftOrRotate()); |
783 | GenTree* shiftBy = shiftNode->gtGetOp2(); |
784 | if (!shiftBy->isContained()) |
785 | { |
786 | killMask = RBM_RCX; |
787 | } |
788 | #endif // _TARGET_XARCH_ |
789 | return killMask; |
790 | } |
791 | |
792 | //------------------------------------------------------------------------ |
793 | // getKillSetForMul: Determine the liveness kill set for a multiply node. |
794 | // |
795 | // Arguments: |
// mulNode - the multiply node
797 | // |
798 | // Return Value: a register mask of the registers killed |
799 | // |
800 | regMaskTP LinearScan::getKillSetForMul(GenTreeOp* mulNode) |
801 | { |
802 | regMaskTP killMask = RBM_NONE; |
803 | #ifdef _TARGET_XARCH_ |
804 | assert(mulNode->OperIsMul()); |
805 | if (!mulNode->OperIs(GT_MUL) || (((mulNode->gtFlags & GTF_UNSIGNED) != 0) && mulNode->gtOverflowEx())) |
806 | { |
807 | killMask = RBM_RAX | RBM_RDX; |
808 | } |
809 | #endif // _TARGET_XARCH_ |
810 | return killMask; |
811 | } |
812 | |
813 | //------------------------------------------------------------------------ |
814 | // getKillSetForModDiv: Determine the liveness kill set for a mod or div node. |
815 | // |
816 | // Arguments: |
// node - the mod or div node as a GenTreeOp
818 | // |
819 | // Return Value: a register mask of the registers killed |
820 | // |
821 | regMaskTP LinearScan::getKillSetForModDiv(GenTreeOp* node) |
822 | { |
823 | regMaskTP killMask = RBM_NONE; |
824 | #ifdef _TARGET_XARCH_ |
825 | assert(node->OperIs(GT_MOD, GT_DIV, GT_UMOD, GT_UDIV)); |
826 | if (!varTypeIsFloating(node->TypeGet())) |
827 | { |
828 | // Both RAX and RDX are killed by the operation |
829 | killMask = RBM_RAX | RBM_RDX; |
830 | } |
831 | #endif // _TARGET_XARCH_ |
832 | return killMask; |
833 | } |
834 | |
835 | //------------------------------------------------------------------------ |
836 | // getKillSetForCall: Determine the liveness kill set for a call node. |
837 | // |
838 | // Arguments: |
// call - the GenTreeCall node
840 | // |
841 | // Return Value: a register mask of the registers killed |
842 | // |
843 | regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) |
844 | { |
845 | regMaskTP killMask = RBM_NONE; |
846 | #ifdef _TARGET_X86_ |
847 | if (compiler->compFloatingPointUsed) |
848 | { |
849 | if (call->TypeGet() == TYP_DOUBLE) |
850 | { |
851 | needDoubleTmpForFPCall = true; |
852 | } |
853 | else if (call->TypeGet() == TYP_FLOAT) |
854 | { |
855 | needFloatTmpForFPCall = true; |
856 | } |
857 | } |
858 | #endif // _TARGET_X86_ |
859 | #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
860 | if (call->IsHelperCall()) |
861 | { |
862 | CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); |
863 | killMask = compiler->compHelperCallKillSet(helpFunc); |
864 | } |
865 | else |
866 | #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
867 | { |
868 | // if there is no FP used, we can ignore the FP kills |
869 | if (compiler->compFloatingPointUsed) |
870 | { |
871 | killMask = RBM_CALLEE_TRASH; |
872 | } |
873 | else |
874 | { |
875 | killMask = RBM_INT_CALLEE_TRASH; |
876 | } |
877 | #ifdef _TARGET_ARM_ |
878 | if (call->IsVirtualStub()) |
879 | { |
880 | killMask |= compiler->virtualStubParamInfo->GetRegMask(); |
881 | } |
882 | #else // !_TARGET_ARM_ |
883 | // Verify that the special virtual stub call registers are in the kill mask. |
884 | // We don't just add them unconditionally to the killMask because for most architectures |
885 | // they are already in the RBM_CALLEE_TRASH set, |
886 | // and we don't want to introduce extra checks and calls in this hot function. |
887 | assert(!call->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == |
888 | compiler->virtualStubParamInfo->GetRegMask())); |
889 | #endif // !_TARGET_ARM_ |
890 | } |
891 | return killMask; |
892 | } |
893 | |
894 | //------------------------------------------------------------------------ |
895 | // getKillSetForBlockStore: Determine the liveness kill set for a block store node. |
896 | // |
897 | // Arguments: |
// blkNode - the block store node as a GenTreeBlk
899 | // |
900 | // Return Value: a register mask of the registers killed |
901 | // |
902 | regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) |
903 | { |
904 | assert(blkNode->OperIsStore()); |
905 | regMaskTP killMask = RBM_NONE; |
906 | |
907 | if ((blkNode->OperGet() == GT_STORE_OBJ) && blkNode->OperIsCopyBlkOp()) |
908 | { |
909 | assert(blkNode->AsObj()->gtGcPtrCount != 0); |
910 | killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF); |
911 | } |
912 | else |
913 | { |
914 | bool isCopyBlk = varTypeIsStruct(blkNode->Data()); |
915 | switch (blkNode->gtBlkOpKind) |
916 | { |
917 | case GenTreeBlk::BlkOpKindHelper: |
918 | if (isCopyBlk) |
919 | { |
920 | killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY); |
921 | } |
922 | else |
923 | { |
924 | killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET); |
925 | } |
926 | break; |
927 | |
928 | #ifdef _TARGET_XARCH_ |
929 | case GenTreeBlk::BlkOpKindRepInstr: |
930 | if (isCopyBlk) |
931 | { |
932 | // rep movs kills RCX, RDI and RSI |
933 | killMask = RBM_RCX | RBM_RDI | RBM_RSI; |
934 | } |
935 | else |
936 | { |
937 | // rep stos kills RCX and RDI. |
938 | // (Note that the Data() node, if not constant, will be assigned to |
// RCX, but it's fine that this kills it, as the value is not available
940 | // after this node in any case.) |
941 | killMask = RBM_RDI | RBM_RCX; |
942 | } |
943 | break; |
944 | #else |
945 | case GenTreeBlk::BlkOpKindRepInstr: |
946 | #endif |
947 | case GenTreeBlk::BlkOpKindUnroll: |
948 | case GenTreeBlk::BlkOpKindInvalid: |
949 | // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE |
950 | break; |
951 | } |
952 | } |
953 | return killMask; |
954 | } |
955 | |
956 | #ifdef FEATURE_HW_INTRINSICS |
957 | //------------------------------------------------------------------------ |
// getKillSetForHWIntrinsic: Determine the liveness kill set for a GT_HWIntrinsic node.
//
// Arguments:
//    node - the GT_HWIntrinsic node
965 | // |
966 | // Return Value: a register mask of the registers killed |
967 | // |
968 | regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node) |
969 | { |
970 | regMaskTP killMask = RBM_NONE; |
971 | #ifdef _TARGET_XARCH_ |
972 | switch (node->gtHWIntrinsicId) |
973 | { |
974 | case NI_SSE2_MaskMove: |
975 | // maskmovdqu uses edi as the implicit address register. |
976 | // Although it is set as the srcCandidate on the address, if there is also a fixed |
977 | // assignment for the definition of the address, resolveConflictingDefAndUse() may |
978 | // change the register assignment on the def or use of a tree temp (SDSU) when there |
979 | // is a conflict, and the FixedRef on edi won't be sufficient to ensure that another |
980 | // Interval will not be allocated there. |
981 | // Issue #17674 tracks this. |
982 | killMask = RBM_EDI; |
983 | break; |
984 | |
985 | default: |
986 | // Leave killMask as RBM_NONE |
987 | break; |
988 | } |
989 | #endif // _TARGET_XARCH_ |
990 | return killMask; |
991 | } |
992 | #endif // FEATURE_HW_INTRINSICS |
993 | |
994 | //------------------------------------------------------------------------ |
995 | // getKillSetForReturn: Determine the liveness kill set for a return node. |
996 | // |
997 | // Arguments: |
998 | // NONE (this kill set is independent of the details of the specific return.) |
999 | // |
1000 | // Return Value: a register mask of the registers killed |
1001 | // |
1002 | regMaskTP LinearScan::getKillSetForReturn() |
1003 | { |
1004 | return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE) |
1005 | : RBM_NONE; |
1006 | } |
1007 | |
1008 | //------------------------------------------------------------------------ |
1009 | // getKillSetForProfilerHook: Determine the liveness kill set for a profiler hook. |
1010 | // |
1011 | // Arguments: |
1012 | // NONE (this kill set is independent of the details of the specific node.) |
1013 | // |
1014 | // Return Value: a register mask of the registers killed |
1015 | // |
1016 | regMaskTP LinearScan::getKillSetForProfilerHook() |
1017 | { |
1018 | return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL) |
1019 | : RBM_NONE; |
1020 | } |
1021 | |
1022 | #ifdef DEBUG |
1023 | //------------------------------------------------------------------------ |
1024 | // getKillSetForNode: Return the registers killed by the given tree node. |
1025 | // |
1026 | // Arguments: |
1027 | // tree - the tree for which the kill set is needed. |
1028 | // |
1029 | // Return Value: a register mask of the registers killed |
1030 | // |
1031 | regMaskTP LinearScan::getKillSetForNode(GenTree* tree) |
1032 | { |
1033 | regMaskTP killMask = RBM_NONE; |
1034 | switch (tree->OperGet()) |
1035 | { |
1036 | case GT_LSH: |
1037 | case GT_RSH: |
1038 | case GT_RSZ: |
1039 | case GT_ROL: |
1040 | case GT_ROR: |
1041 | #ifdef _TARGET_X86_ |
1042 | case GT_LSH_HI: |
1043 | case GT_RSH_LO: |
1044 | #endif |
1045 | killMask = getKillSetForShiftRotate(tree->AsOp()); |
1046 | break; |
1047 | |
1048 | case GT_MUL: |
1049 | case GT_MULHI: |
1050 | #if !defined(_TARGET_64BIT_) |
1051 | case GT_MUL_LONG: |
1052 | #endif |
1053 | killMask = getKillSetForMul(tree->AsOp()); |
1054 | break; |
1055 | |
1056 | case GT_MOD: |
1057 | case GT_DIV: |
1058 | case GT_UMOD: |
1059 | case GT_UDIV: |
1060 | killMask = getKillSetForModDiv(tree->AsOp()); |
1061 | break; |
1062 | |
1063 | case GT_STORE_OBJ: |
1064 | case GT_STORE_BLK: |
1065 | case GT_STORE_DYN_BLK: |
1066 | killMask = getKillSetForBlockStore(tree->AsBlk()); |
1067 | break; |
1068 | |
1069 | case GT_RETURNTRAP: |
1070 | killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); |
1071 | break; |
1072 | |
1073 | case GT_CALL: |
killMask = getKillSetForCall(tree->AsCall());
break;

case GT_STOREIND:
1078 | killMask = getKillSetForStoreInd(tree->AsStoreInd()); |
1079 | break; |
1080 | |
1081 | #if defined(PROFILING_SUPPORTED) |
1082 | // If this method requires profiler ELT hook then mark these nodes as killing |
1083 | // callee trash registers (excluding RAX and XMM0). The reason for this is that |
1084 | // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for |
1085 | // more details. |
1086 | case GT_RETURN: |
1087 | killMask = getKillSetForReturn(); |
1088 | break; |
1089 | |
1090 | case GT_PROF_HOOK: |
1091 | killMask = getKillSetForProfilerHook(); |
1092 | break; |
1093 | #endif // PROFILING_SUPPORTED |
1094 | |
1095 | #ifdef FEATURE_HW_INTRINSICS |
1096 | case GT_HWIntrinsic: |
1097 | killMask = getKillSetForHWIntrinsic(tree->AsHWIntrinsic()); |
1098 | break; |
1099 | #endif // FEATURE_HW_INTRINSICS |
1100 | |
1101 | default: |
1102 | // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE |
1103 | break; |
1104 | } |
1105 | return killMask; |
1106 | } |
1107 | #endif // DEBUG |
1108 | |
1109 | //------------------------------------------------------------------------ |
1110 | // buildKillPositionsForNode: |
1111 | // Given some tree node add refpositions for all the registers this node kills |
1112 | // |
1113 | // Arguments: |
// tree - the tree for which kill positions should be generated
// currentLoc - the location at which the kills should be added
// killMask - the mask of registers killed by the node (as computed by the getKillSetFor* methods)
1116 | // |
1117 | // Return Value: |
1118 | // true - kills were inserted |
1119 | // false - no kills were inserted |
1120 | // |
1121 | // Notes: |
1122 | // The return value is needed because if we have any kills, we need to make sure that |
1123 | // all defs are located AFTER the kills. On the other hand, if there aren't kills, |
1124 | // the multiple defs for a regPair are in different locations. |
1125 | // If we generate any kills, we will mark all currentLiveVars as being preferenced |
1126 | // to avoid the killed registers. This is somewhat conservative. |
1127 | |
1128 | bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask) |
1129 | { |
1130 | bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); |
1131 | if (killMask != RBM_NONE) |
1132 | { |
1133 | // The killMask identifies a set of registers that will be used during codegen. |
1134 | // Mark these as modified here, so when we do final frame layout, we'll know about |
1135 | // all these registers. This is especially important if killMask contains |
1136 | // callee-saved registers, which affect the frame size since we need to save/restore them. |
// In the case where we have a copyBlk with GC pointers, we may need to call the
// CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI. If
// LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
// which is too late.
1141 | compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true)); |
1142 | |
1143 | addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true); |
1144 | |
1145 | // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee |
// save regs on infrequently executed paths. However, it results in a large number of asmDiffs,
1147 | // many of which appear to be regressions (because there is more spill on the infrequently path), |
1148 | // but are not really because the frequent path becomes smaller. Validating these diffs will need |
1149 | // to be done before making this change. |
1150 | // if (!blockSequence[curBBSeqNum]->isRunRarely()) |
1151 | if (enregisterLocalVars) |
1152 | { |
1153 | VarSetOps::Iter iter(compiler, currentLiveVars); |
1154 | unsigned varIndex = 0; |
1155 | while (iter.NextElem(&varIndex)) |
1156 | { |
1157 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
1158 | LclVarDsc* varDsc = compiler->lvaTable + varNum; |
1159 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
1160 | if (varTypeNeedsPartialCalleeSave(varDsc->lvType)) |
1161 | { |
1162 | if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex)) |
1163 | { |
1164 | continue; |
1165 | } |
1166 | } |
1167 | else |
1168 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
1169 | if (varTypeIsFloating(varDsc) && |
1170 | !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex)) |
1171 | { |
1172 | continue; |
1173 | } |
1174 | Interval* interval = getIntervalForLocalVar(varIndex); |
1175 | if (isCallKill) |
1176 | { |
1177 | interval->preferCalleeSave = true; |
1178 | } |
1179 | regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); |
1180 | |
1181 | if (newPreferences != RBM_NONE) |
1182 | { |
1183 | interval->updateRegisterPreferences(newPreferences); |
1184 | } |
1185 | else |
1186 | { |
1187 | // If there are no callee-saved registers, the call could kill all the registers. |
1188 | // This is a valid state, so in that case assert should not trigger. The RA will spill in order to |
1189 | // free a register later. |
1190 | assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE); |
1191 | } |
1192 | } |
1193 | } |
1194 | |
1195 | if (compiler->killGCRefs(tree)) |
1196 | { |
1197 | RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, |
1198 | (allRegs(TYP_REF) & ~RBM_ARG_REGS)); |
1199 | } |
1200 | return true; |
1201 | } |
1202 | |
1203 | return false; |
1204 | } |
1205 | |
1206 | //---------------------------------------------------------------------------- |
1207 | // defineNewInternalTemp: Defines a ref position for an internal temp. |
1208 | // |
1209 | // Arguments: |
1210 | // tree - Gentree node requiring an internal register |
1211 | // regType - Register type |
// regMask - register mask of candidates for temp
1214 | // |
1215 | RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask) |
1216 | { |
1217 | Interval* current = newInterval(regType); |
1218 | current->isInternal = true; |
1219 | RefPosition* newDef = newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0); |
1220 | assert(internalCount < MaxInternalCount); |
1221 | internalDefs[internalCount++] = newDef; |
1222 | return newDef; |
1223 | } |
1224 | |
1225 | //------------------------------------------------------------------------ |
// buildInternalIntRegisterDefForNode - Create an Interval for an internal int register, and a def RefPosition
1227 | // |
1228 | // Arguments: |
1229 | // tree - Gentree node that needs internal registers |
1230 | // internalCands - The mask of valid registers |
1231 | // |
1232 | // Returns: |
1233 | // The def RefPosition created for this internal temp. |
1234 | // |
1235 | RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands) |
1236 | { |
// The candidate set should contain only integer registers.
assert((internalCands & ~allRegs(TYP_INT)) == RBM_NONE);
1244 | |
1245 | RefPosition* defRefPosition = defineNewInternalTemp(tree, IntRegisterType, internalCands); |
1246 | return defRefPosition; |
1247 | } |
1248 | |
1249 | //------------------------------------------------------------------------ |
1250 | // buildInternalFloatRegisterDefForNode - Create an Interval for an internal fp register, and a def RefPosition |
1251 | // |
1252 | // Arguments: |
1253 | // tree - Gentree node that needs internal registers |
1254 | // internalCands - The mask of valid registers |
1255 | // |
1256 | // Returns: |
1257 | // The def RefPosition created for this internal temp. |
1258 | // |
1259 | RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands) |
1260 | { |
// The candidate set should contain only float registers.
assert((internalCands & ~allRegs(TYP_FLOAT)) == RBM_NONE);
1268 | |
1269 | RefPosition* defRefPosition = defineNewInternalTemp(tree, FloatRegisterType, internalCands); |
1270 | return defRefPosition; |
1271 | } |
1272 | |
1273 | //------------------------------------------------------------------------ |
1274 | // buildInternalRegisterUses - adds use positions for internal |
1275 | // registers required for tree node. |
1276 | // |
1277 | // Notes: |
1278 | // During the BuildNode process, calls to buildInternalIntRegisterDefForNode and |
1279 | // buildInternalFloatRegisterDefForNode put new RefPositions in the 'internalDefs' |
1280 | // array, and increment 'internalCount'. This method must be called to add corresponding |
// uses. ('internalCount' itself is reset before RefPositions are built for the next node.)
1282 | // |
1283 | // If the internal registers must differ from the target register, 'setInternalRegsDelayFree' |
1284 | // must be set to true, so that the uses may be marked 'delayRegFree'. |
// Note that if a node has both float and int temps, generally the target will either be
1286 | // int *or* float, and it is not really necessary to set this on the other type, but it does |
1287 | // no harm as it won't restrict the register selection. |
1288 | // |
1289 | void LinearScan::buildInternalRegisterUses() |
1290 | { |
1291 | assert(internalCount <= MaxInternalCount); |
1292 | for (int i = 0; i < internalCount; i++) |
1293 | { |
1294 | RefPosition* def = internalDefs[i]; |
1295 | regMaskTP mask = def->registerAssignment; |
1296 | RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0); |
1297 | if (setInternalRegsDelayFree) |
1298 | { |
1299 | use->delayRegFree = true; |
1300 | pendingDelayFree = true; |
1301 | } |
1302 | } |
// Note: 'internalCount' is not reset here; it is cleared when building begins for the next node.
1304 | } |
1305 | |
1306 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
1307 | //------------------------------------------------------------------------ |
1308 | // buildUpperVectorSaveRefPositions - Create special RefPositions for saving |
// the upper half of a set of large vectors.
1310 | // |
1311 | // Arguments: |
1312 | // tree - The current node being handled |
1313 | // currentLoc - The location of the current node |
1314 | // |
1315 | // Return Value: Returns the set of lclVars that are killed by this node, and therefore |
// require RefTypeUpperVectorSaveDef RefPositions.
1317 | // |
1318 | // Notes: The returned set is used by buildUpperVectorRestoreRefPositions. |
1319 | // |
1320 | VARSET_VALRET_TP |
1321 | LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet) |
1322 | { |
1323 | assert(enregisterLocalVars); |
1324 | VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler)); |
1325 | if (!VarSetOps::IsEmpty(compiler, largeVectorVars)) |
1326 | { |
1327 | // We actually need to find any calls that kill the upper-half of the callee-save vector registers. |
1328 | // But we will use as a proxy any node that kills floating point registers. |
1329 | // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.) |
1330 | // This check should have been done by the caller. |
1331 | if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE) |
1332 | { |
1333 | VarSetOps::AssignNoCopy(compiler, liveLargeVectors, |
1334 | VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars)); |
1335 | VarSetOps::Iter iter(compiler, liveLargeVectors); |
1336 | unsigned varIndex = 0; |
1337 | while (iter.NextElem(&varIndex)) |
1338 | { |
1339 | Interval* varInterval = getIntervalForLocalVar(varIndex); |
1340 | Interval* tempInterval = newInterval(varInterval->registerType); |
1341 | tempInterval->isInternal = true; |
1342 | RefPosition* pos = |
1343 | newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED); |
1344 | // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set |
1345 | // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding |
1346 | // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval, |
1347 | // and set varInterval as the relatedInterval of tempInterval. |
1348 | tempInterval->relatedInterval = varInterval->relatedInterval; |
1349 | varInterval->relatedInterval = tempInterval; |
1350 | } |
1351 | } |
1352 | } |
1353 | return liveLargeVectors; |
1354 | } |
1355 | |
//------------------------------------------------------------------------
// buildUpperVectorRestoreRefPositions - Create special RefPositions for restoring
1357 | // the upper half of a set of large vectors. |
1358 | // |
1359 | // Arguments: |
1360 | // tree - The current node being handled |
1361 | // currentLoc - The location of the current node |
1362 | // liveLargeVectors - The set of lclVars needing restores (returned by buildUpperVectorSaveRefPositions) |
1363 | // |
1364 | void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree, |
1365 | LsraLocation currentLoc, |
1366 | VARSET_VALARG_TP liveLargeVectors) |
1367 | { |
1368 | assert(enregisterLocalVars); |
1369 | if (!VarSetOps::IsEmpty(compiler, liveLargeVectors)) |
1370 | { |
1371 | VarSetOps::Iter iter(compiler, liveLargeVectors); |
1372 | unsigned varIndex = 0; |
1373 | while (iter.NextElem(&varIndex)) |
1374 | { |
1375 | Interval* varInterval = getIntervalForLocalVar(varIndex); |
1376 | Interval* tempInterval = varInterval->relatedInterval; |
            assert(tempInterval->isInternal);
1378 | RefPosition* pos = |
1379 | newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED); |
1380 | // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval |
1381 | // of tempInterval. |
1382 | varInterval->relatedInterval = tempInterval->relatedInterval; |
1383 | tempInterval->relatedInterval = varInterval; |
1384 | } |
1385 | } |
1386 | } |
1387 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
1388 | |
1389 | #ifdef DEBUG |
1390 | //------------------------------------------------------------------------ |
1391 | // ComputeOperandDstCount: computes the number of registers defined by a |
1392 | // node. |
1393 | // |
1394 | // For most nodes, this is simple: |
1395 | // - Nodes that do not produce values (e.g. stores and other void-typed |
1396 | // nodes) and nodes that immediately use the registers they define |
1397 | // produce no registers |
1398 | // - Nodes that are marked as defining N registers define N registers. |
1399 | // |
1400 | // For contained nodes, however, things are more complicated: for purposes |
1401 | // of bookkeeping, a contained node is treated as producing the transitive |
1402 | // closure of the registers produced by its sources. |
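// For example, a contained GT_LEA whose (non-contained) base and index operands are in
// registers is treated, for bookkeeping purposes, as producing those two registers.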
1403 | // |
1404 | // Arguments: |
1405 | // operand - The operand for which to compute a register count. |
1406 | // |
1407 | // Returns: |
1408 | // The number of registers defined by `operand`. |
1409 | // |
1410 | // static |
1411 | int LinearScan::ComputeOperandDstCount(GenTree* operand) |
1412 | { |
1413 | // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though |
1414 | // note that it is not in the linear order. It seems best to check for !IsLIR() rather than |
1415 | // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method. |
1416 | if (!operand->IsLIR()) |
1417 | { |
1418 | return 0; |
1419 | } |
1420 | if (operand->isContained()) |
1421 | { |
1422 | int dstCount = 0; |
1423 | for (GenTree* op : operand->Operands()) |
1424 | { |
1425 | dstCount += ComputeOperandDstCount(op); |
1426 | } |
1427 | |
1428 | return dstCount; |
1429 | } |
1430 | if (operand->IsUnusedValue()) |
1431 | { |
1432 | // Operands that define an unused value do not produce any registers. |
1433 | return 0; |
1434 | } |
1435 | if (operand->IsValue()) |
1436 | { |
1437 | // Operands that are values and are not contained consume all of their operands |
1438 | // and produce one or more registers. |
1439 | return operand->GetRegisterDstCount(); |
1440 | } |
1441 | else |
1442 | { |
1443 | // This must be one of the operand types that are neither contained nor produce a value. |
1444 | // Stores and void-typed operands may be encountered when processing call nodes, which contain |
1445 | // pointers to argument setup stores. |
1446 | assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || |
1447 | operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() || |
1448 | operand->TypeGet() == TYP_VOID); |
1449 | return 0; |
1450 | } |
1451 | } |
1452 | |
1453 | //------------------------------------------------------------------------ |
1454 | // ComputeAvailableSrcCount: computes the number of registers available as |
1455 | // sources for a node. |
1456 | // |
1457 | // This is simply the sum of the number of registers produced by each |
1458 | // operand to the node. |
1459 | // |
1460 | // Arguments: |
1461 | // node - The node for which to compute a source count. |
1462 | // |
1463 | // Return Value: |
1464 | // The number of registers available as sources for `node`. |
1465 | // |
1466 | // static |
1467 | int LinearScan::ComputeAvailableSrcCount(GenTree* node) |
1468 | { |
1469 | int numSources = 0; |
1470 | for (GenTree* operand : node->Operands()) |
1471 | { |
1472 | numSources += ComputeOperandDstCount(operand); |
1473 | } |
1474 | |
1475 | return numSources; |
1476 | } |
1477 | #endif // DEBUG |
1478 | |
1479 | //------------------------------------------------------------------------ |
1480 | // buildRefPositionsForNode: The main entry point for building the RefPositions |
1481 | // and "tree temp" Intervals for a given node. |
1482 | // |
1483 | // Arguments: |
1484 | // tree - The node for which we are building RefPositions |
1485 | // block - The BasicBlock in which the node resides |
1486 | // currentLoc - The LsraLocation of the given node |
1487 | // |
1488 | void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc) |
1489 | { |
1490 | // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. |
1491 | // GT_CLS_VAR nodes should have been eliminated by rationalizer. |
1492 | assert(tree->OperGet() != GT_ARGPLACE); |
1493 | assert(tree->OperGet() != GT_LIST); |
1494 | assert(tree->OperGet() != GT_CLS_VAR); |
1495 | |
1496 | // The LIR traversal visits only the first node in a GT_FIELD_LIST. |
1497 | assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead()); |
1498 | |
1499 | // The set of internal temporary registers used by this node are stored in the |
1500 | // gtRsvdRegs register mask. Clear it out. |
1501 | tree->gtRsvdRegs = RBM_NONE; |
1502 | |
1503 | #ifdef DEBUG |
1504 | if (VERBOSE) |
1505 | { |
1506 | dumpDefList(); |
1507 | compiler->gtDispTree(tree, nullptr, nullptr, true); |
1508 | } |
1509 | #endif // DEBUG |
1510 | |
1511 | if (tree->isContained()) |
1512 | { |
1513 | #ifdef _TARGET_XARCH_ |
1514 | // On XArch we can have contained candidate lclVars if they are part of a RMW |
1515 | // address computation. In this case we need to check whether it is a last use. |
1516 | if (tree->IsLocal() && ((tree->gtFlags & GTF_VAR_DEATH) != 0)) |
1517 | { |
1518 | LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum]; |
1519 | if (isCandidateVar(varDsc)) |
1520 | { |
1521 | assert(varDsc->lvTracked); |
1522 | unsigned varIndex = varDsc->lvVarIndex; |
1523 | VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex); |
1524 | } |
1525 | } |
1526 | #else // _TARGET_XARCH_ |
1527 | assert(!isCandidateLocalRef(tree)); |
1528 | #endif // _TARGET_XARCH_ |
1529 | JITDUMP("Contained\n" ); |
1530 | return; |
1531 | } |
1532 | |
1533 | #ifdef DEBUG |
1534 | // If we are constraining the registers for allocation, we will modify all the RefPositions |
1535 | // we've built for this node after we've created them. In order to do that, we'll remember |
1536 | // the last RefPosition prior to those created for this node. |
1537 | RefPositionIterator refPositionMark = refPositions.backPosition(); |
1538 | int oldDefListCount = defList.Count(); |
1539 | #endif // DEBUG |
1540 | |
1541 | int consume = BuildNode(tree); |
1542 | |
1543 | #ifdef DEBUG |
1544 | int newDefListCount = defList.Count(); |
1545 | int produce = newDefListCount - oldDefListCount; |
1546 | assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); |
1547 | |
1548 | #if defined(_TARGET_AMD64_) |
1549 | // Multi-reg call node is the only node that could produce multi-reg value |
1550 | assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT)); |
1551 | #endif // _TARGET_AMD64_ |
1552 | |
1553 | #endif // DEBUG |
1554 | |
1555 | #ifdef DEBUG |
1556 | // If we are constraining registers, modify all the RefPositions we've just built to specify the |
1557 | // minimum reg count required. |
1558 | if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT)) |
1559 | { |
1560 | // The number of registers required for a tree node is the sum of |
1561 | // { RefTypeUses } + { RefTypeDef for the node itself } + specialPutArgCount |
1562 | // This is the minimum set of registers that needs to be ensured in the candidate set of ref positions created. |
1563 | // |
1564 | // First, we count them. |
1565 | unsigned minRegCount = 0; |
1566 | |
1567 | RefPositionIterator iter = refPositionMark; |
1568 | for (iter++; iter != refPositions.end(); iter++) |
1569 | { |
1570 | RefPosition* newRefPosition = &(*iter); |
1571 | if (newRefPosition->isIntervalRef()) |
1572 | { |
1573 | if ((newRefPosition->refType == RefTypeUse) || |
1574 | ((newRefPosition->refType == RefTypeDef) && !newRefPosition->getInterval()->isInternal)) |
1575 | { |
1576 | minRegCount++; |
1577 | } |
1578 | if (newRefPosition->getInterval()->isSpecialPutArg) |
1579 | { |
1580 | minRegCount++; |
1581 | } |
1582 | } |
1583 | } |
1584 | |
1585 | if (tree->OperIsPutArgSplit()) |
1586 | { |
1587 | // While we have attempted to account for any "specialPutArg" defs above, we're only looking at RefPositions |
1588 | // created for this node. We must be defining at least one register in the PutArgSplit, so conservatively |
            // add one less than the maximum number of register args to 'minRegCount'.
1590 | minRegCount += MAX_REG_ARG - 1; |
1591 | } |
1592 | for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++) |
1593 | { |
1594 | RefPosition* newRefPosition = &(*refPositionMark); |
1595 | unsigned minRegCountForRef = minRegCount; |
1596 | if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree) |
1597 | { |
                // If delayRegFree, then the Use will interfere with the destination of the consuming node.
                // Therefore, we also need to add the kill set of the consuming node to minRegCount.
1600 | // |
1601 | // For example consider the following IR on x86, where v01 and v02 |
1602 | // are method args coming in ecx and edx respectively. |
1603 | // GT_DIV(v01, v02) |
1604 | // |
1605 | // For GT_DIV, the minRegCount will be 3 without adding kill set of GT_DIV node. |
1606 | // |
1607 | // Assume further JitStressRegs=2, which would constrain candidates to callee trashable |
1608 | // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01. |
1609 | // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and |
1610 | // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for |
1611 | // the use position of v02 also needs to take into account the kill set of its consuming node. |
1612 | regMaskTP killMask = getKillSetForNode(tree); |
1613 | if (killMask != RBM_NONE) |
1614 | { |
1615 | minRegCountForRef += genCountBits(killMask); |
1616 | } |
1617 | } |
            else if ((newRefPosition->refType == RefTypeDef) && newRefPosition->getInterval()->isSpecialPutArg)
1619 | { |
1620 | minRegCountForRef++; |
1621 | } |
1622 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
1623 | else if (newRefPosition->refType == RefTypeUpperVectorSaveDef) |
1624 | { |
1625 | // TODO-Cleanup: won't need a register once #18144 is fixed. |
1626 | minRegCountForRef += 1; |
1627 | } |
1628 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
1629 | |
1630 | newRefPosition->minRegCandidateCount = minRegCountForRef; |
1631 | if (newRefPosition->IsActualRef() && doReverseCallerCallee()) |
1632 | { |
1633 | Interval* interval = newRefPosition->getInterval(); |
1634 | regMaskTP oldAssignment = newRefPosition->registerAssignment; |
1635 | regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType); |
1636 | newRefPosition->registerAssignment = |
1637 | getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef); |
1638 | if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && |
1639 | !interval->isLocalVar) |
1640 | { |
1641 | checkConflictingDefUse(newRefPosition); |
1642 | } |
1643 | } |
1644 | } |
1645 | } |
1646 | #endif // DEBUG |
1647 | JITDUMP("\n" ); |
1648 | } |
1649 | |
1650 | //------------------------------------------------------------------------ |
1651 | // buildPhysRegRecords: Make an interval for each physical register |
1652 | // |
1653 | void LinearScan::buildPhysRegRecords() |
1654 | { |
1656 | for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) |
1657 | { |
1658 | RegRecord* curr = &physRegs[reg]; |
1659 | curr->init(reg); |
1660 | } |
1661 | } |
1662 | |
1663 | //------------------------------------------------------------------------ |
1664 | // getNonEmptyBlock: Return the first non-empty block starting with 'block' |
1665 | // |
1666 | // Arguments: |
1667 | // block - the BasicBlock from which we start looking |
1668 | // |
1669 | // Return Value: |
1670 | // The first non-empty BasicBlock we find. |
1671 | // |
1672 | BasicBlock* getNonEmptyBlock(BasicBlock* block) |
1673 | { |
1674 | while (block != nullptr && block->bbTreeList == nullptr) |
1675 | { |
1676 | BasicBlock* nextBlock = block->bbNext; |
1677 | // Note that here we use the version of NumSucc that does not take a compiler. |
1678 | // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan. |
1679 | // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which |
1680 | // case we don't need the version that takes a compiler. |
1681 | assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE))); |
        // Sometimes the first block is empty and ends with an unconditional branch.
1683 | // assert( block->GetSucc(0) == nextBlock); |
1684 | block = nextBlock; |
1685 | } |
1686 | assert(block != nullptr && block->bbTreeList != nullptr); |
1687 | return block; |
1688 | } |
1689 | |
1690 | //------------------------------------------------------------------------ |
1691 | // insertZeroInitRefPositions: Handle lclVars that are live-in to the first block |
1692 | // |
1693 | // Notes: |
1694 | // Prior to calling this method, 'currentLiveVars' must be set to the set of register |
1695 | // candidate variables that are liveIn to the first block. |
1696 | // For each register candidate that is live-in to the first block: |
1697 | // - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created. |
1698 | // - Otherwise, it will be marked as spilled, since it will not be assigned a register |
1699 | // on entry and will be loaded from memory on the undefined path. |
1700 | // Note that, when the compInitMem option is not set, we may encounter these on |
1701 | // paths that are protected by the same condition as an earlier def. However, since |
1702 | // we don't do the analysis to determine this - and couldn't rely on always identifying |
1703 | // such cases even if we tried - we must conservatively treat the undefined path as |
1704 | // being possible. This is a relatively rare case, so the introduced conservatism is |
1705 | // not expected to warrant the analysis required to determine the best placement of |
1706 | // an initialization. |
1707 | // |
1708 | void LinearScan::insertZeroInitRefPositions() |
1709 | { |
1710 | assert(enregisterLocalVars); |
1711 | #ifdef DEBUG |
1712 | VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn)); |
1713 | assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars)); |
1714 | #endif // DEBUG |
1715 | |
    // Insert a ZeroInit def for (or mark as spilled) each of these lclVars, before the first block boundary.
1717 | |
1718 | VarSetOps::Iter iter(compiler, currentLiveVars); |
1719 | unsigned varIndex = 0; |
1720 | while (iter.NextElem(&varIndex)) |
1721 | { |
1722 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
1723 | LclVarDsc* varDsc = compiler->lvaTable + varNum; |
1724 | if (!varDsc->lvIsParam && isCandidateVar(varDsc)) |
1725 | { |
1726 | JITDUMP("V%02u was live in to first block:" , varNum); |
1727 | Interval* interval = getIntervalForLocalVar(varIndex); |
1728 | if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())) |
1729 | { |
1730 | JITDUMP(" creating ZeroInit\n" ); |
1731 | GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); |
1732 | RefPosition* pos = |
1733 | newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); |
1734 | varDsc->lvMustInit = true; |
1735 | } |
1736 | else |
1737 | { |
1738 | setIntervalAsSpilled(interval); |
1739 | JITDUMP(" marking as spilled\n" ); |
1740 | } |
1741 | } |
1742 | } |
1743 | } |
1744 | |
1745 | #if defined(UNIX_AMD64_ABI) |
1746 | //------------------------------------------------------------------------ |
1747 | // unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems. |
1748 | // |
1749 | // Arguments: |
1750 | // argDsc - the LclVarDsc for the argument of interest |
1751 | // |
1752 | // Notes: |
1753 | // See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp |
1754 | // for how state for argument is updated for unix non-structs and Windows AMD64 structs. |
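//    Under the SysV AMD64 ABI, a struct may be passed in two registers whose eightbyte
//    classifications can differ (e.g. one INTEGER and one SSE), which is why lvArgReg and
//    lvOtherArgReg are each checked independently against the float and int masks below.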
1755 | // |
1756 | void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) |
1757 | { |
1758 | assert(varTypeIsStruct(argDsc)); |
1759 | RegState* intRegState = &compiler->codeGen->intRegState; |
1760 | RegState* floatRegState = &compiler->codeGen->floatRegState; |
1761 | |
1762 | if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA)) |
1763 | { |
1764 | if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT)) |
1765 | { |
1766 | assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS)); |
1767 | floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); |
1768 | } |
1769 | else |
1770 | { |
1771 | assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS)); |
1772 | intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); |
1773 | } |
1774 | } |
1775 | |
1776 | if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) |
1777 | { |
1778 | if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT)) |
1779 | { |
1780 | assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS)); |
1781 | floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); |
1782 | } |
1783 | else |
1784 | { |
1785 | assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS)); |
1786 | intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); |
1787 | } |
1788 | } |
1789 | } |
1790 | |
1791 | #endif // defined(UNIX_AMD64_ABI) |
1792 | |
1793 | //------------------------------------------------------------------------ |
1794 | // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate |
1795 | // regState (either compiler->intRegState or compiler->floatRegState), |
1796 | // with the lvArgReg on "argDsc" |
1797 | // |
1798 | // Arguments: |
1799 | // argDsc - the argument for which the state is to be updated. |
1800 | // |
1801 | // Return Value: None |
1802 | // |
1803 | // Assumptions: |
1804 | // The argument is live on entry to the function |
1805 | // (or is untracked and therefore assumed live) |
1806 | // |
1807 | // Notes: |
1808 | // This relies on a method in regAlloc.cpp that is shared between LSRA |
1809 | // and regAlloc. It is further abstracted here because regState is updated |
1810 | // separately for tracked and untracked variables in LSRA. |
1811 | // |
1812 | void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) |
1813 | { |
1814 | #if defined(UNIX_AMD64_ABI) |
1815 | // For System V AMD64 calls the argDsc can have 2 registers (for structs.) |
1816 | // Handle them here. |
1817 | if (varTypeIsStruct(argDsc)) |
1818 | { |
1819 | unixAmd64UpdateRegStateForArg(argDsc); |
1820 | } |
1821 | else |
1822 | #endif // defined(UNIX_AMD64_ABI) |
1823 | { |
1824 | RegState* intRegState = &compiler->codeGen->intRegState; |
1825 | RegState* floatRegState = &compiler->codeGen->floatRegState; |
        // On AMD64, we still use the floating point registers to model the register usage
        // of float args on varargs calls, so we ignore the varargs condition when deciding
        // whether XMM registers are used for setting up the call.
1830 | bool isFloat = (isFloatRegType(argDsc->lvType) |
1831 | #ifndef _TARGET_AMD64_ |
1832 | && !compiler->info.compIsVarArgs |
1833 | #endif |
1834 | && !compiler->opts.compUseSoftFP); |
1835 | |
1836 | if (argDsc->lvIsHfaRegArg()) |
1837 | { |
1838 | isFloat = true; |
1839 | } |
1840 | |
1841 | if (isFloat) |
1842 | { |
1843 | JITDUMP("Float arg V%02u in reg %s\n" , (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); |
1844 | compiler->raUpdateRegStateForArg(floatRegState, argDsc); |
1845 | } |
1846 | else |
1847 | { |
1848 | JITDUMP("Int arg V%02u in reg %s\n" , (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); |
1849 | #if FEATURE_MULTIREG_ARGS |
1850 | if (argDsc->lvOtherArgReg != REG_NA) |
1851 | { |
1852 | JITDUMP("(second half) in reg %s\n" , getRegName(argDsc->lvOtherArgReg)); |
1853 | } |
1854 | #endif // FEATURE_MULTIREG_ARGS |
1855 | compiler->raUpdateRegStateForArg(intRegState, argDsc); |
1856 | } |
1857 | } |
1858 | } |
1859 | |
1860 | //------------------------------------------------------------------------ |
1861 | // buildIntervals: The main entry point for building the data structures over |
1862 | // which we will do register allocation. |
1863 | // |
1864 | void LinearScan::buildIntervals() |
1865 | { |
1866 | BasicBlock* block; |
1867 | |
1868 | JITDUMP("\nbuildIntervals ========\n" ); |
1869 | |
1870 | // Build (empty) records for all of the physical registers |
1871 | buildPhysRegRecords(); |
1872 | |
1873 | #ifdef DEBUG |
1874 | if (VERBOSE) |
1875 | { |
1876 | printf("\n-----------------\n" ); |
1877 | printf("LIVENESS:\n" ); |
1878 | printf("-----------------\n" ); |
1879 | foreach_block(compiler, block) |
1880 | { |
            printf(FMT_BB " use def in out\n", block->bbNum);
            dumpConvertedVarSet(compiler, block->bbVarUse);
            printf("\n");
            dumpConvertedVarSet(compiler, block->bbVarDef);
            printf("\n");
            dumpConvertedVarSet(compiler, block->bbLiveIn);
            printf("\n");
            dumpConvertedVarSet(compiler, block->bbLiveOut);
            printf("\n");
1890 | } |
1891 | } |
1892 | #endif // DEBUG |
1893 | |
1894 | #if DOUBLE_ALIGN |
1895 | // We will determine whether we should double align the frame during |
1896 | // identifyCandidates(), but we initially assume that we will not. |
1897 | doDoubleAlign = false; |
1898 | #endif |
1899 | |
1900 | identifyCandidates(); |
1901 | |
1902 | // Figure out if we're going to use a frame pointer. We need to do this before building |
1903 | // the ref positions, because those objects will embed the frame register in various register masks |
1904 | // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will |
1905 | // remove the frame pointer from the masks. |
1906 | setFrameType(); |
1907 | |
1908 | DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE)); |
1909 | |
1910 | // second part: |
1911 | JITDUMP("\nbuildIntervals second part ========\n" ); |
1912 | currentLoc = 0; |
1913 | // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were |
1914 | // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering. |
1915 | // Previously, the block sequencing was done for the (formerly separate) Build pass, |
1916 | // and the curBBNum was left as the last block sequenced. This block was then used to set the |
1917 | // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the |
1918 | // normalized entry weight (compiler->fgCalledCount), but that results in a net regression. |
1919 | if (!blockSequencingDone) |
1920 | { |
1921 | setBlockSequence(); |
1922 | } |
1923 | curBBNum = blockSequence[bbSeqCount - 1]->bbNum; |
1924 | |
    // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex.
1928 | |
1929 | LclVarDsc* argDsc; |
1930 | unsigned int lclNum; |
1931 | |
1932 | RegState* intRegState = &compiler->codeGen->intRegState; |
1933 | RegState* floatRegState = &compiler->codeGen->floatRegState; |
1934 | intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; |
1935 | floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; |
1936 | |
1937 | for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) |
1938 | { |
1939 | lclNum = compiler->lvaTrackedToVarNum[varIndex]; |
1940 | argDsc = &(compiler->lvaTable[lclNum]); |
1941 | |
1942 | if (!argDsc->lvIsParam) |
1943 | { |
1944 | continue; |
1945 | } |
1946 | |
1947 | // Only reserve a register if the argument is actually used. |
1948 | // Is it dead on entry? If compJmpOpUsed is true, then the arguments |
1949 | // have to be kept alive, so we have to consider it as live on entry. |
1950 | // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we |
1951 | // won't have done dataflow on it, but it needs to be marked as live-in so |
1952 | // it will get saved in the prolog. |
1953 | if (!compiler->compJmpOpUsed && argDsc->lvRefCnt() == 0 && !compiler->opts.compDbgCode) |
1954 | { |
1955 | continue; |
1956 | } |
1957 | |
1958 | if (argDsc->lvIsRegArg) |
1959 | { |
1960 | updateRegStateForArg(argDsc); |
1961 | } |
1962 | |
1963 | if (isCandidateVar(argDsc)) |
1964 | { |
1965 | Interval* interval = getIntervalForLocalVar(varIndex); |
1966 | regMaskTP mask = allRegs(TypeGet(argDsc)); |
1967 | if (argDsc->lvIsRegArg) |
1968 | { |
1969 | // Set this interval as currently assigned to that register |
1970 | regNumber inArgReg = argDsc->lvArgReg; |
1971 | assert(inArgReg < REG_COUNT); |
1972 | mask = genRegMask(inArgReg); |
1973 | assignPhysReg(inArgReg, interval); |
1974 | } |
1975 | RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask); |
1976 | } |
1977 | else if (varTypeIsStruct(argDsc->lvType)) |
1978 | { |
1979 | for (unsigned fieldVarNum = argDsc->lvFieldLclStart; |
1980 | fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum) |
1981 | { |
1982 | LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]); |
1983 | if (fieldVarDsc->lvLRACandidate) |
1984 | { |
1985 | assert(fieldVarDsc->lvTracked); |
1986 | Interval* interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex); |
1987 | RefPosition* pos = |
1988 | newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc))); |
1989 | } |
1990 | } |
1991 | } |
1992 | else |
1993 | { |
1994 | // We can overwrite the register (i.e. codegen saves it on entry) |
1995 | assert(argDsc->lvRefCnt() == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister || |
1996 | !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode)); |
1997 | } |
1998 | } |
1999 | |
2000 | // Now set up the reg state for the non-tracked args |
2001 | // (We do this here because we want to generate the ParamDef RefPositions in tracked |
2002 | // order, so that loop doesn't hit the non-tracked args) |
2003 | |
    // Note: 'argDsc' is reassigned at the top of each iteration, so it is not advanced here.
    for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++)
2005 | { |
2006 | argDsc = &(compiler->lvaTable[argNum]); |
2007 | |
2008 | if (argDsc->lvPromotedStruct()) |
2009 | { |
2010 | noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here |
2011 | |
2012 | unsigned fieldVarNum = argDsc->lvFieldLclStart; |
2013 | argDsc = &(compiler->lvaTable[fieldVarNum]); |
2014 | } |
2015 | noway_assert(argDsc->lvIsParam); |
2016 | if (!argDsc->lvTracked && argDsc->lvIsRegArg) |
2017 | { |
2018 | updateRegStateForArg(argDsc); |
2019 | } |
2020 | } |
2021 | |
2022 | // If there is a secret stub param, it is also live in |
2023 | if (compiler->info.compPublishStubParam) |
2024 | { |
2025 | intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM; |
2026 | } |
2027 | |
2028 | BasicBlock* predBlock = nullptr; |
2029 | BasicBlock* prevBlock = nullptr; |
2030 | |
2031 | // Initialize currentLiveVars to the empty set. We will set it to the current |
2032 | // live-in at the entry to each block (this will include the incoming args on |
2033 | // the first block). |
2034 | VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler)); |
2035 | |
2036 | for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock()) |
2037 | { |
2038 | JITDUMP("\nNEW BLOCK " FMT_BB "\n" , block->bbNum); |
2039 | |
2040 | bool predBlockIsAllocated = false; |
2041 | predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated)); |
2042 | if (predBlock) |
2043 | { |
2044 | JITDUMP("\n\nSetting " FMT_BB " as the predecessor for determining incoming variable registers of " FMT_BB |
2045 | "\n" , |
2046 | block->bbNum, predBlock->bbNum); |
2047 | assert(predBlock->bbNum <= bbNumMaxBeforeResolution); |
2048 | blockInfo[block->bbNum].predBBNum = predBlock->bbNum; |
2049 | } |
2050 | |
2051 | if (enregisterLocalVars) |
2052 | { |
2053 | VarSetOps::AssignNoCopy(compiler, currentLiveVars, |
2054 | VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn)); |
2055 | |
2056 | if (block == compiler->fgFirstBB) |
2057 | { |
2058 | insertZeroInitRefPositions(); |
2059 | // The first real location is at 1; 0 is for the entry. |
2060 | currentLoc = 1; |
2061 | } |
2062 | |
2063 | // Any lclVars live-in to a block are resolution candidates. |
2064 | VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars); |
2065 | |
2066 | // Determine if we need any DummyDefs. |
2067 | // We need DummyDefs for cases where "predBlock" isn't really a predecessor. |
            // Note that it's possible to have uses of uninitialized variables, in which case even the first
2069 | // block may require DummyDefs, which we are not currently adding - this means that these variables |
2070 | // will always be considered to be in memory on entry (and reloaded when the use is encountered). |
2071 | // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized |
2072 | // variables (which may actually be initialized along the dynamically executed paths, but not |
2073 | // on all static paths), we wind up with excessive liveranges for some of these variables. |
2074 | VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars)); |
2075 | if (predBlock) |
2076 | { |
2077 | // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut |
2078 | VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut); |
2079 | } |
2080 | bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB); |
2081 | |
2082 | // Create dummy def RefPositions |
2083 | |
2084 | if (needsDummyDefs) |
2085 | { |
2086 | // If we are using locations from a predecessor, we should never require DummyDefs. |
2087 | assert(!predBlockIsAllocated); |
2088 | |
2089 | JITDUMP("Creating dummy definitions\n" ); |
2090 | VarSetOps::Iter iter(compiler, newLiveIn); |
2091 | unsigned varIndex = 0; |
2092 | while (iter.NextElem(&varIndex)) |
2093 | { |
2094 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
2095 | LclVarDsc* varDsc = compiler->lvaTable + varNum; |
2096 | // Add a dummyDef for any candidate vars that are in the "newLiveIn" set. |
2097 | // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs). |
2098 | if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam)) |
2099 | { |
2100 | Interval* interval = getIntervalForLocalVar(varIndex); |
2101 | RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr, |
2102 | allRegs(interval->registerType)); |
2103 | } |
2104 | } |
2105 | JITDUMP("Finished creating dummy definitions\n\n" ); |
2106 | } |
2107 | } |
2108 | |
2109 | // Add a dummy RefPosition to mark the block boundary. |
2110 | // Note that we do this AFTER adding the exposed uses above, because the |
2111 | // register positions for those exposed uses need to be recorded at |
2112 | // this point. |
2113 | |
2114 | RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE); |
2115 | currentLoc += 2; |
2116 | JITDUMP("\n" ); |
2117 | |
2118 | LIR::Range& blockRange = LIR::AsRange(block); |
2119 | for (GenTree* node : blockRange.NonPhiNodes()) |
2120 | { |
2121 | // We increment the number position of each tree node by 2 to simplify the logic when there's the case of |
2122 | // a tree that implicitly does a dual-definition of temps (the long case). In this case it is easier to |
2123 | // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only |
2124 | // increment the number position by one. |
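            // As an illustrative sketch of the numbering: a node at location N builds its uses
            // at N and its def(s) at N + 1, and the next node starts at N + 2; the spacing of 2
            // leaves room for the extra definition in the dual-def (long) case.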
2125 | CLANG_FORMAT_COMMENT_ANCHOR; |
2126 | |
2127 | #ifdef DEBUG |
2128 | node->gtSeqNum = currentLoc; |
        // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show
        // the register value. Although the assignment below looks like a no-op, it sets that tag.
2132 | node->gtRegNum = node->gtRegNum; |
2133 | #endif |
2134 | |
2135 | buildRefPositionsForNode(node, block, currentLoc); |
2136 | |
2137 | #ifdef DEBUG |
2138 | if (currentLoc > maxNodeLocation) |
2139 | { |
2140 | maxNodeLocation = currentLoc; |
2141 | } |
2142 | #endif // DEBUG |
2143 | currentLoc += 2; |
2144 | } |
2145 | |
2146 | // Note: the visited set is cleared in LinearScan::doLinearScan() |
2147 | markBlockVisited(block); |
2148 | if (!defList.IsEmpty()) |
2149 | { |
2150 | INDEBUG(dumpDefList()); |
2151 | assert(!"Expected empty defList at end of block" ); |
2152 | } |
2153 | |
2154 | if (enregisterLocalVars) |
2155 | { |
2156 | // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the |
2157 | // next block, or any unvisited successors. |
2158 | // This will address lclVars that are live on a backedge, as well as those that are kept |
2159 | // live at a GT_JMP. |
2160 | // |
2161 | // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP, |
2162 | // and jmp call is represented using GT_JMP node which is a leaf node. |
2163 | // Liveness phase keeps all the arguments of the method live till the end of |
2164 | // block by adding them to liveout set of the block containing GT_JMP. |
2165 | // |
2166 | // The target of a GT_JMP implicitly uses all the current method arguments, however |
2167 | // there are no actual references to them. This can cause LSRA to assert, because |
2168 | // the variables are live but it sees no references. In order to correctly model the |
2169 | // liveness of these arguments, we add dummy exposed uses, in the same manner as for |
2170 | // backward branches. This will happen automatically via expUseSet. |
2171 | // |
2172 | // Note that a block ending with GT_JMP has no successors and hence the variables |
2173 | // for which dummy use ref positions are added are arguments of the method. |
2174 | |
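            // In effect:
            //   expUseSet = (block->bbLiveOut & registerCandidateVars)
            //               - nextBlock->bbLiveIn - (bbLiveIn of each unvisited successor)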
2175 | VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut)); |
2176 | VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars); |
2177 | BasicBlock* nextBlock = getNextBlock(); |
2178 | if (nextBlock != nullptr) |
2179 | { |
2180 | VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn); |
2181 | } |
2182 | for (BasicBlock* succ : block->GetAllSuccs(compiler)) |
2183 | { |
2184 | if (VarSetOps::IsEmpty(compiler, expUseSet)) |
2185 | { |
2186 | break; |
2187 | } |
2188 | |
2189 | if (isBlockVisited(succ)) |
2190 | { |
2191 | continue; |
2192 | } |
2193 | VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn); |
2194 | } |
2195 | |
2196 | if (!VarSetOps::IsEmpty(compiler, expUseSet)) |
2197 | { |
2198 | JITDUMP("Exposed uses:" ); |
2199 | VarSetOps::Iter iter(compiler, expUseSet); |
2200 | unsigned varIndex = 0; |
2201 | while (iter.NextElem(&varIndex)) |
2202 | { |
2203 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
2204 | LclVarDsc* varDsc = compiler->lvaTable + varNum; |
2205 | assert(isCandidateVar(varDsc)); |
2206 | Interval* interval = getIntervalForLocalVar(varIndex); |
2207 | RefPosition* pos = |
2208 | newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); |
2209 | JITDUMP(" V%02u" , varNum); |
2210 | } |
2211 | JITDUMP("\n" ); |
2212 | } |
2213 | |
2214 | // Clear the "last use" flag on any vars that are live-out from this block. |
2215 | { |
2216 | VARSET_TP bbLiveDefs(VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveOut)); |
2217 | VarSetOps::Iter iter(compiler, bbLiveDefs); |
2218 | unsigned varIndex = 0; |
2219 | while (iter.NextElem(&varIndex)) |
2220 | { |
2221 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
2222 | LclVarDsc* const varDsc = &compiler->lvaTable[varNum]; |
2223 | assert(isCandidateVar(varDsc)); |
2224 | RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition; |
2225 | // We should be able to assert that lastRP is non-null if it is live-out, but sometimes liveness |
2226 | // lies. |
2227 | if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum)) |
2228 | { |
2229 | lastRP->lastUse = false; |
2230 | } |
2231 | } |
2232 | } |
2233 | |
2234 | #ifdef DEBUG |
2235 | checkLastUses(block); |
2236 | |
2237 | if (VERBOSE) |
2238 | { |
2239 | printf("use: " ); |
2240 | dumpConvertedVarSet(compiler, block->bbVarUse); |
2241 | printf("\ndef: " ); |
2242 | dumpConvertedVarSet(compiler, block->bbVarDef); |
2243 | printf("\n" ); |
2244 | } |
2245 | #endif // DEBUG |
2246 | } |
2247 | |
2248 | prevBlock = block; |
2249 | } |
2250 | |
2251 | if (enregisterLocalVars) |
2252 | { |
2253 | if (compiler->lvaKeepAliveAndReportThis()) |
2254 | { |
2255 | // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end |
2256 | unsigned keepAliveVarNum = compiler->info.compThisArg; |
2257 | assert(compiler->info.compIsStatic == false); |
2258 | LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum; |
2259 | if (isCandidateVar(varDsc)) |
2260 | { |
2261 | JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n" ); |
2262 | Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); |
2263 | RefPosition* pos = |
2264 | newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); |
2265 | } |
2266 | } |
2267 | |
2268 | #ifdef DEBUG |
2269 | if (getLsraExtendLifeTimes()) |
2270 | { |
2271 | LclVarDsc* varDsc; |
2272 | for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++) |
2273 | { |
2274 | if (varDsc->lvLRACandidate) |
2275 | { |
2276 | JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n" , lclNum); |
2277 | Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); |
2278 | RefPosition* pos = |
2279 | newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); |
2280 | } |
2281 | } |
2282 | } |
2283 | #endif // DEBUG |
2284 | } |
2285 | |
2286 | // If the last block has successors, create a RefTypeBB to record |
2287 | // what's live |
2288 | |
2289 | if (prevBlock->NumSucc(compiler) > 0) |
2290 | { |
2291 | RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE); |
2292 | } |
2293 | |
2294 | #ifdef DEBUG |
2295 | // Make sure we don't have any blocks that were not visited |
2296 | foreach_block(compiler, block) |
2297 | { |
2298 | assert(isBlockVisited(block)); |
2299 | } |
2300 | |
2301 | if (VERBOSE) |
2302 | { |
        lsraDumpIntervals("BEFORE VALIDATING INTERVALS");
        dumpRefPositions("BEFORE VALIDATING INTERVALS");
2305 | validateIntervals(); |
2306 | } |
2307 | #endif // DEBUG |
2308 | } |
2309 | |
2310 | #ifdef DEBUG |
2311 | //------------------------------------------------------------------------ |
2312 | // validateIntervals: A DEBUG-only method that checks that the lclVar RefPositions |
2313 | // do not reflect uses of undefined values |
2314 | // |
2315 | // Notes: If an undefined use is encountered, it merely prints a message. |
2316 | // |
2317 | // TODO-Cleanup: This should probably assert, or at least print the message only |
2318 | // when doing a JITDUMP. |
2319 | // |
2320 | void LinearScan::validateIntervals() |
2321 | { |
2322 | if (enregisterLocalVars) |
2323 | { |
2324 | for (unsigned i = 0; i < compiler->lvaTrackedCount; i++) |
2325 | { |
2326 | if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate) |
2327 | { |
2328 | continue; |
2329 | } |
2330 | Interval* interval = getIntervalForLocalVar(i); |
2331 | |
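            // Walk this interval's RefPositions in forward order, tracking whether the lclVar
            // is currently defined; a use encountered while it is undefined is reported below.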
2332 | bool defined = false; |
2333 | printf("-----------------\n" ); |
2334 | for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition) |
2335 | { |
2336 | ref->dump(); |
2337 | RefType refType = ref->refType; |
2338 | if (!defined && RefTypeIsUse(refType)) |
2339 | { |
2340 | if (compiler->info.compMethodName != nullptr) |
2341 | { |
2342 | printf("%s: " , compiler->info.compMethodName); |
2343 | } |
2344 | printf("LocalVar V%02u: undefined use at %u\n" , interval->varNum, ref->nodeLocation); |
2345 | } |
2346 | // Note that there can be multiple last uses if they are on disjoint paths, |
2347 | // so we can't really check the lastUse flag |
2348 | if (ref->lastUse) |
2349 | { |
2350 | defined = false; |
2351 | } |
2352 | if (RefTypeIsDef(refType)) |
2353 | { |
2354 | defined = true; |
2355 | } |
2356 | } |
2357 | } |
2358 | } |
2359 | } |
2360 | #endif // DEBUG |
2361 | |
2362 | //------------------------------------------------------------------------ |
2363 | // BuildDef: Build a RefTypeDef RefPosition for the given node |
2364 | // |
2365 | // Arguments: |
2366 | // tree - The node that defines a register |
2367 | // dstCandidates - The candidate registers for the definition |
2368 | // multiRegIdx - The index of the definition, defaults to zero. |
2369 | // Only non-zero for multi-reg nodes. |
2370 | // |
2371 | // Return Value: |
2372 | // The newly created RefPosition. |
2373 | // |
2374 | // Notes: |
2375 | // Adds the RefInfo for the definition to the defList. |
2376 | // |
2377 | RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int multiRegIdx) |
2378 | { |
2379 | assert(!tree->isContained()); |
2380 | RegisterType type = getDefType(tree); |
2381 | |
2382 | if (dstCandidates != RBM_NONE) |
2383 | { |
2384 | assert((tree->gtRegNum == REG_NA) || (dstCandidates == genRegMask(tree->GetRegByIndex(multiRegIdx)))); |
2385 | } |
2386 | |
2387 | if (tree->IsMultiRegNode()) |
2388 | { |
2389 | type = tree->GetRegTypeByIndex(multiRegIdx); |
2390 | } |
2391 | |
2392 | Interval* interval = newInterval(type); |
2393 | if (tree->gtRegNum != REG_NA) |
2394 | { |
2395 | if (!tree->IsMultiRegNode() || (multiRegIdx == 0)) |
2396 | { |
2397 | assert((dstCandidates == RBM_NONE) || (dstCandidates == genRegMask(tree->gtRegNum))); |
2398 | dstCandidates = genRegMask(tree->gtRegNum); |
2399 | } |
2400 | else |
2401 | { |
2402 | assert(isSingleRegister(dstCandidates)); |
2403 | } |
2404 | } |
2405 | #ifdef _TARGET_X86_ |
2406 | else if (varTypeIsByte(tree)) |
2407 | { |
2408 | if (dstCandidates == RBM_NONE) |
2409 | { |
2410 | dstCandidates = allRegs(TYP_INT); |
2411 | } |
2412 | dstCandidates &= ~RBM_NON_BYTE_REGS; |
2413 | assert(dstCandidates != RBM_NONE); |
2414 | } |
2415 | #endif // _TARGET_X86_ |
2416 | if (pendingDelayFree) |
2417 | { |
2418 | interval->hasInterferingUses = true; |
2419 | // pendingDelayFree = false; |
2420 | } |
2421 | RefPosition* defRefPosition = |
2422 | newRefPosition(interval, currentLoc + 1, RefTypeDef, tree, dstCandidates, multiRegIdx); |
2423 | if (tree->IsUnusedValue()) |
2424 | { |
2425 | defRefPosition->isLocalDefUse = true; |
2426 | defRefPosition->lastUse = true; |
2427 | } |
2428 | else |
2429 | { |
2430 | RefInfoListNode* refInfo = listNodePool.GetNode(defRefPosition, tree); |
2431 | defList.Append(refInfo); |
2432 | } |
2433 | if (tgtPrefUse != nullptr) |
2434 | { |
2435 | interval->assignRelatedIntervalIfUnassigned(tgtPrefUse->getInterval()); |
2436 | } |
2437 | return defRefPosition; |
2438 | } |
2439 | |
2440 | //------------------------------------------------------------------------ |
// BuildDefs: Build one or more RefTypeDef RefPositions for the given node
2442 | // |
2443 | // Arguments: |
2444 | // tree - The node that defines a register |
2445 | // dstCount - The number of registers defined by the node |
2446 | // dstCandidates - the candidate registers for the definition |
2447 | // |
2448 | // Notes: |
2449 | // Adds the RefInfo for the definitions to the defList. |
2450 | // |
2451 | void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) |
2452 | { |
2453 | bool fixedReg = false; |
2454 | if ((dstCount > 1) && (dstCandidates != RBM_NONE) && ((int)genCountBits(dstCandidates) == dstCount)) |
2455 | { |
2456 | fixedReg = true; |
2457 | } |
2458 | ReturnTypeDesc* retTypeDesc = nullptr; |
2459 | if (tree->IsMultiRegCall()) |
2460 | { |
2461 | retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); |
2462 | } |
2463 | for (int i = 0; i < dstCount; i++) |
2464 | { |
2465 | regMaskTP thisDstCandidates; |
2466 | if (fixedReg) |
2467 | { |
2468 | // In case of multi-reg call node, we have to query the ith position return register. |
2469 | // For all other cases of multi-reg definitions, the registers must be in sequential order. |
2470 | if (retTypeDesc != nullptr) |
2471 | { |
2472 | thisDstCandidates = genRegMask(tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i)); |
2473 | assert((dstCandidates & thisDstCandidates) != RBM_NONE); |
2474 | } |
2475 | else |
2476 | { |
2477 | thisDstCandidates = genFindLowestBit(dstCandidates); |
2478 | } |
2479 | dstCandidates &= ~thisDstCandidates; |
2480 | } |
2481 | else |
2482 | { |
2483 | thisDstCandidates = dstCandidates; |
2484 | } |
2485 | BuildDef(tree, thisDstCandidates, i); |
2486 | } |
2487 | } |
2488 | |
2489 | //------------------------------------------------------------------------ |
// BuildDefsWithKills: Build one or more RefTypeDef RefPositions for the given node,
//                     as well as kills as specified by the given mask.
2492 | // |
2493 | // Arguments: |
2494 | // tree - The node that defines a register |
2495 | // dstCount - The number of registers defined by the node |
2496 | // dstCandidates - The candidate registers for the definition |
2497 | // killMask - The mask of registers killed by this node |
2498 | // |
2499 | // Notes: |
2500 | // Adds the RefInfo for the definitions to the defList. |
2501 | // The def and kill functionality is folded into a single method so that the |
2502 | // save and restores of upper vector registers can be bracketed around the def. |
2503 | // |
2504 | void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask) |
2505 | { |
2506 | // Generate Kill RefPositions |
2507 | assert(killMask == getKillSetForNode(tree)); |
2508 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
2509 | VARSET_TP liveLargeVectors(VarSetOps::UninitVal()); |
2510 | bool doLargeVectorRestore = false; |
2511 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
2512 | if (killMask != RBM_NONE) |
2513 | { |
2514 | buildKillPositionsForNode(tree, currentLoc + 1, killMask); |
2515 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
2516 | if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE)) |
2517 | { |
2518 | // Build RefPositions for saving any live large vectors. |
2519 | // This must be done after the kills, so that we know which large vectors are still live. |
2520 | VarSetOps::AssignNoCopy(compiler, liveLargeVectors, |
2521 | buildUpperVectorSaveRefPositions(tree, currentLoc + 1, killMask)); |
2522 | doLargeVectorRestore = true; |
2523 | } |
2524 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
2525 | } |
2526 | |
2527 | // Now, create the Def(s) |
2528 | BuildDefs(tree, dstCount, dstCandidates); |
2529 | |
2530 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
2531 | // Finally, generate the UpperVectorRestores |
2532 | if (doLargeVectorRestore) |
2533 | { |
2534 | buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors); |
2535 | } |
2536 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
2537 | } |
2538 | |
2539 | //------------------------------------------------------------------------ |
2540 | // BuildUse: Remove the RefInfoListNode for the given multi-reg index of the given node from |
2541 | // the defList, and build a use RefPosition for the associated Interval. |
2542 | // |
2543 | // Arguments: |
2544 | // operand - The node of interest |
2545 | // candidates - The register candidates for the use |
2546 | // multiRegIdx - The index of the multireg def/use |
2547 | // |
2548 | // Return Value: |
2549 | // The newly created use RefPosition |
2550 | // |
2551 | // Notes: |
2552 | // The node must not be contained, and must have been processed by buildRefPositionsForNode(). |
2553 | // |
2554 | RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) |
2555 | { |
2556 | assert(!operand->isContained()); |
2557 | Interval* interval; |
2558 | bool regOptional = operand->IsRegOptional(); |
2559 | |
2560 | if (isCandidateLocalRef(operand)) |
2561 | { |
2562 | interval = getIntervalForLocalVarNode(operand->AsLclVarCommon()); |
2563 | |
2564 | // We have only approximate last-use information at this point. This is because the |
2565 | // execution order doesn't actually reflect the true order in which the localVars |
2566 | // are referenced - but the order of the RefPositions will, so we recompute it after |
2567 | // RefPositions are built. |
2568 | // Use the old value for setting currentLiveVars - note that we do this with the |
2569 | // not-quite-correct setting of lastUse. However, this is OK because |
2570 | // 1) this is only for preferencing, which doesn't require strict correctness, and |
2571 | // 2) the cases where these out-of-order uses occur should not overlap a kill. |
2572 | // TODO-Throughput: clean this up once we have the execution order correct. At that point |
2573 | // we can update currentLiveVars at the same place that we create the RefPosition. |
2574 | if ((operand->gtFlags & GTF_VAR_DEATH) != 0) |
2575 | { |
2576 | unsigned varIndex = interval->getVarIndex(compiler); |
2577 | VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex); |
2578 | } |
2579 | } |
2580 | else |
2581 | { |
2582 | RefInfoListNode* refInfo = defList.removeListNode(operand, multiRegIdx); |
2583 | RefPosition* defRefPos = refInfo->ref; |
2584 | assert(defRefPos->multiRegIdx == multiRegIdx); |
2585 | interval = defRefPos->getInterval(); |
2586 | listNodePool.ReturnNode(refInfo); |
2587 | operand = nullptr; |
2588 | } |
2589 | RefPosition* useRefPos = newRefPosition(interval, currentLoc, RefTypeUse, operand, candidates, multiRegIdx); |
2590 | useRefPos->setAllocateIfProfitable(regOptional); |
2591 | return useRefPos; |
2592 | } |
2593 | |
2594 | //------------------------------------------------------------------------ |
2595 | // BuildIndirUses: Build Use RefPositions for an indirection that might be contained |
2596 | // |
2597 | // Arguments: |
//    indirTree  - The indirection node of interest
//    candidates - The register candidates for the uses
2599 | // |
2600 | // Return Value: |
2601 | // The number of source registers used by the *parent* of this node. |
2602 | // |
2603 | // Notes: |
2604 | // This method may only be used if the candidates are the same for all sources. |
2605 | // |
2606 | int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates) |
2607 | { |
2608 | GenTree* const addr = indirTree->gtOp1; |
2609 | if (!addr->isContained()) |
2610 | { |
2611 | BuildUse(addr, candidates); |
2612 | return 1; |
2613 | } |
2614 | if (!addr->OperIs(GT_LEA)) |
2615 | { |
2616 | return 0; |
2617 | } |
2618 | |
2619 | GenTreeAddrMode* const addrMode = addr->AsAddrMode(); |
2620 | |
2621 | unsigned srcCount = 0; |
2622 | if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained()) |
2623 | { |
2624 | BuildUse(addrMode->Base(), candidates); |
2625 | srcCount++; |
2626 | } |
2627 | if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained()) |
2628 | { |
2629 | BuildUse(addrMode->Index(), candidates); |
2630 | srcCount++; |
2631 | } |
2632 | return srcCount; |
2633 | } |
2634 | |
2635 | //------------------------------------------------------------------------ |
2636 | // BuildOperandUses: Build Use RefPositions for an operand that might be contained. |
2637 | // |
2638 | // Arguments: |
//    node       - The node of interest
//    candidates - The register candidates for the uses
2640 | // |
2641 | // Return Value: |
2642 | // The number of source registers used by the *parent* of this node. |
2643 | // |
2644 | int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) |
2645 | { |
2646 | if (!node->isContained()) |
2647 | { |
2648 | BuildUse(node, candidates); |
2649 | return 1; |
2650 | } |
2651 | |
2652 | #if !defined(_TARGET_64BIT_) |
2653 | if (node->OperIs(GT_LONG)) |
2654 | { |
2655 | return BuildBinaryUses(node->AsOp(), candidates); |
2656 | } |
2657 | #endif // !defined(_TARGET_64BIT_) |
2658 | if (node->OperIsIndir()) |
2659 | { |
2660 | return BuildIndirUses(node->AsIndir(), candidates); |
2661 | } |
2662 | if (node->OperIsHWIntrinsic()) |
2663 | { |
2664 | BuildUse(node->gtGetOp1(), candidates); |
2665 | return 1; |
2666 | } |
2667 | |
2668 | return 0; |
2669 | } |
2670 | |
2671 | //------------------------------------------------------------------------ |
2672 | // setDelayFree: Mark a RefPosition as delayRegFree, and set pendingDelayFree |
2673 | // |
2674 | // Arguments: |
2675 | // use - The use RefPosition to mark |
2676 | // |
2677 | void LinearScan::setDelayFree(RefPosition* use) |
2678 | { |
2679 | use->delayRegFree = true; |
2680 | pendingDelayFree = true; |
2681 | } |
2682 | |
2683 | //------------------------------------------------------------------------ |
2684 | // BuildDelayFreeUses: Build Use RefPositions for an operand that might be contained, |
2685 | // and which need to be marked delayRegFree |
2686 | // |
2687 | // Arguments: |
//    node       - The node of interest
//    candidates - The register candidates for the uses
2689 | // |
2690 | // Return Value: |
2691 | // The number of source registers used by the *parent* of this node. |
2692 | // |
2693 | int LinearScan::BuildDelayFreeUses(GenTree* node, regMaskTP candidates) |
2694 | { |
2695 | RefPosition* use; |
2696 | if (!node->isContained()) |
2697 | { |
2698 | use = BuildUse(node, candidates); |
2699 | setDelayFree(use); |
2700 | return 1; |
2701 | } |
2702 | if (node->OperIsHWIntrinsic()) |
2703 | { |
2704 | use = BuildUse(node->gtGetOp1(), candidates); |
2705 | setDelayFree(use); |
2706 | return 1; |
2707 | } |
2708 | if (!node->OperIsIndir()) |
2709 | { |
2710 | return 0; |
2711 | } |
2712 | GenTreeIndir* indirTree = node->AsIndir(); |
2713 | GenTree* addr = indirTree->gtOp1; |
2714 | if (!addr->isContained()) |
2715 | { |
2716 | use = BuildUse(addr, candidates); |
2717 | setDelayFree(use); |
2718 | return 1; |
2719 | } |
2720 | if (!addr->OperIs(GT_LEA)) |
2721 | { |
2722 | return 0; |
2723 | } |
2724 | |
2725 | GenTreeAddrMode* const addrMode = addr->AsAddrMode(); |
2726 | |
2727 | unsigned srcCount = 0; |
2728 | if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained()) |
2729 | { |
2730 | use = BuildUse(addrMode->Base(), candidates); |
2731 | setDelayFree(use); |
2732 | srcCount++; |
2733 | } |
2734 | if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained()) |
2735 | { |
2736 | use = BuildUse(addrMode->Index(), candidates); |
2737 | setDelayFree(use); |
2738 | srcCount++; |
2739 | } |
2740 | return srcCount; |
2741 | } |
2742 | |
2743 | //------------------------------------------------------------------------ |
2744 | // BuildBinaryUses: Get the RefInfoListNodes for the operands of the |
2745 | // given node, and build uses for them. |
2746 | // |
2747 | // Arguments: |
//    node       - a GenTreeOp
//    candidates - The register candidates for the uses
2749 | // |
2750 | // Return Value: |
2751 | // The number of actual register operands. |
2752 | // |
2753 | // Notes: |
2754 | // The operands must already have been processed by buildRefPositionsForNode, and their |
2755 | // RefInfoListNodes placed in the defList. |
2756 | // |
2757 | int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates) |
2758 | { |
2759 | #ifdef _TARGET_XARCH_ |
2761 | if (node->OperIsBinary() && isRMWRegOper(node)) |
2762 | { |
2763 | return BuildRMWUses(node, candidates); |
2764 | } |
2765 | #endif // _TARGET_XARCH_ |
2766 | int srcCount = 0; |
2767 | GenTree* op1 = node->gtOp1; |
2768 | GenTree* op2 = node->gtGetOp2IfPresent(); |
2769 | if (node->IsReverseOp() && (op2 != nullptr)) |
2770 | { |
2771 | srcCount += BuildOperandUses(op2, candidates); |
2772 | op2 = nullptr; |
2773 | } |
2774 | if (op1 != nullptr) |
2775 | { |
2776 | srcCount += BuildOperandUses(op1, candidates); |
2777 | } |
2778 | if (op2 != nullptr) |
2779 | { |
2780 | srcCount += BuildOperandUses(op2, candidates); |
2781 | } |
2782 | return srcCount; |
2783 | } |
2784 | |
2785 | //------------------------------------------------------------------------ |
2786 | // BuildStoreLoc: Set register requirements for a store of a lclVar |
2787 | // |
2788 | // Arguments: |
2789 | // storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) |
2790 | // |
2791 | // Notes: |
2792 | // This involves: |
2793 | // - Setting the appropriate candidates for a store of a multi-reg call return value. |
2794 | // - Handling of contained immediates. |
2795 | // - Requesting an internal register for SIMD12 stores. |
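//
//    For example (a sketch of the multi-reg case, not a particular dump):
//
//        t1 = CALL struct                      // value returned in two registers
//             STORE_LCL_VAR struct V02, t1     // one use built per return register
//
// Return Value:
//    The number of source registers read by this node.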
2796 | // |
2797 | int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) |
2798 | { |
2799 | GenTree* op1 = storeLoc->gtGetOp1(); |
2800 | int srcCount; |
2801 | RefPosition* singleUseRef = nullptr; |
2802 | LclVarDsc* varDsc = &compiler->lvaTable[storeLoc->gtLclNum]; |
2803 | |
2804 | // First, define internal registers. |
2805 | #ifdef FEATURE_SIMD |
2806 | RefPosition* internalFloatDef = nullptr; |
2807 | if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12)) |
2808 | { |
2809 | // Need an additional register to extract upper 4 bytes of Vector3. |
2810 | internalFloatDef = buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs()); |
2811 | } |
2812 | #endif // FEATURE_SIMD |
2813 | |
2814 | // Second, use source registers. |
2815 | |
2816 | if (op1->IsMultiRegCall()) |
2817 | { |
2818 | // This is the case of var = call where call is returning |
2819 | // a value in multiple return registers. |
2820 | // Must be a store lclvar. |
2821 | assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); |
2822 | |
2823 | // srcCount = number of registers in which the value is returned by call |
2824 | GenTreeCall* call = op1->AsCall(); |
2825 | ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); |
        unsigned regCount = retTypeDesc->GetReturnRegCount();
        srcCount          = regCount;
2828 | |
2829 | for (int i = 0; i < srcCount; ++i) |
2830 | { |
2831 | BuildUse(op1, RBM_NONE, i); |
2832 | } |
2833 | } |
2834 | #ifndef _TARGET_64BIT_ |
2835 | else if (varTypeIsLong(op1)) |
2836 | { |
2837 | if (op1->OperIs(GT_MUL_LONG)) |
2838 | { |
2839 | srcCount = 2; |
2840 | BuildUse(op1, allRegs(TYP_INT), 0); |
2841 | BuildUse(op1, allRegs(TYP_INT), 1); |
2842 | } |
2843 | else |
2844 | { |
2845 | assert(op1->OperIs(GT_LONG)); |
2846 | assert(op1->isContained() && !op1->gtGetOp1()->isContained() && !op1->gtGetOp2()->isContained()); |
2847 | srcCount = BuildBinaryUses(op1->AsOp()); |
2848 | assert(srcCount == 2); |
2849 | } |
2850 | } |
2851 | #endif // !_TARGET_64BIT_ |
2852 | else if (op1->isContained()) |
2853 | { |
2854 | srcCount = 0; |
2855 | } |
2856 | else |
2857 | { |
2858 | srcCount = 1; |
2859 | regMaskTP srcCandidates = RBM_NONE; |
2860 | #ifdef _TARGET_X86_ |
2861 | if (varTypeIsByte(storeLoc)) |
2862 | { |
2863 | srcCandidates = allByteRegs(); |
2864 | } |
2865 | #endif // _TARGET_X86_ |
2866 | singleUseRef = BuildUse(op1, srcCandidates); |
2867 | } |
2868 | |
2869 | // Third, use internal registers. |
2870 | #ifdef FEATURE_SIMD |
2871 | buildInternalRegisterUses(); |
2872 | #endif // FEATURE_SIMD |
2873 | |
2874 | // Fourth, define destination registers. |
2875 | |
2876 | // Add the lclVar to currentLiveVars (if it will remain live) |
2877 | if (isCandidateVar(varDsc)) |
2878 | { |
2879 | assert(varDsc->lvTracked); |
2880 | unsigned varIndex = varDsc->lvVarIndex; |
2881 | Interval* varDefInterval = getIntervalForLocalVar(varIndex); |
2882 | if ((storeLoc->gtFlags & GTF_VAR_DEATH) == 0) |
2883 | { |
2884 | VarSetOps::AddElemD(compiler, currentLiveVars, varIndex); |
2885 | } |
2886 | if (singleUseRef != nullptr) |
2887 | { |
2888 | Interval* srcInterval = singleUseRef->getInterval(); |
2889 | if (srcInterval->relatedInterval == nullptr) |
2890 | { |
                // Preference the source to the dest, unless this is a non-last-use localVar.
                // The last-use info here is not fully accurate, but relying on it is still a
                // better approximation than preferencing the source to the dest when the
                // source's lifetime extends beyond the dest's.
2894 | if (!srcInterval->isLocalVar || (singleUseRef->treeNode->gtFlags & GTF_VAR_DEATH) != 0) |
2895 | { |
2896 | srcInterval->assignRelatedInterval(varDefInterval); |
2897 | } |
2898 | } |
2899 | else if (!srcInterval->isLocalVar) |
2900 | { |
2901 | // Preference the source to dest, if src is not a local var. |
2902 | srcInterval->assignRelatedInterval(varDefInterval); |
2903 | } |
2904 | } |
2905 | newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, allRegs(storeLoc->TypeGet())); |
2906 | } |
2907 | else |
2908 | { |
2909 | if (storeLoc->gtOp1->OperIs(GT_BITCAST)) |
2910 | { |
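            // The local is not a register candidate, so the value will be stored from the
            // source register directly to memory; retype the store and the BITCAST source
            // so that no cross-register-file copy is needed.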
2911 | storeLoc->gtType = storeLoc->gtOp1->gtType = storeLoc->gtOp1->AsUnOp()->gtOp1->TypeGet(); |
2912 | RegisterType registerType = regType(storeLoc->TypeGet()); |
2913 | noway_assert(singleUseRef != nullptr); |
2914 | |
2915 | Interval* srcInterval = singleUseRef->getInterval(); |
2916 | srcInterval->registerType = registerType; |
2917 | |
2918 | RefPosition* srcDefPosition = srcInterval->firstRefPosition; |
2919 | assert(srcDefPosition != nullptr); |
2920 | assert(srcDefPosition->refType == RefTypeDef); |
2921 | assert(srcDefPosition->treeNode == storeLoc->gtOp1); |
2922 | |
2923 | srcDefPosition->registerAssignment = allRegs(registerType); |
2924 | singleUseRef->registerAssignment = allRegs(registerType); |
2925 | } |
2926 | } |
2927 | |
2928 | return srcCount; |
2929 | } |
2930 | |
2931 | //------------------------------------------------------------------------ |
2932 | // BuildSimple: Builds use RefPositions for trees requiring no special handling |
2933 | // |
2934 | // Arguments: |
2935 | // tree - The node of interest |
2936 | // |
2937 | // Return Value: |
2938 | // The number of use RefPositions created |
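//
// Notes:
//    For example, a simple unary operation builds one use for its operand and one def
//    for its result, while constants and leaves build no uses at all.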
2939 | // |
2940 | int LinearScan::BuildSimple(GenTree* tree) |
2941 | { |
2942 | unsigned kind = tree->OperKind(); |
2943 | int srcCount = 0; |
2944 | if ((kind & (GTK_CONST | GTK_LEAF)) == 0) |
2945 | { |
2946 | assert((kind & GTK_SMPOP) != 0); |
2947 | srcCount = BuildBinaryUses(tree->AsOp()); |
2948 | } |
2949 | if (tree->IsValue()) |
2950 | { |
2951 | BuildDef(tree); |
2952 | } |
2953 | return srcCount; |
2954 | } |
2955 | |
2956 | //------------------------------------------------------------------------ |
// BuildReturn: Set the register requirements for a GT_RETURN node.
2958 | // |
2959 | // Arguments: |
2960 | // tree - The node of interest |
2961 | // |
2962 | // Return Value: |
2963 | // The number of sources consumed by this node. |
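//
// Notes:
//    GT_RETURN produces no value, so no def is built; the uses are constrained to the
//    ABI return register(s) (e.g. RBM_INTRET or RBM_FLOATRET) so that the value is
//    computed into, or moved to, the right register(s) before the return.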
2964 | // |
2965 | int LinearScan::BuildReturn(GenTree* tree) |
2966 | { |
2967 | GenTree* op1 = tree->gtGetOp1(); |
2968 | |
2969 | #if !defined(_TARGET_64BIT_) |
2970 | if (tree->TypeGet() == TYP_LONG) |
2971 | { |
2972 | assert((op1->OperGet() == GT_LONG) && op1->isContained()); |
2973 | GenTree* loVal = op1->gtGetOp1(); |
2974 | GenTree* hiVal = op1->gtGetOp2(); |
2975 | BuildUse(loVal, RBM_LNGRET_LO); |
2976 | BuildUse(hiVal, RBM_LNGRET_HI); |
2977 | return 2; |
2978 | } |
2979 | else |
2980 | #endif // !defined(_TARGET_64BIT_) |
2981 | if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) |
2982 | { |
2983 | regMaskTP useCandidates = RBM_NONE; |
2984 | |
2985 | #if FEATURE_MULTIREG_RET |
2986 | if (varTypeIsStruct(tree)) |
2987 | { |
2988 | // op1 has to be either an lclvar or a multi-reg returning call |
2989 | if (op1->OperGet() == GT_LCL_VAR) |
2990 | { |
2991 | BuildUse(op1, useCandidates); |
2992 | } |
2993 | else |
2994 | { |
2995 | noway_assert(op1->IsMultiRegCall()); |
2996 | |
2997 | ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); |
2998 | int srcCount = retTypeDesc->GetReturnRegCount(); |
2999 | useCandidates = retTypeDesc->GetABIReturnRegs(); |
3000 | for (int i = 0; i < srcCount; i++) |
3001 | { |
3002 | BuildUse(op1, useCandidates, i); |
3003 | } |
3004 | return srcCount; |
3005 | } |
3006 | } |
3007 | else |
3008 | #endif // FEATURE_MULTIREG_RET |
3009 | { |
3010 | // Non-struct type return - determine useCandidates |
3011 | switch (tree->TypeGet()) |
3012 | { |
3013 | case TYP_VOID: |
3014 | useCandidates = RBM_NONE; |
3015 | break; |
3016 | case TYP_FLOAT: |
3017 | useCandidates = RBM_FLOATRET; |
3018 | break; |
3019 | case TYP_DOUBLE: |
3020 | // We ONLY want the valid double register in the RBM_DOUBLERET mask. |
3021 | useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE); |
3022 | break; |
3023 | case TYP_LONG: |
3024 | useCandidates = RBM_LNGRET; |
3025 | break; |
3026 | default: |
3027 | useCandidates = RBM_INTRET; |
3028 | break; |
3029 | } |
3030 | BuildUse(op1, useCandidates); |
3031 | return 1; |
3032 | } |
3033 | } |
3034 | |
3035 | // No kills or defs. |
3036 | return 0; |
3037 | } |
3038 | |
3039 | //------------------------------------------------------------------------ |
3040 | // supportsSpecialPutArg: Determine if we can support specialPutArgs |
3041 | // |
3042 | // Return Value: |
3043 | // True iff specialPutArg intervals can be supported. |
3044 | // |
// Notes:
//    See the in-function comment below for a motivating example.
3047 | // |
3048 | |
3049 | bool LinearScan::supportsSpecialPutArg() |
3050 | { |
3051 | #if defined(DEBUG) && defined(_TARGET_X86_) |
3052 | // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode |
3053 | // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the |
// first two integral arguments to a call. This can leave us with too few registers to successfully allocate in
3055 | // situations like the following: |
3056 | // |
3057 | // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d> |
3058 | // |
3059 | // /--* t1026 ref |
3060 | // t1352 = * putarg_reg ref REG NA |
3061 | // |
3062 | // t342 = lclVar int V14 loc6 u:4 REG NA $50c |
3063 | // |
3064 | // t343 = const int 1 REG NA $41 |
3065 | // |
3066 | // /--* t342 int |
3067 | // +--* t343 int |
3068 | // t344 = * + int REG NA $495 |
3069 | // |
3070 | // t345 = lclVar int V04 arg4 u:2 REG NA $100 |
3071 | // |
3072 | // /--* t344 int |
3073 | // +--* t345 int |
3074 | // t346 = * % int REG NA $496 |
3075 | // |
3076 | // /--* t346 int |
3077 | // t1353 = * putarg_reg int REG NA |
3078 | // |
3079 | // t1354 = lclVar ref V52 tmp35 (last use) REG NA |
3080 | // |
3081 | // /--* t1354 ref |
3082 | // t1355 = * lea(b+0) byref REG NA |
3083 | // |
3084 | // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the |
3085 | // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register |
3086 | // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to |
3087 | // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be |
3088 | // available due to the special put arg. |
3089 | return getStressLimitRegs() != LSRA_LIMIT_CALLER; |
3090 | #else |
3091 | return true; |
3092 | #endif |
3093 | } |
3094 | |
3095 | //------------------------------------------------------------------------ |
// BuildPutArgReg: Set the register requirements for a PUTARG_REG node.
//
// Arguments:
//    node - The PUTARG_REG node.
//
// Return Value:
//    The number of source registers used by this node.
3107 | // |
3108 | int LinearScan::BuildPutArgReg(GenTreeUnOp* node) |
3109 | { |
3110 | assert(node != nullptr); |
3111 | assert(node->OperIsPutArgReg()); |
3112 | regNumber argReg = node->gtRegNum; |
3113 | assert(argReg != REG_NA); |
3114 | bool isSpecialPutArg = false; |
3115 | int srcCount = 1; |
3116 | GenTree* op1 = node->gtGetOp1(); |
3117 | |
3118 | // First, handle the GT_OBJ case, which loads into the arg register |
3119 | // (so we don't set the use to prefer that register for the source address). |
3120 | if (op1->OperIs(GT_OBJ)) |
3121 | { |
3122 | GenTreeObj* obj = op1->AsObj(); |
3123 | GenTree* addr = obj->Addr(); |
3124 | unsigned size = obj->gtBlkSize; |
3125 | assert(size <= TARGET_POINTER_SIZE); |
3126 | if (addr->OperIsLocalAddr()) |
3127 | { |
3128 | // We don't need a source register. |
3129 | assert(addr->isContained()); |
3130 | srcCount = 0; |
3131 | } |
3132 | else if (!isPow2(size)) |
3133 | { |
3134 | // We'll need an internal register to do the odd-size load. |
3135 | // This can only happen with integer registers. |
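        // (For example, a 3- or 6-byte struct load is presumably split into narrower
        // loads that are combined in the temporary register.)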
3136 | assert(genIsValidIntReg(argReg)); |
3137 | buildInternalIntRegisterDefForNode(node); |
3138 | BuildUse(addr); |
3139 | buildInternalRegisterUses(); |
3140 | } |
3141 | return srcCount; |
3142 | } |
3143 | |
3144 | // To avoid redundant moves, have the argument operand computed in the |
3145 | // register in which the argument is passed to the call. |
3146 | regMaskTP argMask = genRegMask(argReg); |
3147 | RefPosition* use = BuildUse(op1, argMask); |
3148 | |
3149 | if (supportsSpecialPutArg() && isCandidateLocalRef(op1) && ((op1->gtFlags & GTF_VAR_DEATH) == 0)) |
3150 | { |
3151 | // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use, |
3152 | // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register |
3153 | // (which is actually what we hope will happen). |
3154 | JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n" ); |
3155 | |
3156 | // Preference the destination to the interval of the first register defined by the first operand. |
3157 | assert(use->getInterval()->isLocalVar); |
3158 | isSpecialPutArg = true; |
3159 | tgtPrefUse = use; |
3160 | } |
3161 | |
3162 | #ifdef _TARGET_ARM_ |
    // If the type of this node is `long`, it is actually a `double` passed in two
    // consecutive registers; genuine `long` args must already have been transformed
    // into a field list with two fields.
3165 | if (node->TypeGet() == TYP_LONG) |
3166 | { |
3167 | srcCount++; |
3168 | regMaskTP argMaskHi = genRegMask(REG_NEXT(argReg)); |
3169 | assert(genRegArgNext(argReg) == REG_NEXT(argReg)); |
3170 | use = BuildUse(op1, argMaskHi, 1); |
3171 | BuildDef(node, argMask, 0); |
3172 | BuildDef(node, argMaskHi, 1); |
3173 | } |
3174 | else |
3175 | #endif // _TARGET_ARM_ |
3176 | { |
3177 | RefPosition* def = BuildDef(node, argMask); |
3178 | if (isSpecialPutArg) |
3179 | { |
3180 | def->getInterval()->isSpecialPutArg = true; |
3181 | } |
3182 | } |
3183 | return srcCount; |
3184 | } |
3185 | |
3186 | //------------------------------------------------------------------------ |
3187 | // HandleFloatVarArgs: Handle additional register requirements for a varargs call |
3188 | // |
3189 | // Arguments: |
3190 | // call - The call node of interest |
3191 | // argNode - The current argument |
3192 | // |
3193 | // Return Value: |
3194 | // None. |
3195 | // |
3196 | // Notes: |
3197 | // In the case of a varargs call, the ABI dictates that if we have floating point args, |
3198 | // we must pass the enregistered arguments in both the integer and floating point registers. |
3199 | // Since the integer register is not associated with the arg node, we will reserve it as |
3200 | // an internal register on the call so that it is not used during the evaluation of the call node |
3201 | // (e.g. for the target). |
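//    For example, under the Windows x64 varargs convention a double passed in XMM1 must
//    also be present in RDX; reserving RDX as an internal register here keeps it free
//    for codegen to copy the value into before the call.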
3202 | void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs) |
3203 | { |
3204 | #if FEATURE_VARARG |
3205 | if (call->IsVarargs() && varTypeIsFloating(argNode)) |
3206 | { |
3207 | *callHasFloatRegArgs = true; |
3208 | |
3209 | // We'll have to return the internal def and then later create a use for it. |
3210 | regNumber argReg = argNode->gtRegNum; |
3211 | regNumber targetReg = compiler->getCallArgIntRegister(argReg); |
3212 | |
3213 | buildInternalIntRegisterDefForNode(call, genRegMask(targetReg)); |
3214 | } |
3215 | #endif // FEATURE_VARARG |
3216 | } |
3217 | |
3218 | //------------------------------------------------------------------------ |
3219 | // BuildGCWriteBarrier: Handle additional register requirements for a GC write barrier |
3220 | // |
3221 | // Arguments: |
3222 | // tree - The STORE_IND for which a write barrier is required |
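//
// Return Value:
//    The number of sources consumed (always two: the address and the value).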
3223 | // |
3224 | int LinearScan::BuildGCWriteBarrier(GenTree* tree) |
3225 | { |
    GenTree* addr = tree->gtGetOp1();
    GenTree* src  = tree->gtGetOp2();
3229 | |
3230 | // In the case where we are doing a helper assignment, even if the dst |
3231 | // is an indir through an lea, we need to actually instantiate the |
3232 | // lea in a register |
3233 | assert(!addr->isContained() && !src->isContained()); |
3234 | int srcCount = 2; |
3235 | regMaskTP addrCandidates = RBM_ARG_0; |
3236 | regMaskTP srcCandidates = RBM_ARG_1; |
3237 | |
3238 | #if defined(_TARGET_ARM64_) |
3239 | |
3240 | // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST) |
3241 | // the 'src' goes into x15 (REG_WRITE_BARRIER_SRC) |
3242 | // |
3243 | addrCandidates = RBM_WRITE_BARRIER_DST; |
3244 | srcCandidates = RBM_WRITE_BARRIER_SRC; |
3245 | |
3246 | #elif defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS |
3247 | |
3248 | bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); |
3249 | if (useOptimizedWriteBarrierHelper) |
3250 | { |
3251 | // Special write barrier: |
3252 | // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and |
3253 | // op2 (src) goes into any int register. |
3254 | addrCandidates = RBM_WRITE_BARRIER; |
3255 | srcCandidates = RBM_WRITE_BARRIER_SRC; |
3256 | } |
3257 | |
3258 | #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS |
3259 | |
3260 | BuildUse(addr, addrCandidates); |
3261 | BuildUse(src, srcCandidates); |
3262 | |
3263 | regMaskTP killMask = getKillSetForStoreInd(tree->AsStoreInd()); |
3264 | buildKillPositionsForNode(tree, currentLoc + 1, killMask); |
    return srcCount;
3266 | } |
3267 | |
3268 | //------------------------------------------------------------------------ |
3269 | // BuildCmp: Set the register requirements for a compare. |
3270 | // |
3271 | // Arguments: |
3272 | // tree - The node of interest |
3273 | // |
// Return Value:
//    The number of sources consumed by this node.
3276 | // |
3277 | int LinearScan::BuildCmp(GenTree* tree) |
3278 | { |
3279 | assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP)); |
3280 | regMaskTP dstCandidates = RBM_NONE; |
3281 | regMaskTP op1Candidates = RBM_NONE; |
3282 | regMaskTP op2Candidates = RBM_NONE; |
3283 | GenTree* op1 = tree->gtGetOp1(); |
3284 | GenTree* op2 = tree->gtGetOp2(); |
3285 | |
3286 | #ifdef _TARGET_X86_ |
3287 | // If the compare is used by a jump, we just need to set the condition codes. If not, then we need |
3288 | // to store the result into the low byte of a register, which requires the dst be a byteable register. |
3289 | if (tree->TypeGet() != TYP_VOID) |
3290 | { |
3291 | dstCandidates = allByteRegs(); |
3292 | } |
3293 | bool needByteRegs = false; |
3294 | if (varTypeIsByte(tree)) |
3295 | { |
3296 | if (!varTypeIsFloating(op1)) |
3297 | { |
3298 | needByteRegs = true; |
3299 | } |
3300 | } |
3301 | // Example1: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses |
3302 | // ubyte as the result of comparison and if the result needs to be materialized into a reg |
3303 | // simply zero extend it to TYP_INT size. Here is an example of generated code: |
3304 | // cmp dl, byte ptr[addr mode] |
3305 | // movzx edx, dl |
3306 | else if (varTypeIsByte(op1) && varTypeIsByte(op2)) |
3307 | { |
3308 | needByteRegs = true; |
3309 | } |
3310 | // Example2: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses |
3311 | // ubyte as the result of the comparison and if the result needs to be materialized into a reg |
3312 | // simply zero extend it to TYP_INT size. |
3313 | else if (varTypeIsByte(op1) && op2->IsCnsIntOrI()) |
3314 | { |
3315 | needByteRegs = true; |
3316 | } |
3317 | // Example3: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses |
3318 | // ubyte as the result of the comparison and if the result needs to be materialized into a reg |
3319 | // simply zero extend it to TYP_INT size. |
3320 | else if (op1->IsCnsIntOrI() && varTypeIsByte(op2)) |
3321 | { |
3322 | needByteRegs = true; |
3323 | } |
3324 | if (needByteRegs) |
3325 | { |
3326 | if (!op1->isContained()) |
3327 | { |
3328 | op1Candidates = allByteRegs(); |
3329 | } |
3330 | if (!op2->isContained()) |
3331 | { |
3332 | op2Candidates = allByteRegs(); |
3333 | } |
3334 | } |
3335 | #endif // _TARGET_X86_ |
3336 | |
3337 | int srcCount = BuildOperandUses(op1, op1Candidates); |
3338 | srcCount += BuildOperandUses(op2, op2Candidates); |
3339 | if (tree->TypeGet() != TYP_VOID) |
3340 | { |
3341 | BuildDef(tree, dstCandidates); |
3342 | } |
3343 | return srcCount; |
3344 | } |
3345 | |