| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 7 | XX XX |
| 8 | XX Interval and RefPosition Building XX |
| 9 | XX XX |
| 10 | XX This contains the logic for constructing Intervals and RefPositions that XX |
| 11 | XX is common across architectures. See lsra{arch}.cpp for the architecture- XX |
| 12 | XX specific methods for building. XX |
| 13 | XX XX |
| 14 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 15 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 16 | */ |
| 17 | |
| 18 | #include "jitpch.h" |
| 19 | #ifdef _MSC_VER |
| 20 | #pragma hdrstop |
| 21 | #endif |
| 22 | |
| 23 | #include "lsra.h" |
| 24 | |
| 25 | //------------------------------------------------------------------------ |
| 26 | // RefInfoList |
| 27 | //------------------------------------------------------------------------ |
// removeListNode - remove the RefInfoListNode for the given GenTree node from the list and return it
//
// Notes:
//     The BuildNode methods use this helper to retrieve the RefPositions for child nodes
//     from the useList being constructed. Note that if the caller knows the order of the operands,
//     it should simply retrieve them directly.
| 34 | |
| 35 | RefInfoListNode* RefInfoList::removeListNode(GenTree* node) |
| 36 | { |
| 37 | RefInfoListNode* prevListNode = nullptr; |
| 38 | for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next()) |
| 39 | { |
| 40 | if (listNode->treeNode == node) |
| 41 | { |
| 42 | assert(listNode->ref->getMultiRegIdx() == 0); |
| 43 | return removeListNode(listNode, prevListNode); |
| 44 | } |
| 45 | prevListNode = listNode; |
| 46 | } |
| 47 | assert(!"removeListNode didn't find the node" ); |
| 48 | unreached(); |
| 49 | } |
| 50 | |
| 51 | //------------------------------------------------------------------------ |
// removeListNode - remove the RefInfoListNode for one reg of the given multireg GenTree node and return it
//
// Notes:
//     The BuildNode methods use this helper to retrieve the RefPositions for child nodes
//     from the useList being constructed. Note that if the caller knows the order of the operands,
//     it should simply retrieve them directly.
| 58 | |
| 59 | RefInfoListNode* RefInfoList::removeListNode(GenTree* node, unsigned multiRegIdx) |
| 60 | { |
| 61 | RefInfoListNode* prevListNode = nullptr; |
| 62 | for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next()) |
| 63 | { |
| 64 | if ((listNode->treeNode == node) && (listNode->ref->getMultiRegIdx() == multiRegIdx)) |
| 65 | { |
| 66 | return removeListNode(listNode, prevListNode); |
| 67 | } |
| 68 | prevListNode = listNode; |
| 69 | } |
    assert(!"removeListNode didn't find the node");
| 71 | unreached(); |
| 72 | } |
| 73 | |
| 74 | //------------------------------------------------------------------------ |
| 75 | // RefInfoListNodePool::RefInfoListNodePool: |
| 76 | // Creates a pool of `RefInfoListNode` values. |
| 77 | // |
| 78 | // Arguments: |
| 79 | // compiler - The compiler context. |
| 80 | // preallocate - The number of nodes to preallocate. |
| 81 | // |
| 82 | RefInfoListNodePool::RefInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler) |
| 83 | { |
| 84 | if (preallocate > 0) |
| 85 | { |
| 86 | RefInfoListNode* preallocatedNodes = compiler->getAllocator(CMK_LSRA).allocate<RefInfoListNode>(preallocate); |
| 87 | |
| 88 | RefInfoListNode* head = preallocatedNodes; |
| 89 | head->m_next = nullptr; |
| 90 | |
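        // Thread the remaining preallocated nodes onto the free list in reverse order,
        // so the first node allocated above ends up at the tail of the list.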
| 91 | for (unsigned i = 1; i < preallocate; i++) |
| 92 | { |
| 93 | RefInfoListNode* node = &preallocatedNodes[i]; |
| 94 | node->m_next = head; |
| 95 | head = node; |
| 96 | } |
| 97 | |
| 98 | m_freeList = head; |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | //------------------------------------------------------------------------ |
// RefInfoListNodePool::GetNode: Fetches an unused node from the
//                               pool.
//
// Arguments:
//    r      - The `RefPosition` for the `RefInfo` value.
//    t      - The IR node for the `RefInfo` value.
//    regIdx - The register index for the `RefInfo` value.
//
// Returns:
//    A pooled or newly-allocated `RefInfoListNode`, depending on the
//    contents of the pool.
| 115 | RefInfoListNode* RefInfoListNodePool::GetNode(RefPosition* r, GenTree* t, unsigned regIdx) |
| 116 | { |
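    // Pop a node off the free list if one is available; otherwise allocate a new one.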
| 117 | RefInfoListNode* head = m_freeList; |
| 118 | if (head == nullptr) |
| 119 | { |
| 120 | head = m_compiler->getAllocator(CMK_LSRA).allocate<RefInfoListNode>(1); |
| 121 | } |
| 122 | else |
| 123 | { |
| 124 | m_freeList = head->m_next; |
| 125 | } |
| 126 | |
| 127 | head->ref = r; |
| 128 | head->treeNode = t; |
| 129 | head->m_next = nullptr; |
| 130 | |
| 131 | return head; |
| 132 | } |
| 133 | |
| 134 | //------------------------------------------------------------------------ |
// RefInfoListNodePool::ReturnNode: Returns a node to the pool.
//
// Arguments:
//    listNode - The node to return.
| 140 | // |
| 141 | void RefInfoListNodePool::ReturnNode(RefInfoListNode* listNode) |
| 142 | { |
| 143 | listNode->m_next = m_freeList; |
| 144 | m_freeList = listNode; |
| 145 | } |
| 146 | |
| 147 | //------------------------------------------------------------------------ |
| 148 | // newInterval: Create a new Interval of the given RegisterType. |
| 149 | // |
| 150 | // Arguments: |
| 151 | // theRegisterType - The type of Interval to create. |
| 152 | // |
| 153 | // TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately |
| 154 | // set such fields as isStructField |
| 155 | // |
| 156 | Interval* LinearScan::newInterval(RegisterType theRegisterType) |
| 157 | { |
| 158 | intervals.emplace_back(theRegisterType, allRegs(theRegisterType)); |
| 159 | Interval* newInt = &intervals.back(); |
| 160 | |
| 161 | #ifdef DEBUG |
| 162 | newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1); |
| 163 | #endif // DEBUG |
| 164 | |
| 165 | DBEXEC(VERBOSE, newInt->dump()); |
| 166 | return newInt; |
| 167 | } |
| 168 | |
| 169 | //------------------------------------------------------------------------ |
| 170 | // newRefPositionRaw: Create a new RefPosition |
| 171 | // |
| 172 | // Arguments: |
| 173 | // nodeLocation - The location of the reference. |
| 174 | // treeNode - The GenTree of the reference. |
| 175 | // refType - The type of reference |
| 176 | // |
| 177 | // Notes: |
| 178 | // This is used to create RefPositions for both RegRecords and Intervals, |
| 179 | // so it does only the common initialization. |
| 180 | // |
| 181 | RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType) |
| 182 | { |
| 183 | refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType); |
| 184 | RefPosition* newRP = &refPositions.back(); |
| 185 | #ifdef DEBUG |
| 186 | newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1); |
| 187 | #endif // DEBUG |
| 188 | return newRP; |
| 189 | } |
| 190 | |
| 191 | //------------------------------------------------------------------------ |
| 192 | // resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use |
| 193 | // register requirements on a single-def, single-use interval. |
| 194 | // |
| 195 | // Arguments: |
| 196 | // defRefPosition - The interval definition |
| 197 | // useRefPosition - The (sole) interval use |
| 198 | // |
| 199 | // Return Value: |
| 200 | // None. |
| 201 | // |
| 202 | // Assumptions: |
| 203 | // The two RefPositions are for the same interval, which is a tree-temp. |
| 204 | // |
| 205 | // Notes: |
| 206 | // We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg. |
| 207 | // In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that, |
| 208 | // even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs |
| 209 | // to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4 |
| 210 | // below. |
| 211 | // Here are the cases we consider (in this order): |
//    1. If the defRefPosition specifies a single register, and there are no conflicting
| 213 | // FixedReg uses of it between the def and use, we use that register, and the code generator |
| 214 | // will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def. |
| 215 | // 2. If the useRefPosition specifies a single register, and it is not in use, and there are no |
| 216 | // conflicting FixedReg uses of it between the def and use, we use that register, and the code generator |
| 217 | // will insert the copy. |
| 218 | // 3. If the defRefPosition specifies a single register (but there are conflicts, as determined |
| 219 | // in 1.), and there are no conflicts with the useRefPosition register (if it's a single register), |
//       we set the register requirements on the defRefPosition to the use registers, and the
| 221 | // code generator will insert a copy on the def. We can't rely on the code generator to put a copy |
| 222 | // on the use if it has multiple possible candidates, as it won't know which one has been allocated. |
| 223 | // 4. If the useRefPosition specifies a single register, and there are no conflicts with the register |
| 224 | // on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set |
| 225 | // the useRefPosition to the def registers, for similar reasons to case #3. |
| 226 | // 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts, |
//       we set the candidates on defRefPosition to be all regs of the appropriate type, and since they are
| 228 | // single registers, codegen can insert the copy. |
| 229 | // 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but |
| 230 | // has a conflict), we must insert a copy. The copy will be inserted before the use if the |
| 231 | // use is not fixed (in the fixed case, the code generator will insert the use). |
| 232 | // |
| 233 | // TODO-CQ: We get bad register allocation in case #3 in the situation where no register is |
| 234 | // available for the lifetime. We end up allocating a register that must be spilled, and it probably |
| 235 | // won't be the register that is actually defined by the target instruction. So, we have to copy it |
| 236 | // and THEN spill it. In this case, we should be using the def requirement. But we need to change |
| 237 | // the interface to this method a bit to make that work (e.g. returning a candidate set to use, but |
| 238 | // leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway |
//    we can use the fixed-reg on the def.)
| 240 | // |
| 241 | |
| 242 | void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition) |
| 243 | { |
| 244 | assert(!interval->isLocalVar); |
| 245 | |
| 246 | RefPosition* useRefPosition = defRefPosition->nextRefPosition; |
| 247 | regMaskTP defRegAssignment = defRefPosition->registerAssignment; |
| 248 | regMaskTP useRegAssignment = useRefPosition->registerAssignment; |
| 249 | RegRecord* defRegRecord = nullptr; |
| 250 | RegRecord* useRegRecord = nullptr; |
| 251 | regNumber defReg = REG_NA; |
| 252 | regNumber useReg = REG_NA; |
| 253 | bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE); |
| 254 | bool useRegConflict = defRegConflict; |
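    // When the def and use register masks are disjoint, treat both the def and the use as
    // conflicting from the start.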
| 255 | |
| 256 | // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment |
| 257 | // on it, or we will fail to ensure that the fixedReg is busy at the time the target |
| 258 | // (of the node that uses this interval) is allocated. |
| 259 | bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree; |
| 260 | |
| 261 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT)); |
| 262 | if (!canChangeUseAssignment) |
| 263 | { |
| 264 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE)); |
| 265 | } |
| 266 | if (defRefPosition->isFixedRegRef && !defRegConflict) |
| 267 | { |
| 268 | defReg = defRefPosition->assignedReg(); |
| 269 | defRegRecord = getRegisterRecord(defReg); |
| 270 | if (canChangeUseAssignment) |
| 271 | { |
| 272 | RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; |
| 273 | assert(currFixedRegRefPosition != nullptr && |
| 274 | currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation); |
| 275 | |
| 276 | if (currFixedRegRefPosition->nextRefPosition == nullptr || |
| 277 | currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation()) |
| 278 | { |
| 279 | // This is case #1. Use the defRegAssignment |
| 280 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1)); |
| 281 | useRefPosition->registerAssignment = defRegAssignment; |
| 282 | return; |
| 283 | } |
| 284 | else |
| 285 | { |
| 286 | defRegConflict = true; |
| 287 | } |
| 288 | } |
| 289 | } |
| 290 | if (useRefPosition->isFixedRegRef && !useRegConflict) |
| 291 | { |
| 292 | useReg = useRefPosition->assignedReg(); |
| 293 | useRegRecord = getRegisterRecord(useReg); |
| 294 | RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition; |
| 295 | |
| 296 | // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null. |
| 297 | RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition(); |
| 298 | assert(nextFixedRegRefPosition != nullptr && |
| 299 | nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation); |
| 300 | |
| 301 | // First, check to see if there are any conflicting FixedReg references between the def and use. |
| 302 | if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation) |
| 303 | { |
| 304 | // OK, no conflicting FixedReg references. |
| 305 | // Now, check to see whether it is currently in use. |
| 306 | if (useRegRecord->assignedInterval != nullptr) |
| 307 | { |
| 308 | RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition; |
| 309 | LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation(); |
| 310 | if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation) |
| 311 | { |
| 312 | useRegConflict = true; |
| 313 | } |
| 314 | } |
| 315 | if (!useRegConflict) |
| 316 | { |
| 317 | // This is case #2. Use the useRegAssignment |
| 318 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2, interval)); |
| 319 | defRefPosition->registerAssignment = useRegAssignment; |
| 320 | return; |
| 321 | } |
| 322 | } |
| 323 | else |
| 324 | { |
| 325 | useRegConflict = true; |
| 326 | } |
| 327 | } |
| 328 | if (defRegRecord != nullptr && !useRegConflict) |
| 329 | { |
| 330 | // This is case #3. |
| 331 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3, interval)); |
| 332 | defRefPosition->registerAssignment = useRegAssignment; |
| 333 | return; |
| 334 | } |
| 335 | if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment) |
| 336 | { |
| 337 | // This is case #4. |
| 338 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4, interval)); |
| 339 | useRefPosition->registerAssignment = defRegAssignment; |
| 340 | return; |
| 341 | } |
| 342 | if (defRegRecord != nullptr && useRegRecord != nullptr) |
| 343 | { |
| 344 | // This is case #5. |
| 345 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5, interval)); |
| 346 | RegisterType regType = interval->registerType; |
| 347 | assert((getRegisterType(interval, defRefPosition) == regType) && |
| 348 | (getRegisterType(interval, useRefPosition) == regType)); |
| 349 | regMaskTP candidates = allRegs(regType); |
| 350 | defRefPosition->registerAssignment = candidates; |
| 351 | return; |
| 352 | } |
| 353 | INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6, interval)); |
| 354 | return; |
| 355 | } |
| 356 | |
| 357 | //------------------------------------------------------------------------ |
| 358 | // applyCalleeSaveHeuristics: Set register preferences for an interval based on the given RefPosition |
| 359 | // |
| 360 | // Arguments: |
| 361 | // rp - The RefPosition of interest |
| 362 | // |
| 363 | // Notes: |
//    This is slightly more general than its name implies, and updates preferences not just
| 365 | // for callee-save registers. |
| 366 | // |
| 367 | void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp) |
| 368 | { |
| 369 | #ifdef _TARGET_AMD64_ |
| 370 | if (compiler->opts.compDbgEnC) |
| 371 | { |
| 372 | // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs. |
| 373 | return; |
| 374 | } |
| 375 | #endif // _TARGET_AMD64_ |
| 376 | |
| 377 | Interval* theInterval = rp->getInterval(); |
| 378 | |
| 379 | #ifdef DEBUG |
| 380 | if (!doReverseCallerCallee()) |
| 381 | #endif // DEBUG |
| 382 | { |
| 383 | // Set preferences so that this register set will be preferred for earlier refs |
| 384 | theInterval->updateRegisterPreferences(rp->registerAssignment); |
| 385 | } |
| 386 | } |
| 387 | |
| 388 | //------------------------------------------------------------------------ |
| 389 | // checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps. |
| 390 | // |
| 391 | // Arguments: |
| 392 | // useRP - The use RefPosition of a tree temp (SDSU Interval) |
| 393 | // |
| 394 | // Notes: |
| 395 | // There are a couple of cases where this may over-constrain allocation: |
| 396 | // 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or |
| 397 | // 2. In the case where the defining node requires a temp distinct from the target (also a |
| 398 | // delay-free case). |
| 399 | // In those cases, if we propagate a single-register restriction from the consumer to the producer |
| 400 | // the delayed uses will not see a fixed reference in the PhysReg at that position, and may |
| 401 | // incorrectly allocate that register. |
| 402 | // TODO-CQ: This means that we may often require a copy at the use of this node's result. |
| 403 | // This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is |
| 404 | // created, causing a RefTypeFixedReg to be added at that location. This, however, results in |
| 405 | // more PhysReg RefPositions (a throughput impact), and a large number of diffs that require |
| 406 | // further analysis to determine benefit. |
| 407 | // See Issue #11274. |
| 408 | // |
| 409 | void LinearScan::checkConflictingDefUse(RefPosition* useRP) |
| 410 | { |
| 411 | assert(useRP->refType == RefTypeUse); |
| 412 | Interval* theInterval = useRP->getInterval(); |
| 413 | assert(!theInterval->isLocalVar); |
| 414 | |
| 415 | RefPosition* defRP = theInterval->firstRefPosition; |
| 416 | |
| 417 | // All defs must have a valid treeNode, but we check it below to be conservative. |
| 418 | assert(defRP->treeNode != nullptr); |
| 419 | regMaskTP prevAssignment = defRP->registerAssignment; |
| 420 | regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment); |
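    // Narrow the def's register assignment to the intersection, unless doing so would impose a
    // single-register restriction on an interval that has interfering uses (see the Notes above).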
| 421 | if (newAssignment != RBM_NONE) |
| 422 | { |
| 423 | if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) |
| 424 | { |
| 425 | defRP->registerAssignment = newAssignment; |
| 426 | } |
| 427 | } |
| 428 | else |
| 429 | { |
| 430 | theInterval->hasConflictingDefUse = true; |
| 431 | } |
| 432 | } |
| 433 | |
| 434 | //------------------------------------------------------------------------ |
| 435 | // associateRefPosWithInterval: Update the Interval based on the given RefPosition. |
| 436 | // |
| 437 | // Arguments: |
| 438 | // rp - The RefPosition of interest |
| 439 | // |
| 440 | // Notes: |
| 441 | // This is called at the time when 'rp' has just been created, so it becomes |
| 442 | // the nextRefPosition of the recentRefPosition, and both the recentRefPosition |
| 443 | // and lastRefPosition of its referent. |
| 444 | // |
| 445 | void LinearScan::associateRefPosWithInterval(RefPosition* rp) |
| 446 | { |
| 447 | Referenceable* theReferent = rp->referent; |
| 448 | |
| 449 | if (theReferent != nullptr) |
| 450 | { |
| 451 | // All RefPositions except the dummy ones at the beginning of blocks |
| 452 | |
| 453 | if (rp->isIntervalRef()) |
| 454 | { |
| 455 | Interval* theInterval = rp->getInterval(); |
| 456 | |
| 457 | applyCalleeSaveHeuristics(rp); |
| 458 | |
| 459 | if (theInterval->isLocalVar) |
| 460 | { |
| 461 | if (RefTypeIsUse(rp->refType)) |
| 462 | { |
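                    // If the previous reference to this local was in the same block, it is no longer
                    // the last use.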
| 463 | RefPosition* const prevRP = theInterval->recentRefPosition; |
| 464 | if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum)) |
| 465 | { |
| 466 | prevRP->lastUse = false; |
| 467 | } |
| 468 | } |
| 469 | |
| 470 | rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) && |
| 471 | (rp->refType != RefTypeZeroInit) && !extendLifetimes(); |
| 472 | } |
| 473 | else if (rp->refType == RefTypeUse) |
| 474 | { |
| 475 | checkConflictingDefUse(rp); |
| 476 | rp->lastUse = true; |
| 477 | } |
| 478 | } |
| 479 | |
| 480 | RefPosition* prevRP = theReferent->recentRefPosition; |
| 481 | if (prevRP != nullptr) |
| 482 | { |
| 483 | prevRP->nextRefPosition = rp; |
| 484 | } |
| 485 | else |
| 486 | { |
| 487 | theReferent->firstRefPosition = rp; |
| 488 | } |
| 489 | theReferent->recentRefPosition = rp; |
| 490 | theReferent->lastRefPosition = rp; |
| 491 | } |
| 492 | else |
| 493 | { |
| 494 | assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs)); |
| 495 | } |
| 496 | } |
| 497 | |
| 498 | //--------------------------------------------------------------------------- |
| 499 | // newRefPosition: allocate and initialize a new RefPosition. |
| 500 | // |
| 501 | // Arguments: |
//     reg          - reg number that identifies the RegRecord to be associated
//                    with this RefPosition
//     theLocation  - LSRA location of RefPosition
//     theRefType   - RefPosition type
//     theTreeNode  - GenTree node for which this RefPosition is created
//     mask         - Set of valid registers for this RefPosition
| 510 | // |
| 511 | // Return Value: |
| 512 | // a new RefPosition |
| 513 | // |
| 514 | RefPosition* LinearScan::newRefPosition( |
| 515 | regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask) |
| 516 | { |
| 517 | RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); |
| 518 | |
| 519 | RegRecord* regRecord = getRegisterRecord(reg); |
| 520 | newRP->setReg(regRecord); |
| 521 | newRP->registerAssignment = mask; |
| 522 | |
| 523 | newRP->setMultiRegIdx(0); |
| 524 | newRP->setAllocateIfProfitable(false); |
| 525 | |
| 526 | // We can't have two RefPositions on a RegRecord at the same location, unless they are different types. |
| 527 | assert((regRecord->lastRefPosition == nullptr) || (regRecord->lastRefPosition->nodeLocation < theLocation) || |
| 528 | (regRecord->lastRefPosition->refType != theRefType)); |
| 529 | associateRefPosWithInterval(newRP); |
| 530 | |
| 531 | DBEXEC(VERBOSE, newRP->dump()); |
| 532 | return newRP; |
| 533 | } |
| 534 | |
| 535 | //--------------------------------------------------------------------------- |
| 536 | // newRefPosition: allocate and initialize a new RefPosition. |
| 537 | // |
| 538 | // Arguments: |
//     theInterval  - the interval with which this RefPosition is associated
| 540 | // theLocation - LSRA location of RefPosition |
| 541 | // theRefType - RefPosition type |
| 542 | // theTreeNode - GenTree node for which this RefPosition is created |
| 543 | // mask - Set of valid registers for this RefPosition |
| 544 | // multiRegIdx - register position if this RefPosition corresponds to a |
| 545 | // multi-reg call node. |
| 546 | // |
| 547 | // Return Value: |
| 548 | // a new RefPosition |
| 549 | // |
| 550 | RefPosition* LinearScan::newRefPosition(Interval* theInterval, |
| 551 | LsraLocation theLocation, |
| 552 | RefType theRefType, |
| 553 | GenTree* theTreeNode, |
| 554 | regMaskTP mask, |
| 555 | unsigned multiRegIdx /* = 0 */) |
| 556 | { |
| 557 | if (theInterval != nullptr) |
| 558 | { |
| 559 | if (mask == RBM_NONE) |
| 560 | { |
| 561 | mask = allRegs(theInterval->registerType); |
| 562 | } |
| 563 | } |
| 564 | else |
| 565 | { |
| 566 | assert(theRefType == RefTypeBB || theRefType == RefTypeKillGCRefs); |
| 567 | } |
| 568 | #ifdef DEBUG |
| 569 | if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) |
| 570 | { |
        // In the case we're using floating point registers we must make sure
        // compFloatingPointUsed was set previously in the compiler, since that flag
        // determines whether LSRA takes FP register kill sets into consideration.
| 574 | assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0)); |
| 575 | } |
| 576 | #endif // DEBUG |
| 577 | |
| 578 | // If this reference is constrained to a single register (and it's not a dummy |
| 579 | // or Kill reftype already), add a RefTypeFixedReg at this location so that its |
| 580 | // availability can be more accurately determined |
| 581 | |
| 582 | bool isFixedRegister = isSingleRegister(mask); |
| 583 | bool insertFixedRef = false; |
| 584 | if (isFixedRegister) |
| 585 | { |
| 586 | // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB), |
| 587 | // but not an internal use (it will already have a FixedRef for the def). |
| 588 | if ((theRefType == RefTypeDef) || ((theRefType == RefTypeUse) && !theInterval->isInternal)) |
| 589 | { |
| 590 | insertFixedRef = true; |
| 591 | } |
| 592 | } |
| 593 | |
| 594 | if (insertFixedRef) |
| 595 | { |
| 596 | regNumber physicalReg = genRegNumFromMask(mask); |
| 597 | RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); |
| 598 | assert(theInterval != nullptr); |
| 599 | assert((allRegs(theInterval->registerType) & mask) != 0); |
| 600 | } |
| 601 | |
| 602 | RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); |
| 603 | |
| 604 | newRP->setInterval(theInterval); |
| 605 | |
| 606 | // Spill info |
| 607 | newRP->isFixedRegRef = isFixedRegister; |
| 608 | |
| 609 | #ifndef _TARGET_AMD64_ |
    // We don't need this for AMD64 because the PInvoke method epilog code is explicit
| 611 | // at register allocation time. |
| 612 | if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged && |
| 613 | theInterval->varNum == compiler->genReturnLocal) |
| 614 | { |
| 615 | mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME); |
| 616 | noway_assert(mask != RBM_NONE); |
| 617 | } |
| 618 | #endif // !_TARGET_AMD64_ |
| 619 | newRP->registerAssignment = mask; |
| 620 | |
| 621 | newRP->setMultiRegIdx(multiRegIdx); |
| 622 | newRP->setAllocateIfProfitable(false); |
| 623 | |
| 624 | associateRefPosWithInterval(newRP); |
| 625 | |
| 626 | DBEXEC(VERBOSE, newRP->dump()); |
| 627 | return newRP; |
| 628 | } |
| 629 | |
| 630 | //--------------------------------------------------------------------------- |
| 631 | // newUseRefPosition: allocate and initialize a RefTypeUse RefPosition at currentLoc. |
| 632 | // |
| 633 | // Arguments: |
//     theInterval  - the interval with which this RefPosition is associated
| 635 | // theTreeNode - GenTree node for which this RefPosition is created |
| 636 | // mask - Set of valid registers for this RefPosition |
| 637 | // multiRegIdx - register position if this RefPosition corresponds to a |
| 638 | // multi-reg call node. |
| 642 | // |
| 643 | // Return Value: |
| 644 | // a new RefPosition |
| 645 | // |
| 646 | // Notes: |
| 647 | // If the caller knows that 'theTreeNode' is NOT a candidate local, newRefPosition |
| 648 | // can/should be called directly. |
| 649 | // |
| 650 | RefPosition* LinearScan::newUseRefPosition(Interval* theInterval, |
| 651 | GenTree* theTreeNode, |
| 652 | regMaskTP mask, |
| 653 | unsigned multiRegIdx) |
| 654 | { |
| 655 | GenTree* treeNode = isCandidateLocalRef(theTreeNode) ? theTreeNode : nullptr; |
| 656 | |
| 657 | RefPosition* pos = newRefPosition(theInterval, currentLoc, RefTypeUse, treeNode, mask, multiRegIdx); |
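    // A reg-optional operand need only be given a register when it is profitable to do so.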
| 658 | if (theTreeNode->IsRegOptional()) |
| 659 | { |
| 660 | pos->setAllocateIfProfitable(true); |
| 661 | } |
| 662 | return pos; |
| 663 | } |
| 664 | |
| 665 | //------------------------------------------------------------------------ |
// isContainableMemoryOp: Checks whether this is a memory op that can be contained.
| 667 | // |
| 668 | // Arguments: |
| 669 | // node - the node of interest. |
| 670 | // |
| 671 | // Return value: |
| 672 | // True if this will definitely be a memory reference that could be contained. |
| 673 | // |
| 674 | // Notes: |
| 675 | // This differs from the isMemoryOp() method on GenTree because it checks for |
//    the case of a doNotEnregister local. This won't include locals that
| 677 | // for some other reason do not become register candidates, nor those that get |
| 678 | // spilled. |
| 679 | // Also, because we usually call this before we redo dataflow, any new lclVars |
| 680 | // introduced after the last dataflow analysis will not yet be marked lvTracked, |
| 681 | // so we don't use that. |
| 682 | // |
| 683 | bool LinearScan::isContainableMemoryOp(GenTree* node) |
| 684 | { |
| 685 | if (node->isMemoryOp()) |
| 686 | { |
| 687 | return true; |
| 688 | } |
| 689 | if (node->IsLocal()) |
| 690 | { |
| 691 | if (!enregisterLocalVars) |
| 692 | { |
| 693 | return true; |
| 694 | } |
| 695 | LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum]; |
| 696 | return varDsc->lvDoNotEnregister; |
| 697 | } |
| 698 | return false; |
| 699 | } |
| 700 | |
| 701 | //------------------------------------------------------------------------ |
| 702 | // addRefsForPhysRegMask: Adds RefPositions of the given type for all the registers in 'mask'. |
| 703 | // |
| 704 | // Arguments: |
| 705 | // mask - the mask (set) of registers. |
| 706 | // currentLoc - the location at which they should be added |
| 707 | // refType - the type of refposition |
| 708 | // isLastUse - true IFF this is a last use of the register |
| 709 | // |
| 710 | void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) |
| 711 | { |
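    // Walk 'mask' one bit at a time, starting at REG_FIRST, creating a RefPosition for each
    // register whose bit is set.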
| 712 | for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1) |
| 713 | { |
| 714 | if (mask & 1) |
| 715 | { |
| 716 | // This assumes that these are all "special" RefTypes that |
| 717 | // don't need to be recorded on the tree (hence treeNode is nullptr) |
| 718 | RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, |
| 719 | genRegMask(reg)); // This MUST occupy the physical register (obviously) |
| 720 | |
| 721 | if (isLastUse) |
| 722 | { |
| 723 | pos->lastUse = true; |
| 724 | } |
| 725 | } |
| 726 | } |
| 727 | } |
| 728 | |
| 729 | //------------------------------------------------------------------------ |
| 730 | // getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node. |
| 731 | // If the GT_STOREIND will generate a write barrier, determine the specific kill |
| 732 | // set required by the case-specific, platform-specific write barrier. If no |
| 733 | // write barrier is required, the kill set will be RBM_NONE. |
| 734 | // |
| 735 | // Arguments: |
| 736 | // tree - the GT_STOREIND node |
| 737 | // |
| 738 | // Return Value: a register mask of the registers killed |
| 739 | // |
| 740 | regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) |
| 741 | { |
| 742 | assert(tree->OperIs(GT_STOREIND)); |
| 743 | |
| 744 | regMaskTP killMask = RBM_NONE; |
| 745 | |
| 746 | GenTree* data = tree->Data(); |
| 747 | |
| 748 | GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data); |
| 749 | if (writeBarrierForm != GCInfo::WBF_NoBarrier) |
| 750 | { |
| 751 | if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm)) |
| 752 | { |
| 753 | // We can't determine the exact helper to be used at this point, because it depends on |
| 754 | // the allocated register for the `data` operand. However, all the (x86) optimized |
| 755 | // helpers have the same kill set: EDX. And note that currently, only x86 can return |
| 756 | // `true` for genUseOptimizedWriteBarriers(). |
| 757 | killMask = RBM_CALLEE_TRASH_NOGC; |
| 758 | } |
| 759 | else |
| 760 | { |
| 761 | // Figure out which helper we're going to use, and then get the kill set for that helper. |
| 762 | CorInfoHelpFunc helper = |
| 763 | compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm); |
| 764 | killMask = compiler->compHelperCallKillSet(helper); |
| 765 | } |
| 766 | } |
| 767 | return killMask; |
| 768 | } |
| 769 | |
| 770 | //------------------------------------------------------------------------ |
| 771 | // getKillSetForShiftRotate: Determine the liveness kill set for a shift or rotate node. |
| 772 | // |
| 773 | // Arguments: |
| 774 | // shiftNode - the shift or rotate node |
| 775 | // |
| 776 | // Return Value: a register mask of the registers killed |
| 777 | // |
| 778 | regMaskTP LinearScan::getKillSetForShiftRotate(GenTreeOp* shiftNode) |
| 779 | { |
| 780 | regMaskTP killMask = RBM_NONE; |
| 781 | #ifdef _TARGET_XARCH_ |
| 782 | assert(shiftNode->OperIsShiftOrRotate()); |
| 783 | GenTree* shiftBy = shiftNode->gtGetOp2(); |
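    // A variable (non-contained) shift or rotate count must be in RCX, so RCX is killed.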
| 784 | if (!shiftBy->isContained()) |
| 785 | { |
| 786 | killMask = RBM_RCX; |
| 787 | } |
| 788 | #endif // _TARGET_XARCH_ |
| 789 | return killMask; |
| 790 | } |
| 791 | |
| 792 | //------------------------------------------------------------------------ |
| 793 | // getKillSetForMul: Determine the liveness kill set for a multiply node. |
| 794 | // |
| 795 | // Arguments: |
//    mulNode - the multiply node
| 797 | // |
| 798 | // Return Value: a register mask of the registers killed |
| 799 | // |
| 800 | regMaskTP LinearScan::getKillSetForMul(GenTreeOp* mulNode) |
| 801 | { |
| 802 | regMaskTP killMask = RBM_NONE; |
| 803 | #ifdef _TARGET_XARCH_ |
| 804 | assert(mulNode->OperIsMul()); |
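    // The one-operand forms (MULHI, MUL_LONG) and unsigned overflow-checked GT_MUL use the
    // implicit RDX:RAX register pair, so both registers are killed.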
| 805 | if (!mulNode->OperIs(GT_MUL) || (((mulNode->gtFlags & GTF_UNSIGNED) != 0) && mulNode->gtOverflowEx())) |
| 806 | { |
| 807 | killMask = RBM_RAX | RBM_RDX; |
| 808 | } |
| 809 | #endif // _TARGET_XARCH_ |
| 810 | return killMask; |
| 811 | } |
| 812 | |
| 813 | //------------------------------------------------------------------------ |
| 814 | // getKillSetForModDiv: Determine the liveness kill set for a mod or div node. |
| 815 | // |
| 816 | // Arguments: |
//    node - the mod or div node as a GenTreeOp
| 818 | // |
| 819 | // Return Value: a register mask of the registers killed |
| 820 | // |
| 821 | regMaskTP LinearScan::getKillSetForModDiv(GenTreeOp* node) |
| 822 | { |
| 823 | regMaskTP killMask = RBM_NONE; |
| 824 | #ifdef _TARGET_XARCH_ |
| 825 | assert(node->OperIs(GT_MOD, GT_DIV, GT_UMOD, GT_UDIV)); |
| 826 | if (!varTypeIsFloating(node->TypeGet())) |
| 827 | { |
| 828 | // Both RAX and RDX are killed by the operation |
| 829 | killMask = RBM_RAX | RBM_RDX; |
| 830 | } |
| 831 | #endif // _TARGET_XARCH_ |
| 832 | return killMask; |
| 833 | } |
| 834 | |
| 835 | //------------------------------------------------------------------------ |
| 836 | // getKillSetForCall: Determine the liveness kill set for a call node. |
| 837 | // |
| 838 | // Arguments: |
//    call - the GenTreeCall node
| 840 | // |
| 841 | // Return Value: a register mask of the registers killed |
| 842 | // |
| 843 | regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) |
| 844 | { |
| 845 | regMaskTP killMask = RBM_NONE; |
| 846 | #ifdef _TARGET_X86_ |
| 847 | if (compiler->compFloatingPointUsed) |
| 848 | { |
| 849 | if (call->TypeGet() == TYP_DOUBLE) |
| 850 | { |
| 851 | needDoubleTmpForFPCall = true; |
| 852 | } |
| 853 | else if (call->TypeGet() == TYP_FLOAT) |
| 854 | { |
| 855 | needFloatTmpForFPCall = true; |
| 856 | } |
| 857 | } |
| 858 | #endif // _TARGET_X86_ |
| 859 | #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
| 860 | if (call->IsHelperCall()) |
| 861 | { |
| 862 | CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); |
| 863 | killMask = compiler->compHelperCallKillSet(helpFunc); |
| 864 | } |
| 865 | else |
| 866 | #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
| 867 | { |
| 868 | // if there is no FP used, we can ignore the FP kills |
| 869 | if (compiler->compFloatingPointUsed) |
| 870 | { |
| 871 | killMask = RBM_CALLEE_TRASH; |
| 872 | } |
| 873 | else |
| 874 | { |
| 875 | killMask = RBM_INT_CALLEE_TRASH; |
| 876 | } |
| 877 | #ifdef _TARGET_ARM_ |
| 878 | if (call->IsVirtualStub()) |
| 879 | { |
| 880 | killMask |= compiler->virtualStubParamInfo->GetRegMask(); |
| 881 | } |
| 882 | #else // !_TARGET_ARM_ |
| 883 | // Verify that the special virtual stub call registers are in the kill mask. |
| 884 | // We don't just add them unconditionally to the killMask because for most architectures |
| 885 | // they are already in the RBM_CALLEE_TRASH set, |
| 886 | // and we don't want to introduce extra checks and calls in this hot function. |
| 887 | assert(!call->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) == |
| 888 | compiler->virtualStubParamInfo->GetRegMask())); |
| 889 | #endif // !_TARGET_ARM_ |
| 890 | } |
| 891 | return killMask; |
| 892 | } |
| 893 | |
| 894 | //------------------------------------------------------------------------ |
| 895 | // getKillSetForBlockStore: Determine the liveness kill set for a block store node. |
| 896 | // |
| 897 | // Arguments: |
//    blkNode - the block store node as a GenTreeBlk
| 899 | // |
| 900 | // Return Value: a register mask of the registers killed |
| 901 | // |
| 902 | regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) |
| 903 | { |
| 904 | assert(blkNode->OperIsStore()); |
| 905 | regMaskTP killMask = RBM_NONE; |
| 906 | |
| 907 | if ((blkNode->OperGet() == GT_STORE_OBJ) && blkNode->OperIsCopyBlkOp()) |
| 908 | { |
| 909 | assert(blkNode->AsObj()->gtGcPtrCount != 0); |
| 910 | killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF); |
| 911 | } |
| 912 | else |
| 913 | { |
| 914 | bool isCopyBlk = varTypeIsStruct(blkNode->Data()); |
| 915 | switch (blkNode->gtBlkOpKind) |
| 916 | { |
| 917 | case GenTreeBlk::BlkOpKindHelper: |
| 918 | if (isCopyBlk) |
| 919 | { |
| 920 | killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY); |
| 921 | } |
| 922 | else |
| 923 | { |
| 924 | killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET); |
| 925 | } |
| 926 | break; |
| 927 | |
| 928 | #ifdef _TARGET_XARCH_ |
| 929 | case GenTreeBlk::BlkOpKindRepInstr: |
| 930 | if (isCopyBlk) |
| 931 | { |
| 932 | // rep movs kills RCX, RDI and RSI |
| 933 | killMask = RBM_RCX | RBM_RDI | RBM_RSI; |
| 934 | } |
| 935 | else |
| 936 | { |
| 937 | // rep stos kills RCX and RDI. |
| 938 | // (Note that the Data() node, if not constant, will be assigned to |
                // RCX, but it's fine that this kills it, as the value is not available
| 940 | // after this node in any case.) |
| 941 | killMask = RBM_RDI | RBM_RCX; |
| 942 | } |
| 943 | break; |
| 944 | #else |
| 945 | case GenTreeBlk::BlkOpKindRepInstr: |
| 946 | #endif |
| 947 | case GenTreeBlk::BlkOpKindUnroll: |
| 948 | case GenTreeBlk::BlkOpKindInvalid: |
| 949 | // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE |
| 950 | break; |
| 951 | } |
| 952 | } |
| 953 | return killMask; |
| 954 | } |
| 955 | |
| 956 | #ifdef FEATURE_HW_INTRINSICS |
| 957 | //------------------------------------------------------------------------ |
// getKillSetForHWIntrinsic: Determine the liveness kill set for a GT_HWIntrinsic node.
//
// Arguments:
//    node - the GT_HWIntrinsic node
| 965 | // |
| 966 | // Return Value: a register mask of the registers killed |
| 967 | // |
| 968 | regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node) |
| 969 | { |
| 970 | regMaskTP killMask = RBM_NONE; |
| 971 | #ifdef _TARGET_XARCH_ |
| 972 | switch (node->gtHWIntrinsicId) |
| 973 | { |
| 974 | case NI_SSE2_MaskMove: |
| 975 | // maskmovdqu uses edi as the implicit address register. |
| 976 | // Although it is set as the srcCandidate on the address, if there is also a fixed |
| 977 | // assignment for the definition of the address, resolveConflictingDefAndUse() may |
| 978 | // change the register assignment on the def or use of a tree temp (SDSU) when there |
| 979 | // is a conflict, and the FixedRef on edi won't be sufficient to ensure that another |
| 980 | // Interval will not be allocated there. |
| 981 | // Issue #17674 tracks this. |
| 982 | killMask = RBM_EDI; |
| 983 | break; |
| 984 | |
| 985 | default: |
| 986 | // Leave killMask as RBM_NONE |
| 987 | break; |
| 988 | } |
| 989 | #endif // _TARGET_XARCH_ |
| 990 | return killMask; |
| 991 | } |
| 992 | #endif // FEATURE_HW_INTRINSICS |
| 993 | |
| 994 | //------------------------------------------------------------------------ |
| 995 | // getKillSetForReturn: Determine the liveness kill set for a return node. |
| 996 | // |
| 997 | // Arguments: |
| 998 | // NONE (this kill set is independent of the details of the specific return.) |
| 999 | // |
| 1000 | // Return Value: a register mask of the registers killed |
| 1001 | // |
| 1002 | regMaskTP LinearScan::getKillSetForReturn() |
| 1003 | { |
| 1004 | return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE) |
| 1005 | : RBM_NONE; |
| 1006 | } |
| 1007 | |
| 1008 | //------------------------------------------------------------------------ |
| 1009 | // getKillSetForProfilerHook: Determine the liveness kill set for a profiler hook. |
| 1010 | // |
| 1011 | // Arguments: |
| 1012 | // NONE (this kill set is independent of the details of the specific node.) |
| 1013 | // |
| 1014 | // Return Value: a register mask of the registers killed |
| 1015 | // |
| 1016 | regMaskTP LinearScan::getKillSetForProfilerHook() |
| 1017 | { |
| 1018 | return compiler->compIsProfilerHookNeeded() ? compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL) |
| 1019 | : RBM_NONE; |
| 1020 | } |
| 1021 | |
| 1022 | #ifdef DEBUG |
| 1023 | //------------------------------------------------------------------------ |
| 1024 | // getKillSetForNode: Return the registers killed by the given tree node. |
| 1025 | // |
| 1026 | // Arguments: |
| 1027 | // tree - the tree for which the kill set is needed. |
| 1028 | // |
| 1029 | // Return Value: a register mask of the registers killed |
| 1030 | // |
| 1031 | regMaskTP LinearScan::getKillSetForNode(GenTree* tree) |
| 1032 | { |
| 1033 | regMaskTP killMask = RBM_NONE; |
| 1034 | switch (tree->OperGet()) |
| 1035 | { |
| 1036 | case GT_LSH: |
| 1037 | case GT_RSH: |
| 1038 | case GT_RSZ: |
| 1039 | case GT_ROL: |
| 1040 | case GT_ROR: |
| 1041 | #ifdef _TARGET_X86_ |
| 1042 | case GT_LSH_HI: |
| 1043 | case GT_RSH_LO: |
| 1044 | #endif |
| 1045 | killMask = getKillSetForShiftRotate(tree->AsOp()); |
| 1046 | break; |
| 1047 | |
| 1048 | case GT_MUL: |
| 1049 | case GT_MULHI: |
| 1050 | #if !defined(_TARGET_64BIT_) |
| 1051 | case GT_MUL_LONG: |
| 1052 | #endif |
| 1053 | killMask = getKillSetForMul(tree->AsOp()); |
| 1054 | break; |
| 1055 | |
| 1056 | case GT_MOD: |
| 1057 | case GT_DIV: |
| 1058 | case GT_UMOD: |
| 1059 | case GT_UDIV: |
| 1060 | killMask = getKillSetForModDiv(tree->AsOp()); |
| 1061 | break; |
| 1062 | |
| 1063 | case GT_STORE_OBJ: |
| 1064 | case GT_STORE_BLK: |
| 1065 | case GT_STORE_DYN_BLK: |
| 1066 | killMask = getKillSetForBlockStore(tree->AsBlk()); |
| 1067 | break; |
| 1068 | |
| 1069 | case GT_RETURNTRAP: |
| 1070 | killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); |
| 1071 | break; |
| 1072 | |
| 1073 | case GT_CALL: |
| 1074 | killMask = getKillSetForCall(tree->AsCall()); |
| 1075 | |
| 1076 | break; |
| 1077 | case GT_STOREIND: |
| 1078 | killMask = getKillSetForStoreInd(tree->AsStoreInd()); |
| 1079 | break; |
| 1080 | |
| 1081 | #if defined(PROFILING_SUPPORTED) |
| 1082 | // If this method requires profiler ELT hook then mark these nodes as killing |
| 1083 | // callee trash registers (excluding RAX and XMM0). The reason for this is that |
| 1084 | // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for |
| 1085 | // more details. |
| 1086 | case GT_RETURN: |
| 1087 | killMask = getKillSetForReturn(); |
| 1088 | break; |
| 1089 | |
| 1090 | case GT_PROF_HOOK: |
| 1091 | killMask = getKillSetForProfilerHook(); |
| 1092 | break; |
| 1093 | #endif // PROFILING_SUPPORTED |
| 1094 | |
| 1095 | #ifdef FEATURE_HW_INTRINSICS |
| 1096 | case GT_HWIntrinsic: |
| 1097 | killMask = getKillSetForHWIntrinsic(tree->AsHWIntrinsic()); |
| 1098 | break; |
| 1099 | #endif // FEATURE_HW_INTRINSICS |
| 1100 | |
| 1101 | default: |
| 1102 | // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE |
| 1103 | break; |
| 1104 | } |
| 1105 | return killMask; |
| 1106 | } |
| 1107 | #endif // DEBUG |
| 1108 | |
| 1109 | //------------------------------------------------------------------------ |
| 1110 | // buildKillPositionsForNode: |
//    Given some tree node, add RefPositions for all the registers this node kills.
| 1112 | // |
| 1113 | // Arguments: |
| 1114 | // tree - the tree for which kill positions should be generated |
//    currentLoc - the location at which the kills should be added
//    killMask   - the mask of registers killed by the node
| 1116 | // |
| 1117 | // Return Value: |
| 1118 | // true - kills were inserted |
| 1119 | // false - no kills were inserted |
| 1120 | // |
| 1121 | // Notes: |
| 1122 | // The return value is needed because if we have any kills, we need to make sure that |
| 1123 | // all defs are located AFTER the kills. On the other hand, if there aren't kills, |
| 1124 | // the multiple defs for a regPair are in different locations. |
| 1125 | // If we generate any kills, we will mark all currentLiveVars as being preferenced |
| 1126 | // to avoid the killed registers. This is somewhat conservative. |
| 1127 | |
| 1128 | bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask) |
| 1129 | { |
| 1130 | bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); |
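    // Treat a kill of the entire (integer or full) callee-trash set as a call, so that any live
    // locals are marked below as preferring callee-save registers.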
| 1131 | if (killMask != RBM_NONE) |
| 1132 | { |
| 1133 | // The killMask identifies a set of registers that will be used during codegen. |
| 1134 | // Mark these as modified here, so when we do final frame layout, we'll know about |
| 1135 | // all these registers. This is especially important if killMask contains |
| 1136 | // callee-saved registers, which affect the frame size since we need to save/restore them. |
        // In the case where we have a copyBlk with GC pointers, we may need to call the
        // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI. If
        // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
        // which is too late.
| 1141 | compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true)); |
| 1142 | |
| 1143 | addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true); |
| 1144 | |
| 1145 | // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee |
        // save regs on infrequently executed paths. However, it results in a large number of asmDiffs,
        // many of which appear to be regressions (because there is more spill on the infrequently executed path),
| 1148 | // but are not really because the frequent path becomes smaller. Validating these diffs will need |
| 1149 | // to be done before making this change. |
| 1150 | // if (!blockSequence[curBBSeqNum]->isRunRarely()) |
| 1151 | if (enregisterLocalVars) |
| 1152 | { |
| 1153 | VarSetOps::Iter iter(compiler, currentLiveVars); |
| 1154 | unsigned varIndex = 0; |
| 1155 | while (iter.NextElem(&varIndex)) |
| 1156 | { |
| 1157 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
| 1158 | LclVarDsc* varDsc = compiler->lvaTable + varNum; |
| 1159 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 1160 | if (varTypeNeedsPartialCalleeSave(varDsc->lvType)) |
| 1161 | { |
| 1162 | if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex)) |
| 1163 | { |
| 1164 | continue; |
| 1165 | } |
| 1166 | } |
| 1167 | else |
| 1168 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 1169 | if (varTypeIsFloating(varDsc) && |
| 1170 | !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex)) |
| 1171 | { |
| 1172 | continue; |
| 1173 | } |
| 1174 | Interval* interval = getIntervalForLocalVar(varIndex); |
| 1175 | if (isCallKill) |
| 1176 | { |
| 1177 | interval->preferCalleeSave = true; |
| 1178 | } |
| 1179 | regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); |
| 1180 | |
| 1181 | if (newPreferences != RBM_NONE) |
| 1182 | { |
| 1183 | interval->updateRegisterPreferences(newPreferences); |
| 1184 | } |
| 1185 | else |
| 1186 | { |
| 1187 | // If there are no callee-saved registers, the call could kill all the registers. |
                    // This is a valid state, so in that case the assert should not trigger. The RA will spill in order to
| 1189 | // free a register later. |
| 1190 | assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE); |
| 1191 | } |
| 1192 | } |
| 1193 | } |
| 1194 | |
| 1195 | if (compiler->killGCRefs(tree)) |
| 1196 | { |
| 1197 | RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, |
| 1198 | (allRegs(TYP_REF) & ~RBM_ARG_REGS)); |
| 1199 | } |
| 1200 | return true; |
| 1201 | } |
| 1202 | |
| 1203 | return false; |
| 1204 | } |
| 1205 | |
| 1206 | //---------------------------------------------------------------------------- |
| 1207 | // defineNewInternalTemp: Defines a ref position for an internal temp. |
| 1208 | // |
// Arguments:
//   tree    - GenTree node requiring an internal register
//   regType - Register type
//   regMask - register mask of candidates for the temp
| 1214 | // |
| 1215 | RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask) |
| 1216 | { |
| 1217 | Interval* current = newInterval(regType); |
| 1218 | current->isInternal = true; |
| 1219 | RefPosition* newDef = newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0); |
| 1220 | assert(internalCount < MaxInternalCount); |
| 1221 | internalDefs[internalCount++] = newDef; |
| 1222 | return newDef; |
| 1223 | } |
| 1224 | |
| 1225 | //------------------------------------------------------------------------ |
// buildInternalIntRegisterDefForNode - Create an Interval for an internal int register, and a def RefPosition
| 1227 | // |
| 1228 | // Arguments: |
//   tree          - GenTree node that needs internal registers
| 1230 | // internalCands - The mask of valid registers |
| 1231 | // |
| 1232 | // Returns: |
| 1233 | // The def RefPosition created for this internal temp. |
| 1234 | // |
| 1235 | RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands) |
| 1236 | { |
| 1237 | bool fixedReg = false; |
| 1238 | // The candidate set should contain only integer registers. |
| 1239 | assert((internalCands & ~allRegs(TYP_INT)) == RBM_NONE); |
| 1240 | if (genMaxOneBit(internalCands)) |
| 1241 | { |
| 1242 | fixedReg = true; |
| 1243 | } |
| 1244 | |
| 1245 | RefPosition* defRefPosition = defineNewInternalTemp(tree, IntRegisterType, internalCands); |
| 1246 | return defRefPosition; |
| 1247 | } |
| 1248 | |
| 1249 | //------------------------------------------------------------------------ |
| 1250 | // buildInternalFloatRegisterDefForNode - Create an Interval for an internal fp register, and a def RefPosition |
| 1251 | // |
| 1252 | // Arguments: |
//   tree          - GenTree node that needs internal registers
| 1254 | // internalCands - The mask of valid registers |
| 1255 | // |
| 1256 | // Returns: |
| 1257 | // The def RefPosition created for this internal temp. |
| 1258 | // |
| 1259 | RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands) |
| 1260 | { |
| 1261 | bool fixedReg = false; |
| 1262 | // The candidate set should contain only float registers. |
| 1263 | assert((internalCands & ~allRegs(TYP_FLOAT)) == RBM_NONE); |
| 1264 | if (genMaxOneBit(internalCands)) |
| 1265 | { |
| 1266 | fixedReg = true; |
| 1267 | } |
| 1268 | |
| 1269 | RefPosition* defRefPosition = defineNewInternalTemp(tree, FloatRegisterType, internalCands); |
| 1270 | return defRefPosition; |
| 1271 | } |
| 1272 | |
| 1273 | //------------------------------------------------------------------------ |
| 1274 | // buildInternalRegisterUses - adds use positions for internal |
//                              registers required by the tree node.
| 1276 | // |
| 1277 | // Notes: |
| 1278 | // During the BuildNode process, calls to buildInternalIntRegisterDefForNode and |
| 1279 | // buildInternalFloatRegisterDefForNode put new RefPositions in the 'internalDefs' |
| 1280 | // array, and increment 'internalCount'. This method must be called to add corresponding |
| 1281 | // uses. It then resets the 'internalCount' for the handling of the next node. |
| 1282 | // |
| 1283 | // If the internal registers must differ from the target register, 'setInternalRegsDelayFree' |
| 1284 | // must be set to true, so that the uses may be marked 'delayRegFree'. |
//   Note that if a node has both float and int temps, generally the target will either be
| 1286 | // int *or* float, and it is not really necessary to set this on the other type, but it does |
| 1287 | // no harm as it won't restrict the register selection. |
| 1288 | // |
| 1289 | void LinearScan::buildInternalRegisterUses() |
| 1290 | { |
| 1291 | assert(internalCount <= MaxInternalCount); |
| 1292 | for (int i = 0; i < internalCount; i++) |
| 1293 | { |
| 1294 | RefPosition* def = internalDefs[i]; |
| 1295 | regMaskTP mask = def->registerAssignment; |
| 1296 | RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0); |
| 1297 | if (setInternalRegsDelayFree) |
| 1298 | { |
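            // delayRegFree keeps the internal register busy through the node, so it cannot also be
            // assigned as the node's target register.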
| 1299 | use->delayRegFree = true; |
| 1300 | pendingDelayFree = true; |
| 1301 | } |
| 1302 | } |
| 1303 | // internalCount = 0; |
| 1304 | } |
| 1305 | |
| 1306 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 1307 | //------------------------------------------------------------------------ |
| 1308 | // buildUpperVectorSaveRefPositions - Create special RefPositions for saving |
//                                    the upper half of a set of large vectors.
| 1310 | // |
| 1311 | // Arguments: |
| 1312 | // tree - The current node being handled |
| 1313 | // currentLoc - The location of the current node |
| 1314 | // |
// Return Value: Returns the set of lclVars that are killed by this node, and therefore
//               require RefTypeUpperVectorSaveDef RefPositions.
| 1317 | // |
| 1318 | // Notes: The returned set is used by buildUpperVectorRestoreRefPositions. |
| 1319 | // |
| 1320 | VARSET_VALRET_TP |
| 1321 | LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet) |
| 1322 | { |
| 1323 | assert(enregisterLocalVars); |
| 1324 | VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler)); |
| 1325 | if (!VarSetOps::IsEmpty(compiler, largeVectorVars)) |
| 1326 | { |
| 1327 | // We actually need to find any calls that kill the upper-half of the callee-save vector registers. |
| 1328 | // But we will use as a proxy any node that kills floating point registers. |
| 1329 | // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.) |
| 1330 | // This check should have been done by the caller. |
| 1331 | if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE) |
| 1332 | { |
| 1333 | VarSetOps::AssignNoCopy(compiler, liveLargeVectors, |
| 1334 | VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars)); |
| 1335 | VarSetOps::Iter iter(compiler, liveLargeVectors); |
| 1336 | unsigned varIndex = 0; |
| 1337 | while (iter.NextElem(&varIndex)) |
| 1338 | { |
| 1339 | Interval* varInterval = getIntervalForLocalVar(varIndex); |
| 1340 | Interval* tempInterval = newInterval(varInterval->registerType); |
| 1341 | tempInterval->isInternal = true; |
| 1342 | RefPosition* pos = |
| 1343 | newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED); |
| 1344 | // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set |
| 1345 | // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding |
| 1346 | // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval, |
| 1347 | // and set varInterval as the relatedInterval of tempInterval. |
| 1348 | tempInterval->relatedInterval = varInterval->relatedInterval; |
| 1349 | varInterval->relatedInterval = tempInterval; |
| 1350 | } |
| 1351 | } |
| 1352 | } |
| 1353 | return liveLargeVectors; |
| 1354 | } |
| 1355 | |
//------------------------------------------------------------------------
// buildUpperVectorRestoreRefPositions - Create special RefPositions for restoring
| 1357 | // the upper half of a set of large vectors. |
| 1358 | // |
| 1359 | // Arguments: |
| 1360 | // tree - The current node being handled |
| 1361 | // currentLoc - The location of the current node |
| 1362 | // liveLargeVectors - The set of lclVars needing restores (returned by buildUpperVectorSaveRefPositions) |
| 1363 | // |
| 1364 | void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree, |
| 1365 | LsraLocation currentLoc, |
| 1366 | VARSET_VALARG_TP liveLargeVectors) |
| 1367 | { |
| 1368 | assert(enregisterLocalVars); |
| 1369 | if (!VarSetOps::IsEmpty(compiler, liveLargeVectors)) |
| 1370 | { |
| 1371 | VarSetOps::Iter iter(compiler, liveLargeVectors); |
| 1372 | unsigned varIndex = 0; |
| 1373 | while (iter.NextElem(&varIndex)) |
| 1374 | { |
| 1375 | Interval* varInterval = getIntervalForLocalVar(varIndex); |
| 1376 | Interval* tempInterval = varInterval->relatedInterval; |
| 1377 | assert(tempInterval->isInternal == true); |
| 1378 | RefPosition* pos = |
| 1379 | newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED); |
| 1380 | // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval |
| 1381 | // of tempInterval. |
| 1382 | varInterval->relatedInterval = tempInterval->relatedInterval; |
| 1383 | tempInterval->relatedInterval = varInterval; |
| 1384 | } |
| 1385 | } |
| 1386 | } |
| 1387 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 1388 | |
| 1389 | #ifdef DEBUG |
| 1390 | //------------------------------------------------------------------------ |
| 1391 | // ComputeOperandDstCount: computes the number of registers defined by a |
| 1392 | // node. |
| 1393 | // |
| 1394 | // For most nodes, this is simple: |
| 1395 | // - Nodes that do not produce values (e.g. stores and other void-typed |
| 1396 | // nodes) and nodes that immediately use the registers they define |
| 1397 | // produce no registers |
| 1398 | // - Nodes that are marked as defining N registers define N registers. |
| 1399 | // |
| 1400 | // For contained nodes, however, things are more complicated: for purposes |
| 1401 | // of bookkeeping, a contained node is treated as producing the transitive |
| 1402 | // closure of the registers produced by its sources. |
| 1403 | // |
| 1404 | // Arguments: |
| 1405 | // operand - The operand for which to compute a register count. |
| 1406 | // |
| 1407 | // Returns: |
| 1408 | // The number of registers defined by `operand`. |
| 1409 | // |
| 1410 | // static |
| 1411 | int LinearScan::ComputeOperandDstCount(GenTree* operand) |
| 1412 | { |
| 1413 | // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though |
| 1414 | // note that it is not in the linear order. It seems best to check for !IsLIR() rather than |
| 1415 | // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method. |
| 1416 | if (!operand->IsLIR()) |
| 1417 | { |
| 1418 | return 0; |
| 1419 | } |
| 1420 | if (operand->isContained()) |
| 1421 | { |
| 1422 | int dstCount = 0; |
| 1423 | for (GenTree* op : operand->Operands()) |
| 1424 | { |
| 1425 | dstCount += ComputeOperandDstCount(op); |
| 1426 | } |
| 1427 | |
| 1428 | return dstCount; |
| 1429 | } |
| 1430 | if (operand->IsUnusedValue()) |
| 1431 | { |
| 1432 | // Operands that define an unused value do not produce any registers. |
| 1433 | return 0; |
| 1434 | } |
| 1435 | if (operand->IsValue()) |
| 1436 | { |
| 1437 | // Operands that are values and are not contained consume all of their operands |
| 1438 | // and produce one or more registers. |
| 1439 | return operand->GetRegisterDstCount(); |
| 1440 | } |
| 1441 | else |
| 1442 | { |
| 1443 | // This must be one of the operand types that are neither contained nor produce a value. |
| 1444 | // Stores and void-typed operands may be encountered when processing call nodes, which contain |
| 1445 | // pointers to argument setup stores. |
| 1446 | assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || |
| 1447 | operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() || |
| 1448 | operand->TypeGet() == TYP_VOID); |
| 1449 | return 0; |
| 1450 | } |
| 1451 | } |
| 1452 | |
| 1453 | //------------------------------------------------------------------------ |
| 1454 | // ComputeAvailableSrcCount: computes the number of registers available as |
| 1455 | // sources for a node. |
| 1456 | // |
| 1457 | // This is simply the sum of the number of registers produced by each |
| 1458 | // operand to the node. |
| 1459 | // |
| 1460 | // Arguments: |
| 1461 | // node - The node for which to compute a source count. |
| 1462 | // |
| 1463 | // Return Value: |
| 1464 | // The number of registers available as sources for `node`. |
| 1465 | // |
| 1466 | // static |
| 1467 | int LinearScan::ComputeAvailableSrcCount(GenTree* node) |
| 1468 | { |
| 1469 | int numSources = 0; |
| 1470 | for (GenTree* operand : node->Operands()) |
| 1471 | { |
| 1472 | numSources += ComputeOperandDstCount(operand); |
| 1473 | } |
| 1474 | |
| 1475 | return numSources; |
| 1476 | } |
| 1477 | #endif // DEBUG |
| 1478 | |
| 1479 | //------------------------------------------------------------------------ |
| 1480 | // buildRefPositionsForNode: The main entry point for building the RefPositions |
| 1481 | // and "tree temp" Intervals for a given node. |
| 1482 | // |
| 1483 | // Arguments: |
| 1484 | // tree - The node for which we are building RefPositions |
| 1485 | // block - The BasicBlock in which the node resides |
| 1486 | // currentLoc - The LsraLocation of the given node |
| 1487 | // |
| 1488 | void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc) |
| 1489 | { |
| 1490 | // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. |
| 1491 | // GT_CLS_VAR nodes should have been eliminated by rationalizer. |
| 1492 | assert(tree->OperGet() != GT_ARGPLACE); |
| 1493 | assert(tree->OperGet() != GT_LIST); |
| 1494 | assert(tree->OperGet() != GT_CLS_VAR); |
| 1495 | |
| 1496 | // The LIR traversal visits only the first node in a GT_FIELD_LIST. |
| 1497 | assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead()); |
| 1498 | |
| 1499 | // The set of internal temporary registers used by this node are stored in the |
| 1500 | // gtRsvdRegs register mask. Clear it out. |
| 1501 | tree->gtRsvdRegs = RBM_NONE; |
| 1502 | |
| 1503 | #ifdef DEBUG |
| 1504 | if (VERBOSE) |
| 1505 | { |
| 1506 | dumpDefList(); |
| 1507 | compiler->gtDispTree(tree, nullptr, nullptr, true); |
| 1508 | } |
| 1509 | #endif // DEBUG |
| 1510 | |
| 1511 | if (tree->isContained()) |
| 1512 | { |
| 1513 | #ifdef _TARGET_XARCH_ |
| 1514 | // On XArch we can have contained candidate lclVars if they are part of a RMW |
| 1515 | // address computation. In this case we need to check whether it is a last use. |
| 1516 | if (tree->IsLocal() && ((tree->gtFlags & GTF_VAR_DEATH) != 0)) |
| 1517 | { |
| 1518 | LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum]; |
| 1519 | if (isCandidateVar(varDsc)) |
| 1520 | { |
| 1521 | assert(varDsc->lvTracked); |
| 1522 | unsigned varIndex = varDsc->lvVarIndex; |
| 1523 | VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex); |
| 1524 | } |
| 1525 | } |
| 1526 | #else // _TARGET_XARCH_ |
| 1527 | assert(!isCandidateLocalRef(tree)); |
| 1528 | #endif // _TARGET_XARCH_ |
| 1529 | JITDUMP("Contained\n" ); |
| 1530 | return; |
| 1531 | } |
| 1532 | |
| 1533 | #ifdef DEBUG |
| 1534 | // If we are constraining the registers for allocation, we will modify all the RefPositions |
| 1535 | // we've built for this node after we've created them. In order to do that, we'll remember |
| 1536 | // the last RefPosition prior to those created for this node. |
| 1537 | RefPositionIterator refPositionMark = refPositions.backPosition(); |
| 1538 | int oldDefListCount = defList.Count(); |
| 1539 | #endif // DEBUG |
| 1540 | |
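    // BuildNode is the architecture-specific method (see lsra{arch}.cpp) that creates the uses,
    // defs and internal temps for this node; it returns the number of source registers consumed.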
| 1541 | int consume = BuildNode(tree); |
| 1542 | |
| 1543 | #ifdef DEBUG |
| 1544 | int newDefListCount = defList.Count(); |
| 1545 | int produce = newDefListCount - oldDefListCount; |
| 1546 | assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); |
| 1547 | |
| 1548 | #if defined(_TARGET_AMD64_) |
| 1549 | // Multi-reg call node is the only node that could produce multi-reg value |
| 1550 | assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT)); |
| 1551 | #endif // _TARGET_AMD64_ |
| 1552 | |
| 1553 | #endif // DEBUG |
| 1554 | |
| 1555 | #ifdef DEBUG |
| 1556 | // If we are constraining registers, modify all the RefPositions we've just built to specify the |
| 1557 | // minimum reg count required. |
| 1558 | if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT)) |
| 1559 | { |
| 1560 | // The number of registers required for a tree node is the sum of |
| 1561 | // { RefTypeUses } + { RefTypeDef for the node itself } + specialPutArgCount |
| 1562 | // This is the minimum set of registers that needs to be ensured in the candidate set of ref positions created. |
| 1563 | // |
| 1564 | // First, we count them. |
| 1565 | unsigned minRegCount = 0; |
| 1566 | |
| 1567 | RefPositionIterator iter = refPositionMark; |
| 1568 | for (iter++; iter != refPositions.end(); iter++) |
| 1569 | { |
| 1570 | RefPosition* newRefPosition = &(*iter); |
| 1571 | if (newRefPosition->isIntervalRef()) |
| 1572 | { |
| 1573 | if ((newRefPosition->refType == RefTypeUse) || |
| 1574 | ((newRefPosition->refType == RefTypeDef) && !newRefPosition->getInterval()->isInternal)) |
| 1575 | { |
| 1576 | minRegCount++; |
| 1577 | } |
| 1578 | if (newRefPosition->getInterval()->isSpecialPutArg) |
| 1579 | { |
| 1580 | minRegCount++; |
| 1581 | } |
| 1582 | } |
| 1583 | } |
| 1584 | |
| 1585 | if (tree->OperIsPutArgSplit()) |
| 1586 | { |
| 1587 | // While we have attempted to account for any "specialPutArg" defs above, we're only looking at RefPositions |
| 1588 | // created for this node. We must be defining at least one register in the PutArgSplit, so conservatively |
            // add one less than the maximum number of register args to 'minRegCount'.
| 1590 | minRegCount += MAX_REG_ARG - 1; |
| 1591 | } |
| 1592 | for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++) |
| 1593 | { |
| 1594 | RefPosition* newRefPosition = &(*refPositionMark); |
| 1595 | unsigned minRegCountForRef = minRegCount; |
| 1596 | if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree) |
| 1597 | { |
| 1598 | // If delayRegFree, then Use will interfere with the destination of the consuming node. |
                // Therefore, we also need to add the kill set of the consuming node to minRegCount.
| 1600 | // |
| 1601 | // For example consider the following IR on x86, where v01 and v02 |
| 1602 | // are method args coming in ecx and edx respectively. |
| 1603 | // GT_DIV(v01, v02) |
| 1604 | // |
                // For GT_DIV, the minRegCount will be 3 without adding the kill set of the GT_DIV node.
| 1606 | // |
| 1607 | // Assume further JitStressRegs=2, which would constrain candidates to callee trashable |
| 1608 | // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01. |
| 1609 | // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and |
| 1610 | // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for |
| 1611 | // the use position of v02 also needs to take into account the kill set of its consuming node. |
| 1612 | regMaskTP killMask = getKillSetForNode(tree); |
| 1613 | if (killMask != RBM_NONE) |
| 1614 | { |
| 1615 | minRegCountForRef += genCountBits(killMask); |
| 1616 | } |
| 1617 | } |
| 1618 | else if ((newRefPosition->refType) == RefTypeDef && (newRefPosition->getInterval()->isSpecialPutArg)) |
| 1619 | { |
| 1620 | minRegCountForRef++; |
| 1621 | } |
| 1622 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 1623 | else if (newRefPosition->refType == RefTypeUpperVectorSaveDef) |
| 1624 | { |
| 1625 | // TODO-Cleanup: won't need a register once #18144 is fixed. |
| 1626 | minRegCountForRef += 1; |
| 1627 | } |
| 1628 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 1629 | |
| 1630 | newRefPosition->minRegCandidateCount = minRegCountForRef; |
| 1631 | if (newRefPosition->IsActualRef() && doReverseCallerCallee()) |
| 1632 | { |
| 1633 | Interval* interval = newRefPosition->getInterval(); |
| 1634 | regMaskTP oldAssignment = newRefPosition->registerAssignment; |
| 1635 | regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType); |
| 1636 | newRefPosition->registerAssignment = |
| 1637 | getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef); |
| 1638 | if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && |
| 1639 | !interval->isLocalVar) |
| 1640 | { |
| 1641 | checkConflictingDefUse(newRefPosition); |
| 1642 | } |
| 1643 | } |
| 1644 | } |
| 1645 | } |
| 1646 | #endif // DEBUG |
| 1647 | JITDUMP("\n" ); |
| 1648 | } |
| 1649 | |
| 1650 | //------------------------------------------------------------------------ |
| 1651 | // buildPhysRegRecords: Make an interval for each physical register |
| 1652 | // |
| 1653 | void LinearScan::buildPhysRegRecords() |
| 1654 | { |
| 1656 | for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) |
| 1657 | { |
| 1658 | RegRecord* curr = &physRegs[reg]; |
| 1659 | curr->init(reg); |
| 1660 | } |
| 1661 | } |
| 1662 | |
| 1663 | //------------------------------------------------------------------------ |
| 1664 | // getNonEmptyBlock: Return the first non-empty block starting with 'block' |
| 1665 | // |
| 1666 | // Arguments: |
| 1667 | // block - the BasicBlock from which we start looking |
| 1668 | // |
| 1669 | // Return Value: |
| 1670 | // The first non-empty BasicBlock we find. |
| 1671 | // |
| 1672 | BasicBlock* getNonEmptyBlock(BasicBlock* block) |
| 1673 | { |
| 1674 | while (block != nullptr && block->bbTreeList == nullptr) |
| 1675 | { |
| 1676 | BasicBlock* nextBlock = block->bbNext; |
| 1677 | // Note that here we use the version of NumSucc that does not take a compiler. |
| 1678 | // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan. |
| 1679 | // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which |
| 1680 | // case we don't need the version that takes a compiler. |
| 1681 | assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE))); |
| 1682 | // sometimes the first block is empty and ends with an uncond branch |
| 1683 | // assert( block->GetSucc(0) == nextBlock); |
| 1684 | block = nextBlock; |
| 1685 | } |
| 1686 | assert(block != nullptr && block->bbTreeList != nullptr); |
| 1687 | return block; |
| 1688 | } |
| 1689 | |
| 1690 | //------------------------------------------------------------------------ |
| 1691 | // insertZeroInitRefPositions: Handle lclVars that are live-in to the first block |
| 1692 | // |
| 1693 | // Notes: |
| 1694 | // Prior to calling this method, 'currentLiveVars' must be set to the set of register |
| 1695 | // candidate variables that are liveIn to the first block. |
| 1696 | // For each register candidate that is live-in to the first block: |
| 1697 | // - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created. |
| 1698 | // - Otherwise, it will be marked as spilled, since it will not be assigned a register |
| 1699 | // on entry and will be loaded from memory on the undefined path. |
| 1700 | // Note that, when the compInitMem option is not set, we may encounter these on |
| 1701 | // paths that are protected by the same condition as an earlier def. However, since |
| 1702 | // we don't do the analysis to determine this - and couldn't rely on always identifying |
| 1703 | // such cases even if we tried - we must conservatively treat the undefined path as |
| 1704 | // being possible. This is a relatively rare case, so the introduced conservatism is |
| 1705 | // not expected to warrant the analysis required to determine the best placement of |
| 1706 | // an initialization. |
| 1707 | // |
| 1708 | void LinearScan::insertZeroInitRefPositions() |
| 1709 | { |
| 1710 | assert(enregisterLocalVars); |
| 1711 | #ifdef DEBUG |
| 1712 | VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn)); |
| 1713 | assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars)); |
| 1714 | #endif // DEBUG |
| 1715 | |
| 1716 | // insert defs for this, then a block boundary |
| 1717 | |
| 1718 | VarSetOps::Iter iter(compiler, currentLiveVars); |
| 1719 | unsigned varIndex = 0; |
| 1720 | while (iter.NextElem(&varIndex)) |
| 1721 | { |
| 1722 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
| 1723 | LclVarDsc* varDsc = compiler->lvaTable + varNum; |
| 1724 | if (!varDsc->lvIsParam && isCandidateVar(varDsc)) |
| 1725 | { |
| 1726 | JITDUMP("V%02u was live in to first block:" , varNum); |
| 1727 | Interval* interval = getIntervalForLocalVar(varIndex); |
| 1728 | if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())) |
| 1729 | { |
| 1730 | JITDUMP(" creating ZeroInit\n" ); |
| 1731 | GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); |
| 1732 | RefPosition* pos = |
| 1733 | newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); |
| 1734 | varDsc->lvMustInit = true; |
| 1735 | } |
| 1736 | else |
| 1737 | { |
| 1738 | setIntervalAsSpilled(interval); |
| 1739 | JITDUMP(" marking as spilled\n" ); |
| 1740 | } |
| 1741 | } |
| 1742 | } |
| 1743 | } |
| 1744 | |
| 1745 | #if defined(UNIX_AMD64_ABI) |
| 1746 | //------------------------------------------------------------------------ |
| 1747 | // unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems. |
| 1748 | // |
| 1749 | // Arguments: |
| 1750 | // argDsc - the LclVarDsc for the argument of interest |
| 1751 | // |
| 1752 | // Notes: |
| 1753 | // See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp |
// for how the state for an argument is updated for unix non-structs and Windows AMD64 structs.
| 1755 | // |
| 1756 | void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) |
| 1757 | { |
| 1758 | assert(varTypeIsStruct(argDsc)); |
| 1759 | RegState* intRegState = &compiler->codeGen->intRegState; |
| 1760 | RegState* floatRegState = &compiler->codeGen->floatRegState; |
| 1761 | |
| 1762 | if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA)) |
| 1763 | { |
| 1764 | if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT)) |
| 1765 | { |
| 1766 | assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS)); |
| 1767 | floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); |
| 1768 | } |
| 1769 | else |
| 1770 | { |
| 1771 | assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS)); |
| 1772 | intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg); |
| 1773 | } |
| 1774 | } |
| 1775 | |
| 1776 | if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) |
| 1777 | { |
| 1778 | if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT)) |
| 1779 | { |
| 1780 | assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS)); |
| 1781 | floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); |
| 1782 | } |
| 1783 | else |
| 1784 | { |
| 1785 | assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS)); |
| 1786 | intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg); |
| 1787 | } |
| 1788 | } |
| 1789 | } |
| 1790 | |
| 1791 | #endif // defined(UNIX_AMD64_ABI) |
| 1792 | |
| 1793 | //------------------------------------------------------------------------ |
| 1794 | // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate |
| 1795 | // regState (either compiler->intRegState or compiler->floatRegState), |
| 1796 | // with the lvArgReg on "argDsc" |
| 1797 | // |
| 1798 | // Arguments: |
| 1799 | // argDsc - the argument for which the state is to be updated. |
| 1800 | // |
| 1801 | // Return Value: None |
| 1802 | // |
| 1803 | // Assumptions: |
| 1804 | // The argument is live on entry to the function |
| 1805 | // (or is untracked and therefore assumed live) |
| 1806 | // |
| 1807 | // Notes: |
| 1808 | // This relies on a method in regAlloc.cpp that is shared between LSRA |
| 1809 | // and regAlloc. It is further abstracted here because regState is updated |
| 1810 | // separately for tracked and untracked variables in LSRA. |
| 1811 | // |
| 1812 | void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) |
| 1813 | { |
| 1814 | #if defined(UNIX_AMD64_ABI) |
| 1815 | // For System V AMD64 calls the argDsc can have 2 registers (for structs.) |
| 1816 | // Handle them here. |
| 1817 | if (varTypeIsStruct(argDsc)) |
| 1818 | { |
| 1819 | unixAmd64UpdateRegStateForArg(argDsc); |
| 1820 | } |
| 1821 | else |
| 1822 | #endif // defined(UNIX_AMD64_ABI) |
| 1823 | { |
| 1824 | RegState* intRegState = &compiler->codeGen->intRegState; |
| 1825 | RegState* floatRegState = &compiler->codeGen->floatRegState; |
| 1826 | // In the case of AMD64 we'll still use the floating point registers |
        // to model the register usage for arguments on vararg calls, so
| 1828 | // we will ignore the varargs condition to determine whether we use |
| 1829 | // XMM registers or not for setting up the call. |
| 1830 | bool isFloat = (isFloatRegType(argDsc->lvType) |
| 1831 | #ifndef _TARGET_AMD64_ |
| 1832 | && !compiler->info.compIsVarArgs |
| 1833 | #endif |
| 1834 | && !compiler->opts.compUseSoftFP); |
| 1835 | |
| 1836 | if (argDsc->lvIsHfaRegArg()) |
| 1837 | { |
| 1838 | isFloat = true; |
| 1839 | } |
| 1840 | |
| 1841 | if (isFloat) |
| 1842 | { |
| 1843 | JITDUMP("Float arg V%02u in reg %s\n" , (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); |
| 1844 | compiler->raUpdateRegStateForArg(floatRegState, argDsc); |
| 1845 | } |
| 1846 | else |
| 1847 | { |
| 1848 | JITDUMP("Int arg V%02u in reg %s\n" , (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg)); |
| 1849 | #if FEATURE_MULTIREG_ARGS |
| 1850 | if (argDsc->lvOtherArgReg != REG_NA) |
| 1851 | { |
| 1852 | JITDUMP("(second half) in reg %s\n" , getRegName(argDsc->lvOtherArgReg)); |
| 1853 | } |
| 1854 | #endif // FEATURE_MULTIREG_ARGS |
| 1855 | compiler->raUpdateRegStateForArg(intRegState, argDsc); |
| 1856 | } |
| 1857 | } |
| 1858 | } |
| 1859 | |
| 1860 | //------------------------------------------------------------------------ |
| 1861 | // buildIntervals: The main entry point for building the data structures over |
| 1862 | // which we will do register allocation. |
| 1863 | // |
| 1864 | void LinearScan::buildIntervals() |
| 1865 | { |
| 1866 | BasicBlock* block; |
| 1867 | |
| 1868 | JITDUMP("\nbuildIntervals ========\n" ); |
| 1869 | |
| 1870 | // Build (empty) records for all of the physical registers |
| 1871 | buildPhysRegRecords(); |
| 1872 | |
| 1873 | #ifdef DEBUG |
| 1874 | if (VERBOSE) |
| 1875 | { |
| 1876 | printf("\n-----------------\n" ); |
| 1877 | printf("LIVENESS:\n" ); |
| 1878 | printf("-----------------\n" ); |
| 1879 | foreach_block(compiler, block) |
| 1880 | { |
| 1881 | printf(FMT_BB " use def in out\n" , block->bbNum); |
| 1882 | dumpConvertedVarSet(compiler, block->bbVarUse); |
| 1883 | printf("\n" ); |
| 1884 | dumpConvertedVarSet(compiler, block->bbVarDef); |
| 1885 | printf("\n" ); |
| 1886 | dumpConvertedVarSet(compiler, block->bbLiveIn); |
| 1887 | printf("\n" ); |
| 1888 | dumpConvertedVarSet(compiler, block->bbLiveOut); |
| 1889 | printf("\n" ); |
| 1890 | } |
| 1891 | } |
| 1892 | #endif // DEBUG |
| 1893 | |
| 1894 | #if DOUBLE_ALIGN |
| 1895 | // We will determine whether we should double align the frame during |
| 1896 | // identifyCandidates(), but we initially assume that we will not. |
| 1897 | doDoubleAlign = false; |
| 1898 | #endif |
| 1899 | |
| 1900 | identifyCandidates(); |
| 1901 | |
| 1902 | // Figure out if we're going to use a frame pointer. We need to do this before building |
| 1903 | // the ref positions, because those objects will embed the frame register in various register masks |
| 1904 | // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will |
| 1905 | // remove the frame pointer from the masks. |
| 1906 | setFrameType(); |
| 1907 | |
| 1908 | DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE)); |
| 1909 | |
| 1910 | // second part: |
| 1911 | JITDUMP("\nbuildIntervals second part ========\n" ); |
| 1912 | currentLoc = 0; |
| 1913 | // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were |
| 1914 | // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering. |
| 1915 | // Previously, the block sequencing was done for the (formerly separate) Build pass, |
| 1916 | // and the curBBNum was left as the last block sequenced. This block was then used to set the |
| 1917 | // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the |
| 1918 | // normalized entry weight (compiler->fgCalledCount), but that results in a net regression. |
| 1919 | if (!blockSequencingDone) |
| 1920 | { |
| 1921 | setBlockSequence(); |
| 1922 | } |
| 1923 | curBBNum = blockSequence[bbSeqCount - 1]->bbNum; |
| 1924 | |
| 1925 | // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex. |
| 1926 | // Assign these RefPositions to the (nonexistent) BB0. |
| 1927 | curBBNum = 0; |
| 1928 | |
| 1929 | LclVarDsc* argDsc; |
| 1930 | unsigned int lclNum; |
| 1931 | |
| 1932 | RegState* intRegState = &compiler->codeGen->intRegState; |
| 1933 | RegState* floatRegState = &compiler->codeGen->floatRegState; |
| 1934 | intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; |
| 1935 | floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE; |
| 1936 | |
| 1937 | for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) |
| 1938 | { |
| 1939 | lclNum = compiler->lvaTrackedToVarNum[varIndex]; |
| 1940 | argDsc = &(compiler->lvaTable[lclNum]); |
| 1941 | |
| 1942 | if (!argDsc->lvIsParam) |
| 1943 | { |
| 1944 | continue; |
| 1945 | } |
| 1946 | |
| 1947 | // Only reserve a register if the argument is actually used. |
| 1948 | // Is it dead on entry? If compJmpOpUsed is true, then the arguments |
| 1949 | // have to be kept alive, so we have to consider it as live on entry. |
| 1950 | // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we |
| 1951 | // won't have done dataflow on it, but it needs to be marked as live-in so |
| 1952 | // it will get saved in the prolog. |
| 1953 | if (!compiler->compJmpOpUsed && argDsc->lvRefCnt() == 0 && !compiler->opts.compDbgCode) |
| 1954 | { |
| 1955 | continue; |
| 1956 | } |
| 1957 | |
| 1958 | if (argDsc->lvIsRegArg) |
| 1959 | { |
| 1960 | updateRegStateForArg(argDsc); |
| 1961 | } |
| 1962 | |
| 1963 | if (isCandidateVar(argDsc)) |
| 1964 | { |
| 1965 | Interval* interval = getIntervalForLocalVar(varIndex); |
| 1966 | regMaskTP mask = allRegs(TypeGet(argDsc)); |
| 1967 | if (argDsc->lvIsRegArg) |
| 1968 | { |
| 1969 | // Set this interval as currently assigned to that register |
| 1970 | regNumber inArgReg = argDsc->lvArgReg; |
| 1971 | assert(inArgReg < REG_COUNT); |
| 1972 | mask = genRegMask(inArgReg); |
| 1973 | assignPhysReg(inArgReg, interval); |
| 1974 | } |
| 1975 | RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask); |
| 1976 | } |
| 1977 | else if (varTypeIsStruct(argDsc->lvType)) |
| 1978 | { |
| 1979 | for (unsigned fieldVarNum = argDsc->lvFieldLclStart; |
| 1980 | fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum) |
| 1981 | { |
| 1982 | LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]); |
| 1983 | if (fieldVarDsc->lvLRACandidate) |
| 1984 | { |
| 1985 | assert(fieldVarDsc->lvTracked); |
| 1986 | Interval* interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex); |
| 1987 | RefPosition* pos = |
| 1988 | newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc))); |
| 1989 | } |
| 1990 | } |
| 1991 | } |
| 1992 | else |
| 1993 | { |
| 1994 | // We can overwrite the register (i.e. codegen saves it on entry) |
| 1995 | assert(argDsc->lvRefCnt() == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister || |
| 1996 | !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode)); |
| 1997 | } |
| 1998 | } |
| 1999 | |
| 2000 | // Now set up the reg state for the non-tracked args |
| 2001 | // (We do this here because we want to generate the ParamDef RefPositions in tracked |
| 2002 | // order, so that loop doesn't hit the non-tracked args) |
| 2003 | |
| 2004 | for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++) |
| 2005 | { |
| 2006 | argDsc = &(compiler->lvaTable[argNum]); |
| 2007 | |
| 2008 | if (argDsc->lvPromotedStruct()) |
| 2009 | { |
| 2010 | noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here |
| 2011 | |
| 2012 | unsigned fieldVarNum = argDsc->lvFieldLclStart; |
| 2013 | argDsc = &(compiler->lvaTable[fieldVarNum]); |
| 2014 | } |
| 2015 | noway_assert(argDsc->lvIsParam); |
| 2016 | if (!argDsc->lvTracked && argDsc->lvIsRegArg) |
| 2017 | { |
| 2018 | updateRegStateForArg(argDsc); |
| 2019 | } |
| 2020 | } |
| 2021 | |
| 2022 | // If there is a secret stub param, it is also live in |
| 2023 | if (compiler->info.compPublishStubParam) |
| 2024 | { |
| 2025 | intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM; |
| 2026 | } |
| 2027 | |
| 2028 | BasicBlock* predBlock = nullptr; |
| 2029 | BasicBlock* prevBlock = nullptr; |
| 2030 | |
| 2031 | // Initialize currentLiveVars to the empty set. We will set it to the current |
| 2032 | // live-in at the entry to each block (this will include the incoming args on |
| 2033 | // the first block). |
| 2034 | VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler)); |
| 2035 | |
| 2036 | for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock()) |
| 2037 | { |
| 2038 | JITDUMP("\nNEW BLOCK " FMT_BB "\n" , block->bbNum); |
| 2039 | |
| 2040 | bool predBlockIsAllocated = false; |
| 2041 | predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated)); |
| 2042 | if (predBlock) |
| 2043 | { |
| 2044 | JITDUMP("\n\nSetting " FMT_BB " as the predecessor for determining incoming variable registers of " FMT_BB |
| 2045 | "\n" , |
| 2046 | block->bbNum, predBlock->bbNum); |
| 2047 | assert(predBlock->bbNum <= bbNumMaxBeforeResolution); |
| 2048 | blockInfo[block->bbNum].predBBNum = predBlock->bbNum; |
| 2049 | } |
| 2050 | |
| 2051 | if (enregisterLocalVars) |
| 2052 | { |
| 2053 | VarSetOps::AssignNoCopy(compiler, currentLiveVars, |
| 2054 | VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn)); |
| 2055 | |
| 2056 | if (block == compiler->fgFirstBB) |
| 2057 | { |
| 2058 | insertZeroInitRefPositions(); |
| 2059 | // The first real location is at 1; 0 is for the entry. |
| 2060 | currentLoc = 1; |
| 2061 | } |
| 2062 | |
| 2063 | // Any lclVars live-in to a block are resolution candidates. |
| 2064 | VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars); |
| 2065 | |
| 2066 | // Determine if we need any DummyDefs. |
| 2067 | // We need DummyDefs for cases where "predBlock" isn't really a predecessor. |
            // Note that it's possible to have uses of uninitialized variables, in which case even the first
| 2069 | // block may require DummyDefs, which we are not currently adding - this means that these variables |
| 2070 | // will always be considered to be in memory on entry (and reloaded when the use is encountered). |
| 2071 | // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized |
| 2072 | // variables (which may actually be initialized along the dynamically executed paths, but not |
| 2073 | // on all static paths), we wind up with excessive liveranges for some of these variables. |
| 2074 | VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars)); |
| 2075 | if (predBlock) |
| 2076 | { |
| 2077 | // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut |
| 2078 | VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut); |
| 2079 | } |
| 2080 | bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB); |
| 2081 | |
| 2082 | // Create dummy def RefPositions |
| 2083 | |
| 2084 | if (needsDummyDefs) |
| 2085 | { |
| 2086 | // If we are using locations from a predecessor, we should never require DummyDefs. |
| 2087 | assert(!predBlockIsAllocated); |
| 2088 | |
| 2089 | JITDUMP("Creating dummy definitions\n" ); |
| 2090 | VarSetOps::Iter iter(compiler, newLiveIn); |
| 2091 | unsigned varIndex = 0; |
| 2092 | while (iter.NextElem(&varIndex)) |
| 2093 | { |
| 2094 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
| 2095 | LclVarDsc* varDsc = compiler->lvaTable + varNum; |
| 2096 | // Add a dummyDef for any candidate vars that are in the "newLiveIn" set. |
| 2097 | // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs). |
| 2098 | if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam)) |
| 2099 | { |
| 2100 | Interval* interval = getIntervalForLocalVar(varIndex); |
| 2101 | RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr, |
| 2102 | allRegs(interval->registerType)); |
| 2103 | } |
| 2104 | } |
| 2105 | JITDUMP("Finished creating dummy definitions\n\n" ); |
| 2106 | } |
| 2107 | } |
| 2108 | |
| 2109 | // Add a dummy RefPosition to mark the block boundary. |
| 2110 | // Note that we do this AFTER adding the exposed uses above, because the |
| 2111 | // register positions for those exposed uses need to be recorded at |
| 2112 | // this point. |
| 2113 | |
| 2114 | RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE); |
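        // Advance past the block-boundary RefPosition; node locations are incremented by 2
        // (see the note below).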
| 2115 | currentLoc += 2; |
| 2116 | JITDUMP("\n" ); |
| 2117 | |
| 2118 | LIR::Range& blockRange = LIR::AsRange(block); |
| 2119 | for (GenTree* node : blockRange.NonPhiNodes()) |
| 2120 | { |
| 2121 | // We increment the number position of each tree node by 2 to simplify the logic when there's the case of |
| 2122 | // a tree that implicitly does a dual-definition of temps (the long case). In this case it is easier to |
| 2123 | // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only |
| 2124 | // increment the number position by one. |
| 2125 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 2126 | |
| 2127 | #ifdef DEBUG |
| 2128 | node->gtSeqNum = currentLoc; |
            // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the register
            // value.
| 2131 | // Although this looks like a no-op it sets the tag. |
| 2132 | node->gtRegNum = node->gtRegNum; |
| 2133 | #endif |
| 2134 | |
| 2135 | buildRefPositionsForNode(node, block, currentLoc); |
| 2136 | |
| 2137 | #ifdef DEBUG |
| 2138 | if (currentLoc > maxNodeLocation) |
| 2139 | { |
| 2140 | maxNodeLocation = currentLoc; |
| 2141 | } |
| 2142 | #endif // DEBUG |
| 2143 | currentLoc += 2; |
| 2144 | } |
| 2145 | |
| 2146 | // Note: the visited set is cleared in LinearScan::doLinearScan() |
| 2147 | markBlockVisited(block); |
| 2148 | if (!defList.IsEmpty()) |
| 2149 | { |
| 2150 | INDEBUG(dumpDefList()); |
| 2151 | assert(!"Expected empty defList at end of block" ); |
| 2152 | } |
| 2153 | |
| 2154 | if (enregisterLocalVars) |
| 2155 | { |
| 2156 | // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the |
| 2157 | // next block, or any unvisited successors. |
| 2158 | // This will address lclVars that are live on a backedge, as well as those that are kept |
| 2159 | // live at a GT_JMP. |
| 2160 | // |
| 2161 | // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP, |
            // and the jmp call is represented using a GT_JMP node, which is a leaf node.
            // The liveness phase keeps all the arguments of the method live until the end of
            // the block by adding them to the liveout set of the block containing the GT_JMP.
| 2165 | // |
| 2166 | // The target of a GT_JMP implicitly uses all the current method arguments, however |
| 2167 | // there are no actual references to them. This can cause LSRA to assert, because |
| 2168 | // the variables are live but it sees no references. In order to correctly model the |
| 2169 | // liveness of these arguments, we add dummy exposed uses, in the same manner as for |
| 2170 | // backward branches. This will happen automatically via expUseSet. |
| 2171 | // |
| 2172 | // Note that a block ending with GT_JMP has no successors and hence the variables |
| 2173 | // for which dummy use ref positions are added are arguments of the method. |
| 2174 | |
| 2175 | VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut)); |
| 2176 | VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars); |
| 2177 | BasicBlock* nextBlock = getNextBlock(); |
| 2178 | if (nextBlock != nullptr) |
| 2179 | { |
| 2180 | VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn); |
| 2181 | } |
| 2182 | for (BasicBlock* succ : block->GetAllSuccs(compiler)) |
| 2183 | { |
| 2184 | if (VarSetOps::IsEmpty(compiler, expUseSet)) |
| 2185 | { |
| 2186 | break; |
| 2187 | } |
| 2188 | |
| 2189 | if (isBlockVisited(succ)) |
| 2190 | { |
| 2191 | continue; |
| 2192 | } |
| 2193 | VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn); |
| 2194 | } |
| 2195 | |
| 2196 | if (!VarSetOps::IsEmpty(compiler, expUseSet)) |
| 2197 | { |
| 2198 | JITDUMP("Exposed uses:" ); |
| 2199 | VarSetOps::Iter iter(compiler, expUseSet); |
| 2200 | unsigned varIndex = 0; |
| 2201 | while (iter.NextElem(&varIndex)) |
| 2202 | { |
| 2203 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
| 2204 | LclVarDsc* varDsc = compiler->lvaTable + varNum; |
| 2205 | assert(isCandidateVar(varDsc)); |
| 2206 | Interval* interval = getIntervalForLocalVar(varIndex); |
| 2207 | RefPosition* pos = |
| 2208 | newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); |
| 2209 | JITDUMP(" V%02u" , varNum); |
| 2210 | } |
| 2211 | JITDUMP("\n" ); |
| 2212 | } |
| 2213 | |
| 2214 | // Clear the "last use" flag on any vars that are live-out from this block. |
| 2215 | { |
| 2216 | VARSET_TP bbLiveDefs(VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveOut)); |
| 2217 | VarSetOps::Iter iter(compiler, bbLiveDefs); |
| 2218 | unsigned varIndex = 0; |
| 2219 | while (iter.NextElem(&varIndex)) |
| 2220 | { |
| 2221 | unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; |
| 2222 | LclVarDsc* const varDsc = &compiler->lvaTable[varNum]; |
| 2223 | assert(isCandidateVar(varDsc)); |
| 2224 | RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition; |
| 2225 | // We should be able to assert that lastRP is non-null if it is live-out, but sometimes liveness |
| 2226 | // lies. |
| 2227 | if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum)) |
| 2228 | { |
| 2229 | lastRP->lastUse = false; |
| 2230 | } |
| 2231 | } |
| 2232 | } |
| 2233 | |
| 2234 | #ifdef DEBUG |
| 2235 | checkLastUses(block); |
| 2236 | |
| 2237 | if (VERBOSE) |
| 2238 | { |
| 2239 | printf("use: " ); |
| 2240 | dumpConvertedVarSet(compiler, block->bbVarUse); |
| 2241 | printf("\ndef: " ); |
| 2242 | dumpConvertedVarSet(compiler, block->bbVarDef); |
| 2243 | printf("\n" ); |
| 2244 | } |
| 2245 | #endif // DEBUG |
| 2246 | } |
| 2247 | |
| 2248 | prevBlock = block; |
| 2249 | } |
| 2250 | |
| 2251 | if (enregisterLocalVars) |
| 2252 | { |
| 2253 | if (compiler->lvaKeepAliveAndReportThis()) |
| 2254 | { |
| 2255 | // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end |
| 2256 | unsigned keepAliveVarNum = compiler->info.compThisArg; |
| 2257 | assert(compiler->info.compIsStatic == false); |
| 2258 | LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum; |
| 2259 | if (isCandidateVar(varDsc)) |
| 2260 | { |
| 2261 | JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n" ); |
| 2262 | Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); |
| 2263 | RefPosition* pos = |
| 2264 | newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); |
| 2265 | } |
| 2266 | } |
| 2267 | |
| 2268 | #ifdef DEBUG |
| 2269 | if (getLsraExtendLifeTimes()) |
| 2270 | { |
| 2271 | LclVarDsc* varDsc; |
| 2272 | for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++) |
| 2273 | { |
| 2274 | if (varDsc->lvLRACandidate) |
| 2275 | { |
| 2276 | JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n" , lclNum); |
| 2277 | Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); |
| 2278 | RefPosition* pos = |
| 2279 | newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); |
| 2280 | } |
| 2281 | } |
| 2282 | } |
| 2283 | #endif // DEBUG |
| 2284 | } |
| 2285 | |
| 2286 | // If the last block has successors, create a RefTypeBB to record |
| 2287 | // what's live |
| 2288 | |
| 2289 | if (prevBlock->NumSucc(compiler) > 0) |
| 2290 | { |
| 2291 | RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE); |
| 2292 | } |
| 2293 | |
| 2294 | #ifdef DEBUG |
| 2295 | // Make sure we don't have any blocks that were not visited |
| 2296 | foreach_block(compiler, block) |
| 2297 | { |
| 2298 | assert(isBlockVisited(block)); |
| 2299 | } |
| 2300 | |
| 2301 | if (VERBOSE) |
| 2302 | { |
| 2303 | lsraDumpIntervals("BEFORE VALIDATING INTERVALS" ); |
| 2304 | dumpRefPositions("BEFORE VALIDATING INTERVALS" ); |
| 2305 | validateIntervals(); |
| 2306 | } |
| 2307 | #endif // DEBUG |
| 2308 | } |
| 2309 | |
| 2310 | #ifdef DEBUG |
| 2311 | //------------------------------------------------------------------------ |
| 2312 | // validateIntervals: A DEBUG-only method that checks that the lclVar RefPositions |
| 2313 | // do not reflect uses of undefined values |
| 2314 | // |
| 2315 | // Notes: If an undefined use is encountered, it merely prints a message. |
| 2316 | // |
| 2317 | // TODO-Cleanup: This should probably assert, or at least print the message only |
| 2318 | // when doing a JITDUMP. |
| 2319 | // |
| 2320 | void LinearScan::validateIntervals() |
| 2321 | { |
| 2322 | if (enregisterLocalVars) |
| 2323 | { |
| 2324 | for (unsigned i = 0; i < compiler->lvaTrackedCount; i++) |
| 2325 | { |
| 2326 | if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate) |
| 2327 | { |
| 2328 | continue; |
| 2329 | } |
| 2330 | Interval* interval = getIntervalForLocalVar(i); |
| 2331 | |
| 2332 | bool defined = false; |
| 2333 | printf("-----------------\n" ); |
| 2334 | for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition) |
| 2335 | { |
| 2336 | ref->dump(); |
| 2337 | RefType refType = ref->refType; |
| 2338 | if (!defined && RefTypeIsUse(refType)) |
| 2339 | { |
| 2340 | if (compiler->info.compMethodName != nullptr) |
| 2341 | { |
| 2342 | printf("%s: " , compiler->info.compMethodName); |
| 2343 | } |
| 2344 | printf("LocalVar V%02u: undefined use at %u\n" , interval->varNum, ref->nodeLocation); |
| 2345 | } |
| 2346 | // Note that there can be multiple last uses if they are on disjoint paths, |
| 2347 | // so we can't really check the lastUse flag |
| 2348 | if (ref->lastUse) |
| 2349 | { |
| 2350 | defined = false; |
| 2351 | } |
| 2352 | if (RefTypeIsDef(refType)) |
| 2353 | { |
| 2354 | defined = true; |
| 2355 | } |
| 2356 | } |
| 2357 | } |
| 2358 | } |
| 2359 | } |
| 2360 | #endif // DEBUG |
| 2361 | |
| 2362 | //------------------------------------------------------------------------ |
| 2363 | // BuildDef: Build a RefTypeDef RefPosition for the given node |
| 2364 | // |
| 2365 | // Arguments: |
| 2366 | // tree - The node that defines a register |
| 2367 | // dstCandidates - The candidate registers for the definition |
| 2368 | // multiRegIdx - The index of the definition, defaults to zero. |
| 2369 | // Only non-zero for multi-reg nodes. |
| 2370 | // |
| 2371 | // Return Value: |
| 2372 | // The newly created RefPosition. |
| 2373 | // |
| 2374 | // Notes: |
| 2375 | // Adds the RefInfo for the definition to the defList. |
| 2376 | // |
| 2377 | RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int multiRegIdx) |
| 2378 | { |
| 2379 | assert(!tree->isContained()); |
| 2380 | RegisterType type = getDefType(tree); |
| 2381 | |
| 2382 | if (dstCandidates != RBM_NONE) |
| 2383 | { |
| 2384 | assert((tree->gtRegNum == REG_NA) || (dstCandidates == genRegMask(tree->GetRegByIndex(multiRegIdx)))); |
| 2385 | } |
| 2386 | |
| 2387 | if (tree->IsMultiRegNode()) |
| 2388 | { |
| 2389 | type = tree->GetRegTypeByIndex(multiRegIdx); |
| 2390 | } |
| 2391 | |
| 2392 | Interval* interval = newInterval(type); |
| 2393 | if (tree->gtRegNum != REG_NA) |
| 2394 | { |
| 2395 | if (!tree->IsMultiRegNode() || (multiRegIdx == 0)) |
| 2396 | { |
| 2397 | assert((dstCandidates == RBM_NONE) || (dstCandidates == genRegMask(tree->gtRegNum))); |
| 2398 | dstCandidates = genRegMask(tree->gtRegNum); |
| 2399 | } |
| 2400 | else |
| 2401 | { |
| 2402 | assert(isSingleRegister(dstCandidates)); |
| 2403 | } |
| 2404 | } |
| 2405 | #ifdef _TARGET_X86_ |
| 2406 | else if (varTypeIsByte(tree)) |
| 2407 | { |
| 2408 | if (dstCandidates == RBM_NONE) |
| 2409 | { |
| 2410 | dstCandidates = allRegs(TYP_INT); |
| 2411 | } |
| 2412 | dstCandidates &= ~RBM_NON_BYTE_REGS; |
| 2413 | assert(dstCandidates != RBM_NONE); |
| 2414 | } |
| 2415 | #endif // _TARGET_X86_ |
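    // If any use for this node was marked delayRegFree, record that this def's interval has
    // interfering (delay-free) uses; the register selection heuristics take this into account.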
| 2416 | if (pendingDelayFree) |
| 2417 | { |
| 2418 | interval->hasInterferingUses = true; |
| 2419 | // pendingDelayFree = false; |
| 2420 | } |
| 2421 | RefPosition* defRefPosition = |
| 2422 | newRefPosition(interval, currentLoc + 1, RefTypeDef, tree, dstCandidates, multiRegIdx); |
| 2423 | if (tree->IsUnusedValue()) |
| 2424 | { |
| 2425 | defRefPosition->isLocalDefUse = true; |
| 2426 | defRefPosition->lastUse = true; |
| 2427 | } |
| 2428 | else |
| 2429 | { |
| 2430 | RefInfoListNode* refInfo = listNodePool.GetNode(defRefPosition, tree); |
| 2431 | defList.Append(refInfo); |
| 2432 | } |
| 2433 | if (tgtPrefUse != nullptr) |
| 2434 | { |
| 2435 | interval->assignRelatedIntervalIfUnassigned(tgtPrefUse->getInterval()); |
| 2436 | } |
| 2437 | return defRefPosition; |
| 2438 | } |
| 2439 | |
| 2440 | //------------------------------------------------------------------------ |
// BuildDefs: Build one or more RefTypeDef RefPositions for the given node
| 2442 | // |
| 2443 | // Arguments: |
| 2444 | // tree - The node that defines a register |
| 2445 | // dstCount - The number of registers defined by the node |
| 2446 | // dstCandidates - the candidate registers for the definition |
| 2447 | // |
| 2448 | // Notes: |
| 2449 | // Adds the RefInfo for the definitions to the defList. |
| 2450 | // |
| 2451 | void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) |
| 2452 | { |
| 2453 | bool fixedReg = false; |
| 2454 | if ((dstCount > 1) && (dstCandidates != RBM_NONE) && ((int)genCountBits(dstCandidates) == dstCount)) |
| 2455 | { |
| 2456 | fixedReg = true; |
| 2457 | } |
| 2458 | ReturnTypeDesc* retTypeDesc = nullptr; |
| 2459 | if (tree->IsMultiRegCall()) |
| 2460 | { |
| 2461 | retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); |
| 2462 | } |
| 2463 | for (int i = 0; i < dstCount; i++) |
| 2464 | { |
| 2465 | regMaskTP thisDstCandidates; |
| 2466 | if (fixedReg) |
| 2467 | { |
| 2468 | // In case of multi-reg call node, we have to query the ith position return register. |
| 2469 | // For all other cases of multi-reg definitions, the registers must be in sequential order. |
| 2470 | if (retTypeDesc != nullptr) |
| 2471 | { |
                thisDstCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
| 2473 | assert((dstCandidates & thisDstCandidates) != RBM_NONE); |
| 2474 | } |
| 2475 | else |
| 2476 | { |
| 2477 | thisDstCandidates = genFindLowestBit(dstCandidates); |
| 2478 | } |
| 2479 | dstCandidates &= ~thisDstCandidates; |
| 2480 | } |
| 2481 | else |
| 2482 | { |
| 2483 | thisDstCandidates = dstCandidates; |
| 2484 | } |
| 2485 | BuildDef(tree, thisDstCandidates, i); |
| 2486 | } |
| 2487 | } |
| 2488 | |
| 2489 | //------------------------------------------------------------------------ |
// BuildDefsWithKills: Build one or more RefTypeDef RefPositions for the given node,
//                     as well as kills as specified by the given mask.
| 2492 | // |
| 2493 | // Arguments: |
| 2494 | // tree - The node that defines a register |
| 2495 | // dstCount - The number of registers defined by the node |
| 2496 | // dstCandidates - The candidate registers for the definition |
| 2497 | // killMask - The mask of registers killed by this node |
| 2498 | // |
| 2499 | // Notes: |
| 2500 | // Adds the RefInfo for the definitions to the defList. |
| 2501 | // The def and kill functionality is folded into a single method so that the |
// saves and restores of upper vector registers can be bracketed around the def.
| 2503 | // |
| 2504 | void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask) |
| 2505 | { |
| 2506 | // Generate Kill RefPositions |
| 2507 | assert(killMask == getKillSetForNode(tree)); |
| 2508 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 2509 | VARSET_TP liveLargeVectors(VarSetOps::UninitVal()); |
| 2510 | bool doLargeVectorRestore = false; |
| 2511 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 2512 | if (killMask != RBM_NONE) |
| 2513 | { |
| 2514 | buildKillPositionsForNode(tree, currentLoc + 1, killMask); |
| 2515 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 2516 | if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE)) |
| 2517 | { |
| 2518 | // Build RefPositions for saving any live large vectors. |
| 2519 | // This must be done after the kills, so that we know which large vectors are still live. |
| 2520 | VarSetOps::AssignNoCopy(compiler, liveLargeVectors, |
| 2521 | buildUpperVectorSaveRefPositions(tree, currentLoc + 1, killMask)); |
| 2522 | doLargeVectorRestore = true; |
| 2523 | } |
| 2524 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 2525 | } |
| 2526 | |
| 2527 | // Now, create the Def(s) |
| 2528 | BuildDefs(tree, dstCount, dstCandidates); |
| 2529 | |
| 2530 | #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 2531 | // Finally, generate the UpperVectorRestores |
| 2532 | if (doLargeVectorRestore) |
| 2533 | { |
| 2534 | buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors); |
| 2535 | } |
| 2536 | #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE |
| 2537 | } |
| 2538 | |
| 2539 | //------------------------------------------------------------------------ |
| 2540 | // BuildUse: Remove the RefInfoListNode for the given multi-reg index of the given node from |
| 2541 | // the defList, and build a use RefPosition for the associated Interval. |
| 2542 | // |
| 2543 | // Arguments: |
| 2544 | // operand - The node of interest |
| 2545 | // candidates - The register candidates for the use |
| 2546 | // multiRegIdx - The index of the multireg def/use |
| 2547 | // |
| 2548 | // Return Value: |
| 2549 | // The newly created use RefPosition |
| 2550 | // |
| 2551 | // Notes: |
| 2552 | // The node must not be contained, and must have been processed by buildRefPositionsForNode(). |
| 2553 | // |
| 2554 | RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) |
| 2555 | { |
| 2556 | assert(!operand->isContained()); |
| 2557 | Interval* interval; |
| 2558 | bool regOptional = operand->IsRegOptional(); |
| 2559 | |
| 2560 | if (isCandidateLocalRef(operand)) |
| 2561 | { |
| 2562 | interval = getIntervalForLocalVarNode(operand->AsLclVarCommon()); |
| 2563 | |
| 2564 | // We have only approximate last-use information at this point. This is because the |
| 2565 | // execution order doesn't actually reflect the true order in which the localVars |
| 2566 | // are referenced - but the order of the RefPositions will, so we recompute it after |
| 2567 | // RefPositions are built. |
| 2568 | // Use the old value for setting currentLiveVars - note that we do this with the |
| 2569 | // not-quite-correct setting of lastUse. However, this is OK because |
| 2570 | // 1) this is only for preferencing, which doesn't require strict correctness, and |
| 2571 | // 2) the cases where these out-of-order uses occur should not overlap a kill. |
| 2572 | // TODO-Throughput: clean this up once we have the execution order correct. At that point |
| 2573 | // we can update currentLiveVars at the same place that we create the RefPosition. |
| 2574 | if ((operand->gtFlags & GTF_VAR_DEATH) != 0) |
| 2575 | { |
| 2576 | unsigned varIndex = interval->getVarIndex(compiler); |
| 2577 | VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex); |
| 2578 | } |
| 2579 | } |
| 2580 | else |
| 2581 | { |
| 2582 | RefInfoListNode* refInfo = defList.removeListNode(operand, multiRegIdx); |
| 2583 | RefPosition* defRefPos = refInfo->ref; |
| 2584 | assert(defRefPos->multiRegIdx == multiRegIdx); |
| 2585 | interval = defRefPos->getInterval(); |
| 2586 | listNodePool.ReturnNode(refInfo); |
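        // This use references a tree temp, so the use RefPosition is built without a tree node.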
| 2587 | operand = nullptr; |
| 2588 | } |
| 2589 | RefPosition* useRefPos = newRefPosition(interval, currentLoc, RefTypeUse, operand, candidates, multiRegIdx); |
| 2590 | useRefPos->setAllocateIfProfitable(regOptional); |
| 2591 | return useRefPos; |
| 2592 | } |
| 2593 | |
| 2594 | //------------------------------------------------------------------------ |
| 2595 | // BuildIndirUses: Build Use RefPositions for an indirection that might be contained |
| 2596 | // |
| 2597 | // Arguments: |
//    indirTree  - The indirection node of interest
//    candidates - The register candidates for the use RefPositions
| 2599 | // |
| 2600 | // Return Value: |
| 2601 | // The number of source registers used by the *parent* of this node. |
| 2602 | // |
| 2603 | // Notes: |
| 2604 | // This method may only be used if the candidates are the same for all sources. |
| 2605 | // |
| 2606 | int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates) |
| 2607 | { |
| 2608 | GenTree* const addr = indirTree->gtOp1; |
| 2609 | if (!addr->isContained()) |
| 2610 | { |
| 2611 | BuildUse(addr, candidates); |
| 2612 | return 1; |
| 2613 | } |
| 2614 | if (!addr->OperIs(GT_LEA)) |
| 2615 | { |
| 2616 | return 0; |
| 2617 | } |
| 2618 | |
| 2619 | GenTreeAddrMode* const addrMode = addr->AsAddrMode(); |
| 2620 | |
| 2621 | unsigned srcCount = 0; |
| 2622 | if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained()) |
| 2623 | { |
| 2624 | BuildUse(addrMode->Base(), candidates); |
| 2625 | srcCount++; |
| 2626 | } |
| 2627 | if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained()) |
| 2628 | { |
| 2629 | BuildUse(addrMode->Index(), candidates); |
| 2630 | srcCount++; |
| 2631 | } |
| 2632 | return srcCount; |
| 2633 | } |
| 2634 | |
| 2635 | //------------------------------------------------------------------------ |
| 2636 | // BuildOperandUses: Build Use RefPositions for an operand that might be contained. |
| 2637 | // |
| 2638 | // Arguments: |
| 2639 | // node - The node of interest |
| 2640 | // |
| 2641 | // Return Value: |
| 2642 | // The number of source registers used by the *parent* of this node. |
| 2643 | // |
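// Notes:
//    For example (illustrative only): an uncontained operand contributes exactly one use; a
//    contained indirection delegates to BuildIndirUses and may contribute 0, 1, or 2 uses;
//    a contained immediate contributes none.
//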
| 2644 | int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) |
| 2645 | { |
| 2646 | if (!node->isContained()) |
| 2647 | { |
| 2648 | BuildUse(node, candidates); |
| 2649 | return 1; |
| 2650 | } |
| 2651 | |
| 2652 | #if !defined(_TARGET_64BIT_) |
| 2653 | if (node->OperIs(GT_LONG)) |
| 2654 | { |
| 2655 | return BuildBinaryUses(node->AsOp(), candidates); |
| 2656 | } |
| 2657 | #endif // !defined(_TARGET_64BIT_) |
| 2658 | if (node->OperIsIndir()) |
| 2659 | { |
| 2660 | return BuildIndirUses(node->AsIndir(), candidates); |
| 2661 | } |
| 2662 | if (node->OperIsHWIntrinsic()) |
| 2663 | { |
| 2664 | BuildUse(node->gtGetOp1(), candidates); |
| 2665 | return 1; |
| 2666 | } |
| 2667 | |
| 2668 | return 0; |
| 2669 | } |
| 2670 | |
| 2671 | //------------------------------------------------------------------------ |
| 2672 | // setDelayFree: Mark a RefPosition as delayRegFree, and set pendingDelayFree |
| 2673 | // |
| 2674 | // Arguments: |
| 2675 | // use - The use RefPosition to mark |
| 2676 | // |
| 2677 | void LinearScan::setDelayFree(RefPosition* use) |
| 2678 | { |
| 2679 | use->delayRegFree = true; |
| 2680 | pendingDelayFree = true; |
| 2681 | } |
| 2682 | |
| 2683 | //------------------------------------------------------------------------ |
| 2684 | // BuildDelayFreeUses: Build Use RefPositions for an operand that might be contained, |
| 2685 | // and which need to be marked delayRegFree |
| 2686 | // |
| 2687 | // Arguments: |
| 2688 | // node - The node of interest |
| 2689 | // |
| 2690 | // Return Value: |
| 2691 | // The number of source registers used by the *parent* of this node. |
| 2692 | // |
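// Notes:
//    A use marked delayRegFree remains live across the def of the consuming node. For example
//    (illustrative only), if an instruction writes its destination register before it has
//    finished reading the registers of a contained memory operand, marking those uses
//    delayRegFree prevents the allocator from assigning the destination one of those registers.
//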
| 2693 | int LinearScan::BuildDelayFreeUses(GenTree* node, regMaskTP candidates) |
| 2694 | { |
| 2695 | RefPosition* use; |
| 2696 | if (!node->isContained()) |
| 2697 | { |
| 2698 | use = BuildUse(node, candidates); |
| 2699 | setDelayFree(use); |
| 2700 | return 1; |
| 2701 | } |
| 2702 | if (node->OperIsHWIntrinsic()) |
| 2703 | { |
| 2704 | use = BuildUse(node->gtGetOp1(), candidates); |
| 2705 | setDelayFree(use); |
| 2706 | return 1; |
| 2707 | } |
| 2708 | if (!node->OperIsIndir()) |
| 2709 | { |
| 2710 | return 0; |
| 2711 | } |
| 2712 | GenTreeIndir* indirTree = node->AsIndir(); |
| 2713 | GenTree* addr = indirTree->gtOp1; |
| 2714 | if (!addr->isContained()) |
| 2715 | { |
| 2716 | use = BuildUse(addr, candidates); |
| 2717 | setDelayFree(use); |
| 2718 | return 1; |
| 2719 | } |
| 2720 | if (!addr->OperIs(GT_LEA)) |
| 2721 | { |
| 2722 | return 0; |
| 2723 | } |
| 2724 | |
| 2725 | GenTreeAddrMode* const addrMode = addr->AsAddrMode(); |
| 2726 | |
| 2727 | unsigned srcCount = 0; |
| 2728 | if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained()) |
| 2729 | { |
| 2730 | use = BuildUse(addrMode->Base(), candidates); |
| 2731 | setDelayFree(use); |
| 2732 | srcCount++; |
| 2733 | } |
| 2734 | if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained()) |
| 2735 | { |
| 2736 | use = BuildUse(addrMode->Index(), candidates); |
| 2737 | setDelayFree(use); |
| 2738 | srcCount++; |
| 2739 | } |
| 2740 | return srcCount; |
| 2741 | } |
| 2742 | |
| 2743 | //------------------------------------------------------------------------ |
| 2744 | // BuildBinaryUses: Get the RefInfoListNodes for the operands of the |
| 2745 | // given node, and build uses for them. |
| 2746 | // |
| 2747 | // Arguments: |
| 2748 | // node - a GenTreeOp |
| 2749 | // |
| 2750 | // Return Value: |
| 2751 | // The number of actual register operands. |
| 2752 | // |
| 2753 | // Notes: |
| 2754 | // The operands must already have been processed by buildRefPositionsForNode, and their |
| 2755 | // RefInfoListNodes placed in the defList. |
//
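//    For example (illustrative only), for an uncontained GT_ADD(op1, op2) this builds a use
//    for op1 followed by a use for op2 and returns 2; if GTF_REVERSE_OPS is set, the use for
//    op2 is built first so that the use order matches the execution order.
//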
| 2757 | int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates) |
| 2758 | { |
| 2759 | #ifdef _TARGET_XARCH_ |
| 2761 | if (node->OperIsBinary() && isRMWRegOper(node)) |
| 2762 | { |
| 2763 | return BuildRMWUses(node, candidates); |
| 2764 | } |
| 2765 | #endif // _TARGET_XARCH_ |
| 2766 | int srcCount = 0; |
| 2767 | GenTree* op1 = node->gtOp1; |
| 2768 | GenTree* op2 = node->gtGetOp2IfPresent(); |
| 2769 | if (node->IsReverseOp() && (op2 != nullptr)) |
| 2770 | { |
| 2771 | srcCount += BuildOperandUses(op2, candidates); |
| 2772 | op2 = nullptr; |
| 2773 | } |
| 2774 | if (op1 != nullptr) |
| 2775 | { |
| 2776 | srcCount += BuildOperandUses(op1, candidates); |
| 2777 | } |
| 2778 | if (op2 != nullptr) |
| 2779 | { |
| 2780 | srcCount += BuildOperandUses(op2, candidates); |
| 2781 | } |
| 2782 | return srcCount; |
| 2783 | } |
| 2784 | |
| 2785 | //------------------------------------------------------------------------ |
| 2786 | // BuildStoreLoc: Set register requirements for a store of a lclVar |
| 2787 | // |
| 2788 | // Arguments: |
| 2789 | // storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) |
//
// Return Value:
//    The number of source registers used by this node.
//
// Notes:
| 2792 | // This involves: |
| 2793 | // - Setting the appropriate candidates for a store of a multi-reg call return value. |
| 2794 | // - Handling of contained immediates. |
| 2795 | // - Requesting an internal register for SIMD12 stores. |
| 2796 | // |
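//    For example (illustrative only), a store of a two-register multi-reg call result builds
//    one use per return register:
//
//        BuildUse(op1, RBM_NONE, 0);
//        BuildUse(op1, RBM_NONE, 1);
//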
| 2797 | int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) |
| 2798 | { |
| 2799 | GenTree* op1 = storeLoc->gtGetOp1(); |
| 2800 | int srcCount; |
| 2801 | RefPosition* singleUseRef = nullptr; |
| 2802 | LclVarDsc* varDsc = &compiler->lvaTable[storeLoc->gtLclNum]; |
| 2803 | |
| 2804 | // First, define internal registers. |
| 2805 | #ifdef FEATURE_SIMD |
| 2806 | RefPosition* internalFloatDef = nullptr; |
| 2807 | if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12)) |
| 2808 | { |
| 2809 | // Need an additional register to extract upper 4 bytes of Vector3. |
| 2810 | internalFloatDef = buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs()); |
| 2811 | } |
| 2812 | #endif // FEATURE_SIMD |
| 2813 | |
| 2814 | // Second, use source registers. |
| 2815 | |
| 2816 | if (op1->IsMultiRegCall()) |
| 2817 | { |
| 2818 | // This is the case of var = call where call is returning |
| 2819 | // a value in multiple return registers. |
| 2820 | // Must be a store lclvar. |
| 2821 | assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); |
| 2822 | |
| 2823 | // srcCount = number of registers in which the value is returned by call |
        GenTreeCall*    call        = op1->AsCall();
        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
        srcCount                    = retTypeDesc->GetReturnRegCount();
| 2828 | |
| 2829 | for (int i = 0; i < srcCount; ++i) |
| 2830 | { |
| 2831 | BuildUse(op1, RBM_NONE, i); |
| 2832 | } |
| 2833 | } |
| 2834 | #ifndef _TARGET_64BIT_ |
| 2835 | else if (varTypeIsLong(op1)) |
| 2836 | { |
| 2837 | if (op1->OperIs(GT_MUL_LONG)) |
| 2838 | { |
| 2839 | srcCount = 2; |
| 2840 | BuildUse(op1, allRegs(TYP_INT), 0); |
| 2841 | BuildUse(op1, allRegs(TYP_INT), 1); |
| 2842 | } |
| 2843 | else |
| 2844 | { |
| 2845 | assert(op1->OperIs(GT_LONG)); |
| 2846 | assert(op1->isContained() && !op1->gtGetOp1()->isContained() && !op1->gtGetOp2()->isContained()); |
| 2847 | srcCount = BuildBinaryUses(op1->AsOp()); |
| 2848 | assert(srcCount == 2); |
| 2849 | } |
| 2850 | } |
| 2851 | #endif // !_TARGET_64BIT_ |
| 2852 | else if (op1->isContained()) |
| 2853 | { |
| 2854 | srcCount = 0; |
| 2855 | } |
| 2856 | else |
| 2857 | { |
| 2858 | srcCount = 1; |
| 2859 | regMaskTP srcCandidates = RBM_NONE; |
| 2860 | #ifdef _TARGET_X86_ |
| 2861 | if (varTypeIsByte(storeLoc)) |
| 2862 | { |
| 2863 | srcCandidates = allByteRegs(); |
| 2864 | } |
| 2865 | #endif // _TARGET_X86_ |
| 2866 | singleUseRef = BuildUse(op1, srcCandidates); |
| 2867 | } |
| 2868 | |
| 2869 | // Third, use internal registers. |
| 2870 | #ifdef FEATURE_SIMD |
| 2871 | buildInternalRegisterUses(); |
| 2872 | #endif // FEATURE_SIMD |
| 2873 | |
| 2874 | // Fourth, define destination registers. |
| 2875 | |
| 2876 | // Add the lclVar to currentLiveVars (if it will remain live) |
| 2877 | if (isCandidateVar(varDsc)) |
| 2878 | { |
| 2879 | assert(varDsc->lvTracked); |
| 2880 | unsigned varIndex = varDsc->lvVarIndex; |
| 2881 | Interval* varDefInterval = getIntervalForLocalVar(varIndex); |
| 2882 | if ((storeLoc->gtFlags & GTF_VAR_DEATH) == 0) |
| 2883 | { |
| 2884 | VarSetOps::AddElemD(compiler, currentLiveVars, varIndex); |
| 2885 | } |
| 2886 | if (singleUseRef != nullptr) |
| 2887 | { |
| 2888 | Interval* srcInterval = singleUseRef->getInterval(); |
| 2889 | if (srcInterval->relatedInterval == nullptr) |
| 2890 | { |
                // Preference the source to the dest, unless this is a non-last-use localVar.
                // Note that the last-use info is not yet correct here, but using it is a better
                // approximation than always preferencing the source to the dest, which can hurt
                // when the source's lifetime extends beyond that of the dest.
| 2894 | if (!srcInterval->isLocalVar || (singleUseRef->treeNode->gtFlags & GTF_VAR_DEATH) != 0) |
| 2895 | { |
| 2896 | srcInterval->assignRelatedInterval(varDefInterval); |
| 2897 | } |
| 2898 | } |
| 2899 | else if (!srcInterval->isLocalVar) |
| 2900 | { |
| 2901 | // Preference the source to dest, if src is not a local var. |
| 2902 | srcInterval->assignRelatedInterval(varDefInterval); |
| 2903 | } |
| 2904 | } |
| 2905 | newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, allRegs(storeLoc->TypeGet())); |
| 2906 | } |
| 2907 | else |
| 2908 | { |
| 2909 | if (storeLoc->gtOp1->OperIs(GT_BITCAST)) |
| 2910 | { |
| 2911 | storeLoc->gtType = storeLoc->gtOp1->gtType = storeLoc->gtOp1->AsUnOp()->gtOp1->TypeGet(); |
| 2912 | RegisterType registerType = regType(storeLoc->TypeGet()); |
| 2913 | noway_assert(singleUseRef != nullptr); |
| 2914 | |
| 2915 | Interval* srcInterval = singleUseRef->getInterval(); |
| 2916 | srcInterval->registerType = registerType; |
| 2917 | |
| 2918 | RefPosition* srcDefPosition = srcInterval->firstRefPosition; |
| 2919 | assert(srcDefPosition != nullptr); |
| 2920 | assert(srcDefPosition->refType == RefTypeDef); |
| 2921 | assert(srcDefPosition->treeNode == storeLoc->gtOp1); |
| 2922 | |
| 2923 | srcDefPosition->registerAssignment = allRegs(registerType); |
| 2924 | singleUseRef->registerAssignment = allRegs(registerType); |
| 2925 | } |
| 2926 | } |
| 2927 | |
| 2928 | return srcCount; |
| 2929 | } |
| 2930 | |
| 2931 | //------------------------------------------------------------------------ |
| 2932 | // BuildSimple: Builds use RefPositions for trees requiring no special handling |
| 2933 | // |
| 2934 | // Arguments: |
| 2935 | // tree - The node of interest |
| 2936 | // |
| 2937 | // Return Value: |
| 2938 | // The number of use RefPositions created |
| 2939 | // |
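// Notes:
//    For example (illustrative only), a value-producing simple operator with uncontained
//    operands builds one use per operand via BuildBinaryUses and a single def for its result.
//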
| 2940 | int LinearScan::BuildSimple(GenTree* tree) |
| 2941 | { |
| 2942 | unsigned kind = tree->OperKind(); |
| 2943 | int srcCount = 0; |
| 2944 | if ((kind & (GTK_CONST | GTK_LEAF)) == 0) |
| 2945 | { |
| 2946 | assert((kind & GTK_SMPOP) != 0); |
| 2947 | srcCount = BuildBinaryUses(tree->AsOp()); |
| 2948 | } |
| 2949 | if (tree->IsValue()) |
| 2950 | { |
| 2951 | BuildDef(tree); |
| 2952 | } |
| 2953 | return srcCount; |
| 2954 | } |
| 2955 | |
| 2956 | //------------------------------------------------------------------------ |
// BuildReturn: Build the RefPositions for a GT_RETURN node.
| 2958 | // |
| 2959 | // Arguments: |
| 2960 | // tree - The node of interest |
| 2961 | // |
| 2962 | // Return Value: |
| 2963 | // The number of sources consumed by this node. |
| 2964 | // |
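// Notes:
//    For example (illustrative only), a struct returned in two registers builds one use per
//    return register, constrained to retTypeDesc->GetABIReturnRegs(), while a TYP_INT return
//    builds a single use constrained to RBM_INTRET.
//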
| 2965 | int LinearScan::BuildReturn(GenTree* tree) |
| 2966 | { |
| 2967 | GenTree* op1 = tree->gtGetOp1(); |
| 2968 | |
| 2969 | #if !defined(_TARGET_64BIT_) |
| 2970 | if (tree->TypeGet() == TYP_LONG) |
| 2971 | { |
| 2972 | assert((op1->OperGet() == GT_LONG) && op1->isContained()); |
| 2973 | GenTree* loVal = op1->gtGetOp1(); |
| 2974 | GenTree* hiVal = op1->gtGetOp2(); |
| 2975 | BuildUse(loVal, RBM_LNGRET_LO); |
| 2976 | BuildUse(hiVal, RBM_LNGRET_HI); |
| 2977 | return 2; |
| 2978 | } |
| 2979 | else |
| 2980 | #endif // !defined(_TARGET_64BIT_) |
| 2981 | if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) |
| 2982 | { |
| 2983 | regMaskTP useCandidates = RBM_NONE; |
| 2984 | |
| 2985 | #if FEATURE_MULTIREG_RET |
| 2986 | if (varTypeIsStruct(tree)) |
| 2987 | { |
| 2988 | // op1 has to be either an lclvar or a multi-reg returning call |
| 2989 | if (op1->OperGet() == GT_LCL_VAR) |
| 2990 | { |
| 2991 | BuildUse(op1, useCandidates); |
| 2992 | } |
| 2993 | else |
| 2994 | { |
| 2995 | noway_assert(op1->IsMultiRegCall()); |
| 2996 | |
| 2997 | ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); |
| 2998 | int srcCount = retTypeDesc->GetReturnRegCount(); |
| 2999 | useCandidates = retTypeDesc->GetABIReturnRegs(); |
| 3000 | for (int i = 0; i < srcCount; i++) |
| 3001 | { |
| 3002 | BuildUse(op1, useCandidates, i); |
| 3003 | } |
| 3004 | return srcCount; |
| 3005 | } |
| 3006 | } |
| 3007 | else |
| 3008 | #endif // FEATURE_MULTIREG_RET |
| 3009 | { |
| 3010 | // Non-struct type return - determine useCandidates |
| 3011 | switch (tree->TypeGet()) |
| 3012 | { |
| 3013 | case TYP_VOID: |
| 3014 | useCandidates = RBM_NONE; |
| 3015 | break; |
| 3016 | case TYP_FLOAT: |
| 3017 | useCandidates = RBM_FLOATRET; |
| 3018 | break; |
| 3019 | case TYP_DOUBLE: |
| 3020 | // We ONLY want the valid double register in the RBM_DOUBLERET mask. |
| 3021 | useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE); |
| 3022 | break; |
| 3023 | case TYP_LONG: |
| 3024 | useCandidates = RBM_LNGRET; |
| 3025 | break; |
| 3026 | default: |
| 3027 | useCandidates = RBM_INTRET; |
| 3028 | break; |
| 3029 | } |
| 3030 | BuildUse(op1, useCandidates); |
| 3031 | return 1; |
| 3032 | } |
| 3033 | } |
| 3034 | |
| 3035 | // No kills or defs. |
| 3036 | return 0; |
| 3037 | } |
| 3038 | |
| 3039 | //------------------------------------------------------------------------ |
| 3040 | // supportsSpecialPutArg: Determine if we can support specialPutArgs |
| 3041 | // |
| 3042 | // Return Value: |
| 3043 | // True iff specialPutArg intervals can be supported. |
| 3044 | // |
// Notes:
//    See the comment in the method body for the x86 stress-mode case that motivates this check.
//
| 3049 | bool LinearScan::supportsSpecialPutArg() |
| 3050 | { |
| 3051 | #if defined(DEBUG) && defined(_TARGET_X86_) |
| 3052 | // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode |
| 3053 | // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the |
// first two integral arguments to a call. This can leave us with too few registers to successfully allocate in
| 3055 | // situations like the following: |
| 3056 | // |
| 3057 | // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d> |
| 3058 | // |
| 3059 | // /--* t1026 ref |
| 3060 | // t1352 = * putarg_reg ref REG NA |
| 3061 | // |
| 3062 | // t342 = lclVar int V14 loc6 u:4 REG NA $50c |
| 3063 | // |
| 3064 | // t343 = const int 1 REG NA $41 |
| 3065 | // |
| 3066 | // /--* t342 int |
| 3067 | // +--* t343 int |
| 3068 | // t344 = * + int REG NA $495 |
| 3069 | // |
| 3070 | // t345 = lclVar int V04 arg4 u:2 REG NA $100 |
| 3071 | // |
| 3072 | // /--* t344 int |
| 3073 | // +--* t345 int |
| 3074 | // t346 = * % int REG NA $496 |
| 3075 | // |
| 3076 | // /--* t346 int |
| 3077 | // t1353 = * putarg_reg int REG NA |
| 3078 | // |
| 3079 | // t1354 = lclVar ref V52 tmp35 (last use) REG NA |
| 3080 | // |
| 3081 | // /--* t1354 ref |
| 3082 | // t1355 = * lea(b+0) byref REG NA |
| 3083 | // |
| 3084 | // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the |
| 3085 | // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register |
| 3086 | // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to |
| 3087 | // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be |
| 3088 | // available due to the special put arg. |
| 3089 | return getStressLimitRegs() != LSRA_LIMIT_CALLER; |
| 3090 | #else |
| 3091 | return true; |
| 3092 | #endif |
| 3093 | } |
| 3094 | |
| 3095 | //------------------------------------------------------------------------ |
// BuildPutArgReg: Build the RefPositions for a PUTARG_REG node.
//
// Arguments:
//    node - The PUTARG_REG node.
//
// Return Value:
//    The number of source registers used by this node.
| 3107 | // |
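// For illustration only (a sketch, not additional code), for an argument passed in REG_ARG_0
// both the source use and the result def are constrained to the same single-register mask, so
// the source is ideally computed directly into the outgoing argument register:
//
//     regMaskTP argMask = genRegMask(REG_ARG_0);
//     BuildUse(op1, argMask);
//     BuildDef(node, argMask);
//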
| 3108 | int LinearScan::BuildPutArgReg(GenTreeUnOp* node) |
| 3109 | { |
| 3110 | assert(node != nullptr); |
| 3111 | assert(node->OperIsPutArgReg()); |
| 3112 | regNumber argReg = node->gtRegNum; |
| 3113 | assert(argReg != REG_NA); |
| 3114 | bool isSpecialPutArg = false; |
| 3115 | int srcCount = 1; |
| 3116 | GenTree* op1 = node->gtGetOp1(); |
| 3117 | |
| 3118 | // First, handle the GT_OBJ case, which loads into the arg register |
| 3119 | // (so we don't set the use to prefer that register for the source address). |
| 3120 | if (op1->OperIs(GT_OBJ)) |
| 3121 | { |
| 3122 | GenTreeObj* obj = op1->AsObj(); |
| 3123 | GenTree* addr = obj->Addr(); |
| 3124 | unsigned size = obj->gtBlkSize; |
| 3125 | assert(size <= TARGET_POINTER_SIZE); |
| 3126 | if (addr->OperIsLocalAddr()) |
| 3127 | { |
| 3128 | // We don't need a source register. |
| 3129 | assert(addr->isContained()); |
| 3130 | srcCount = 0; |
| 3131 | } |
| 3132 | else if (!isPow2(size)) |
| 3133 | { |
| 3134 | // We'll need an internal register to do the odd-size load. |
| 3135 | // This can only happen with integer registers. |
| 3136 | assert(genIsValidIntReg(argReg)); |
| 3137 | buildInternalIntRegisterDefForNode(node); |
| 3138 | BuildUse(addr); |
| 3139 | buildInternalRegisterUses(); |
| 3140 | } |
| 3141 | return srcCount; |
| 3142 | } |
| 3143 | |
| 3144 | // To avoid redundant moves, have the argument operand computed in the |
| 3145 | // register in which the argument is passed to the call. |
| 3146 | regMaskTP argMask = genRegMask(argReg); |
| 3147 | RefPosition* use = BuildUse(op1, argMask); |
| 3148 | |
| 3149 | if (supportsSpecialPutArg() && isCandidateLocalRef(op1) && ((op1->gtFlags & GTF_VAR_DEATH) == 0)) |
| 3150 | { |
| 3151 | // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use, |
| 3152 | // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register |
| 3153 | // (which is actually what we hope will happen). |
| 3154 | JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n" ); |
| 3155 | |
| 3156 | // Preference the destination to the interval of the first register defined by the first operand. |
| 3157 | assert(use->getInterval()->isLocalVar); |
| 3158 | isSpecialPutArg = true; |
| 3159 | tgtPrefUse = use; |
| 3160 | } |
| 3161 | |
| 3162 | #ifdef _TARGET_ARM_ |
    // If the type of the node is `long`, it is actually a `double` being passed in a pair of
    // registers; actual `long` arguments must already have been transformed into a field list
    // with two fields.
| 3165 | if (node->TypeGet() == TYP_LONG) |
| 3166 | { |
| 3167 | srcCount++; |
| 3168 | regMaskTP argMaskHi = genRegMask(REG_NEXT(argReg)); |
| 3169 | assert(genRegArgNext(argReg) == REG_NEXT(argReg)); |
| 3170 | use = BuildUse(op1, argMaskHi, 1); |
| 3171 | BuildDef(node, argMask, 0); |
| 3172 | BuildDef(node, argMaskHi, 1); |
| 3173 | } |
| 3174 | else |
| 3175 | #endif // _TARGET_ARM_ |
| 3176 | { |
| 3177 | RefPosition* def = BuildDef(node, argMask); |
| 3178 | if (isSpecialPutArg) |
| 3179 | { |
| 3180 | def->getInterval()->isSpecialPutArg = true; |
| 3181 | } |
| 3182 | } |
| 3183 | return srcCount; |
| 3184 | } |
| 3185 | |
| 3186 | //------------------------------------------------------------------------ |
| 3187 | // HandleFloatVarArgs: Handle additional register requirements for a varargs call |
| 3188 | // |
| 3189 | // Arguments: |
| 3190 | // call - The call node of interest |
| 3191 | // argNode - The current argument |
| 3192 | // |
| 3193 | // Return Value: |
| 3194 | // None. |
| 3195 | // |
| 3196 | // Notes: |
| 3197 | // In the case of a varargs call, the ABI dictates that if we have floating point args, |
| 3198 | // we must pass the enregistered arguments in both the integer and floating point registers. |
| 3199 | // Since the integer register is not associated with the arg node, we will reserve it as |
| 3200 | // an internal register on the call so that it is not used during the evaluation of the call node |
| 3201 | // (e.g. for the target). |
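//    For example (illustrative only), on Windows x64 a floating-point vararg passed in XMM1
//    must also be available in the corresponding integer argument register (RDX), which is
//    why that register is reserved here as an internal register on the call.
//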
| 3202 | void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs) |
| 3203 | { |
| 3204 | #if FEATURE_VARARG |
| 3205 | if (call->IsVarargs() && varTypeIsFloating(argNode)) |
| 3206 | { |
| 3207 | *callHasFloatRegArgs = true; |
| 3208 | |
| 3209 | // We'll have to return the internal def and then later create a use for it. |
| 3210 | regNumber argReg = argNode->gtRegNum; |
| 3211 | regNumber targetReg = compiler->getCallArgIntRegister(argReg); |
| 3212 | |
| 3213 | buildInternalIntRegisterDefForNode(call, genRegMask(targetReg)); |
| 3214 | } |
| 3215 | #endif // FEATURE_VARARG |
| 3216 | } |
| 3217 | |
| 3218 | //------------------------------------------------------------------------ |
| 3219 | // BuildGCWriteBarrier: Handle additional register requirements for a GC write barrier |
| 3220 | // |
| 3221 | // Arguments: |
//    tree - The STORE_IND for which a write barrier is required
//
// Return Value:
//    The number of source registers used by this node (always 2: the address and the value).
//
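// Notes:
//    By default the address and value are constrained to RBM_ARG_0 and RBM_ARG_1, since the
//    write barrier is implemented as a helper call that takes them as its first two arguments;
//    some targets substitute dedicated write-barrier registers below.
//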
| 3224 | int LinearScan::BuildGCWriteBarrier(GenTree* tree) |
| 3225 | { |
| 3226 | GenTree* dst = tree; |
| 3227 | GenTree* addr = tree->gtGetOp1(); |
| 3228 | GenTree* src = tree->gtGetOp2(); |
| 3229 | |
| 3230 | // In the case where we are doing a helper assignment, even if the dst |
| 3231 | // is an indir through an lea, we need to actually instantiate the |
| 3232 | // lea in a register |
| 3233 | assert(!addr->isContained() && !src->isContained()); |
| 3234 | int srcCount = 2; |
| 3235 | regMaskTP addrCandidates = RBM_ARG_0; |
| 3236 | regMaskTP srcCandidates = RBM_ARG_1; |
| 3237 | |
| 3238 | #if defined(_TARGET_ARM64_) |
| 3239 | |
| 3240 | // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST) |
| 3241 | // the 'src' goes into x15 (REG_WRITE_BARRIER_SRC) |
| 3242 | // |
| 3243 | addrCandidates = RBM_WRITE_BARRIER_DST; |
| 3244 | srcCandidates = RBM_WRITE_BARRIER_SRC; |
| 3245 | |
| 3246 | #elif defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS |
| 3247 | |
| 3248 | bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); |
| 3249 | if (useOptimizedWriteBarrierHelper) |
| 3250 | { |
| 3251 | // Special write barrier: |
| 3252 | // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and |
| 3253 | // op2 (src) goes into any int register. |
| 3254 | addrCandidates = RBM_WRITE_BARRIER; |
| 3255 | srcCandidates = RBM_WRITE_BARRIER_SRC; |
| 3256 | } |
| 3257 | |
| 3258 | #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS |
| 3259 | |
| 3260 | BuildUse(addr, addrCandidates); |
| 3261 | BuildUse(src, srcCandidates); |
| 3262 | |
| 3263 | regMaskTP killMask = getKillSetForStoreInd(tree->AsStoreInd()); |
| 3264 | buildKillPositionsForNode(tree, currentLoc + 1, killMask); |
| 3265 | return 2; |
| 3266 | } |
| 3267 | |
| 3268 | //------------------------------------------------------------------------ |
| 3269 | // BuildCmp: Set the register requirements for a compare. |
| 3270 | // |
| 3271 | // Arguments: |
| 3272 | // tree - The node of interest |
| 3273 | // |
| 3274 | // Return Value: |
//    The number of source registers used by this node.
| 3276 | // |
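// Notes:
//    A compare consumed directly by a jump has TYP_VOID and builds no def; only a compare whose
//    result is materialized into a register builds a def (constrained to the byteable registers
//    on x86).
//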
| 3277 | int LinearScan::BuildCmp(GenTree* tree) |
| 3278 | { |
| 3279 | assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP)); |
| 3280 | regMaskTP dstCandidates = RBM_NONE; |
| 3281 | regMaskTP op1Candidates = RBM_NONE; |
| 3282 | regMaskTP op2Candidates = RBM_NONE; |
| 3283 | GenTree* op1 = tree->gtGetOp1(); |
| 3284 | GenTree* op2 = tree->gtGetOp2(); |
| 3285 | |
| 3286 | #ifdef _TARGET_X86_ |
| 3287 | // If the compare is used by a jump, we just need to set the condition codes. If not, then we need |
| 3288 | // to store the result into the low byte of a register, which requires the dst be a byteable register. |
| 3289 | if (tree->TypeGet() != TYP_VOID) |
| 3290 | { |
| 3291 | dstCandidates = allByteRegs(); |
| 3292 | } |
| 3293 | bool needByteRegs = false; |
| 3294 | if (varTypeIsByte(tree)) |
| 3295 | { |
| 3296 | if (!varTypeIsFloating(op1)) |
| 3297 | { |
| 3298 | needByteRegs = true; |
| 3299 | } |
| 3300 | } |
| 3301 | // Example1: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses |
| 3302 | // ubyte as the result of comparison and if the result needs to be materialized into a reg |
| 3303 | // simply zero extend it to TYP_INT size. Here is an example of generated code: |
| 3304 | // cmp dl, byte ptr[addr mode] |
| 3305 | // movzx edx, dl |
| 3306 | else if (varTypeIsByte(op1) && varTypeIsByte(op2)) |
| 3307 | { |
| 3308 | needByteRegs = true; |
| 3309 | } |
| 3310 | // Example2: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses |
| 3311 | // ubyte as the result of the comparison and if the result needs to be materialized into a reg |
| 3312 | // simply zero extend it to TYP_INT size. |
| 3313 | else if (varTypeIsByte(op1) && op2->IsCnsIntOrI()) |
| 3314 | { |
| 3315 | needByteRegs = true; |
| 3316 | } |
| 3317 | // Example3: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses |
| 3318 | // ubyte as the result of the comparison and if the result needs to be materialized into a reg |
| 3319 | // simply zero extend it to TYP_INT size. |
| 3320 | else if (op1->IsCnsIntOrI() && varTypeIsByte(op2)) |
| 3321 | { |
| 3322 | needByteRegs = true; |
| 3323 | } |
| 3324 | if (needByteRegs) |
| 3325 | { |
| 3326 | if (!op1->isContained()) |
| 3327 | { |
| 3328 | op1Candidates = allByteRegs(); |
| 3329 | } |
| 3330 | if (!op2->isContained()) |
| 3331 | { |
| 3332 | op2Candidates = allByteRegs(); |
| 3333 | } |
| 3334 | } |
| 3335 | #endif // _TARGET_X86_ |
| 3336 | |
| 3337 | int srcCount = BuildOperandUses(op1, op1Candidates); |
| 3338 | srcCount += BuildOperandUses(op2, op2Candidates); |
| 3339 | if (tree->TypeGet() != TYP_VOID) |
| 3340 | { |
| 3341 | BuildDef(tree, dstCandidates); |
| 3342 | } |
| 3343 | return srcCount; |
| 3344 | } |
| 3345 | |