| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 7 | XX XX |
| 8 | XX Lower XX |
| 9 | XX XX |
| 10 | XX Preconditions: XX |
| 11 | XX XX |
| 12 | XX Postconditions (for the nodes currently handled): XX |
| 13 | XX - All operands requiring a register are explicit in the graph XX |
| 14 | XX XX |
| 15 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 16 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 17 | */ |
| 18 | |
| 19 | #include "jitpch.h" |
| 20 | #ifdef _MSC_VER |
| 21 | #pragma hdrstop |
| 22 | #endif |
| 23 | |
| 24 | #include "lower.h" |
| 25 | |
| 26 | #if !defined(_TARGET_64BIT_) |
| 27 | #include "decomposelongs.h" |
| 28 | #endif // !defined(_TARGET_64BIT_) |
| 29 | |
| 30 | //------------------------------------------------------------------------ |
| 31 | // MakeSrcContained: Make "childNode" a contained node |
| 32 | // |
| 33 | // Arguments: |
| 34 | // parentNode - is a non-leaf node that can contain its 'childNode' |
| 35 | // childNode - is an op that will now be contained by its parent. |
| 36 | // |
| 37 | // Notes: |
| 38 | // If 'childNode' has any existing sources, they will now be sources for the parent. |
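| | // A contained node is not evaluated into its own register: codegen for 'parentNode' encodes |
| | // 'childNode' directly as part of the parent's instruction (for example, as an immediate or |
| | // memory operand). |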
| 39 | // |
| 40 | void Lowering::MakeSrcContained(GenTree* parentNode, GenTree* childNode) |
| 41 | { |
| 42 | assert(!parentNode->OperIsLeaf()); |
| 43 | assert(childNode->canBeContained()); |
| 44 | childNode->SetContained(); |
| 45 | assert(childNode->isContained()); |
| 46 | } |
| 47 | |
| 48 | //------------------------------------------------------------------------ |
| 49 | // CheckImmedAndMakeContained: Checks if the 'childNode' is a containable immediate |
| 50 | // and, if so, makes it contained. |
| 51 | // |
| 52 | // Arguments: |
| 53 | // parentNode - is any non-leaf node |
| 54 | // childNode - is a child op of 'parentNode' |
| 55 | // |
| 56 | // Return value: |
| 57 | // true if we are able to make childNode a contained immediate |
| 58 | // |
| 59 | bool Lowering::CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode) |
| 60 | { |
| 61 | assert(!parentNode->OperIsLeaf()); |
| 62 | // If childNode is a containable immediate |
| 63 | if (IsContainableImmed(parentNode, childNode)) |
| 64 | { |
| 65 | // then make it contained within the parentNode |
| 66 | MakeSrcContained(parentNode, childNode); |
| 67 | return true; |
| 68 | } |
| 69 | return false; |
| 70 | } |
| 71 | |
| 72 | //------------------------------------------------------------------------ |
| 73 | // IsSafeToContainMem: Checks for conflicts between childNode and parentNode, |
| 74 | // and returns 'true' iff memory operand childNode can be contained in parentNode. |
| 75 | // |
| 76 | // Arguments: |
| 77 | // parentNode - any non-leaf node |
| 78 | // childNode - some node that is an input to `parentNode` |
| 79 | // |
| 80 | // Return value: |
| 81 | // true if it is safe to make childNode a contained memory operand. |
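| | // |
| | // Notes: |
| | // For illustration only (hypothetical IR): if a candidate memory operand IND(addr) appears |
| | // several nodes before its STOREIND parent and an intervening node stores to the same |
| | // location, containment is unsafe: the contained load would be evaluated at the parent's |
| | // position and observe the updated memory. |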
| 82 | // |
| 83 | bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode) |
| 84 | { |
| 85 | m_scratchSideEffects.Clear(); |
| 86 | m_scratchSideEffects.AddNode(comp, childNode); |
| 87 | |
| 88 | for (GenTree* node = childNode->gtNext; node != parentNode; node = node->gtNext) |
| 89 | { |
| 90 | if (m_scratchSideEffects.InterferesWith(comp, node, false)) |
| 91 | { |
| 92 | return false; |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | return true; |
| 97 | } |
| 98 | |
| 99 | //------------------------------------------------------------------------ |
| 100 | // LowerNode: this is the main entry point for Lowering. |
| 102 | GenTree* Lowering::LowerNode(GenTree* node) |
| 103 | { |
| 104 | assert(node != nullptr); |
| 105 | switch (node->gtOper) |
| 106 | { |
| 107 | case GT_IND: |
| 108 | TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true); |
| 109 | ContainCheckIndir(node->AsIndir()); |
| 110 | break; |
| 111 | |
| 112 | case GT_STOREIND: |
| 113 | TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true); |
| 114 | if (!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(node)) |
| 115 | { |
| 116 | LowerStoreIndir(node->AsIndir()); |
| 117 | } |
| 118 | break; |
| 119 | |
| 120 | case GT_ADD: |
| 121 | { |
| 122 | GenTree* afterTransform = LowerAdd(node); |
| 123 | if (afterTransform != nullptr) |
| 124 | { |
| 125 | return afterTransform; |
| 126 | } |
| 127 | __fallthrough; |
| 128 | } |
| 129 | |
| 130 | #if !defined(_TARGET_64BIT_) |
| 131 | case GT_ADD_LO: |
| 132 | case GT_ADD_HI: |
| 133 | case GT_SUB_LO: |
| 134 | case GT_SUB_HI: |
| 135 | #endif |
| 136 | case GT_SUB: |
| 137 | case GT_AND: |
| 138 | case GT_OR: |
| 139 | case GT_XOR: |
| 140 | ContainCheckBinary(node->AsOp()); |
| 141 | break; |
| 142 | |
| 143 | case GT_MUL: |
| 144 | case GT_MULHI: |
| 145 | #if defined(_TARGET_X86_) |
| 146 | case GT_MUL_LONG: |
| 147 | #endif |
| 148 | ContainCheckMul(node->AsOp()); |
| 149 | break; |
| 150 | |
| 151 | case GT_UDIV: |
| 152 | case GT_UMOD: |
| 153 | if (!LowerUnsignedDivOrMod(node->AsOp())) |
| 154 | { |
| 155 | ContainCheckDivOrMod(node->AsOp()); |
| 156 | } |
| 157 | break; |
| 158 | |
| 159 | case GT_DIV: |
| 160 | case GT_MOD: |
| 161 | return LowerSignedDivOrMod(node); |
| 162 | |
| 163 | case GT_SWITCH: |
| 164 | return LowerSwitch(node); |
| 165 | |
| 166 | case GT_CALL: |
| 167 | LowerCall(node); |
| 168 | break; |
| 169 | |
| 170 | case GT_LT: |
| 171 | case GT_LE: |
| 172 | case GT_GT: |
| 173 | case GT_GE: |
| 174 | case GT_EQ: |
| 175 | case GT_NE: |
| 176 | case GT_TEST_EQ: |
| 177 | case GT_TEST_NE: |
| 178 | case GT_CMP: |
| 179 | return LowerCompare(node); |
| 180 | |
| 181 | case GT_JTRUE: |
| 182 | return LowerJTrue(node->AsOp()); |
| 183 | |
| 184 | case GT_JMP: |
| 185 | LowerJmpMethod(node); |
| 186 | break; |
| 187 | |
| 188 | case GT_RETURN: |
| 189 | LowerRet(node); |
| 190 | break; |
| 191 | |
| 192 | case GT_RETURNTRAP: |
| 193 | ContainCheckReturnTrap(node->AsOp()); |
| 194 | break; |
| 195 | |
| 196 | case GT_CAST: |
| 197 | LowerCast(node); |
| 198 | break; |
| 199 | |
| 200 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
| 201 | case GT_ARR_BOUNDS_CHECK: |
| 202 | #ifdef FEATURE_SIMD |
| 203 | case GT_SIMD_CHK: |
| 204 | #endif // FEATURE_SIMD |
| 205 | #ifdef FEATURE_HW_INTRINSICS |
| 206 | case GT_HW_INTRINSIC_CHK: |
| 207 | #endif // FEATURE_HW_INTRINSICS |
| 208 | ContainCheckBoundsChk(node->AsBoundsChk()); |
| 209 | break; |
| 210 | #endif // _TARGET_XARCH_ || _TARGET_ARM64_ |
| 211 | case GT_ARR_ELEM: |
| 212 | return LowerArrElem(node); |
| 213 | |
| 214 | case GT_ARR_OFFSET: |
| 215 | ContainCheckArrOffset(node->AsArrOffs()); |
| 216 | break; |
| 217 | |
| 218 | case GT_ROL: |
| 219 | case GT_ROR: |
| 220 | LowerRotate(node); |
| 221 | break; |
| 222 | |
| 223 | #ifndef _TARGET_64BIT_ |
| 224 | case GT_LSH_HI: |
| 225 | case GT_RSH_LO: |
| 226 | ContainCheckShiftRotate(node->AsOp()); |
| 227 | break; |
| 228 | #endif // !_TARGET_64BIT_ |
| 229 | |
| 230 | case GT_LSH: |
| 231 | case GT_RSH: |
| 232 | case GT_RSZ: |
| 233 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
| 234 | LowerShift(node->AsOp()); |
| 235 | #else |
| 236 | ContainCheckShiftRotate(node->AsOp()); |
| 237 | #endif |
| 238 | break; |
| 239 | |
| 240 | case GT_STORE_BLK: |
| 241 | case GT_STORE_OBJ: |
| 242 | case GT_STORE_DYN_BLK: |
| 243 | { |
| 244 | GenTreeBlk* blkNode = node->AsBlk(); |
| 245 | TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false); |
| 246 | LowerBlockStore(blkNode); |
| 247 | } |
| 248 | break; |
| 249 | |
| 250 | case GT_LCLHEAP: |
| 251 | ContainCheckLclHeap(node->AsOp()); |
| 252 | break; |
| 253 | |
| 254 | #ifdef _TARGET_XARCH_ |
| 255 | case GT_INTRINSIC: |
| 256 | ContainCheckIntrinsic(node->AsOp()); |
| 257 | break; |
| 258 | #endif // _TARGET_XARCH_ |
| 259 | |
| 260 | #ifdef FEATURE_SIMD |
| 261 | case GT_SIMD: |
| 262 | LowerSIMD(node->AsSIMD()); |
| 263 | break; |
| 264 | #endif // FEATURE_SIMD |
| 265 | |
| 266 | #ifdef FEATURE_HW_INTRINSICS |
| 267 | case GT_HWIntrinsic: |
| 268 | LowerHWIntrinsic(node->AsHWIntrinsic()); |
| 269 | break; |
| 270 | #endif // FEATURE_HW_INTRINSICS |
| 271 | |
| 272 | case GT_LCL_FLD: |
| 273 | { |
| 274 | // We should only encounter this for lclVars that are lvDoNotEnregister. |
| 275 | verifyLclFldDoNotEnregister(node->AsLclVarCommon()->gtLclNum); |
| 276 | break; |
| 277 | } |
| 278 | |
| 279 | case GT_LCL_VAR: |
| 280 | WidenSIMD12IfNecessary(node->AsLclVarCommon()); |
| 281 | break; |
| 282 | |
| 283 | case GT_STORE_LCL_VAR: |
| 284 | WidenSIMD12IfNecessary(node->AsLclVarCommon()); |
| 285 | __fallthrough; |
| 286 | |
| 287 | case GT_STORE_LCL_FLD: |
| 288 | { |
| 289 | #if defined(_TARGET_AMD64_) && defined(FEATURE_SIMD) |
| 290 | GenTreeLclVarCommon* const store = node->AsLclVarCommon(); |
| 291 | if ((store->TypeGet() == TYP_SIMD8) != (store->gtOp1->TypeGet() == TYP_SIMD8)) |
| 292 | { |
| 293 | GenTreeUnOp* bitcast = |
| 294 | new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, store->TypeGet(), store->gtOp1, nullptr); |
| 295 | store->gtOp1 = bitcast; |
| 296 | BlockRange().InsertBefore(store, bitcast); |
| 297 | } |
| 298 | #endif // _TARGET_AMD64_ && FEATURE_SIMD |
| 299 | // TODO-1stClassStructs: Once we remove the requirement that all struct stores |
| 300 | // are block stores (GT_STORE_BLK or GT_STORE_OBJ), here is where we would put the local |
| 301 | // store under a block store if codegen will require it. |
| 302 | if ((node->TypeGet() == TYP_STRUCT) && (node->gtGetOp1()->OperGet() != GT_PHI)) |
| 303 | { |
| 304 | #if FEATURE_MULTIREG_RET |
| 305 | GenTree* src = node->gtGetOp1(); |
| 306 | assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal()); |
| 307 | #else // !FEATURE_MULTIREG_RET |
| 308 | assert(!"Unexpected struct local store in Lowering" ); |
| 309 | #endif // !FEATURE_MULTIREG_RET |
| 310 | } |
| 311 | LowerStoreLoc(node->AsLclVarCommon()); |
| 312 | break; |
| 313 | } |
| 314 | |
| 315 | #if defined(_TARGET_ARM64_) |
| 316 | case GT_CMPXCHG: |
| 317 | CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand); |
| 318 | break; |
| 319 | |
| 320 | case GT_XADD: |
| 321 | CheckImmedAndMakeContained(node, node->gtOp.gtOp2); |
| 322 | break; |
| 323 | #elif defined(_TARGET_XARCH_) |
| 324 | case GT_XADD: |
| 325 | if (node->IsUnusedValue()) |
| 326 | { |
| 327 | node->ClearUnusedValue(); |
| 328 | // Make sure the types are identical, since the node type is changed to VOID; |
| 329 | // CodeGen relies on op2's type to determine the instruction size. |
| 330 | // Note that the node type cannot be a small int but the data operand can. |
| 331 | assert(genActualType(node->gtGetOp2()->TypeGet()) == node->TypeGet()); |
| 332 | node->SetOper(GT_LOCKADD); |
| 333 | node->gtType = TYP_VOID; |
| 334 | CheckImmedAndMakeContained(node, node->gtGetOp2()); |
| 335 | } |
| 336 | break; |
| 337 | #endif |
| 338 | |
| 339 | #ifndef _TARGET_ARMARCH_ |
| 340 | // TODO-ARMARCH-CQ: We should contain this as long as the offset fits. |
| 341 | case GT_OBJ: |
| 342 | if (node->AsObj()->Addr()->OperIsLocalAddr()) |
| 343 | { |
| 344 | node->AsObj()->Addr()->SetContained(); |
| 345 | } |
| 346 | break; |
| 347 | #endif // !_TARGET_ARMARCH_ |
| 348 | |
| 349 | default: |
| 350 | break; |
| 351 | } |
| 352 | |
| 353 | return node->gtNext; |
| 354 | } |
| 355 | |
| 356 | /** -- Switch Lowering -- |
| 357 | * The main idea of switch lowering is to make the register requirements of this node transparent |
| 358 | * to LSRA downstream. Although the switch is represented in the JIT as a simple tree node, it is |
| 359 | * inherently a control statement: at the time we actually generate code for it we end up |
| 360 | * emitting instructions that modify the flow of execution and that impose complicated register |
| 361 | * requirements and lifetimes. |
| 362 | * |
| 363 | * So, for the purpose of LSRA, we want a more detailed specification of what a switch node actually |
| 364 | * means and, more importantly, of which registers we need and when, for each instruction we want to |
| 365 | * issue, so that they can be allocated correctly downstream. |
| 366 | * |
| 367 | * For this purpose, this procedure performs switch lowering in two different ways: |
| 368 | * |
| 369 | * a) Represent the switch statement as a zero-index jump table construct. This means that for every destination |
| 370 | * of the switch, we will store this destination in an array of addresses and the code generator will issue |
| 371 | * a data section where this array will live and will emit code that based on the switch index, will indirect and |
| 372 | * jump to the destination specified in the jump table. |
| 373 | * |
| 374 | * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch |
| 375 | * node for jump table based switches. |
| 376 | * The overall structure of a GT_SWITCH_TABLE is: |
| 377 | * |
| 378 | * GT_SWITCH_TABLE |
| 379 | * |_________ localVar (a temporary local that holds the switch index) |
| 380 | * |_________ jumpTable (this is a special node that holds the address of the jump table array) |
| 381 | * |
| 382 | * Now, the way we morph a GT_SWITCH node into this lowered switch table node form is the following: |
| 383 | * |
| 384 | * Input: GT_SWITCH (inside a basic block whose Branch Type is BBJ_SWITCH) |
| 385 | * |_____ expr (an arbitrarily complex GT_NODE that represents the switch index) |
| 386 | * |
| 387 | * This gets transformed into the following statements inside a BBJ_COND basic block (the target would be |
| 388 | * the default case of the switch in case the conditional is evaluated to true). |
| 389 | * |
| 390 | * ----- original block, transformed |
| 391 | * GT_STORE_LCL_VAR tempLocal (a new temporary local variable used to store the switch index) |
| 392 | * |_____ expr (the index expression) |
| 393 | * |
| 394 | * GT_JTRUE |
| 395 | * |_____ GT_COND |
| 396 | * |_____ GT_GE |
| 397 | * |___ Int_Constant (This constant is the index of the default case |
| 398 | * that happens to be the highest index in the jump table). |
| 399 | * |___ tempLocal (The local variable where we stored the index expression). |
| 400 | * |
| 401 | * ----- new basic block |
| 402 | * GT_SWITCH_TABLE |
| 403 | * |_____ tempLocal |
| 404 | * |_____ jumpTable (a new jump table node that now LSRA can allocate registers for explicitly |
| 405 | * and LinearCodeGen will be responsible to generate downstream). |
| 406 | * |
| 407 | * This way there are no implicit temporaries. |
| 408 | * |
| 409 | * b) For small-sized switches, we will actually morph them into a series of conditionals of the form |
| 410 | * if (case falls into the default){ goto jumpTable[jumpCnt - 1]; // last entry in the jump table is the default case } |
| 411 | * (For the default case conditional, we'll be constructing the exact same code as the jump table case one). |
| 412 | * else if (case == firstCase){ goto jumpTable[0]; } |
| 413 | * else if (case == secondCase) { goto jumpTable[1]; } and so on. |
| 414 | * |
| 415 | * This transformation is of course made in JIT-IR, not downstream at the CodeGen level, so we no longer |
| 416 | * require internal temporaries to maintain the index we're evaluating, and we reuse existing code from |
| 417 | * LinearCodeGen to implement this instead of implementing all the control flow constructs using InstrDscs and |
| 418 | * InstrGroups downstream. |
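| | * |
| | * For illustration only (a hypothetical example): a switch with two explicit cases plus a default, |
| | * such as "switch (i) { case 0: ...; case 1: ...; default: ...; }", first gets the shared default |
| | * check "if ((unsigned)i > 1) goto default;" in the original block. The remaining cases are then |
| | * expanded either as "if (i == 0) goto case0; if (i == 1) goto case1;" (compare/branch expansion) |
| | * or as a GT_SWITCH_TABLE over the jump table, depending on the heuristics applied below. |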
| 419 | */ |
| 420 | |
| 421 | GenTree* Lowering::LowerSwitch(GenTree* node) |
| 422 | { |
| 423 | unsigned jumpCnt; |
| 424 | unsigned targetCnt; |
| 425 | BasicBlock** jumpTab; |
| 426 | |
| 427 | assert(node->gtOper == GT_SWITCH); |
| 428 | |
| 429 | // The first step is to build the default case conditional construct that is |
| 430 | // shared between both kinds of expansion of the switch node. |
| 431 | |
| 432 | // To avoid confusion, we'll alias m_block to originalSwitchBB |
| 433 | // that represents the block containing the GT_SWITCH node we're morphing. |
| 434 | BasicBlock* originalSwitchBB = m_block; |
| 435 | LIR::Range& switchBBRange = LIR::AsRange(originalSwitchBB); |
| 436 | |
| 437 | // jumpCnt is the number of elements in the jump table array. |
| 438 | // jumpTab is the actual pointer to the jump table array. |
| 439 | // targetCnt is the number of unique targets in the jump table array. |
| 440 | jumpCnt = originalSwitchBB->bbJumpSwt->bbsCount; |
| 441 | jumpTab = originalSwitchBB->bbJumpSwt->bbsDstTab; |
| 442 | targetCnt = originalSwitchBB->NumSucc(comp); |
| 443 | |
| 444 | // GT_SWITCH must be a top-level node with no use. |
| 445 | #ifdef DEBUG |
| 446 | { |
| 447 | LIR::Use use; |
| 448 | assert(!switchBBRange.TryGetUse(node, &use)); |
| 449 | } |
| 450 | #endif |
| 451 | |
| 452 | JITDUMP("Lowering switch " FMT_BB ", %d cases\n" , originalSwitchBB->bbNum, jumpCnt); |
| 453 | |
| 454 | // Handle a degenerate case: if the switch has only a default case, just convert it |
| 455 | // to an unconditional branch. This should only happen in minopts or with debuggable |
| 456 | // code. |
| 457 | if (targetCnt == 1) |
| 458 | { |
| 459 | JITDUMP("Lowering switch " FMT_BB ": single target; converting to BBJ_ALWAYS\n" , originalSwitchBB->bbNum); |
| 460 | noway_assert(comp->opts.OptimizationDisabled()); |
| 461 | if (originalSwitchBB->bbNext == jumpTab[0]) |
| 462 | { |
| 463 | originalSwitchBB->bbJumpKind = BBJ_NONE; |
| 464 | originalSwitchBB->bbJumpDest = nullptr; |
| 465 | } |
| 466 | else |
| 467 | { |
| 468 | originalSwitchBB->bbJumpKind = BBJ_ALWAYS; |
| 469 | originalSwitchBB->bbJumpDest = jumpTab[0]; |
| 470 | } |
| 471 | // Remove extra predecessor links if there was more than one case. |
| 472 | for (unsigned i = 1; i < jumpCnt; ++i) |
| 473 | { |
| 474 | (void)comp->fgRemoveRefPred(jumpTab[i], originalSwitchBB); |
| 475 | } |
| 476 | |
| 477 | // We have to get rid of the GT_SWITCH node but a child might have side effects so just assign |
| 478 | // the result of the child subtree to a temp. |
| 479 | GenTree* rhs = node->gtOp.gtOp1; |
| 480 | |
| 481 | unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Lowering is creating a new local variable")); |
| 482 | comp->lvaTable[lclNum].lvType = rhs->TypeGet(); |
| 483 | |
| 484 | GenTreeLclVar* store = |
| 485 | new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, rhs->TypeGet(), lclNum, BAD_IL_OFFSET); |
| 486 | store->gtOp1 = rhs; |
| 487 | store->gtFlags = (rhs->gtFlags & GTF_COMMON_MASK); |
| 488 | store->gtFlags |= GTF_VAR_DEF; |
| 489 | |
| 490 | switchBBRange.InsertAfter(node, store); |
| 491 | switchBBRange.Remove(node); |
| 492 | |
| 493 | return store; |
| 494 | } |
| 495 | |
| 496 | noway_assert(jumpCnt >= 2); |
| 497 | |
| 498 | // Spill the argument to the switch node into a local so that it can be used later. |
| 499 | unsigned blockWeight = originalSwitchBB->getBBWeight(comp); |
| 500 | |
| 501 | LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node); |
| 502 | ReplaceWithLclVar(use); |
| 503 | |
| 504 | // GT_SWITCH(indexExpression) is now two statements: |
| 505 | // 1. a statement containing 'asg' (for temp = indexExpression) |
| 506 | // 2. and a statement with GT_SWITCH(temp) |
| 507 | |
| 508 | assert(node->gtOper == GT_SWITCH); |
| 509 | GenTree* temp = node->gtOp.gtOp1; |
| 510 | assert(temp->gtOper == GT_LCL_VAR); |
| 511 | unsigned tempLclNum = temp->gtLclVarCommon.gtLclNum; |
| 512 | LclVarDsc* tempVarDsc = comp->lvaTable + tempLclNum; |
| 513 | var_types tempLclType = temp->TypeGet(); |
| 514 | |
| 515 | BasicBlock* defaultBB = jumpTab[jumpCnt - 1]; |
| 516 | BasicBlock* followingBB = originalSwitchBB->bbNext; |
| 517 | |
| 518 | /* Is the number of cases right for a test and jump switch? */ |
| 519 | const bool fFirstCaseFollows = (followingBB == jumpTab[0]); |
| 520 | const bool fDefaultFollows = (followingBB == defaultBB); |
| 521 | |
| 522 | unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc |
| 523 | |
| 524 | // This means really just a single cmp/jcc (aka a simple if/else) |
| 525 | if (fFirstCaseFollows || fDefaultFollows) |
| 526 | { |
| 527 | minSwitchTabJumpCnt++; |
| 528 | } |
| 529 | |
| 530 | #if defined(_TARGET_ARM_) |
| 531 | // On ARM for small switch tables we will |
| 532 | // generate a sequence of compare and branch instructions |
| 533 | // because the code to load the base of the switch |
| 534 | // table is huge and hideous due to the relocation... :( |
| 535 | minSwitchTabJumpCnt += 2; |
| 536 | #endif // _TARGET_ARM_ |
| 537 | |
| 538 | // Once we have the temporary variable, we construct the conditional branch for |
| 539 | // the default case. As stated above, this conditional is being shared between |
| 540 | // both GT_SWITCH lowering code paths. |
| 541 | // This condition is of the form: if (temp > jumpTableLength - 2){ goto jumpTable[jumpTableLength - 1]; } |
| 542 | GenTree* gtDefaultCaseCond = comp->gtNewOperNode(GT_GT, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType), |
| 543 | comp->gtNewIconNode(jumpCnt - 2, genActualType(tempLclType))); |
| 544 | |
| 545 | // Make sure we perform an unsigned comparison, just in case the switch index in 'temp' |
| 546 | // is now less than zero (that would also hit the default case). |
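| | // For illustration only (hypothetical counts): with jumpCnt == 5 this compares (unsigned)temp > 3, |
| | // so indices 0..3 continue to the case expansion and every other value, including "negative" |
| | // indices reinterpreted as large unsigned values, branches to jumpTab[4], the default case. |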
| 547 | gtDefaultCaseCond->gtFlags |= GTF_UNSIGNED; |
| 548 | |
| 549 | GenTree* gtDefaultCaseJump = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtDefaultCaseCond); |
| 550 | gtDefaultCaseJump->gtFlags = node->gtFlags; |
| 551 | |
| 552 | LIR::Range condRange = LIR::SeqTree(comp, gtDefaultCaseJump); |
| 553 | switchBBRange.InsertAtEnd(std::move(condRange)); |
| 554 | |
| 555 | BasicBlock* afterDefaultCondBlock = comp->fgSplitBlockAfterNode(originalSwitchBB, condRange.LastNode()); |
| 556 | |
| 557 | // afterDefaultCondBlock is now the switch, and all the switch targets have it as a predecessor. |
| 558 | // originalSwitchBB is now a BBJ_NONE, and there is a predecessor edge in afterDefaultCondBlock |
| 559 | // representing the fall-through flow from originalSwitchBB. |
| 560 | assert(originalSwitchBB->bbJumpKind == BBJ_NONE); |
| 561 | assert(originalSwitchBB->bbNext == afterDefaultCondBlock); |
| 562 | assert(afterDefaultCondBlock->bbJumpKind == BBJ_SWITCH); |
| 563 | assert(afterDefaultCondBlock->bbJumpSwt->bbsHasDefault); |
| 564 | assert(afterDefaultCondBlock->isEmpty()); // Nothing here yet. |
| 565 | |
| 566 | // The GT_SWITCH code is still in originalSwitchBB (it will be removed later). |
| 567 | |
| 568 | // Turn originalSwitchBB into a BBJ_COND. |
| 569 | originalSwitchBB->bbJumpKind = BBJ_COND; |
| 570 | originalSwitchBB->bbJumpDest = jumpTab[jumpCnt - 1]; |
| 571 | |
| 572 | // Fix the pred for the default case: the default block target still has originalSwitchBB |
| 573 | // as a predecessor, but fgSplitBlockAfterNode() moved all predecessors to point |
| 574 | // to afterDefaultCondBlock. |
| 575 | flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[jumpCnt - 1], afterDefaultCondBlock); |
| 576 | comp->fgAddRefPred(jumpTab[jumpCnt - 1], originalSwitchBB, oldEdge); |
| 577 | |
| 578 | bool useJumpSequence = jumpCnt < minSwitchTabJumpCnt; |
| 579 | |
| 580 | #if defined(_TARGET_UNIX_) && defined(_TARGET_ARM_) |
| 581 | // Force using an inlined jump sequence instead of switch table generation. |
| 582 | // The switch jump table is generated with incorrect values in the CoreRT case, |
| 583 | // so any large switch will crash after loading any such value into the PC. |
| 584 | // This is probably because we use absolute addressing |
| 585 | // instead of relative, whereas CoreRT as a rule uses relative |
| 586 | // addressing when generating an executable. |
| 587 | // See also https://github.com/dotnet/coreclr/issues/13194 |
| 588 | // Also https://github.com/dotnet/coreclr/pull/13197 |
| 589 | useJumpSequence = useJumpSequence || comp->IsTargetAbi(CORINFO_CORERT_ABI); |
| 590 | #endif // defined(_TARGET_UNIX_) && defined(_TARGET_ARM_) |
| 591 | |
| 592 | // If we originally had 2 unique successors, check to see whether there is a unique |
| 593 | // non-default case, in which case we can eliminate the switch altogether. |
| 594 | // Note that the single unique successor case is handled above. |
| 595 | BasicBlock* uniqueSucc = nullptr; |
| 596 | if (targetCnt == 2) |
| 597 | { |
| 598 | uniqueSucc = jumpTab[0]; |
| 599 | noway_assert(jumpCnt >= 2); |
| 600 | for (unsigned i = 1; i < jumpCnt - 1; i++) |
| 601 | { |
| 602 | if (jumpTab[i] != uniqueSucc) |
| 603 | { |
| 604 | uniqueSucc = nullptr; |
| 605 | break; |
| 606 | } |
| 607 | } |
| 608 | } |
| 609 | if (uniqueSucc != nullptr) |
| 610 | { |
| 611 | // If the unique successor immediately follows this block, we have nothing to do - |
| 612 | // it will simply fall-through after we remove the switch, below. |
| 613 | // Otherwise, make this a BBJ_ALWAYS. |
| 614 | // Now, fixup the predecessor links to uniqueSucc. In the original jumpTab: |
| 615 | // jumpTab[jumpCnt - 1] was the default target, which we handled above, |
| 616 | // jumpTab[0] is the first target, and we'll leave that predecessor link. |
| 617 | // Remove any additional predecessor links to uniqueSucc. |
| 618 | for (unsigned i = 1; i < jumpCnt - 1; ++i) |
| 619 | { |
| 620 | assert(jumpTab[i] == uniqueSucc); |
| 621 | (void)comp->fgRemoveRefPred(uniqueSucc, afterDefaultCondBlock); |
| 622 | } |
| 623 | if (afterDefaultCondBlock->bbNext == uniqueSucc) |
| 624 | { |
| 625 | afterDefaultCondBlock->bbJumpKind = BBJ_NONE; |
| 626 | afterDefaultCondBlock->bbJumpDest = nullptr; |
| 627 | } |
| 628 | else |
| 629 | { |
| 630 | afterDefaultCondBlock->bbJumpKind = BBJ_ALWAYS; |
| 631 | afterDefaultCondBlock->bbJumpDest = uniqueSucc; |
| 632 | } |
| 633 | } |
| 634 | // If the number of possible destinations is small enough, we proceed to expand the switch |
| 635 | // into a series of conditional branches, otherwise we follow the jump table based switch |
| 636 | // transformation. |
| 637 | else if (useJumpSequence || comp->compStressCompile(Compiler::STRESS_SWITCH_CMP_BR_EXPANSION, 50)) |
| 638 | { |
| 639 | // Lower the switch into a series of compare and branch IR trees. |
| 640 | // |
| 641 | // In this case we will morph the node in the following way: |
| 642 | // 1. Generate a JTRUE statement to evaluate the default case. (This happens above.) |
| 643 | // 2. Start splitting the switch basic block into subsequent basic blocks, each of which will contain |
| 644 | // a statement that is responsible for performing a comparison of the table index and conditional |
| 645 | // branch if equal. |
| 646 | |
| 647 | JITDUMP("Lowering switch " FMT_BB ": using compare/branch expansion\n" , originalSwitchBB->bbNum); |
| 648 | |
| 649 | // We'll use 'afterDefaultCondBlock' for the first conditional. After that, we'll add new |
| 650 | // blocks. If we end up not needing it at all (say, if all the non-default cases just fall through), |
| 651 | // we'll delete it. |
| 652 | bool fUsedAfterDefaultCondBlock = false; |
| 653 | BasicBlock* currentBlock = afterDefaultCondBlock; |
| 654 | LIR::Range* currentBBRange = &LIR::AsRange(currentBlock); |
| 655 | |
| 656 | // Walk through entries 0 to jumpCnt - 2 (the default was handled above). If a case target follows, ignore it and let it fall through. |
| 657 | // If no case target follows, the last one doesn't need to be a compare/branch: it can be an |
| 658 | // unconditional branch. |
| 659 | bool fAnyTargetFollows = false; |
| 660 | for (unsigned i = 0; i < jumpCnt - 1; ++i) |
| 661 | { |
| 662 | assert(currentBlock != nullptr); |
| 663 | |
| 664 | // Remove the switch from the predecessor list of this case target's block. |
| 665 | // We'll add the proper new predecessor edge later. |
| 666 | flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[i], afterDefaultCondBlock); |
| 667 | |
| 668 | if (jumpTab[i] == followingBB) |
| 669 | { |
| 670 | // This case label follows the switch; let it fall through. |
| 671 | fAnyTargetFollows = true; |
| 672 | continue; |
| 673 | } |
| 674 | |
| 675 | // We need a block to put in the new compare and/or branch. |
| 676 | // If we haven't used the afterDefaultCondBlock yet, then use that. |
| 677 | if (fUsedAfterDefaultCondBlock) |
| 678 | { |
| 679 | BasicBlock* newBlock = comp->fgNewBBafter(BBJ_NONE, currentBlock, true); |
| 680 | comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor. |
| 681 | currentBlock = newBlock; |
| 682 | currentBBRange = &LIR::AsRange(currentBlock); |
| 683 | } |
| 684 | else |
| 685 | { |
| 686 | assert(currentBlock == afterDefaultCondBlock); |
| 687 | fUsedAfterDefaultCondBlock = true; |
| 688 | } |
| 689 | |
| 690 | // We're going to have a branch, either a conditional or unconditional, |
| 691 | // to the target. Set the target. |
| 692 | currentBlock->bbJumpDest = jumpTab[i]; |
| 693 | |
| 694 | // Wire up the predecessor list for the "branch" case. |
| 695 | comp->fgAddRefPred(jumpTab[i], currentBlock, oldEdge); |
| 696 | |
| 697 | if (!fAnyTargetFollows && (i == jumpCnt - 2)) |
| 698 | { |
| 699 | // We're processing the last one, and there is no fall through from any case |
| 700 | // to the following block, so we can use an unconditional branch to the final |
| 701 | // case: there is no need to compare against the case index, since it's |
| 702 | // guaranteed to be taken (since the default case was handled first, above). |
| 703 | |
| 704 | currentBlock->bbJumpKind = BBJ_ALWAYS; |
| 705 | } |
| 706 | else |
| 707 | { |
| 708 | // Otherwise, it's a conditional branch. Set the branch kind, then add the |
| 709 | // condition statement. |
| 710 | currentBlock->bbJumpKind = BBJ_COND; |
| 711 | |
| 712 | // Now, build the conditional statement for the current case that is |
| 713 | // being evaluated: |
| 714 | // GT_JTRUE |
| 715 | // |__ GT_COND |
| 716 | // |____GT_EQ |
| 717 | // |____ (switchIndex) (The temp variable) |
| 718 | // |____ (ICon) (The actual case constant) |
| 719 | GenTree* gtCaseCond = comp->gtNewOperNode(GT_EQ, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType), |
| 720 | comp->gtNewIconNode(i, tempLclType)); |
| 721 | GenTree* gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond); |
| 722 | LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch); |
| 723 | currentBBRange->InsertAtEnd(std::move(caseRange)); |
| 724 | } |
| 725 | } |
| 726 | |
| 727 | if (fAnyTargetFollows) |
| 728 | { |
| 729 | // There is a fall-through to the following block. In the loop |
| 730 | // above, we deleted all the predecessor edges from the switch. |
| 731 | // In this case, we need to add one back. |
| 732 | comp->fgAddRefPred(currentBlock->bbNext, currentBlock); |
| 733 | } |
| 734 | |
| 735 | if (!fUsedAfterDefaultCondBlock) |
| 736 | { |
| 737 | // All the cases were fall-through! We don't need this block. |
| 738 | // Convert it from BBJ_SWITCH to BBJ_NONE and unset the BBF_DONT_REMOVE flag |
| 739 | // so fgRemoveBlock() doesn't complain. |
| 740 | JITDUMP("Lowering switch " FMT_BB ": all switch cases were fall-through\n" , originalSwitchBB->bbNum); |
| 741 | assert(currentBlock == afterDefaultCondBlock); |
| 742 | assert(currentBlock->bbJumpKind == BBJ_SWITCH); |
| 743 | currentBlock->bbJumpKind = BBJ_NONE; |
| 744 | currentBlock->bbFlags &= ~BBF_DONT_REMOVE; |
| 745 | comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block. |
| 746 | } |
| 747 | } |
| 748 | else |
| 749 | { |
| 750 | // At this point the default case has already been handled and we need to generate a jump |
| 751 | // table based switch or a bit test based switch at the end of afterDefaultCondBlock. Both |
| 752 | // switch variants need the switch value so create the necessary LclVar node here. |
| 753 | GenTree* switchValue = comp->gtNewLclvNode(tempLclNum, tempLclType); |
| 754 | LIR::Range& switchBlockRange = LIR::AsRange(afterDefaultCondBlock); |
| 755 | switchBlockRange.InsertAtEnd(switchValue); |
| 756 | |
| 757 | // Try generating a bit test based switch first, |
| 758 | // if that's not possible a jump table based switch will be generated. |
| 759 | if (!TryLowerSwitchToBitTest(jumpTab, jumpCnt, targetCnt, afterDefaultCondBlock, switchValue)) |
| 760 | { |
| 761 | JITDUMP("Lowering switch " FMT_BB ": using jump table expansion\n" , originalSwitchBB->bbNum); |
| 762 | |
| 763 | #ifdef _TARGET_64BIT_ |
| 764 | if (tempLclType != TYP_I_IMPL) |
| 765 | { |
| 766 | // SWITCH_TABLE expects the switch value (the index into the jump table) to be TYP_I_IMPL. |
| 767 | // Note that the switch value is unsigned so the cast should be unsigned as well. |
| 768 | switchValue = comp->gtNewCastNode(TYP_I_IMPL, switchValue, true, TYP_U_IMPL); |
| 769 | switchBlockRange.InsertAtEnd(switchValue); |
| 770 | } |
| 771 | #endif |
| 772 | |
| 773 | GenTree* switchTable = comp->gtNewJmpTableNode(); |
| 774 | GenTree* switchJump = comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, switchValue, switchTable); |
| 775 | switchBlockRange.InsertAfter(switchValue, switchTable, switchJump); |
| 776 | |
| 777 | // this block no longer branches to the default block |
| 778 | afterDefaultCondBlock->bbJumpSwt->removeDefault(); |
| 779 | } |
| 780 | |
| 781 | comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock); |
| 782 | } |
| 783 | |
| 784 | GenTree* next = node->gtNext; |
| 785 | |
| 786 | // Get rid of the GT_SWITCH(temp). |
| 787 | switchBBRange.Remove(node->gtOp.gtOp1); |
| 788 | switchBBRange.Remove(node); |
| 789 | |
| 790 | return next; |
| 791 | } |
| 792 | |
| 793 | //------------------------------------------------------------------------ |
| 794 | // TryLowerSwitchToBitTest: Attempts to transform a jump table switch into a bit test. |
| 795 | // |
| 796 | // Arguments: |
| 797 | // jumpTable - The jump table |
| 798 | // jumpCount - The number of blocks in the jump table |
| 799 | // targetCount - The number of distinct blocks in the jump table |
| 800 | // bbSwitch - The switch block |
| 801 | // switchValue - A LclVar node that provides the switch value |
| 802 | // |
| 803 | // Return value: |
| 804 | // true if the switch has been lowered to a bit test |
| 805 | // |
| 806 | // Notes: |
| 807 | // If the jump table contains at most 32 (64 on 64 bit targets) entries, not counting the default, |
| 808 | // and there are at most 2 distinct jump targets then the jump table can be converted to a word |
| 809 | // of bits where a 0 bit corresponds to one jump target and a 1 bit corresponds to the |
| 810 | // other jump target. Instead of the indirect jump a BT-JCC sequence is used to jump |
| 811 | // to the appropriate target: |
| 812 | // mov eax, 245 ; jump table converted to a "bit table" |
| 813 | // bt eax, ebx ; ebx is supposed to contain the switch value |
| 814 | // jc target1 |
| 815 | // target0: |
| 816 | // ... |
| 817 | // target1: |
| 818 | // Such code is both shorter and faster (in part due to the removal of a memory load) |
| 819 | // than the traditional jump table based code. And of course, it also avoids the need |
| 820 | // to emit the jump table itself that can reach up to 256 bytes (for 64 entries). |
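| | // |
| | // For illustration only (a hypothetical jump table): with jumpCount == 5 and |
| | // jumpTable = { T1, T0, T1, T0, DEFAULT }, the default entry is ignored and the remaining |
| | // four entries yield bitTable = 0b0101 (bit i is set when jumpTable[i] == bbCase1 == T1), |
| | // so "bt bitTable, switchValue" sets the carry flag exactly for the indices that jump to T1. |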
| 821 | // |
| 822 | bool Lowering::TryLowerSwitchToBitTest( |
| 823 | BasicBlock* jumpTable[], unsigned jumpCount, unsigned targetCount, BasicBlock* bbSwitch, GenTree* switchValue) |
| 824 | { |
| 825 | #ifndef _TARGET_XARCH_ |
| 826 | // Other architectures may use this if they substitute GT_BT with equivalent code. |
| 827 | return false; |
| 828 | #else |
| 829 | assert(jumpCount >= 2); |
| 830 | assert(targetCount >= 2); |
| 831 | assert(bbSwitch->bbJumpKind == BBJ_SWITCH); |
| 832 | assert(switchValue->OperIs(GT_LCL_VAR)); |
| 833 | |
| 834 | // |
| 835 | // Quick check to see if it's worth going through the jump table. The bit test switch supports |
| 836 | // up to 2 targets but targetCount also includes the default block so we need to allow 3 targets. |
| 837 | // We'll ensure that there are only 2 targets when building the bit table. |
| 838 | // |
| 839 | |
| 840 | if (targetCount > 3) |
| 841 | { |
| 842 | return false; |
| 843 | } |
| 844 | |
| 845 | // |
| 846 | // The number of bits in the bit table is the same as the number of jump table entries. But the |
| 847 | // jump table also includes the default target (at the end) so we need to ignore it. The default |
| 848 | // has already been handled by a JTRUE(GT(switchValue, jumpCount - 2)) that LowerSwitch generates. |
| 849 | // |
| 850 | |
| 851 | const unsigned bitCount = jumpCount - 1; |
| 852 | |
| 853 | if (bitCount > (genTypeSize(TYP_I_IMPL) * 8)) |
| 854 | { |
| 855 | return false; |
| 856 | } |
| 857 | |
| 858 | // |
| 859 | // Build a bit table where a bit set to 0 corresponds to bbCase0 and a bit set to 1 corresponds to |
| 860 | // bbCase1. Simply use the first block in the jump table as bbCase1, later we can invert the bit |
| 861 | // table and/or swap the blocks if it's beneficial. |
| 862 | // |
| 863 | |
| 864 | BasicBlock* bbCase0 = nullptr; |
| 865 | BasicBlock* bbCase1 = jumpTable[0]; |
| 866 | size_t bitTable = 1; |
| 867 | |
| 868 | for (unsigned bitIndex = 1; bitIndex < bitCount; bitIndex++) |
| 869 | { |
| 870 | if (jumpTable[bitIndex] == bbCase1) |
| 871 | { |
| 872 | bitTable |= (size_t(1) << bitIndex); |
| 873 | } |
| 874 | else if (bbCase0 == nullptr) |
| 875 | { |
| 876 | bbCase0 = jumpTable[bitIndex]; |
| 877 | } |
| 878 | else if (jumpTable[bitIndex] != bbCase0) |
| 879 | { |
| 880 | // If it's neither bbCase0 nor bbCase1 then it means we have 3 targets. There can't be more |
| 881 | // than 3 because of the check at the start of the function. |
| 882 | assert(targetCount == 3); |
| 883 | return false; |
| 884 | } |
| 885 | } |
| 886 | |
| 887 | // |
| 888 | // One of the case blocks has to follow the switch block. This requirement could be avoided |
| 889 | // by adding a BBJ_ALWAYS block after the switch block but doing that sometimes negatively |
| 890 | // impacts register allocation. |
| 891 | // |
| 892 | |
| 893 | if ((bbSwitch->bbNext != bbCase0) && (bbSwitch->bbNext != bbCase1)) |
| 894 | { |
| 895 | return false; |
| 896 | } |
| 897 | |
| 898 | #ifdef _TARGET_64BIT_ |
| 899 | // |
| 900 | // See if we can avoid an 8 byte immediate on 64 bit targets. If all upper 32 bits are 1 |
| 901 | // then inverting the bit table will make them 0 so that the table now fits in 32 bits. |
| 902 | // Note that this does not change the number of bits in the bit table, it just takes |
| 903 | // advantage of the fact that loading a 32 bit immediate into a 64 bit register zero |
| 904 | // extends the immediate value to 64 bit. |
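| | // For illustration only (a hypothetical value): bitTable = 0xFFFFFFFF00000005 becomes |
| | // ~bitTable = 0x00000000FFFFFFFA, which fits in a 32 bit immediate; swapping bbCase0 and |
| | // bbCase1 below keeps the jump targets consistent with the inverted bits. |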
| 905 | // |
| 906 | |
| 907 | if (~bitTable <= UINT32_MAX) |
| 908 | { |
| 909 | bitTable = ~bitTable; |
| 910 | std::swap(bbCase0, bbCase1); |
| 911 | } |
| 912 | #endif |
| 913 | |
| 914 | // |
| 915 | // Rewire the blocks as needed and figure out the condition to use for JCC. |
| 916 | // |
| 917 | |
| 918 | genTreeOps bbSwitchCondition = GT_NONE; |
| 919 | bbSwitch->bbJumpKind = BBJ_COND; |
| 920 | |
| 921 | comp->fgRemoveAllRefPreds(bbCase1, bbSwitch); |
| 922 | comp->fgRemoveAllRefPreds(bbCase0, bbSwitch); |
| 923 | |
| 924 | if (bbSwitch->bbNext == bbCase0) |
| 925 | { |
| 926 | // GT_LT + GTF_UNSIGNED generates JC so we jump to bbCase1 when the bit is set |
| 927 | bbSwitchCondition = GT_LT; |
| 928 | bbSwitch->bbJumpDest = bbCase1; |
| 929 | |
| 930 | comp->fgAddRefPred(bbCase0, bbSwitch); |
| 931 | comp->fgAddRefPred(bbCase1, bbSwitch); |
| 932 | } |
| 933 | else |
| 934 | { |
| 935 | assert(bbSwitch->bbNext == bbCase1); |
| 936 | |
| 937 | // GT_GE + GTF_UNSIGNED generates JNC so we jump to bbCase0 when the bit is not set |
| 938 | bbSwitchCondition = GT_GE; |
| 939 | bbSwitch->bbJumpDest = bbCase0; |
| 940 | |
| 941 | comp->fgAddRefPred(bbCase0, bbSwitch); |
| 942 | comp->fgAddRefPred(bbCase1, bbSwitch); |
| 943 | } |
| 944 | |
| 945 | // |
| 946 | // Append BT(bitTable, switchValue) and JCC(condition) to the switch block. |
| 947 | // |
| 948 | |
| 949 | var_types bitTableType = (bitCount <= (genTypeSize(TYP_INT) * 8)) ? TYP_INT : TYP_LONG; |
| 950 | GenTree* bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType); |
| 951 | GenTree* bitTest = comp->gtNewOperNode(GT_BT, TYP_VOID, bitTableIcon, switchValue); |
| 952 | bitTest->gtFlags |= GTF_SET_FLAGS; |
| 953 | GenTreeCC* jcc = new (comp, GT_JCC) GenTreeCC(GT_JCC, bbSwitchCondition); |
| 954 | jcc->gtFlags |= GTF_UNSIGNED | GTF_USE_FLAGS; |
| 955 | |
| 956 | LIR::AsRange(bbSwitch).InsertAfter(switchValue, bitTableIcon, bitTest, jcc); |
| 957 | |
| 958 | return true; |
| 959 | #endif // _TARGET_XARCH_ |
| 960 | } |
| 961 | |
| 962 | // NOTE: this method deliberately does not update the call arg table. It must only |
| 963 | // be used by NewPutArg and LowerArg; these functions are responsible for updating |
| 964 | // the call arg table as necessary. |
| 965 | void Lowering::ReplaceArgWithPutArgOrBitcast(GenTree** argSlot, GenTree* putArgOrBitcast) |
| 966 | { |
| 967 | assert(argSlot != nullptr); |
| 968 | assert(*argSlot != nullptr); |
| 969 | assert(putArgOrBitcast->OperIsPutArg() || putArgOrBitcast->OperIs(GT_BITCAST)); |
| 970 | |
| 971 | GenTree* arg = *argSlot; |
| 972 | |
| 973 | // Replace the argument with the putarg/copy |
| 974 | *argSlot = putArgOrBitcast; |
| 975 | putArgOrBitcast->gtOp.gtOp1 = arg; |
| 976 | |
| 977 | // Insert the putarg/copy into the block |
| 978 | BlockRange().InsertAfter(arg, putArgOrBitcast); |
| 979 | } |
| 980 | |
| 981 | //------------------------------------------------------------------------ |
| 982 | // NewPutArg: rewrites the tree to put an arg in a register or on the stack. |
| 983 | // |
| 984 | // Arguments: |
| 985 | // call - the call whose arg is being rewritten. |
| 986 | // arg - the arg being rewritten. |
| 987 | // info - the fgArgTabEntry information for the argument. |
| 988 | // type - the type of the argument. |
| 989 | // |
| 990 | // Return Value: |
| 991 | // The new tree that was created to put the arg in the right place |
| 992 | // or the incoming arg if the arg tree was not rewritten. |
| 993 | // |
| 994 | // Assumptions: |
| 995 | // call, arg, and info must be non-null. |
| 996 | // |
| 997 | // Notes: |
| 998 | // For System V systems with native struct passing (i.e. UNIX_AMD64_ABI defined) |
| 999 | // this method allocates a single GT_PUTARG_REG for one-eightbyte structs and a GT_FIELD_LIST of two GT_PUTARG_REGs |
| 1000 | // for two-eightbyte structs. |
| 1001 | // |
| 1002 | // For STK passed structs the method generates a GT_PUTARG_STK tree. For System V systems with native struct passing |
| 1003 | // (i.e. UNIX_AMD64_ABI defined) this method also sets the GC pointers count and the pointers |
| 1004 | // layout object, so the codegen of the GT_PUTARG_STK could use this for optimizing copying to the stack by value. |
| 1005 | // (using block copy primitives for non GC pointers and a single TARGET_POINTER_SIZE copy with recording GC info.) |
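| | // |
| | // For illustration only (hypothetical shapes): a TYP_INT argument passed in a register becomes |
| | // PUTARG_REG(arg); the same argument passed on the stack becomes PUTARG_STK(arg); and a |
| | // two-eightbyte struct on UNIX_AMD64_ABI becomes FIELD_LIST(PUTARG_REG(lo), PUTARG_REG(hi)). |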
| 1006 | // |
| 1007 | GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* info, var_types type) |
| 1008 | { |
| 1009 | assert(call != nullptr); |
| 1010 | assert(arg != nullptr); |
| 1011 | assert(info != nullptr); |
| 1012 | |
| 1013 | GenTree* putArg = nullptr; |
| 1014 | bool updateArgTable = true; |
| 1015 | |
| 1016 | bool isOnStack = (info->regNum == REG_STK); |
| 1018 | |
| 1019 | #ifdef _TARGET_ARMARCH_ |
| 1020 | // Mark the argument contained when we pass a struct. |
| 1021 | // A GT_FIELD_LIST is always marked contained when it is generated. |
| 1022 | if (type == TYP_STRUCT) |
| 1023 | { |
| 1024 | arg->SetContained(); |
| 1025 | if ((arg->OperGet() == GT_OBJ) && (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR)) |
| 1026 | { |
| 1027 | MakeSrcContained(arg, arg->AsObj()->Addr()); |
| 1028 | } |
| 1029 | } |
| 1030 | #endif |
| 1031 | |
| 1032 | #if FEATURE_ARG_SPLIT |
| 1033 | // Struct can be split into register(s) and stack on ARM |
| 1034 | if (info->isSplit) |
| 1035 | { |
| 1036 | assert(arg->OperGet() == GT_OBJ || arg->OperGet() == GT_FIELD_LIST); |
| 1037 | // TODO: Need to check correctness for FastTailCall |
| 1038 | if (call->IsFastTailCall()) |
| 1039 | { |
| 1040 | #ifdef _TARGET_ARM_ |
| 1041 | NYI_ARM("lower: struct argument by fast tail call" ); |
| 1042 | #endif // _TARGET_ARM_ |
| 1043 | } |
| 1044 | |
| 1045 | putArg = new (comp, GT_PUTARG_SPLIT) |
| 1046 | GenTreePutArgSplit(arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), info->numRegs, |
| 1047 | call->IsFastTailCall(), call); |
| 1048 | |
| 1049 | // If the struct argument is morphed into GT_FIELD_LIST node(s), |
| 1050 | // we can get the GC info from the type of each GT_FIELD_LIST node, |
| 1051 | // so we skip setting GC pointer info here. |
| 1052 | // |
| 1053 | GenTreePutArgSplit* argSplit = putArg->AsPutArgSplit(); |
| 1054 | for (unsigned regIndex = 0; regIndex < info->numRegs; regIndex++) |
| 1055 | { |
| 1056 | argSplit->SetRegNumByIdx(info->getRegNum(regIndex), regIndex); |
| 1057 | } |
| 1058 | |
| 1059 | if (arg->OperGet() == GT_OBJ) |
| 1060 | { |
| 1061 | BYTE* gcLayout = nullptr; |
| 1062 | unsigned numRefs = 0; |
| 1063 | GenTreeObj* argObj = arg->AsObj(); |
| 1064 | |
| 1065 | if (argObj->IsGCInfoInitialized()) |
| 1066 | { |
| 1067 | gcLayout = argObj->gtGcPtrs; |
| 1068 | numRefs = argObj->GetGcPtrCount(); |
| 1069 | } |
| 1070 | else |
| 1071 | { |
| 1072 | // Set GC Pointer info |
| 1073 | gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots + info->numRegs]; |
| 1074 | numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); |
| 1075 | argSplit->setGcPointers(numRefs, gcLayout); |
| 1076 | } |
| 1077 | |
| 1078 | // Set type of registers |
| 1079 | for (unsigned index = 0; index < info->numRegs; index++) |
| 1080 | { |
| 1081 | var_types regType = comp->getJitGCType(gcLayout[index]); |
| 1082 | // Account for the possibility that float fields may be passed in integer registers. |
| 1083 | if (varTypeIsFloating(regType) && !genIsValidFloatReg(argSplit->GetRegNumByIdx(index))) |
| 1084 | { |
| 1085 | regType = (regType == TYP_FLOAT) ? TYP_INT : TYP_LONG; |
| 1086 | } |
| 1087 | argSplit->m_regType[index] = regType; |
| 1088 | } |
| 1089 | } |
| 1090 | else |
| 1091 | { |
| 1092 | GenTreeFieldList* fieldListPtr = arg->AsFieldList(); |
| 1093 | for (unsigned index = 0; index < info->numRegs; fieldListPtr = fieldListPtr->Rest(), index++) |
| 1094 | { |
| 1095 | var_types regType = fieldListPtr->gtGetOp1()->TypeGet(); |
| 1096 | // Account for the possibility that float fields may be passed in integer registers. |
| 1097 | if (varTypeIsFloating(regType) && !genIsValidFloatReg(argSplit->GetRegNumByIdx(index))) |
| 1098 | { |
| 1099 | regType = (regType == TYP_FLOAT) ? TYP_INT : TYP_LONG; |
| 1100 | } |
| 1101 | argSplit->m_regType[index] = regType; |
| 1102 | |
| 1103 | // Clear the register assignments on the fieldList nodes, as these are contained. |
| 1104 | fieldListPtr->gtRegNum = REG_NA; |
| 1105 | } |
| 1106 | } |
| 1107 | } |
| 1108 | else |
| 1109 | #endif // FEATURE_ARG_SPLIT |
| 1110 | { |
| 1111 | if (!isOnStack) |
| 1112 | { |
| 1113 | #if FEATURE_MULTIREG_ARGS |
| 1114 | if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST)) |
| 1115 | { |
| 1116 | assert(arg->OperGet() == GT_FIELD_LIST); |
| 1117 | |
| 1118 | assert(arg->AsFieldList()->IsFieldListHead()); |
| 1119 | unsigned int regIndex = 0; |
| 1120 | for (GenTreeFieldList* fieldListPtr = arg->AsFieldList(); fieldListPtr != nullptr; |
| 1121 | fieldListPtr = fieldListPtr->Rest()) |
| 1122 | { |
| 1123 | regNumber argReg = info->getRegNum(regIndex); |
| 1124 | GenTree* curOp = fieldListPtr->gtOp.gtOp1; |
| 1125 | var_types curTyp = curOp->TypeGet(); |
| 1126 | |
| 1127 | // Create a new GT_PUTARG_REG node with op1 |
| 1128 | GenTree* newOper = comp->gtNewPutArgReg(curTyp, curOp, argReg); |
| 1129 | |
| 1130 | // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST |
| 1131 | ReplaceArgWithPutArgOrBitcast(&fieldListPtr->gtOp.gtOp1, newOper); |
| 1132 | regIndex++; |
| 1133 | |
| 1134 | // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal. |
| 1135 | fieldListPtr->gtRegNum = REG_NA; |
| 1136 | } |
| 1137 | |
| 1138 | // Just return arg. The GT_FIELD_LIST is not replaced. |
| 1139 | // Nothing more to do. |
| 1140 | return arg; |
| 1141 | } |
| 1142 | else |
| 1143 | #endif // FEATURE_MULTIREG_ARGS |
| 1144 | { |
| 1145 | putArg = comp->gtNewPutArgReg(type, arg, info->regNum); |
| 1146 | } |
| 1147 | } |
| 1148 | else |
| 1149 | { |
| 1150 | // Mark this one as tail call arg if it is a fast tail call. |
| 1151 | // This provides the info to put this argument in the incoming arg area slot |
| 1152 | // instead of in the outgoing arg area slot. |
| 1153 | |
| 1154 | // Make sure state is correct. The PUTARG_STK has TYP_VOID, as it doesn't produce |
| 1155 | // a result. So the type of its operand must be the correct type to push on the stack. |
| 1156 | // For a FIELD_LIST, this will be the type of the field (not the type of the arg), |
| 1157 | // but otherwise it is generally the type of the operand. |
| 1158 | info->checkIsStruct(); |
| 1159 | if ((arg->OperGet() != GT_FIELD_LIST)) |
| 1160 | { |
| 1161 | #if defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK) |
| 1162 | if (type == TYP_SIMD12) |
| 1163 | { |
| 1164 | assert(info->numSlots == 3); |
| 1165 | } |
| 1166 | else |
| 1167 | #endif // defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK) |
| 1168 | { |
| 1169 | assert(genActualType(arg->TypeGet()) == type); |
| 1170 | } |
| 1171 | } |
| 1172 | |
| 1173 | putArg = |
| 1174 | new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg, |
| 1175 | info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), |
| 1176 | call->IsFastTailCall(), call); |
| 1177 | |
| 1178 | #ifdef FEATURE_PUT_STRUCT_ARG_STK |
| 1179 | // If the ArgTabEntry indicates that this arg is a struct |
| 1180 | // get and store the number of slots that are references. |
| 1181 | // This is later used in the codegen for PUT_ARG_STK implementation |
| 1182 | // for struct to decide whether and how many single eight-byte copies |
| 1183 | // to be done (only for reference slots), so gcinfo is emitted. |
| 1184 | // For non-reference slots faster/smaller size instructions are used - |
| 1185 | // pair copying using XMM registers or rep mov instructions. |
| 1186 | if (info->isStruct) |
| 1187 | { |
| 1188 | // We use GT_OBJ only for non-lclVar, non-SIMD, non-FIELD_LIST struct arguments. |
| 1189 | if (arg->OperIsLocal()) |
| 1190 | { |
| 1191 | // This must have a type with a known size (SIMD or has been morphed to a primitive type). |
| 1192 | assert(arg->TypeGet() != TYP_STRUCT); |
| 1193 | } |
| 1194 | else if (arg->OperIs(GT_OBJ)) |
| 1195 | { |
| 1196 | unsigned numRefs = 0; |
| 1197 | BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots]; |
| 1198 | assert(!varTypeIsSIMD(arg)); |
| 1199 | numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); |
| 1200 | putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout); |
| 1201 | |
| 1202 | #ifdef _TARGET_X86_ |
| 1203 | // On x86 the VM lies about the type of a struct containing a pointer sized |
| 1204 | // integer field by returning the type of its field as the type of the struct. |
| 1205 | // Such a struct can be passed in a register depending on its position in the |
| 1206 | // parameter list. The VM does this unwrapping only one level deep, and therefore |
| 1207 | // a type like Struct Foo { Struct Bar { int f } } always needs to be |
| 1208 | // passed on the stack. Also, the VM doesn't lie about the type of such a struct |
| 1209 | // when it is a field of another struct. That is, the VM doesn't lie about |
| 1210 | // the type of Foo.Bar. |
| 1211 | // |
| 1212 | // We now support the promotion of fields that are of type struct. |
| 1213 | // However we only support a limited case where the struct field has a |
| 1214 | // single field and that single field must be a scalar type. Say Foo.Bar |
| 1215 | // field is getting passed as a parameter to a call, Since it is a TYP_STRUCT, |
| 1216 | // as per x86 ABI it should always be passed on stack. Therefore GenTree |
| 1217 | // node under a PUTARG_STK could be GT_OBJ(GT_LCL_VAR_ADDR(v1)), where |
| 1218 | // local v1 could be a promoted field standing for Foo.Bar. Note that |
| 1219 | // the type of v1 will be the type of field of Foo.Bar.f when Foo is |
| 1220 | // promoted. That is v1 will be a scalar type. In this case we need to |
| 1221 | // pass v1 on stack instead of in a register. |
| 1222 | // |
| 1223 | // TODO-PERF: replace GT_OBJ(GT_LCL_VAR_ADDR(v1)) with v1 if v1 is |
| 1224 | // a scalar type and the width of GT_OBJ matches the type size of v1. |
| 1225 | // Note that this cannot be done till call node arguments are morphed |
| 1226 | // because we should not lose the fact that the type of argument is |
| 1227 | // a struct so that the arg gets correctly marked to be passed on stack. |
| 1228 | GenTree* objOp1 = arg->gtGetOp1(); |
| 1229 | if (objOp1->OperGet() == GT_LCL_VAR_ADDR) |
| 1230 | { |
| 1231 | unsigned lclNum = objOp1->AsLclVarCommon()->GetLclNum(); |
| 1232 | if (comp->lvaTable[lclNum].lvType != TYP_STRUCT) |
| 1233 | { |
| 1234 | comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_VMNeedsStackAddr)); |
| 1235 | } |
| 1236 | } |
| 1237 | #endif // _TARGET_X86_ |
| 1238 | } |
| 1239 | else if (!arg->OperIs(GT_FIELD_LIST)) |
| 1240 | { |
| 1241 | assert(varTypeIsSIMD(arg) || (info->numSlots == 1)); |
| 1242 | } |
| 1243 | } |
| 1244 | #endif // FEATURE_PUT_STRUCT_ARG_STK |
| 1245 | } |
| 1246 | } |
| 1247 | |
| 1248 | JITDUMP("new node is : " ); |
| 1249 | DISPNODE(putArg); |
| 1250 | JITDUMP("\n" ); |
| 1251 | |
| 1252 | if (arg->gtFlags & GTF_LATE_ARG) |
| 1253 | { |
| 1254 | putArg->gtFlags |= GTF_LATE_ARG; |
| 1255 | } |
| 1256 | else if (updateArgTable) |
| 1257 | { |
| 1258 | info->node = putArg; |
| 1259 | } |
| 1260 | return putArg; |
| 1261 | } |
| 1262 | |
| 1263 | //------------------------------------------------------------------------ |
| 1264 | // LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between |
| 1265 | // the argument evaluation and the call. This is the point at which the source is |
| 1266 | // consumed and the value transitions from control of the register allocator to the calling |
| 1267 | // convention. |
| 1268 | // |
| 1269 | // Arguments: |
| 1270 | // call - The call node |
| 1271 | // ppArg - Pointer to the call argument pointer. We might replace the call argument by |
| 1272 | // changing *ppArg. |
| 1273 | // |
| 1274 | // Return Value: |
| 1275 | // None. |
| 1276 | // |
| 1277 | void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) |
| 1278 | { |
| 1279 | GenTree* arg = *ppArg; |
| 1280 | |
| 1281 | JITDUMP("lowering arg : " ); |
| 1282 | DISPNODE(arg); |
| 1283 | |
| 1284 | // No assignments should remain by Lowering. |
| 1285 | assert(!arg->OperIs(GT_ASG)); |
| 1286 | assert(!arg->OperIsPutArgStk()); |
| 1287 | |
| 1288 | // Assignments/stores at this level are not really placing an argument. |
| 1289 | // They are setting up temporary locals that will later be placed into |
| 1290 | // outgoing regs or stack. |
| 1291 | // Note that atomic ops may be stores and still produce a value. |
| 1292 | if (!arg->IsValue()) |
| 1293 | { |
| 1294 | assert((arg->OperIsStore() && !arg->IsValue()) || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || |
| 1295 | arg->OperIsCopyBlkOp()); |
| 1296 | return; |
| 1297 | } |
| 1298 | |
| 1299 | fgArgTabEntry* info = comp->gtArgEntryByNode(call, arg); |
| 1300 | assert(info->node == arg); |
| 1301 | var_types type = arg->TypeGet(); |
| 1302 | |
| 1303 | if (varTypeIsSmall(type)) |
| 1304 | { |
| 1305 | // Normalize 'type'; it represents the item that we will be storing in the Outgoing Args area. |
| 1306 | type = TYP_INT; |
| 1307 | } |
| 1308 | |
| 1309 | #if defined(FEATURE_SIMD) |
| 1310 | #if defined(_TARGET_X86_) |
| 1311 | // Non-param TYP_SIMD12 local var nodes are massaged in Lower to TYP_SIMD16 to match their |
| 1312 | // allocated size (see lvSize()). However, when passing the variables as arguments, and |
| 1313 | // storing the variables to the outgoing argument area on the stack, we must use their |
| 1314 | // actual TYP_SIMD12 type, so exactly 12 bytes is allocated and written. |
| 1315 | if (type == TYP_SIMD16) |
| 1316 | { |
| 1317 | if ((arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_STORE_LCL_VAR)) |
| 1318 | { |
| 1319 | unsigned varNum = arg->AsLclVarCommon()->GetLclNum(); |
| 1320 | LclVarDsc* varDsc = &comp->lvaTable[varNum]; |
| 1321 | type = varDsc->lvType; |
| 1322 | } |
| 1323 | else if (arg->OperGet() == GT_SIMD) |
| 1324 | { |
| 1325 | assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12)); |
| 1326 | |
| 1327 | if (arg->AsSIMD()->gtSIMDSize == 12) |
| 1328 | { |
| 1329 | type = TYP_SIMD12; |
| 1330 | } |
| 1331 | } |
| 1332 | } |
| 1333 | #elif defined(_TARGET_AMD64_) |
| 1334 | // TYP_SIMD8 parameters that are passed as longs |
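    // As a rough sketch, the value is wrapped in a bitcast so the register allocator treats
    // it as an integer:
    //     BITCAST<long>(simd8Value)      // later wrapped in a PUTARG_REG by NewPutArg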
| 1335 | if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum)) |
| 1336 | { |
| 1337 | GenTreeUnOp* bitcast = new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, TYP_LONG, arg, nullptr); |
| 1338 | BlockRange().InsertAfter(arg, bitcast); |
| 1339 | |
| 1340 | info->node = *ppArg = arg = bitcast; |
| 1341 | type = TYP_LONG; |
| 1342 | } |
| 1343 | #endif // defined(_TARGET_X86_) |
| 1344 | #endif // defined(FEATURE_SIMD) |
| 1345 | |
| 1346 | // If we hit this we are probably double-lowering. |
| 1347 | assert(!arg->OperIsPutArg()); |
| 1348 | |
| 1349 | #if !defined(_TARGET_64BIT_) |
| 1350 | if (varTypeIsLong(type)) |
| 1351 | { |
| 1352 | bool isReg = (info->regNum != REG_STK); |
| 1353 | if (isReg) |
| 1354 | { |
| 1355 | noway_assert(arg->OperGet() == GT_LONG); |
| 1356 | assert(info->numRegs == 2); |
| 1357 | |
| 1358 | GenTree* argLo = arg->gtGetOp1(); |
| 1359 | GenTree* argHi = arg->gtGetOp2(); |
| 1360 | |
| 1361 | GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr); |
| 1362 | // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence. |
| 1363 | (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList); |
| 1364 | GenTree* putArg = NewPutArg(call, fieldList, info, type); |
| 1365 | |
| 1366 | BlockRange().InsertBefore(arg, putArg); |
| 1367 | BlockRange().Remove(arg); |
| 1368 | *ppArg = fieldList; |
| 1369 | info->node = fieldList; |
| 1370 | } |
| 1371 | else |
| 1372 | { |
| 1373 | assert(arg->OperGet() == GT_LONG); |
| 1374 | // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK. |
| 1375 | // Although the hi argument needs to be pushed first, that will be handled by the general case, |
| 1376 | // in which the fields will be reversed. |
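            // As an illustrative sketch, the transformation is:
            //     GT_LONG(lo, hi)  ==>  PUTARG_STK(FIELD_LIST{ lo @ 0 : INT, hi @ 4 : INT })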
| 1377 | assert(info->numSlots == 2); |
| 1378 | GenTree* argLo = arg->gtGetOp1(); |
| 1379 | GenTree* argHi = arg->gtGetOp2(); |
| 1380 | GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr); |
| 1381 | // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence. |
| 1382 | (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList); |
| 1383 | GenTree* putArg = NewPutArg(call, fieldList, info, type); |
| 1384 | putArg->gtRegNum = info->regNum; |
| 1385 | |
| 1386 | // We can't call ReplaceArgWithPutArgOrBitcast here because it presumes that we are keeping the original |
| 1387 | // arg. |
| 1388 | BlockRange().InsertBefore(arg, fieldList, putArg); |
| 1389 | BlockRange().Remove(arg); |
| 1390 | *ppArg = putArg; |
| 1391 | } |
| 1392 | } |
| 1393 | else |
| 1394 | #endif // !defined(_TARGET_64BIT_) |
| 1395 | { |
| 1396 | |
| 1397 | #ifdef _TARGET_ARMARCH_ |
| 1398 | if (call->IsVarargs() || comp->opts.compUseSoftFP) |
| 1399 | { |
            // For a vararg call, or on armel (soft-FP), register args should all be integer.
            // Insert copies as needed to move float values to integer registers.
| 1402 | GenTree* newNode = LowerFloatArg(ppArg, info); |
| 1403 | if (newNode != nullptr) |
| 1404 | { |
| 1405 | type = newNode->TypeGet(); |
| 1406 | } |
| 1407 | } |
| 1408 | #endif // _TARGET_ARMARCH_ |
| 1409 | |
| 1410 | GenTree* putArg = NewPutArg(call, arg, info, type); |
| 1411 | |
        // In the case of a struct passed in registers (one or two),
        // NewPutArg returns a new node (a GT_PUTARG_REG, or a GT_FIELD_LIST with two GT_PUTARG_REGs).
        // If a new node is returned, splice it into the right place in the tree.
| 1415 | if (arg != putArg) |
| 1416 | { |
| 1417 | ReplaceArgWithPutArgOrBitcast(ppArg, putArg); |
| 1418 | } |
| 1419 | } |
| 1420 | } |
| 1421 | |
| 1422 | #ifdef _TARGET_ARMARCH_ |
| 1423 | //------------------------------------------------------------------------ |
// LowerFloatArg: Lower a float call argument on ARM platforms.
//
// Arguments:
//    pArg - Pointer to the arg node
//    info - call argument info
//
// Return Value:
//    Returns nullptr if no transformation was done;
//    returns arg if the transformation was done in place;
//    returns a new tree if the root was changed.
| 1434 | // |
| 1435 | // Notes: |
| 1436 | // This must handle scalar float arguments as well as GT_FIELD_LISTs |
| 1437 | // with floating point fields. |
| 1438 | // |
| 1439 | GenTree* Lowering::LowerFloatArg(GenTree** pArg, fgArgTabEntry* info) |
| 1440 | { |
| 1441 | GenTree* arg = *pArg; |
| 1442 | if (info->regNum != REG_STK) |
| 1443 | { |
| 1444 | if (arg->OperIsFieldList()) |
| 1445 | { |
| 1446 | GenTreeFieldList* currListNode = arg->AsFieldList(); |
| 1447 | regNumber currRegNumber = info->regNum; |
| 1448 | |
| 1449 | // Transform fields that are passed as registers in place. |
| 1450 | unsigned fieldRegCount; |
| 1451 | for (unsigned i = 0; i < info->numRegs; i += fieldRegCount) |
| 1452 | { |
| 1453 | assert(currListNode != nullptr); |
| 1454 | GenTree* node = currListNode->Current(); |
| 1455 | if (varTypeIsFloating(node)) |
| 1456 | { |
| 1457 | GenTree* intNode = LowerFloatArgReg(node, currRegNumber); |
| 1458 | assert(intNode != nullptr); |
| 1459 | |
| 1460 | ReplaceArgWithPutArgOrBitcast(currListNode->pCurrent(), intNode); |
| 1461 | currListNode->ChangeType(intNode->TypeGet()); |
| 1462 | } |
| 1463 | |
| 1464 | if (node->TypeGet() == TYP_DOUBLE) |
| 1465 | { |
| 1466 | currRegNumber = REG_NEXT(REG_NEXT(currRegNumber)); |
| 1467 | fieldRegCount = 2; |
| 1468 | } |
| 1469 | else |
| 1470 | { |
| 1471 | currRegNumber = REG_NEXT(currRegNumber); |
| 1472 | fieldRegCount = 1; |
| 1473 | } |
| 1474 | currListNode = currListNode->Rest(); |
| 1475 | } |
| 1476 | // List fields were replaced in place. |
| 1477 | return arg; |
| 1478 | } |
| 1479 | else if (varTypeIsFloating(arg)) |
| 1480 | { |
| 1481 | GenTree* intNode = LowerFloatArgReg(arg, info->regNum); |
| 1482 | assert(intNode != nullptr); |
| 1483 | ReplaceArgWithPutArgOrBitcast(pArg, intNode); |
| 1484 | return *pArg; |
| 1485 | } |
| 1486 | } |
| 1487 | return nullptr; |
| 1488 | } |
| 1489 | |
| 1490 | //------------------------------------------------------------------------ |
// LowerFloatArgReg: Lower a float call argument node that is passed in a register.
//
// Arguments:
//    arg    - The arg node
//    regNum - register number
//
// Return Value:
//    Returns a new bitcast node that moves the float value to an integer register.
| 1499 | // |
| 1500 | GenTree* Lowering::LowerFloatArgReg(GenTree* arg, regNumber regNum) |
| 1501 | { |
| 1502 | var_types floatType = arg->TypeGet(); |
| 1503 | assert(varTypeIsFloating(floatType)); |
| 1504 | var_types intType = (floatType == TYP_DOUBLE) ? TYP_LONG : TYP_INT; |
| 1505 | GenTree* intArg = comp->gtNewBitCastNode(intType, arg); |
| 1506 | intArg->gtRegNum = regNum; |
| 1507 | #ifdef _TARGET_ARM_ |
| 1508 | if (floatType == TYP_DOUBLE) |
| 1509 | { |
| 1510 | regNumber nextReg = REG_NEXT(regNum); |
| 1511 | intArg->AsMultiRegOp()->gtOtherReg = nextReg; |
| 1512 | } |
| 1513 | #endif |
| 1514 | return intArg; |
| 1515 | } |
| 1516 | #endif |
| 1517 | |
| 1518 | // do lowering steps for each arg of a call |
| 1519 | void Lowering::LowerArgsForCall(GenTreeCall* call) |
| 1520 | { |
| 1521 | JITDUMP("objp:\n======\n" ); |
| 1522 | if (call->gtCallObjp) |
| 1523 | { |
| 1524 | LowerArg(call, &call->gtCallObjp); |
| 1525 | } |
| 1526 | |
| 1527 | GenTreeArgList* args = call->gtCallArgs; |
| 1528 | |
| 1529 | JITDUMP("\nargs:\n======\n" ); |
| 1530 | for (; args; args = args->Rest()) |
| 1531 | { |
| 1532 | LowerArg(call, &args->Current()); |
| 1533 | } |
| 1534 | |
| 1535 | JITDUMP("\nlate:\n======\n" ); |
| 1536 | for (args = call->gtCallLateArgs; args; args = args->Rest()) |
| 1537 | { |
| 1538 | LowerArg(call, &args->Current()); |
| 1539 | } |
| 1540 | } |
| 1541 | |
// helper that creates a node representing a relocatable physical address computation
| 1543 | GenTree* Lowering::AddrGen(ssize_t addr) |
| 1544 | { |
| 1545 | // this should end up in codegen as : instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, addr) |
| 1546 | GenTree* result = comp->gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR); |
| 1547 | return result; |
| 1548 | } |
| 1549 | |
| 1550 | // variant that takes a void* |
| 1551 | GenTree* Lowering::AddrGen(void* addr) |
| 1552 | { |
| 1553 | return AddrGen((ssize_t)addr); |
| 1554 | } |
| 1555 | |
| 1556 | // do lowering steps for a call |
| 1557 | // this includes: |
| 1558 | // - adding the placement nodes (either stack or register variety) for arguments |
| 1559 | // - lowering the expression that calculates the target address |
| 1560 | // - adding nodes for other operations that occur after the call sequence starts and before |
| 1561 | // control transfer occurs (profiling and tail call helpers, pinvoke incantations) |
| 1562 | // |
| 1563 | void Lowering::LowerCall(GenTree* node) |
| 1564 | { |
| 1565 | GenTreeCall* call = node->AsCall(); |
| 1566 | |
| 1567 | JITDUMP("lowering call (before):\n" ); |
| 1568 | DISPTREERANGE(BlockRange(), call); |
| 1569 | JITDUMP("\n" ); |
| 1570 | |
| 1571 | call->ClearOtherRegs(); |
| 1572 | LowerArgsForCall(call); |
| 1573 | |
| 1574 | // note that everything generated from this point on runs AFTER the outgoing args are placed |
| 1575 | GenTree* controlExpr = nullptr; |
| 1576 | |
| 1577 | // for x86, this is where we record ESP for checking later to make sure stack is balanced |
| 1578 | |
| 1579 | // Check for Delegate.Invoke(). If so, we inline it. We get the |
| 1580 | // target-object and target-function from the delegate-object, and do |
| 1581 | // an indirect call. |
| 1582 | if (call->IsDelegateInvoke()) |
| 1583 | { |
| 1584 | controlExpr = LowerDelegateInvoke(call); |
| 1585 | } |
| 1586 | else |
| 1587 | { |
| 1588 | // Virtual and interface calls |
| 1589 | switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK) |
| 1590 | { |
| 1591 | case GTF_CALL_VIRT_STUB: |
| 1592 | controlExpr = LowerVirtualStubCall(call); |
| 1593 | break; |
| 1594 | |
| 1595 | case GTF_CALL_VIRT_VTABLE: |
| 1596 | // stub dispatching is off or this is not a virtual call (could be a tailcall) |
| 1597 | controlExpr = LowerVirtualVtableCall(call); |
| 1598 | break; |
| 1599 | |
| 1600 | case GTF_CALL_NONVIRT: |
| 1601 | if (call->IsUnmanaged()) |
| 1602 | { |
| 1603 | controlExpr = LowerNonvirtPinvokeCall(call); |
| 1604 | } |
| 1605 | else if (call->gtCallType == CT_INDIRECT) |
| 1606 | { |
| 1607 | controlExpr = LowerIndirectNonvirtCall(call); |
| 1608 | } |
| 1609 | else |
| 1610 | { |
| 1611 | controlExpr = LowerDirectCall(call); |
| 1612 | } |
| 1613 | break; |
| 1614 | |
| 1615 | default: |
| 1616 | noway_assert(!"strange call type" ); |
| 1617 | break; |
| 1618 | } |
| 1619 | } |
| 1620 | |
| 1621 | if (call->IsTailCallViaHelper()) |
| 1622 | { |
| 1623 | // Either controlExpr or gtCallAddr must contain real call target. |
| 1624 | if (controlExpr == nullptr) |
| 1625 | { |
| 1626 | assert(call->gtCallType == CT_INDIRECT); |
| 1627 | assert(call->gtCallAddr != nullptr); |
| 1628 | controlExpr = call->gtCallAddr; |
| 1629 | } |
| 1630 | |
| 1631 | controlExpr = LowerTailCallViaHelper(call, controlExpr); |
| 1632 | } |
| 1633 | |
| 1634 | if (controlExpr != nullptr) |
| 1635 | { |
| 1636 | LIR::Range controlExprRange = LIR::SeqTree(comp, controlExpr); |
| 1637 | |
| 1638 | JITDUMP("results of lowering call:\n" ); |
| 1639 | DISPRANGE(controlExprRange); |
| 1640 | |
| 1641 | GenTree* insertionPoint = call; |
| 1642 | if (!call->IsTailCallViaHelper()) |
| 1643 | { |
| 1644 | // The controlExpr should go before the gtCallCookie and the gtCallAddr, if they exist |
| 1645 | // |
| 1646 | // TODO-LIR: find out what's really required here, as this is currently a tree order |
| 1647 | // dependency. |
| 1648 | if (call->gtCallType == CT_INDIRECT) |
| 1649 | { |
| 1650 | bool isClosed = false; |
| 1651 | if (call->gtCallCookie != nullptr) |
| 1652 | { |
| 1653 | #ifdef DEBUG |
| 1654 | GenTree* firstCallAddrNode = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode(); |
| 1655 | assert(isClosed); |
| 1656 | assert(call->gtCallCookie->Precedes(firstCallAddrNode)); |
| 1657 | #endif // DEBUG |
| 1658 | |
| 1659 | insertionPoint = BlockRange().GetTreeRange(call->gtCallCookie, &isClosed).FirstNode(); |
| 1660 | assert(isClosed); |
| 1661 | } |
| 1662 | else if (call->gtCallAddr != nullptr) |
| 1663 | { |
| 1664 | insertionPoint = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode(); |
| 1665 | assert(isClosed); |
| 1666 | } |
| 1667 | } |
| 1668 | } |
| 1669 | |
| 1670 | ContainCheckRange(controlExprRange); |
| 1671 | BlockRange().InsertBefore(insertionPoint, std::move(controlExprRange)); |
| 1672 | |
| 1673 | call->gtControlExpr = controlExpr; |
| 1674 | } |
| 1675 | if (call->IsFastTailCall()) |
| 1676 | { |
        // Lowering a fast tail call can introduce new temps to set up args correctly for the callee.
        // This involves patching LCL_VAR and LCL_VAR_ADDR nodes holding caller stack args
        // and replacing them with a new temp. The control expression can also contain nodes that
        // need to be patched.
        // Therefore fast tail call lowering must be done after controlExpr is inserted into LIR.
        // One side effect is that the order of the PInvoke method epilog (PME) and the control
        // expression is flipped, since LowerFastTailCall calls InsertPInvokeMethodEpilog.
| 1684 | LowerFastTailCall(call); |
| 1685 | } |
| 1686 | |
| 1687 | if (comp->opts.IsJit64Compat()) |
| 1688 | { |
| 1689 | CheckVSQuirkStackPaddingNeeded(call); |
| 1690 | } |
| 1691 | |
| 1692 | ContainCheckCallOperands(call); |
| 1693 | JITDUMP("lowering call (after):\n" ); |
| 1694 | DISPTREERANGE(BlockRange(), call); |
| 1695 | JITDUMP("\n" ); |
| 1696 | } |
| 1697 | |
// Though the issue described below gets fixed in the intellitrace dll of VS2015 (a.k.a. Dev14),
// we still need this quirk for desktop so that older versions of VS (e.g. VS2010/2012)
// continue to work.
// This quirk is excluded from other targets that have no back compat burden.
//
// Quirk for VS debug-launch scenario to work:
// See if this is a PInvoke call with exactly one param that is the address of a struct local.
// In such a case indicate to frame-layout logic to add 16 bytes of padding
// between the save-reg area and locals. This is to protect against the buffer
// overrun bug in microsoft.intellitrace.11.0.0.dll!ProfilerInterop.InitInterop().
| 1708 | // |
| 1709 | // A work-around to this bug is to disable IntelliTrace debugging |
| 1710 | // (VS->Tools->Options->IntelliTrace->Enable IntelliTrace - uncheck this option). |
| 1711 | // The reason why this works on Jit64 is that at the point of AV the call stack is |
| 1712 | // |
| 1713 | // GetSystemInfo() Native call |
| 1714 | // IL_Stub generated for PInvoke declaration. |
| 1715 | // ProfilerInterface::InitInterop() |
| 1716 | // ProfilerInterface.Cctor() |
| 1717 | // VM asm worker |
| 1718 | // |
| 1719 | // The cctor body has just the call to InitInterop(). VM asm worker is holding |
| 1720 | // something in rbx that is used immediately after the Cctor call. Jit64 generated |
| 1721 | // InitInterop() method is pushing the registers in the following order |
| 1722 | // |
| 1723 | // rbx |
| 1724 | // rbp |
| 1725 | // rsi |
| 1726 | // rdi |
| 1727 | // r12 |
| 1728 | // r13 |
| 1729 | // Struct local |
| 1730 | // |
| 1731 | // Due to buffer overrun, rbx doesn't get impacted. Whereas RyuJIT jitted code of |
| 1732 | // the same method is pushing regs in the following order |
| 1733 | // |
| 1734 | // rbp |
| 1735 | // rdi |
| 1736 | // rsi |
| 1737 | // rbx |
| 1738 | // struct local |
| 1739 | // |
| 1740 | // Therefore as a fix, we add padding between save-reg area and locals to |
| 1741 | // make this scenario work against JB. |
| 1742 | // |
// Note: If this quirk gets broken due to other JIT optimizations, we should consider
// a more tolerant fix. One such fix is to pad the struct.
| 1745 | void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call) |
| 1746 | { |
| 1747 | assert(comp->opts.IsJit64Compat()); |
| 1748 | |
| 1749 | #ifdef _TARGET_AMD64_ |
| 1750 | // Confine this to IL stub calls which aren't marked as unmanaged. |
| 1751 | if (call->IsPInvoke() && !call->IsUnmanaged()) |
| 1752 | { |
| 1753 | bool paddingNeeded = false; |
| 1754 | GenTree* firstPutArgReg = nullptr; |
| 1755 | for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest()) |
| 1756 | { |
| 1757 | GenTree* tmp = args->Current(); |
| 1758 | if (tmp->OperGet() == GT_PUTARG_REG) |
| 1759 | { |
| 1760 | if (firstPutArgReg == nullptr) |
| 1761 | { |
| 1762 | firstPutArgReg = tmp; |
| 1763 | GenTree* op1 = firstPutArgReg->gtOp.gtOp1; |
| 1764 | |
| 1765 | if (op1->OperGet() == GT_LCL_VAR_ADDR) |
| 1766 | { |
| 1767 | unsigned lclNum = op1->AsLclVarCommon()->GetLclNum(); |
| 1768 | // TODO-1stClassStructs: This is here to duplicate previous behavior, |
| 1769 | // but is not needed because the scenario being quirked did not involve |
| 1770 | // a SIMD or enregisterable struct. |
| 1771 | // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT) |
| 1772 | if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet())) |
| 1773 | { |
| 1774 | // First arg is addr of a struct local. |
| 1775 | paddingNeeded = true; |
| 1776 | } |
| 1777 | else |
| 1778 | { |
| 1779 | // Not a struct local. |
| 1780 | assert(paddingNeeded == false); |
| 1781 | break; |
| 1782 | } |
| 1783 | } |
| 1784 | else |
| 1785 | { |
| 1786 | // First arg is not a local var addr. |
| 1787 | assert(paddingNeeded == false); |
| 1788 | break; |
| 1789 | } |
| 1790 | } |
| 1791 | else |
| 1792 | { |
| 1793 | // Has more than one arg. |
| 1794 | paddingNeeded = false; |
| 1795 | break; |
| 1796 | } |
| 1797 | } |
| 1798 | } |
| 1799 | |
| 1800 | if (paddingNeeded) |
| 1801 | { |
| 1802 | comp->compVSQuirkStackPaddingNeeded = VSQUIRK_STACK_PAD; |
| 1803 | } |
| 1804 | } |
| 1805 | #endif // _TARGET_AMD64_ |
| 1806 | } |
| 1807 | |
| 1808 | // Inserts profiler hook, GT_PROF_HOOK for a tail call node. |
| 1809 | // |
| 1810 | // AMD64: |
| 1811 | // We need to insert this after all nested calls, but before all the arguments to this call have been set up. |
| 1812 | // To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before |
| 1813 | // that. If there are no args, then it should be inserted before the call node. |
| 1814 | // |
| 1815 | // For example: |
| 1816 | // * stmtExpr void (top level) (IL 0x000...0x010) |
| 1817 | // arg0 SETUP | /--* argPlace ref REG NA $c5 |
| 1818 | // this in rcx | | /--* argPlace ref REG NA $c1 |
| 1819 | // | | | /--* call ref System.Globalization.CultureInfo.get_InvariantCulture $c2 |
| 1820 | // arg1 SETUP | | +--* st.lclVar ref V02 tmp1 REG NA $c2 |
| 1821 | // | | | /--* lclVar ref V02 tmp1 u : 2 (last use) REG NA $c2 |
| 1822 | // arg1 in rdx | | +--* putarg_reg ref REG NA |
| 1823 | // | | | /--* lclVar ref V00 arg0 u : 2 (last use) REG NA $80 |
| 1824 | // this in rcx | | +--* putarg_reg ref REG NA |
| 1825 | // | | /--* call nullcheck ref System.String.ToLower $c5 |
| 1826 | // | | { * stmtExpr void (embedded)(IL 0x000... ? ? ? ) |
| 1827 | // | | { \--* prof_hook void REG NA |
| 1828 | // arg0 in rcx | +--* putarg_reg ref REG NA |
| 1829 | // control expr | +--* const(h) long 0x7ffe8e910e98 ftn REG NA |
| 1830 | // \--* call void System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void |
| 1831 | // |
| 1832 | // In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call |
| 1833 | // (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call. |
| 1834 | // |
| 1835 | // X86: |
| 1836 | // Insert the profiler hook immediately before the call. The profiler hook will preserve |
| 1837 | // all argument registers (ECX, EDX), but nothing else. |
| 1838 | // |
| 1839 | // Params: |
| 1840 | // callNode - tail call node |
| 1841 | // insertionPoint - if non-null, insert the profiler hook before this point. |
| 1842 | // If null, insert the profiler hook before args are setup |
| 1843 | // but after all arg side effects are computed. |
| 1844 | // |
| 1845 | void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint) |
| 1846 | { |
| 1847 | assert(call->IsTailCall()); |
| 1848 | assert(comp->compIsProfilerHookNeeded()); |
| 1849 | |
| 1850 | #if defined(_TARGET_X86_) |
| 1851 | |
| 1852 | if (insertionPoint == nullptr) |
| 1853 | { |
| 1854 | insertionPoint = call; |
| 1855 | } |
| 1856 | |
| 1857 | #else // !defined(_TARGET_X86_) |
| 1858 | |
| 1859 | if (insertionPoint == nullptr) |
| 1860 | { |
| 1861 | GenTree* tmp = nullptr; |
| 1862 | for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest()) |
| 1863 | { |
| 1864 | tmp = args->Current(); |
| 1865 | assert(tmp->OperGet() != GT_PUTARG_REG); // We don't expect to see these in gtCallArgs |
| 1866 | if (tmp->OperGet() == GT_PUTARG_STK) |
| 1867 | { |
| 1868 | // found it |
| 1869 | insertionPoint = tmp; |
| 1870 | break; |
| 1871 | } |
| 1872 | } |
| 1873 | |
| 1874 | if (insertionPoint == nullptr) |
| 1875 | { |
| 1876 | for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest()) |
| 1877 | { |
| 1878 | tmp = args->Current(); |
| 1879 | if ((tmp->OperGet() == GT_PUTARG_REG) || (tmp->OperGet() == GT_PUTARG_STK)) |
| 1880 | { |
| 1881 | // found it |
| 1882 | insertionPoint = tmp; |
| 1883 | break; |
| 1884 | } |
| 1885 | } |
| 1886 | |
| 1887 | // If there are no args, insert before the call node |
| 1888 | if (insertionPoint == nullptr) |
| 1889 | { |
| 1890 | insertionPoint = call; |
| 1891 | } |
| 1892 | } |
| 1893 | } |
| 1894 | |
| 1895 | #endif // !defined(_TARGET_X86_) |
| 1896 | |
| 1897 | assert(insertionPoint != nullptr); |
| 1898 | GenTree* profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID); |
| 1899 | BlockRange().InsertBefore(insertionPoint, profHookNode); |
| 1900 | } |
| 1901 | |
| 1902 | // Lower fast tail call implemented as epilog+jmp. |
| 1903 | // Also inserts PInvoke method epilog if required. |
| 1904 | void Lowering::LowerFastTailCall(GenTreeCall* call) |
| 1905 | { |
| 1906 | #if FEATURE_FASTTAILCALL |
    // Tail call restrictions, i.e. conditions under which the tail prefix is ignored.
| 1908 | // Most of these checks are already done by importer or fgMorphTailCall(). |
| 1909 | // This serves as a double sanity check. |
| 1910 | assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods |
| 1911 | assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check |
    assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
| 1913 | assert(!comp->compLocallocUsed); // tail call from methods that also do localloc |
| 1914 | |
| 1915 | #ifdef _TARGET_AMD64_ |
| 1916 | assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check |
| 1917 | #endif // _TARGET_AMD64_ |
| 1918 | |
| 1919 | // We expect to see a call that meets the following conditions |
| 1920 | assert(call->IsFastTailCall()); |
| 1921 | |
| 1922 | // VM cannot use return address hijacking when A() and B() tail call each |
| 1923 | // other in mutual recursion. Therefore, this block is reachable through |
| 1924 | // a GC-safe point or the whole method is marked as fully interruptible. |
| 1925 | // |
| 1926 | // TODO-Cleanup: |
    // optReachWithoutCall() depends on the fact that loop header blocks
    // will have a block number > fgLastBB. These loop headers get added
    // after dominator computation and get skipped by optReachWithoutCall().
    // The below condition cannot be asserted in Lowering because fgSimpleLowering()
    // can add a new basic block for a range check failure which becomes
    // fgLastBB with a block number > the loop header block number.
| 1933 | // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || |
| 1934 | // !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible); |
| 1935 | |
    // If PInvokes are in-lined, we have to remember to execute the PInvoke method epilog anywhere that
    // a method returns. This covers the case where the caller method has both PInvokes and tail calls.
| 1938 | if (comp->info.compCallUnmanaged) |
| 1939 | { |
| 1940 | InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call)); |
| 1941 | } |
| 1942 | |
    // Args for the tail call are set up in the incoming arg area. The gc-ness of the args of the
    // caller and the callee (which is being tail called) may not match. Therefore, everything
    // from arg setup until the epilog needs to be non-interruptible by GC. This is
    // achieved by inserting GT_START_NONGC before the very first GT_PUTARG_STK node
    // of the call is set up. Note that once a stack arg is set up, there cannot be nested
    // calls subsequently in execution order that set up other args, because a nested
    // call could overwrite a stack arg that was set up earlier.
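    // As a rough sketch, the lowered LIR for the call ends up looking like:
    //     ...evaluation of argument side effects...
    //     GT_START_NONGC
    //     GT_PUTARG_STK (first stack arg)
    //     ...remaining GT_PUTARG_STK / GT_PUTARG_REG nodes...
    //     GT_CALL (fast tail call)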
| 1950 | GenTree* firstPutArgStk = nullptr; |
| 1951 | GenTreeArgList* args; |
| 1952 | ArrayStack<GenTree*> putargs(comp->getAllocator(CMK_ArrayStack)); |
| 1953 | |
| 1954 | for (args = call->gtCallArgs; args; args = args->Rest()) |
| 1955 | { |
| 1956 | GenTree* tmp = args->Current(); |
| 1957 | if (tmp->OperGet() == GT_PUTARG_STK) |
| 1958 | { |
| 1959 | putargs.Push(tmp); |
| 1960 | } |
| 1961 | } |
| 1962 | |
| 1963 | for (args = call->gtCallLateArgs; args; args = args->Rest()) |
| 1964 | { |
| 1965 | GenTree* tmp = args->Current(); |
| 1966 | if (tmp->OperGet() == GT_PUTARG_STK) |
| 1967 | { |
| 1968 | putargs.Push(tmp); |
| 1969 | } |
| 1970 | } |
| 1971 | |
| 1972 | if (!putargs.Empty()) |
| 1973 | { |
| 1974 | firstPutArgStk = putargs.Bottom(); |
| 1975 | } |
| 1976 | |
    // If we have a putarg_stk node, also count the number of non-standard args the
    // call node has. Note that while determining whether a tail call can be fast
    // tail called, we don't count non-standard args (passed in R10 or R11) since they
    // don't contribute to outgoing arg space. These non-standard args are not
    // accounted for in the caller's arg count but are accounted for in the callee's arg count
    // after fgMorphArgs(). Therefore, exclude the callee's non-standard args when mapping
    // the callee's stack arg num to the corresponding caller's stack arg num.
| 1984 | unsigned calleeNonStandardArgCount = call->GetNonStandardAddedArgCount(comp); |
| 1985 | |
| 1986 | // Say Caller(a, b, c, d, e) fast tail calls Callee(e, d, c, b, a) |
| 1987 | // i.e. passes its arguments in reverse to Callee. During call site |
| 1988 | // setup, after computing argument side effects, stack args are setup |
    // first and reg args next. In the above example, both the Caller's and the
    // Callee's stack args (e and a respectively) share the same stack slot
    // and are alive at the same time. The act of setting up the Callee's
    // stack arg will overwrite the stack arg of the Caller, and if there are
    // further uses of the Caller's stack arg we have to move it to a temp
    // before overwriting its slot and use the temp in place of the
    // corresponding Caller stack arg.
| 1996 | // |
| 1997 | // For the above example, conceptually this is what is done |
| 1998 | // tmp = e; |
| 1999 | // Stack slot of e = a |
| 2000 | // R9 = b, R8 = c, RDx = d |
| 2001 | // RCX = tmp |
| 2002 | // |
| 2003 | // The below logic is meant to detect cases like this and introduce |
| 2004 | // temps to set up args correctly for Callee. |
| 2005 | |
| 2006 | for (int i = 0; i < putargs.Height(); i++) |
| 2007 | { |
| 2008 | GenTree* putArgStkNode = putargs.Bottom(i); |
| 2009 | |
| 2010 | assert(putArgStkNode->OperGet() == GT_PUTARG_STK); |
| 2011 | |
| 2012 | // Get the caller arg num corresponding to this callee arg. |
| 2013 | // Note that these two args share the same stack slot. Therefore, |
| 2014 | // if there are further uses of corresponding caller arg, we need |
| 2015 | // to move it to a temp and use the temp in this call tree. |
| 2016 | // |
| 2017 | // Note that Caller is guaranteed to have a param corresponding to |
| 2018 | // this Callee's arg since fast tail call mechanism counts the |
| 2019 | // stack slots required for both Caller and Callee for passing params |
| 2020 | // and allow fast tail call only if stack slots required by Caller >= |
| 2021 | // Callee. |
| 2022 | fgArgTabEntry* argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode); |
| 2023 | assert(argTabEntry); |
| 2024 | unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount; |
| 2025 | noway_assert(callerArgNum < comp->info.compArgsCount); |
| 2026 | |
| 2027 | unsigned callerArgLclNum = callerArgNum; |
| 2028 | LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum; |
| 2029 | if (callerArgDsc->lvPromoted) |
| 2030 | { |
| 2031 | callerArgLclNum = |
| 2032 | callerArgDsc->lvFieldLclStart; // update the callerArgNum to the promoted struct field's lclNum |
| 2033 | callerArgDsc = comp->lvaTable + callerArgLclNum; |
| 2034 | } |
| 2035 | noway_assert(callerArgDsc->lvIsParam); |
| 2036 | |
| 2037 | // Start searching in execution order list till we encounter call node |
| 2038 | unsigned tmpLclNum = BAD_VAR_NUM; |
| 2039 | var_types tmpType = TYP_UNDEF; |
| 2040 | for (GenTree* treeNode = putArgStkNode->gtNext; treeNode != call; treeNode = treeNode->gtNext) |
| 2041 | { |
| 2042 | if (treeNode->OperIsLocal() || treeNode->OperIsLocalAddr()) |
| 2043 | { |
| 2044 | // This should not be a GT_PHI_ARG. |
| 2045 | assert(treeNode->OperGet() != GT_PHI_ARG); |
| 2046 | |
| 2047 | GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon(); |
| 2048 | LclVarDsc* lclVar = &comp->lvaTable[lcl->gtLclNum]; |
| 2049 | |
                // The fast tail calling criteria permit passing structs of size 1, 2, 4 and 8 as args.
                // It is possible that callerArgLclNum corresponds to such a struct whose stack slot
                // is getting overwritten while setting up a stack arg, and that there are further uses
                // of its fields if such a struct is type-dependently promoted. In this case too
                // we need to introduce a temp.
| 2055 | if ((lcl->gtLclNum == callerArgNum) || (lcl->gtLclNum == callerArgLclNum)) |
| 2056 | { |
| 2057 | // Create tmp and use it in place of callerArgDsc |
| 2058 | if (tmpLclNum == BAD_VAR_NUM) |
| 2059 | { |
| 2060 | // Set tmpType first before calling lvaGrabTemp, as that call invalidates callerArgDsc |
| 2061 | tmpType = genActualType(callerArgDsc->lvaArgType()); |
| 2062 | tmpLclNum = comp->lvaGrabTemp( |
| 2063 | true DEBUGARG("Fast tail call lowering is creating a new local variable" )); |
| 2064 | |
| 2065 | comp->lvaTable[tmpLclNum].lvType = tmpType; |
| 2066 | comp->lvaTable[tmpLclNum].lvDoNotEnregister = comp->lvaTable[lcl->gtLclNum].lvDoNotEnregister; |
| 2067 | } |
| 2068 | |
| 2069 | lcl->SetLclNum(tmpLclNum); |
| 2070 | } |
| 2071 | } |
| 2072 | } |
| 2073 | |
        // If we have created a temp, insert an assignment statement before
        // the first putArgStk node, i.e.
        //     tmpLcl = CallerArg
| 2077 | if (tmpLclNum != BAD_VAR_NUM) |
| 2078 | { |
| 2079 | assert(tmpType != TYP_UNDEF); |
| 2080 | GenTreeLclVar* local = |
| 2081 | new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET); |
| 2082 | GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local); |
| 2083 | ContainCheckRange(local, assignExpr); |
| 2084 | BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr)); |
| 2085 | } |
| 2086 | } |
| 2087 | |
    // Insert a GT_START_NONGC node before the first GT_PUTARG_STK node.
    // Note that if there are no args to be set up on the stack, there is
    // no need to insert a GT_START_NONGC node.
| 2091 | GenTree* startNonGCNode = nullptr; |
| 2092 | if (firstPutArgStk != nullptr) |
| 2093 | { |
| 2094 | startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID); |
| 2095 | BlockRange().InsertBefore(firstPutArgStk, startNonGCNode); |
| 2096 | |
        // GC interruptibility in the following case:
        //     foo(a, b, c, d, e) { bar(a, b, c, d, e); }
        //     bar(a, b, c, d, e) { foo(a, b, d, d, e); }
        //
        // Since the instruction group starting from the instruction that sets up the first
        // stack arg to the end of the tail call is marked as non-GC interruptible,
        // this will form a non-interruptible tight loop causing GC starvation. To fix
        // this we insert a GT_NO_OP before GT_START_NONGC, if the method
        // has a single basic block and is not a GC-safe point. The presence of a single
        // nop outside the non-GC-interruptible region will prevent GC starvation.
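        // With the no-op the block prefix is roughly:
        //     GT_NO_OP           ; interruptible point outside the non-GC region
        //     GT_START_NONGC
        //     GT_PUTARG_STK ...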
| 2107 | if ((comp->fgBBcount == 1) && !(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT)) |
| 2108 | { |
| 2109 | assert(comp->fgFirstBB == comp->compCurBB); |
| 2110 | GenTree* noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID); |
| 2111 | BlockRange().InsertBefore(startNonGCNode, noOp); |
| 2112 | } |
| 2113 | } |
| 2114 | |
| 2115 | // Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be |
| 2116 | // inserted before the args are setup but after the side effects of args are |
| 2117 | // computed. That is, GT_PROF_HOOK node needs to be inserted before GT_START_NONGC |
| 2118 | // node if one exists. |
| 2119 | if (comp->compIsProfilerHookNeeded()) |
| 2120 | { |
| 2121 | InsertProfTailCallHook(call, startNonGCNode); |
| 2122 | } |
| 2123 | |
| 2124 | #else // !FEATURE_FASTTAILCALL |
| 2125 | |
    // The platform chose not to implement the fast tail call mechanism.
    // In such a case we should never reach this method, as
    // IsTailCallViaHelper() is expected to always be true on such a platform.
| 2130 | unreached(); |
| 2131 | #endif |
| 2132 | } |
| 2133 | |
| 2134 | //------------------------------------------------------------------------ |
| 2135 | // LowerTailCallViaHelper: lower a call via the tailcall helper. Morph |
| 2136 | // has already inserted tailcall helper special arguments. This function |
| 2137 | // inserts actual data for some placeholders. |
| 2138 | // |
| 2139 | // For ARM32, AMD64, lower |
| 2140 | // tail.call(void* copyRoutine, void* dummyArg, ...) |
| 2141 | // as |
| 2142 | // Jit_TailCall(void* copyRoutine, void* callTarget, ...) |
| 2143 | // |
| 2144 | // For x86, lower |
| 2145 | // tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg) |
| 2146 | // as |
| 2147 | // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* |
| 2148 | // callTarget) |
| 2149 | // Note that the special arguments are on the stack, whereas the function arguments follow the normal convention. |
| 2150 | // |
| 2151 | // Also inserts PInvoke method epilog if required. |
| 2152 | // |
| 2153 | // Arguments: |
| 2154 | // call - The call node |
| 2155 | // callTarget - The real call target. This is used to replace the dummyArg during lowering. |
| 2156 | // |
| 2157 | // Return Value: |
| 2158 | // Returns control expression tree for making a call to helper Jit_TailCall. |
| 2159 | // |
| 2160 | GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget) |
| 2161 | { |
    // Tail call restrictions, i.e. conditions under which the tail prefix is ignored.
| 2163 | // Most of these checks are already done by importer or fgMorphTailCall(). |
| 2164 | // This serves as a double sanity check. |
| 2165 | assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods |
| 2166 | assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check |
    assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
| 2168 | assert(!comp->compLocallocUsed); // tail call from methods that also do localloc |
| 2169 | |
| 2170 | #ifdef _TARGET_AMD64_ |
| 2171 | assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check |
| 2172 | #endif // _TARGET_AMD64_ |
| 2173 | |
| 2174 | // We expect to see a call that meets the following conditions |
| 2175 | assert(call->IsTailCallViaHelper()); |
| 2176 | assert(callTarget != nullptr); |
| 2177 | |
| 2178 | // The TailCall helper call never returns to the caller and is not GC interruptible. |
| 2179 | // Therefore the block containing the tail call should be a GC safe point to avoid |
| 2180 | // GC starvation. It is legal for the block to be unmarked iff the entry block is a |
| 2181 | // GC safe point, as the entry block trivially dominates every reachable block. |
| 2182 | assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || (comp->fgFirstBB->bbFlags & BBF_GC_SAFE_POINT)); |
| 2183 | |
| 2184 | // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that |
| 2185 | // a method returns. This is a case of caller method has both PInvokes and tail calls. |
| 2186 | if (comp->info.compCallUnmanaged) |
| 2187 | { |
| 2188 | InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call)); |
| 2189 | } |
| 2190 | |
| 2191 | // Remove gtCallAddr from execution order if present. |
| 2192 | if (call->gtCallType == CT_INDIRECT) |
| 2193 | { |
| 2194 | assert(call->gtCallAddr != nullptr); |
| 2195 | |
| 2196 | bool isClosed; |
| 2197 | LIR::ReadOnlyRange callAddrRange = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed); |
| 2198 | assert(isClosed); |
| 2199 | |
| 2200 | BlockRange().Remove(std::move(callAddrRange)); |
| 2201 | } |
| 2202 | |
| 2203 | // The callTarget tree needs to be sequenced. |
| 2204 | LIR::Range callTargetRange = LIR::SeqTree(comp, callTarget); |
| 2205 | |
| 2206 | #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM_) |
| 2207 | |
    // For ARM32 and AMD64, the first argument is the CopyRoutine and the second argument is a placeholder node.
| 2209 | fgArgTabEntry* argEntry; |
| 2210 | |
| 2211 | #ifdef DEBUG |
| 2212 | argEntry = comp->gtArgEntryByArgNum(call, 0); |
| 2213 | assert(argEntry != nullptr); |
| 2214 | assert(argEntry->node->gtOper == GT_PUTARG_REG); |
| 2215 | GenTree* firstArg = argEntry->node->gtOp.gtOp1; |
| 2216 | assert(firstArg->gtOper == GT_CNS_INT); |
| 2217 | #endif |
| 2218 | |
| 2219 | // Replace second arg by callTarget. |
| 2220 | argEntry = comp->gtArgEntryByArgNum(call, 1); |
| 2221 | assert(argEntry != nullptr); |
| 2222 | assert(argEntry->node->gtOper == GT_PUTARG_REG); |
| 2223 | GenTree* secondArg = argEntry->node->gtOp.gtOp1; |
| 2224 | |
| 2225 | ContainCheckRange(callTargetRange); |
| 2226 | BlockRange().InsertAfter(secondArg, std::move(callTargetRange)); |
| 2227 | |
| 2228 | bool isClosed; |
| 2229 | LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(secondArg, &isClosed); |
| 2230 | assert(isClosed); |
| 2231 | |
| 2232 | BlockRange().Remove(std::move(secondArgRange)); |
| 2233 | |
| 2234 | argEntry->node->gtOp.gtOp1 = callTarget; |
| 2235 | |
| 2236 | #elif defined(_TARGET_X86_) |
| 2237 | |
| 2238 | // Verify the special args are what we expect, and replace the dummy args with real values. |
| 2239 | // We need to figure out the size of the outgoing stack arguments, not including the special args. |
| 2240 | // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes. |
| 2241 | // This number is exactly the next slot number in the call's argument info struct. |
| 2242 | unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum(); |
| 2243 | assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args. |
| 2244 | nNewStkArgsWords -= 4; |
| 2245 | |
| 2246 | unsigned numArgs = call->fgArgInfo->ArgCount(); |
| 2247 | |
| 2248 | fgArgTabEntry* argEntry; |
| 2249 | |
| 2250 | // arg 0 == callTarget. |
| 2251 | argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1); |
| 2252 | assert(argEntry != nullptr); |
| 2253 | assert(argEntry->node->gtOper == GT_PUTARG_STK); |
| 2254 | GenTree* arg0 = argEntry->node->gtOp.gtOp1; |
| 2255 | |
| 2256 | ContainCheckRange(callTargetRange); |
| 2257 | BlockRange().InsertAfter(arg0, std::move(callTargetRange)); |
| 2258 | |
| 2259 | bool isClosed; |
| 2260 | LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed); |
| 2261 | assert(isClosed); |
| 2262 | BlockRange().Remove(std::move(secondArgRange)); |
| 2263 | |
| 2264 | argEntry->node->gtOp.gtOp1 = callTarget; |
| 2265 | |
| 2266 | // arg 1 == flags |
| 2267 | argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2); |
| 2268 | assert(argEntry != nullptr); |
| 2269 | assert(argEntry->node->gtOper == GT_PUTARG_STK); |
| 2270 | GenTree* arg1 = argEntry->node->gtOp.gtOp1; |
| 2271 | assert(arg1->gtOper == GT_CNS_INT); |
| 2272 | |
| 2273 | ssize_t tailCallHelperFlags = 1 | // always restore EDI,ESI,EBX |
| 2274 | (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag |
| 2275 | arg1->gtIntCon.gtIconVal = tailCallHelperFlags; |
| 2276 | |
| 2277 | // arg 2 == numberOfNewStackArgsWords |
| 2278 | argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3); |
| 2279 | assert(argEntry != nullptr); |
| 2280 | assert(argEntry->node->gtOper == GT_PUTARG_STK); |
| 2281 | GenTree* arg2 = argEntry->node->gtOp.gtOp1; |
| 2282 | assert(arg2->gtOper == GT_CNS_INT); |
| 2283 | |
| 2284 | arg2->gtIntCon.gtIconVal = nNewStkArgsWords; |
| 2285 | |
| 2286 | #ifdef DEBUG |
| 2287 | // arg 3 == numberOfOldStackArgsWords |
| 2288 | argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4); |
| 2289 | assert(argEntry != nullptr); |
| 2290 | assert(argEntry->node->gtOper == GT_PUTARG_STK); |
| 2291 | GenTree* arg3 = argEntry->node->gtOp.gtOp1; |
| 2292 | assert(arg3->gtOper == GT_CNS_INT); |
| 2293 | #endif // DEBUG |
| 2294 | |
| 2295 | #else |
| 2296 | NYI("LowerTailCallViaHelper" ); |
| 2297 | #endif // _TARGET_* |
| 2298 | |
| 2299 | // Transform this call node into a call to Jit tail call helper. |
| 2300 | call->gtCallType = CT_HELPER; |
| 2301 | call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL); |
| 2302 | call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; |
| 2303 | |
| 2304 | // Lower this as if it were a pure helper call. |
| 2305 | call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER); |
| 2306 | GenTree* result = LowerDirectCall(call); |
| 2307 | |
| 2308 | // Now add back tail call flags for identifying this node as tail call dispatched via helper. |
| 2309 | call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; |
| 2310 | |
| 2311 | #ifdef PROFILING_SUPPORTED |
| 2312 | // Insert profiler tail call hook if needed. |
| 2313 | // Since we don't know the insertion point, pass null for second param. |
| 2314 | if (comp->compIsProfilerHookNeeded()) |
| 2315 | { |
| 2316 | InsertProfTailCallHook(call, nullptr); |
| 2317 | } |
| 2318 | #endif // PROFILING_SUPPORTED |
| 2319 | |
| 2320 | assert(call->IsTailCallViaHelper()); |
| 2321 | |
| 2322 | return result; |
| 2323 | } |
| 2324 | |
| 2325 | #ifndef _TARGET_64BIT_ |
| 2326 | //------------------------------------------------------------------------ |
| 2327 | // Lowering::DecomposeLongCompare: Decomposes a TYP_LONG compare node. |
| 2328 | // |
| 2329 | // Arguments: |
| 2330 | // cmp - the compare node |
| 2331 | // |
| 2332 | // Return Value: |
| 2333 | // The next node to lower. |
| 2334 | // |
| 2335 | // Notes: |
| 2336 | // This is done during lowering because DecomposeLongs handles only nodes |
| 2337 | // that produce TYP_LONG values. Compare nodes may consume TYP_LONG values |
| 2338 | // but produce TYP_INT values. |
| 2339 | // |
| 2340 | GenTree* Lowering::DecomposeLongCompare(GenTree* cmp) |
| 2341 | { |
| 2342 | assert(cmp->gtGetOp1()->TypeGet() == TYP_LONG); |
| 2343 | |
| 2344 | GenTree* src1 = cmp->gtGetOp1(); |
| 2345 | GenTree* src2 = cmp->gtGetOp2(); |
| 2346 | assert(src1->OperIs(GT_LONG)); |
| 2347 | assert(src2->OperIs(GT_LONG)); |
| 2348 | GenTree* loSrc1 = src1->gtGetOp1(); |
| 2349 | GenTree* hiSrc1 = src1->gtGetOp2(); |
| 2350 | GenTree* loSrc2 = src2->gtGetOp1(); |
| 2351 | GenTree* hiSrc2 = src2->gtGetOp2(); |
| 2352 | BlockRange().Remove(src1); |
| 2353 | BlockRange().Remove(src2); |
| 2354 | |
| 2355 | genTreeOps condition = cmp->OperGet(); |
| 2356 | GenTree* loCmp; |
| 2357 | GenTree* hiCmp; |
| 2358 | |
| 2359 | if (cmp->OperIs(GT_EQ, GT_NE)) |
| 2360 | { |
| 2361 | // |
| 2362 | // Transform (x EQ|NE y) into (((x.lo XOR y.lo) OR (x.hi XOR y.hi)) EQ|NE 0). If y is 0 then this can |
| 2363 | // be reduced to just ((x.lo OR x.hi) EQ|NE 0). The OR is expected to set the condition flags so we |
| 2364 | // don't need to generate a redundant compare against 0, we only generate a SETCC|JCC instruction. |
| 2365 | // |
| 2366 | // XOR is used rather than SUB because it is commutative and thus allows swapping the operands when |
| 2367 | // the first happens to be a constant. Usually only the second compare operand is a constant but it's |
| 2368 | // still possible to have a constant on the left side. For example, when src1 is a uint->ulong cast |
| 2369 | // then hiSrc1 would be 0. |
| 2370 | // |
| 2371 | |
| 2372 | if (loSrc1->OperIs(GT_CNS_INT)) |
| 2373 | { |
| 2374 | std::swap(loSrc1, loSrc2); |
| 2375 | } |
| 2376 | |
| 2377 | if (loSrc2->IsIntegralConst(0)) |
| 2378 | { |
| 2379 | BlockRange().Remove(loSrc2); |
| 2380 | loCmp = loSrc1; |
| 2381 | } |
| 2382 | else |
| 2383 | { |
| 2384 | loCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, loSrc1, loSrc2); |
| 2385 | BlockRange().InsertBefore(cmp, loCmp); |
| 2386 | ContainCheckBinary(loCmp->AsOp()); |
| 2387 | } |
| 2388 | |
| 2389 | if (hiSrc1->OperIs(GT_CNS_INT)) |
| 2390 | { |
| 2391 | std::swap(hiSrc1, hiSrc2); |
| 2392 | } |
| 2393 | |
| 2394 | if (hiSrc2->IsIntegralConst(0)) |
| 2395 | { |
| 2396 | BlockRange().Remove(hiSrc2); |
| 2397 | hiCmp = hiSrc1; |
| 2398 | } |
| 2399 | else |
| 2400 | { |
| 2401 | hiCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, hiSrc1, hiSrc2); |
| 2402 | BlockRange().InsertBefore(cmp, hiCmp); |
| 2403 | ContainCheckBinary(hiCmp->AsOp()); |
| 2404 | } |
| 2405 | |
| 2406 | hiCmp = comp->gtNewOperNode(GT_OR, TYP_INT, loCmp, hiCmp); |
| 2407 | BlockRange().InsertBefore(cmp, hiCmp); |
| 2408 | ContainCheckBinary(hiCmp->AsOp()); |
| 2409 | } |
| 2410 | else |
| 2411 | { |
| 2412 | assert(cmp->OperIs(GT_LT, GT_LE, GT_GE, GT_GT)); |
| 2413 | |
| 2414 | // |
| 2415 | // If the compare is signed then (x LT|GE y) can be transformed into ((x SUB y) LT|GE 0). |
| 2416 | // If the compare is unsigned we can still use SUB but we need to check the Carry flag, |
        // not the actual result. In both cases we can simply check the appropriate condition flags
| 2418 | // and ignore the actual result: |
| 2419 | // SUB_LO loSrc1, loSrc2 |
| 2420 | // SUB_HI hiSrc1, hiSrc2 |
| 2421 | // SETCC|JCC (signed|unsigned LT|GE) |
| 2422 | // If loSrc2 happens to be 0 then the first SUB can be eliminated and the second one can |
| 2423 | // be turned into a CMP because the first SUB would have set carry to 0. This effectively |
| 2424 | // transforms a long compare against 0 into an int compare of the high part against 0. |
| 2425 | // |
        // (x LE|GT y) can be transformed into ((x SUB y) LE|GT 0) but checking that a long value
| 2427 | // is greater than 0 is not so easy. We need to turn this into a positive/negative check |
| 2428 | // like the one we get for LT|GE compares, this can be achieved by swapping the compare: |
| 2429 | // (x LE|GT y) becomes (y GE|LT x) |
| 2430 | // |
| 2431 | // Having to swap operands is problematic when the second operand is a constant. The constant |
| 2432 | // moves to the first operand where it cannot be contained and thus needs a register. This can |
| 2433 | // be avoided by changing the constant such that LE|GT becomes LT|GE: |
| 2434 | // (x LE|GT 41) becomes (x LT|GE 42) |
| 2435 | // |
| 2436 | |
| 2437 | if (cmp->OperIs(GT_LE, GT_GT)) |
| 2438 | { |
| 2439 | bool mustSwap = true; |
| 2440 | |
| 2441 | if (loSrc2->OperIs(GT_CNS_INT) && hiSrc2->OperIs(GT_CNS_INT)) |
| 2442 | { |
| 2443 | uint32_t loValue = static_cast<uint32_t>(loSrc2->AsIntCon()->IconValue()); |
| 2444 | uint32_t hiValue = static_cast<uint32_t>(hiSrc2->AsIntCon()->IconValue()); |
| 2445 | uint64_t value = static_cast<uint64_t>(loValue) | (static_cast<uint64_t>(hiValue) << 32); |
| 2446 | uint64_t maxValue = cmp->IsUnsigned() ? UINT64_MAX : INT64_MAX; |
| 2447 | |
| 2448 | if (value != maxValue) |
| 2449 | { |
| 2450 | value++; |
| 2451 | loValue = value & UINT32_MAX; |
| 2452 | hiValue = (value >> 32) & UINT32_MAX; |
| 2453 | loSrc2->AsIntCon()->SetIconValue(loValue); |
| 2454 | hiSrc2->AsIntCon()->SetIconValue(hiValue); |
| 2455 | |
| 2456 | condition = cmp->OperIs(GT_LE) ? GT_LT : GT_GE; |
| 2457 | mustSwap = false; |
| 2458 | } |
| 2459 | } |
| 2460 | |
| 2461 | if (mustSwap) |
| 2462 | { |
| 2463 | std::swap(loSrc1, loSrc2); |
| 2464 | std::swap(hiSrc1, hiSrc2); |
| 2465 | condition = GenTree::SwapRelop(condition); |
| 2466 | } |
| 2467 | } |
| 2468 | |
| 2469 | assert((condition == GT_LT) || (condition == GT_GE)); |
| 2470 | |
| 2471 | if (loSrc2->IsIntegralConst(0)) |
| 2472 | { |
| 2473 | BlockRange().Remove(loSrc2); |
| 2474 | |
| 2475 | // Very conservative dead code removal... but it helps. |
| 2476 | |
| 2477 | if (loSrc1->OperIs(GT_CNS_INT, GT_LCL_VAR, GT_LCL_FLD)) |
| 2478 | { |
| 2479 | BlockRange().Remove(loSrc1); |
| 2480 | } |
| 2481 | else |
| 2482 | { |
| 2483 | loSrc1->SetUnusedValue(); |
| 2484 | } |
| 2485 | |
| 2486 | hiCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, hiSrc1, hiSrc2); |
| 2487 | BlockRange().InsertBefore(cmp, hiCmp); |
| 2488 | ContainCheckCompare(hiCmp->AsOp()); |
| 2489 | } |
| 2490 | else |
| 2491 | { |
| 2492 | loCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, loSrc1, loSrc2); |
| 2493 | hiCmp = comp->gtNewOperNode(GT_SUB_HI, TYP_INT, hiSrc1, hiSrc2); |
| 2494 | BlockRange().InsertBefore(cmp, loCmp, hiCmp); |
| 2495 | ContainCheckCompare(loCmp->AsOp()); |
| 2496 | ContainCheckBinary(hiCmp->AsOp()); |
| 2497 | |
| 2498 | // |
            // Try to move the first SUB_HI operand right in front of it, this allows using
            // a single temporary register instead of 2 (one for CMP and one for SUB_HI). Do
            // this only for locals as they won't change condition flags. Note that we could
            // move constants (except 0, which generates XOR reg, reg) but it's extremely rare
            // to have a constant as the first operand.
| 2504 | // |
| 2505 | |
| 2506 | if (hiSrc1->OperIs(GT_LCL_VAR, GT_LCL_FLD)) |
| 2507 | { |
| 2508 | BlockRange().Remove(hiSrc1); |
| 2509 | BlockRange().InsertBefore(hiCmp, hiSrc1); |
| 2510 | } |
| 2511 | } |
| 2512 | } |
| 2513 | |
| 2514 | hiCmp->gtFlags |= GTF_SET_FLAGS; |
| 2515 | if (hiCmp->IsValue()) |
| 2516 | { |
| 2517 | hiCmp->SetUnusedValue(); |
| 2518 | } |
| 2519 | |
| 2520 | LIR::Use cmpUse; |
| 2521 | if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE)) |
| 2522 | { |
| 2523 | BlockRange().Remove(cmp); |
| 2524 | |
| 2525 | GenTree* jcc = cmpUse.User(); |
| 2526 | jcc->gtOp.gtOp1 = nullptr; |
| 2527 | jcc->ChangeOper(GT_JCC); |
| 2528 | jcc->gtFlags |= (cmp->gtFlags & GTF_UNSIGNED) | GTF_USE_FLAGS; |
| 2529 | jcc->AsCC()->gtCondition = condition; |
| 2530 | } |
| 2531 | else |
| 2532 | { |
| 2533 | cmp->gtOp.gtOp1 = nullptr; |
| 2534 | cmp->gtOp.gtOp2 = nullptr; |
| 2535 | cmp->ChangeOper(GT_SETCC); |
| 2536 | cmp->gtFlags |= GTF_USE_FLAGS; |
| 2537 | cmp->AsCC()->gtCondition = condition; |
| 2538 | } |
| 2539 | |
| 2540 | return cmp->gtNext; |
| 2541 | } |
| 2542 | #endif // !_TARGET_64BIT_ |
| 2543 | |
| 2544 | //------------------------------------------------------------------------ |
| 2545 | // Lowering::OptimizeConstCompare: Performs various "compare with const" optimizations. |
| 2546 | // |
| 2547 | // Arguments: |
| 2548 | // cmp - the compare node |
| 2549 | // |
| 2550 | // Return Value: |
| 2551 | // The original compare node if lowering should proceed as usual or the next node |
| 2552 | // to lower if the compare node was changed in such a way that lowering is no |
| 2553 | // longer needed. |
| 2554 | // |
| 2555 | // Notes: |
| 2556 | // - Narrow operands to enable memory operand containment (XARCH specific). |
| 2557 | // - Transform cmp(and(x, y), 0) into test(x, y) (XARCH/Arm64 specific but could |
| 2558 | // be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added). |
| 2559 | // - Transform TEST(x, LSH(1, y)) into BT(x, y) (XARCH specific) |
| 2560 | // - Transform RELOP(OP, 0) into SETCC(OP) or JCC(OP) if OP can set the |
| 2561 | // condition flags appropriately (XARCH/ARM64 specific but could be extended |
| 2562 | // to ARM32 as well if ARM32 codegen supports GTF_SET_FLAGS). |
| 2563 | // |
| 2564 | GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) |
| 2565 | { |
| 2566 | assert(cmp->gtGetOp2()->IsIntegralConst()); |
| 2567 | |
| 2568 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
| 2569 | GenTree* op1 = cmp->gtGetOp1(); |
| 2570 | var_types op1Type = op1->TypeGet(); |
| 2571 | GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); |
| 2572 | ssize_t op2Value = op2->IconValue(); |
| 2573 | |
| 2574 | #ifdef _TARGET_XARCH_ |
| 2575 | if (IsContainableMemoryOp(op1) && varTypeIsSmall(op1Type) && genSmallTypeCanRepresentValue(op1Type, op2Value)) |
| 2576 | { |
| 2577 | // |
| 2578 | // If op1's type is small then try to narrow op2 so it has the same type as op1. |
| 2579 | // Small types are usually used by memory loads and if both compare operands have |
| 2580 | // the same type then the memory load can be contained. In certain situations |
| 2581 | // (e.g "cmp ubyte, 200") we also get a smaller instruction encoding. |
| 2582 | // |
| 2583 | |
| 2584 | op2->gtType = op1Type; |
| 2585 | } |
| 2586 | else |
| 2587 | #endif |
| 2588 | if (op1->OperIs(GT_CAST) && !op1->gtOverflow()) |
| 2589 | { |
| 2590 | GenTreeCast* cast = op1->AsCast(); |
| 2591 | var_types castToType = cast->CastToType(); |
| 2592 | GenTree* castOp = cast->gtGetOp1(); |
| 2593 | |
| 2594 | if (((castToType == TYP_BOOL) || (castToType == TYP_UBYTE)) && FitsIn<UINT8>(op2Value)) |
| 2595 | { |
| 2596 | // |
| 2597 | // Since we're going to remove the cast we need to be able to narrow the cast operand |
            // to the cast type. This can be done safely only for certain opers (e.g. AND, OR, XOR).
            // Some opers just can't be narrowed (e.g. DIV, MUL) while others could be narrowed but
            // doing so would produce incorrect results (e.g. RSZ, RSH).
            //
            // The below list of handled opers is conservative but enough to handle the most common
            // situations. In particular this includes CALL; sometimes the JIT unnecessarily widens
            // the result of bool-returning calls.
| 2605 | // |
| 2606 | bool removeCast = |
| 2607 | #ifdef _TARGET_ARM64_ |
| 2608 | (op2Value == 0) && cmp->OperIs(GT_EQ, GT_NE, GT_GT) && |
| 2609 | #endif |
| 2610 | (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() |
| 2611 | #ifdef _TARGET_XARCH_ |
| 2612 | || IsContainableMemoryOp(castOp) |
| 2613 | #endif |
| 2614 | ); |
| 2615 | |
| 2616 | if (removeCast) |
| 2617 | { |
| 2618 | assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation |
| 2619 | |
| 2620 | #ifdef _TARGET_ARM64_ |
| 2621 | bool cmpEq = cmp->OperIs(GT_EQ); |
| 2622 | |
| 2623 | cmp->SetOperRaw(cmpEq ? GT_TEST_EQ : GT_TEST_NE); |
| 2624 | op2->SetIconValue(0xff); |
| 2625 | op2->gtType = castOp->gtType; |
| 2626 | #else |
| 2627 | castOp->gtType = castToType; |
| 2628 | op2->gtType = castToType; |
| 2629 | #endif |
| 2630 | // If we have any contained memory ops on castOp, they must now not be contained. |
| 2631 | if (castOp->OperIsLogical()) |
| 2632 | { |
| 2633 | GenTree* op1 = castOp->gtGetOp1(); |
| 2634 | if ((op1 != nullptr) && !op1->IsCnsIntOrI()) |
| 2635 | { |
| 2636 | op1->ClearContained(); |
| 2637 | } |
| 2638 | GenTree* op2 = castOp->gtGetOp2(); |
| 2639 | if ((op2 != nullptr) && !op2->IsCnsIntOrI()) |
| 2640 | { |
| 2641 | op2->ClearContained(); |
| 2642 | } |
| 2643 | } |
| 2644 | cmp->gtOp.gtOp1 = castOp; |
| 2645 | |
| 2646 | BlockRange().Remove(cast); |
| 2647 | } |
| 2648 | } |
| 2649 | } |
| 2650 | else if (op1->OperIs(GT_AND) && cmp->OperIs(GT_EQ, GT_NE)) |
| 2651 | { |
| 2652 | // |
| 2653 | // Transform ((x AND y) EQ|NE 0) into (x TEST_EQ|TEST_NE y) when possible. |
| 2654 | // |
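| | // For example (illustrative only): NE(AND(x, y), 0) becomes TEST_NE(x, y), so codegen can emit |
| | // "test x, y" instead of an "and" followed by a "cmp" against zero. |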
| 2655 | |
| 2656 | GenTree* andOp1 = op1->gtGetOp1(); |
| 2657 | GenTree* andOp2 = op1->gtGetOp2(); |
| 2658 | |
| 2659 | if (op2Value != 0) |
| 2660 | { |
| 2661 | // |
| 2662 | // If we don't have a 0 compare we can get one by transforming ((x AND mask) EQ|NE mask) |
| 2663 | // into ((x AND mask) NE|EQ 0) when mask is a single bit. |
| 2664 | // |
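| | // For example (illustrative only): EQ(AND(x, 4), 4) is equivalent to NE(AND(x, 4), 0) because |
| | // the AND result can only be 0 or 4 when the mask is a single bit. |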
| 2665 | |
| 2666 | if (isPow2(static_cast<size_t>(op2Value)) && andOp2->IsIntegralConst(op2Value)) |
| 2667 | { |
| 2668 | op2Value = 0; |
| 2669 | op2->SetIconValue(0); |
| 2670 | cmp->SetOperRaw(GenTree::ReverseRelop(cmp->OperGet())); |
| 2671 | } |
| 2672 | } |
| 2673 | |
| 2674 | if (op2Value == 0) |
| 2675 | { |
| 2676 | BlockRange().Remove(op1); |
| 2677 | BlockRange().Remove(op2); |
| 2678 | |
| 2679 | cmp->SetOperRaw(cmp->OperIs(GT_EQ) ? GT_TEST_EQ : GT_TEST_NE); |
| 2680 | cmp->gtOp.gtOp1 = andOp1; |
| 2681 | cmp->gtOp.gtOp2 = andOp2; |
| 2682 | // We will re-evaluate containment below |
| 2683 | andOp1->ClearContained(); |
| 2684 | andOp2->ClearContained(); |
| 2685 | |
| 2686 | #ifdef _TARGET_XARCH_ |
| 2687 | if (IsContainableMemoryOp(andOp1) && andOp2->IsIntegralConst()) |
| 2688 | { |
| 2689 | // |
| 2690 | // For "test" we only care about the bits that are set in the second operand (mask). |
| 2691 | // If the mask fits in a small type then we can narrow both operands to generate a "test" |
| 2692 | // instruction with a smaller encoding ("test" does not have an r/m32, imm8 form) and avoid |
| 2693 | // a widening load in some cases. |
| 2694 | // |
| 2695 | // For 16 bit operands we narrow only if the memory operand is already 16 bit. This matches |
| 2696 | // the behavior of a previous implementation and avoids adding more cases where we generate |
| 2697 | // 16 bit instructions that require a length changing prefix (0x66). These suffer from |
| 2698 | // significant decoder stalls on Intel CPUs. |
| 2699 | // |
| 2700 | // We could also do this for 64 bit masks that fit into 32 bit but it doesn't help. |
| 2701 | // In such cases morph narrows down the existing GT_AND by inserting a cast between it and |
| 2702 | // the memory operand so we'd need to add more code to recognize and eliminate that cast. |
| 2703 | // |
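| | // For example (illustrative only): TEST_NE(IND<int>(addr), 0x20) can be narrowed to TYP_UBYTE so |
| | // codegen emits "test byte ptr [addr], 0x20" (1 byte immediate) instead of |
| | // "test dword ptr [addr], 0x20" (4 byte immediate). |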
| 2704 | |
| 2705 | size_t mask = static_cast<size_t>(andOp2->AsIntCon()->IconValue()); |
| 2706 | |
| 2707 | if (FitsIn<UINT8>(mask)) |
| 2708 | { |
| 2709 | andOp1->gtType = TYP_UBYTE; |
| 2710 | andOp2->gtType = TYP_UBYTE; |
| 2711 | } |
| 2712 | else if (FitsIn<UINT16>(mask) && genTypeSize(andOp1) == 2) |
| 2713 | { |
| 2714 | andOp1->gtType = TYP_USHORT; |
| 2715 | andOp2->gtType = TYP_USHORT; |
| 2716 | } |
| 2717 | } |
| 2718 | #endif |
| 2719 | } |
| 2720 | } |
| 2721 | |
| 2722 | if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE)) |
| 2723 | { |
| 2724 | #ifdef _TARGET_XARCH_ |
| 2725 | // |
| 2726 | // Transform TEST_EQ|NE(x, LSH(1, y)) into BT(x, y) when possible. Using BT |
| 2727 | // results in smaller and faster code. It also doesn't have special register |
| 2728 | // requirements, unlike LSH that requires the shift count to be in ECX. |
| 2729 | // Note that BT has the same behavior as LSH when the bit index exceeds the |
| 2730 | // operand bit size - it uses (bit_index MOD bit_size). |
| 2731 | // |
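| | // For example (illustrative only): JTRUE(TEST_NE(x, LSH(1, y))) becomes BT(x, y) followed by an |
| | // unsigned JCC(GT_LT), i.e. "bt x, y" + "jc <target>", since BT copies the selected bit into CF. |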
| 2732 | |
| 2733 | GenTree* lsh = cmp->gtGetOp2(); |
| 2734 | LIR::Use cmpUse; |
| 2735 | |
| 2736 | if (lsh->OperIs(GT_LSH) && varTypeIsIntOrI(lsh->TypeGet()) && lsh->gtGetOp1()->IsIntegralConst(1) && |
| 2737 | BlockRange().TryGetUse(cmp, &cmpUse)) |
| 2738 | { |
| 2739 | genTreeOps condition = cmp->OperIs(GT_TEST_NE) ? GT_LT : GT_GE; |
| 2740 | |
| 2741 | cmp->SetOper(GT_BT); |
| 2742 | cmp->gtType = TYP_VOID; |
| 2743 | cmp->gtFlags |= GTF_SET_FLAGS; |
| 2744 | cmp->gtOp.gtOp2 = lsh->gtGetOp2(); |
| 2745 | cmp->gtGetOp2()->ClearContained(); |
| 2746 | |
| 2747 | BlockRange().Remove(lsh->gtGetOp1()); |
| 2748 | BlockRange().Remove(lsh); |
| 2749 | |
| 2750 | GenTreeCC* cc; |
| 2751 | |
| 2752 | if (cmpUse.User()->OperIs(GT_JTRUE)) |
| 2753 | { |
| 2754 | cmpUse.User()->ChangeOper(GT_JCC); |
| 2755 | cc = cmpUse.User()->AsCC(); |
| 2756 | cc->gtCondition = condition; |
| 2757 | } |
| 2758 | else |
| 2759 | { |
| 2760 | cc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, TYP_INT); |
| 2761 | BlockRange().InsertAfter(cmp, cc); |
| 2762 | cmpUse.ReplaceWith(comp, cc); |
| 2763 | } |
| 2764 | |
| 2765 | cc->gtFlags |= GTF_USE_FLAGS | GTF_UNSIGNED; |
| 2766 | |
| 2767 | return cmp->gtNext; |
| 2768 | } |
| 2769 | #endif // _TARGET_XARCH_ |
| 2770 | } |
| 2771 | else if (cmp->OperIs(GT_EQ, GT_NE)) |
| 2772 | { |
| 2773 | GenTree* op1 = cmp->gtGetOp1(); |
| 2774 | GenTree* op2 = cmp->gtGetOp2(); |
| 2775 | |
| 2776 | // TODO-CQ: right now the below peep is inexpensive and gets the benefit in most |
| 2777 | // cases because in the majority of cases op1, op2 and cmp would be in that order in |
| 2778 | // execution. In general we should be able to check that all the nodes that come |
| 2779 | // after op1 do not modify the flags so that it is safe to avoid generating a |
| 2780 | // test instruction. |
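| | // For example (illustrative only): when AND(x, y), 0 and NE appear consecutively in execution order, |
| | // JTRUE(NE(AND(x, y), 0)) becomes AND(x, y) marked GTF_SET_FLAGS followed by JCC(GT_NE), |
| | // so no separate test/cmp instruction is needed. |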
| 2781 | |
| 2782 | if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) && |
| 2783 | #ifdef _TARGET_XARCH_ |
| 2784 | op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG)) |
| 2785 | #else // _TARGET_ARM64_ |
| 2786 | op1->OperIs(GT_AND, GT_ADD, GT_SUB)) |
| 2787 | #endif |
| 2788 | { |
| 2789 | op1->gtFlags |= GTF_SET_FLAGS; |
| 2790 | op1->SetUnusedValue(); |
| 2791 | |
| 2792 | BlockRange().Remove(op2); |
| 2793 | |
| 2794 | GenTree* next = cmp->gtNext; |
| 2795 | GenTree* cc; |
| 2796 | genTreeOps ccOp; |
| 2797 | LIR::Use cmpUse; |
| 2798 | |
| 2799 | // Fast check for the common case - relop used by a JTRUE that immediately follows it. |
| 2800 | if ((next != nullptr) && next->OperIs(GT_JTRUE) && (next->gtGetOp1() == cmp)) |
| 2801 | { |
| 2802 | cc = next; |
| 2803 | ccOp = GT_JCC; |
| 2804 | next = nullptr; |
| 2805 | BlockRange().Remove(cmp); |
| 2806 | } |
| 2807 | else if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE)) |
| 2808 | { |
| 2809 | cc = cmpUse.User(); |
| 2810 | ccOp = GT_JCC; |
| 2811 | next = nullptr; |
| 2812 | BlockRange().Remove(cmp); |
| 2813 | } |
| 2814 | else // The relop is not used by a JTRUE or it is not used at all. |
| 2815 | { |
| 2816 | // Transform the relop node into a SETCC. If it's not used we could remove |
| 2817 | // it completely but that means doing more work to handle a rare case. |
| 2818 | cc = cmp; |
| 2819 | ccOp = GT_SETCC; |
| 2820 | } |
| 2821 | |
| 2822 | genTreeOps condition = cmp->OperGet(); |
| 2823 | cc->ChangeOper(ccOp); |
| 2824 | cc->AsCC()->gtCondition = condition; |
| 2825 | cc->gtFlags |= GTF_USE_FLAGS | (cmp->gtFlags & GTF_UNSIGNED); |
| 2826 | |
| 2827 | return next; |
| 2828 | } |
| 2829 | } |
| 2830 | #endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
| 2831 | |
| 2832 | return cmp; |
| 2833 | } |
| 2834 | |
| 2835 | //------------------------------------------------------------------------ |
| 2836 | // Lowering::LowerCompare: Lowers a compare node. |
| 2837 | // |
| 2838 | // Arguments: |
| 2839 | // cmp - the compare node |
| 2840 | // |
| 2841 | // Return Value: |
| 2842 | // The next node to lower. |
| 2843 | // |
| 2844 | GenTree* Lowering::LowerCompare(GenTree* cmp) |
| 2845 | { |
| 2846 | #ifndef _TARGET_64BIT_ |
| 2847 | if (cmp->gtGetOp1()->TypeGet() == TYP_LONG) |
| 2848 | { |
| 2849 | return DecomposeLongCompare(cmp); |
| 2850 | } |
| 2851 | #endif |
| 2852 | |
| 2853 | if (cmp->gtGetOp2()->IsIntegralConst() && !comp->opts.MinOpts()) |
| 2854 | { |
| 2855 | GenTree* next = OptimizeConstCompare(cmp); |
| 2856 | |
| 2857 | // If OptimizeConstCompare returns the compare node as "next" then we need to continue lowering. |
| 2858 | if (next != cmp) |
| 2859 | { |
| 2860 | return next; |
| 2861 | } |
| 2862 | } |
| 2863 | |
| 2864 | #ifdef _TARGET_XARCH_ |
| 2865 | if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet()) |
| 2866 | { |
| 2867 | if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet())) |
| 2868 | { |
| 2869 | // |
| 2870 | // If both operands have the same type then codegen will use the common operand type to |
| 2871 | // determine the instruction type. For small types this would result in performing a |
| 2872 | // signed comparison of two small unsigned values without zero extending them to TYP_INT |
| 2873 | // which is incorrect. Note that making the comparison unsigned doesn't imply that codegen |
| 2874 | // has to generate a small comparison; it can still correctly generate a TYP_INT comparison. |
| 2875 | // |
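| | // For example (illustrative only): for two TYP_UBYTE operands 0x80 and 0x01, a signed byte |
| | // compare would treat 0x80 as -128 and order the values incorrectly; GTF_UNSIGNED avoids that. |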
| 2876 | |
| 2877 | cmp->gtFlags |= GTF_UNSIGNED; |
| 2878 | } |
| 2879 | } |
| 2880 | #endif // _TARGET_XARCH_ |
| 2881 | ContainCheckCompare(cmp->AsOp()); |
| 2882 | return cmp->gtNext; |
| 2883 | } |
| 2884 | |
| 2885 | //------------------------------------------------------------------------ |
| 2886 | // Lowering::LowerJTrue: Lowers a JTRUE node. |
| 2887 | // |
| 2888 | // Arguments: |
| 2889 | // jtrue - the JTRUE node |
| 2890 | // |
| 2891 | // Return Value: |
| 2892 | // The next node to lower (usually nullptr). |
| 2893 | // |
| 2894 | // Notes: |
| 2895 | // On ARM64 this may remove the JTRUE node and transform its associated |
| 2896 | // relop into a JCMP node. |
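| | // For example (illustrative only): JTRUE(EQ(x, 0)) becomes JCMP(x, 0) with GTF_JCMP_EQ, for |
| | // which codegen emits "cbz x, <target>"; JTRUE(TEST_NE(x, 8)) becomes JCMP(x, 8) with |
| | // GTF_JCMP_TST, for which codegen emits "tbnz x, #3, <target>". |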
| 2897 | // |
| 2898 | GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) |
| 2899 | { |
| 2900 | #ifdef _TARGET_ARM64_ |
| 2901 | GenTree* relop = jtrue->gtGetOp1(); |
| 2902 | GenTree* relopOp2 = relop->gtOp.gtGetOp2(); |
| 2903 | |
| 2904 | if ((relop->gtNext == jtrue) && relopOp2->IsCnsIntOrI()) |
| 2905 | { |
| 2906 | bool useJCMP = false; |
| 2907 | unsigned flags = 0; |
| 2908 | |
| 2909 | if (relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0)) |
| 2910 | { |
| 2911 | // Codegen will use cbz or cbnz, which do not affect the flag register |
| 2912 | flags = relop->OperIs(GT_EQ) ? GTF_JCMP_EQ : 0; |
| 2913 | useJCMP = true; |
| 2914 | } |
| 2915 | else if (relop->OperIs(GT_TEST_EQ, GT_TEST_NE) && isPow2(relopOp2->AsIntCon()->IconValue())) |
| 2916 | { |
| 2917 | // Codegen will use tbz or tbnz, which do not affect the flag register |
| 2918 | flags = GTF_JCMP_TST | (relop->OperIs(GT_TEST_EQ) ? GTF_JCMP_EQ : 0); |
| 2919 | useJCMP = true; |
| 2920 | } |
| 2921 | |
| 2922 | if (useJCMP) |
| 2923 | { |
| 2924 | relop->SetOper(GT_JCMP); |
| 2925 | relop->gtFlags &= ~(GTF_JCMP_TST | GTF_JCMP_EQ); |
| 2926 | relop->gtFlags |= flags; |
| 2927 | relop->gtType = TYP_VOID; |
| 2928 | |
| 2929 | relopOp2->SetContained(); |
| 2930 | |
| 2931 | BlockRange().Remove(jtrue); |
| 2932 | |
| 2933 | assert(relop->gtNext == nullptr); |
| 2934 | return nullptr; |
| 2935 | } |
| 2936 | } |
| 2937 | #endif // _TARGET_ARM64_ |
| 2938 | |
| 2939 | ContainCheckJTrue(jtrue); |
| 2940 | |
| 2941 | assert(jtrue->gtNext == nullptr); |
| 2942 | return nullptr; |
| 2943 | } |
| 2944 | |
| 2945 | // Lower "jmp <method>" tail call to insert PInvoke method epilog if required. |
| 2946 | void Lowering::LowerJmpMethod(GenTree* jmp) |
| 2947 | { |
| 2948 | assert(jmp->OperGet() == GT_JMP); |
| 2949 | |
| 2950 | JITDUMP("lowering GT_JMP\n"); |
| 2951 | DISPNODE(jmp); |
| 2952 | JITDUMP("============"); |
| 2953 | |
| 2954 | // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that |
| 2955 | // a method returns. |
| 2956 | if (comp->info.compCallUnmanaged) |
| 2957 | { |
| 2958 | InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(jmp)); |
| 2959 | } |
| 2960 | } |
| 2961 | |
| 2962 | // Lower GT_RETURN node to insert PInvoke method epilog if required. |
| 2963 | void Lowering::LowerRet(GenTree* ret) |
| 2964 | { |
| 2965 | assert(ret->OperGet() == GT_RETURN); |
| 2966 | |
| 2967 | JITDUMP("lowering GT_RETURN\n"); |
| 2968 | DISPNODE(ret); |
| 2969 | JITDUMP("============"); |
| 2970 | |
| 2971 | #if defined(_TARGET_AMD64_) && defined(FEATURE_SIMD) |
| 2972 | GenTreeUnOp* const unOp = ret->AsUnOp(); |
| 2973 | if ((unOp->TypeGet() == TYP_LONG) && (unOp->gtOp1->TypeGet() == TYP_SIMD8)) |
| 2974 | { |
| 2975 | GenTreeUnOp* bitcast = new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, TYP_LONG, unOp->gtOp1, nullptr); |
| 2976 | unOp->gtOp1 = bitcast; |
| 2977 | BlockRange().InsertBefore(unOp, bitcast); |
| 2978 | } |
| 2979 | #endif // _TARGET_AMD64_ |
| 2980 | |
| 2981 | // A method doing PInvokes has exactly one return block unless it has tail calls. |
| 2982 | if (comp->info.compCallUnmanaged && (comp->compCurBB == comp->genReturnBB)) |
| 2983 | { |
| 2984 | InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret)); |
| 2985 | } |
| 2986 | ContainCheckRet(ret->AsOp()); |
| 2987 | } |
| 2988 | |
| 2989 | GenTree* Lowering::LowerDirectCall(GenTreeCall* call) |
| 2990 | { |
| 2991 | noway_assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_HELPER); |
| 2992 | |
| 2993 | // Don't support tail calling helper methods. |
| 2994 | // But we might encounter tail calls dispatched via a JIT helper, which appear as tail calls to a helper. |
| 2995 | noway_assert(!call->IsTailCall() || call->IsTailCallViaHelper() || call->gtCallType == CT_USER_FUNC); |
| 2996 | |
| 2997 | // Non-virtual direct/indirect calls: Work out if the address of the |
| 2998 | // call is known at JIT time. If not it is either an indirect call |
| 2999 | // or the address must be accessed via a single/double indirection. |
| 3000 | |
| 3001 | void* addr; |
| 3002 | InfoAccessType accessType; |
| 3003 | CorInfoHelpFunc helperNum = comp->eeGetHelperNum(call->gtCallMethHnd); |
| 3004 | |
| 3005 | #ifdef FEATURE_READYTORUN_COMPILER |
| 3006 | if (call->gtEntryPoint.addr != nullptr) |
| 3007 | { |
| 3008 | accessType = call->gtEntryPoint.accessType; |
| 3009 | addr = call->gtEntryPoint.addr; |
| 3010 | } |
| 3011 | else |
| 3012 | #endif |
| 3013 | if (call->gtCallType == CT_HELPER) |
| 3014 | { |
| 3015 | noway_assert(helperNum != CORINFO_HELP_UNDEF); |
| 3016 | |
| 3017 | // The convention for getHelperFtn seems to be (it's not documented) that it either |
| 3018 | // returns the helper's address directly, or returns null and sets pAddr to another |
| 3019 | // address that must be indirected through |
| 3020 | void* pAddr; |
| 3021 | addr = comp->info.compCompHnd->getHelperFtn(helperNum, (void**)&pAddr); |
| 3022 | |
| 3023 | if (addr != nullptr) |
| 3024 | { |
| 3025 | assert(pAddr == nullptr); |
| 3026 | accessType = IAT_VALUE; |
| 3027 | } |
| 3028 | else |
| 3029 | { |
| 3030 | accessType = IAT_PVALUE; |
| 3031 | addr = pAddr; |
| 3032 | } |
| 3033 | } |
| 3034 | else |
| 3035 | { |
| 3036 | noway_assert(helperNum == CORINFO_HELP_UNDEF); |
| 3037 | |
| 3038 | CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY; |
| 3039 | |
| 3040 | if (call->IsSameThis()) |
| 3041 | { |
| 3042 | aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); |
| 3043 | } |
| 3044 | |
| 3045 | if (!call->NeedsNullCheck()) |
| 3046 | { |
| 3047 | aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); |
| 3048 | } |
| 3049 | |
| 3050 | CORINFO_CONST_LOOKUP addrInfo; |
| 3051 | comp->info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo, aflags); |
| 3052 | |
| 3053 | accessType = addrInfo.accessType; |
| 3054 | addr = addrInfo.addr; |
| 3055 | } |
| 3056 | |
| 3057 | GenTree* result = nullptr; |
| 3058 | switch (accessType) |
| 3059 | { |
| 3060 | case IAT_VALUE: |
| 3061 | // Non-virtual direct call to known address |
| 3062 | if (!IsCallTargetInRange(addr) || call->IsTailCall()) |
| 3063 | { |
| 3064 | result = AddrGen(addr); |
| 3065 | } |
| 3066 | else |
| 3067 | { |
| 3068 | // a direct call within range of hardware relative call instruction |
| 3069 | // stash the address for codegen |
| 3070 | call->gtDirectCallAddress = addr; |
| 3071 | } |
| 3072 | break; |
| 3073 | |
| 3074 | case IAT_PVALUE: |
| 3075 | { |
| 3076 | // Non-virtual direct calls to addresses accessed by |
| 3077 | // a single indirection. |
| 3078 | GenTree* cellAddr = AddrGen(addr); |
| 3079 | GenTree* indir = Ind(cellAddr); |
| 3080 | result = indir; |
| 3081 | break; |
| 3082 | } |
| 3083 | |
| 3084 | case IAT_PPVALUE: |
| 3085 | // Non-virtual direct calls to addresses accessed by |
| 3086 | // a double indirection. |
| 3087 | // |
| 3088 | // Double-indirection. Load the address into a register |
| 3089 | // and call indirectly through the register |
| 3090 | noway_assert(helperNum == CORINFO_HELP_UNDEF); |
| 3091 | result = AddrGen(addr); |
| 3092 | result = Ind(Ind(result)); |
| 3093 | break; |
| 3094 | |
| 3095 | case IAT_RELPVALUE: |
| 3096 | { |
| 3097 | // Non-virtual direct calls to addresses accessed by |
| 3098 | // a single relative indirection. |
| 3099 | GenTree* cellAddr = AddrGen(addr); |
| 3100 | GenTree* indir = Ind(cellAddr); |
| 3101 | result = comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, indir, AddrGen(addr)); |
| 3102 | break; |
| 3103 | } |
| 3104 | |
| 3105 | default: |
| 3106 | noway_assert(!"Bad accessType"); |
| 3107 | break; |
| 3108 | } |
| 3109 | |
| 3110 | return result; |
| 3111 | } |
| 3112 | |
| 3113 | GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call) |
| 3114 | { |
| 3115 | noway_assert(call->gtCallType == CT_USER_FUNC); |
| 3116 | |
| 3117 | assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) & |
| 3118 | (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)); |
| 3119 | |
| 3120 | GenTree* thisArgNode; |
| 3121 | if (call->IsTailCallViaHelper()) |
| 3122 | { |
| 3123 | #ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args. |
| 3124 | const unsigned argNum = 0; |
| 3125 | #else // !_TARGET_X86_ |
| 3126 | // In case of helper dispatched tail calls, "thisptr" will be the third arg. |
| 3127 | // The first two args are: real call target and addr of args copy routine. |
| 3128 | const unsigned argNum = 2; |
| 3129 | #endif // !_TARGET_X86_ |
| 3130 | |
| 3131 | fgArgTabEntry* thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum); |
| 3132 | thisArgNode = thisArgTabEntry->node; |
| 3133 | } |
| 3134 | else |
| 3135 | { |
| 3136 | thisArgNode = comp->gtGetThisArg(call); |
| 3137 | } |
| 3138 | |
| 3139 | assert(thisArgNode->gtOper == GT_PUTARG_REG); |
| 3140 | GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1; |
| 3141 | GenTree* thisExpr = originalThisExpr; |
| 3142 | |
| 3143 | // We're going to use the 'this' expression multiple times, so make a local to copy it. |
| 3144 | |
| 3145 | unsigned lclNum; |
| 3146 | |
| 3147 | #ifdef _TARGET_X86_ |
| 3148 | if (call->IsTailCallViaHelper() && originalThisExpr->IsLocal()) |
| 3149 | { |
| 3150 | // For ordering purposes for the special tailcall arguments on x86, we forced the |
| 3151 | // 'this' pointer in this case to a local in Compiler::fgMorphTailCall(). |
| 3152 | // We could possibly use this case to remove copies for all architectures and non-tailcall |
| 3153 | // calls by creating a new lcl var or lcl field reference, as is done in the |
| 3154 | // LowerVirtualVtableCall() code. |
| 3155 | assert(originalThisExpr->OperGet() == GT_LCL_VAR); |
| 3156 | lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum(); |
| 3157 | } |
| 3158 | else |
| 3159 | #endif // _TARGET_X86_ |
| 3160 | { |
| 3161 | unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call")); |
| 3162 | |
| 3163 | LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode); |
| 3164 | ReplaceWithLclVar(thisExprUse, delegateInvokeTmp); |
| 3165 | |
| 3166 | thisExpr = thisExprUse.Def(); // it's changed; reload it. |
| 3167 | lclNum = delegateInvokeTmp; |
| 3168 | } |
| 3169 | |
| 3170 | // replace original expression feeding into thisPtr with |
| 3171 | // [originalThis + offsetOfDelegateInstance] |
| 3172 | |
| 3173 | GenTree* newThisAddr = new (comp, GT_LEA) |
| 3174 | GenTreeAddrMode(TYP_BYREF, thisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance); |
| 3175 | |
| 3176 | GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr); |
| 3177 | |
| 3178 | BlockRange().InsertAfter(thisExpr, newThisAddr, newThis); |
| 3179 | |
| 3180 | thisArgNode->gtOp.gtOp1 = newThis; |
| 3181 | ContainCheckIndir(newThis->AsIndir()); |
| 3182 | |
| 3183 | // the control target is |
| 3184 | // [originalThis + firstTgtOffs] |
| 3185 | |
| 3186 | GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET); |
| 3187 | |
| 3188 | unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget; |
| 3189 | GenTree* result = new (comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs); |
| 3190 | GenTree* callTarget = Ind(result); |
| 3191 | |
| 3192 | // don't need to sequence and insert this tree, caller will do it |
| 3193 | |
| 3194 | return callTarget; |
| 3195 | } |
| 3196 | |
| 3197 | GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call) |
| 3198 | { |
| 3199 | #ifdef _TARGET_X86_ |
| 3200 | if (call->gtCallCookie != nullptr) |
| 3201 | { |
| 3202 | NYI_X86("Morphing indirect non-virtual call with non-standard args"); |
| 3203 | } |
| 3204 | #endif |
| 3205 | |
| 3206 | // Indirect cookie calls get transformed by fgMorphArgs into indirect calls with non-standard args. |
| 3207 | // Hence we should never see this type of call in lower. |
| 3208 | |
| 3209 | noway_assert(call->gtCallCookie == nullptr); |
| 3210 | |
| 3211 | return nullptr; |
| 3212 | } |
| 3213 | |
| 3214 | //------------------------------------------------------------------------ |
| 3215 | // CreateReturnTrapSeq: Create a tree to perform a "return trap", used in PInvoke |
| 3216 | // epilogs to invoke a GC under a condition. The return trap checks some global |
| 3217 | // location (the runtime tells us where that is and how many indirections to make), |
| 3218 | // then, based on the result, conditionally calls a GC helper. We use a special node |
| 3219 | // for this because at this time (late in the compilation phases), introducing flow |
| 3220 | // is tedious/difficult. |
| 3221 | // |
| 3222 | // This is used for PInvoke inlining. |
| 3223 | // |
| 3224 | // Return Value: |
| 3225 | // Code tree to perform the action. |
| 3226 | // |
| 3227 | GenTree* Lowering::CreateReturnTrapSeq() |
| 3228 | { |
| 3229 | // The GT_RETURNTRAP node expands to this: |
| 3230 | // if (g_TrapReturningThreads) |
| 3231 | // { |
| 3232 | // RareDisablePreemptiveGC(); |
| 3233 | // } |
| 3234 | |
| 3235 | // The only thing to do here is build up the expression that evaluates 'g_TrapReturningThreads'. |
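| | // For example (illustrative only): when the global's address is available directly, the result is |
| | // GT_RETURNTRAP(GT_IND(addr-of-g_TrapReturningThreads)); otherwise an extra indirection is added |
| | // to reach the global through the pointer provided by the runtime. |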
| 3236 | |
| 3237 | void* pAddrOfCaptureThreadGlobal = nullptr; |
| 3238 | LONG* addrOfCaptureThreadGlobal = comp->info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal); |
| 3239 | |
| 3240 | GenTree* testTree; |
| 3241 | if (addrOfCaptureThreadGlobal != nullptr) |
| 3242 | { |
| 3243 | testTree = Ind(AddrGen(addrOfCaptureThreadGlobal)); |
| 3244 | } |
| 3245 | else |
| 3246 | { |
| 3247 | testTree = Ind(Ind(AddrGen(pAddrOfCaptureThreadGlobal))); |
| 3248 | } |
| 3249 | return comp->gtNewOperNode(GT_RETURNTRAP, TYP_INT, testTree); |
| 3250 | } |
| 3251 | |
| 3252 | //------------------------------------------------------------------------ |
| 3253 | // SetGCState: Create a tree that stores the given constant (0 or 1) into the |
| 3254 | // thread's GC state field. |
| 3255 | // |
| 3256 | // This is used for PInvoke inlining. |
| 3257 | // |
| 3258 | // Arguments: |
| 3259 | // state - constant (0 or 1) to store into the thread's GC state field. |
| 3260 | // |
| 3261 | // Return Value: |
| 3262 | // Code tree to perform the action. |
| 3263 | // |
| 3264 | GenTree* Lowering::SetGCState(int state) |
| 3265 | { |
| 3266 | // Thread.offsetOfGcState = 0/1 |
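| | // For example (illustrative only), the tree built below generates a single byte-sized store such |
| | // as "mov byte ptr [tcb + offsetOfGCState], 0" (or 1), where tcb is the frame list root local. |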
| 3267 | |
| 3268 | assert(state == 0 || state == 1); |
| 3269 | |
| 3270 | const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo(); |
| 3271 | |
| 3272 | GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1); |
| 3273 | |
| 3274 | GenTree* stateNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state); |
| 3275 | GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState); |
| 3276 | GenTree* storeGcState = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_BYTE, addr, stateNode); |
| 3277 | return storeGcState; |
| 3278 | } |
| 3279 | |
| 3280 | //------------------------------------------------------------------------ |
| 3281 | // CreateFrameLinkUpdate: Create a tree that either links or unlinks the |
| 3282 | // locally-allocated InlinedCallFrame from the Frame list. |
| 3283 | // |
| 3284 | // This is used for PInvoke inlining. |
| 3285 | // |
| 3286 | // Arguments: |
| 3287 | // action - whether to link (push) or unlink (pop) the Frame |
| 3288 | // |
| 3289 | // Return Value: |
| 3290 | // Code tree to perform the action. |
| 3291 | // |
| 3292 | GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action) |
| 3293 | { |
| 3294 | const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo(); |
| 3295 | const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo; |
| 3296 | |
| 3297 | GenTree* TCB = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot, |
| 3298 | (IL_OFFSET)-1); // cast to resolve ambiguity. |
| 3299 | |
| 3300 | // Thread->m_pFrame |
| 3301 | GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, TCB, nullptr, 1, pInfo->offsetOfThreadFrame); |
| 3302 | |
| 3303 | GenTree* data = nullptr; |
| 3304 | |
| 3305 | if (action == PushFrame) |
| 3306 | { |
| 3307 | // Thread->m_pFrame = &inlinedCallFrame; |
| 3308 | data = new (comp, GT_LCL_FLD_ADDR) |
| 3309 | GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr); |
| 3310 | } |
| 3311 | else |
| 3312 | { |
| 3313 | assert(action == PopFrame); |
| 3314 | // Thread->m_pFrame = inlinedCallFrame.m_pNext; |
| 3315 | |
| 3316 | data = new (comp, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, |
| 3317 | pInfo->inlinedCallFrameInfo.offsetOfFrameLink); |
| 3318 | } |
| 3319 | GenTree* storeInd = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_I_IMPL, addr, data); |
| 3320 | return storeInd; |
| 3321 | } |
| 3322 | |
| 3323 | //------------------------------------------------------------------------ |
| 3324 | // InsertPInvokeMethodProlog: Create the code that runs at the start of |
| 3325 | // every method that has PInvoke calls. |
| 3326 | // |
| 3327 | // Initialize the TCB local and the InlinedCallFrame object. Then link ("push") |
| 3328 | // the InlinedCallFrame object on the Frame chain. The layout of InlinedCallFrame |
| 3329 | // is defined in vm/frames.h. See also vm/jitinterface.cpp for more information. |
| 3330 | // The offsets of these fields is returned by the VM in a call to ICorStaticInfo::getEEInfo(). |
| 3331 | // |
| 3332 | // The (current) layout is as follows: |
| 3333 | // |
| 3334 | // 64-bit 32-bit CORINFO_EE_INFO |
| 3335 | // offset offset field name offset when set |
| 3336 | // ----------------------------------------------------------------------------------------- |
| 3337 | // +00h +00h GS cookie offsetOfGSCookie |
| 3338 | // +08h +04h vptr for class InlinedCallFrame offsetOfFrameVptr method prolog |
| 3339 | // +10h +08h m_Next offsetOfFrameLink method prolog |
| 3340 | // +18h +0Ch m_Datum offsetOfCallTarget call site |
| 3341 | // +20h n/a m_StubSecretArg not set by JIT |
| 3342 | // +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method |
| 3343 | // prolog; |
| 3344 | // non-x86: method prolog (SP remains |
| 3345 | // constant in function, after prolog: no |
| 3346 | // localloc and PInvoke in same function) |
| 3347 | // +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site |
| 3348 | // +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT |
| 3349 | // +1Ch JIT retval spill area (int) before call_gc ??? |
| 3350 | // +20h JIT retval spill area (long) before call_gc ??? |
| 3351 | // +24h Saved value of EBP method prolog ??? |
| 3352 | // |
| 3353 | // Note that in the VM, InlinedCallFrame is a C++ class whose objects have a 'this' pointer that points |
| 3354 | // to the InlinedCallFrame vptr (the 2nd field listed above), and the GS cookie is stored *before* |
| 3355 | // the object. When we link the InlinedCallFrame onto the Frame chain, we must point at this location, |
| 3356 | // and not at the beginning of the InlinedCallFrame local, which is actually the GS cookie. |
| 3357 | // |
| 3358 | // Return Value: |
| 3359 | // none |
| 3360 | // |
| 3361 | void Lowering::InsertPInvokeMethodProlog() |
| 3362 | { |
| 3363 | noway_assert(comp->info.compCallUnmanaged); |
| 3364 | noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); |
| 3365 | |
| 3366 | if (comp->opts.ShouldUsePInvokeHelpers()) |
| 3367 | { |
| 3368 | return; |
| 3369 | } |
| 3370 | |
| 3371 | JITDUMP("======= Inserting PInvoke method prolog\n"); |
| 3372 | |
| 3373 | // The first BB must be a scratch BB in order for us to be able to safely insert the P/Invoke prolog. |
| 3374 | assert(comp->fgFirstBBisScratch()); |
| 3375 | |
| 3376 | LIR::Range& firstBlockRange = LIR::AsRange(comp->fgFirstBB); |
| 3377 | |
| 3378 | const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo(); |
| 3379 | const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo; |
| 3380 | |
| 3381 | // First arg: &compiler->lvaInlinedPInvokeFrameVar + callFrameInfo.offsetOfFrameVptr |
| 3382 | |
| 3383 | GenTree* frameAddr = new (comp, GT_LCL_FLD_ADDR) |
| 3384 | GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr); |
| 3385 | |
| 3386 | // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list: |
| 3387 | // TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg); |
| 3388 | // for x86, don't pass the secretArg. |
| 3389 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 3390 | |
| 3391 | #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
| 3392 | GenTreeArgList* argList = comp->gtNewArgList(frameAddr); |
| 3393 | #else |
| 3394 | GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM)); |
| 3395 | #endif |
| 3396 | |
| 3397 | GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, argList); |
| 3398 | |
| 3399 | // some sanity checks on the frame list root vardsc |
| 3400 | LclVarDsc* varDsc = &comp->lvaTable[comp->info.compLvFrameListRoot]; |
| 3401 | noway_assert(!varDsc->lvIsParam); |
| 3402 | noway_assert(varDsc->lvType == TYP_I_IMPL); |
| 3403 | |
| 3404 | GenTree* store = |
| 3405 | new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot, |
| 3406 | (IL_OFFSET)-1); // cast to resolve ambiguity. |
| 3407 | store->gtOp.gtOp1 = call; |
| 3408 | store->gtFlags |= GTF_VAR_DEF; |
| 3409 | |
| 3410 | GenTree* const insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode(); |
| 3411 | |
| 3412 | comp->fgMorphTree(store); |
| 3413 | firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store)); |
| 3414 | DISPTREERANGE(firstBlockRange, store); |
| 3415 | |
| 3416 | #if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_) |
| 3417 | // For x86, this step is done at the call site (due to stack pointer not being static in the function). |
| 3418 | // For arm32, CallSiteSP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME. |
| 3419 | |
| 3420 | // -------------------------------------------------------- |
| 3421 | // InlinedCallFrame.m_pCallSiteSP = @RSP; |
| 3422 | |
| 3423 | GenTreeLclFld* storeSP = new (comp, GT_STORE_LCL_FLD) |
| 3424 | GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP); |
| 3425 | storeSP->gtOp1 = PhysReg(REG_SPBASE); |
| 3426 | storeSP->gtFlags |= GTF_VAR_DEF; |
| 3427 | |
| 3428 | firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP)); |
| 3429 | DISPTREERANGE(firstBlockRange, storeSP); |
| 3430 | |
| 3431 | #endif // !defined(_TARGET_X86_) && !defined(_TARGET_ARM_) |
| 3432 | |
| 3433 | #if !defined(_TARGET_ARM_) |
| 3434 | // For arm32, CalleeSavedFP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME. |
| 3435 | |
| 3436 | // -------------------------------------------------------- |
| 3437 | // InlinedCallFrame.m_pCalleeSavedEBP = @RBP; |
| 3438 | |
| 3439 | GenTreeLclFld* storeFP = |
| 3440 | new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, |
| 3441 | callFrameInfo.offsetOfCalleeSavedFP); |
| 3442 | storeFP->gtOp1 = PhysReg(REG_FPBASE); |
| 3443 | storeFP->gtFlags |= GTF_VAR_DEF; |
| 3444 | |
| 3445 | firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP)); |
| 3446 | DISPTREERANGE(firstBlockRange, storeFP); |
| 3447 | #endif // !defined(_TARGET_ARM_) |
| 3448 | |
| 3449 | // -------------------------------------------------------- |
| 3450 | // On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto |
| 3451 | // the current thread's Frame stack. On 64-bit targets, it only initializes the PInvoke frame. |
| 3452 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 3453 | |
| 3454 | #ifdef _TARGET_64BIT_ |
| 3455 | if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) |
| 3456 | { |
| 3457 | // Push a frame - if we are NOT in an IL stub, this is done right before the call |
| 3458 | // The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack |
| 3459 | GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame); |
| 3460 | firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); |
| 3461 | ContainCheckStoreIndir(frameUpd->AsIndir()); |
| 3462 | DISPTREERANGE(firstBlockRange, frameUpd); |
| 3463 | } |
| 3464 | #endif // _TARGET_64BIT_ |
| 3465 | } |
| 3466 | |
| 3467 | //------------------------------------------------------------------------ |
| 3468 | // InsertPInvokeMethodEpilog: Code that needs to be run when exiting any method |
| 3469 | // that has PInvoke inlines. This needs to be inserted any place you can exit the |
| 3470 | // function: returns, tailcalls and jmps. |
| 3471 | // |
| 3472 | // Arguments: |
| 3473 | // returnBB - basic block from which a method can return |
| 3474 | // lastExpr - GenTree of the last top level stmnt of returnBB (debug only arg) |
| 3475 | // |
| 3476 | // Return Value: |
| 3477 | // Code tree to perform the action. |
| 3478 | // |
| 3479 | void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTree* lastExpr)) |
| 3480 | { |
| 3481 | assert(returnBB != nullptr); |
| 3482 | assert(comp->info.compCallUnmanaged); |
| 3483 | |
| 3484 | if (comp->opts.ShouldUsePInvokeHelpers()) |
| 3485 | { |
| 3486 | return; |
| 3487 | } |
| 3488 | |
| 3489 | JITDUMP("======= Inserting PInvoke method epilog\n"); |
| 3490 | |
| 3491 | // A method doing PInvoke calls has exactly one return block unless it has "jmp" or tail calls. |
| 3492 | assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) || |
| 3493 | returnBB->endsWithTailCallOrJmp(comp)); |
| 3494 | |
| 3495 | LIR::Range& returnBlockRange = LIR::AsRange(returnBB); |
| 3496 | |
| 3497 | GenTree* insertionPoint = returnBlockRange.LastNode(); |
| 3498 | assert(insertionPoint == lastExpr); |
| 3499 | |
| 3500 | // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution |
| 3501 | // order so that it is guaranteed that there will be no further PInvokes after that point in the method. |
| 3502 | // |
| 3503 | // Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be |
| 3504 | // Op1, PME, GT_RETURN |
| 3505 | // |
| 3506 | // Example2: GT_CALL(arg side effect computing nodes, Stk Args Setup, Reg Args setup). The execution order would be |
| 3507 | // arg side effect computing nodes, Stk Args setup, Reg Args setup, GT_CALL |
| 3508 | // After inserting PME execution order would be: |
| 3509 | // arg side effect computing nodes, Stk Args setup, Reg Args setup, PME, GT_CALL |
| 3510 | // |
| 3511 | // Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP |
| 3512 | // That is after PME, args for GT_JMP call will be setup. |
| 3513 | |
| 3514 | // TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a |
| 3515 | // PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant, |
| 3516 | // it is harmless. |
| 3517 | // Note that liveness is artificially extending the life of compLvFrameListRoot var if the method being compiled has |
| 3518 | // PInvokes. Deleting the below stmnt would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot |
| 3519 | // will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for x64 case to |
| 3520 | // properly extend the life of compLvFrameListRoot var. |
| 3521 | // |
| 3522 | // Thread.offsetOfGcState = 0/1 |
| 3523 | // That is [tcb + offsetOfGcState] = 1 |
| 3524 | GenTree* storeGCState = SetGCState(1); |
| 3525 | returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState)); |
| 3526 | ContainCheckStoreIndir(storeGCState->AsIndir()); |
| 3527 | |
| 3528 | // Pop the frame if necessary. This always happens in the epilog on 32-bit targets. For 64-bit targets, we only do |
| 3529 | // this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call. |
| 3530 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 3531 | |
| 3532 | #ifdef _TARGET_64BIT_ |
| 3533 | if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) |
| 3534 | #endif // _TARGET_64BIT_ |
| 3535 | { |
| 3536 | GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame); |
| 3537 | returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); |
| 3538 | ContainCheckStoreIndir(frameUpd->AsIndir()); |
| 3539 | } |
| 3540 | } |
| 3541 | |
| 3542 | //------------------------------------------------------------------------ |
| 3543 | // InsertPInvokeCallProlog: Emit the call-site prolog for direct calls to unmanaged code. |
| 3544 | // It does all the necessary call-site setup of the InlinedCallFrame. |
| 3545 | // |
| 3546 | // Arguments: |
| 3547 | // call - the call for which we are inserting the PInvoke prolog. |
| 3548 | // |
| 3549 | // Return Value: |
| 3550 | // None. |
| 3551 | // |
| 3552 | void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) |
| 3553 | { |
| 3554 | JITDUMP("======= Inserting PInvoke call prolog\n"); |
| 3555 | |
| 3556 | GenTree* insertBefore = call; |
| 3557 | if (call->gtCallType == CT_INDIRECT) |
| 3558 | { |
| 3559 | bool isClosed; |
| 3560 | insertBefore = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode(); |
| 3561 | assert(isClosed); |
| 3562 | } |
| 3563 | |
| 3564 | const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo; |
| 3565 | |
| 3566 | gtCallTypes callType = (gtCallTypes)call->gtCallType; |
| 3567 | |
| 3568 | noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); |
| 3569 | |
| 3570 | if (comp->opts.ShouldUsePInvokeHelpers()) |
| 3571 | { |
| 3572 | // First argument is the address of the frame variable. |
| 3573 | GenTree* frameAddr = new (comp, GT_LCL_VAR_ADDR) |
| 3574 | GenTreeLclVar(GT_LCL_VAR_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET); |
| 3575 | |
| 3576 | // Insert call to CORINFO_HELP_JIT_PINVOKE_BEGIN |
| 3577 | GenTree* helperCall = |
| 3578 | comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID, comp->gtNewArgList(frameAddr)); |
| 3579 | |
| 3580 | comp->fgMorphTree(helperCall); |
| 3581 | BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall)); |
| 3582 | LowerNode(helperCall); // helper call is inserted before current node and should be lowered here. |
| 3583 | return; |
| 3584 | } |
| 3585 | |
| 3586 | // Emit the following sequence: |
| 3587 | // |
| 3588 | // InlinedCallFrame.callTarget = methodHandle // stored in m_Datum |
| 3589 | // InlinedCallFrame.m_pCallSiteSP = SP // x86 only |
| 3590 | // InlinedCallFrame.m_pCallerReturnAddress = return address |
| 3591 | // Thread.gcState = 0 |
| 3592 | // (non-stub) - update top Frame on TCB // 64-bit targets only |
| 3593 | |
| 3594 | // ---------------------------------------------------------------------------------- |
| 3595 | // Setup InlinedCallFrame.callSiteTarget (which is how the JIT refers to it). |
| 3596 | // The actual field is InlinedCallFrame.m_Datum which has many different uses and meanings. |
| 3597 | |
| 3598 | GenTree* src = nullptr; |
| 3599 | |
| 3600 | if (callType == CT_INDIRECT) |
| 3601 | { |
| 3602 | #if !defined(_TARGET_64BIT_) |
| 3603 | // On 32-bit targets, indirect calls need the size of the stack args in InlinedCallFrame.m_Datum. |
| 3604 | const unsigned numStkArgBytes = call->fgArgInfo->GetNextSlotNum() * TARGET_POINTER_SIZE; |
| 3605 | |
| 3606 | src = comp->gtNewIconNode(numStkArgBytes, TYP_INT); |
| 3607 | #else |
| 3608 | // On 64-bit targets, indirect calls may need the stub parameter value in InlinedCallFrame.m_Datum. |
| 3609 | // If the stub parameter value is not needed, m_Datum will be initialized by the VM. |
| 3610 | if (comp->info.compPublishStubParam) |
| 3611 | { |
| 3612 | src = comp->gtNewLclvNode(comp->lvaStubArgumentVar, TYP_I_IMPL); |
| 3613 | } |
| 3614 | #endif // !defined(_TARGET_64BIT_) |
| 3615 | } |
| 3616 | else |
| 3617 | { |
| 3618 | assert(callType == CT_USER_FUNC); |
| 3619 | |
| 3620 | void* pEmbedMethodHandle = nullptr; |
| 3621 | CORINFO_METHOD_HANDLE embedMethodHandle = |
| 3622 | comp->info.compCompHnd->embedMethodHandle(call->gtCallMethHnd, &pEmbedMethodHandle); |
| 3623 | |
| 3624 | noway_assert((!embedMethodHandle) != (!pEmbedMethodHandle)); |
| 3625 | |
| 3626 | if (embedMethodHandle != nullptr) |
| 3627 | { |
| 3628 | // InlinedCallFrame.callSiteTarget = methodHandle |
| 3629 | src = AddrGen(embedMethodHandle); |
| 3630 | } |
| 3631 | else |
| 3632 | { |
| 3633 | // InlinedCallFrame.callSiteTarget = *pEmbedMethodHandle |
| 3634 | src = Ind(AddrGen(pEmbedMethodHandle)); |
| 3635 | } |
| 3636 | } |
| 3637 | |
| 3638 | if (src != nullptr) |
| 3639 | { |
| 3640 | // Store into InlinedCallFrame.m_Datum, the offset of which is given by offsetOfCallTarget. |
| 3641 | GenTreeLclFld* store = |
| 3642 | new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, |
| 3643 | callFrameInfo.offsetOfCallTarget); |
| 3644 | store->gtOp1 = src; |
| 3645 | store->gtFlags |= GTF_VAR_DEF; |
| 3646 | |
| 3647 | InsertTreeBeforeAndContainCheck(insertBefore, store); |
| 3648 | } |
| 3649 | |
| 3650 | #ifdef _TARGET_X86_ |
| 3651 | |
| 3652 | // ---------------------------------------------------------------------------------- |
| 3653 | // InlinedCallFrame.m_pCallSiteSP = SP |
| 3654 | |
| 3655 | GenTreeLclFld* storeCallSiteSP = new (comp, GT_STORE_LCL_FLD) |
| 3656 | GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP); |
| 3657 | |
| 3658 | storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE); |
| 3659 | storeCallSiteSP->gtFlags |= GTF_VAR_DEF; |
| 3660 | |
| 3661 | InsertTreeBeforeAndContainCheck(insertBefore, storeCallSiteSP); |
| 3662 | |
| 3663 | #endif |
| 3664 | |
| 3665 | // ---------------------------------------------------------------------------------- |
| 3666 | // InlinedCallFrame.m_pCallerReturnAddress = &label (the address of the instruction immediately following the call) |
| 3667 | |
| 3668 | GenTreeLclFld* storeLab = |
| 3669 | new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, |
| 3670 | callFrameInfo.offsetOfReturnAddress); |
| 3671 | |
| 3672 | // We don't have a real label, and inserting one is hard (even if we made a special node), |
| 3673 | // so for now we will just 'know' what this means in codegen. |
| 3674 | GenTreeLabel* labelRef = new (comp, GT_LABEL) GenTreeLabel(nullptr); |
| 3675 | labelRef->gtType = TYP_I_IMPL; |
| 3676 | storeLab->gtOp1 = labelRef; |
| 3677 | storeLab->gtFlags |= GTF_VAR_DEF; |
| 3678 | |
| 3679 | InsertTreeBeforeAndContainCheck(insertBefore, storeLab); |
| 3680 | |
| 3681 | // Push the PInvoke frame if necessary. On 32-bit targets this only happens in the method prolog if a method |
| 3682 | // contains PInvokes; on 64-bit targets this is necessary in non-stubs. |
| 3683 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 3684 | |
| 3685 | #ifdef _TARGET_64BIT_ |
| 3686 | if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) |
| 3687 | { |
| 3688 | // Set the TCB's frame to be the one we just created. |
| 3689 | // Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME) |
| 3690 | // has prepended it to the linked list to maintain the stack of Frames. |
| 3691 | // |
| 3692 | // Stubs do this once per stub, not once per call. |
| 3693 | GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame); |
| 3694 | BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd)); |
| 3695 | ContainCheckStoreIndir(frameUpd->AsIndir()); |
| 3696 | } |
| 3697 | #endif // _TARGET_64BIT_ |
| 3698 | |
| 3699 | // IMPORTANT **** This instruction must come last!!! **** |
| 3700 | // It changes the thread's state to Preemptive mode |
| 3701 | // ---------------------------------------------------------------------------------- |
| 3702 | // [tcb + offsetOfGcState] = 0 |
| 3703 | |
| 3704 | GenTree* storeGCState = SetGCState(0); |
| 3705 | BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState)); |
| 3706 | ContainCheckStoreIndir(storeGCState->AsIndir()); |
| 3707 | } |
| 3708 | |
| 3709 | //------------------------------------------------------------------------ |
| 3710 | // InsertPInvokeCallEpilog: Insert the code that goes after every inlined pinvoke call. |
| 3711 | // |
| 3712 | // Arguments: |
| 3713 | // call - the call for which we are inserting the PInvoke epilog. |
| 3714 | // |
| 3715 | // Return Value: |
| 3716 | // None. |
| 3717 | // |
| 3718 | void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) |
| 3719 | { |
| 3720 | JITDUMP("======= Inserting PInvoke call epilog\n"); |
| 3721 | |
| 3722 | if (comp->opts.ShouldUsePInvokeHelpers()) |
| 3723 | { |
| 3724 | noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); |
| 3725 | |
| 3726 | // First argument is the address of the frame variable. |
| 3727 | GenTree* frameAddr = |
| 3728 | new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET); |
| 3729 | frameAddr->SetOperRaw(GT_LCL_VAR_ADDR); |
| 3730 | |
| 3731 | // Insert call to CORINFO_HELP_JIT_PINVOKE_END |
| 3732 | GenTreeCall* helperCall = |
| 3733 | comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, comp->gtNewArgList(frameAddr)); |
| 3734 | |
| 3735 | comp->fgMorphTree(helperCall); |
| 3736 | BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall)); |
| 3737 | ContainCheckCallOperands(helperCall); |
| 3738 | return; |
| 3739 | } |
| 3740 | |
| 3741 | // gcstate = 1 |
| 3742 | GenTree* insertionPoint = call->gtNext; |
| 3743 | |
| 3744 | GenTree* tree = SetGCState(1); |
| 3745 | BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); |
| 3746 | ContainCheckStoreIndir(tree->AsIndir()); |
| 3747 | |
| 3748 | tree = CreateReturnTrapSeq(); |
| 3749 | BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); |
| 3750 | ContainCheckReturnTrap(tree->AsOp()); |
| 3751 | |
| 3752 | // Pop the frame if necessary. On 32-bit targets this only happens in the method epilog; on 64-bit targets this |
| 3753 | // happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive. |
| 3754 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 3755 | |
| 3756 | #ifdef _TARGET_64BIT_ |
| 3757 | if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) |
| 3758 | { |
| 3759 | tree = CreateFrameLinkUpdate(PopFrame); |
| 3760 | BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); |
| 3761 | ContainCheckStoreIndir(tree->AsIndir()); |
| 3762 | } |
| 3763 | #else |
| 3764 | const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo; |
| 3765 | |
| 3766 | // ---------------------------------------------------------------------------------- |
| 3767 | // InlinedCallFrame.m_pCallerReturnAddress = nullptr |
| 3768 | |
| 3769 | GenTreeLclFld* const storeCallSiteTracker = |
| 3770 | new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, |
| 3771 | callFrameInfo.offsetOfReturnAddress); |
| 3772 | |
| 3773 | GenTreeIntCon* const constantZero = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0); |
| 3774 | |
| 3775 | storeCallSiteTracker->gtOp1 = constantZero; |
| 3776 | storeCallSiteTracker->gtFlags |= GTF_VAR_DEF; |
| 3777 | |
| 3778 | BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker); |
| 3779 | ContainCheckStoreLoc(storeCallSiteTracker); |
| 3780 | #endif // _TARGET_64BIT_ |
| 3781 | } |
| 3782 | |
| 3783 | //------------------------------------------------------------------------ |
| 3784 | // LowerNonvirtPinvokeCall: Lower a non-virtual / indirect PInvoke call |
| 3785 | // |
| 3786 | // Arguments: |
| 3787 | // call - The call to lower. |
| 3788 | // |
| 3789 | // Return Value: |
| 3790 | // The lowered call tree. |
| 3791 | // |
| 3792 | GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call) |
| 3793 | { |
| 3794 | // PInvoke lowering varies depending on the flags passed in by the EE. By default, |
| 3795 | // GC transitions are generated inline; if CORJIT_FLAG_USE_PINVOKE_HELPERS is specified, |
| 3796 | // GC transitions are instead performed using helper calls. Examples of each case are given |
| 3797 | // below. Note that the data structure that is used to store information about a call frame |
| 3798 | // containing any P/Invoke calls is initialized in the method prolog (see |
| 3799 | // InsertPInvokeMethod{Prolog,Epilog} for details). |
| 3800 | // |
| 3801 | // Inline transitions: |
| 3802 | // InlinedCallFrame inlinedCallFrame; |
| 3803 | // |
| 3804 | // ... |
| 3805 | // |
| 3806 | // // Set up frame information |
| 3807 | // inlinedCallFrame.callTarget = methodHandle; // stored in m_Datum |
| 3808 | // inlinedCallFrame.m_pCallSiteSP = SP; // x86 only |
| 3809 | // inlinedCallFrame.m_pCallerReturnAddress = &label; (the address of the instruction immediately following the |
| 3810 | // call) |
| 3811 | // Thread.m_pFrame = &inlinedCallFrame; (non-IL-stub only) |
| 3812 | // |
| 3813 | // // Switch the thread's GC mode to preemptive mode |
| 3814 | // thread->m_fPreemptiveGCDisabled = 0; |
| 3815 | // |
| 3816 | // // Call the unmanaged method |
| 3817 | // target(); |
| 3818 | // |
| 3819 | // // Switch the thread's GC mode back to cooperative mode |
| 3820 | // thread->m_fPreemptiveGCDisabled = 1; |
| 3821 | // |
| 3822 | // // Rendezvous with a running collection if necessary |
| 3823 | // if (g_TrapReturningThreads) |
| 3824 | // RareDisablePreemptiveGC(); |
| 3825 | // |
| 3826 | // Transitions using helpers: |
| 3827 | // |
| 3828 | // OpaqueFrame opaqueFrame; |
| 3829 | // |
| 3830 | // ... |
| 3831 | // |
| 3832 | // // Call the JIT_PINVOKE_BEGIN helper |
| 3833 | // JIT_PINVOKE_BEGIN(&opaqueFrame); |
| 3834 | // |
| 3835 | // // Call the unmanaged method |
| 3836 | // target(); |
| 3837 | // |
| 3838 | // // Call the JIT_PINVOKE_END helper |
| 3839 | // JIT_PINVOKE_END(&opaqueFrame); |
| 3840 | // |
| 3841 | // Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target |
| 3842 | // platform. They may be changed in the future such that they preserve all register values. |
| 3843 | |
| 3844 | GenTree* result = nullptr; |
| 3845 | void* addr = nullptr; |
| 3846 | |
| 3847 | // assert we have seen one of these |
| 3848 | noway_assert(comp->info.compCallUnmanaged != 0); |
| 3849 | |
| 3850 | // All code generated by this function must not contain the randomly-inserted NOPs |
| 3851 | // that we insert to inhibit JIT spraying in partial trust scenarios. |
| 3852 | // The PINVOKE_PROLOG op signals this to the code generator/emitter. |
| 3853 | |
| 3854 | GenTree* prolog = new (comp, GT_NOP) GenTree(GT_PINVOKE_PROLOG, TYP_VOID); |
| 3855 | BlockRange().InsertBefore(call, prolog); |
| 3856 | |
| 3857 | InsertPInvokeCallProlog(call); |
| 3858 | |
| 3859 | if (call->gtCallType != CT_INDIRECT) |
| 3860 | { |
| 3861 | noway_assert(call->gtCallType == CT_USER_FUNC); |
| 3862 | CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd; |
| 3863 | |
| 3864 | CORINFO_CONST_LOOKUP lookup; |
| 3865 | comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup); |
| 3866 | |
| 3867 | void* addr = lookup.addr; |
| 3868 | switch (lookup.accessType) |
| 3869 | { |
| 3870 | case IAT_VALUE: |
| 3871 | if (!IsCallTargetInRange(addr)) |
| 3872 | { |
| 3873 | result = AddrGen(addr); |
| 3874 | } |
| 3875 | else |
| 3876 | { |
| 3877 | // a direct call within range of hardware relative call instruction |
| 3878 | // stash the address for codegen |
| 3879 | call->gtDirectCallAddress = addr; |
| 3880 | #ifdef FEATURE_READYTORUN_COMPILER |
| 3881 | call->gtEntryPoint.addr = nullptr; |
| 3882 | call->gtEntryPoint.accessType = IAT_VALUE; |
| 3883 | #endif |
| 3884 | } |
| 3885 | break; |
| 3886 | |
| 3887 | case IAT_PVALUE: |
| 3888 | result = Ind(AddrGen(addr)); |
| 3889 | break; |
| 3890 | |
| 3891 | case IAT_PPVALUE: |
| 3892 | result = Ind(Ind(AddrGen(addr))); |
| 3893 | break; |
| 3894 | |
| 3895 | case IAT_RELPVALUE: |
| 3896 | unreached(); |
| 3897 | } |
| 3898 | } |
| 3899 | |
| 3900 | InsertPInvokeCallEpilog(call); |
| 3901 | |
| 3902 | return result; |
| 3903 | } |
| 3904 | |
| 3905 | // Expand the code necessary to calculate the control target. |
| 3906 | // Returns: the expression needed to calculate the control target |
| 3907 | // May insert embedded statements |
| 3908 | GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call) |
| 3909 | { |
| 3910 | noway_assert(call->gtCallType == CT_USER_FUNC); |
| 3911 | |
| 3912 | // If this is a tail call via helper, thisPtr will be the third argument. |
| 3913 | int thisPtrArgNum; |
| 3914 | regNumber thisPtrArgReg; |
| 3915 | |
| 3916 | #ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args. |
| 3917 | if (call->IsTailCallViaHelper()) |
| 3918 | { |
| 3919 | thisPtrArgNum = 2; |
| 3920 | thisPtrArgReg = REG_ARG_2; |
| 3921 | } |
| 3922 | else |
| 3923 | #endif // !_TARGET_X86_ |
| 3924 | { |
| 3925 | thisPtrArgNum = 0; |
| 3926 | thisPtrArgReg = comp->codeGen->genGetThisArgReg(call); |
| 3927 | } |
| 3928 | |
| 3929 | // get a reference to the thisPtr being passed |
| 3930 | fgArgTabEntry* argEntry = comp->gtArgEntryByArgNum(call, thisPtrArgNum); |
| 3931 | assert(argEntry->regNum == thisPtrArgReg); |
| 3932 | assert(argEntry->node->gtOper == GT_PUTARG_REG); |
| 3933 | GenTree* thisPtr = argEntry->node->gtOp.gtOp1; |
| 3934 | |
| 3935 | // If what we are passing as the thisptr is not already a local, make a new local to place it in |
| 3936 | // because we will be creating expressions based on it. |
| 3937 | unsigned lclNum; |
| 3938 | if (thisPtr->IsLocal()) |
| 3939 | { |
| 3940 | lclNum = thisPtr->gtLclVarCommon.gtLclNum; |
| 3941 | } |
| 3942 | else |
| 3943 | { |
| 3944 | // Split off the thisPtr and store to a temporary variable. |
| 3945 | if (vtableCallTemp == BAD_VAR_NUM) |
| 3946 | { |
| 3947 | vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call")); |
| 3948 | } |
| 3949 | |
| 3950 | LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node); |
| 3951 | ReplaceWithLclVar(thisPtrUse, vtableCallTemp); |
| 3952 | |
| 3953 | lclNum = vtableCallTemp; |
| 3954 | } |
| 3955 | |
| 3956 | // Get hold of the vtable offset (note: this might be expensive) |
| 3957 | unsigned vtabOffsOfIndirection; |
| 3958 | unsigned vtabOffsAfterIndirection; |
| 3959 | bool isRelative; |
| 3960 | comp->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, |
| 3961 | &vtabOffsAfterIndirection, &isRelative); |
| 3962 | |
| 3963 | // If the thisPtr is a local field, then construct a local field type node |
| 3964 | GenTree* local; |
| 3965 | if (thisPtr->isLclField()) |
| 3966 | { |
| 3967 | local = new (comp, GT_LCL_FLD) |
| 3968 | GenTreeLclFld(GT_LCL_FLD, thisPtr->TypeGet(), lclNum, thisPtr->AsLclFld()->gtLclOffs); |
| 3969 | } |
| 3970 | else |
| 3971 | { |
| 3972 | local = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, thisPtr->TypeGet(), lclNum, BAD_IL_OFFSET); |
| 3973 | } |
| 3974 | |
| 3975 | // pointer to virtual table = [REG_CALL_THIS + offs] |
| 3976 | GenTree* result = Ind(Offset(local, VPTR_OFFS)); |
| 3977 | |
| 3978 | // Get the appropriate vtable chunk |
| 3979 | if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK) |
| 3980 | { |
| 3981 | if (isRelative) |
| 3982 | { |
| 3983 | // MethodTable offset is a relative pointer. |
| 3984 | // |
// An additional temporary variable is used to store the virtual table pointer.
// The address of the method is obtained by the following computations:
| 3987 | // |
| 3988 | // Save relative offset to tmp (vtab is virtual table pointer, vtabOffsOfIndirection is offset of |
| 3989 | // vtable-1st-level-indirection): |
| 3990 | // tmp = vtab |
| 3991 | // |
| 3992 | // Save address of method to result (vtabOffsAfterIndirection is offset of vtable-2nd-level-indirection): |
| 3993 | // result = [tmp + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp + vtabOffsOfIndirection]] |
| 3994 | // |
| 3995 | // |
// If relative pointers are also used in the second-level indirection, an additional temporary is used:
| 3997 | // tmp1 = vtab |
| 3998 | // tmp2 = tmp1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp1 + vtabOffsOfIndirection] |
| 3999 | // result = tmp2 + [tmp2] |
| 4000 | // |
unsigned lclNumTmp = comp->lvaGrabTemp(true DEBUGARG("lclNumTmp"));
unsigned lclNumTmp2 = comp->lvaGrabTemp(true DEBUGARG("lclNumTmp2"));
| 4003 | |
| 4004 | GenTree* lclvNodeStore = comp->gtNewTempAssign(lclNumTmp, result); |
| 4005 | |
| 4006 | GenTree* tmpTree = comp->gtNewLclvNode(lclNumTmp, result->TypeGet()); |
| 4007 | tmpTree = Offset(tmpTree, vtabOffsOfIndirection); |
| 4008 | |
| 4009 | tmpTree = comp->gtNewOperNode(GT_IND, TYP_I_IMPL, tmpTree, false); |
| 4010 | GenTree* offs = comp->gtNewIconNode(vtabOffsOfIndirection + vtabOffsAfterIndirection, TYP_INT); |
| 4011 | result = comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, comp->gtNewLclvNode(lclNumTmp, result->TypeGet()), offs); |
| 4012 | |
| 4013 | GenTree* base = OffsetByIndexWithScale(result, tmpTree, 1); |
| 4014 | GenTree* lclvNodeStore2 = comp->gtNewTempAssign(lclNumTmp2, base); |
| 4015 | |
| 4016 | LIR::Range range = LIR::SeqTree(comp, lclvNodeStore); |
JITDUMP("result of obtaining pointer to virtual table:\n");
| 4018 | DISPRANGE(range); |
| 4019 | BlockRange().InsertBefore(call, std::move(range)); |
| 4020 | |
| 4021 | LIR::Range range2 = LIR::SeqTree(comp, lclvNodeStore2); |
JITDUMP("result of obtaining pointer to virtual table 2nd level indirection:\n");
| 4023 | DISPRANGE(range2); |
| 4024 | BlockRange().InsertAfter(lclvNodeStore, std::move(range2)); |
| 4025 | |
| 4026 | result = Ind(comp->gtNewLclvNode(lclNumTmp2, result->TypeGet())); |
| 4027 | result = |
| 4028 | comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, result, comp->gtNewLclvNode(lclNumTmp2, result->TypeGet())); |
| 4029 | } |
| 4030 | else |
| 4031 | { |
| 4032 | // result = [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection] |
| 4033 | result = Ind(Offset(result, vtabOffsOfIndirection)); |
| 4034 | } |
| 4035 | } |
| 4036 | else |
| 4037 | { |
| 4038 | assert(!isRelative); |
| 4039 | } |
| 4040 | |
| 4041 | // Load the function address |
| 4042 | // result = [reg+vtabOffs] |
| 4043 | if (!isRelative) |
| 4044 | { |
| 4045 | result = Ind(Offset(result, vtabOffsAfterIndirection)); |
| 4046 | } |
| 4047 | |
| 4048 | return result; |
| 4049 | } |
| 4050 | |
| 4051 | // Lower stub dispatched virtual calls. |
| 4052 | GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) |
| 4053 | { |
| 4054 | assert(call->IsVirtualStub()); |
| 4055 | |
| 4056 | // An x86 JIT which uses full stub dispatch must generate only |
| 4057 | // the following stub dispatch calls: |
| 4058 | // |
| 4059 | // (1) isCallRelativeIndirect: |
| 4060 | // call dword ptr [rel32] ; FF 15 ---rel32---- |
| 4061 | // (2) isCallRelative: |
| 4062 | // call abc ; E8 ---rel32---- |
| 4063 | // (3) isCallRegisterIndirect: |
| 4064 | // 3-byte nop ; |
| 4065 | // call dword ptr [eax] ; FF 10 |
| 4066 | // |
| 4067 | // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN |
| 4068 | // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect. |
| 4069 | |
| 4070 | GenTree* result = nullptr; |
| 4071 | |
| 4072 | #ifdef _TARGET_64BIT_ |
| 4073 | // Non-tail calls: Jump Stubs are not taken into account by VM for mapping an AV into a NullRef |
| 4074 | // exception. Therefore, JIT needs to emit an explicit null check. Note that Jit64 too generates |
| 4075 | // an explicit null check. |
| 4076 | // |
| 4077 | // Tail calls: fgMorphTailCall() materializes null check explicitly and hence no need to emit |
| 4078 | // null check. |
| 4079 | |
| 4080 | // Non-64-bit: No need to null check the this pointer - the dispatch code will deal with this. |
| 4081 | // The VM considers exceptions that occur in stubs on 64-bit to be not managed exceptions and |
| 4082 | // it would be difficult to change this in a way so that it affects only the right stubs. |
| 4083 | |
| 4084 | if (!call->IsTailCallViaHelper()) |
| 4085 | { |
| 4086 | call->gtFlags |= GTF_CALL_NULLCHECK; |
| 4087 | } |
| 4088 | #endif |
| 4089 | |
| 4090 | // This is code to set up an indirect call to a stub address computed |
| 4091 | // via dictionary lookup. |
| 4092 | if (call->gtCallType == CT_INDIRECT) |
| 4093 | { |
| 4094 | // The importer decided we needed a stub call via a computed |
| 4095 | // stub dispatch address, i.e. an address which came from a dictionary lookup. |
| 4096 | // - The dictionary lookup produces an indirected address, suitable for call |
| 4097 | // via "call [VirtualStubParam.reg]" |
| 4098 | // |
| 4099 | // This combination will only be generated for shared generic code and when |
| 4100 | // stub dispatch is active. |
| 4101 | |
| 4102 | // fgMorphArgs will have created trees to pass the address in VirtualStubParam.reg. |
| 4103 | // All we have to do here is add an indirection to generate the actual call target. |
| 4104 | |
| 4105 | GenTree* ind = Ind(call->gtCallAddr); |
| 4106 | BlockRange().InsertAfter(call->gtCallAddr, ind); |
| 4107 | call->gtCallAddr = ind; |
| 4108 | |
| 4109 | ind->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; |
| 4110 | |
| 4111 | ContainCheckIndir(ind->AsIndir()); |
| 4112 | } |
| 4113 | else |
| 4114 | { |
| 4115 | // Direct stub call. |
| 4116 | // Get stub addr. This will return NULL if virtual call stubs are not active |
| 4117 | void* stubAddr = call->gtStubCallStubAddr; |
| 4118 | noway_assert(stubAddr != nullptr); |
| 4119 | |
// If not CT_INDIRECT, then it should always be a relative indirect call.
// This is ensured by the VM.
| 4122 | noway_assert(call->IsVirtualStubRelativeIndir()); |
| 4123 | |
// This is a direct stub call, though the stubAddr itself may still need to be
// accessed via an indirection.
| 4126 | GenTree* addr = AddrGen(stubAddr); |
| 4127 | |
| 4128 | #ifdef _TARGET_X86_ |
| 4129 | // On x86, for tailcall via helper, the JIT_TailCall helper takes the stubAddr as |
| 4130 | // the target address, and we set a flag that it's a VSD call. The helper then |
| 4131 | // handles any necessary indirection. |
| 4132 | if (call->IsTailCallViaHelper()) |
| 4133 | { |
| 4134 | result = addr; |
| 4135 | } |
| 4136 | #endif // _TARGET_X86_ |
| 4137 | |
| 4138 | if (result == nullptr) |
| 4139 | { |
| 4140 | result = Ind(addr); |
| 4141 | } |
| 4142 | } |
| 4143 | |
| 4144 | // TODO-Cleanup: start emitting random NOPS |
| 4145 | return result; |
| 4146 | } |
| 4147 | |
| 4148 | //------------------------------------------------------------------------ |
| 4149 | // AddrModeCleanupHelper: Remove the nodes that are no longer used after an |
| 4150 | // addressing mode is constructed |
| 4151 | // |
| 4152 | // Arguments: |
| 4153 | // addrMode - A pointer to a new GenTreeAddrMode |
| 4154 | // node - The node currently being considered for removal |
| 4155 | // |
| 4156 | // Return Value: |
| 4157 | // None. |
| 4158 | // |
| 4159 | // Assumptions: |
| 4160 | // 'addrMode' and 'node' must be contained in the current block |
| 4161 | // |
| 4162 | void Lowering::AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node) |
| 4163 | { |
| 4164 | if (node == addrMode->Base() || node == addrMode->Index()) |
| 4165 | { |
| 4166 | return; |
| 4167 | } |
| 4168 | |
| 4169 | // TODO-LIR: change this to use the LIR mark bit and iterate instead of recursing |
| 4170 | node->VisitOperands([this, addrMode](GenTree* operand) -> GenTree::VisitResult { |
| 4171 | AddrModeCleanupHelper(addrMode, operand); |
| 4172 | return GenTree::VisitResult::Continue; |
| 4173 | }); |
| 4174 | |
| 4175 | BlockRange().Remove(node); |
| 4176 | } |
| 4177 | |
| 4178 | //------------------------------------------------------------------------ |
// Lowering::AreSourcesPossiblyModifiedLocals:
| 4180 | // Given two nodes which will be used in an addressing mode (base, |
| 4181 | // index), check to see if they are lclVar reads, and if so, walk |
| 4182 | // backwards from the use until both reads have been visited to |
| 4183 | // determine if they are potentially modified in that range. |
| 4184 | // |
| 4185 | // Arguments: |
| 4186 | // addr - the node that uses the base and index nodes |
| 4187 | // base - the base node |
| 4188 | // index - the index node |
| 4189 | // |
| 4190 | // Returns: true if either the base or index may be modified between the |
| 4191 | // node and addr. |
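//
// Notes:
//    For example, if 'base' is a use of V01 and a STORE_LCL_VAR<V01> appears between that
//    use and 'addr', this returns true and the caller will not build an address mode.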
| 4192 | // |
| 4193 | bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index) |
| 4194 | { |
| 4195 | assert(addr != nullptr); |
| 4196 | |
| 4197 | unsigned markCount = 0; |
| 4198 | |
| 4199 | SideEffectSet baseSideEffects; |
| 4200 | if (base != nullptr) |
| 4201 | { |
| 4202 | if (base->OperIsLocalRead()) |
| 4203 | { |
| 4204 | baseSideEffects.AddNode(comp, base); |
| 4205 | } |
| 4206 | else |
| 4207 | { |
| 4208 | base = nullptr; |
| 4209 | } |
| 4210 | } |
| 4211 | |
| 4212 | SideEffectSet indexSideEffects; |
| 4213 | if (index != nullptr) |
| 4214 | { |
| 4215 | if (index->OperIsLocalRead()) |
| 4216 | { |
| 4217 | indexSideEffects.AddNode(comp, index); |
| 4218 | } |
| 4219 | else |
| 4220 | { |
| 4221 | index = nullptr; |
| 4222 | } |
| 4223 | } |
| 4224 | |
| 4225 | for (GenTree* cursor = addr;; cursor = cursor->gtPrev) |
| 4226 | { |
| 4227 | assert(cursor != nullptr); |
| 4228 | |
| 4229 | if (cursor == base) |
| 4230 | { |
| 4231 | base = nullptr; |
| 4232 | } |
| 4233 | |
| 4234 | if (cursor == index) |
| 4235 | { |
| 4236 | index = nullptr; |
| 4237 | } |
| 4238 | |
| 4239 | if ((base == nullptr) && (index == nullptr)) |
| 4240 | { |
| 4241 | return false; |
| 4242 | } |
| 4243 | |
| 4244 | m_scratchSideEffects.Clear(); |
| 4245 | m_scratchSideEffects.AddNode(comp, cursor); |
| 4246 | if ((base != nullptr) && m_scratchSideEffects.InterferesWith(baseSideEffects, false)) |
| 4247 | { |
| 4248 | return true; |
| 4249 | } |
| 4250 | |
| 4251 | if ((index != nullptr) && m_scratchSideEffects.InterferesWith(indexSideEffects, false)) |
| 4252 | { |
| 4253 | return true; |
| 4254 | } |
| 4255 | } |
| 4256 | } |
| 4257 | |
| 4258 | //------------------------------------------------------------------------ |
| 4259 | // TryCreateAddrMode: recognize trees which can be implemented using an |
| 4260 | // addressing mode and transform them to a GT_LEA |
| 4261 | // |
| 4262 | // Arguments: |
| 4263 | // use: the use of the address we want to transform |
| 4264 | // isIndir: true if this addressing mode is the child of an indir |
| 4265 | // |
| 4266 | // Returns: |
| 4267 | // The created LEA node or the original address node if an LEA could |
| 4268 | // not be formed. |
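//
// Notes:
//    For example, an indir whose address is
//        ADD(ADD(lclX, LSH(lclY, 3)), 32)
//    can typically be turned into
//        GT_LEA(lclX + lclY*8 + 32)
//    with the now-unused ADD/LSH/constant nodes removed by AddrModeCleanupHelper.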
| 4269 | // |
| 4270 | GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir) |
| 4271 | { |
| 4272 | GenTree* addr = use.Def(); |
| 4273 | GenTree* base = nullptr; |
| 4274 | GenTree* index = nullptr; |
| 4275 | unsigned scale = 0; |
| 4276 | ssize_t offset = 0; |
| 4277 | bool rev = false; |
| 4278 | |
| 4279 | // TODO-1stClassStructs: This logic is here to preserve prior behavior. Note that previously |
| 4280 | // block ops were not considered for addressing modes, but an add under it may have been. |
| 4281 | // This should be replaced with logic that more carefully determines when an addressing mode |
| 4282 | // would be beneficial for a block op. |
| 4283 | if (isIndir) |
| 4284 | { |
| 4285 | GenTree* indir = use.User(); |
| 4286 | if (indir->TypeGet() == TYP_STRUCT) |
| 4287 | { |
| 4288 | isIndir = false; |
| 4289 | } |
| 4290 | else if (varTypeIsStruct(indir)) |
| 4291 | { |
| 4292 | // We can have an indirection on the rhs of a block copy (it is the source |
| 4293 | // object). This is not a "regular" indirection. |
| 4294 | // (Note that the user check could be costly.) |
| 4295 | LIR::Use indirUse; |
| 4296 | if (BlockRange().TryGetUse(indir, &indirUse) && indirUse.User()->OperIsIndir()) |
| 4297 | { |
| 4298 | isIndir = false; |
| 4299 | } |
| 4300 | else |
| 4301 | { |
| 4302 | isIndir = !indir->OperIsBlk(); |
| 4303 | } |
| 4304 | } |
| 4305 | } |
| 4306 | |
| 4307 | // Find out if an addressing mode can be constructed |
| 4308 | bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address |
| 4309 | true, // fold |
| 4310 | &rev, // reverse ops |
| 4311 | &base, // base addr |
| 4312 | &index, // index val |
| 4313 | #if SCALED_ADDR_MODES |
| 4314 | &scale, // scaling |
| 4315 | #endif // SCALED_ADDR_MODES |
| 4316 | &offset); // displacement |
| 4317 | |
| 4318 | if (scale == 0) |
| 4319 | { |
| 4320 | scale = 1; |
| 4321 | } |
| 4322 | |
| 4323 | if (!isIndir) |
| 4324 | { |
| 4325 | // this is just a reg-const add |
| 4326 | if (index == nullptr) |
| 4327 | { |
| 4328 | return addr; |
| 4329 | } |
| 4330 | |
| 4331 | // this is just a reg-reg add |
| 4332 | if (scale == 1 && offset == 0) |
| 4333 | { |
| 4334 | return addr; |
| 4335 | } |
| 4336 | } |
| 4337 | |
// Make sure there are no side effects between the defs of the leaves and their use.
| 4339 | if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index)) |
| 4340 | { |
JITDUMP("No addressing mode:\n ");
| 4342 | DISPNODE(addr); |
| 4343 | return addr; |
| 4344 | } |
| 4345 | |
| 4346 | GenTree* arrLength = nullptr; |
| 4347 | |
JITDUMP("Addressing mode:\n");
JITDUMP(" Base\n ");
| 4350 | DISPNODE(base); |
| 4351 | if (index != nullptr) |
| 4352 | { |
JITDUMP(" + Index * %u + %d\n ", scale, offset);
| 4354 | DISPNODE(index); |
| 4355 | } |
| 4356 | else |
| 4357 | { |
JITDUMP(" + %d\n", offset);
| 4359 | } |
| 4360 | |
| 4361 | var_types addrModeType = addr->TypeGet(); |
| 4362 | if (addrModeType == TYP_REF) |
| 4363 | { |
| 4364 | addrModeType = TYP_BYREF; |
| 4365 | } |
| 4366 | |
| 4367 | GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset); |
| 4368 | |
| 4369 | // Neither the base nor the index should now be contained. |
| 4370 | if (base != nullptr) |
| 4371 | { |
| 4372 | base->ClearContained(); |
| 4373 | } |
| 4374 | if (index != nullptr) |
| 4375 | { |
| 4376 | index->ClearContained(); |
| 4377 | } |
| 4378 | addrMode->gtFlags |= (addr->gtFlags & GTF_IND_FLAGS); |
| 4379 | addrMode->gtFlags &= ~GTF_ALL_EFFECT; // LEAs are side-effect-free. |
| 4380 | |
JITDUMP("New addressing mode node:\n");
DISPNODE(addrMode);
JITDUMP("\n");
| 4384 | |
| 4385 | BlockRange().InsertAfter(addr, addrMode); |
| 4386 | |
| 4387 | // Now we need to remove all the nodes subsumed by the addrMode |
| 4388 | AddrModeCleanupHelper(addrMode, addr); |
| 4389 | |
| 4390 | // Replace the original address node with the addrMode. |
| 4391 | use.ReplaceWith(comp, addrMode); |
| 4392 | |
| 4393 | return addrMode; |
| 4394 | } |
| 4395 | |
| 4396 | //------------------------------------------------------------------------ |
| 4397 | // LowerAdd: turn this add into a GT_LEA if that would be profitable |
| 4398 | // |
| 4399 | // Arguments: |
| 4400 | // node - the node we care about |
| 4401 | // |
| 4402 | // Returns: |
| 4403 | // The next node to lower if we have transformed the ADD; nullptr otherwise. |
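//
// Notes:
//    For example, on non-ARM targets an add that is not under an indir, such as
//        t = ADD(lclX, LSH(lclY, 2))
//    can become
//        t = LEA(lclX + lclY*4)
//    when TryCreateAddrMode succeeds.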
| 4404 | // |
| 4405 | GenTree* Lowering::LowerAdd(GenTree* node) |
| 4406 | { |
| 4407 | GenTree* next = node->gtNext; |
| 4408 | |
| 4409 | #ifndef _TARGET_ARMARCH_ |
| 4410 | if (varTypeIsIntegralOrI(node)) |
| 4411 | { |
| 4412 | LIR::Use use; |
| 4413 | if (BlockRange().TryGetUse(node, &use)) |
| 4414 | { |
| 4415 | // If this is a child of an indir, let the parent handle it. |
| 4416 | // If there is a chain of adds, only look at the topmost one. |
| 4417 | GenTree* parent = use.User(); |
| 4418 | if (!parent->OperIsIndir() && (parent->gtOper != GT_ADD)) |
| 4419 | { |
| 4420 | GenTree* addr = TryCreateAddrMode(std::move(use), false); |
| 4421 | if (addr != node) |
| 4422 | { |
| 4423 | return addr->gtNext; |
| 4424 | } |
| 4425 | } |
| 4426 | } |
| 4427 | } |
| 4428 | #endif // !_TARGET_ARMARCH_ |
| 4429 | |
| 4430 | return nullptr; |
| 4431 | } |
| 4432 | |
| 4433 | //------------------------------------------------------------------------ |
| 4434 | // LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node. |
| 4435 | // |
| 4436 | // Arguments: |
| 4437 | // divMod - pointer to the GT_UDIV/GT_UMOD node to be lowered |
| 4438 | // |
| 4439 | // Return Value: |
| 4440 | // Returns a boolean indicating whether the node was transformed. |
| 4441 | // |
| 4442 | // Notes: |
| 4443 | // - Transform UDIV/UMOD by power of 2 into RSZ/AND |
| 4444 | // - Transform UDIV by constant >= 2^(N-1) into GE |
| 4445 | // - Transform UDIV/UMOD by constant >= 3 into "magic division" |
| 4446 | // |
| 4447 | |
| 4448 | bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) |
| 4449 | { |
| 4450 | assert(divMod->OperIs(GT_UDIV, GT_UMOD)); |
| 4451 | |
| 4452 | #if defined(USE_HELPERS_FOR_INT_DIV) |
| 4453 | if (!varTypeIsIntegral(divMod->TypeGet())) |
| 4454 | { |
assert(!"unreachable: integral GT_UDIV/GT_UMOD should get morphed into helper calls");
| 4456 | } |
| 4457 | assert(varTypeIsFloating(divMod->TypeGet())); |
| 4458 | #endif // USE_HELPERS_FOR_INT_DIV |
| 4459 | #if defined(_TARGET_ARM64_) |
| 4460 | assert(divMod->OperGet() != GT_UMOD); |
| 4461 | #endif // _TARGET_ARM64_ |
| 4462 | |
| 4463 | GenTree* next = divMod->gtNext; |
| 4464 | GenTree* dividend = divMod->gtGetOp1(); |
| 4465 | GenTree* divisor = divMod->gtGetOp2(); |
| 4466 | |
| 4467 | #if !defined(_TARGET_64BIT_) |
| 4468 | if (dividend->OperIs(GT_LONG)) |
| 4469 | { |
| 4470 | return false; |
| 4471 | } |
| 4472 | #endif |
| 4473 | |
| 4474 | if (!divisor->IsCnsIntOrI()) |
| 4475 | { |
| 4476 | return false; |
| 4477 | } |
| 4478 | |
| 4479 | if (dividend->IsCnsIntOrI()) |
| 4480 | { |
| 4481 | // We shouldn't see a divmod with constant operands here but if we do then it's likely |
| 4482 | // because optimizations are disabled or it's a case that's supposed to throw an exception. |
| 4483 | // Don't optimize this. |
| 4484 | return false; |
| 4485 | } |
| 4486 | |
| 4487 | const var_types type = divMod->TypeGet(); |
| 4488 | assert((type == TYP_INT) || (type == TYP_I_IMPL)); |
| 4489 | |
| 4490 | size_t divisorValue = static_cast<size_t>(divisor->AsIntCon()->IconValue()); |
| 4491 | |
| 4492 | if (type == TYP_INT) |
| 4493 | { |
// Clear the upper 32 bits of the value; they may be set to 1 because constants
// are treated as signed and stored in ssize_t, which is 64 bits wide on 64-bit targets.
| 4496 | divisorValue &= UINT32_MAX; |
| 4497 | } |
| 4498 | |
| 4499 | if (divisorValue == 0) |
| 4500 | { |
| 4501 | return false; |
| 4502 | } |
| 4503 | |
| 4504 | const bool isDiv = divMod->OperIs(GT_UDIV); |
| 4505 | |
| 4506 | if (isPow2(divisorValue)) |
| 4507 | { |
| 4508 | genTreeOps newOper; |
| 4509 | |
| 4510 | if (isDiv) |
| 4511 | { |
| 4512 | newOper = GT_RSZ; |
| 4513 | divisorValue = genLog2(divisorValue); |
| 4514 | } |
| 4515 | else |
| 4516 | { |
| 4517 | newOper = GT_AND; |
| 4518 | divisorValue -= 1; |
| 4519 | } |
| 4520 | |
| 4521 | divMod->SetOper(newOper); |
| 4522 | divisor->gtIntCon.SetIconValue(divisorValue); |
| 4523 | ContainCheckNode(divMod); |
| 4524 | return true; |
| 4525 | } |
| 4526 | if (isDiv) |
| 4527 | { |
// If the divisor is greater than or equal to 2^(N - 1) then the result is 1
// iff the dividend is greater than or equal to the divisor.
| 4530 | if (((type == TYP_INT) && (divisorValue > (UINT32_MAX / 2))) || |
| 4531 | ((type == TYP_LONG) && (divisorValue > (UINT64_MAX / 2)))) |
| 4532 | { |
| 4533 | divMod->SetOper(GT_GE); |
| 4534 | divMod->gtFlags |= GTF_UNSIGNED; |
| 4535 | ContainCheckNode(divMod); |
| 4536 | return true; |
| 4537 | } |
| 4538 | } |
| 4539 | |
| 4540 | // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 |
| 4541 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
| 4542 | if (!comp->opts.MinOpts() && (divisorValue >= 3)) |
| 4543 | { |
| 4544 | size_t magic; |
| 4545 | bool add; |
| 4546 | int shift; |
| 4547 | |
| 4548 | if (type == TYP_INT) |
| 4549 | { |
| 4550 | magic = MagicDivide::GetUnsigned32Magic(static_cast<uint32_t>(divisorValue), &add, &shift); |
| 4551 | } |
| 4552 | else |
| 4553 | { |
| 4554 | #ifdef _TARGET_64BIT_ |
| 4555 | magic = MagicDivide::GetUnsigned64Magic(static_cast<uint64_t>(divisorValue), &add, &shift); |
| 4556 | #else |
| 4557 | unreached(); |
| 4558 | #endif |
| 4559 | } |
| 4560 | |
// Depending on the "add" flag returned by GetUnsigned32Magic/GetUnsigned64Magic we need to generate:
| 4562 | // add == false (when divisor == 3 for example): |
| 4563 | // div = (dividend MULHI magic) RSZ shift |
| 4564 | // add == true (when divisor == 7 for example): |
| 4565 | // mulhi = dividend MULHI magic |
| 4566 | // div = (((dividend SUB mulhi) RSZ 1) ADD mulhi)) RSZ (shift - 1) |
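//
// For instance, with divisor == 3 on TYP_INT the usual magic-number computation gives
// magic == 0xAAAAAAAB, add == false and shift == 1, so for dividend == 100:
//   (100 MULHI 0xAAAAAAAB) == 66 and 66 RSZ 1 == 33 == 100 / 3.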
| 4567 | const bool requiresAdjustment = add; |
| 4568 | const bool requiresDividendMultiuse = requiresAdjustment || !isDiv; |
| 4569 | const unsigned curBBWeight = m_block->getBBWeight(comp); |
| 4570 | |
| 4571 | if (requiresDividendMultiuse) |
| 4572 | { |
| 4573 | LIR::Use dividendUse(BlockRange(), &divMod->gtOp1, divMod); |
| 4574 | dividend = ReplaceWithLclVar(dividendUse); |
| 4575 | } |
| 4576 | |
| 4577 | // Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node. |
| 4578 | // The existing node will later be transformed into a GT_RSZ/GT_SUB that |
// computes the final result. This way we don't need to find and change the use
| 4580 | // of the existing node. |
| 4581 | GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, dividend, divisor); |
| 4582 | mulhi->gtFlags |= GTF_UNSIGNED; |
| 4583 | divisor->AsIntCon()->SetIconValue(magic); |
| 4584 | BlockRange().InsertBefore(divMod, mulhi); |
| 4585 | GenTree* firstNode = mulhi; |
| 4586 | |
| 4587 | if (requiresAdjustment) |
| 4588 | { |
| 4589 | dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()); |
| 4590 | GenTree* sub = comp->gtNewOperNode(GT_SUB, type, dividend, mulhi); |
| 4591 | BlockRange().InsertBefore(divMod, dividend, sub); |
| 4592 | |
| 4593 | GenTree* one = comp->gtNewIconNode(1, TYP_INT); |
| 4594 | GenTree* rsz = comp->gtNewOperNode(GT_RSZ, type, sub, one); |
| 4595 | BlockRange().InsertBefore(divMod, one, rsz); |
| 4596 | |
| 4597 | LIR::Use mulhiUse(BlockRange(), &sub->gtOp.gtOp2, sub); |
| 4598 | mulhi = ReplaceWithLclVar(mulhiUse); |
| 4599 | |
| 4600 | mulhi = comp->gtNewLclvNode(mulhi->AsLclVar()->GetLclNum(), mulhi->TypeGet()); |
| 4601 | GenTree* add = comp->gtNewOperNode(GT_ADD, type, rsz, mulhi); |
| 4602 | BlockRange().InsertBefore(divMod, mulhi, add); |
| 4603 | |
| 4604 | mulhi = add; |
| 4605 | shift -= 1; |
| 4606 | } |
| 4607 | |
| 4608 | GenTree* shiftBy = comp->gtNewIconNode(shift, TYP_INT); |
| 4609 | BlockRange().InsertBefore(divMod, shiftBy); |
| 4610 | |
| 4611 | if (isDiv) |
| 4612 | { |
| 4613 | divMod->SetOper(GT_RSZ); |
| 4614 | divMod->gtOp1 = mulhi; |
| 4615 | divMod->gtOp2 = shiftBy; |
| 4616 | } |
| 4617 | else |
| 4618 | { |
| 4619 | GenTree* div = comp->gtNewOperNode(GT_RSZ, type, mulhi, shiftBy); |
| 4620 | |
// dividend UMOD divisor = dividend SUB (div MUL divisor)
| 4622 | GenTree* divisor = comp->gtNewIconNode(divisorValue, type); |
| 4623 | GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor); |
| 4624 | dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()); |
| 4625 | |
| 4626 | divMod->SetOper(GT_SUB); |
| 4627 | divMod->gtOp1 = dividend; |
| 4628 | divMod->gtOp2 = mul; |
| 4629 | |
| 4630 | BlockRange().InsertBefore(divMod, div, divisor, mul, dividend); |
| 4631 | } |
| 4632 | ContainCheckRange(firstNode, divMod); |
| 4633 | |
| 4634 | return true; |
| 4635 | } |
| 4636 | #endif |
| 4637 | return false; |
| 4638 | } |
| 4639 | |
// LowerConstIntDivOrMod: Transform integer GT_DIV/GT_MOD nodes with a constant
// divisor into equivalent but faster sequences.
| 4642 | // |
| 4643 | // Arguments: |
| 4644 | // node - pointer to the DIV or MOD node |
| 4645 | // |
| 4646 | // Returns: |
| 4647 | // nullptr if no transformation is done, or the next node in the transformed node sequence that |
| 4648 | // needs to be lowered. |
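//
// Notes:
//    For a power of 2 divisor the signed case becomes, for example, for x / 4 (TYP_INT):
//        div = (x + ((x >> 31) & 3)) >> 2
//    with an additional negation of the result for x / -4.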
| 4649 | // |
| 4650 | GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) |
| 4651 | { |
| 4652 | assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD)); |
| 4653 | GenTree* divMod = node; |
| 4654 | GenTree* dividend = divMod->gtGetOp1(); |
| 4655 | GenTree* divisor = divMod->gtGetOp2(); |
| 4656 | |
| 4657 | const var_types type = divMod->TypeGet(); |
| 4658 | assert((type == TYP_INT) || (type == TYP_LONG)); |
| 4659 | |
| 4660 | #if defined(USE_HELPERS_FOR_INT_DIV) |
assert(!"unreachable: integral GT_DIV/GT_MOD should get morphed into helper calls");
| 4662 | #endif // USE_HELPERS_FOR_INT_DIV |
| 4663 | #if defined(_TARGET_ARM64_) |
| 4664 | assert(node->OperGet() != GT_MOD); |
| 4665 | #endif // _TARGET_ARM64_ |
| 4666 | |
| 4667 | if (!divisor->IsCnsIntOrI()) |
| 4668 | { |
| 4669 | return nullptr; // no transformations to make |
| 4670 | } |
| 4671 | |
| 4672 | if (dividend->IsCnsIntOrI()) |
| 4673 | { |
| 4674 | // We shouldn't see a divmod with constant operands here but if we do then it's likely |
| 4675 | // because optimizations are disabled or it's a case that's supposed to throw an exception. |
| 4676 | // Don't optimize this. |
| 4677 | return nullptr; |
| 4678 | } |
| 4679 | |
| 4680 | ssize_t divisorValue = divisor->gtIntCon.IconValue(); |
| 4681 | |
| 4682 | if (divisorValue == -1 || divisorValue == 0) |
| 4683 | { |
| 4684 | // x / 0 and x % 0 can't be optimized because they are required to throw an exception. |
| 4685 | |
| 4686 | // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception. |
| 4687 | |
| 4688 | // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is |
| 4689 | // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this |
| 4690 | // case so optimizing this case would break C# code. |
| 4691 | |
| 4692 | // A runtime check could be used to handle this case but it's probably too rare to matter. |
| 4693 | return nullptr; |
| 4694 | } |
| 4695 | |
| 4696 | bool isDiv = divMod->OperGet() == GT_DIV; |
| 4697 | |
| 4698 | if (isDiv) |
| 4699 | { |
| 4700 | if ((type == TYP_INT && divisorValue == INT_MIN) || (type == TYP_LONG && divisorValue == INT64_MIN)) |
| 4701 | { |
| 4702 | // If the divisor is the minimum representable integer value then we can use a compare, |
| 4703 | // the result is 1 iff the dividend equals divisor. |
| 4704 | divMod->SetOper(GT_EQ); |
| 4705 | return node; |
| 4706 | } |
| 4707 | } |
| 4708 | |
| 4709 | size_t absDivisorValue = |
| 4710 | (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue)); |
| 4711 | |
| 4712 | if (!isPow2(absDivisorValue)) |
| 4713 | { |
| 4714 | if (comp->opts.MinOpts()) |
| 4715 | { |
| 4716 | return nullptr; |
| 4717 | } |
| 4718 | |
| 4719 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
| 4720 | ssize_t magic; |
| 4721 | int shift; |
| 4722 | |
| 4723 | if (type == TYP_INT) |
| 4724 | { |
| 4725 | magic = MagicDivide::GetSigned32Magic(static_cast<int32_t>(divisorValue), &shift); |
| 4726 | } |
| 4727 | else |
| 4728 | { |
| 4729 | #ifdef _TARGET_64BIT_ |
| 4730 | magic = MagicDivide::GetSigned64Magic(static_cast<int64_t>(divisorValue), &shift); |
| 4731 | #else // !_TARGET_64BIT_ |
| 4732 | unreached(); |
| 4733 | #endif // !_TARGET_64BIT_ |
| 4734 | } |
| 4735 | |
| 4736 | divisor->gtIntConCommon.SetIconValue(magic); |
| 4737 | |
| 4738 | // Insert a new GT_MULHI node in front of the existing GT_DIV/GT_MOD node. |
| 4739 | // The existing node will later be transformed into a GT_ADD/GT_SUB that |
// computes the final result. This way we don't need to find and change the
| 4741 | // use of the existing node. |
| 4742 | GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, divisor, dividend); |
| 4743 | BlockRange().InsertBefore(divMod, mulhi); |
| 4744 | |
| 4745 | // mulhi was the easy part. Now we need to generate different code depending |
| 4746 | // on the divisor value: |
| 4747 | // For 3 we need: |
| 4748 | // div = signbit(mulhi) + mulhi |
| 4749 | // For 5 we need: |
| 4750 | // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust |
| 4751 | // For 7 we need: |
| 4752 | // mulhi += dividend ; requires add adjust |
| 4753 | // div = signbit(mulhi) + sar(mulhi, 2) ; requires shift adjust |
| 4754 | // For -3 we need: |
| 4755 | // mulhi -= dividend ; requires sub adjust |
| 4756 | // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust |
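//
// For instance, with divisor == 3 on TYP_INT the usual magic-number computation gives
// magic == 0x55555556 and shift == 0, so for dividend == 100:
//   mulhi = (100 MULHI 0x55555556) = 33
//   div   = signbit(mulhi) + mulhi = 0 + 33 = 33 = 100 / 3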
| 4757 | bool requiresAddSubAdjust = signum(divisorValue) != signum(magic); |
| 4758 | bool requiresShiftAdjust = shift != 0; |
| 4759 | bool requiresDividendMultiuse = requiresAddSubAdjust || !isDiv; |
| 4760 | unsigned curBBWeight = comp->compCurBB->getBBWeight(comp); |
| 4761 | |
| 4762 | if (requiresDividendMultiuse) |
| 4763 | { |
| 4764 | LIR::Use dividendUse(BlockRange(), &mulhi->gtOp.gtOp2, mulhi); |
| 4765 | dividend = ReplaceWithLclVar(dividendUse); |
| 4766 | } |
| 4767 | |
| 4768 | GenTree* adjusted; |
| 4769 | |
| 4770 | if (requiresAddSubAdjust) |
| 4771 | { |
| 4772 | dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()); |
| 4773 | adjusted = comp->gtNewOperNode(divisorValue > 0 ? GT_ADD : GT_SUB, type, mulhi, dividend); |
| 4774 | BlockRange().InsertBefore(divMod, dividend, adjusted); |
| 4775 | } |
| 4776 | else |
| 4777 | { |
| 4778 | adjusted = mulhi; |
| 4779 | } |
| 4780 | |
| 4781 | GenTree* shiftBy = comp->gtNewIconNode(genTypeSize(type) * 8 - 1, type); |
| 4782 | GenTree* signBit = comp->gtNewOperNode(GT_RSZ, type, adjusted, shiftBy); |
| 4783 | BlockRange().InsertBefore(divMod, shiftBy, signBit); |
| 4784 | |
| 4785 | LIR::Use adjustedUse(BlockRange(), &signBit->gtOp.gtOp1, signBit); |
| 4786 | adjusted = ReplaceWithLclVar(adjustedUse); |
| 4787 | adjusted = comp->gtNewLclvNode(adjusted->AsLclVar()->GetLclNum(), adjusted->TypeGet()); |
| 4788 | BlockRange().InsertBefore(divMod, adjusted); |
| 4789 | |
| 4790 | if (requiresShiftAdjust) |
| 4791 | { |
| 4792 | shiftBy = comp->gtNewIconNode(shift, TYP_INT); |
| 4793 | adjusted = comp->gtNewOperNode(GT_RSH, type, adjusted, shiftBy); |
| 4794 | BlockRange().InsertBefore(divMod, shiftBy, adjusted); |
| 4795 | } |
| 4796 | |
| 4797 | if (isDiv) |
| 4798 | { |
| 4799 | divMod->SetOperRaw(GT_ADD); |
| 4800 | divMod->gtOp.gtOp1 = adjusted; |
| 4801 | divMod->gtOp.gtOp2 = signBit; |
| 4802 | } |
| 4803 | else |
| 4804 | { |
| 4805 | GenTree* div = comp->gtNewOperNode(GT_ADD, type, adjusted, signBit); |
| 4806 | |
| 4807 | dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()); |
| 4808 | |
// dividend % divisor = dividend - divisor x div
| 4810 | GenTree* divisor = comp->gtNewIconNode(divisorValue, type); |
| 4811 | GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor); |
| 4812 | BlockRange().InsertBefore(divMod, dividend, div, divisor, mul); |
| 4813 | |
| 4814 | divMod->SetOperRaw(GT_SUB); |
| 4815 | divMod->gtOp.gtOp1 = dividend; |
| 4816 | divMod->gtOp.gtOp2 = mul; |
| 4817 | } |
| 4818 | |
| 4819 | return mulhi; |
| 4820 | #elif defined(_TARGET_ARM_) |
| 4821 | // Currently there's no GT_MULHI for ARM32 |
| 4822 | return nullptr; |
| 4823 | #else |
| 4824 | #error Unsupported or unset target architecture |
| 4825 | #endif |
| 4826 | } |
| 4827 | |
| 4828 | // We're committed to the conversion now. Go find the use if any. |
| 4829 | LIR::Use use; |
| 4830 | if (!BlockRange().TryGetUse(node, &use)) |
| 4831 | { |
| 4832 | return nullptr; |
| 4833 | } |
| 4834 | |
| 4835 | // We need to use the dividend node multiple times so its value needs to be |
| 4836 | // computed once and stored in a temp variable. |
| 4837 | |
| 4838 | unsigned curBBWeight = comp->compCurBB->getBBWeight(comp); |
| 4839 | |
| 4840 | LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod); |
| 4841 | dividend = ReplaceWithLclVar(opDividend); |
| 4842 | |
| 4843 | GenTree* adjustment = comp->gtNewOperNode(GT_RSH, type, dividend, comp->gtNewIconNode(type == TYP_INT ? 31 : 63)); |
| 4844 | |
| 4845 | if (absDivisorValue == 2) |
| 4846 | { |
| 4847 | // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1. |
| 4848 | // We can get the same result by using GT_RSZ instead of GT_RSH. |
| 4849 | adjustment->SetOper(GT_RSZ); |
| 4850 | } |
| 4851 | else |
| 4852 | { |
| 4853 | adjustment = comp->gtNewOperNode(GT_AND, type, adjustment, comp->gtNewIconNode(absDivisorValue - 1, type)); |
| 4854 | } |
| 4855 | |
| 4856 | GenTree* adjustedDividend = |
| 4857 | comp->gtNewOperNode(GT_ADD, type, adjustment, |
| 4858 | comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet())); |
| 4859 | |
| 4860 | GenTree* newDivMod; |
| 4861 | |
| 4862 | if (isDiv) |
| 4863 | { |
| 4864 | // perform the division by right shifting the adjusted dividend |
| 4865 | divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue)); |
| 4866 | |
| 4867 | newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor); |
| 4868 | ContainCheckShiftRotate(newDivMod->AsOp()); |
| 4869 | |
| 4870 | if (divisorValue < 0) |
| 4871 | { |
| 4872 | // negate the result if the divisor is negative |
| 4873 | newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod); |
| 4874 | ContainCheckNode(newDivMod); |
| 4875 | } |
| 4876 | } |
| 4877 | else |
| 4878 | { |
// dividend % divisor = dividend - divisor x (dividend / divisor)
// divisor x (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor)
// which simply discards the low log2(divisor) bits; that's just dividend & ~(divisor - 1)
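//
// For example, with divisor == 8 on TYP_INT this computes
//   x % 8 = x - ((x + ((x >> 31) & 7)) & ~7)
// so for x == -13: -13 - ((-13 + 7) & ~7) = -13 - (-8) = -5, as required.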
| 4882 | divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1)); |
| 4883 | |
| 4884 | newDivMod = comp->gtNewOperNode(GT_SUB, type, |
| 4885 | comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()), |
| 4886 | comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor)); |
| 4887 | } |
| 4888 | |
| 4889 | // Remove the divisor and dividend nodes from the linear order, |
| 4890 | // since we have reused them and will resequence the tree |
| 4891 | BlockRange().Remove(divisor); |
| 4892 | BlockRange().Remove(dividend); |
| 4893 | |
| 4894 | // linearize and insert the new tree before the original divMod node |
| 4895 | InsertTreeBeforeAndContainCheck(divMod, newDivMod); |
| 4896 | BlockRange().Remove(divMod); |
| 4897 | |
| 4898 | // replace the original divmod node with the new divmod tree |
| 4899 | use.ReplaceWith(comp, newDivMod); |
| 4900 | |
| 4901 | return newDivMod->gtNext; |
| 4902 | } |
| 4903 | //------------------------------------------------------------------------ |
| 4904 | // LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2 |
| 4905 | // const divisor into equivalent but faster sequences. |
| 4906 | // |
| 4907 | // Arguments: |
| 4908 | // node - the DIV or MOD node |
| 4909 | // |
| 4910 | // Returns: |
| 4911 | // The next node to lower. |
| 4912 | // |
| 4913 | GenTree* Lowering::LowerSignedDivOrMod(GenTree* node) |
| 4914 | { |
| 4915 | assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD)); |
| 4916 | GenTree* next = node->gtNext; |
| 4917 | GenTree* divMod = node; |
| 4918 | GenTree* dividend = divMod->gtGetOp1(); |
| 4919 | GenTree* divisor = divMod->gtGetOp2(); |
| 4920 | |
| 4921 | if (varTypeIsIntegral(node->TypeGet())) |
| 4922 | { |
| 4923 | // LowerConstIntDivOrMod will return nullptr if it doesn't transform the node. |
| 4924 | GenTree* newNode = LowerConstIntDivOrMod(node); |
| 4925 | if (newNode != nullptr) |
| 4926 | { |
| 4927 | return newNode; |
| 4928 | } |
| 4929 | } |
| 4930 | ContainCheckDivOrMod(node->AsOp()); |
| 4931 | |
| 4932 | return next; |
| 4933 | } |
| 4934 | |
| 4935 | //------------------------------------------------------------------------ |
| 4936 | // LowerShift: Lower shift nodes |
| 4937 | // |
| 4938 | // Arguments: |
| 4939 | // shift - the shift node (GT_LSH, GT_RSH or GT_RSZ) |
| 4940 | // |
| 4941 | // Notes: |
// Remove unnecessary shift count masking; xarch shift instructions
// mask the shift count to 5 bits (or 6 bits for 64-bit operations).
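//
//    For example, for a TYP_INT shift
//        GT_LSH(x, GT_AND(cnt, 31))  =>  GT_LSH(x, cnt)
//    because the instruction masks the count to the low 5 bits anyway.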
| 4944 | |
| 4945 | void Lowering::LowerShift(GenTreeOp* shift) |
| 4946 | { |
| 4947 | assert(shift->OperIs(GT_LSH, GT_RSH, GT_RSZ)); |
| 4948 | |
| 4949 | size_t mask = 0x1f; |
| 4950 | #ifdef _TARGET_64BIT_ |
| 4951 | if (varTypeIsLong(shift->TypeGet())) |
| 4952 | { |
| 4953 | mask = 0x3f; |
| 4954 | } |
| 4955 | #else |
| 4956 | assert(!varTypeIsLong(shift->TypeGet())); |
| 4957 | #endif |
| 4958 | |
| 4959 | for (GenTree* andOp = shift->gtGetOp2(); andOp->OperIs(GT_AND); andOp = andOp->gtGetOp1()) |
| 4960 | { |
| 4961 | GenTree* maskOp = andOp->gtGetOp2(); |
| 4962 | |
| 4963 | if (!maskOp->IsCnsIntOrI()) |
| 4964 | { |
| 4965 | break; |
| 4966 | } |
| 4967 | |
| 4968 | if ((static_cast<size_t>(maskOp->AsIntCon()->IconValue()) & mask) != mask) |
| 4969 | { |
| 4970 | break; |
| 4971 | } |
| 4972 | |
| 4973 | shift->gtOp2 = andOp->gtGetOp1(); |
| 4974 | BlockRange().Remove(andOp); |
| 4975 | BlockRange().Remove(maskOp); |
// The parent was replaced; clear the contained and reg-optional flags.
| 4977 | shift->gtOp2->ClearContained(); |
| 4978 | } |
| 4979 | ContainCheckShiftRotate(shift); |
| 4980 | } |
| 4981 | |
| 4982 | void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node) |
| 4983 | { |
| 4984 | #ifdef FEATURE_SIMD |
| 4985 | if (node->TypeGet() == TYP_SIMD12) |
| 4986 | { |
// Assumption 1:
// The RyuJIT backend depends on the assumption that on 64-bit targets the Vector3 size is
// rounded up to a multiple of TARGET_POINTER_SIZE and hence Vector3 locals on the stack can
// be treated as TYP_SIMD16 for reading and writing purposes.
//
// Assumption 2:
// The RyuJIT backend makes another implicit assumption: when Vector3 type args are passed in
// registers or on the stack, the upper-most 4 bytes will be zero.
//
// For P/Invoke returns and Reverse P/Invoke argument passing, the native compiler doesn't
// guarantee that the upper 4 bytes of a Vector3 struct are zero initialized, and hence
// assumption 2 is invalid.
| 4999 | // |
// RyuJIT x64 Windows: arguments are treated as passed by ref and hence only 12 bytes are
// read/written. In case of Vector3 returns, the caller allocates a zero-initialized Vector3
// local and passes it as the retBuf arg, and the callee writes only 12 bytes to retBuf. For
// this reason, there is no need to clear the upper 4 bytes of Vector3 type args.
//
// RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
// Vector3 return values are returned in two return registers and the caller assembles them into
// a single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
// type args in the prolog and of the Vector3 type return value of a call.
//
// RyuJIT x86 Windows: all non-param Vector3 local vars are allocated as 16 bytes. Vector3
// arguments are pushed as 12 bytes. For return values, a 16-byte local is allocated and the
// address passed as a return buffer pointer. The callee doesn't write the high 4 bytes, and we
// don't need to clear them either.
| 5014 | |
| 5015 | unsigned varNum = node->AsLclVarCommon()->GetLclNum(); |
| 5016 | LclVarDsc* varDsc = &comp->lvaTable[varNum]; |
| 5017 | |
| 5018 | if (comp->lvaMapSimd12ToSimd16(varDsc)) |
| 5019 | { |
JITDUMP("Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n");
DISPNODE(node);
JITDUMP("============");
| 5023 | |
| 5024 | node->gtType = TYP_SIMD16; |
| 5025 | } |
| 5026 | } |
| 5027 | #endif // FEATURE_SIMD |
| 5028 | } |
| 5029 | |
| 5030 | //------------------------------------------------------------------------ |
| 5031 | // LowerArrElem: Lower a GT_ARR_ELEM node |
| 5032 | // |
| 5033 | // Arguments: |
| 5034 | // node - the GT_ARR_ELEM node to lower. |
| 5035 | // |
| 5036 | // Return Value: |
| 5037 | // The next node to lower. |
| 5038 | // |
| 5039 | // Assumptions: |
// 'node' must point to a GT_ARR_ELEM node.
| 5041 | // |
| 5042 | // Notes: |
| 5043 | // This performs the following lowering. We start with a node of the form: |
| 5044 | // /--* <arrObj> |
| 5045 | // +--* <index0> |
| 5046 | // +--* <index1> |
| 5047 | // /--* arrMD&[,] |
| 5048 | // |
| 5049 | // First, we create temps for arrObj if it is not already a lclVar, and for any of the index |
| 5050 | // expressions that have side-effects. |
| 5051 | // We then transform the tree into: |
| 5052 | // <offset is null - no accumulated offset for the first index> |
| 5053 | // /--* <arrObj> |
| 5054 | // +--* <index0> |
| 5055 | // /--* ArrIndex[i, ] |
| 5056 | // +--* <arrObj> |
| 5057 | // /--| arrOffs[i, ] |
| 5058 | // | +--* <arrObj> |
| 5059 | // | +--* <index1> |
| 5060 | // +--* ArrIndex[*,j] |
| 5061 | // +--* <arrObj> |
| 5062 | // /--| arrOffs[*,j] |
| 5063 | // +--* lclVar NewTemp |
| 5064 | // /--* lea (scale = element size, offset = offset of first element) |
| 5065 | // |
| 5066 | // The new stmtExpr may be omitted if the <arrObj> is a lclVar. |
| 5067 | // The new stmtExpr may be embedded if the <arrObj> is not the first tree in linear order for |
| 5068 | // the statement containing the original arrMD. |
| 5069 | // Note that the arrMDOffs is the INDEX of the lea, but is evaluated before the BASE (which is the second |
| 5070 | // reference to NewTemp), because that provides more accurate lifetimes. |
| 5071 | // There may be 1, 2 or 3 dimensions, with 1, 2 or 3 arrMDIdx nodes, respectively. |
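//
// The final lea therefore computes, for example for a rank-2 access a[i, j]:
//   address = a + arrOffs(i, j) * gtArrElemSize + eeGetMDArrayDataOffset(...)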
| 5072 | // |
| 5073 | GenTree* Lowering::LowerArrElem(GenTree* node) |
| 5074 | { |
| 5075 | // This will assert if we don't have an ArrElem node |
| 5076 | GenTreeArrElem* arrElem = node->AsArrElem(); |
| 5077 | const unsigned char rank = arrElem->gtArrElem.gtArrRank; |
| 5078 | const unsigned blockWeight = m_block->getBBWeight(comp); |
| 5079 | |
JITDUMP("Lowering ArrElem\n");
JITDUMP("============\n");
DISPTREERANGE(BlockRange(), arrElem);
JITDUMP("\n");
| 5084 | |
| 5085 | assert(arrElem->gtArrObj->TypeGet() == TYP_REF); |
| 5086 | |
| 5087 | // We need to have the array object in a lclVar. |
| 5088 | if (!arrElem->gtArrObj->IsLocal()) |
| 5089 | { |
| 5090 | LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem); |
| 5091 | ReplaceWithLclVar(arrObjUse); |
| 5092 | } |
| 5093 | |
| 5094 | GenTree* arrObjNode = arrElem->gtArrObj; |
| 5095 | assert(arrObjNode->IsLocal()); |
| 5096 | |
| 5097 | LclVarDsc* const varDsc = &comp->lvaTable[arrElem->gtArrObj->AsLclVarCommon()->gtLclNum]; |
| 5098 | |
| 5099 | GenTree* insertionPoint = arrElem; |
| 5100 | |
| 5101 | // The first ArrOffs node will have 0 for the offset of the previous dimension. |
| 5102 | GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0); |
| 5103 | BlockRange().InsertBefore(insertionPoint, prevArrOffs); |
| 5104 | GenTree* nextToLower = prevArrOffs; |
| 5105 | |
| 5106 | for (unsigned char dim = 0; dim < rank; dim++) |
| 5107 | { |
| 5108 | GenTree* indexNode = arrElem->gtArrElem.gtArrInds[dim]; |
| 5109 | |
| 5110 | // Use the original arrObjNode on the 0th ArrIndex node, and clone it for subsequent ones. |
| 5111 | GenTree* idxArrObjNode; |
| 5112 | if (dim == 0) |
| 5113 | { |
| 5114 | idxArrObjNode = arrObjNode; |
| 5115 | } |
| 5116 | else |
| 5117 | { |
| 5118 | idxArrObjNode = comp->gtClone(arrObjNode); |
| 5119 | BlockRange().InsertBefore(insertionPoint, idxArrObjNode); |
| 5120 | } |
| 5121 | |
| 5122 | // Next comes the GT_ARR_INDEX node. |
| 5123 | GenTreeArrIndex* arrMDIdx = new (comp, GT_ARR_INDEX) |
| 5124 | GenTreeArrIndex(TYP_INT, idxArrObjNode, indexNode, dim, rank, arrElem->gtArrElem.gtArrElemType); |
| 5125 | arrMDIdx->gtFlags |= ((idxArrObjNode->gtFlags | indexNode->gtFlags) & GTF_ALL_EFFECT); |
| 5126 | BlockRange().InsertBefore(insertionPoint, arrMDIdx); |
| 5127 | |
| 5128 | GenTree* offsArrObjNode = comp->gtClone(arrObjNode); |
| 5129 | BlockRange().InsertBefore(insertionPoint, offsArrObjNode); |
| 5130 | |
| 5131 | GenTreeArrOffs* arrOffs = |
| 5132 | new (comp, GT_ARR_OFFSET) GenTreeArrOffs(TYP_I_IMPL, prevArrOffs, arrMDIdx, offsArrObjNode, dim, rank, |
| 5133 | arrElem->gtArrElem.gtArrElemType); |
| 5134 | arrOffs->gtFlags |= ((prevArrOffs->gtFlags | arrMDIdx->gtFlags | offsArrObjNode->gtFlags) & GTF_ALL_EFFECT); |
| 5135 | BlockRange().InsertBefore(insertionPoint, arrOffs); |
| 5136 | |
| 5137 | prevArrOffs = arrOffs; |
| 5138 | } |
| 5139 | |
| 5140 | // Generate the LEA and make it reverse evaluation, because we want to evaluate the index expression before the |
| 5141 | // base. |
| 5142 | unsigned scale = arrElem->gtArrElem.gtArrElemSize; |
| 5143 | unsigned offset = comp->eeGetMDArrayDataOffset(arrElem->gtArrElem.gtArrElemType, arrElem->gtArrElem.gtArrRank); |
| 5144 | |
| 5145 | GenTree* leaIndexNode = prevArrOffs; |
| 5146 | if (!jitIsScaleIndexMul(scale)) |
| 5147 | { |
| 5148 | // We do the address arithmetic in TYP_I_IMPL, though note that the lower bounds and lengths in memory are |
| 5149 | // TYP_INT |
| 5150 | GenTree* scaleNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, scale); |
| 5151 | GenTree* mulNode = new (comp, GT_MUL) GenTreeOp(GT_MUL, TYP_I_IMPL, leaIndexNode, scaleNode); |
| 5152 | BlockRange().InsertBefore(insertionPoint, scaleNode, mulNode); |
| 5153 | leaIndexNode = mulNode; |
| 5154 | scale = 1; |
| 5155 | } |
| 5156 | |
| 5157 | GenTree* leaBase = comp->gtClone(arrObjNode); |
| 5158 | BlockRange().InsertBefore(insertionPoint, leaBase); |
| 5159 | |
| 5160 | GenTree* leaNode = new (comp, GT_LEA) GenTreeAddrMode(arrElem->TypeGet(), leaBase, leaIndexNode, scale, offset); |
| 5161 | |
| 5162 | BlockRange().InsertBefore(insertionPoint, leaNode); |
| 5163 | |
| 5164 | LIR::Use arrElemUse; |
| 5165 | if (BlockRange().TryGetUse(arrElem, &arrElemUse)) |
| 5166 | { |
| 5167 | arrElemUse.ReplaceWith(comp, leaNode); |
| 5168 | } |
| 5169 | else |
| 5170 | { |
| 5171 | leaNode->SetUnusedValue(); |
| 5172 | } |
| 5173 | |
| 5174 | BlockRange().Remove(arrElem); |
| 5175 | |
JITDUMP("Results of lowering ArrElem:\n");
DISPTREERANGE(BlockRange(), leaNode);
JITDUMP("\n\n");
| 5179 | |
| 5180 | return nextToLower; |
| 5181 | } |
| 5182 | |
| 5183 | void Lowering::DoPhase() |
| 5184 | { |
// If we have any PInvoke calls, insert the one-time prolog code. We'll insert the epilog code in the
| 5186 | // appropriate spots later. NOTE: there is a minor optimization opportunity here, as we still create p/invoke |
| 5187 | // data structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination. |
| 5188 | if (comp->info.compCallUnmanaged) |
| 5189 | { |
| 5190 | InsertPInvokeMethodProlog(); |
| 5191 | } |
| 5192 | |
| 5193 | #if !defined(_TARGET_64BIT_) |
| 5194 | DecomposeLongs decomp(comp); // Initialize the long decomposition class. |
| 5195 | if (comp->compLongUsed) |
| 5196 | { |
| 5197 | decomp.PrepareForDecomposition(); |
| 5198 | } |
| 5199 | #endif // !defined(_TARGET_64BIT_) |
| 5200 | |
| 5201 | for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext) |
| 5202 | { |
| 5203 | /* Make the block publicly available */ |
| 5204 | comp->compCurBB = block; |
| 5205 | |
| 5206 | #if !defined(_TARGET_64BIT_) |
| 5207 | if (comp->compLongUsed) |
| 5208 | { |
| 5209 | decomp.DecomposeBlock(block); |
| 5210 | } |
| 5211 | #endif //!_TARGET_64BIT_ |
| 5212 | |
| 5213 | LowerBlock(block); |
| 5214 | } |
| 5215 | |
| 5216 | #ifdef DEBUG |
JITDUMP("Lower has completed modifying nodes.\n");
| 5218 | if (VERBOSE) |
| 5219 | { |
| 5220 | comp->fgDispBasicBlocks(true); |
| 5221 | } |
| 5222 | #endif |
| 5223 | |
| 5224 | // Recompute local var ref counts before potentially sorting for liveness. |
| 5225 | // Note this does minimal work in cases where we are not going to sort. |
| 5226 | const bool isRecompute = true; |
| 5227 | const bool setSlotNumbers = false; |
| 5228 | comp->lvaComputeRefCounts(isRecompute, setSlotNumbers); |
| 5229 | |
| 5230 | comp->fgLocalVarLiveness(); |
| 5231 | // local var liveness can delete code, which may create empty blocks |
| 5232 | if (comp->opts.OptimizationEnabled()) |
| 5233 | { |
| 5234 | comp->optLoopsMarked = false; |
| 5235 | bool modified = comp->fgUpdateFlowGraph(); |
| 5236 | if (modified) |
| 5237 | { |
JITDUMP("had to run another liveness pass:\n");
| 5239 | comp->fgLocalVarLiveness(); |
| 5240 | } |
| 5241 | } |
| 5242 | |
| 5243 | // Recompute local var ref counts again after liveness to reflect |
| 5244 | // impact of any dead code removal. Note this may leave us with |
| 5245 | // tracked vars that have zero refs. |
| 5246 | comp->lvaComputeRefCounts(isRecompute, setSlotNumbers); |
| 5247 | |
| 5248 | #ifdef DEBUG |
JITDUMP("Liveness pass finished after lowering, IR:\n");
| 5250 | if (VERBOSE) |
| 5251 | { |
| 5252 | comp->fgDispBasicBlocks(true); |
| 5253 | } |
| 5254 | |
| 5255 | for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext) |
| 5256 | { |
| 5257 | assert(LIR::AsRange(block).CheckLIR(comp, true)); |
| 5258 | } |
| 5259 | #endif |
| 5260 | } |
| 5261 | |
| 5262 | #ifdef DEBUG |
| 5263 | |
| 5264 | //------------------------------------------------------------------------ |
| 5265 | // Lowering::CheckCallArg: check that a call argument is in an expected |
| 5266 | // form after lowering. |
| 5267 | // |
| 5268 | // Arguments: |
| 5269 | // arg - the argument to check. |
| 5270 | // |
| 5271 | void Lowering::CheckCallArg(GenTree* arg) |
| 5272 | { |
| 5273 | if (!arg->IsValue() && !arg->OperIsPutArgStk()) |
| 5274 | { |
| 5275 | assert((arg->OperIsStore() && !arg->IsValue()) || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || |
| 5276 | arg->OperIsCopyBlkOp()); |
| 5277 | return; |
| 5278 | } |
| 5279 | |
| 5280 | switch (arg->OperGet()) |
| 5281 | { |
| 5282 | case GT_FIELD_LIST: |
| 5283 | { |
| 5284 | GenTreeFieldList* list = arg->AsFieldList(); |
| 5285 | assert(list->isContained()); |
| 5286 | assert(list->IsFieldListHead()); |
| 5287 | |
| 5288 | for (; list != nullptr; list = list->Rest()) |
| 5289 | { |
| 5290 | assert(list->Current()->OperIsPutArg()); |
| 5291 | } |
| 5292 | } |
| 5293 | break; |
| 5294 | |
| 5295 | default: |
| 5296 | assert(arg->OperIsPutArg()); |
| 5297 | break; |
| 5298 | } |
| 5299 | } |
| 5300 | |
| 5301 | //------------------------------------------------------------------------ |
| 5302 | // Lowering::CheckCall: check that a call is in an expected form after |
| 5303 | // lowering. Currently this amounts to checking its |
| 5304 | // arguments, but could be expanded to verify more |
| 5305 | // properties in the future. |
| 5306 | // |
| 5307 | // Arguments: |
| 5308 | // call - the call to check. |
| 5309 | // |
| 5310 | void Lowering::CheckCall(GenTreeCall* call) |
| 5311 | { |
| 5312 | if (call->gtCallObjp != nullptr) |
| 5313 | { |
| 5314 | CheckCallArg(call->gtCallObjp); |
| 5315 | } |
| 5316 | |
| 5317 | for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest()) |
| 5318 | { |
| 5319 | CheckCallArg(args->Current()); |
| 5320 | } |
| 5321 | |
| 5322 | for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest()) |
| 5323 | { |
| 5324 | CheckCallArg(args->Current()); |
| 5325 | } |
| 5326 | } |
| 5327 | |
| 5328 | //------------------------------------------------------------------------ |
| 5329 | // Lowering::CheckNode: check that an LIR node is in an expected form |
| 5330 | // after lowering. |
| 5331 | // |
| 5332 | // Arguments: |
| 5333 | // compiler - the compiler context. |
| 5334 | // node - the node to check. |
| 5335 | // |
| 5336 | void Lowering::CheckNode(Compiler* compiler, GenTree* node) |
| 5337 | { |
| 5338 | switch (node->OperGet()) |
| 5339 | { |
| 5340 | case GT_CALL: |
| 5341 | CheckCall(node->AsCall()); |
| 5342 | break; |
| 5343 | |
| 5344 | #ifdef FEATURE_SIMD |
| 5345 | case GT_SIMD: |
| 5346 | assert(node->TypeGet() != TYP_SIMD12); |
| 5347 | break; |
| 5348 | #ifdef _TARGET_64BIT_ |
| 5349 | case GT_LCL_VAR: |
| 5350 | case GT_STORE_LCL_VAR: |
| 5351 | { |
| 5352 | unsigned lclNum = node->AsLclVarCommon()->GetLclNum(); |
| 5353 | LclVarDsc* lclVar = &compiler->lvaTable[lclNum]; |
| 5354 | assert(node->TypeGet() != TYP_SIMD12 || compiler->lvaIsFieldOfDependentlyPromotedStruct(lclVar)); |
| 5355 | } |
| 5356 | break; |
| 5357 | #endif // _TARGET_64BIT_ |
#endif // FEATURE_SIMD
| 5359 | |
| 5360 | default: |
| 5361 | break; |
| 5362 | } |
| 5363 | } |
| 5364 | |
| 5365 | //------------------------------------------------------------------------ |
| 5366 | // Lowering::CheckBlock: check that the contents of an LIR block are in an |
| 5367 | // expected form after lowering. |
| 5368 | // |
| 5369 | // Arguments: |
| 5370 | // compiler - the compiler context. |
| 5371 | // block - the block to check. |
| 5372 | // |
| 5373 | bool Lowering::CheckBlock(Compiler* compiler, BasicBlock* block) |
| 5374 | { |
| 5375 | assert(block->isEmpty() || block->IsLIR()); |
| 5376 | |
| 5377 | LIR::Range& blockRange = LIR::AsRange(block); |
| 5378 | for (GenTree* node : blockRange) |
| 5379 | { |
| 5380 | CheckNode(compiler, node); |
| 5381 | } |
| 5382 | |
| 5383 | assert(blockRange.CheckLIR(compiler, true)); |
| 5384 | return true; |
| 5385 | } |
| 5386 | #endif |
| 5387 | |
| 5388 | void Lowering::LowerBlock(BasicBlock* block) |
| 5389 | { |
| 5390 | assert(block == comp->compCurBB); // compCurBB must already be set. |
| 5391 | assert(block->isEmpty() || block->IsLIR()); |
| 5392 | |
| 5393 | m_block = block; |
| 5394 | |
| 5395 | // NOTE: some of the lowering methods insert calls before the node being |
| 5396 | // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In |
// general, any code that is inserted before the current node should be
// "pre-lowered" as it won't be subject to further processing.
| 5399 | // Lowering::CheckBlock() runs some extra checks on call arguments in |
| 5400 | // order to help catch unlowered nodes. |
| 5401 | |
| 5402 | GenTree* node = BlockRange().FirstNode(); |
| 5403 | while (node != nullptr) |
| 5404 | { |
| 5405 | node = LowerNode(node); |
| 5406 | } |
| 5407 | |
| 5408 | assert(CheckBlock(comp, block)); |
| 5409 | } |
| 5410 | |
/** Verifies whether both of these trees represent the same indirection.
 * Used by Lower to annotate whether CodeGen can generate an instruction of the
 * form *addrMode BinOp= expr
| 5414 | * |
| 5415 | * Preconditions: both trees are children of GT_INDs and their underlying children |
| 5416 | * have the same gtOper. |
| 5417 | * |
| 5418 | * This is a first iteration to actually recognize trees that can be code-generated |
| 5419 | * as a single read-modify-write instruction on AMD64/x86. For now |
| 5420 | * this method only supports the recognition of simple addressing modes (through GT_LEA) |
| 5421 | * or local var indirections. Local fields, array access and other more complex nodes are |
| 5422 | * not yet supported. |
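 *
 * For example (illustrative shape only; node names abbreviated), a tree such as
 *     STOREIND(lea, ADD(IND(lea'), CNS_INT))
 * where lea and lea' are equivalent GT_LEA address trees can be emitted as a single
 * read-modify-write instruction like "add [base + index*scale + offset], imm".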
| 5423 | * |
 * TODO-CQ: Perform tree recognition by using the Value Numbering Package; that way we can recognize
 * arbitrarily complex trees and support many more addressing patterns.
| 5426 | */ |
| 5427 | bool Lowering::IndirsAreEquivalent(GenTree* candidate, GenTree* storeInd) |
| 5428 | { |
| 5429 | assert(candidate->OperGet() == GT_IND); |
| 5430 | assert(storeInd->OperGet() == GT_STOREIND); |
| 5431 | |
    // Check the size of the indirections: if they differ, say because of a cast,
    // then we can't call them equivalent, since doing so could cause us to drop a cast.
    // A signedness difference is okay and expected, since a store indirection must always
    // be signed based on the CIL spec, but a load could be unsigned.
| 5437 | if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType)) |
| 5438 | { |
| 5439 | return false; |
| 5440 | } |
| 5441 | |
| 5442 | GenTree* pTreeA = candidate->gtGetOp1(); |
| 5443 | GenTree* pTreeB = storeInd->gtGetOp1(); |
| 5444 | |
| 5445 | // This method will be called by codegen (as well as during lowering). |
| 5446 | // After register allocation, the sources may have been spilled and reloaded |
| 5447 | // to a different register, indicated by an inserted GT_RELOAD node. |
| 5448 | pTreeA = pTreeA->gtSkipReloadOrCopy(); |
| 5449 | pTreeB = pTreeB->gtSkipReloadOrCopy(); |
| 5450 | |
| 5451 | genTreeOps oper; |
| 5452 | |
| 5453 | if (pTreeA->OperGet() != pTreeB->OperGet()) |
| 5454 | { |
| 5455 | return false; |
| 5456 | } |
| 5457 | |
| 5458 | oper = pTreeA->OperGet(); |
| 5459 | switch (oper) |
| 5460 | { |
| 5461 | case GT_LCL_VAR: |
| 5462 | case GT_LCL_VAR_ADDR: |
| 5463 | case GT_CLS_VAR_ADDR: |
| 5464 | case GT_CNS_INT: |
| 5465 | return NodesAreEquivalentLeaves(pTreeA, pTreeB); |
| 5466 | |
| 5467 | case GT_LEA: |
| 5468 | { |
| 5469 | GenTreeAddrMode* gtAddr1 = pTreeA->AsAddrMode(); |
| 5470 | GenTreeAddrMode* gtAddr2 = pTreeB->AsAddrMode(); |
| 5471 | return NodesAreEquivalentLeaves(gtAddr1->Base(), gtAddr2->Base()) && |
| 5472 | NodesAreEquivalentLeaves(gtAddr1->Index(), gtAddr2->Index()) && |
| 5473 | (gtAddr1->gtScale == gtAddr2->gtScale) && (gtAddr1->Offset() == gtAddr2->Offset()); |
| 5474 | } |
| 5475 | default: |
| 5476 | // We don't handle anything that is not either a constant, |
| 5477 | // a local var or LEA. |
| 5478 | return false; |
| 5479 | } |
| 5480 | } |
| 5481 | |
/** Tests whether the two given nodes are equivalent leaves.
 * Right now, only integer constants, local variables (and their addresses),
 * and class variable addresses are supported.
 */
| 5485 | bool Lowering::NodesAreEquivalentLeaves(GenTree* tree1, GenTree* tree2) |
| 5486 | { |
| 5487 | if (tree1 == nullptr && tree2 == nullptr) |
| 5488 | { |
| 5489 | return true; |
| 5490 | } |
| 5491 | |
| 5492 | // both null, they are equivalent, otherwise if either is null not equivalent |
| 5493 | if (tree1 == nullptr || tree2 == nullptr) |
| 5494 | { |
| 5495 | return false; |
| 5496 | } |
| 5497 | |
| 5498 | tree1 = tree1->gtSkipReloadOrCopy(); |
| 5499 | tree2 = tree2->gtSkipReloadOrCopy(); |
| 5500 | |
| 5501 | if (tree1->TypeGet() != tree2->TypeGet()) |
| 5502 | { |
| 5503 | return false; |
| 5504 | } |
| 5505 | |
| 5506 | if (tree1->OperGet() != tree2->OperGet()) |
| 5507 | { |
| 5508 | return false; |
| 5509 | } |
| 5510 | |
| 5511 | if (!tree1->OperIsLeaf() || !tree2->OperIsLeaf()) |
| 5512 | { |
| 5513 | return false; |
| 5514 | } |
| 5515 | |
| 5516 | switch (tree1->OperGet()) |
| 5517 | { |
| 5518 | case GT_CNS_INT: |
| 5519 | return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal && |
| 5520 | tree1->IsIconHandle() == tree2->IsIconHandle(); |
| 5521 | case GT_LCL_VAR: |
| 5522 | case GT_LCL_VAR_ADDR: |
| 5523 | return tree1->gtLclVarCommon.gtLclNum == tree2->gtLclVarCommon.gtLclNum; |
| 5524 | case GT_CLS_VAR_ADDR: |
| 5525 | return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd; |
| 5526 | default: |
| 5527 | return false; |
| 5528 | } |
| 5529 | } |
| 5530 | |
| 5531 | //------------------------------------------------------------------------ |
| 5532 | // Containment Analysis |
| 5533 | //------------------------------------------------------------------------ |
| 5534 | void Lowering::ContainCheckNode(GenTree* node) |
| 5535 | { |
| 5536 | switch (node->gtOper) |
| 5537 | { |
| 5538 | case GT_STORE_LCL_VAR: |
| 5539 | case GT_STORE_LCL_FLD: |
| 5540 | ContainCheckStoreLoc(node->AsLclVarCommon()); |
| 5541 | break; |
| 5542 | |
| 5543 | case GT_EQ: |
| 5544 | case GT_NE: |
| 5545 | case GT_LT: |
| 5546 | case GT_LE: |
| 5547 | case GT_GE: |
| 5548 | case GT_GT: |
| 5549 | case GT_TEST_EQ: |
| 5550 | case GT_TEST_NE: |
| 5551 | case GT_CMP: |
| 5552 | case GT_JCMP: |
| 5553 | ContainCheckCompare(node->AsOp()); |
| 5554 | break; |
| 5555 | |
| 5556 | case GT_JTRUE: |
| 5557 | ContainCheckJTrue(node->AsOp()); |
| 5558 | break; |
| 5559 | |
| 5560 | case GT_ADD: |
| 5561 | case GT_SUB: |
| 5562 | #if !defined(_TARGET_64BIT_) |
| 5563 | case GT_ADD_LO: |
| 5564 | case GT_ADD_HI: |
| 5565 | case GT_SUB_LO: |
| 5566 | case GT_SUB_HI: |
| 5567 | #endif |
| 5568 | case GT_AND: |
| 5569 | case GT_OR: |
| 5570 | case GT_XOR: |
| 5571 | ContainCheckBinary(node->AsOp()); |
| 5572 | break; |
| 5573 | |
| 5574 | #if defined(_TARGET_X86_) |
| 5575 | case GT_MUL_LONG: |
| 5576 | #endif |
| 5577 | case GT_MUL: |
| 5578 | case GT_MULHI: |
| 5579 | ContainCheckMul(node->AsOp()); |
| 5580 | break; |
| 5581 | case GT_DIV: |
| 5582 | case GT_MOD: |
| 5583 | case GT_UDIV: |
| 5584 | case GT_UMOD: |
| 5585 | ContainCheckDivOrMod(node->AsOp()); |
| 5586 | break; |
| 5587 | case GT_LSH: |
| 5588 | case GT_RSH: |
| 5589 | case GT_RSZ: |
| 5590 | case GT_ROL: |
| 5591 | case GT_ROR: |
| 5592 | #ifndef _TARGET_64BIT_ |
| 5593 | case GT_LSH_HI: |
| 5594 | case GT_RSH_LO: |
| 5595 | #endif |
| 5596 | ContainCheckShiftRotate(node->AsOp()); |
| 5597 | break; |
| 5598 | case GT_ARR_OFFSET: |
| 5599 | ContainCheckArrOffset(node->AsArrOffs()); |
| 5600 | break; |
| 5601 | case GT_LCLHEAP: |
| 5602 | ContainCheckLclHeap(node->AsOp()); |
| 5603 | break; |
| 5604 | case GT_RETURN: |
| 5605 | ContainCheckRet(node->AsOp()); |
| 5606 | break; |
| 5607 | case GT_RETURNTRAP: |
| 5608 | ContainCheckReturnTrap(node->AsOp()); |
| 5609 | break; |
        case GT_STOREIND:
            ContainCheckStoreIndir(node->AsIndir());
            break;
| 5612 | case GT_IND: |
| 5613 | ContainCheckIndir(node->AsIndir()); |
| 5614 | break; |
| 5615 | case GT_PUTARG_REG: |
| 5616 | case GT_PUTARG_STK: |
| 5617 | #if FEATURE_ARG_SPLIT |
| 5618 | case GT_PUTARG_SPLIT: |
| 5619 | #endif // FEATURE_ARG_SPLIT |
| 5620 | // The regNum must have been set by the lowering of the call. |
| 5621 | assert(node->gtRegNum != REG_NA); |
| 5622 | break; |
| 5623 | #ifdef _TARGET_XARCH_ |
| 5624 | case GT_INTRINSIC: |
| 5625 | ContainCheckIntrinsic(node->AsOp()); |
| 5626 | break; |
| 5627 | #endif // _TARGET_XARCH_ |
| 5628 | #ifdef FEATURE_SIMD |
| 5629 | case GT_SIMD: |
| 5630 | ContainCheckSIMD(node->AsSIMD()); |
| 5631 | break; |
| 5632 | #endif // FEATURE_SIMD |
| 5633 | #ifdef FEATURE_HW_INTRINSICS |
| 5634 | case GT_HWIntrinsic: |
| 5635 | ContainCheckHWIntrinsic(node->AsHWIntrinsic()); |
| 5636 | break; |
| 5637 | #endif // FEATURE_HW_INTRINSICS |
| 5638 | default: |
| 5639 | break; |
| 5640 | } |
| 5641 | } |
| 5642 | |
| 5643 | //------------------------------------------------------------------------ |
| 5644 | // ContainCheckReturnTrap: determine whether the source of a RETURNTRAP should be contained. |
| 5645 | // |
| 5646 | // Arguments: |
| 5647 | // node - pointer to the GT_RETURNTRAP node |
| 5648 | // |
| 5649 | void Lowering::ContainCheckReturnTrap(GenTreeOp* node) |
| 5650 | { |
| 5651 | #ifdef _TARGET_XARCH_ |
| 5652 | assert(node->OperIs(GT_RETURNTRAP)); |
| 5653 | // This just turns into a compare of its child with an int + a conditional call |
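    // Containing the indirection allows the compare to use a memory operand directly.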
| 5654 | if (node->gtOp1->isIndir()) |
| 5655 | { |
| 5656 | MakeSrcContained(node, node->gtOp1); |
| 5657 | } |
| 5658 | #endif // _TARGET_XARCH_ |
| 5659 | } |
| 5660 | |
| 5661 | //------------------------------------------------------------------------ |
| 5662 | // ContainCheckArrOffset: determine whether the source of an ARR_OFFSET should be contained. |
| 5663 | // |
| 5664 | // Arguments: |
| 5665 | // node - pointer to the GT_ARR_OFFSET node |
| 5666 | // |
| 5667 | void Lowering::ContainCheckArrOffset(GenTreeArrOffs* node) |
| 5668 | { |
| 5669 | assert(node->OperIs(GT_ARR_OFFSET)); |
    // A zero offset contributes nothing; contain it so that no code is generated for it.
| 5671 | if (node->gtOffset->IsIntegralConst(0)) |
| 5672 | { |
| 5673 | MakeSrcContained(node, node->gtArrOffs.gtOffset); |
| 5674 | } |
| 5675 | } |
| 5676 | |
| 5677 | //------------------------------------------------------------------------ |
| 5678 | // ContainCheckLclHeap: determine whether the source of a GT_LCLHEAP node should be contained. |
| 5679 | // |
| 5680 | // Arguments: |
| 5681 | // node - pointer to the node |
| 5682 | // |
| 5683 | void Lowering::ContainCheckLclHeap(GenTreeOp* node) |
| 5684 | { |
| 5685 | assert(node->OperIs(GT_LCLHEAP)); |
| 5686 | GenTree* size = node->gtOp.gtOp1; |
| 5687 | if (size->IsCnsIntOrI()) |
| 5688 | { |
| 5689 | MakeSrcContained(node, size); |
| 5690 | } |
| 5691 | } |
| 5692 | |
| 5693 | //------------------------------------------------------------------------ |
// ContainCheckRet: determine whether the source of a GT_RETURN node should be contained.
//
// Arguments:
//    ret - pointer to the GT_RETURN node
| 5698 | // |
| 5699 | void Lowering::ContainCheckRet(GenTreeOp* ret) |
| 5700 | { |
| 5701 | assert(ret->OperIs(GT_RETURN)); |
| 5702 | |
| 5703 | #if !defined(_TARGET_64BIT_) |
| 5704 | if (ret->TypeGet() == TYP_LONG) |
| 5705 | { |
| 5706 | GenTree* op1 = ret->gtGetOp1(); |
| 5707 | noway_assert(op1->OperGet() == GT_LONG); |
| 5708 | MakeSrcContained(ret, op1); |
| 5709 | } |
| 5710 | #endif // !defined(_TARGET_64BIT_) |
| 5711 | #if FEATURE_MULTIREG_RET |
| 5712 | if (varTypeIsStruct(ret)) |
| 5713 | { |
| 5714 | GenTree* op1 = ret->gtGetOp1(); |
| 5715 | // op1 must be either a lclvar or a multi-reg returning call |
| 5716 | if (op1->OperGet() == GT_LCL_VAR) |
| 5717 | { |
| 5718 | GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon(); |
| 5719 | LclVarDsc* varDsc = &(comp->lvaTable[lclVarCommon->gtLclNum]); |
| 5720 | assert(varDsc->lvIsMultiRegRet); |
| 5721 | |
| 5722 | // Mark var as contained if not enregistrable. |
| 5723 | if (!varTypeIsEnregisterableStruct(op1)) |
| 5724 | { |
| 5725 | MakeSrcContained(ret, op1); |
| 5726 | } |
| 5727 | } |
| 5728 | } |
| 5729 | #endif // FEATURE_MULTIREG_RET |
| 5730 | } |
| 5731 | |
| 5732 | //------------------------------------------------------------------------ |
| 5733 | // ContainCheckJTrue: determine whether the source of a JTRUE should be contained. |
| 5734 | // |
| 5735 | // Arguments: |
| 5736 | // node - pointer to the node |
| 5737 | // |
| 5738 | void Lowering::ContainCheckJTrue(GenTreeOp* node) |
| 5739 | { |
| 5740 | // The compare does not need to be generated into a register. |
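    // Instead, it is typed as TYP_VOID and marked with GTF_SET_FLAGS so that it only
    // sets the condition flags, which the jump consumes.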
| 5741 | GenTree* cmp = node->gtGetOp1(); |
| 5742 | cmp->gtType = TYP_VOID; |
| 5743 | cmp->gtFlags |= GTF_SET_FLAGS; |
| 5744 | } |
| 5745 | |