| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 7 | XX XX |
| 8 | XX Register Requirements for ARM64 XX |
| 9 | XX XX |
| 10 | XX This encapsulates all the logic for setting register requirements for XX |
| 11 | XX the ARM64 architecture. XX |
| 12 | XX XX |
| 13 | XX XX |
| 14 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 15 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 16 | */ |
| 17 | |
| 18 | #include "jitpch.h" |
| 19 | #ifdef _MSC_VER |
| 20 | #pragma hdrstop |
| 21 | #endif |
| 22 | |
| 23 | #ifdef _TARGET_ARM64_ |
| 24 | |
| 25 | #include "jit.h" |
| 26 | #include "sideeffects.h" |
| 27 | #include "lower.h" |
| 28 | |
| 29 | //------------------------------------------------------------------------ |
| 30 | // BuildNode: Build the RefPositions for for a node |
| 31 | // |
| 32 | // Arguments: |
| 33 | // treeNode - the node of interest |
| 34 | // |
| 35 | // Return Value: |
| 36 | // The number of sources consumed by this node. |
| 37 | // |
| 38 | // Notes: |
| 39 | // Preconditions: |
| 40 | // LSRA Has been initialized. |
| 41 | // |
| 42 | // Postconditions: |
| 43 | // RefPositions have been built for all the register defs and uses required |
| 44 | // for this node. |
| 45 | // |
| 46 | int LinearScan::BuildNode(GenTree* tree) |
| 47 | { |
| 48 | assert(!tree->isContained()); |
| 49 | Interval* prefSrcInterval = nullptr; |
| 50 | int srcCount; |
| 51 | int dstCount = 0; |
| 52 | regMaskTP dstCandidates = RBM_NONE; |
| 53 | regMaskTP killMask = RBM_NONE; |
| 54 | bool isLocalDefUse = false; |
| 55 | |
| 56 | // Reset the build-related members of LinearScan. |
| 57 | clearBuildState(); |
| 58 | |
| 59 | RegisterType registerType = TypeGet(tree); |
| 60 | |
| 61 | // Set the default dstCount. This may be modified below. |
| 62 | if (tree->IsValue()) |
| 63 | { |
| 64 | dstCount = 1; |
| 65 | if (tree->IsUnusedValue()) |
| 66 | { |
| 67 | isLocalDefUse = true; |
| 68 | } |
| 69 | } |
| 70 | else |
| 71 | { |
| 72 | dstCount = 0; |
| 73 | } |
| 74 | |
| 75 | switch (tree->OperGet()) |
| 76 | { |
| 77 | default: |
| 78 | srcCount = BuildSimple(tree); |
| 79 | break; |
| 80 | |
| 81 | case GT_LCL_VAR: |
| 82 | case GT_LCL_FLD: |
| 83 | { |
| 84 | // We handle tracked variables differently from non-tracked ones. If it is tracked, |
| 85 | // we will simply add a use of the tracked variable at its parent/consumer. |
| 86 | // Otherwise, for a use we need to actually add the appropriate references for loading |
| 87 | // or storing the variable. |
| 88 | // |
| 89 | // A tracked variable won't actually get used until the appropriate ancestor tree node |
| 90 | // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument |
| 91 | // to a call or an orphaned dead node. |
| 92 | // |
| 93 | LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum]; |
| 94 | if (isCandidateVar(varDsc)) |
| 95 | { |
| 96 | INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1)); |
| 97 | return 0; |
| 98 | } |
| 99 | srcCount = 0; |
| 100 | #ifdef FEATURE_SIMD |
| 101 | // Need an additional register to read upper 4 bytes of Vector3. |
| 102 | if (tree->TypeGet() == TYP_SIMD12) |
| 103 | { |
| 104 | // We need an internal register different from targetReg in which 'tree' produces its result |
| 105 | // because both targetReg and internal reg will be in use at the same time. |
| 106 | buildInternalFloatRegisterDefForNode(tree, allSIMDRegs()); |
| 107 | setInternalRegsDelayFree = true; |
| 108 | buildInternalRegisterUses(); |
| 109 | } |
| 110 | #endif |
| 111 | BuildDef(tree); |
| 112 | } |
| 113 | break; |
| 114 | |
| 115 | case GT_STORE_LCL_FLD: |
| 116 | case GT_STORE_LCL_VAR: |
| 117 | srcCount = 1; |
| 118 | assert(dstCount == 0); |
| 119 | srcCount = BuildStoreLoc(tree->AsLclVarCommon()); |
| 120 | break; |
| 121 | |
| 122 | case GT_FIELD_LIST: |
| 123 | // These should always be contained. We don't correctly allocate or |
| 124 | // generate code for a non-contained GT_FIELD_LIST. |
| 125 | noway_assert(!"Non-contained GT_FIELD_LIST" ); |
| 126 | srcCount = 0; |
| 127 | break; |
| 128 | |
| 129 | case GT_LIST: |
| 130 | case GT_ARGPLACE: |
| 131 | case GT_NO_OP: |
| 132 | case GT_START_NONGC: |
| 133 | case GT_PROF_HOOK: |
| 134 | srcCount = 0; |
| 135 | assert(dstCount == 0); |
| 136 | break; |
| 137 | |
| 138 | case GT_CNS_DBL: |
| 139 | { |
| 140 | GenTreeDblCon* dblConst = tree->AsDblCon(); |
| 141 | double constValue = dblConst->gtDblCon.gtDconVal; |
| 142 | |
| 143 | if (emitter::emitIns_valid_imm_for_fmov(constValue)) |
| 144 | { |
| 145 | // Directly encode constant to instructions. |
| 146 | } |
| 147 | else |
| 148 | { |
| 149 | // Reserve int to load constant from memory (IF_LARGELDC) |
| 150 | buildInternalIntRegisterDefForNode(tree); |
| 151 | buildInternalRegisterUses(); |
| 152 | } |
| 153 | } |
| 154 | __fallthrough; |
| 155 | |
| 156 | case GT_CNS_INT: |
| 157 | { |
| 158 | srcCount = 0; |
| 159 | assert(dstCount == 1); |
| 160 | RefPosition* def = BuildDef(tree); |
| 161 | def->getInterval()->isConstant = true; |
| 162 | } |
| 163 | break; |
| 164 | |
| 165 | case GT_BOX: |
| 166 | case GT_COMMA: |
| 167 | case GT_QMARK: |
| 168 | case GT_COLON: |
| 169 | srcCount = 0; |
| 170 | assert(dstCount == 0); |
| 171 | unreached(); |
| 172 | break; |
| 173 | |
| 174 | case GT_RETURN: |
| 175 | srcCount = BuildReturn(tree); |
| 176 | break; |
| 177 | |
| 178 | case GT_RETFILT: |
| 179 | assert(dstCount == 0); |
| 180 | if (tree->TypeGet() == TYP_VOID) |
| 181 | { |
| 182 | srcCount = 0; |
| 183 | } |
| 184 | else |
| 185 | { |
| 186 | assert(tree->TypeGet() == TYP_INT); |
| 187 | srcCount = 1; |
| 188 | BuildUse(tree->gtGetOp1(), RBM_INTRET); |
| 189 | } |
| 190 | break; |
| 191 | |
| 192 | case GT_NOP: |
| 193 | // A GT_NOP is either a passthrough (if it is void, or if it has |
| 194 | // a child), but must be considered to produce a dummy value if it |
| 195 | // has a type but no child. |
| 196 | srcCount = 0; |
| 197 | if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr) |
| 198 | { |
| 199 | assert(dstCount == 1); |
| 200 | BuildDef(tree); |
| 201 | } |
| 202 | else |
| 203 | { |
| 204 | assert(dstCount == 0); |
| 205 | } |
| 206 | break; |
| 207 | |
| 208 | case GT_JTRUE: |
| 209 | srcCount = 0; |
| 210 | assert(dstCount == 0); |
| 211 | break; |
| 212 | |
| 213 | case GT_JMP: |
| 214 | srcCount = 0; |
| 215 | assert(dstCount == 0); |
| 216 | break; |
| 217 | |
| 218 | case GT_SWITCH: |
| 219 | // This should never occur since switch nodes must not be visible at this |
| 220 | // point in the JIT. |
| 221 | srcCount = 0; |
| 222 | noway_assert(!"Switch must be lowered at this point" ); |
| 223 | break; |
| 224 | |
| 225 | case GT_JMPTABLE: |
| 226 | srcCount = 0; |
| 227 | assert(dstCount == 1); |
| 228 | BuildDef(tree); |
| 229 | break; |
| 230 | |
| 231 | case GT_SWITCH_TABLE: |
| 232 | buildInternalIntRegisterDefForNode(tree); |
| 233 | srcCount = BuildBinaryUses(tree->AsOp()); |
| 234 | assert(dstCount == 0); |
| 235 | break; |
| 236 | |
| 237 | case GT_ASG: |
| 238 | noway_assert(!"We should never hit any assignment operator in lowering" ); |
| 239 | srcCount = 0; |
| 240 | break; |
| 241 | |
| 242 | case GT_ADD: |
| 243 | case GT_SUB: |
| 244 | if (varTypeIsFloating(tree->TypeGet())) |
| 245 | { |
| 246 | // overflow operations aren't supported on float/double types. |
| 247 | assert(!tree->gtOverflow()); |
| 248 | |
| 249 | // No implicit conversions at this stage as the expectation is that |
| 250 | // everything is made explicit by adding casts. |
| 251 | assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); |
| 252 | } |
| 253 | |
| 254 | __fallthrough; |
| 255 | |
| 256 | case GT_AND: |
| 257 | case GT_OR: |
| 258 | case GT_XOR: |
| 259 | case GT_LSH: |
| 260 | case GT_RSH: |
| 261 | case GT_RSZ: |
| 262 | case GT_ROR: |
| 263 | srcCount = BuildBinaryUses(tree->AsOp()); |
| 264 | assert(dstCount == 1); |
| 265 | BuildDef(tree); |
| 266 | break; |
| 267 | |
| 268 | case GT_RETURNTRAP: |
| 269 | // this just turns into a compare of its child with an int |
| 270 | // + a conditional call |
| 271 | BuildUse(tree->gtGetOp1()); |
| 272 | srcCount = 1; |
| 273 | assert(dstCount == 0); |
| 274 | killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); |
| 275 | BuildDefsWithKills(tree, 0, RBM_NONE, killMask); |
| 276 | break; |
| 277 | |
| 278 | case GT_MOD: |
| 279 | case GT_UMOD: |
| 280 | NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64" ); |
| 281 | assert(!"Shouldn't see an integer typed GT_MOD node in ARM64" ); |
| 282 | srcCount = 0; |
| 283 | break; |
| 284 | |
| 285 | case GT_MUL: |
| 286 | if (tree->gtOverflow()) |
| 287 | { |
| 288 | // Need a register different from target reg to check for overflow. |
| 289 | buildInternalIntRegisterDefForNode(tree); |
| 290 | setInternalRegsDelayFree = true; |
| 291 | } |
| 292 | __fallthrough; |
| 293 | |
| 294 | case GT_DIV: |
| 295 | case GT_MULHI: |
| 296 | case GT_UDIV: |
| 297 | { |
| 298 | srcCount = BuildBinaryUses(tree->AsOp()); |
| 299 | buildInternalRegisterUses(); |
| 300 | assert(dstCount == 1); |
| 301 | BuildDef(tree); |
| 302 | } |
| 303 | break; |
| 304 | |
| 305 | case GT_INTRINSIC: |
| 306 | { |
| 307 | noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) || |
| 308 | (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) || |
| 309 | (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Floor) || |
| 310 | (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) || |
| 311 | (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt)); |
| 312 | |
| 313 | // Both operand and its result must be of the same floating point type. |
| 314 | GenTree* op1 = tree->gtGetOp1(); |
| 315 | assert(varTypeIsFloating(op1)); |
| 316 | assert(op1->TypeGet() == tree->TypeGet()); |
| 317 | |
| 318 | BuildUse(op1); |
| 319 | srcCount = 1; |
| 320 | assert(dstCount == 1); |
| 321 | BuildDef(tree); |
| 322 | } |
| 323 | break; |
| 324 | |
| 325 | #ifdef FEATURE_SIMD |
| 326 | case GT_SIMD: |
| 327 | srcCount = BuildSIMD(tree->AsSIMD()); |
| 328 | break; |
| 329 | #endif // FEATURE_SIMD |
| 330 | |
| 331 | #ifdef FEATURE_HW_INTRINSICS |
| 332 | case GT_HWIntrinsic: |
| 333 | srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic()); |
| 334 | break; |
| 335 | #endif // FEATURE_HW_INTRINSICS |
| 336 | |
| 337 | case GT_CAST: |
| 338 | assert(dstCount == 1); |
| 339 | srcCount = BuildCast(tree->AsCast()); |
| 340 | break; |
| 341 | |
| 342 | case GT_NEG: |
| 343 | case GT_NOT: |
| 344 | BuildUse(tree->gtGetOp1()); |
| 345 | srcCount = 1; |
| 346 | assert(dstCount == 1); |
| 347 | BuildDef(tree); |
| 348 | break; |
| 349 | |
| 350 | case GT_EQ: |
| 351 | case GT_NE: |
| 352 | case GT_LT: |
| 353 | case GT_LE: |
| 354 | case GT_GE: |
| 355 | case GT_GT: |
| 356 | case GT_TEST_EQ: |
| 357 | case GT_TEST_NE: |
| 358 | case GT_JCMP: |
| 359 | srcCount = BuildCmp(tree); |
| 360 | break; |
| 361 | |
| 362 | case GT_CKFINITE: |
| 363 | srcCount = 1; |
| 364 | assert(dstCount == 1); |
| 365 | buildInternalIntRegisterDefForNode(tree); |
| 366 | BuildUse(tree->gtGetOp1()); |
| 367 | BuildDef(tree); |
| 368 | buildInternalRegisterUses(); |
| 369 | break; |
| 370 | |
| 371 | case GT_CMPXCHG: |
| 372 | { |
| 373 | GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); |
| 374 | srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3; |
| 375 | assert(dstCount == 1); |
| 376 | |
| 377 | if (!compiler->compSupports(InstructionSet_Atomics)) |
| 378 | { |
| 379 | // For ARMv8 exclusives requires a single internal register |
| 380 | buildInternalIntRegisterDefForNode(tree); |
| 381 | } |
| 382 | |
| 383 | // For ARMv8 exclusives the lifetime of the addr and data must be extended because |
| 384 | // it may be used used multiple during retries |
| 385 | |
| 386 | // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent |
| 387 | // them being reused as the target register which must be destroyed early |
| 388 | |
| 389 | RefPosition* locationUse = BuildUse(tree->gtCmpXchg.gtOpLocation); |
| 390 | setDelayFree(locationUse); |
| 391 | RefPosition* valueUse = BuildUse(tree->gtCmpXchg.gtOpValue); |
| 392 | setDelayFree(valueUse); |
| 393 | if (!cmpXchgNode->gtOpComparand->isContained()) |
| 394 | { |
| 395 | RefPosition* comparandUse = BuildUse(tree->gtCmpXchg.gtOpComparand); |
| 396 | |
| 397 | // For ARMv8 exclusives the lifetime of the comparand must be extended because |
| 398 | // it may be used used multiple during retries |
| 399 | if (!compiler->compSupports(InstructionSet_Atomics)) |
| 400 | { |
| 401 | setDelayFree(comparandUse); |
| 402 | } |
| 403 | } |
| 404 | |
| 405 | // Internals may not collide with target |
| 406 | setInternalRegsDelayFree = true; |
| 407 | buildInternalRegisterUses(); |
| 408 | BuildDef(tree); |
| 409 | } |
| 410 | break; |
| 411 | |
| 412 | case GT_LOCKADD: |
| 413 | case GT_XADD: |
| 414 | case GT_XCHG: |
| 415 | { |
| 416 | assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1); |
| 417 | srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; |
| 418 | |
| 419 | if (!compiler->compSupports(InstructionSet_Atomics)) |
| 420 | { |
| 421 | // GT_XCHG requires a single internal register; the others require two. |
| 422 | buildInternalIntRegisterDefForNode(tree); |
| 423 | if (tree->OperGet() != GT_XCHG) |
| 424 | { |
| 425 | buildInternalIntRegisterDefForNode(tree); |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | assert(!tree->gtGetOp1()->isContained()); |
| 430 | RefPosition* op1Use = BuildUse(tree->gtGetOp1()); |
| 431 | RefPosition* op2Use = nullptr; |
| 432 | if (!tree->gtGetOp2()->isContained()) |
| 433 | { |
| 434 | op2Use = BuildUse(tree->gtGetOp2()); |
| 435 | } |
| 436 | |
| 437 | // For ARMv8 exclusives the lifetime of the addr and data must be extended because |
| 438 | // it may be used used multiple during retries |
| 439 | if (!compiler->compSupports(InstructionSet_Atomics)) |
| 440 | { |
| 441 | // Internals may not collide with target |
| 442 | if (dstCount == 1) |
| 443 | { |
| 444 | setDelayFree(op1Use); |
| 445 | if (op2Use != nullptr) |
| 446 | { |
| 447 | setDelayFree(op2Use); |
| 448 | } |
| 449 | setInternalRegsDelayFree = true; |
| 450 | } |
| 451 | buildInternalRegisterUses(); |
| 452 | } |
| 453 | if (dstCount == 1) |
| 454 | { |
| 455 | BuildDef(tree); |
| 456 | } |
| 457 | } |
| 458 | break; |
| 459 | |
| 460 | #if FEATURE_ARG_SPLIT |
| 461 | case GT_PUTARG_SPLIT: |
| 462 | srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); |
| 463 | dstCount = tree->AsPutArgSplit()->gtNumRegs; |
| 464 | break; |
| 465 | #endif // FEATURE _SPLIT_ARG |
| 466 | |
| 467 | case GT_PUTARG_STK: |
| 468 | srcCount = BuildPutArgStk(tree->AsPutArgStk()); |
| 469 | break; |
| 470 | |
| 471 | case GT_PUTARG_REG: |
| 472 | srcCount = BuildPutArgReg(tree->AsUnOp()); |
| 473 | break; |
| 474 | |
| 475 | case GT_CALL: |
| 476 | srcCount = BuildCall(tree->AsCall()); |
| 477 | if (tree->AsCall()->HasMultiRegRetVal()) |
| 478 | { |
| 479 | dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount(); |
| 480 | } |
| 481 | break; |
| 482 | |
| 483 | case GT_ADDR: |
| 484 | { |
| 485 | // For a GT_ADDR, the child node should not be evaluated into a register |
| 486 | GenTree* child = tree->gtGetOp1(); |
| 487 | assert(!isCandidateLocalRef(child)); |
| 488 | assert(child->isContained()); |
| 489 | assert(dstCount == 1); |
| 490 | srcCount = 0; |
| 491 | BuildDef(tree); |
| 492 | } |
| 493 | break; |
| 494 | |
| 495 | case GT_BLK: |
| 496 | case GT_DYN_BLK: |
| 497 | // These should all be eliminated prior to Lowering. |
| 498 | assert(!"Non-store block node in Lowering" ); |
| 499 | srcCount = 0; |
| 500 | break; |
| 501 | |
| 502 | case GT_STORE_BLK: |
| 503 | case GT_STORE_OBJ: |
| 504 | case GT_STORE_DYN_BLK: |
| 505 | srcCount = BuildBlockStore(tree->AsBlk()); |
| 506 | break; |
| 507 | |
| 508 | case GT_INIT_VAL: |
| 509 | // Always a passthrough of its child's value. |
| 510 | assert(!"INIT_VAL should always be contained" ); |
| 511 | srcCount = 0; |
| 512 | break; |
| 513 | |
| 514 | case GT_LCLHEAP: |
| 515 | { |
| 516 | assert(dstCount == 1); |
| 517 | |
| 518 | // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): |
| 519 | // Here '-' means don't care. |
| 520 | // |
| 521 | // Size? Init Memory? # temp regs |
| 522 | // 0 - 0 |
| 523 | // const and <=6 ptr words - 0 |
| 524 | // const and <PageSize No 0 |
| 525 | // >6 ptr words Yes 0 |
| 526 | // Non-const Yes 0 |
| 527 | // Non-const No 2 |
| 528 | // |
| 529 | |
| 530 | GenTree* size = tree->gtGetOp1(); |
| 531 | if (size->IsCnsIntOrI()) |
| 532 | { |
| 533 | assert(size->isContained()); |
| 534 | srcCount = 0; |
| 535 | |
| 536 | size_t sizeVal = size->gtIntCon.gtIconVal; |
| 537 | |
| 538 | if (sizeVal != 0) |
| 539 | { |
| 540 | // Compute the amount of memory to properly STACK_ALIGN. |
| 541 | // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. |
| 542 | // This should also help in debugging as we can examine the original size specified with |
| 543 | // localloc. |
| 544 | sizeVal = AlignUp(sizeVal, STACK_ALIGN); |
| 545 | size_t stpCount = sizeVal / (REGSIZE_BYTES * 2); |
| 546 | |
| 547 | // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc) |
| 548 | // |
| 549 | if (stpCount <= 4) |
| 550 | { |
| 551 | // Need no internal registers |
| 552 | } |
| 553 | else if (!compiler->info.compInitMem) |
| 554 | { |
| 555 | // No need to initialize allocated stack space. |
| 556 | if (sizeVal < compiler->eeGetPageSize()) |
| 557 | { |
| 558 | // Need no internal registers |
| 559 | } |
| 560 | else |
| 561 | { |
| 562 | // We need two registers: regCnt and RegTmp |
| 563 | buildInternalIntRegisterDefForNode(tree); |
| 564 | buildInternalIntRegisterDefForNode(tree); |
| 565 | } |
| 566 | } |
| 567 | } |
| 568 | } |
| 569 | else |
| 570 | { |
| 571 | srcCount = 1; |
| 572 | if (!compiler->info.compInitMem) |
| 573 | { |
| 574 | buildInternalIntRegisterDefForNode(tree); |
| 575 | buildInternalIntRegisterDefForNode(tree); |
| 576 | } |
| 577 | } |
| 578 | |
| 579 | if (!size->isContained()) |
| 580 | { |
| 581 | BuildUse(size); |
| 582 | } |
| 583 | buildInternalRegisterUses(); |
| 584 | BuildDef(tree); |
| 585 | } |
| 586 | break; |
| 587 | |
| 588 | case GT_ARR_BOUNDS_CHECK: |
| 589 | #ifdef FEATURE_SIMD |
| 590 | case GT_SIMD_CHK: |
| 591 | #endif // FEATURE_SIMD |
| 592 | { |
| 593 | GenTreeBoundsChk* node = tree->AsBoundsChk(); |
| 594 | // Consumes arrLen & index - has no result |
| 595 | assert(dstCount == 0); |
| 596 | |
| 597 | GenTree* intCns = nullptr; |
| 598 | GenTree* other = nullptr; |
| 599 | srcCount = BuildOperandUses(tree->AsBoundsChk()->gtIndex); |
| 600 | srcCount += BuildOperandUses(tree->AsBoundsChk()->gtArrLen); |
| 601 | } |
| 602 | break; |
| 603 | |
| 604 | case GT_ARR_ELEM: |
| 605 | // These must have been lowered to GT_ARR_INDEX |
| 606 | noway_assert(!"We should never see a GT_ARR_ELEM in lowering" ); |
| 607 | srcCount = 0; |
| 608 | assert(dstCount == 0); |
| 609 | break; |
| 610 | |
| 611 | case GT_ARR_INDEX: |
| 612 | { |
| 613 | srcCount = 2; |
| 614 | assert(dstCount == 1); |
| 615 | buildInternalIntRegisterDefForNode(tree); |
| 616 | setInternalRegsDelayFree = true; |
| 617 | |
| 618 | // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple |
| 619 | // times while the result is being computed. |
| 620 | RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj()); |
| 621 | setDelayFree(arrObjUse); |
| 622 | BuildUse(tree->AsArrIndex()->IndexExpr()); |
| 623 | buildInternalRegisterUses(); |
| 624 | BuildDef(tree); |
| 625 | } |
| 626 | break; |
| 627 | |
| 628 | case GT_ARR_OFFSET: |
| 629 | // This consumes the offset, if any, the arrObj and the effective index, |
| 630 | // and produces the flattened offset for this dimension. |
| 631 | srcCount = 2; |
| 632 | if (!tree->gtArrOffs.gtOffset->isContained()) |
| 633 | { |
| 634 | BuildUse(tree->AsArrOffs()->gtOffset); |
| 635 | srcCount++; |
| 636 | } |
| 637 | BuildUse(tree->AsArrOffs()->gtIndex); |
| 638 | BuildUse(tree->AsArrOffs()->gtArrObj); |
| 639 | assert(dstCount == 1); |
| 640 | buildInternalIntRegisterDefForNode(tree); |
| 641 | buildInternalRegisterUses(); |
| 642 | BuildDef(tree); |
| 643 | break; |
| 644 | |
| 645 | case GT_LEA: |
| 646 | { |
| 647 | GenTreeAddrMode* lea = tree->AsAddrMode(); |
| 648 | |
| 649 | GenTree* base = lea->Base(); |
| 650 | GenTree* index = lea->Index(); |
| 651 | int cns = lea->Offset(); |
| 652 | |
| 653 | // This LEA is instantiating an address, so we set up the srcCount here. |
| 654 | srcCount = 0; |
| 655 | if (base != nullptr) |
| 656 | { |
| 657 | srcCount++; |
| 658 | BuildUse(base); |
| 659 | } |
| 660 | if (index != nullptr) |
| 661 | { |
| 662 | srcCount++; |
| 663 | BuildUse(index); |
| 664 | } |
| 665 | assert(dstCount == 1); |
| 666 | |
| 667 | // On ARM64 we may need a single internal register |
| 668 | // (when both conditions are true then we still only need a single internal register) |
| 669 | if ((index != nullptr) && (cns != 0)) |
| 670 | { |
| 671 | // ARM64 does not support both Index and offset so we need an internal register |
| 672 | buildInternalIntRegisterDefForNode(tree); |
| 673 | } |
| 674 | else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE)) |
| 675 | { |
| 676 | // This offset can't be contained in the add instruction, so we need an internal register |
| 677 | buildInternalIntRegisterDefForNode(tree); |
| 678 | } |
| 679 | buildInternalRegisterUses(); |
| 680 | BuildDef(tree); |
| 681 | } |
| 682 | break; |
| 683 | |
| 684 | case GT_STOREIND: |
| 685 | { |
| 686 | assert(dstCount == 0); |
| 687 | |
| 688 | if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree)) |
| 689 | { |
| 690 | srcCount = BuildGCWriteBarrier(tree); |
| 691 | break; |
| 692 | } |
| 693 | |
| 694 | srcCount = BuildIndir(tree->AsIndir()); |
| 695 | if (!tree->gtGetOp2()->isContained()) |
| 696 | { |
| 697 | BuildUse(tree->gtGetOp2()); |
| 698 | srcCount++; |
| 699 | } |
| 700 | } |
| 701 | break; |
| 702 | |
| 703 | case GT_NULLCHECK: |
| 704 | // Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register |
| 705 | // is required, and it is not a localDefUse. |
| 706 | assert(dstCount == 0); |
| 707 | assert(!tree->gtGetOp1()->isContained()); |
| 708 | BuildUse(tree->gtGetOp1()); |
| 709 | srcCount = 1; |
| 710 | break; |
| 711 | |
| 712 | case GT_IND: |
| 713 | assert(dstCount == 1); |
| 714 | srcCount = BuildIndir(tree->AsIndir()); |
| 715 | break; |
| 716 | |
| 717 | case GT_CATCH_ARG: |
| 718 | srcCount = 0; |
| 719 | assert(dstCount == 1); |
| 720 | BuildDef(tree, RBM_EXCEPTION_OBJECT); |
| 721 | break; |
| 722 | |
| 723 | case GT_CLS_VAR: |
| 724 | srcCount = 0; |
| 725 | // GT_CLS_VAR, by the time we reach the backend, must always |
| 726 | // be a pure use. |
| 727 | // It will produce a result of the type of the |
| 728 | // node, and use an internal register for the address. |
| 729 | |
| 730 | assert(dstCount == 1); |
| 731 | assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0); |
| 732 | buildInternalIntRegisterDefForNode(tree); |
| 733 | buildInternalRegisterUses(); |
| 734 | BuildDef(tree); |
| 735 | break; |
| 736 | |
| 737 | case GT_INDEX_ADDR: |
| 738 | assert(dstCount == 1); |
| 739 | srcCount = BuildBinaryUses(tree->AsOp()); |
| 740 | buildInternalIntRegisterDefForNode(tree); |
| 741 | buildInternalRegisterUses(); |
| 742 | BuildDef(tree); |
| 743 | break; |
| 744 | |
| 745 | } // end switch (tree->OperGet()) |
| 746 | |
| 747 | if (tree->IsUnusedValue() && (dstCount != 0)) |
| 748 | { |
| 749 | isLocalDefUse = true; |
| 750 | } |
| 751 | // We need to be sure that we've set srcCount and dstCount appropriately |
| 752 | assert((dstCount < 2) || tree->IsMultiRegCall()); |
| 753 | assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); |
| 754 | assert(!tree->IsUnusedValue() || (dstCount != 0)); |
| 755 | assert(dstCount == tree->GetRegisterDstCount()); |
| 756 | INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount)); |
| 757 | return srcCount; |
| 758 | } |
| 759 | |
| 760 | #ifdef FEATURE_SIMD |
| 761 | //------------------------------------------------------------------------ |
| 762 | // BuildSIMD: Set the NodeInfo for a GT_SIMD tree. |
| 763 | // |
| 764 | // Arguments: |
| 765 | // tree - The GT_SIMD node of interest |
| 766 | // |
| 767 | // Return Value: |
| 768 | // The number of sources consumed by this node. |
| 769 | // |
| 770 | int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) |
| 771 | { |
| 772 | int srcCount = 0; |
| 773 | // Only SIMDIntrinsicInit can be contained |
| 774 | if (simdTree->isContained()) |
| 775 | { |
| 776 | assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); |
| 777 | } |
| 778 | int dstCount = simdTree->IsValue() ? 1 : 0; |
| 779 | assert(dstCount == 1); |
| 780 | |
| 781 | bool buildUses = true; |
| 782 | |
| 783 | GenTree* op1 = simdTree->gtGetOp1(); |
| 784 | GenTree* op2 = simdTree->gtGetOp2(); |
| 785 | |
| 786 | switch (simdTree->gtSIMDIntrinsicID) |
| 787 | { |
| 788 | case SIMDIntrinsicInit: |
| 789 | case SIMDIntrinsicCast: |
| 790 | case SIMDIntrinsicSqrt: |
| 791 | case SIMDIntrinsicAbs: |
| 792 | case SIMDIntrinsicConvertToSingle: |
| 793 | case SIMDIntrinsicConvertToInt32: |
| 794 | case SIMDIntrinsicConvertToDouble: |
| 795 | case SIMDIntrinsicConvertToInt64: |
| 796 | case SIMDIntrinsicWidenLo: |
| 797 | case SIMDIntrinsicWidenHi: |
| 798 | // No special handling required. |
| 799 | break; |
| 800 | |
| 801 | case SIMDIntrinsicGetItem: |
| 802 | { |
| 803 | op1 = simdTree->gtGetOp1(); |
| 804 | op2 = simdTree->gtGetOp2(); |
| 805 | |
| 806 | // We have an object and an index, either of which may be contained. |
| 807 | bool setOp2DelayFree = false; |
| 808 | if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal())) |
| 809 | { |
| 810 | // If the index is not a constant and the object is not contained or is a local |
| 811 | // we will need a general purpose register to calculate the address |
| 812 | // internal register must not clobber input index |
| 813 | // TODO-Cleanup: An internal register will never clobber a source; this code actually |
| 814 | // ensures that the index (op2) doesn't interfere with the target. |
| 815 | buildInternalIntRegisterDefForNode(simdTree); |
| 816 | setOp2DelayFree = true; |
| 817 | } |
| 818 | srcCount += BuildOperandUses(op1); |
| 819 | if (!op2->isContained()) |
| 820 | { |
| 821 | RefPosition* op2Use = BuildUse(op2); |
| 822 | if (setOp2DelayFree) |
| 823 | { |
| 824 | setDelayFree(op2Use); |
| 825 | } |
| 826 | srcCount++; |
| 827 | } |
| 828 | |
| 829 | if (!op2->IsCnsIntOrI() && (!op1->isContained())) |
| 830 | { |
| 831 | // If vector is not already in memory (contained) and the index is not a constant, |
| 832 | // we will use the SIMD temp location to store the vector. |
| 833 | compiler->getSIMDInitTempVarNum(); |
| 834 | } |
| 835 | buildUses = false; |
| 836 | } |
| 837 | break; |
| 838 | |
| 839 | case SIMDIntrinsicAdd: |
| 840 | case SIMDIntrinsicSub: |
| 841 | case SIMDIntrinsicMul: |
| 842 | case SIMDIntrinsicDiv: |
| 843 | case SIMDIntrinsicBitwiseAnd: |
| 844 | case SIMDIntrinsicBitwiseAndNot: |
| 845 | case SIMDIntrinsicBitwiseOr: |
| 846 | case SIMDIntrinsicBitwiseXor: |
| 847 | case SIMDIntrinsicMin: |
| 848 | case SIMDIntrinsicMax: |
| 849 | case SIMDIntrinsicEqual: |
| 850 | case SIMDIntrinsicLessThan: |
| 851 | case SIMDIntrinsicGreaterThan: |
| 852 | case SIMDIntrinsicLessThanOrEqual: |
| 853 | case SIMDIntrinsicGreaterThanOrEqual: |
| 854 | // No special handling required. |
| 855 | break; |
| 856 | |
| 857 | case SIMDIntrinsicSetX: |
| 858 | case SIMDIntrinsicSetY: |
| 859 | case SIMDIntrinsicSetZ: |
| 860 | case SIMDIntrinsicSetW: |
| 861 | case SIMDIntrinsicNarrow: |
| 862 | { |
| 863 | // Op1 will write to dst before Op2 is free |
| 864 | BuildUse(op1); |
| 865 | RefPosition* op2Use = BuildUse(op2); |
| 866 | setDelayFree(op2Use); |
| 867 | srcCount = 2; |
| 868 | buildUses = false; |
| 869 | break; |
| 870 | } |
| 871 | |
| 872 | case SIMDIntrinsicInitN: |
| 873 | { |
| 874 | var_types baseType = simdTree->gtSIMDBaseType; |
| 875 | srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType)); |
| 876 | if (varTypeIsFloating(simdTree->gtSIMDBaseType)) |
| 877 | { |
| 878 | // Need an internal register to stitch together all the values into a single vector in a SIMD reg. |
| 879 | buildInternalFloatRegisterDefForNode(simdTree); |
| 880 | } |
| 881 | |
| 882 | int initCount = 0; |
| 883 | for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2()) |
| 884 | { |
| 885 | assert(list->OperGet() == GT_LIST); |
| 886 | GenTree* listItem = list->gtGetOp1(); |
| 887 | assert(listItem->TypeGet() == baseType); |
| 888 | assert(!listItem->isContained()); |
| 889 | BuildUse(listItem); |
| 890 | initCount++; |
| 891 | } |
| 892 | assert(initCount == srcCount); |
| 893 | buildUses = false; |
| 894 | |
| 895 | break; |
| 896 | } |
| 897 | |
| 898 | case SIMDIntrinsicInitArray: |
| 899 | // We have an array and an index, which may be contained. |
| 900 | break; |
| 901 | |
| 902 | case SIMDIntrinsicOpEquality: |
| 903 | case SIMDIntrinsicOpInEquality: |
| 904 | buildInternalFloatRegisterDefForNode(simdTree); |
| 905 | break; |
| 906 | |
| 907 | case SIMDIntrinsicDotProduct: |
| 908 | buildInternalFloatRegisterDefForNode(simdTree); |
| 909 | break; |
| 910 | |
| 911 | case SIMDIntrinsicSelect: |
| 912 | // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB |
| 913 | // bsl target register must be VC. Reserve a temp in case we need to shuffle things. |
| 914 | // This will require a different approach, as GenTreeSIMD has only two operands. |
| 915 | assert(!"SIMDIntrinsicSelect not yet supported" ); |
| 916 | buildInternalFloatRegisterDefForNode(simdTree); |
| 917 | break; |
| 918 | |
| 919 | case SIMDIntrinsicInitArrayX: |
| 920 | case SIMDIntrinsicInitFixed: |
| 921 | case SIMDIntrinsicCopyToArray: |
| 922 | case SIMDIntrinsicCopyToArrayX: |
| 923 | case SIMDIntrinsicNone: |
| 924 | case SIMDIntrinsicGetCount: |
| 925 | case SIMDIntrinsicGetOne: |
| 926 | case SIMDIntrinsicGetZero: |
| 927 | case SIMDIntrinsicGetAllOnes: |
| 928 | case SIMDIntrinsicGetX: |
| 929 | case SIMDIntrinsicGetY: |
| 930 | case SIMDIntrinsicGetZ: |
| 931 | case SIMDIntrinsicGetW: |
| 932 | case SIMDIntrinsicInstEquals: |
| 933 | case SIMDIntrinsicHWAccel: |
| 934 | case SIMDIntrinsicWiden: |
| 935 | case SIMDIntrinsicInvalid: |
| 936 | assert(!"These intrinsics should not be seen during register allocation" ); |
| 937 | __fallthrough; |
| 938 | |
| 939 | default: |
| 940 | noway_assert(!"Unimplemented SIMD node type." ); |
| 941 | unreached(); |
| 942 | } |
| 943 | if (buildUses) |
| 944 | { |
| 945 | assert(!op1->OperIs(GT_LIST)); |
| 946 | assert(srcCount == 0); |
| 947 | srcCount = BuildOperandUses(op1); |
| 948 | if ((op2 != nullptr) && !op2->isContained()) |
| 949 | { |
| 950 | srcCount += BuildOperandUses(op2); |
| 951 | } |
| 952 | } |
| 953 | assert(internalCount <= MaxInternalCount); |
| 954 | buildInternalRegisterUses(); |
| 955 | if (dstCount == 1) |
| 956 | { |
| 957 | BuildDef(simdTree); |
| 958 | } |
| 959 | else |
| 960 | { |
| 961 | assert(dstCount == 0); |
| 962 | } |
| 963 | return srcCount; |
| 964 | } |
| 965 | #endif // FEATURE_SIMD |
| 966 | |
| 967 | #ifdef FEATURE_HW_INTRINSICS |
| 968 | #include "hwintrinsic.h" |
| 969 | //------------------------------------------------------------------------ |
| 970 | // BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree. |
| 971 | // |
| 972 | // Arguments: |
| 973 | // tree - The GT_HWIntrinsic node of interest |
| 974 | // |
| 975 | // Return Value: |
| 976 | // The number of sources consumed by this node. |
| 977 | // |
| 978 | int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) |
| 979 | { |
| 980 | NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; |
| 981 | int numArgs = HWIntrinsicInfo::lookupNumArgs(intrinsicTree); |
| 982 | |
| 983 | GenTree* op1 = intrinsicTree->gtGetOp1(); |
| 984 | GenTree* op2 = intrinsicTree->gtGetOp2(); |
| 985 | GenTree* op3 = nullptr; |
| 986 | int srcCount = 0; |
| 987 | |
| 988 | if ((op1 != nullptr) && op1->OperIsList()) |
| 989 | { |
| 990 | // op2 must be null, and there must be at least two more arguments. |
| 991 | assert(op2 == nullptr); |
| 992 | noway_assert(op1->AsArgList()->Rest() != nullptr); |
| 993 | noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr); |
| 994 | assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr); |
| 995 | op2 = op1->AsArgList()->Rest()->Current(); |
| 996 | op3 = op1->AsArgList()->Rest()->Rest()->Current(); |
| 997 | op1 = op1->AsArgList()->Current(); |
| 998 | } |
| 999 | |
| 1000 | int dstCount = intrinsicTree->IsValue() ? 1 : 0; |
| 1001 | bool op2IsDelayFree = false; |
| 1002 | bool op3IsDelayFree = false; |
| 1003 | |
| 1004 | // Create internal temps, and handle any other special requirements. |
| 1005 | switch (HWIntrinsicInfo::lookup(intrinsicID).form) |
| 1006 | { |
| 1007 | case HWIntrinsicInfo::Sha1HashOp: |
| 1008 | assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); |
| 1009 | if (!op2->isContained()) |
| 1010 | { |
| 1011 | assert(!op3->isContained()); |
| 1012 | op2IsDelayFree = true; |
| 1013 | op3IsDelayFree = true; |
| 1014 | setInternalRegsDelayFree = true; |
| 1015 | } |
| 1016 | buildInternalFloatRegisterDefForNode(intrinsicTree); |
| 1017 | break; |
| 1018 | case HWIntrinsicInfo::SimdTernaryRMWOp: |
| 1019 | assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); |
| 1020 | if (!op2->isContained()) |
| 1021 | { |
| 1022 | assert(!op3->isContained()); |
| 1023 | op2IsDelayFree = true; |
| 1024 | op3IsDelayFree = true; |
| 1025 | } |
| 1026 | break; |
| 1027 | case HWIntrinsicInfo::Sha1RotateOp: |
| 1028 | buildInternalFloatRegisterDefForNode(intrinsicTree); |
| 1029 | break; |
| 1030 | |
| 1031 | case HWIntrinsicInfo::SimdExtractOp: |
| 1032 | case HWIntrinsicInfo::SimdInsertOp: |
| 1033 | if (!op2->isContained()) |
| 1034 | { |
| 1035 | // We need a temp to create a switch table |
| 1036 | buildInternalIntRegisterDefForNode(intrinsicTree); |
| 1037 | } |
| 1038 | break; |
| 1039 | |
| 1040 | default: |
| 1041 | break; |
| 1042 | } |
| 1043 | |
| 1044 | // Next, build uses |
| 1045 | if (numArgs > 3) |
| 1046 | { |
| 1047 | srcCount = 0; |
| 1048 | assert(!op2IsDelayFree && !op3IsDelayFree); |
| 1049 | assert(op1->OperIs(GT_LIST)); |
| 1050 | { |
| 1051 | for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest()) |
| 1052 | { |
| 1053 | srcCount += BuildOperandUses(list->Current()); |
| 1054 | } |
| 1055 | } |
| 1056 | assert(srcCount == numArgs); |
| 1057 | } |
| 1058 | else |
| 1059 | { |
| 1060 | if (op1 != nullptr) |
| 1061 | { |
| 1062 | srcCount += BuildOperandUses(op1); |
| 1063 | if (op2 != nullptr) |
| 1064 | { |
| 1065 | srcCount += (op2IsDelayFree) ? BuildDelayFreeUses(op2) : BuildOperandUses(op2); |
| 1066 | if (op3 != nullptr) |
| 1067 | { |
| 1068 | srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3); |
| 1069 | } |
| 1070 | } |
| 1071 | } |
| 1072 | } |
| 1073 | buildInternalRegisterUses(); |
| 1074 | |
| 1075 | // Now defs |
| 1076 | if (intrinsicTree->IsValue()) |
| 1077 | { |
| 1078 | BuildDef(intrinsicTree); |
| 1079 | } |
| 1080 | |
| 1081 | return srcCount; |
| 1082 | } |
| 1083 | #endif |
| 1084 | |
| 1085 | #endif // _TARGET_ARM64_ |
| 1086 | |