| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 7 | XX XX |
| 8 | XX Register Requirements for ARM and ARM64 common code XX |
| 9 | XX XX |
| 10 | XX This encapsulates common logic for setting register requirements for XX |
| 11 | XX the ARM and ARM64 architectures. XX |
| 12 | XX XX |
| 13 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 14 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 15 | */ |
| 16 | |
| 17 | #include "jitpch.h" |
| 18 | #ifdef _MSC_VER |
| 19 | #pragma hdrstop |
| 20 | #endif |
| 21 | |
| 22 | #ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures |
| 23 | |
| 24 | #include "jit.h" |
| 25 | #include "sideeffects.h" |
| 26 | #include "lower.h" |
| 27 | #include "lsra.h" |
| 28 | |
| 29 | //------------------------------------------------------------------------ |
| 30 | // BuildIndir: Specify register requirements for address expression |
| 31 | // of an indirection operation. |
| 32 | // |
| 33 | // Arguments: |
| 34 | // indirTree - GT_IND, GT_STOREIND or block gentree node |
| 35 | // |
| 36 | // Return Value: |
| 37 | // The number of sources consumed by this node. |
| 38 | // |
| 39 | int LinearScan::BuildIndir(GenTreeIndir* indirTree) |
| 40 | { |
| 41 | int srcCount = 0; |
| 42 | // If this is the rhs of a block copy (i.e. non-enregisterable struct), |
| 43 | // it has no register requirements. |
| 44 | if (indirTree->TypeGet() == TYP_STRUCT) |
| 45 | { |
| 46 | return srcCount; |
| 47 | } |
| 48 | |
| 49 | bool isStore = (indirTree->gtOper == GT_STOREIND); |
| 50 | |
| 51 | GenTree* addr = indirTree->Addr(); |
| 52 | GenTree* index = nullptr; |
| 53 | int cns = 0; |
| 54 | |
| 55 | #ifdef _TARGET_ARM_ |
| 56 | // Unaligned loads/stores for floating point values must first be loaded into integer register(s) |
| 57 | if (indirTree->gtFlags & GTF_IND_UNALIGNED) |
| 58 | { |
| 59 | var_types type = TYP_UNDEF; |
| 60 | if (indirTree->OperGet() == GT_STOREIND) |
| 61 | { |
| 62 | type = indirTree->AsStoreInd()->Data()->TypeGet(); |
| 63 | } |
| 64 | else if (indirTree->OperGet() == GT_IND) |
| 65 | { |
| 66 | type = indirTree->TypeGet(); |
| 67 | } |
| 68 | |
| 69 | if (type == TYP_FLOAT) |
| 70 | { |
| 71 | buildInternalIntRegisterDefForNode(indirTree); |
| 72 | } |
| 73 | else if (type == TYP_DOUBLE) |
| 74 | { |
| 75 | buildInternalIntRegisterDefForNode(indirTree); |
| 76 | buildInternalIntRegisterDefForNode(indirTree); |
| 77 | } |
| 78 | } |
| 79 | #endif |
| 80 | |
| 81 | if (addr->isContained()) |
| 82 | { |
| 83 | assert(addr->OperGet() == GT_LEA); |
| 84 | GenTreeAddrMode* lea = addr->AsAddrMode(); |
| 85 | index = lea->Index(); |
| 86 | cns = lea->Offset(); |
| 87 | |
| 88 | // On ARM we may need a single internal register |
| 89 | // (when both conditions are true then we still only need a single internal register) |
| 90 | if ((index != nullptr) && (cns != 0)) |
| 91 | { |
| 92 | // ARM does not support both Index and offset so we need an internal register |
| 93 | buildInternalIntRegisterDefForNode(indirTree); |
| 94 | } |
| 95 | else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) |
| 96 | { |
| 97 | // This offset can't be contained in the ldr/str instruction, so we need an internal register |
| 98 | buildInternalIntRegisterDefForNode(indirTree); |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | #ifdef FEATURE_SIMD |
| 103 | if (indirTree->TypeGet() == TYP_SIMD12) |
| 104 | { |
| 105 | // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). |
| 106 | assert(!addr->isContained()); |
| 107 | |
| 108 | // Vector3 is read/written as two reads/writes: 8 byte and 4 byte. |
| 109 | // To assemble the vector properly we would need an additional int register |
| 110 | buildInternalIntRegisterDefForNode(indirTree); |
| 111 | } |
| 112 | #endif // FEATURE_SIMD |
| 113 | |
| 114 | srcCount = BuildIndirUses(indirTree); |
| 115 | buildInternalRegisterUses(); |
| 116 | |
| 117 | if (indirTree->gtOper != GT_STOREIND) |
| 118 | { |
| 119 | BuildDef(indirTree); |
| 120 | } |
| 121 | return srcCount; |
| 122 | } |
| 123 | |
| 124 | //------------------------------------------------------------------------ |
| 125 | // BuildCall: Set the NodeInfo for a call. |
| 126 | // |
| 127 | // Arguments: |
| 128 | // call - The call node of interest |
| 129 | // |
| 130 | // Return Value: |
| 131 | // The number of sources consumed by this node. |
| 132 | // |
| 133 | int LinearScan::BuildCall(GenTreeCall* call) |
| 134 | { |
| 135 | bool hasMultiRegRetVal = false; |
| 136 | ReturnTypeDesc* retTypeDesc = nullptr; |
| 137 | regMaskTP dstCandidates = RBM_NONE; |
| 138 | |
| 139 | int srcCount = 0; |
| 140 | int dstCount = 0; |
| 141 | if (call->TypeGet() != TYP_VOID) |
| 142 | { |
| 143 | hasMultiRegRetVal = call->HasMultiRegRetVal(); |
| 144 | if (hasMultiRegRetVal) |
| 145 | { |
| 146 | // dst count = number of registers in which the value is returned by call |
| 147 | retTypeDesc = call->GetReturnTypeDesc(); |
| 148 | dstCount = retTypeDesc->GetReturnRegCount(); |
| 149 | } |
| 150 | else |
| 151 | { |
| 152 | dstCount = 1; |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | GenTree* ctrlExpr = call->gtControlExpr; |
| 157 | regMaskTP ctrlExprCandidates = RBM_NONE; |
| 158 | if (call->gtCallType == CT_INDIRECT) |
| 159 | { |
| 160 | // either gtControlExpr != null or gtCallAddr != null. |
| 161 | // Both cannot be non-null at the same time. |
| 162 | assert(ctrlExpr == nullptr); |
| 163 | assert(call->gtCallAddr != nullptr); |
| 164 | ctrlExpr = call->gtCallAddr; |
| 165 | } |
| 166 | |
| 167 | // set reg requirements on call target represented as control sequence. |
| 168 | if (ctrlExpr != nullptr) |
| 169 | { |
| 170 | // we should never see a gtControlExpr whose type is void. |
| 171 | assert(ctrlExpr->TypeGet() != TYP_VOID); |
| 172 | |
| 173 | // In case of fast tail implemented as jmp, make sure that gtControlExpr is |
| 174 | // computed into a register. |
| 175 | if (call->IsFastTailCall()) |
| 176 | { |
| 177 | // Fast tail call - make sure that call target is always computed in R12(ARM32)/IP0(ARM64) |
| 178 | // so that epilog sequence can generate "br xip0/r12" to achieve fast tail call. |
| 179 | ctrlExprCandidates = RBM_FASTTAILCALL_TARGET; |
| 180 | } |
| 181 | } |
| 182 | #ifdef _TARGET_ARM_ |
| 183 | else |
| 184 | { |
| 185 | buildInternalIntRegisterDefForNode(call); |
| 186 | } |
| 187 | |
| 188 | if (call->NeedsNullCheck()) |
| 189 | { |
| 190 | buildInternalIntRegisterDefForNode(call); |
| 191 | } |
| 192 | |
| 193 | #endif // _TARGET_ARM_ |
| 194 | |
| 195 | RegisterType registerType = call->TypeGet(); |
| 196 | |
| 197 | // Set destination candidates for return value of the call. |
| 198 | |
| 199 | #ifdef _TARGET_ARM_ |
| 200 | if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) |
| 201 | { |
| 202 | // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with |
| 203 | // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. |
| 204 | dstCandidates = RBM_PINVOKE_TCB; |
| 205 | } |
| 206 | else |
| 207 | #endif // _TARGET_ARM_ |
| 208 | if (hasMultiRegRetVal) |
| 209 | { |
| 210 | assert(retTypeDesc != nullptr); |
| 211 | dstCandidates = retTypeDesc->GetABIReturnRegs(); |
| 212 | } |
| 213 | else if (varTypeIsFloating(registerType)) |
| 214 | { |
| 215 | dstCandidates = RBM_FLOATRET; |
| 216 | } |
| 217 | else if (registerType == TYP_LONG) |
| 218 | { |
| 219 | dstCandidates = RBM_LNGRET; |
| 220 | } |
| 221 | else |
| 222 | { |
| 223 | dstCandidates = RBM_INTRET; |
| 224 | } |
| 225 | |
| 226 | // First, count reg args |
| 227 | // Each register argument corresponds to one source. |
| 228 | bool callHasFloatRegArgs = false; |
| 229 | |
| 230 | for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext()) |
| 231 | { |
| 232 | assert(list->OperIsList()); |
| 233 | |
| 234 | GenTree* argNode = list->Current(); |
| 235 | |
| 236 | #ifdef DEBUG |
| 237 | // During Build, we only use the ArgTabEntry for validation, |
| 238 | // as getting it is rather expensive. |
| 239 | fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); |
| 240 | regNumber argReg = curArgTabEntry->regNum; |
| 241 | assert(curArgTabEntry); |
| 242 | #endif |
| 243 | |
| 244 | if (argNode->gtOper == GT_PUTARG_STK) |
| 245 | { |
| 246 | // late arg that is not passed in a register |
| 247 | assert(curArgTabEntry->regNum == REG_STK); |
| 248 | // These should never be contained. |
| 249 | assert(!argNode->isContained()); |
| 250 | continue; |
| 251 | } |
| 252 | |
| 253 | // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct |
| 254 | if (argNode->OperGet() == GT_FIELD_LIST) |
| 255 | { |
| 256 | assert(argNode->isContained()); |
| 257 | |
| 258 | // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) |
| 259 | for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) |
| 260 | { |
| 261 | #ifdef DEBUG |
| 262 | assert(entry->Current()->OperIs(GT_PUTARG_REG)); |
| 263 | assert(entry->Current()->gtRegNum == argReg); |
| 264 | // Update argReg for the next putarg_reg (if any) |
| 265 | argReg = genRegArgNext(argReg); |
| 266 | |
| 267 | #if defined(_TARGET_ARM_) |
| 268 | // A double register is modelled as an even-numbered single one |
| 269 | if (entry->Current()->TypeGet() == TYP_DOUBLE) |
| 270 | { |
| 271 | argReg = genRegArgNext(argReg); |
| 272 | } |
| 273 | #endif // _TARGET_ARM_ |
| 274 | #endif |
| 275 | BuildUse(entry->Current(), genRegMask(entry->Current()->gtRegNum)); |
| 276 | srcCount++; |
| 277 | } |
| 278 | } |
| 279 | #if FEATURE_ARG_SPLIT |
| 280 | else if (argNode->OperGet() == GT_PUTARG_SPLIT) |
| 281 | { |
| 282 | unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; |
| 283 | assert(regCount == curArgTabEntry->numRegs); |
| 284 | for (unsigned int i = 0; i < regCount; i++) |
| 285 | { |
| 286 | BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); |
| 287 | } |
| 288 | srcCount += regCount; |
| 289 | } |
| 290 | #endif // FEATURE_ARG_SPLIT |
| 291 | else |
| 292 | { |
| 293 | assert(argNode->OperIs(GT_PUTARG_REG)); |
| 294 | assert(argNode->gtRegNum == argReg); |
| 295 | HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); |
| 296 | #ifdef _TARGET_ARM_ |
| 297 | // The `double` types have been transformed to `long` on armel, |
| 298 | // while the actual long types have been decomposed. |
| 299 | // On ARM we may have bitcasts from DOUBLE to LONG. |
| 300 | if (argNode->TypeGet() == TYP_LONG) |
| 301 | { |
| 302 | assert(argNode->IsMultiRegNode()); |
| 303 | BuildUse(argNode, genRegMask(argNode->gtRegNum), 0); |
| 304 | BuildUse(argNode, genRegMask(genRegArgNext(argNode->gtRegNum)), 1); |
| 305 | srcCount += 2; |
| 306 | } |
| 307 | else |
| 308 | #endif // _TARGET_ARM_ |
| 309 | { |
| 310 | BuildUse(argNode, genRegMask(argNode->gtRegNum)); |
| 311 | srcCount++; |
| 312 | } |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | // Now, count stack args |
| 317 | // Note that these need to be computed into a register, but then |
| 318 | // they're just stored to the stack - so the reg doesn't |
| 319 | // need to remain live until the call. In fact, it must not |
| 320 | // because the code generator doesn't actually consider it live, |
| 321 | // so it can't be spilled. |
| 322 | |
| 323 | GenTree* args = call->gtCallArgs; |
| 324 | while (args) |
| 325 | { |
| 326 | GenTree* arg = args->gtGetOp1(); |
| 327 | |
| 328 | // Skip arguments that have been moved to the Late Arg list |
| 329 | if (!(args->gtFlags & GTF_LATE_ARG)) |
| 330 | { |
| 331 | #ifdef DEBUG |
| 332 | fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); |
| 333 | assert(curArgTabEntry); |
| 334 | #endif |
| 335 | #if FEATURE_ARG_SPLIT |
| 336 | // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they |
| 337 | // define registers used by the call. |
| 338 | assert(arg->OperGet() != GT_PUTARG_SPLIT); |
| 339 | #endif // FEATURE_ARG_SPLIT |
| 340 | if (arg->gtOper == GT_PUTARG_STK) |
| 341 | { |
| 342 | assert(curArgTabEntry->regNum == REG_STK); |
| 343 | } |
| 344 | else |
| 345 | { |
| 346 | assert(!arg->IsValue() || arg->IsUnusedValue()); |
| 347 | } |
| 348 | } |
| 349 | args = args->gtGetOp2(); |
| 350 | } |
| 351 | |
| 352 | // If it is a fast tail call, it is already preferenced to use IP0. |
| 353 | // Therefore, no need set src candidates on call tgt again. |
| 354 | if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) |
| 355 | { |
| 356 | NYI_ARM("float reg varargs" ); |
| 357 | |
| 358 | // Don't assign the call target to any of the argument registers because |
| 359 | // we will use them to also pass floating point arguments as required |
| 360 | // by Arm64 ABI. |
| 361 | ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); |
| 362 | } |
| 363 | |
| 364 | if (ctrlExpr != nullptr) |
| 365 | { |
| 366 | BuildUse(ctrlExpr, ctrlExprCandidates); |
| 367 | srcCount++; |
| 368 | } |
| 369 | |
| 370 | buildInternalRegisterUses(); |
| 371 | |
| 372 | // Now generate defs and kills. |
| 373 | regMaskTP killMask = getKillSetForCall(call); |
| 374 | BuildDefsWithKills(call, dstCount, dstCandidates, killMask); |
| 375 | return srcCount; |
| 376 | } |
| 377 | |
| 378 | //------------------------------------------------------------------------ |
| 379 | // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node |
| 380 | // |
| 381 | // Arguments: |
| 382 | // argNode - a GT_PUTARG_STK node |
| 383 | // |
| 384 | // Return Value: |
| 385 | // The number of sources consumed by this node. |
| 386 | // |
| 387 | // Notes: |
| 388 | // Set the child node(s) to be contained when we have a multireg arg |
| 389 | // |
| 390 | int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) |
| 391 | { |
| 392 | assert(argNode->gtOper == GT_PUTARG_STK); |
| 393 | |
| 394 | GenTree* putArgChild = argNode->gtGetOp1(); |
| 395 | |
| 396 | int srcCount = 0; |
| 397 | |
| 398 | // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct |
| 399 | if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST)) |
| 400 | { |
| 401 | // We will use store instructions that each write a register sized value |
| 402 | |
| 403 | if (putArgChild->OperGet() == GT_FIELD_LIST) |
| 404 | { |
| 405 | assert(putArgChild->isContained()); |
| 406 | // We consume all of the items in the GT_FIELD_LIST |
| 407 | for (GenTreeFieldList* current = putArgChild->AsFieldList(); current != nullptr; current = current->Rest()) |
| 408 | { |
| 409 | BuildUse(current->Current()); |
| 410 | srcCount++; |
| 411 | } |
| 412 | } |
| 413 | else |
| 414 | { |
| 415 | // We can use a ldp/stp sequence so we need two internal registers for ARM64; one for ARM. |
| 416 | buildInternalIntRegisterDefForNode(argNode); |
| 417 | #ifdef _TARGET_ARM64_ |
| 418 | buildInternalIntRegisterDefForNode(argNode); |
| 419 | #endif // _TARGET_ARM64_ |
| 420 | |
| 421 | if (putArgChild->OperGet() == GT_OBJ) |
| 422 | { |
| 423 | assert(putArgChild->isContained()); |
| 424 | GenTree* objChild = putArgChild->gtGetOp1(); |
| 425 | if (objChild->OperGet() == GT_LCL_VAR_ADDR) |
| 426 | { |
| 427 | // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR |
| 428 | // as one contained operation, and there are no source registers. |
| 429 | // |
| 430 | assert(objChild->isContained()); |
| 431 | } |
| 432 | else |
| 433 | { |
| 434 | // We will generate all of the code for the GT_PUTARG_STK and its child node |
| 435 | // as one contained operation |
| 436 | // |
| 437 | srcCount = BuildOperandUses(objChild); |
| 438 | } |
| 439 | } |
| 440 | else |
| 441 | { |
| 442 | // No source registers. |
| 443 | putArgChild->OperIs(GT_LCL_VAR); |
| 444 | } |
| 445 | } |
| 446 | } |
| 447 | else |
| 448 | { |
| 449 | assert(!putArgChild->isContained()); |
| 450 | srcCount = BuildOperandUses(putArgChild); |
| 451 | } |
| 452 | buildInternalRegisterUses(); |
| 453 | return srcCount; |
| 454 | } |
| 455 | |
| 456 | #if FEATURE_ARG_SPLIT |
| 457 | //------------------------------------------------------------------------ |
| 458 | // BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node |
| 459 | // |
| 460 | // Arguments: |
| 461 | // argNode - a GT_PUTARG_SPLIT node |
| 462 | // |
| 463 | // Return Value: |
| 464 | // The number of sources consumed by this node. |
| 465 | // |
| 466 | // Notes: |
| 467 | // Set the child node(s) to be contained |
| 468 | // |
int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
{
    int srcCount = 0;
    assert(argNode->gtOper == GT_PUTARG_SPLIT);

    GenTree* putArgChild = argNode->gtGetOp1();

    // Registers for split argument corresponds to source
    int dstCount = argNode->gtNumRegs;

    // The split arg occupies a contiguous run of argument registers starting at gtRegNum;
    // build the mask of all of them and record each one on the node.
    regNumber argReg  = argNode->gtRegNum;
    regMaskTP argMask = RBM_NONE;
    for (unsigned i = 0; i < argNode->gtNumRegs; i++)
    {
        regNumber thisArgReg = (regNumber)((unsigned)argReg + i);
        argMask |= genRegMask(thisArgReg);
        argNode->SetRegNumByIdx(thisArgReg, i);
    }

    if (putArgChild->OperGet() == GT_FIELD_LIST)
    {
        // Generated code:
        // 1. Consume all of the items in the GT_FIELD_LIST (source)
        // 2. Store to target slot and move to target registers (destination) from source
        //
        unsigned sourceRegCount = 0;

        // To avoid redundant moves, have the argument operand computed in the
        // register in which the argument is passed to the call.

        for (GenTreeFieldList* fieldListPtr = putArgChild->AsFieldList(); fieldListPtr != nullptr;
             fieldListPtr = fieldListPtr->Rest())
        {
            GenTree* node = fieldListPtr->gtGetOp1();
            assert(!node->isContained());
            // The only multi-reg nodes we should see are OperIsMultiRegOp()
            unsigned currentRegCount;
#ifdef _TARGET_ARM_
            if (node->OperIsMultiRegOp())
            {
                currentRegCount = node->AsMultiRegOp()->GetRegCount();
            }
            else
#endif // _TARGET_ARM_
            {
                assert(!node->IsMultiRegNode());
                currentRegCount = 1;
            }
            // Consume all the registers, setting the appropriate register mask for the ones that
            // go into registers. Fields beyond gtNumRegs go to the stack and get no
            // register preference (RBM_NONE).
            for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++)
            {
                regMaskTP sourceMask = RBM_NONE;
                if (sourceRegCount < argNode->gtNumRegs)
                {
                    sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount));
                }
                sourceRegCount++;
                BuildUse(node, sourceMask, regIndex);
            }
        }
        srcCount += sourceRegCount;
        assert(putArgChild->isContained());
    }
    else
    {
        // Otherwise the child must be a contained GT_OBJ of a struct; codegen copies it
        // with a load/store sequence.
        assert(putArgChild->TypeGet() == TYP_STRUCT);
        assert(putArgChild->OperGet() == GT_OBJ);

        // We can use a ldr/str sequence so we need an internal register.
        // Exclude the target argument registers so the temp doesn't clobber them.
        buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask);

        GenTree* objChild = putArgChild->gtGetOp1();
        if (objChild->OperGet() == GT_LCL_VAR_ADDR)
        {
            // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR
            // as one contained operation
            //
            assert(objChild->isContained());
        }
        else
        {
            srcCount = BuildIndirUses(putArgChild->AsIndir());
        }
        assert(putArgChild->isContained());
    }
    // Order matters: internal-register uses precede the defs of the argument registers.
    buildInternalRegisterUses();
    BuildDefs(argNode, dstCount, argMask);
    return srcCount;
}
| 559 | #endif // FEATURE_ARG_SPLIT |
| 560 | |
| 561 | //------------------------------------------------------------------------ |
| 562 | // BuildBlockStore: Set the NodeInfo for a block store. |
| 563 | // |
| 564 | // Arguments: |
| 565 | // blkNode - The block store node of interest |
| 566 | // |
| 567 | // Return Value: |
| 568 | // The number of sources consumed by this node. |
| 569 | // |
int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
    GenTree* dstAddr  = blkNode->Addr();
    unsigned size     = blkNode->gtBlkSize;
    GenTree* source   = blkNode->Data();
    int      srcCount = 0;

    // For an init block this will hold the fill value; for a copy block, the source address.
    GenTree* srcAddrOrFill = nullptr;
    bool     isInitBlk     = blkNode->OperIsInitBlkOp();

    regMaskTP dstAddrRegMask        = RBM_NONE;
    regMaskTP sourceRegMask         = RBM_NONE;
    regMaskTP blkSizeRegMask        = RBM_NONE;
    regMaskTP internalIntCandidates = RBM_NONE;

    if (isInitBlk)
    {
        GenTree* initVal = source;
        if (initVal->OperIsInitVal())
        {
            // Unwrap the GT_INIT_VAL node to get at the actual fill value.
            assert(initVal->isContained());
            initVal = initVal->gtGetOp1();
        }
        srcAddrOrFill = initVal;

        if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
        {
            // TODO-ARM-CQ: Currently we generate a helper call for every
            // initblk we encounter. Later on we should implement loop unrolling
            // code sequences to improve CQ.
            // For reference see the code in lsraxarch.cpp.
            NYI_ARM("initblk loop unrolling is currently not implemented." );
        }
        else
        {
            assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
            assert(!initVal->isContained());
            // The helper follows the regular ABI.
            dstAddrRegMask = RBM_ARG_0;
            sourceRegMask  = RBM_ARG_1;
            blkSizeRegMask = RBM_ARG_2;
        }
    }
    else
    {
        // CopyObj or CopyBlk
        // Sources are src and dest and size if not constant.
        if (source->gtOper == GT_IND)
        {
            // The data node is a contained indirection; its address is the real source use.
            assert(source->isContained());
            srcAddrOrFill = source->gtGetOp1();
            assert(!srcAddrOrFill->isContained());
        }
        if (blkNode->OperGet() == GT_STORE_OBJ)
        {
            // CopyObj
            // We don't need to materialize the struct size but we still need
            // a temporary register to perform the sequence of loads and stores.
            // We can't use the special Write Barrier registers, so exclude them from the mask
            internalIntCandidates = allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
            buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);

            if (size >= 2 * REGSIZE_BYTES)
            {
                // We will use ldp/stp to reduce code size and improve performance
                // so we need to reserve an extra internal register
                buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
            }

            // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
            dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;

            // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
            // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
            // which is killed by a StoreObj (and thus needn't be reserved).
            if (srcAddrOrFill != nullptr)
            {
                sourceRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
            }
        }
        else
        {
            // CopyBlk
            if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
            {
                // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
                // we should unroll the loop to improve CQ.
                // For reference see the code in lsraxarch.cpp.

                buildInternalIntRegisterDefForNode(blkNode);

#ifdef _TARGET_ARM64_
                if (size >= 2 * REGSIZE_BYTES)
                {
                    // We will use ldp/stp to reduce code size and improve performance
                    // so we need to reserve an extra internal register
                    buildInternalIntRegisterDefForNode(blkNode);
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                // Helper call: arguments follow the regular ABI.
                assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
                dstAddrRegMask = RBM_ARG_0;
                // The srcAddr goes in arg1.
                if (srcAddrOrFill != nullptr)
                {
                    sourceRegMask = RBM_ARG_1;
                }
                blkSizeRegMask = RBM_ARG_2;
            }
        }
    }

    if ((size != 0) && (blkSizeRegMask != RBM_NONE))
    {
        // Reserve a temp register for the block size argument.
        buildInternalIntRegisterDefForNode(blkNode, blkSizeRegMask);
    }

    // Build the address uses in evaluation order: if IsReverseOp(), the source
    // address is evaluated before the destination address, so its use must come first.
    if (!dstAddr->isContained() && !blkNode->IsReverseOp())
    {
        srcCount++;
        BuildUse(dstAddr, dstAddrRegMask);
    }
    if ((srcAddrOrFill != nullptr) && !srcAddrOrFill->isContained())
    {
        srcCount++;
        BuildUse(srcAddrOrFill, sourceRegMask);
    }
    if (!dstAddr->isContained() && blkNode->IsReverseOp())
    {
        srcCount++;
        BuildUse(dstAddr, dstAddrRegMask);
    }

    if (size == 0)
    {
        // A size of zero means the size is dynamic (GT_STORE_DYN_BLK) and is an extra operand.
        assert(blkNode->OperIs(GT_STORE_DYN_BLK));
        // The block size argument is a third argument to GT_STORE_DYN_BLK
        srcCount++;
        GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
        BuildUse(blockSize, blkSizeRegMask);
    }

    // Block stores produce no value; defs consist only of the kill set.
    buildInternalRegisterUses();
    regMaskTP killMask = getKillSetForBlockStore(blkNode);
    BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
    return srcCount;
}
| 720 | |
| 721 | //------------------------------------------------------------------------ |
| 722 | // BuildCast: Set the NodeInfo for a GT_CAST. |
| 723 | // |
| 724 | // Arguments: |
| 725 | // cast - The GT_CAST node |
| 726 | // |
| 727 | // Return Value: |
| 728 | // The number of sources consumed by this node. |
| 729 | // |
| 730 | int LinearScan::BuildCast(GenTreeCast* cast) |
| 731 | { |
| 732 | GenTree* src = cast->gtGetOp1(); |
| 733 | |
| 734 | const var_types srcType = genActualType(src->TypeGet()); |
| 735 | const var_types castType = cast->gtCastType; |
| 736 | |
| 737 | #ifdef _TARGET_ARM_ |
| 738 | assert(!varTypeIsLong(srcType) || (src->OperIs(GT_LONG) && src->isContained())); |
| 739 | |
| 740 | // Floating point to integer casts requires a temporary register. |
| 741 | if (varTypeIsFloating(srcType) && !varTypeIsFloating(castType)) |
| 742 | { |
| 743 | buildInternalFloatRegisterDefForNode(cast, RBM_ALLFLOAT); |
| 744 | setInternalRegsDelayFree = true; |
| 745 | } |
| 746 | #else |
| 747 | // Overflow checking cast from TYP_LONG to TYP_INT requires a temporary register to |
| 748 | // store the min and max immediate values that cannot be encoded in the CMP instruction. |
| 749 | if (cast->gtOverflow() && varTypeIsLong(srcType) && !cast->IsUnsigned() && (castType == TYP_INT)) |
| 750 | { |
| 751 | buildInternalIntRegisterDefForNode(cast); |
| 752 | } |
| 753 | #endif |
| 754 | |
| 755 | int srcCount = BuildOperandUses(src); |
| 756 | buildInternalRegisterUses(); |
| 757 | BuildDef(cast); |
| 758 | return srcCount; |
| 759 | } |
| 760 | |
| 761 | #endif // _TARGET_ARMARCH_ |
| 762 | |