1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX Register Requirements for ARM and ARM64 common code XX |
9 | XX XX |
10 | XX This encapsulates common logic for setting register requirements for XX |
11 | XX the ARM and ARM64 architectures. XX |
12 | XX XX |
13 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
14 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
15 | */ |
16 | |
17 | #include "jitpch.h" |
18 | #ifdef _MSC_VER |
19 | #pragma hdrstop |
20 | #endif |
21 | |
22 | #ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures |
23 | |
24 | #include "jit.h" |
25 | #include "sideeffects.h" |
26 | #include "lower.h" |
27 | #include "lsra.h" |
28 | |
29 | //------------------------------------------------------------------------ |
30 | // BuildIndir: Specify register requirements for address expression |
31 | // of an indirection operation. |
32 | // |
33 | // Arguments: |
34 | // indirTree - GT_IND, GT_STOREIND or block gentree node |
35 | // |
36 | // Return Value: |
37 | // The number of sources consumed by this node. |
38 | // |
39 | int LinearScan::BuildIndir(GenTreeIndir* indirTree) |
40 | { |
41 | int srcCount = 0; |
42 | // If this is the rhs of a block copy (i.e. non-enregisterable struct), |
43 | // it has no register requirements. |
44 | if (indirTree->TypeGet() == TYP_STRUCT) |
45 | { |
46 | return srcCount; |
47 | } |
48 | |
49 | bool isStore = (indirTree->gtOper == GT_STOREIND); |
50 | |
51 | GenTree* addr = indirTree->Addr(); |
52 | GenTree* index = nullptr; |
53 | int cns = 0; |
54 | |
55 | #ifdef _TARGET_ARM_ |
56 | // Unaligned loads/stores for floating point values must first be loaded into integer register(s) |
57 | if (indirTree->gtFlags & GTF_IND_UNALIGNED) |
58 | { |
59 | var_types type = TYP_UNDEF; |
60 | if (indirTree->OperGet() == GT_STOREIND) |
61 | { |
62 | type = indirTree->AsStoreInd()->Data()->TypeGet(); |
63 | } |
64 | else if (indirTree->OperGet() == GT_IND) |
65 | { |
66 | type = indirTree->TypeGet(); |
67 | } |
68 | |
69 | if (type == TYP_FLOAT) |
70 | { |
71 | buildInternalIntRegisterDefForNode(indirTree); |
72 | } |
73 | else if (type == TYP_DOUBLE) |
74 | { |
75 | buildInternalIntRegisterDefForNode(indirTree); |
76 | buildInternalIntRegisterDefForNode(indirTree); |
77 | } |
78 | } |
79 | #endif |
80 | |
81 | if (addr->isContained()) |
82 | { |
83 | assert(addr->OperGet() == GT_LEA); |
84 | GenTreeAddrMode* lea = addr->AsAddrMode(); |
85 | index = lea->Index(); |
86 | cns = lea->Offset(); |
87 | |
88 | // On ARM we may need a single internal register |
89 | // (when both conditions are true then we still only need a single internal register) |
90 | if ((index != nullptr) && (cns != 0)) |
91 | { |
92 | // ARM does not support both Index and offset so we need an internal register |
93 | buildInternalIntRegisterDefForNode(indirTree); |
94 | } |
95 | else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) |
96 | { |
97 | // This offset can't be contained in the ldr/str instruction, so we need an internal register |
98 | buildInternalIntRegisterDefForNode(indirTree); |
99 | } |
100 | } |
101 | |
102 | #ifdef FEATURE_SIMD |
103 | if (indirTree->TypeGet() == TYP_SIMD12) |
104 | { |
105 | // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). |
106 | assert(!addr->isContained()); |
107 | |
108 | // Vector3 is read/written as two reads/writes: 8 byte and 4 byte. |
109 | // To assemble the vector properly we would need an additional int register |
110 | buildInternalIntRegisterDefForNode(indirTree); |
111 | } |
112 | #endif // FEATURE_SIMD |
113 | |
114 | srcCount = BuildIndirUses(indirTree); |
115 | buildInternalRegisterUses(); |
116 | |
117 | if (indirTree->gtOper != GT_STOREIND) |
118 | { |
119 | BuildDef(indirTree); |
120 | } |
121 | return srcCount; |
122 | } |
123 | |
124 | //------------------------------------------------------------------------ |
125 | // BuildCall: Set the NodeInfo for a call. |
126 | // |
127 | // Arguments: |
128 | // call - The call node of interest |
129 | // |
130 | // Return Value: |
131 | // The number of sources consumed by this node. |
132 | // |
133 | int LinearScan::BuildCall(GenTreeCall* call) |
134 | { |
135 | bool hasMultiRegRetVal = false; |
136 | ReturnTypeDesc* retTypeDesc = nullptr; |
137 | regMaskTP dstCandidates = RBM_NONE; |
138 | |
139 | int srcCount = 0; |
140 | int dstCount = 0; |
141 | if (call->TypeGet() != TYP_VOID) |
142 | { |
143 | hasMultiRegRetVal = call->HasMultiRegRetVal(); |
144 | if (hasMultiRegRetVal) |
145 | { |
146 | // dst count = number of registers in which the value is returned by call |
147 | retTypeDesc = call->GetReturnTypeDesc(); |
148 | dstCount = retTypeDesc->GetReturnRegCount(); |
149 | } |
150 | else |
151 | { |
152 | dstCount = 1; |
153 | } |
154 | } |
155 | |
156 | GenTree* ctrlExpr = call->gtControlExpr; |
157 | regMaskTP ctrlExprCandidates = RBM_NONE; |
158 | if (call->gtCallType == CT_INDIRECT) |
159 | { |
160 | // either gtControlExpr != null or gtCallAddr != null. |
161 | // Both cannot be non-null at the same time. |
162 | assert(ctrlExpr == nullptr); |
163 | assert(call->gtCallAddr != nullptr); |
164 | ctrlExpr = call->gtCallAddr; |
165 | } |
166 | |
167 | // set reg requirements on call target represented as control sequence. |
168 | if (ctrlExpr != nullptr) |
169 | { |
170 | // we should never see a gtControlExpr whose type is void. |
171 | assert(ctrlExpr->TypeGet() != TYP_VOID); |
172 | |
173 | // In case of fast tail implemented as jmp, make sure that gtControlExpr is |
174 | // computed into a register. |
175 | if (call->IsFastTailCall()) |
176 | { |
177 | // Fast tail call - make sure that call target is always computed in R12(ARM32)/IP0(ARM64) |
178 | // so that epilog sequence can generate "br xip0/r12" to achieve fast tail call. |
179 | ctrlExprCandidates = RBM_FASTTAILCALL_TARGET; |
180 | } |
181 | } |
182 | #ifdef _TARGET_ARM_ |
183 | else |
184 | { |
185 | buildInternalIntRegisterDefForNode(call); |
186 | } |
187 | |
188 | if (call->NeedsNullCheck()) |
189 | { |
190 | buildInternalIntRegisterDefForNode(call); |
191 | } |
192 | |
193 | #endif // _TARGET_ARM_ |
194 | |
195 | RegisterType registerType = call->TypeGet(); |
196 | |
197 | // Set destination candidates for return value of the call. |
198 | |
199 | #ifdef _TARGET_ARM_ |
200 | if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) |
201 | { |
202 | // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with |
203 | // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. |
204 | dstCandidates = RBM_PINVOKE_TCB; |
205 | } |
206 | else |
207 | #endif // _TARGET_ARM_ |
208 | if (hasMultiRegRetVal) |
209 | { |
210 | assert(retTypeDesc != nullptr); |
211 | dstCandidates = retTypeDesc->GetABIReturnRegs(); |
212 | } |
213 | else if (varTypeIsFloating(registerType)) |
214 | { |
215 | dstCandidates = RBM_FLOATRET; |
216 | } |
217 | else if (registerType == TYP_LONG) |
218 | { |
219 | dstCandidates = RBM_LNGRET; |
220 | } |
221 | else |
222 | { |
223 | dstCandidates = RBM_INTRET; |
224 | } |
225 | |
226 | // First, count reg args |
227 | // Each register argument corresponds to one source. |
228 | bool callHasFloatRegArgs = false; |
229 | |
230 | for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext()) |
231 | { |
232 | assert(list->OperIsList()); |
233 | |
234 | GenTree* argNode = list->Current(); |
235 | |
236 | #ifdef DEBUG |
237 | // During Build, we only use the ArgTabEntry for validation, |
238 | // as getting it is rather expensive. |
239 | fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); |
240 | regNumber argReg = curArgTabEntry->regNum; |
241 | assert(curArgTabEntry); |
242 | #endif |
243 | |
244 | if (argNode->gtOper == GT_PUTARG_STK) |
245 | { |
246 | // late arg that is not passed in a register |
247 | assert(curArgTabEntry->regNum == REG_STK); |
248 | // These should never be contained. |
249 | assert(!argNode->isContained()); |
250 | continue; |
251 | } |
252 | |
253 | // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct |
254 | if (argNode->OperGet() == GT_FIELD_LIST) |
255 | { |
256 | assert(argNode->isContained()); |
257 | |
258 | // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) |
259 | for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) |
260 | { |
261 | #ifdef DEBUG |
262 | assert(entry->Current()->OperIs(GT_PUTARG_REG)); |
263 | assert(entry->Current()->gtRegNum == argReg); |
264 | // Update argReg for the next putarg_reg (if any) |
265 | argReg = genRegArgNext(argReg); |
266 | |
267 | #if defined(_TARGET_ARM_) |
268 | // A double register is modelled as an even-numbered single one |
269 | if (entry->Current()->TypeGet() == TYP_DOUBLE) |
270 | { |
271 | argReg = genRegArgNext(argReg); |
272 | } |
273 | #endif // _TARGET_ARM_ |
274 | #endif |
275 | BuildUse(entry->Current(), genRegMask(entry->Current()->gtRegNum)); |
276 | srcCount++; |
277 | } |
278 | } |
279 | #if FEATURE_ARG_SPLIT |
280 | else if (argNode->OperGet() == GT_PUTARG_SPLIT) |
281 | { |
282 | unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; |
283 | assert(regCount == curArgTabEntry->numRegs); |
284 | for (unsigned int i = 0; i < regCount; i++) |
285 | { |
286 | BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); |
287 | } |
288 | srcCount += regCount; |
289 | } |
290 | #endif // FEATURE_ARG_SPLIT |
291 | else |
292 | { |
293 | assert(argNode->OperIs(GT_PUTARG_REG)); |
294 | assert(argNode->gtRegNum == argReg); |
295 | HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); |
296 | #ifdef _TARGET_ARM_ |
297 | // The `double` types have been transformed to `long` on armel, |
298 | // while the actual long types have been decomposed. |
299 | // On ARM we may have bitcasts from DOUBLE to LONG. |
300 | if (argNode->TypeGet() == TYP_LONG) |
301 | { |
302 | assert(argNode->IsMultiRegNode()); |
303 | BuildUse(argNode, genRegMask(argNode->gtRegNum), 0); |
304 | BuildUse(argNode, genRegMask(genRegArgNext(argNode->gtRegNum)), 1); |
305 | srcCount += 2; |
306 | } |
307 | else |
308 | #endif // _TARGET_ARM_ |
309 | { |
310 | BuildUse(argNode, genRegMask(argNode->gtRegNum)); |
311 | srcCount++; |
312 | } |
313 | } |
314 | } |
315 | |
316 | // Now, count stack args |
317 | // Note that these need to be computed into a register, but then |
318 | // they're just stored to the stack - so the reg doesn't |
319 | // need to remain live until the call. In fact, it must not |
320 | // because the code generator doesn't actually consider it live, |
321 | // so it can't be spilled. |
322 | |
323 | GenTree* args = call->gtCallArgs; |
324 | while (args) |
325 | { |
326 | GenTree* arg = args->gtGetOp1(); |
327 | |
328 | // Skip arguments that have been moved to the Late Arg list |
329 | if (!(args->gtFlags & GTF_LATE_ARG)) |
330 | { |
331 | #ifdef DEBUG |
332 | fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); |
333 | assert(curArgTabEntry); |
334 | #endif |
335 | #if FEATURE_ARG_SPLIT |
336 | // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they |
337 | // define registers used by the call. |
338 | assert(arg->OperGet() != GT_PUTARG_SPLIT); |
339 | #endif // FEATURE_ARG_SPLIT |
340 | if (arg->gtOper == GT_PUTARG_STK) |
341 | { |
342 | assert(curArgTabEntry->regNum == REG_STK); |
343 | } |
344 | else |
345 | { |
346 | assert(!arg->IsValue() || arg->IsUnusedValue()); |
347 | } |
348 | } |
349 | args = args->gtGetOp2(); |
350 | } |
351 | |
352 | // If it is a fast tail call, it is already preferenced to use IP0. |
353 | // Therefore, no need set src candidates on call tgt again. |
354 | if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) |
355 | { |
356 | NYI_ARM("float reg varargs" ); |
357 | |
358 | // Don't assign the call target to any of the argument registers because |
359 | // we will use them to also pass floating point arguments as required |
360 | // by Arm64 ABI. |
361 | ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); |
362 | } |
363 | |
364 | if (ctrlExpr != nullptr) |
365 | { |
366 | BuildUse(ctrlExpr, ctrlExprCandidates); |
367 | srcCount++; |
368 | } |
369 | |
370 | buildInternalRegisterUses(); |
371 | |
372 | // Now generate defs and kills. |
373 | regMaskTP killMask = getKillSetForCall(call); |
374 | BuildDefsWithKills(call, dstCount, dstCandidates, killMask); |
375 | return srcCount; |
376 | } |
377 | |
378 | //------------------------------------------------------------------------ |
379 | // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node |
380 | // |
381 | // Arguments: |
382 | // argNode - a GT_PUTARG_STK node |
383 | // |
384 | // Return Value: |
385 | // The number of sources consumed by this node. |
386 | // |
387 | // Notes: |
388 | // Set the child node(s) to be contained when we have a multireg arg |
389 | // |
390 | int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) |
391 | { |
392 | assert(argNode->gtOper == GT_PUTARG_STK); |
393 | |
394 | GenTree* putArgChild = argNode->gtGetOp1(); |
395 | |
396 | int srcCount = 0; |
397 | |
398 | // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct |
399 | if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST)) |
400 | { |
401 | // We will use store instructions that each write a register sized value |
402 | |
403 | if (putArgChild->OperGet() == GT_FIELD_LIST) |
404 | { |
405 | assert(putArgChild->isContained()); |
406 | // We consume all of the items in the GT_FIELD_LIST |
407 | for (GenTreeFieldList* current = putArgChild->AsFieldList(); current != nullptr; current = current->Rest()) |
408 | { |
409 | BuildUse(current->Current()); |
410 | srcCount++; |
411 | } |
412 | } |
413 | else |
414 | { |
415 | // We can use a ldp/stp sequence so we need two internal registers for ARM64; one for ARM. |
416 | buildInternalIntRegisterDefForNode(argNode); |
417 | #ifdef _TARGET_ARM64_ |
418 | buildInternalIntRegisterDefForNode(argNode); |
419 | #endif // _TARGET_ARM64_ |
420 | |
421 | if (putArgChild->OperGet() == GT_OBJ) |
422 | { |
423 | assert(putArgChild->isContained()); |
424 | GenTree* objChild = putArgChild->gtGetOp1(); |
425 | if (objChild->OperGet() == GT_LCL_VAR_ADDR) |
426 | { |
427 | // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR |
428 | // as one contained operation, and there are no source registers. |
429 | // |
430 | assert(objChild->isContained()); |
431 | } |
432 | else |
433 | { |
434 | // We will generate all of the code for the GT_PUTARG_STK and its child node |
435 | // as one contained operation |
436 | // |
437 | srcCount = BuildOperandUses(objChild); |
438 | } |
439 | } |
440 | else |
441 | { |
442 | // No source registers. |
443 | putArgChild->OperIs(GT_LCL_VAR); |
444 | } |
445 | } |
446 | } |
447 | else |
448 | { |
449 | assert(!putArgChild->isContained()); |
450 | srcCount = BuildOperandUses(putArgChild); |
451 | } |
452 | buildInternalRegisterUses(); |
453 | return srcCount; |
454 | } |
455 | |
456 | #if FEATURE_ARG_SPLIT |
457 | //------------------------------------------------------------------------ |
458 | // BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node |
459 | // |
460 | // Arguments: |
461 | // argNode - a GT_PUTARG_SPLIT node |
462 | // |
463 | // Return Value: |
464 | // The number of sources consumed by this node. |
465 | // |
466 | // Notes: |
467 | // Set the child node(s) to be contained |
468 | // |
int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
{
    int srcCount = 0;
    assert(argNode->gtOper == GT_PUTARG_SPLIT);

    GenTree* putArgChild = argNode->gtGetOp1();

    // Registers for split argument corresponds to source
    int dstCount = argNode->gtNumRegs;

    regNumber argReg = argNode->gtRegNum;
    regMaskTP argMask = RBM_NONE;
    // Accumulate the mask of consecutive argument registers this node defines,
    // and record the register for each destination index on the node itself.
    for (unsigned i = 0; i < argNode->gtNumRegs; i++)
    {
        regNumber thisArgReg = (regNumber)((unsigned)argReg + i);
        argMask |= genRegMask(thisArgReg);
        argNode->SetRegNumByIdx(thisArgReg, i);
    }

    if (putArgChild->OperGet() == GT_FIELD_LIST)
    {
        // Generated code:
        // 1. Consume all of the items in the GT_FIELD_LIST (source)
        // 2. Store to target slot and move to target registers (destination) from source
        //
        unsigned sourceRegCount = 0;

        // To avoid redundant moves, have the argument operand computed in the
        // register in which the argument is passed to the call.

        for (GenTreeFieldList* fieldListPtr = putArgChild->AsFieldList(); fieldListPtr != nullptr;
             fieldListPtr = fieldListPtr->Rest())
        {
            GenTree* node = fieldListPtr->gtGetOp1();
            assert(!node->isContained());
            // The only multi-reg nodes we should see are OperIsMultiRegOp()
            unsigned currentRegCount;
#ifdef _TARGET_ARM_
            if (node->OperIsMultiRegOp())
            {
                currentRegCount = node->AsMultiRegOp()->GetRegCount();
            }
            else
#endif // _TARGET_ARM
            {
                assert(!node->IsMultiRegNode());
                currentRegCount = 1;
            }
            // Consume all the registers, setting the appropriate register mask for the ones that
            // go into registers. Fields beyond gtNumRegs go to the stack portion of the
            // split argument, so they get no register preference (RBM_NONE).
            for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++)
            {
                regMaskTP sourceMask = RBM_NONE;
                if (sourceRegCount < argNode->gtNumRegs)
                {
                    sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount));
                }
                sourceRegCount++;
                BuildUse(node, sourceMask, regIndex);
            }
        }
        srcCount += sourceRegCount;
        assert(putArgChild->isContained());
    }
    else
    {
        // Non-field-list case: the child must be a contained GT_OBJ of a struct.
        assert(putArgChild->TypeGet() == TYP_STRUCT);
        assert(putArgChild->OperGet() == GT_OBJ);

        // We can use a ldr/str sequence so we need an internal register.
        // Exclude the outgoing argument registers so the temp doesn't clobber them.
        buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask);

        GenTree* objChild = putArgChild->gtGetOp1();
        if (objChild->OperGet() == GT_LCL_VAR_ADDR)
        {
            // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR
            // as one contained operation
            //
            assert(objChild->isContained());
        }
        else
        {
            srcCount = BuildIndirUses(putArgChild->AsIndir());
        }
        assert(putArgChild->isContained());
    }
    buildInternalRegisterUses();
    // Define one register per portion of the split argument, constrained to argMask.
    BuildDefs(argNode, dstCount, argMask);
    return srcCount;
}
559 | #endif // FEATURE_ARG_SPLIT |
560 | |
561 | //------------------------------------------------------------------------ |
562 | // BuildBlockStore: Set the NodeInfo for a block store. |
563 | // |
564 | // Arguments: |
565 | // blkNode - The block store node of interest |
566 | // |
567 | // Return Value: |
568 | // The number of sources consumed by this node. |
569 | // |
int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
    GenTree* dstAddr = blkNode->Addr();
    unsigned size = blkNode->gtBlkSize;
    GenTree* source = blkNode->Data();
    int srcCount = 0;

    GenTree* srcAddrOrFill = nullptr;
    bool isInitBlk = blkNode->OperIsInitBlkOp();

    // Register constraints for the destination address, the source address (or
    // fill value), and the size operand; RBM_NONE means no constraint.
    regMaskTP dstAddrRegMask = RBM_NONE;
    regMaskTP sourceRegMask = RBM_NONE;
    regMaskTP blkSizeRegMask = RBM_NONE;
    regMaskTP internalIntCandidates = RBM_NONE;

    if (isInitBlk)
    {
        GenTree* initVal = source;
        if (initVal->OperIsInitVal())
        {
            // Unwrap the contained GT_INIT_VAL to get the actual fill value.
            assert(initVal->isContained());
            initVal = initVal->gtGetOp1();
        }
        srcAddrOrFill = initVal;

        if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
        {
            // TODO-ARM-CQ: Currently we generate a helper call for every
            // initblk we encounter. Later on we should implement loop unrolling
            // code sequences to improve CQ.
            // For reference see the code in lsraxarch.cpp.
            NYI_ARM("initblk loop unrolling is currently not implemented." );
        }
        else
        {
            assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
            assert(!initVal->isContained());
            // The helper follows the regular ABI.
            dstAddrRegMask = RBM_ARG_0;
            sourceRegMask = RBM_ARG_1;
            blkSizeRegMask = RBM_ARG_2;
        }
    }
    else
    {
        // CopyObj or CopyBlk
        // Sources are src and dest and size if not constant.
        if (source->gtOper == GT_IND)
        {
            assert(source->isContained());
            srcAddrOrFill = source->gtGetOp1();
            assert(!srcAddrOrFill->isContained());
        }
        if (blkNode->OperGet() == GT_STORE_OBJ)
        {
            // CopyObj
            // We don't need to materialize the struct size but we still need
            // a temporary register to perform the sequence of loads and stores.
            // We can't use the special Write Barrier registers, so exclude them from the mask
            internalIntCandidates = allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
            buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);

            if (size >= 2 * REGSIZE_BYTES)
            {
                // We will use ldp/stp to reduce code size and improve performance
                // so we need to reserve an extra internal register
                buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
            }

            // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
            dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;

            // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
            // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
            // which is killed by a StoreObj (and thus needn't be reserved).
            if (srcAddrOrFill != nullptr)
            {
                sourceRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
            }
        }
        else
        {
            // CopyBlk
            if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
            {
                // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
                // we should unroll the loop to improve CQ.
                // For reference see the code in lsraxarch.cpp.

                buildInternalIntRegisterDefForNode(blkNode);

#ifdef _TARGET_ARM64_
                if (size >= 2 * REGSIZE_BYTES)
                {
                    // We will use ldp/stp to reduce code size and improve performance
                    // so we need to reserve an extra internal register
                    buildInternalIntRegisterDefForNode(blkNode);
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                // Helper-based CpBlk follows the regular ABI.
                assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
                dstAddrRegMask = RBM_ARG_0;
                // The srcAddr goes in arg1.
                if (srcAddrOrFill != nullptr)
                {
                    sourceRegMask = RBM_ARG_1;
                }
                blkSizeRegMask = RBM_ARG_2;
            }
        }
    }

    if ((size != 0) && (blkSizeRegMask != RBM_NONE))
    {
        // Reserve a temp register for the block size argument.
        buildInternalIntRegisterDefForNode(blkNode, blkSizeRegMask);
    }

    // Build uses in evaluation order: for a reverse op the source is evaluated
    // before the destination address, so the dstAddr use is built afterwards.
    if (!dstAddr->isContained() && !blkNode->IsReverseOp())
    {
        srcCount++;
        BuildUse(dstAddr, dstAddrRegMask);
    }
    if ((srcAddrOrFill != nullptr) && !srcAddrOrFill->isContained())
    {
        srcCount++;
        BuildUse(srcAddrOrFill, sourceRegMask);
    }
    if (!dstAddr->isContained() && blkNode->IsReverseOp())
    {
        srcCount++;
        BuildUse(dstAddr, dstAddrRegMask);
    }

    if (size == 0)
    {
        assert(blkNode->OperIs(GT_STORE_DYN_BLK));
        // The block size argument is a third argument to GT_STORE_DYN_BLK
        srcCount++;
        GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
        BuildUse(blockSize, blkSizeRegMask);
    }

    buildInternalRegisterUses();
    // A block store produces no value; only kills are recorded.
    regMaskTP killMask = getKillSetForBlockStore(blkNode);
    BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
    return srcCount;
}
720 | |
721 | //------------------------------------------------------------------------ |
722 | // BuildCast: Set the NodeInfo for a GT_CAST. |
723 | // |
724 | // Arguments: |
725 | // cast - The GT_CAST node |
726 | // |
727 | // Return Value: |
728 | // The number of sources consumed by this node. |
729 | // |
730 | int LinearScan::BuildCast(GenTreeCast* cast) |
731 | { |
732 | GenTree* src = cast->gtGetOp1(); |
733 | |
734 | const var_types srcType = genActualType(src->TypeGet()); |
735 | const var_types castType = cast->gtCastType; |
736 | |
737 | #ifdef _TARGET_ARM_ |
738 | assert(!varTypeIsLong(srcType) || (src->OperIs(GT_LONG) && src->isContained())); |
739 | |
740 | // Floating point to integer casts requires a temporary register. |
741 | if (varTypeIsFloating(srcType) && !varTypeIsFloating(castType)) |
742 | { |
743 | buildInternalFloatRegisterDefForNode(cast, RBM_ALLFLOAT); |
744 | setInternalRegsDelayFree = true; |
745 | } |
746 | #else |
747 | // Overflow checking cast from TYP_LONG to TYP_INT requires a temporary register to |
748 | // store the min and max immediate values that cannot be encoded in the CMP instruction. |
749 | if (cast->gtOverflow() && varTypeIsLong(srcType) && !cast->IsUnsigned() && (castType == TYP_INT)) |
750 | { |
751 | buildInternalIntRegisterDefForNode(cast); |
752 | } |
753 | #endif |
754 | |
755 | int srcCount = BuildOperandUses(src); |
756 | buildInternalRegisterUses(); |
757 | BuildDef(cast); |
758 | return srcCount; |
759 | } |
760 | |
761 | #endif // _TARGET_ARMARCH_ |
762 | |