// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                    Register Requirements for ARM64                        XX
XX                                                                           XX
XX  This encapsulates all the logic for setting register requirements for   XX
XX  the ARM64 architecture.                                                  XX
XX                                                                           XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#ifdef _TARGET_ARM64_

#include "jit.h"
#include "sideeffects.h"
#include "lower.h"

//------------------------------------------------------------------------
// BuildNode: Build the RefPositions for a node
//
// Arguments:
//    tree - the node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
// Notes:
//    Preconditions:
//       LSRA has been initialized.
//
//    Postconditions:
//       RefPositions have been built for all the register defs and uses required
//       for this node.
//
int LinearScan::BuildNode(GenTree* tree)
{
    assert(!tree->isContained());
    Interval* prefSrcInterval = nullptr;
    int       srcCount;
    int       dstCount      = 0;
    regMaskTP dstCandidates = RBM_NONE;
    regMaskTP killMask      = RBM_NONE;
    bool      isLocalDefUse = false;

    // Reset the build-related members of LinearScan.
    clearBuildState();

    RegisterType registerType = TypeGet(tree);

    // Set the default dstCount. This may be modified below.
    if (tree->IsValue())
    {
        dstCount = 1;
        if (tree->IsUnusedValue())
        {
            isLocalDefUse = true;
        }
    }
    else
    {
        dstCount = 0;
    }

    switch (tree->OperGet())
    {
        default:
            srcCount = BuildSimple(tree);
            break;

        case GT_LCL_VAR:
        case GT_LCL_FLD:
        {
            // We handle tracked variables differently from non-tracked ones. If it is tracked,
            // we will simply add a use of the tracked variable at its parent/consumer.
            // Otherwise, for a use we need to actually add the appropriate references for loading
            // or storing the variable.
            //
            // A tracked variable won't actually get used until the appropriate ancestor tree node
            // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
            // to a call or an orphaned dead node.
            //
            LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
            if (isCandidateVar(varDsc))
            {
                INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1));
                return 0;
            }
            srcCount = 0;
#ifdef FEATURE_SIMD
            // Need an additional register to read upper 4 bytes of Vector3.
            if (tree->TypeGet() == TYP_SIMD12)
            {
                // We need an internal register different from targetReg in which 'tree' produces its result
                // because both targetReg and internal reg will be in use at the same time.
                buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
                setInternalRegsDelayFree = true;
                buildInternalRegisterUses();
            }
#endif
            BuildDef(tree);
        }
        break;

        case GT_STORE_LCL_FLD:
        case GT_STORE_LCL_VAR:
            assert(dstCount == 0);
            srcCount = BuildStoreLoc(tree->AsLclVarCommon());
            break;

        case GT_FIELD_LIST:
            // These should always be contained. We don't correctly allocate or
            // generate code for a non-contained GT_FIELD_LIST.
            noway_assert(!"Non-contained GT_FIELD_LIST");
            srcCount = 0;
            break;

        case GT_LIST:
        case GT_ARGPLACE:
        case GT_NO_OP:
        case GT_START_NONGC:
        case GT_PROF_HOOK:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_CNS_DBL:
        {
            GenTreeDblCon* dblConst   = tree->AsDblCon();
            double         constValue = dblConst->gtDblCon.gtDconVal;

            if (emitter::emitIns_valid_imm_for_fmov(constValue))
            {
                // Directly encode constant to instructions.
            }
            else
            {
                // Reserve int to load constant from memory (IF_LARGELDC)
                buildInternalIntRegisterDefForNode(tree);
                buildInternalRegisterUses();
            }
        }
            __fallthrough;

        case GT_CNS_INT:
        {
            srcCount = 0;
            assert(dstCount == 1);
            RefPosition* def               = BuildDef(tree);
            def->getInterval()->isConstant = true;
        }
        break;

        case GT_BOX:
        case GT_COMMA:
        case GT_QMARK:
        case GT_COLON:
            srcCount = 0;
            assert(dstCount == 0);
            unreached();
            break;

        case GT_RETURN:
            srcCount = BuildReturn(tree);
            break;

        case GT_RETFILT:
            assert(dstCount == 0);
            if (tree->TypeGet() == TYP_VOID)
            {
                srcCount = 0;
            }
            else
            {
                assert(tree->TypeGet() == TYP_INT);
                srcCount = 1;
                BuildUse(tree->gtGetOp1(), RBM_INTRET);
            }
            break;

        case GT_NOP:
            // A GT_NOP is a passthrough if it is void or if it has a child; otherwise
            // (it has a type but no child) it must be considered to produce a dummy value.
            srcCount = 0;
            if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr)
            {
                assert(dstCount == 1);
                BuildDef(tree);
            }
            else
            {
                assert(dstCount == 0);
            }
            break;

        case GT_JTRUE:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_JMP:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_SWITCH:
            // This should never occur since switch nodes must not be visible at this
            // point in the JIT.
            srcCount = 0;
            noway_assert(!"Switch must be lowered at this point");
            break;

        case GT_JMPTABLE:
            srcCount = 0;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_SWITCH_TABLE:
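            // An internal integer register is reserved here; presumably codegen uses it as
            // a scratch register to compute the branch target from the jump table while the
            // table base and index operands are still live.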
            buildInternalIntRegisterDefForNode(tree);
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 0);
            break;

        case GT_ASG:
            noway_assert(!"We should never hit any assignment operator in lowering");
            srcCount = 0;
            break;

        case GT_ADD:
        case GT_SUB:
            if (varTypeIsFloating(tree->TypeGet()))
            {
                // Overflow operations aren't supported on float/double types.
                assert(!tree->gtOverflow());

                // No implicit conversions at this stage as the expectation is that
                // everything is made explicit by adding casts.
                assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet());
            }

            __fallthrough;

        case GT_AND:
        case GT_OR:
        case GT_XOR:
        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:
        case GT_ROR:
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_RETURNTRAP:
            // This just turns into a compare of its child with an int,
            // plus a conditional call.
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 0);
            killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
            BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
            break;

        case GT_MOD:
        case GT_UMOD:
            NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
            assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
            srcCount = 0;
            break;

        case GT_MUL:
            if (tree->gtOverflow())
            {
                // Need a register different from target reg to check for overflow.
                buildInternalIntRegisterDefForNode(tree);
                setInternalRegsDelayFree = true;
            }
            __fallthrough;

        case GT_DIV:
        case GT_MULHI:
        case GT_UDIV:
        {
            srcCount = BuildBinaryUses(tree->AsOp());
            buildInternalRegisterUses();
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

        case GT_INTRINSIC:
        {
            noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Floor) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));

            // Both the operand and its result must be of the same floating point type.
            GenTree* op1 = tree->gtGetOp1();
            assert(varTypeIsFloating(op1));
            assert(op1->TypeGet() == tree->TypeGet());

            BuildUse(op1);
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

#ifdef FEATURE_SIMD
        case GT_SIMD:
            srcCount = BuildSIMD(tree->AsSIMD());
            break;
#endif // FEATURE_SIMD

#ifdef FEATURE_HW_INTRINSICS
        case GT_HWIntrinsic:
            srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic());
            break;
#endif // FEATURE_HW_INTRINSICS

        case GT_CAST:
            assert(dstCount == 1);
            srcCount = BuildCast(tree->AsCast());
            break;

        case GT_NEG:
        case GT_NOT:
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GE:
        case GT_GT:
        case GT_TEST_EQ:
        case GT_TEST_NE:
        case GT_JCMP:
            srcCount = BuildCmp(tree);
            break;

        case GT_CKFINITE:
            srcCount = 1;
            assert(dstCount == 1);
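            // An internal integer register is reserved here; presumably the floating-point
            // bits are moved into it so the exponent can be tested for NaN/infinity.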
            buildInternalIntRegisterDefForNode(tree);
            BuildUse(tree->gtGetOp1());
            BuildDef(tree);
            buildInternalRegisterUses();
            break;

        case GT_CMPXCHG:
        {
            GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
            srcCount                    = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
            assert(dstCount == 1);

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // ARMv8 exclusives require a single internal register.
                buildInternalIntRegisterDefForNode(tree);
            }

            // For ARMv8 exclusives, the lifetime of the addr and data must be extended because
            // they may be used multiple times during retries.

            // For ARMv8.1 atomic cas, the lifetime of the addr and data must be extended to prevent
            // them being reused as the target register, which must be destroyed early.

            RefPosition* locationUse = BuildUse(tree->gtCmpXchg.gtOpLocation);
            setDelayFree(locationUse);
            RefPosition* valueUse = BuildUse(tree->gtCmpXchg.gtOpValue);
            setDelayFree(valueUse);
            if (!cmpXchgNode->gtOpComparand->isContained())
            {
                RefPosition* comparandUse = BuildUse(tree->gtCmpXchg.gtOpComparand);

                // For ARMv8 exclusives, the lifetime of the comparand must be extended because
                // it may be used multiple times during retries.
                if (!compiler->compSupports(InstructionSet_Atomics))
                {
                    setDelayFree(comparandUse);
                }
            }

            // Internals may not collide with target.
            setInternalRegsDelayFree = true;
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_LOCKADD:
        case GT_XADD:
        case GT_XCHG:
        {
            assert(dstCount == ((tree->TypeGet() == TYP_VOID) ? 0 : 1));
            srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // GT_XCHG requires a single internal register; the others require two.
                buildInternalIntRegisterDefForNode(tree);
                if (tree->OperGet() != GT_XCHG)
                {
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            assert(!tree->gtGetOp1()->isContained());
            RefPosition* op1Use = BuildUse(tree->gtGetOp1());
            RefPosition* op2Use = nullptr;
            if (!tree->gtGetOp2()->isContained())
            {
                op2Use = BuildUse(tree->gtGetOp2());
            }

            // For ARMv8 exclusives, the lifetime of the addr and data must be extended because
            // they may be used multiple times during retries.
            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // Internals may not collide with target.
                if (dstCount == 1)
                {
                    setDelayFree(op1Use);
                    if (op2Use != nullptr)
                    {
                        setDelayFree(op2Use);
                    }
                    setInternalRegsDelayFree = true;
                }
                buildInternalRegisterUses();
            }
            if (dstCount == 1)
            {
                BuildDef(tree);
            }
        }
        break;

#if FEATURE_ARG_SPLIT
        case GT_PUTARG_SPLIT:
            srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
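            // Unlike most nodes, a PUTARG_SPLIT defines one register per gtNumRegs, so
            // dstCount is overridden below instead of being left at the default single def.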
            dstCount = tree->AsPutArgSplit()->gtNumRegs;
            break;
#endif // FEATURE_ARG_SPLIT

        case GT_PUTARG_STK:
            srcCount = BuildPutArgStk(tree->AsPutArgStk());
            break;

        case GT_PUTARG_REG:
            srcCount = BuildPutArgReg(tree->AsUnOp());
            break;

        case GT_CALL:
            srcCount = BuildCall(tree->AsCall());
            if (tree->AsCall()->HasMultiRegRetVal())
            {
                dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
            }
            break;

        case GT_ADDR:
        {
            // For a GT_ADDR, the child node should not be evaluated into a register.
            GenTree* child = tree->gtGetOp1();
            assert(!isCandidateLocalRef(child));
            assert(child->isContained());
            assert(dstCount == 1);
            srcCount = 0;
            BuildDef(tree);
        }
        break;

        case GT_BLK:
        case GT_DYN_BLK:
            // These should all be eliminated prior to Lowering.
            assert(!"Non-store block node in Lowering");
            srcCount = 0;
            break;

        case GT_STORE_BLK:
        case GT_STORE_OBJ:
        case GT_STORE_DYN_BLK:
            srcCount = BuildBlockStore(tree->AsBlk());
            break;

        case GT_INIT_VAL:
            // Always a passthrough of its child's value.
            assert(!"INIT_VAL should always be contained");
            srcCount = 0;
            break;

        case GT_LCLHEAP:
        {
            assert(dstCount == 1);

            // Need a variable number of temp regs (see genLclHeap() in codegenarm64.cpp):
            // Here '-' means don't care.
            //
            //  Size?                   Init Memory?    # temp regs
            //   0                          -               0
            //   const and <=6 ptr words    -               0
            //   const and <PageSize        No              0
            //   >6 ptr words               Yes             0
            //   Non-const                  Yes             0
            //   Non-const                  No              2
            //

            GenTree* size = tree->gtGetOp1();
            if (size->IsCnsIntOrI())
            {
                assert(size->isContained());
                srcCount = 0;

                size_t sizeVal = size->gtIntCon.gtIconVal;

                if (sizeVal != 0)
                {
                    // Compute the amount of memory to properly STACK_ALIGN.
                    // Note: The GenTree node is not updated here as it is cheap to recompute the stack-aligned size.
                    // This should also help in debugging as we can examine the original size specified with
                    // localloc.
                    sizeVal         = AlignUp(sizeVal, STACK_ALIGN);
                    size_t stpCount = sizeVal / (REGSIZE_BYTES * 2);

                    // For small allocations we use up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc).
                    //
                    if (stpCount <= 4)
                    {
                        // Need no internal registers
                    }
                    else if (!compiler->info.compInitMem)
                    {
                        // No need to initialize allocated stack space.
                        if (sizeVal < compiler->eeGetPageSize())
                        {
                            // Need no internal registers
                        }
                        else
                        {
                            // We need two registers: regCnt and regTmp
                            buildInternalIntRegisterDefForNode(tree);
                            buildInternalIntRegisterDefForNode(tree);
                        }
                    }
                }
            }
            else
            {
                srcCount = 1;
                if (!compiler->info.compInitMem)
                {
                    buildInternalIntRegisterDefForNode(tree);
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            if (!size->isContained())
            {
                BuildUse(size);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
        case GT_SIMD_CHK:
#endif // FEATURE_SIMD
        {
            GenTreeBoundsChk* node = tree->AsBoundsChk();
            // Consumes arrLen & index - has no result.
            assert(dstCount == 0);
            srcCount = BuildOperandUses(node->gtIndex);
            srcCount += BuildOperandUses(node->gtArrLen);
        }
        break;

        case GT_ARR_ELEM:
            // These must have been lowered to GT_ARR_INDEX.
            noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_ARR_INDEX:
        {
            srcCount = 2;
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            setInternalRegsDelayFree = true;

            // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
            // times while the result is being computed.
            RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj());
            setDelayFree(arrObjUse);
            BuildUse(tree->AsArrIndex()->IndexExpr());
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_OFFSET:
            // This consumes the offset, if any, the arrObj and the effective index,
            // and produces the flattened offset for this dimension.
            srcCount = 2;
            if (!tree->gtArrOffs.gtOffset->isContained())
            {
                BuildUse(tree->AsArrOffs()->gtOffset);
                srcCount++;
            }
            BuildUse(tree->AsArrOffs()->gtIndex);
            BuildUse(tree->AsArrOffs()->gtArrObj);
            assert(dstCount == 1);
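            // An internal integer register is reserved here; presumably it serves as a
            // scratch register when scaling the running offset by this dimension's size
            // loaded from the array object.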
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_LEA:
        {
            GenTreeAddrMode* lea = tree->AsAddrMode();

            GenTree* base  = lea->Base();
            GenTree* index = lea->Index();
            int      cns   = lea->Offset();

            // This LEA is instantiating an address, so we set up the srcCount here.
            srcCount = 0;
            if (base != nullptr)
            {
                srcCount++;
                BuildUse(base);
            }
            if (index != nullptr)
            {
                srcCount++;
                BuildUse(index);
            }
            assert(dstCount == 1);

            // On ARM64 we may need a single internal register
            // (even when both conditions below are true, we still only need one).
            if ((index != nullptr) && (cns != 0))
            {
                // ARM64 does not support both an index and an offset, so we need an internal register.
                buildInternalIntRegisterDefForNode(tree);
            }
            else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
            {
                // This offset can't be contained in the add instruction, so we need an internal register.
                buildInternalIntRegisterDefForNode(tree);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_STOREIND:
        {
            assert(dstCount == 0);

            if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree))
            {
                srcCount = BuildGCWriteBarrier(tree);
                break;
            }

            srcCount = BuildIndir(tree->AsIndir());
            if (!tree->gtGetOp2()->isContained())
            {
                BuildUse(tree->gtGetOp2());
                srcCount++;
            }
        }
        break;

        case GT_NULLCHECK:
            // Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register
            // is required, and it is not a localDefUse.
            assert(dstCount == 0);
            assert(!tree->gtGetOp1()->isContained());
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            break;

        case GT_IND:
            assert(dstCount == 1);
            srcCount = BuildIndir(tree->AsIndir());
            break;

        case GT_CATCH_ARG:
            srcCount = 0;
            assert(dstCount == 1);
            BuildDef(tree, RBM_EXCEPTION_OBJECT);
            break;

        case GT_CLS_VAR:
            srcCount = 0;
            // GT_CLS_VAR, by the time we reach the backend, must always be a pure use.
            // It will produce a result of the type of the node, and use an internal
            // register for the address.

            assert(dstCount == 1);
            assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_INDEX_ADDR:
            assert(dstCount == 1);
            srcCount = BuildBinaryUses(tree->AsOp());
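            // An internal integer register is reserved here; presumably it holds an
            // intermediate value (such as the scaled index or the first-element offset)
            // while the element address is computed.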
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

    } // end switch (tree->OperGet())

    if (tree->IsUnusedValue() && (dstCount != 0))
    {
        isLocalDefUse = true;
    }
    // We need to be sure that we've set srcCount and dstCount appropriately.
    assert((dstCount < 2) || tree->IsMultiRegCall());
    assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
    assert(!tree->IsUnusedValue() || (dstCount != 0));
    assert(dstCount == tree->GetRegisterDstCount());
    INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount));
    return srcCount;
}

#ifdef FEATURE_SIMD
//------------------------------------------------------------------------
// BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
//
// Arguments:
//    simdTree - The GT_SIMD node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
    int srcCount = 0;
    // Only SIMDIntrinsicInit can be contained.
    if (simdTree->isContained())
    {
        assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
    }
    int dstCount = simdTree->IsValue() ? 1 : 0;
    assert(dstCount == 1);

    bool buildUses = true;

    GenTree* op1 = simdTree->gtGetOp1();
    GenTree* op2 = simdTree->gtGetOp2();

    switch (simdTree->gtSIMDIntrinsicID)
    {
        case SIMDIntrinsicInit:
        case SIMDIntrinsicCast:
        case SIMDIntrinsicSqrt:
        case SIMDIntrinsicAbs:
        case SIMDIntrinsicConvertToSingle:
        case SIMDIntrinsicConvertToInt32:
        case SIMDIntrinsicConvertToDouble:
        case SIMDIntrinsicConvertToInt64:
        case SIMDIntrinsicWidenLo:
        case SIMDIntrinsicWidenHi:
            // No special handling required.
            break;

        case SIMDIntrinsicGetItem:
        {
            op1 = simdTree->gtGetOp1();
            op2 = simdTree->gtGetOp2();

            // We have an object and an index, either of which may be contained.
            bool setOp2DelayFree = false;
            if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
            {
                // If the index is not a constant and the object is not contained or is a local,
                // we will need a general purpose register to calculate the address.
                // The internal register must not clobber the input index.
                // TODO-Cleanup: An internal register will never clobber a source; this code actually
                // ensures that the index (op2) doesn't interfere with the target.
                buildInternalIntRegisterDefForNode(simdTree);
                setOp2DelayFree = true;
            }
            srcCount += BuildOperandUses(op1);
            if (!op2->isContained())
            {
                RefPosition* op2Use = BuildUse(op2);
                if (setOp2DelayFree)
                {
                    setDelayFree(op2Use);
                }
                srcCount++;
            }

            if (!op2->IsCnsIntOrI() && (!op1->isContained()))
            {
                // If the vector is not already in memory (contained) and the index is not a constant,
                // we will use the SIMD temp location to store the vector.
                compiler->getSIMDInitTempVarNum();
            }
            buildUses = false;
        }
        break;

        case SIMDIntrinsicAdd:
        case SIMDIntrinsicSub:
        case SIMDIntrinsicMul:
        case SIMDIntrinsicDiv:
        case SIMDIntrinsicBitwiseAnd:
        case SIMDIntrinsicBitwiseAndNot:
        case SIMDIntrinsicBitwiseOr:
        case SIMDIntrinsicBitwiseXor:
        case SIMDIntrinsicMin:
        case SIMDIntrinsicMax:
        case SIMDIntrinsicEqual:
        case SIMDIntrinsicLessThan:
        case SIMDIntrinsicGreaterThan:
        case SIMDIntrinsicLessThanOrEqual:
        case SIMDIntrinsicGreaterThanOrEqual:
            // No special handling required.
            break;

        case SIMDIntrinsicSetX:
        case SIMDIntrinsicSetY:
        case SIMDIntrinsicSetZ:
        case SIMDIntrinsicSetW:
        case SIMDIntrinsicNarrow:
        {
            // Op1 will write to dst before Op2 is free.
            BuildUse(op1);
            RefPosition* op2Use = BuildUse(op2);
            setDelayFree(op2Use);
            srcCount  = 2;
            buildUses = false;
            break;
        }

        case SIMDIntrinsicInitN:
        {
            var_types baseType = simdTree->gtSIMDBaseType;
            srcCount           = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
            if (varTypeIsFloating(simdTree->gtSIMDBaseType))
            {
                // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
                buildInternalFloatRegisterDefForNode(simdTree);
            }

            int initCount = 0;
            for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
            {
                assert(list->OperGet() == GT_LIST);
                GenTree* listItem = list->gtGetOp1();
                assert(listItem->TypeGet() == baseType);
                assert(!listItem->isContained());
                BuildUse(listItem);
                initCount++;
            }
            assert(initCount == srcCount);
            buildUses = false;

            break;
        }

        case SIMDIntrinsicInitArray:
            // We have an array and an index, which may be contained.
            break;

        case SIMDIntrinsicOpEquality:
        case SIMDIntrinsicOpInEquality:
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicDotProduct:
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicSelect:
            // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB.
            // bsl target register must be VC. Reserve a temp in case we need to shuffle things.
            // This will require a different approach, as GenTreeSIMD has only two operands.
            assert(!"SIMDIntrinsicSelect not yet supported");
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicInitArrayX:
        case SIMDIntrinsicInitFixed:
        case SIMDIntrinsicCopyToArray:
        case SIMDIntrinsicCopyToArrayX:
        case SIMDIntrinsicNone:
        case SIMDIntrinsicGetCount:
        case SIMDIntrinsicGetOne:
        case SIMDIntrinsicGetZero:
        case SIMDIntrinsicGetAllOnes:
        case SIMDIntrinsicGetX:
        case SIMDIntrinsicGetY:
        case SIMDIntrinsicGetZ:
        case SIMDIntrinsicGetW:
        case SIMDIntrinsicInstEquals:
        case SIMDIntrinsicHWAccel:
        case SIMDIntrinsicWiden:
        case SIMDIntrinsicInvalid:
            assert(!"These intrinsics should not be seen during register allocation");
            __fallthrough;

        default:
            noway_assert(!"Unimplemented SIMD node type.");
            unreached();
    }
    if (buildUses)
    {
        assert(!op1->OperIs(GT_LIST));
        assert(srcCount == 0);
        srcCount = BuildOperandUses(op1);
        if ((op2 != nullptr) && !op2->isContained())
        {
            srcCount += BuildOperandUses(op2);
        }
    }
    assert(internalCount <= MaxInternalCount);
    buildInternalRegisterUses();
    if (dstCount == 1)
    {
        BuildDef(simdTree);
    }
    else
    {
        assert(dstCount == 0);
    }
    return srcCount;
}
#endif // FEATURE_SIMD

#ifdef FEATURE_HW_INTRINSICS
#include "hwintrinsic.h"
//------------------------------------------------------------------------
// BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
//
// Arguments:
//    intrinsicTree - The GT_HWIntrinsic node of interest
//
// Return Value:
//    The number of sources consumed by this node.
//
int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
    NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
    int            numArgs     = HWIntrinsicInfo::lookupNumArgs(intrinsicTree);

    GenTree* op1      = intrinsicTree->gtGetOp1();
    GenTree* op2      = intrinsicTree->gtGetOp2();
    GenTree* op3      = nullptr;
    int      srcCount = 0;

    if ((op1 != nullptr) && op1->OperIsList())
    {
        // op2 must be null, and the argument list must contain exactly three arguments.
        assert(op2 == nullptr);
        noway_assert(op1->AsArgList()->Rest() != nullptr);
        noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr);
        assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
        op2 = op1->AsArgList()->Rest()->Current();
        op3 = op1->AsArgList()->Rest()->Rest()->Current();
        op1 = op1->AsArgList()->Current();
    }

    int  dstCount       = intrinsicTree->IsValue() ? 1 : 0;
    bool op2IsDelayFree = false;
    bool op3IsDelayFree = false;

    // Create internal temps, and handle any other special requirements.
    switch (HWIntrinsicInfo::lookup(intrinsicID).form)
    {
        case HWIntrinsicInfo::Sha1HashOp:
            assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
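            // SHA1 hash ops read and write their first operand; marking the remaining
            // operands (and internals) delay-free below presumably keeps them from being
            // assigned the destination register while it is overwritten.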
            if (!op2->isContained())
            {
                assert(!op3->isContained());
                op2IsDelayFree           = true;
                op3IsDelayFree           = true;
                setInternalRegsDelayFree = true;
            }
            buildInternalFloatRegisterDefForNode(intrinsicTree);
            break;

        case HWIntrinsicInfo::SimdTernaryRMWOp:
            assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
            if (!op2->isContained())
            {
                assert(!op3->isContained());
                op2IsDelayFree = true;
                op3IsDelayFree = true;
            }
            break;

        case HWIntrinsicInfo::Sha1RotateOp:
            buildInternalFloatRegisterDefForNode(intrinsicTree);
            break;

        case HWIntrinsicInfo::SimdExtractOp:
        case HWIntrinsicInfo::SimdInsertOp:
            if (!op2->isContained())
            {
                // We need a temp to create the switch table.
                buildInternalIntRegisterDefForNode(intrinsicTree);
            }
            break;

        default:
            break;
    }

    // Next, build uses.
    if (numArgs > 3)
    {
        srcCount = 0;
        assert(!op2IsDelayFree && !op3IsDelayFree);
        assert(op1->OperIs(GT_LIST));
        for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
        {
            srcCount += BuildOperandUses(list->Current());
        }
        assert(srcCount == numArgs);
    }
    else
    {
        if (op1 != nullptr)
        {
            srcCount += BuildOperandUses(op1);
            if (op2 != nullptr)
            {
                srcCount += (op2IsDelayFree) ? BuildDelayFreeUses(op2) : BuildOperandUses(op2);
                if (op3 != nullptr)
                {
                    srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3);
                }
            }
        }
    }
    buildInternalRegisterUses();

    // Now build defs.
    if (intrinsicTree->IsValue())
    {
        BuildDef(intrinsicTree);
    }

    return srcCount;
}
#endif // FEATURE_HW_INTRINSICS

#endif // _TARGET_ARM64_