1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX Register Requirements for ARM64 XX |
9 | XX XX |
10 | XX This encapsulates all the logic for setting register requirements for XX |
11 | XX the ARM64 architecture. XX |
12 | XX XX |
13 | XX XX |
14 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
15 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
16 | */ |
17 | |
18 | #include "jitpch.h" |
19 | #ifdef _MSC_VER |
20 | #pragma hdrstop |
21 | #endif |
22 | |
23 | #ifdef _TARGET_ARM64_ |
24 | |
25 | #include "jit.h" |
26 | #include "sideeffects.h" |
27 | #include "lower.h" |
28 | |
29 | //------------------------------------------------------------------------ |
// BuildNode: Build the RefPositions for a node
31 | // |
32 | // Arguments: |
33 | // treeNode - the node of interest |
34 | // |
35 | // Return Value: |
36 | // The number of sources consumed by this node. |
37 | // |
38 | // Notes: |
39 | // Preconditions: |
40 | // LSRA Has been initialized. |
41 | // |
42 | // Postconditions: |
43 | // RefPositions have been built for all the register defs and uses required |
44 | // for this node. |
45 | // |
int LinearScan::BuildNode(GenTree* tree)
{
    assert(!tree->isContained());
    Interval* prefSrcInterval = nullptr;
    int       srcCount;
    int       dstCount      = 0;
    regMaskTP dstCandidates = RBM_NONE;
    regMaskTP killMask      = RBM_NONE;
    bool      isLocalDefUse = false;

    // Reset the build-related members of LinearScan.
    clearBuildState();

    RegisterType registerType = TypeGet(tree);

    // Set the default dstCount. This may be modified below.
    // A node that produces a value defines one register; a node whose value is
    // never consumed is additionally marked as a "local def use" so that its
    // def is immediately treated as a last use.
    if (tree->IsValue())
    {
        dstCount = 1;
        if (tree->IsUnusedValue())
        {
            isLocalDefUse = true;
        }
    }
    else
    {
        dstCount = 0;
    }

    // NOTE: within each case below, the ORDER of BuildUse/BuildDef/internal-register
    // calls matters: it determines the sequence of RefPositions seen by the allocator.
    switch (tree->OperGet())
    {
        default:
            srcCount = BuildSimple(tree);
            break;

        case GT_LCL_VAR:
        case GT_LCL_FLD:
        {
            // We handle tracked variables differently from non-tracked ones.  If it is tracked,
            // we will simply add a use of the tracked variable at its parent/consumer.
            // Otherwise, for a use we need to actually add the appropriate references for loading
            // or storing the variable.
            //
            // A tracked variable won't actually get used until the appropriate ancestor tree node
            // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
            // to a call or an orphaned dead node.
            //
            LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
            if (isCandidateVar(varDsc))
            {
                // Register candidate: no RefPositions are built here; the consumer builds the use.
                INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1));
                return 0;
            }
            srcCount = 0;
#ifdef FEATURE_SIMD
            // Need an additional register to read upper 4 bytes of Vector3.
            if (tree->TypeGet() == TYP_SIMD12)
            {
                // We need an internal register different from targetReg in which 'tree' produces its result
                // because both targetReg and internal reg will be in use at the same time.
                buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
                setInternalRegsDelayFree = true;
                buildInternalRegisterUses();
            }
#endif
            BuildDef(tree);
        }
        break;

        case GT_STORE_LCL_FLD:
        case GT_STORE_LCL_VAR:
            // NOTE(review): this first assignment is dead — srcCount is immediately
            // overwritten by the BuildStoreLoc() result below.
            srcCount = 1;
            assert(dstCount == 0);
            srcCount = BuildStoreLoc(tree->AsLclVarCommon());
            break;

        case GT_FIELD_LIST:
            // These should always be contained. We don't correctly allocate or
            // generate code for a non-contained GT_FIELD_LIST.
            noway_assert(!"Non-contained GT_FIELD_LIST");
            srcCount = 0;
            break;

        case GT_LIST:
        case GT_ARGPLACE:
        case GT_NO_OP:
        case GT_START_NONGC:
        case GT_PROF_HOOK:
            // These produce and consume nothing at the register level.
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_CNS_DBL:
        {
            GenTreeDblCon* dblConst   = tree->AsDblCon();
            double         constValue = dblConst->gtDblCon.gtDconVal;

            if (emitter::emitIns_valid_imm_for_fmov(constValue))
            {
                // Directly encode constant to instructions.
            }
            else
            {
                // Reserve int to load constant from memory (IF_LARGELDC)
                buildInternalIntRegisterDefForNode(tree);
                buildInternalRegisterUses();
            }
        }
            // Falls through: the def-building code is shared with GT_CNS_INT.
            __fallthrough;

        case GT_CNS_INT:
        {
            srcCount = 0;
            assert(dstCount == 1);
            // Mark the interval as a constant so the allocator may rematerialize it
            // instead of spilling.
            RefPosition* def               = BuildDef(tree);
            def->getInterval()->isConstant = true;
        }
        break;

        case GT_BOX:
        case GT_COMMA:
        case GT_QMARK:
        case GT_COLON:
            // These are all eliminated before this backend phase; reaching here is a bug.
            srcCount = 0;
            assert(dstCount == 0);
            unreached();
            break;

        case GT_RETURN:
            srcCount = BuildReturn(tree);
            break;

        case GT_RETFILT:
            assert(dstCount == 0);
            if (tree->TypeGet() == TYP_VOID)
            {
                srcCount = 0;
            }
            else
            {
                assert(tree->TypeGet() == TYP_INT);
                srcCount = 1;
                // The filter result must be returned in the integer return register.
                BuildUse(tree->gtGetOp1(), RBM_INTRET);
            }
            break;

        case GT_NOP:
            // A GT_NOP is either a passthrough (if it is void, or if it has
            // a child), but must be considered to produce a dummy value if it
            // has a type but no child.
            srcCount = 0;
            if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr)
            {
                assert(dstCount == 1);
                BuildDef(tree);
            }
            else
            {
                assert(dstCount == 0);
            }
            break;

        case GT_JTRUE:
            // The condition consumed by JTRUE is produced by the preceding compare node.
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_JMP:
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_SWITCH:
            // This should never occur since switch nodes must not be visible at this
            // point in the JIT.
            srcCount = 0;
            noway_assert(!"Switch must be lowered at this point");
            break;

        case GT_JMPTABLE:
            srcCount = 0;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_SWITCH_TABLE:
            // Needs a temp register to compute the jump target address.
            buildInternalIntRegisterDefForNode(tree);
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 0);
            break;

        case GT_ASG:
            noway_assert(!"We should never hit any assignment operator in lowering");
            srcCount = 0;
            break;

        case GT_ADD:
        case GT_SUB:
            if (varTypeIsFloating(tree->TypeGet()))
            {
                // overflow operations aren't supported on float/double types.
                assert(!tree->gtOverflow());

                // No implicit conversions at this stage as the expectation is that
                // everything is made explicit by adding casts.
                assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet());
            }

            __fallthrough;

        case GT_AND:
        case GT_OR:
        case GT_XOR:
        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:
        case GT_ROR:
            // Simple binary operators: uses for both operands, one def.
            srcCount = BuildBinaryUses(tree->AsOp());
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_RETURNTRAP:
            // this just turns into a compare of its child with an int
            // + a conditional call
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 0);
            // The conditional helper call kills its usual set of registers.
            killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
            BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
            break;

        case GT_MOD:
        case GT_UMOD:
            NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
            assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
            srcCount = 0;
            break;

        case GT_MUL:
            if (tree->gtOverflow())
            {
                // Need a register different from target reg to check for overflow.
                buildInternalIntRegisterDefForNode(tree);
                setInternalRegsDelayFree = true;
            }
            __fallthrough;

        case GT_DIV:
        case GT_MULHI:
        case GT_UDIV:
        {
            srcCount = BuildBinaryUses(tree->AsOp());
            buildInternalRegisterUses();
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

        case GT_INTRINSIC:
        {
            // Only the math intrinsics below survive to this point on ARM64.
            noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Floor) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
                         (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));

            // Both operand and its result must be of the same floating point type.
            GenTree* op1 = tree->gtGetOp1();
            assert(varTypeIsFloating(op1));
            assert(op1->TypeGet() == tree->TypeGet());

            BuildUse(op1);
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
        }
        break;

#ifdef FEATURE_SIMD
        case GT_SIMD:
            srcCount = BuildSIMD(tree->AsSIMD());
            break;
#endif // FEATURE_SIMD

#ifdef FEATURE_HW_INTRINSICS
        case GT_HWIntrinsic:
            srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic());
            break;
#endif // FEATURE_HW_INTRINSICS

        case GT_CAST:
            assert(dstCount == 1);
            srcCount = BuildCast(tree->AsCast());
            break;

        case GT_NEG:
        case GT_NOT:
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            assert(dstCount == 1);
            BuildDef(tree);
            break;

        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GE:
        case GT_GT:
        case GT_TEST_EQ:
        case GT_TEST_NE:
        case GT_JCMP:
            srcCount = BuildCmp(tree);
            break;

        case GT_CKFINITE:
            srcCount = 1;
            assert(dstCount == 1);
            // Needs a temp to extract/test the exponent bits.
            buildInternalIntRegisterDefForNode(tree);
            BuildUse(tree->gtGetOp1());
            BuildDef(tree);
            buildInternalRegisterUses();
            break;

        case GT_CMPXCHG:
        {
            GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
            srcCount                    = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
            assert(dstCount == 1);

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // For ARMv8 exclusives requires a single internal register
                buildInternalIntRegisterDefForNode(tree);
            }

            // For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // they may be used multiple times during retries

            // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent
            // them being reused as the target register which must be destroyed early

            RefPosition* locationUse = BuildUse(tree->gtCmpXchg.gtOpLocation);
            setDelayFree(locationUse);
            RefPosition* valueUse = BuildUse(tree->gtCmpXchg.gtOpValue);
            setDelayFree(valueUse);
            if (!cmpXchgNode->gtOpComparand->isContained())
            {
                RefPosition* comparandUse = BuildUse(tree->gtCmpXchg.gtOpComparand);

                // For ARMv8 exclusives the lifetime of the comparand must be extended because
                // it may be used multiple times during retries
                if (!compiler->compSupports(InstructionSet_Atomics))
                {
                    setDelayFree(comparandUse);
                }
            }

            // Internals may not collide with target
            setInternalRegsDelayFree = true;
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_LOCKADD:
        case GT_XADD:
        case GT_XCHG:
        {
            // NOTE(review): `==` binds tighter than `?:`, so this parses as
            // `assert((dstCount == isVoid) ? 0 : 1)`. Because both operands are 0/1
            // this coincidentally fires exactly when dstCount is wrong, but the
            // intended spelling is `assert(dstCount == ((tree->TypeGet() == TYP_VOID) ? 0 : 1));`.
            assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
            srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;

            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // GT_XCHG requires a single internal register; the others require two.
                buildInternalIntRegisterDefForNode(tree);
                if (tree->OperGet() != GT_XCHG)
                {
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            assert(!tree->gtGetOp1()->isContained());
            RefPosition* op1Use = BuildUse(tree->gtGetOp1());
            RefPosition* op2Use = nullptr;
            if (!tree->gtGetOp2()->isContained())
            {
                op2Use = BuildUse(tree->gtGetOp2());
            }

            // For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // they may be used multiple times during retries
            if (!compiler->compSupports(InstructionSet_Atomics))
            {
                // Internals may not collide with target
                if (dstCount == 1)
                {
                    setDelayFree(op1Use);
                    if (op2Use != nullptr)
                    {
                        setDelayFree(op2Use);
                    }
                    setInternalRegsDelayFree = true;
                }
                buildInternalRegisterUses();
            }
            if (dstCount == 1)
            {
                BuildDef(tree);
            }
        }
        break;

#if FEATURE_ARG_SPLIT
        case GT_PUTARG_SPLIT:
            // A split argument defines multiple registers (the register portion of the split).
            srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
            dstCount = tree->AsPutArgSplit()->gtNumRegs;
            break;
#endif // FEATURE_ARG_SPLIT

        case GT_PUTARG_STK:
            srcCount = BuildPutArgStk(tree->AsPutArgStk());
            break;

        case GT_PUTARG_REG:
            srcCount = BuildPutArgReg(tree->AsUnOp());
            break;

        case GT_CALL:
            srcCount = BuildCall(tree->AsCall());
            if (tree->AsCall()->HasMultiRegRetVal())
            {
                // Multi-reg return: the call defines one register per return register.
                dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
            }
            break;

        case GT_ADDR:
        {
            // For a GT_ADDR, the child node should not be evaluated into a register
            GenTree* child = tree->gtGetOp1();
            assert(!isCandidateLocalRef(child));
            assert(child->isContained());
            assert(dstCount == 1);
            srcCount = 0;
            BuildDef(tree);
        }
        break;

        case GT_BLK:
        case GT_DYN_BLK:
            // These should all be eliminated prior to Lowering.
            assert(!"Non-store block node in Lowering");
            srcCount = 0;
            break;

        case GT_STORE_BLK:
        case GT_STORE_OBJ:
        case GT_STORE_DYN_BLK:
            srcCount = BuildBlockStore(tree->AsBlk());
            break;

        case GT_INIT_VAL:
            // Always a passthrough of its child's value.
            assert(!"INIT_VAL should always be contained");
            srcCount = 0;
            break;

        case GT_LCLHEAP:
        {
            assert(dstCount == 1);

            // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
            // Here '-' means don't care.
            //
            //  Size?                   Init Memory?    # temp regs
            //   0                          -               0
            //   const and <=6 ptr words    -               0
            //   const and <PageSize        No              0
            //   >6 ptr words               Yes             0
            //   Non-const                  Yes             0
            //   Non-const                  No              2
            //

            GenTree* size = tree->gtGetOp1();
            if (size->IsCnsIntOrI())
            {
                assert(size->isContained());
                srcCount = 0;

                size_t sizeVal = size->gtIntCon.gtIconVal;

                if (sizeVal != 0)
                {
                    // Compute the amount of memory to properly STACK_ALIGN.
                    // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
                    // This should also help in debugging as we can examine the original size specified with
                    // localloc.
                    sizeVal         = AlignUp(sizeVal, STACK_ALIGN);
                    size_t stpCount = sizeVal / (REGSIZE_BYTES * 2);

                    // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc)
                    //
                    if (stpCount <= 4)
                    {
                        // Need no internal registers
                    }
                    else if (!compiler->info.compInitMem)
                    {
                        // No need to initialize allocated stack space.
                        if (sizeVal < compiler->eeGetPageSize())
                        {
                            // Need no internal registers
                        }
                        else
                        {
                            // We need two registers: regCnt and RegTmp
                            buildInternalIntRegisterDefForNode(tree);
                            buildInternalIntRegisterDefForNode(tree);
                        }
                    }
                }
            }
            else
            {
                // Non-constant size: the size operand is consumed from a register.
                srcCount = 1;
                if (!compiler->info.compInitMem)
                {
                    buildInternalIntRegisterDefForNode(tree);
                    buildInternalIntRegisterDefForNode(tree);
                }
            }

            if (!size->isContained())
            {
                BuildUse(size);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
        case GT_SIMD_CHK:
#endif // FEATURE_SIMD
        {
            GenTreeBoundsChk* node = tree->AsBoundsChk();
            // Consumes arrLen & index - has no result
            assert(dstCount == 0);

            // NOTE(review): intCns/other are unused leftovers from an earlier scheme.
            GenTree* intCns = nullptr;
            GenTree* other  = nullptr;
            srcCount        = BuildOperandUses(tree->AsBoundsChk()->gtIndex);
            srcCount += BuildOperandUses(tree->AsBoundsChk()->gtArrLen);
        }
        break;

        case GT_ARR_ELEM:
            // These must have been lowered to GT_ARR_INDEX
            noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
            srcCount = 0;
            assert(dstCount == 0);
            break;

        case GT_ARR_INDEX:
        {
            srcCount = 2;
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            setInternalRegsDelayFree = true;

            // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
            // times while the result is being computed.
            RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj());
            setDelayFree(arrObjUse);
            BuildUse(tree->AsArrIndex()->IndexExpr());
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_ARR_OFFSET:
            // This consumes the offset, if any, the arrObj and the effective index,
            // and produces the flattened offset for this dimension.
            srcCount = 2;
            if (!tree->gtArrOffs.gtOffset->isContained())
            {
                BuildUse(tree->AsArrOffs()->gtOffset);
                srcCount++;
            }
            BuildUse(tree->AsArrOffs()->gtIndex);
            BuildUse(tree->AsArrOffs()->gtArrObj);
            assert(dstCount == 1);
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_LEA:
        {
            GenTreeAddrMode* lea = tree->AsAddrMode();

            GenTree* base  = lea->Base();
            GenTree* index = lea->Index();
            int      cns   = lea->Offset();

            // This LEA is instantiating an address, so we set up the srcCount here.
            srcCount = 0;
            if (base != nullptr)
            {
                srcCount++;
                BuildUse(base);
            }
            if (index != nullptr)
            {
                srcCount++;
                BuildUse(index);
            }
            assert(dstCount == 1);

            // On ARM64 we may need a single internal register
            // (when both conditions are true then we still only need a single internal register)
            if ((index != nullptr) && (cns != 0))
            {
                // ARM64 does not support both Index and offset so we need an internal register
                buildInternalIntRegisterDefForNode(tree);
            }
            else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
            {
                // This offset can't be contained in the add instruction, so we need an internal register
                buildInternalIntRegisterDefForNode(tree);
            }
            buildInternalRegisterUses();
            BuildDef(tree);
        }
        break;

        case GT_STOREIND:
        {
            assert(dstCount == 0);

            if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree))
            {
                // GC write barrier stores have fixed register requirements of their own.
                srcCount = BuildGCWriteBarrier(tree);
                break;
            }

            srcCount = BuildIndir(tree->AsIndir());
            if (!tree->gtGetOp2()->isContained())
            {
                BuildUse(tree->gtGetOp2());
                srcCount++;
            }
        }
        break;

        case GT_NULLCHECK:
            // Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register
            // is required, and it is not a localDefUse.
            assert(dstCount == 0);
            assert(!tree->gtGetOp1()->isContained());
            BuildUse(tree->gtGetOp1());
            srcCount = 1;
            break;

        case GT_IND:
            assert(dstCount == 1);
            srcCount = BuildIndir(tree->AsIndir());
            break;

        case GT_CATCH_ARG:
            srcCount = 0;
            assert(dstCount == 1);
            // The exception object arrives in a fixed register.
            BuildDef(tree, RBM_EXCEPTION_OBJECT);
            break;

        case GT_CLS_VAR:
            srcCount = 0;
            // GT_CLS_VAR, by the time we reach the backend, must always
            // be a pure use.
            // It will produce a result of the type of the
            // node, and use an internal register for the address.

            assert(dstCount == 1);
            assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

        case GT_INDEX_ADDR:
            assert(dstCount == 1);
            srcCount = BuildBinaryUses(tree->AsOp());
            // Temp register for the address computation.
            buildInternalIntRegisterDefForNode(tree);
            buildInternalRegisterUses();
            BuildDef(tree);
            break;

    } // end switch (tree->OperGet())

    if (tree->IsUnusedValue() && (dstCount != 0))
    {
        isLocalDefUse = true;
    }
    // We need to be sure that we've set srcCount and dstCount appropriately
    assert((dstCount < 2) || tree->IsMultiRegCall());
    assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
    assert(!tree->IsUnusedValue() || (dstCount != 0));
    assert(dstCount == tree->GetRegisterDstCount());
    INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount));
    return srcCount;
}
759 | |
760 | #ifdef FEATURE_SIMD |
761 | //------------------------------------------------------------------------ |
762 | // BuildSIMD: Set the NodeInfo for a GT_SIMD tree. |
763 | // |
764 | // Arguments: |
765 | // tree - The GT_SIMD node of interest |
766 | // |
767 | // Return Value: |
768 | // The number of sources consumed by this node. |
769 | // |
int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
{
    int srcCount = 0;
    // Only SIMDIntrinsicInit can be contained
    if (simdTree->isContained())
    {
        assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
    }
    int dstCount = simdTree->IsValue() ? 1 : 0;
    assert(dstCount == 1);

    // When true, the default use-building at the bottom handles op1/op2;
    // cases that build their uses inline (to control ordering or delay-free
    // marking) set this to false.
    bool buildUses = true;

    GenTree* op1 = simdTree->gtGetOp1();
    GenTree* op2 = simdTree->gtGetOp2();

    switch (simdTree->gtSIMDIntrinsicID)
    {
        case SIMDIntrinsicInit:
        case SIMDIntrinsicCast:
        case SIMDIntrinsicSqrt:
        case SIMDIntrinsicAbs:
        case SIMDIntrinsicConvertToSingle:
        case SIMDIntrinsicConvertToInt32:
        case SIMDIntrinsicConvertToDouble:
        case SIMDIntrinsicConvertToInt64:
        case SIMDIntrinsicWidenLo:
        case SIMDIntrinsicWidenHi:
            // No special handling required.
            break;

        case SIMDIntrinsicGetItem:
        {
            op1 = simdTree->gtGetOp1();
            op2 = simdTree->gtGetOp2();

            // We have an object and an index, either of which may be contained.
            bool setOp2DelayFree = false;
            if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
            {
                // If the index is not a constant and the object is not contained or is a local
                // we will need a general purpose register to calculate the address
                // internal register must not clobber input index
                // TODO-Cleanup: An internal register will never clobber a source; this code actually
                // ensures that the index (op2) doesn't interfere with the target.
                buildInternalIntRegisterDefForNode(simdTree);
                setOp2DelayFree = true;
            }
            srcCount += BuildOperandUses(op1);
            if (!op2->isContained())
            {
                RefPosition* op2Use = BuildUse(op2);
                if (setOp2DelayFree)
                {
                    setDelayFree(op2Use);
                }
                srcCount++;
            }

            if (!op2->IsCnsIntOrI() && (!op1->isContained()))
            {
                // If vector is not already in memory (contained) and the index is not a constant,
                // we will use the SIMD temp location to store the vector.
                compiler->getSIMDInitTempVarNum();
            }
            buildUses = false;
        }
        break;

        case SIMDIntrinsicAdd:
        case SIMDIntrinsicSub:
        case SIMDIntrinsicMul:
        case SIMDIntrinsicDiv:
        case SIMDIntrinsicBitwiseAnd:
        case SIMDIntrinsicBitwiseAndNot:
        case SIMDIntrinsicBitwiseOr:
        case SIMDIntrinsicBitwiseXor:
        case SIMDIntrinsicMin:
        case SIMDIntrinsicMax:
        case SIMDIntrinsicEqual:
        case SIMDIntrinsicLessThan:
        case SIMDIntrinsicGreaterThan:
        case SIMDIntrinsicLessThanOrEqual:
        case SIMDIntrinsicGreaterThanOrEqual:
            // No special handling required.
            break;

        case SIMDIntrinsicSetX:
        case SIMDIntrinsicSetY:
        case SIMDIntrinsicSetZ:
        case SIMDIntrinsicSetW:
        case SIMDIntrinsicNarrow:
        {
            // Op1 will write to dst before Op2 is free
            BuildUse(op1);
            RefPosition* op2Use = BuildUse(op2);
            setDelayFree(op2Use);
            srcCount  = 2;
            buildUses = false;
            break;
        }

        case SIMDIntrinsicInitN:
        {
            var_types baseType = simdTree->gtSIMDBaseType;
            srcCount           = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
            if (varTypeIsFloating(simdTree->gtSIMDBaseType))
            {
                // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
                buildInternalFloatRegisterDefForNode(simdTree);
            }

            // Build a use for each element of the GT_LIST chain.
            int initCount = 0;
            for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
            {
                assert(list->OperGet() == GT_LIST);
                GenTree* listItem = list->gtGetOp1();
                assert(listItem->TypeGet() == baseType);
                assert(!listItem->isContained());
                BuildUse(listItem);
                initCount++;
            }
            assert(initCount == srcCount);
            buildUses = false;

            break;
        }

        case SIMDIntrinsicInitArray:
            // We have an array and an index, which may be contained.
            break;

        case SIMDIntrinsicOpEquality:
        case SIMDIntrinsicOpInEquality:
            // Need a SIMD temp for the intermediate comparison result.
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicDotProduct:
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicSelect:
            // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB
            // bsl target register must be VC.  Reserve a temp in case we need to shuffle things.
            // This will require a different approach, as GenTreeSIMD has only two operands.
            assert(!"SIMDIntrinsicSelect not yet supported");
            buildInternalFloatRegisterDefForNode(simdTree);
            break;

        case SIMDIntrinsicInitArrayX:
        case SIMDIntrinsicInitFixed:
        case SIMDIntrinsicCopyToArray:
        case SIMDIntrinsicCopyToArrayX:
        case SIMDIntrinsicNone:
        case SIMDIntrinsicGetCount:
        case SIMDIntrinsicGetOne:
        case SIMDIntrinsicGetZero:
        case SIMDIntrinsicGetAllOnes:
        case SIMDIntrinsicGetX:
        case SIMDIntrinsicGetY:
        case SIMDIntrinsicGetZ:
        case SIMDIntrinsicGetW:
        case SIMDIntrinsicInstEquals:
        case SIMDIntrinsicHWAccel:
        case SIMDIntrinsicWiden:
        case SIMDIntrinsicInvalid:
            assert(!"These intrinsics should not be seen during register allocation");
            __fallthrough;

        default:
            noway_assert(!"Unimplemented SIMD node type.");
            unreached();
    }
    if (buildUses)
    {
        assert(!op1->OperIs(GT_LIST));
        assert(srcCount == 0);
        srcCount = BuildOperandUses(op1);
        if ((op2 != nullptr) && !op2->isContained())
        {
            srcCount += BuildOperandUses(op2);
        }
    }
    assert(internalCount <= MaxInternalCount);
    buildInternalRegisterUses();
    if (dstCount == 1)
    {
        BuildDef(simdTree);
    }
    else
    {
        assert(dstCount == 0);
    }
    return srcCount;
}
965 | #endif // FEATURE_SIMD |
966 | |
967 | #ifdef FEATURE_HW_INTRINSICS |
968 | #include "hwintrinsic.h" |
969 | //------------------------------------------------------------------------ |
970 | // BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree. |
971 | // |
972 | // Arguments: |
973 | // tree - The GT_HWIntrinsic node of interest |
974 | // |
975 | // Return Value: |
976 | // The number of sources consumed by this node. |
977 | // |
int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
    NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
    int            numArgs     = HWIntrinsicInfo::lookupNumArgs(intrinsicTree);

    GenTree* op1      = intrinsicTree->gtGetOp1();
    GenTree* op2      = intrinsicTree->gtGetOp2();
    GenTree* op3      = nullptr;
    int      srcCount = 0;

    if ((op1 != nullptr) && op1->OperIsList())
    {
        // Three-operand form: the operands arrive as a GT_LIST of exactly three entries.
        // op2 must be null, and there must be at least two more arguments.
        assert(op2 == nullptr);
        noway_assert(op1->AsArgList()->Rest() != nullptr);
        noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr);
        assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
        op2 = op1->AsArgList()->Rest()->Current();
        op3 = op1->AsArgList()->Rest()->Rest()->Current();
        op1 = op1->AsArgList()->Current();
    }

    int  dstCount       = intrinsicTree->IsValue() ? 1 : 0;
    bool op2IsDelayFree = false;
    bool op3IsDelayFree = false;

    // Create internal temps, and handle any other special requirements.
    switch (HWIntrinsicInfo::lookup(intrinsicID).form)
    {
        case HWIntrinsicInfo::Sha1HashOp:
            assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
            if (!op2->isContained())
            {
                // RMW-style op: the sources must stay live until the destination is
                // written, so they can't be reused as the target register.
                assert(!op3->isContained());
                op2IsDelayFree           = true;
                op3IsDelayFree           = true;
                setInternalRegsDelayFree = true;
            }
            buildInternalFloatRegisterDefForNode(intrinsicTree);
            break;
        case HWIntrinsicInfo::SimdTernaryRMWOp:
            assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
            if (!op2->isContained())
            {
                assert(!op3->isContained());
                op2IsDelayFree = true;
                op3IsDelayFree = true;
            }
            break;
        case HWIntrinsicInfo::Sha1RotateOp:
            buildInternalFloatRegisterDefForNode(intrinsicTree);
            break;

        case HWIntrinsicInfo::SimdExtractOp:
        case HWIntrinsicInfo::SimdInsertOp:
            if (!op2->isContained())
            {
                // We need a temp to create a switch table
                buildInternalIntRegisterDefForNode(intrinsicTree);
            }
            break;

        default:
            break;
    }

    // Next, build uses
    if (numArgs > 3)
    {
        // More than three args: they remain as a GT_LIST; build a use per entry.
        srcCount = 0;
        assert(!op2IsDelayFree && !op3IsDelayFree);
        assert(op1->OperIs(GT_LIST));
        {
            for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
            {
                srcCount += BuildOperandUses(list->Current());
            }
        }
        assert(srcCount == numArgs);
    }
    else
    {
        if (op1 != nullptr)
        {
            srcCount += BuildOperandUses(op1);
            if (op2 != nullptr)
            {
                srcCount += (op2IsDelayFree) ? BuildDelayFreeUses(op2) : BuildOperandUses(op2);
                if (op3 != nullptr)
                {
                    srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3);
                }
            }
        }
    }
    buildInternalRegisterUses();

    // Now defs
    if (intrinsicTree->IsValue())
    {
        BuildDef(intrinsicTree);
    }

    return srcCount;
}
1083 | #endif |
1084 | |
1085 | #endif // _TARGET_ARM64_ |
1086 | |