1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                  ARM/ARM64 Code Generator Common Code                    XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
13#include "jitpch.h"
14#ifdef _MSC_VER
15#pragma hdrstop
16#endif
17
18#ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures
19
20#include "codegen.h"
21#include "lower.h"
22#include "gcinfo.h"
23#include "emit.h"
24
25//------------------------------------------------------------------------
// genCodeForTreeNode: Generate code for a single node in the tree.
27//
28// Preconditions:
29// All operands have been evaluated.
30//
31void CodeGen::genCodeForTreeNode(GenTree* treeNode)
32{
33 regNumber targetReg = treeNode->gtRegNum;
34 var_types targetType = treeNode->TypeGet();
35 emitter* emit = getEmitter();
36
37#ifdef DEBUG
38 // Validate that all the operands for the current node are consumed in order.
39 // This is important because LSRA ensures that any necessary copies will be
40 // handled correctly.
41 lastConsumedNode = nullptr;
42 if (compiler->verbose)
43 {
44 unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
45 compiler->gtDispLIRNode(treeNode, "Generating: ");
46 }
47#endif // DEBUG
48
49#ifdef _TARGET_ARM64_ // TODO-ARM: is this applicable to ARM32?
50 // Is this a node whose value is already in a register? LSRA denotes this by
51 // setting the GTF_REUSE_REG_VAL flag.
52 if (treeNode->IsReuseRegVal())
53 {
54 // For now, this is only used for constant nodes.
55 assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL));
56 JITDUMP(" TreeNode is marked ReuseReg\n");
57 return;
58 }
59#endif // _TARGET_ARM64_
60
    // Contained nodes are part of their parents for codegen purposes
    // (e.g., immediates, most LEAs).
63 if (treeNode->isContained())
64 {
65 return;
66 }
67
68 switch (treeNode->gtOper)
69 {
70 case GT_START_NONGC:
71 getEmitter()->emitDisableGC();
72 break;
73
74 case GT_PROF_HOOK:
75 // We should be seeing this only if profiler hook is needed
76 noway_assert(compiler->compIsProfilerHookNeeded());
77
78#ifdef PROFILING_SUPPORTED
79 // Right now this node is used only for tail calls. In future if
80 // we intend to use it for Enter or Leave hooks, add a data member
81 // to this node indicating the kind of profiler hook. For example,
82 // helper number can be used.
83 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
84#endif // PROFILING_SUPPORTED
85 break;
86
87 case GT_LCLHEAP:
88 genLclHeap(treeNode);
89 break;
90
91 case GT_CNS_INT:
92 case GT_CNS_DBL:
93 genSetRegToConst(targetReg, targetType, treeNode);
94 genProduceReg(treeNode);
95 break;
96
97 case GT_NOT:
98 case GT_NEG:
99 genCodeForNegNot(treeNode);
100 break;
101
102 case GT_MOD:
103 case GT_UMOD:
104 case GT_DIV:
105 case GT_UDIV:
106 genCodeForDivMod(treeNode->AsOp());
107 break;
108
109 case GT_OR:
110 case GT_XOR:
111 case GT_AND:
112 assert(varTypeIsIntegralOrI(treeNode));
113
114 __fallthrough;
115
116#if !defined(_TARGET_64BIT_)
117 case GT_ADD_LO:
118 case GT_ADD_HI:
119 case GT_SUB_LO:
120 case GT_SUB_HI:
121#endif // !defined(_TARGET_64BIT_)
122
123 case GT_ADD:
124 case GT_SUB:
125 case GT_MUL:
126 genConsumeOperands(treeNode->AsOp());
127 genCodeForBinary(treeNode->AsOp());
128 break;
129
130 case GT_LSH:
131 case GT_RSH:
132 case GT_RSZ:
133 // case GT_ROL: // No ROL instruction on ARM; it has been lowered to ROR.
134 case GT_ROR:
135 genCodeForShift(treeNode);
136 break;
137
138#if !defined(_TARGET_64BIT_)
139
140 case GT_LSH_HI:
141 case GT_RSH_LO:
142 genCodeForShiftLong(treeNode);
143 break;
144
145#endif // !defined(_TARGET_64BIT_)
146
147 case GT_CAST:
148 genCodeForCast(treeNode->AsOp());
149 break;
150
151 case GT_BITCAST:
152 {
153 GenTree* op1 = treeNode->gtOp.gtOp1;
154 if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
155 {
156#ifdef _TARGET_ARM64_
157 inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType);
158#else // !_TARGET_ARM64_
159 if (varTypeIsFloating(treeNode))
160 {
161 // GT_BITCAST on ARM is only used to cast floating-point arguments to integer
162 // registers. Nobody generates GT_BITCAST from int to float currently.
163 NYI_ARM("GT_BITCAST from 'int' to 'float'");
164 }
165 else
166 {
167 assert(varTypeIsFloating(op1));
168
169 if (op1->TypeGet() == TYP_FLOAT)
170 {
171 inst_RV_RV(INS_vmov_f2i, targetReg, genConsumeReg(op1), targetType);
172 }
173 else
174 {
175 assert(op1->TypeGet() == TYP_DOUBLE);
176 regNumber otherReg = treeNode->AsMultiRegOp()->gtOtherReg;
177 assert(otherReg != REG_NA);
178 inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, genConsumeReg(op1), EA_8BYTE);
179 }
180 }
181#endif // !_TARGET_ARM64_
182 }
183 else
184 {
185 inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
186 }
187 }
188 break;
189
190 case GT_LCL_FLD_ADDR:
191 case GT_LCL_VAR_ADDR:
192 genCodeForLclAddr(treeNode);
193 break;
194
195 case GT_LCL_FLD:
196 genCodeForLclFld(treeNode->AsLclFld());
197 break;
198
199 case GT_LCL_VAR:
200 genCodeForLclVar(treeNode->AsLclVar());
201 break;
202
203 case GT_STORE_LCL_FLD:
204 genCodeForStoreLclFld(treeNode->AsLclFld());
205 break;
206
207 case GT_STORE_LCL_VAR:
208 genCodeForStoreLclVar(treeNode->AsLclVar());
209 break;
210
211 case GT_RETFILT:
212 case GT_RETURN:
213 genReturn(treeNode);
214 break;
215
216 case GT_LEA:
217 // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction.
218 genLeaInstruction(treeNode->AsAddrMode());
219 break;
220
221 case GT_INDEX_ADDR:
222 genCodeForIndexAddr(treeNode->AsIndexAddr());
223 break;
224
225 case GT_IND:
226 genCodeForIndir(treeNode->AsIndir());
227 break;
228
229#ifdef _TARGET_ARM_
230 case GT_MUL_LONG:
231 genCodeForMulLong(treeNode->AsMultiRegOp());
232 break;
233#endif // _TARGET_ARM_
234
235#ifdef _TARGET_ARM64_
236
237 case GT_MULHI:
238 genCodeForMulHi(treeNode->AsOp());
239 break;
240
241 case GT_SWAP:
242 genCodeForSwap(treeNode->AsOp());
243 break;
244#endif // _TARGET_ARM64_
245
246 case GT_JMP:
247 genJmpMethod(treeNode);
248 break;
249
250 case GT_CKFINITE:
251 genCkfinite(treeNode);
252 break;
253
254 case GT_INTRINSIC:
255 genIntrinsic(treeNode);
256 break;
257
258#ifdef FEATURE_SIMD
259 case GT_SIMD:
260 genSIMDIntrinsic(treeNode->AsSIMD());
261 break;
262#endif // FEATURE_SIMD
263
264#ifdef FEATURE_HW_INTRINSICS
265 case GT_HWIntrinsic:
266 genHWIntrinsic(treeNode->AsHWIntrinsic());
267 break;
268#endif // FEATURE_HW_INTRINSICS
269
270 case GT_EQ:
271 case GT_NE:
272 case GT_LT:
273 case GT_LE:
274 case GT_GE:
275 case GT_GT:
276 case GT_CMP:
277#ifdef _TARGET_ARM64_
278 case GT_TEST_EQ:
279 case GT_TEST_NE:
280#endif // _TARGET_ARM64_
281 genCodeForCompare(treeNode->AsOp());
282 break;
283
284 case GT_JTRUE:
285 genCodeForJumpTrue(treeNode);
286 break;
287
288#ifdef _TARGET_ARM64_
289 case GT_JCMP:
290 genCodeForJumpCompare(treeNode->AsOp());
291 break;
292#endif // _TARGET_ARM64_
293
294 case GT_JCC:
295 genCodeForJcc(treeNode->AsCC());
296 break;
297
298 case GT_SETCC:
299 genCodeForSetcc(treeNode->AsCC());
300 break;
301
302 case GT_RETURNTRAP:
303 genCodeForReturnTrap(treeNode->AsOp());
304 break;
305
306 case GT_STOREIND:
307 genCodeForStoreInd(treeNode->AsStoreInd());
308 break;
309
310 case GT_COPY:
311 // This is handled at the time we call genConsumeReg() on the GT_COPY
312 break;
313
314 case GT_LIST:
315 case GT_FIELD_LIST:
316 // Should always be marked contained.
317 assert(!"LIST, FIELD_LIST nodes should always be marked contained.");
318 break;
319
320 case GT_PUTARG_STK:
321 genPutArgStk(treeNode->AsPutArgStk());
322 break;
323
324 case GT_PUTARG_REG:
325 genPutArgReg(treeNode->AsOp());
326 break;
327
328#if FEATURE_ARG_SPLIT
329 case GT_PUTARG_SPLIT:
330 genPutArgSplit(treeNode->AsPutArgSplit());
331 break;
332#endif // FEATURE_ARG_SPLIT
333
334 case GT_CALL:
335 genCallInstruction(treeNode->AsCall());
336 break;
337
338 case GT_MEMORYBARRIER:
339 instGen_MemoryBarrier();
340 break;
341
342#ifdef _TARGET_ARM64_
343 case GT_XCHG:
344 case GT_XADD:
345 genLockedInstructions(treeNode->AsOp());
346 break;
347
348 case GT_CMPXCHG:
349 genCodeForCmpXchg(treeNode->AsCmpXchg());
350 break;
351#endif // _TARGET_ARM64_
352
353 case GT_RELOAD:
354 // do nothing - reload is just a marker.
355 // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
356 // into the register specified in this node.
357 break;
358
359 case GT_NOP:
360 break;
361
362 case GT_NO_OP:
363 instGen(INS_nop);
364 break;
365
366 case GT_ARR_BOUNDS_CHECK:
367#ifdef FEATURE_SIMD
368 case GT_SIMD_CHK:
369#endif // FEATURE_SIMD
370 genRangeCheck(treeNode);
371 break;
372
373 case GT_PHYSREG:
374 genCodeForPhysReg(treeNode->AsPhysReg());
375 break;
376
377 case GT_NULLCHECK:
378 genCodeForNullCheck(treeNode->AsOp());
379 break;
380
381 case GT_CATCH_ARG:
382
383 noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
384
385 /* Catch arguments get passed in a register. genCodeForBBlist()
386 would have marked it as holding a GC object, but not used. */
387
388 noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
389 genConsumeReg(treeNode);
390 break;
391
392 case GT_PINVOKE_PROLOG:
393 noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
394
395 // the runtime side requires the codegen here to be consistent
396 emit->emitDisableRandomNops();
397 break;
398
399 case GT_LABEL:
400 genPendingCallLabel = genCreateTempLabel();
401 treeNode->gtLabel.gtLabBB = genPendingCallLabel;
402 emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
403 break;
404
405 case GT_STORE_OBJ:
406 case GT_STORE_DYN_BLK:
407 case GT_STORE_BLK:
408 genCodeForStoreBlk(treeNode->AsBlk());
409 break;
410
411 case GT_JMPTABLE:
412 genJumpTable(treeNode);
413 break;
414
415 case GT_SWITCH_TABLE:
416 genTableBasedSwitch(treeNode);
417 break;
418
419 case GT_ARR_INDEX:
420 genCodeForArrIndex(treeNode->AsArrIndex());
421 break;
422
423 case GT_ARR_OFFSET:
424 genCodeForArrOffset(treeNode->AsArrOffs());
425 break;
426
427#ifdef _TARGET_ARM_
428
429 case GT_CLS_VAR_ADDR:
430 emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
431 genProduceReg(treeNode);
432 break;
433
434 case GT_LONG:
435 assert(treeNode->isUsedFromReg());
436 genConsumeRegs(treeNode);
437 break;
438
439#endif // _TARGET_ARM_
440
441 case GT_IL_OFFSET:
442 // Do nothing; these nodes are simply markers for debug info.
443 break;
444
445 default:
446 {
447#ifdef DEBUG
448 char message[256];
449 _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s",
450 GenTree::OpName(treeNode->OperGet()));
451 NYIRAW(message);
452#else
453 NYI("unimplemented node");
454#endif
455 }
456 break;
457 }
458}
459
460//------------------------------------------------------------------------
461// genSetRegToIcon: Generate code that will set the given register to the integer constant.
462//
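// Arguments:
//    reg   - the (integer) register to initialize
//    val   - the immediate value to load
//    type  - the type of the value, used to pick the size of the immediate
//    flags - whether the instruction may set the condition flags (ARM32)
//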
463void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
464{
465 // Reg cannot be a FP reg
466 assert(!genIsValidFloatReg(reg));
467
    // The only TYP_REF constant that can come down this path is a managed 'null' since it is not
469 // relocatable. Other ref type constants (e.g. string objects) go through a different
470 // code path.
471 noway_assert(type != TYP_REF || val == 0);
472
473 instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
474}
475
476//---------------------------------------------------------------------
477// genIntrinsic - generate code for a given intrinsic
478//
479// Arguments
480// treeNode - the GT_INTRINSIC node
481//
482// Return value:
483// None
484//
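// Notes:
//    Each supported intrinsic maps to a single floating-point instruction: Abs and Sqrt use
//    the target's INS_ABS / INS_SQRT instructions, and (ARM64 only) Ceiling, Floor, and Round
//    use frintp, frintm, and frintn respectively.
//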
485void CodeGen::genIntrinsic(GenTree* treeNode)
486{
487 assert(treeNode->OperIs(GT_INTRINSIC));
488
    // Both the operand and its result must be of the same floating-point type.
490 GenTree* srcNode = treeNode->gtOp.gtOp1;
491 assert(varTypeIsFloating(srcNode));
492 assert(srcNode->TypeGet() == treeNode->TypeGet());
493
494 // Right now only Abs/Ceiling/Floor/Round/Sqrt are treated as math intrinsics.
495 //
496 switch (treeNode->gtIntrinsic.gtIntrinsicId)
497 {
498 case CORINFO_INTRINSIC_Abs:
499 genConsumeOperands(treeNode->AsOp());
500 getEmitter()->emitInsBinary(INS_ABS, emitActualTypeSize(treeNode), treeNode, srcNode);
501 break;
502
503#ifdef _TARGET_ARM64_
504 case CORINFO_INTRINSIC_Ceiling:
505 genConsumeOperands(treeNode->AsOp());
506 getEmitter()->emitInsBinary(INS_frintp, emitActualTypeSize(treeNode), treeNode, srcNode);
507 break;
508
509 case CORINFO_INTRINSIC_Floor:
510 genConsumeOperands(treeNode->AsOp());
511 getEmitter()->emitInsBinary(INS_frintm, emitActualTypeSize(treeNode), treeNode, srcNode);
512 break;
513
514 case CORINFO_INTRINSIC_Round:
515 genConsumeOperands(treeNode->AsOp());
516 getEmitter()->emitInsBinary(INS_frintn, emitActualTypeSize(treeNode), treeNode, srcNode);
517 break;
518#endif // _TARGET_ARM64_
519
520 case CORINFO_INTRINSIC_Sqrt:
521 genConsumeOperands(treeNode->AsOp());
522 getEmitter()->emitInsBinary(INS_SQRT, emitActualTypeSize(treeNode), treeNode, srcNode);
523 break;
524
525 default:
526 assert(!"genIntrinsic: Unsupported intrinsic");
527 unreached();
528 }
529
530 genProduceReg(treeNode);
531}
532
533//---------------------------------------------------------------------
534// genPutArgStk - generate code for a GT_PUTARG_STK node
535//
536// Arguments
537// treeNode - the GT_PUTARG_STK node
538//
539// Return value:
540// None
541//
542void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
543{
544 assert(treeNode->OperIs(GT_PUTARG_STK));
545 GenTree* source = treeNode->gtOp1;
546 var_types targetType = genActualType(source->TypeGet());
547 emitter* emit = getEmitter();
548
    // This is the varNum for our store operations;
    // typically it is the varNum for the outgoing arg space.
    // When we are generating a tail call it will be the varNum for arg0.
552 unsigned varNumOut = (unsigned)-1;
553 unsigned argOffsetMax = (unsigned)-1; // Records the maximum size of this area for assert checks
554
555 // Get argument offset to use with 'varNumOut'
556 // Here we cross check that argument offset hasn't changed from lowering to codegen since
557 // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
558 unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE;
559
560#ifdef DEBUG
561 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode);
562 assert(curArgTabEntry);
563 assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE));
564#endif // DEBUG
565
    // Decide whether to set up the stack arg in the incoming or the outgoing arg area.
    // Fast tail calls are implemented as epilog+jmp, so their stack args are set up in the incoming arg area.
    // For all other calls, the stack arg is set up in the outgoing arg area.
569 if (treeNode->putInIncomingArgArea())
570 {
571 varNumOut = getFirstArgWithStackSlot();
572 argOffsetMax = compiler->compArgSize;
573#if FEATURE_FASTTAILCALL
574 // This must be a fast tail call.
575 assert(treeNode->gtCall->IsFastTailCall());
576
        // Since this is a fast tail call, the existence of a first incoming arg is guaranteed
        // because a fast tail call requires that the caller's incoming arg area is >= the outgoing
        // arg area required for the tail call.
580 LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
581 assert(varDsc != nullptr);
582#endif // FEATURE_FASTTAILCALL
583 }
584 else
585 {
586 varNumOut = compiler->lvaOutgoingArgSpaceVar;
587 argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
588 }
589
590 bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST);
591
592 if (!isStruct) // a normal non-Struct argument
593 {
594 if (varTypeIsSIMD(targetType))
595 {
596 assert(!source->isContained());
597
598 regNumber srcReg = genConsumeReg(source);
599
600 emitAttr storeAttr = emitTypeSize(targetType);
601
602 assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
603 emit->emitIns_S_R(INS_str, storeAttr, srcReg, varNumOut, argOffsetOut);
604
605 argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
            assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
607 return;
608 }
609
610 instruction storeIns = ins_Store(targetType);
611 emitAttr storeAttr = emitTypeSize(targetType);
612
613 // If it is contained then source must be the integer constant zero
614 if (source->isContained())
615 {
616#ifdef _TARGET_ARM64_
617 assert(source->OperGet() == GT_CNS_INT);
618 assert(source->AsIntConCommon()->IconValue() == 0);
619
620 emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut);
621#else // !_TARGET_ARM64_
622 // There is no zero register on ARM32
623 unreached();
#endif // !_TARGET_ARM64_
625 }
626 else
627 {
628 genConsumeReg(source);
629 emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut);
630#ifdef _TARGET_ARM_
631 if (targetType == TYP_LONG)
632 {
633 // This case currently only occurs for double types that are passed as TYP_LONG;
634 // actual long types would have been decomposed by now.
635 assert(source->IsCopyOrReload());
636 regNumber otherReg = (regNumber)source->AsCopyOrReload()->GetRegNumByIdx(1);
637 assert(otherReg != REG_NA);
638 argOffsetOut += EA_4BYTE;
639 emit->emitIns_S_R(storeIns, storeAttr, otherReg, varNumOut, argOffsetOut);
640 }
641#endif // _TARGET_ARM_
642 }
643 argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
645 }
646 else // We have some kind of a struct argument
647 {
648 assert(source->isContained()); // We expect that this node was marked as contained in Lower
649
650 if (source->OperGet() == GT_FIELD_LIST)
651 {
652 genPutArgStkFieldList(treeNode, varNumOut);
653 }
654 else // We must have a GT_OBJ or a GT_LCL_VAR
655 {
656 noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));
657
658 var_types targetType = source->TypeGet();
659 noway_assert(varTypeIsStruct(targetType));
660
            // We will copy this struct to the stack, possibly using a ldp (ARM64) or ldr (ARM32) instruction.
            // Set up loReg (and hiReg on ARM64) from the internal registers that we reserved in Lowering.
            //
664 //
665 regNumber loReg = treeNode->ExtractTempReg();
666#ifdef _TARGET_ARM64_
667 regNumber hiReg = treeNode->GetSingleTempReg();
668#endif // _TARGET_ARM64_
669 regNumber addrReg = REG_NA;
670
671 GenTreeLclVarCommon* varNode = nullptr;
672 GenTree* addrNode = nullptr;
673
674 if (source->OperGet() == GT_LCL_VAR)
675 {
676 varNode = source->AsLclVarCommon();
677 }
678 else // we must have a GT_OBJ
679 {
680 assert(source->OperGet() == GT_OBJ);
681
682 addrNode = source->gtOp.gtOp1;
683
684 // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
685 //
686 if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
687 {
688 // We have a GT_OBJ(GT_LCL_VAR_ADDR)
689 //
690 // We will treat this case the same as above
691 // (i.e if we just had this GT_LCL_VAR directly as the source)
                    // so update 'source' to point to this GT_LCL_VAR_ADDR node
693 // and continue to the codegen for the LCL_VAR node below
694 //
695 varNode = addrNode->AsLclVarCommon();
696 addrNode = nullptr;
697 }
698 }
699
            // Either varNode or addrNode must have been set up above;
            // the xor ensures that only one of the two is set up, not both.
702 assert((varNode != nullptr) ^ (addrNode != nullptr));
703
704 BYTE gcPtrArray[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
705 BYTE* gcPtrs = gcPtrArray;
706
707 unsigned gcPtrCount; // The count of GC pointers in the struct
708 int structSize;
709 bool isHfa;
710
711 // This is the varNum for our load operations,
712 // only used when we have a multireg struct with a LclVar source
713 unsigned varNumInp = BAD_VAR_NUM;
714
715#ifdef _TARGET_ARM_
716 // On ARM32, size of reference map can be larger than MAX_ARG_REG_COUNT
717 gcPtrs = treeNode->gtGcPtrs;
718 gcPtrCount = treeNode->gtNumberReferenceSlots;
719#endif
            // Set up the structSize, isHfa, and gcPtrCount
721 if (varNode != nullptr)
722 {
723 varNumInp = varNode->gtLclNum;
724 assert(varNumInp < compiler->lvaCount);
725 LclVarDsc* varDsc = &compiler->lvaTable[varNumInp];
726
727 // This struct also must live in the stack frame
728 // And it can't live in a register (SIMD)
729 assert(varDsc->lvType == TYP_STRUCT);
730 assert(varDsc->lvOnFrame && !varDsc->lvRegister);
731
732 structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
733 // as that is how much stack is allocated for this LclVar
734 isHfa = varDsc->lvIsHfa();
735#ifdef _TARGET_ARM64_
736 gcPtrCount = varDsc->lvStructGcCount;
737 for (unsigned i = 0; i < gcPtrCount; ++i)
738 gcPtrs[i] = varDsc->lvGcLayout[i];
#endif // _TARGET_ARM64_
740 }
741 else // addrNode is used
742 {
743 assert(addrNode != nullptr);
744
745 // Generate code to load the address that we need into a register
746 genConsumeAddress(addrNode);
747 addrReg = addrNode->gtRegNum;
748
749#ifdef _TARGET_ARM64_
                // If addrReg is equal to loReg, swap loReg and hiReg.
                // This reduces code complexity by only supporting one addrReg-overwrite case.
752 if (loReg == addrReg)
753 {
754 loReg = hiReg;
755 hiReg = addrReg;
756 }
757#endif // _TARGET_ARM64_
758
759 CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;
760
761 structSize = compiler->info.compCompHnd->getClassSize(objClass);
762 isHfa = compiler->IsHfa(objClass);
763#ifdef _TARGET_ARM64_
764 gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
765#endif
766 }
767
            // If we have an HFA we can't have any GC pointers;
            // if not, then the max size for the struct is 16 bytes.
770 if (isHfa)
771 {
772 noway_assert(gcPtrCount == 0);
773 }
774#ifdef _TARGET_ARM64_
775 else
776 {
777 noway_assert(structSize <= 2 * TARGET_POINTER_SIZE);
778 }
779
780 noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
781#endif // _TARGET_ARM64_
782
783 int remainingSize = structSize;
784 unsigned structOffset = 0;
785 unsigned nextIndex = 0;
786
787#ifdef _TARGET_ARM64_
788 // For a >= 16-byte structSize we will generate a ldp and stp instruction each loop
789 // ldp x2, x3, [x0]
790 // stp x2, x3, [sp, #16]
791
792 while (remainingSize >= 2 * TARGET_POINTER_SIZE)
793 {
794 var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
795 var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]);
796
797 if (varNode != nullptr)
798 {
                    // Load from our varNumInp source
800 emit->emitIns_R_R_S_S(INS_ldp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumInp,
801 structOffset);
802 }
803 else
804 {
805 // check for case of destroying the addrRegister while we still need it
806 assert(loReg != addrReg);
807 noway_assert((remainingSize == 2 * TARGET_POINTER_SIZE) || (hiReg != addrReg));
808
809 // Load from our address expression source
                    emit->emitIns_R_R_R_I(INS_ldp, emitTypeSize(type0), loReg, hiReg, addrReg, structOffset,
                                          INS_OPTS_NONE, emitTypeSize(type1));
812 }
813
814 // Emit stp instruction to store the two registers into the outgoing argument area
815 emit->emitIns_S_S_R_R(INS_stp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumOut,
816 argOffsetOut);
817 argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16-bytes of the struct
                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
819
820 remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16-bytes of the struct
821 structOffset += (2 * TARGET_POINTER_SIZE);
822 nextIndex += 2;
823 }
824#else // _TARGET_ARM_
825 // For a >= 4 byte structSize we will generate a ldr and str instruction each loop
826 // ldr r2, [r0]
827 // str r2, [sp, #16]
828 while (remainingSize >= TARGET_POINTER_SIZE)
829 {
830 var_types type = compiler->getJitGCType(gcPtrs[nextIndex]);
831
832 if (varNode != nullptr)
833 {
                    // Load from our varNumInp source
835 emit->emitIns_R_S(INS_ldr, emitTypeSize(type), loReg, varNumInp, structOffset);
836 }
837 else
838 {
839 // check for case of destroying the addrRegister while we still need it
840 assert(loReg != addrReg || remainingSize == TARGET_POINTER_SIZE);
841
842 // Load from our address expression source
843 emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), loReg, addrReg, structOffset);
844 }
845
846 // Emit str instruction to store the register into the outgoing argument area
847 emit->emitIns_S_R(INS_str, emitTypeSize(type), loReg, varNumOut, argOffsetOut);
848 argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of the struct
                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
850
851 remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct
852 structOffset += TARGET_POINTER_SIZE;
853 nextIndex += 1;
854 }
855#endif // _TARGET_ARM_
856
            // For a 12-byte structSize we will generate two load instructions
858 // ldr x2, [x0]
859 // ldr w3, [x0, #8]
860 // str x2, [sp, #16]
861 // str w3, [sp, #24]
862
863 while (remainingSize > 0)
864 {
865 if (remainingSize >= TARGET_POINTER_SIZE)
866 {
867 var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
868 emitAttr nextAttr = emitTypeSize(nextType);
869 remainingSize -= TARGET_POINTER_SIZE;
870
871 if (varNode != nullptr)
872 {
                        // Load from our varNumInp source
874 emit->emitIns_R_S(ins_Load(nextType), nextAttr, loReg, varNumInp, structOffset);
875 }
876 else
877 {
878 assert(loReg != addrReg);
879
880 // Load from our address expression source
881 emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, loReg, addrReg, structOffset);
882 }
883 // Emit a store instruction to store the register into the outgoing argument area
884 emit->emitIns_S_R(ins_Store(nextType), nextAttr, loReg, varNumOut, argOffsetOut);
885 argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
                    assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
887
888 structOffset += TARGET_POINTER_SIZE;
889 nextIndex++;
890 }
891 else // (remainingSize < TARGET_POINTER_SIZE)
892 {
893 int loadSize = remainingSize;
894 remainingSize = 0;
895
896 // We should never have to do a non-pointer sized load when we have a LclVar source
897 assert(varNode == nullptr);
898
                    // The leftover size is smaller than a pointer and thus can never be a GC type
900 assert(varTypeIsGC(compiler->getJitGCType(gcPtrs[nextIndex])) == false);
901
902 var_types loadType = TYP_UINT;
903 if (loadSize == 1)
904 {
905 loadType = TYP_UBYTE;
906 }
907 else if (loadSize == 2)
908 {
909 loadType = TYP_USHORT;
910 }
911 else
912 {
913 // Need to handle additional loadSize cases here
914 noway_assert(loadSize == 4);
915 }
916
917 instruction loadIns = ins_Load(loadType);
918 emitAttr loadAttr = emitAttr(loadSize);
919
920 assert(loReg != addrReg);
921
922 emit->emitIns_R_R_I(loadIns, loadAttr, loReg, addrReg, structOffset);
923
924 // Emit a store instruction to store the register into the outgoing argument area
925 emit->emitIns_S_R(ins_Store(loadType), loadAttr, loReg, varNumOut, argOffsetOut);
926 argOffsetOut += EA_SIZE_IN_BYTES(loadAttr);
                    assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
928 }
929 }
930 }
931 }
932}
933
934//---------------------------------------------------------------------
935// genPutArgReg - generate code for a GT_PUTARG_REG node
936//
937// Arguments
938// tree - the GT_PUTARG_REG node
939//
940// Return value:
941// None
942//
943void CodeGen::genPutArgReg(GenTreeOp* tree)
944{
945 assert(tree->OperIs(GT_PUTARG_REG));
946
947 var_types targetType = tree->TypeGet();
948 regNumber targetReg = tree->gtRegNum;
949
950 assert(targetType != TYP_STRUCT);
951
952 GenTree* op1 = tree->gtOp1;
953 genConsumeReg(op1);
954
955 // If child node is not already in the register we need, move it
956 if (targetReg != op1->gtRegNum)
957 {
958 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
959 }
960
961 genProduceReg(tree);
962}
963
964#if FEATURE_ARG_SPLIT
965//---------------------------------------------------------------------
966// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node
967//
968// Arguments
969// tree - the GT_PUTARG_SPLIT node
970//
971// Return value:
972// None
973//
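// Notes:
//    A split argument is passed partly in registers and partly in the outgoing argument area:
//    the first gtNumRegs pointer-sized pieces go into the target registers and the remaining
//    pieces are stored to the stack; the stack portion is written first ("Put on stack first" below).
//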
974void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode)
975{
976 assert(treeNode->OperIs(GT_PUTARG_SPLIT));
977
978 GenTree* source = treeNode->gtOp1;
979 emitter* emit = getEmitter();
980 unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar;
981 unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
982 unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE;
983
984 if (source->OperGet() == GT_FIELD_LIST)
985 {
986 // Evaluate each of the GT_FIELD_LIST items into their register
987 // and store their register into the outgoing argument area
988 unsigned regIndex = 0;
989 for (GenTreeFieldList* fieldListPtr = source->AsFieldList(); fieldListPtr != nullptr;
990 fieldListPtr = fieldListPtr->Rest())
991 {
992 GenTree* nextArgNode = fieldListPtr->gtGetOp1();
993 regNumber fieldReg = nextArgNode->gtRegNum;
994 genConsumeReg(nextArgNode);
995
996 if (regIndex >= treeNode->gtNumRegs)
997 {
998 var_types type = nextArgNode->TypeGet();
999 emitAttr attr = emitTypeSize(type);
1000
1001 // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
1002 // argument area
1003 emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, argOffsetOut);
1004 argOffsetOut += EA_SIZE_IN_BYTES(attr);
                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
1006 }
1007 else
1008 {
1009 var_types type = treeNode->GetRegType(regIndex);
1010 regNumber argReg = treeNode->GetRegNumByIdx(regIndex);
1011#ifdef _TARGET_ARM_
1012 if (type == TYP_LONG)
1013 {
1014 // We should only see long fields for DOUBLEs passed in 2 integer registers, via bitcast.
1015 // All other LONGs should have been decomposed.
1016 // Handle the first INT, and then handle the 2nd below.
1017 assert(nextArgNode->OperIs(GT_BITCAST));
1018 type = TYP_INT;
1019 if (argReg != fieldReg)
1020 {
1021 inst_RV_RV(ins_Copy(type), argReg, fieldReg, type);
1022 }
1023 // Now set up the next register for the 2nd INT
1024 argReg = REG_NEXT(argReg);
1025 regIndex++;
1026 assert(argReg == treeNode->GetRegNumByIdx(regIndex));
1027 fieldReg = nextArgNode->AsMultiRegOp()->GetRegNumByIdx(1);
1028 }
1029#endif // _TARGET_ARM_
1030
1031 // If child node is not already in the register we need, move it
1032 if (argReg != fieldReg)
1033 {
1034 inst_RV_RV(ins_Copy(type), argReg, fieldReg, type);
1035 }
1036 regIndex++;
1037 }
1038 }
1039 }
1040 else
1041 {
1042 var_types targetType = source->TypeGet();
1043 assert(source->OperGet() == GT_OBJ);
1044 assert(varTypeIsStruct(targetType));
1045
1046 regNumber baseReg = treeNode->ExtractTempReg();
1047 regNumber addrReg = REG_NA;
1048
1049 GenTreeLclVarCommon* varNode = nullptr;
1050 GenTree* addrNode = nullptr;
1051
1052 addrNode = source->gtOp.gtOp1;
1053
1054 // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
1055 //
1056 if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
1057 {
1058 // We have a GT_OBJ(GT_LCL_VAR_ADDR)
1059 //
1060 // We will treat this case the same as above
1061 // (i.e if we just had this GT_LCL_VAR directly as the source)
            // so update 'source' to point to this GT_LCL_VAR_ADDR node
1063 // and continue to the codegen for the LCL_VAR node below
1064 //
1065 varNode = addrNode->AsLclVarCommon();
1066 addrNode = nullptr;
1067 }
1068
        // Either varNode or addrNode must have been set up above;
        // the xor ensures that only one of the two is set up, not both.
1071 assert((varNode != nullptr) ^ (addrNode != nullptr));
1072
        // Set up the gcPtrs, gcPtrCount, and structSize
1074 BYTE* gcPtrs = treeNode->gtGcPtrs;
1075 unsigned gcPtrCount = treeNode->gtNumberReferenceSlots; // The count of GC pointers in the struct
1076 int structSize = treeNode->getArgSize();
1077
1078 // This is the varNum for our load operations,
1079 // only used when we have a struct with a LclVar source
1080 unsigned srcVarNum = BAD_VAR_NUM;
1081
1082 if (varNode != nullptr)
1083 {
1084 srcVarNum = varNode->gtLclNum;
1085 assert(srcVarNum < compiler->lvaCount);
1086
1087 // handle promote situation
1088 LclVarDsc* varDsc = compiler->lvaTable + srcVarNum;
1089
1090 // This struct also must live in the stack frame
1091 // And it can't live in a register (SIMD)
1092 assert(varDsc->lvType == TYP_STRUCT);
1093 assert(varDsc->lvOnFrame && !varDsc->lvRegister);
1094
1095 // We don't split HFA struct
1096 assert(!varDsc->lvIsHfa());
1097 }
1098 else // addrNode is used
1099 {
1100 assert(addrNode != nullptr);
1101
1102 // Generate code to load the address that we need into a register
1103 genConsumeAddress(addrNode);
1104 addrReg = addrNode->gtRegNum;
1105
            // If addrReg is equal to baseReg, we use the last target register as an alternative baseReg.
            // Because the candidate mask for the internal baseReg does not include any of the target registers,
            // we can ensure that baseReg, addrReg, and the last target register are not all the same.
1109 assert(baseReg != addrReg);
1110
1111 // We don't split HFA struct
1112 assert(!compiler->IsHfa(source->gtObj.gtClass));
1113 }
1114
1115 // Put on stack first
1116 unsigned nextIndex = treeNode->gtNumRegs;
1117 unsigned structOffset = nextIndex * TARGET_POINTER_SIZE;
1118 int remainingSize = structSize - structOffset;
1119
        // remainingSize is always a multiple of TARGET_POINTER_SIZE
1121 assert(remainingSize % TARGET_POINTER_SIZE == 0);
1122 while (remainingSize > 0)
1123 {
1124 var_types type = compiler->getJitGCType(gcPtrs[nextIndex]);
1125
1126 if (varNode != nullptr)
1127 {
                // Load from our srcVarNum source
1129 emit->emitIns_R_S(INS_ldr, emitTypeSize(type), baseReg, srcVarNum, structOffset);
1130 }
1131 else
1132 {
1133 // check for case of destroying the addrRegister while we still need it
1134 assert(baseReg != addrReg);
1135
1136 // Load from our address expression source
1137 emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), baseReg, addrReg, structOffset);
1138 }
1139
1140 // Emit str instruction to store the register into the outgoing argument area
1141 emit->emitIns_S_R(INS_str, emitTypeSize(type), baseReg, varNumOut, argOffsetOut);
            argOffsetOut += TARGET_POINTER_SIZE;  // We stored a pointer-sized piece of the struct
            assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
            remainingSize -= TARGET_POINTER_SIZE; // We loaded a pointer-sized piece of the struct
1145 structOffset += TARGET_POINTER_SIZE;
1146 nextIndex += 1;
1147 }
1148
        // We set up the registers in order, so that by the time we assign the last target register,
        // `baseReg` is no longer needed, in case we had to reuse the last target register for it.
1151 structOffset = 0;
1152 for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++)
1153 {
1154 regNumber targetReg = treeNode->GetRegNumByIdx(idx);
1155 var_types type = treeNode->GetRegType(idx);
1156
1157 if (varNode != nullptr)
1158 {
                // Load from our srcVarNum source
1160 emit->emitIns_R_S(INS_ldr, emitTypeSize(type), targetReg, srcVarNum, structOffset);
1161 }
1162 else
1163 {
1164 // check for case of destroying the addrRegister while we still need it
1165 if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1)
1166 {
1167 assert(targetReg != baseReg);
1168 emit->emitIns_R_R(INS_mov, emitActualTypeSize(type), baseReg, addrReg);
1169 addrReg = baseReg;
1170 }
1171
1172 // Load from our address expression source
1173 emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), targetReg, addrReg, structOffset);
1174 }
1175 structOffset += TARGET_POINTER_SIZE;
1176 }
1177 }
1178 genProduceReg(treeNode);
1179}
1180#endif // FEATURE_ARG_SPLIT
1181
1182//----------------------------------------------------------------------------------
1183// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
1184//
1185// Arguments:
1186// treeNode - Gentree of GT_STORE_LCL_VAR
1187//
1188// Return Value:
1189// None
1190//
1191// Assumption:
1192// The child of store is a multi-reg call node.
1193// genProduceReg() on treeNode is made by caller of this routine.
1194//
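// Notes:
//    If the store target has a register assigned (currently only SIMD locals), each call return
//    register is inserted into an element of the destination vector register; otherwise each
//    return register is stored to the local's stack home at its corresponding offset.
//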
1195void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode)
1196{
1197 assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
1198
1199#if defined(_TARGET_ARM_)
    // Longs are returned in two return registers on Arm32.
    // Structs, including HFAs, are returned in up to four return registers on Arm32.
    assert(varTypeIsLong(treeNode) || varTypeIsStruct(treeNode));
#elif defined(_TARGET_ARM64_)
    // Structs of size >= 9 and <= 16, as well as HFAs, are returned in multiple return registers on ARM64.
    assert(varTypeIsStruct(treeNode));
1206#endif // _TARGET_*
1207
1208 // Assumption: current implementation requires that a multi-reg
1209 // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
1210 // being promoted.
1211 unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
1212 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
1213 noway_assert(varDsc->lvIsMultiRegRet);
1214
1215 GenTree* op1 = treeNode->gtGetOp1();
1216 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
1217 GenTreeCall* call = actualOp1->AsCall();
1218 assert(call->HasMultiRegRetVal());
1219
1220 genConsumeRegs(op1);
1221
1222 ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
1223 unsigned regCount = pRetTypeDesc->GetReturnRegCount();
1224
1225 if (treeNode->gtRegNum != REG_NA)
1226 {
1227 // Right now the only enregistrable multi-reg return types supported are SIMD types.
1228 assert(varTypeIsSIMD(treeNode));
1229 assert(regCount != 0);
1230
1231 regNumber dst = treeNode->gtRegNum;
1232
        // Treat dst register as a homogeneous vector with element size equal to the src size
1234 // Insert pieces in reverse order
1235 for (int i = regCount - 1; i >= 0; --i)
1236 {
1237 var_types type = pRetTypeDesc->GetReturnRegType(i);
1238 regNumber reg = call->GetRegNumByIdx(i);
1239 if (op1->IsCopyOrReload())
1240 {
1241 // GT_COPY/GT_RELOAD will have valid reg for those positions
1242 // that need to be copied or reloaded.
1243 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
1244 if (reloadReg != REG_NA)
1245 {
1246 reg = reloadReg;
1247 }
1248 }
1249
1250 assert(reg != REG_NA);
1251 if (varTypeIsFloating(type))
1252 {
1253 // If the register piece was passed in a floating point register
1254 // Use a vector mov element instruction
1255 // src is not a vector, so it is in the first element reg[0]
1256 // mov dst[i], reg[0]
1257 // This effectively moves from `reg[0]` to `dst[i]`, leaving other dst bits unchanged till further
1258 // iterations
1259 // For the case where reg == dst, if we iterate so that we write dst[0] last, we eliminate the need for
1260 // a temporary
1261 getEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(type), dst, reg, i, 0);
1262 }
1263 else
1264 {
1265 // If the register piece was passed in an integer register
1266 // Use a vector mov from general purpose register instruction
1267 // mov dst[i], reg
1268 // This effectively moves from `reg` to `dst[i]`
1269 getEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(type), dst, reg, i);
1270 }
1271 }
1272
1273 genProduceReg(treeNode);
1274 }
1275 else
1276 {
1277 // Stack store
1278 int offset = 0;
1279 for (unsigned i = 0; i < regCount; ++i)
1280 {
1281 var_types type = pRetTypeDesc->GetReturnRegType(i);
1282 regNumber reg = call->GetRegNumByIdx(i);
1283 if (op1->IsCopyOrReload())
1284 {
1285 // GT_COPY/GT_RELOAD will have valid reg for those positions
1286 // that need to be copied or reloaded.
1287 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
1288 if (reloadReg != REG_NA)
1289 {
1290 reg = reloadReg;
1291 }
1292 }
1293
1294 assert(reg != REG_NA);
1295 getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
1296 offset += genTypeSize(type);
1297 }
1298
1299 genUpdateLife(treeNode);
1300 varDsc->lvRegNum = REG_STK;
1301 }
1302}
1303
1304//------------------------------------------------------------------------
1305// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node.
1306//
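// Notes:
//    The generated sequence is roughly (register names are illustrative):
//        cmp  indexReg, lengthReg    ; or "cmp lengthReg, #index" when the index is a contained immediate
//        b.hs <RngChkFail>           ; unsigned >= (b.ls for the swapped immediate form)
//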
1307void CodeGen::genRangeCheck(GenTree* oper)
1308{
1309#ifdef FEATURE_SIMD
1310 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
1311#else // !FEATURE_SIMD
1312 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
1313#endif // !FEATURE_SIMD
1314
1315 GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
1316
1317 GenTree* arrLen = bndsChk->gtArrLen;
1318 GenTree* arrIndex = bndsChk->gtIndex;
1319 GenTree* arrRef = NULL;
1320 int lenOffset = 0;
1321
1322 GenTree* src1;
1323 GenTree* src2;
1324 emitJumpKind jmpKind;
1325
1326 genConsumeRegs(arrIndex);
1327 genConsumeRegs(arrLen);
1328
1329 if (arrIndex->isContainedIntOrIImmed())
1330 {
1331 // To encode using a cmp immediate, we place the
1332 // constant operand in the second position
1333 src1 = arrLen;
1334 src2 = arrIndex;
1335 jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);
1336 }
1337 else
1338 {
1339 src1 = arrIndex;
1340 src2 = arrLen;
1341 jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
1342 }
1343
1344 var_types bndsChkType = genActualType(src2->TypeGet());
1345#if DEBUG
1346 // Bounds checks can only be 32 or 64 bit sized comparisons.
1347 assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
1348
    // The type of the bounds check should always be wide enough to compare against the index.
1350 assert(emitTypeSize(bndsChkType) >= emitActualTypeSize(src1->TypeGet()));
1351#endif // DEBUG
1352
1353 getEmitter()->emitInsBinary(INS_cmp, emitActualTypeSize(bndsChkType), src1, src2);
1354 genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
1355}
1356
1357//---------------------------------------------------------------------
1358// genCodeForPhysReg - generate code for a GT_PHYSREG node
1359//
1360// Arguments
1361// tree - the GT_PHYSREG node
1362//
1363// Return value:
1364// None
1365//
1366void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
1367{
1368 assert(tree->OperIs(GT_PHYSREG));
1369
1370 var_types targetType = tree->TypeGet();
1371 regNumber targetReg = tree->gtRegNum;
1372
1373 if (targetReg != tree->gtSrcReg)
1374 {
1375 inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType);
1376 genTransferRegGCState(targetReg, tree->gtSrcReg);
1377 }
1378
1379 genProduceReg(tree);
1380}
1381
1382//---------------------------------------------------------------------
1383// genCodeForNullCheck - generate code for a GT_NULLCHECK node
1384//
1385// Arguments
1386// tree - the GT_NULLCHECK node
1387//
1388// Return value:
1389// None
1390//
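// Notes:
//    The check is performed by loading 4 bytes from the address into the zero register on ARM64
//    (or into a temporary register on ARM32); the load faults if the address is null.
//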
1391void CodeGen::genCodeForNullCheck(GenTreeOp* tree)
1392{
1393 assert(tree->OperIs(GT_NULLCHECK));
1394 assert(!tree->gtOp1->isContained());
1395 regNumber addrReg = genConsumeReg(tree->gtOp1);
1396
1397#ifdef _TARGET_ARM64_
1398 regNumber targetReg = REG_ZR;
1399#else
1400 regNumber targetReg = tree->GetSingleTempReg();
1401#endif
1402
1403 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, targetReg, addrReg, 0);
1404}
1405
1406//------------------------------------------------------------------------
1407// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
1408// lower bound for the given dimension.
1409//
1410// Arguments:
1411// elemType - the element type of the array
1412// rank - the rank of the array
1413// dimension - the dimension for which the lower bound offset will be returned.
1414//
1415// Return Value:
1416// The offset.
1417// TODO-Cleanup: move to CodeGenCommon.cpp
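//
// Notes:
//    The per-dimension data follows the array header as 'rank' INT32 lengths followed by
//    'rank' INT32 lower bounds; for example, with rank == 2 the lower bound of dimension 0
//    is at eeGetArrayDataOffset(elemType) + sizeof(INT32) * (0 + 2).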
1418
1419// static
1420unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
1421{
1422 // Note that the lower bound and length fields of the Array object are always TYP_INT
1423 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
1424}
1425
1426//------------------------------------------------------------------------
// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
//     size for the given dimension.
1429//
1430// Arguments:
1431// elemType - the element type of the array
1432// rank - the rank of the array
//    dimension - the dimension for which the size offset will be returned.
1434//
1435// Return Value:
1436// The offset.
1437// TODO-Cleanup: move to CodeGenCommon.cpp
1438
1439// static
1440unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
1441{
1442 // Note that the lower bound and length fields of the Array object are always TYP_INT
1443 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
1444}
1445
1446//------------------------------------------------------------------------
1447// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
1448// producing the effective index by subtracting the lower bound.
1449//
1450// Arguments:
1451// arrIndex - the node for which we're generating code
1452//
1453// Return Value:
1454// None.
1455//
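// Notes:
//    The generated sequence is roughly (register names are illustrative):
//        ldr  tmpReg, [arrReg + lowerBoundOffset]      ; load the lower bound for this dimension
//        sub  tgtReg, indexReg, tmpReg                 ; compute the effective (zero-based) index
//        ldr  tmpReg, [arrReg + dimensionSizeOffset]   ; load the length of this dimension
//        cmp  tgtReg, tmpReg
//        b.hs <RngChkFail>                             ; unsigned >= length means out of range
//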
1456void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
1457{
1458 emitter* emit = getEmitter();
1459 GenTree* arrObj = arrIndex->ArrObj();
1460 GenTree* indexNode = arrIndex->IndexExpr();
1461 regNumber arrReg = genConsumeReg(arrObj);
1462 regNumber indexReg = genConsumeReg(indexNode);
1463 regNumber tgtReg = arrIndex->gtRegNum;
1464 noway_assert(tgtReg != REG_NA);
1465
1466 // We will use a temp register to load the lower bound and dimension size values.
1467
1468 regNumber tmpReg = arrIndex->GetSingleTempReg();
1469 assert(tgtReg != tmpReg);
1470
1471 unsigned dim = arrIndex->gtCurrDim;
1472 unsigned rank = arrIndex->gtArrRank;
1473 var_types elemType = arrIndex->gtArrElemType;
1474 unsigned offset;
1475
1476 offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim);
1477 emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
1478 emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg);
1479
1480 offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
1481 emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
1482 emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg);
1483
1484 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
1485 genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
1486
1487 genProduceReg(arrIndex);
1488}
1489
1490//------------------------------------------------------------------------
1491// genCodeForArrOffset: Generates code to compute the flattened array offset for
1492// one dimension of an array reference:
1493// result = (prevDimOffset * dimSize) + effectiveIndex
1494// where dimSize is obtained from the arrObj operand
1495//
1496// Arguments:
1497// arrOffset - the node for which we're generating code
1498//
1499// Return Value:
1500// None.
1501//
1502// Notes:
1503// dimSize and effectiveIndex are always non-negative, the former by design,
1504// and the latter because it has been normalized to be zero-based.
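//
//    When prevDimOffset is not the constant zero, the generated sequence is roughly:
//        ldr  tmpReg, [arrReg + dimensionSizeOffset]   ; load dimSize
//        madd tgtReg, tmpReg, offsetReg, indexReg      ; (mla on ARM32) tgtReg = dimSize * prevDimOffset + index
//    Otherwise the effective index is simply moved into the target register if needed.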
1505
1506void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
1507{
1508 GenTree* offsetNode = arrOffset->gtOffset;
1509 GenTree* indexNode = arrOffset->gtIndex;
1510 regNumber tgtReg = arrOffset->gtRegNum;
1511
1512 noway_assert(tgtReg != REG_NA);
1513
1514 if (!offsetNode->IsIntegralConst(0))
1515 {
1516 emitter* emit = getEmitter();
1517 regNumber offsetReg = genConsumeReg(offsetNode);
1518 regNumber indexReg = genConsumeReg(indexNode);
1519 regNumber arrReg = genConsumeReg(arrOffset->gtArrObj);
1520 noway_assert(offsetReg != REG_NA);
1521 noway_assert(indexReg != REG_NA);
1522 noway_assert(arrReg != REG_NA);
1523
1524 regNumber tmpReg = arrOffset->GetSingleTempReg();
1525
1526 unsigned dim = arrOffset->gtCurrDim;
1527 unsigned rank = arrOffset->gtArrRank;
1528 var_types elemType = arrOffset->gtArrElemType;
1529 unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
1530
1531 // Load tmpReg with the dimension size and evaluate
1532 // tgtReg = offsetReg*tmpReg + indexReg.
1533 emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset);
1534 emit->emitIns_R_R_R_R(INS_MULADD, EA_PTRSIZE, tgtReg, tmpReg, offsetReg, indexReg);
1535 }
1536 else
1537 {
1538 regNumber indexReg = genConsumeReg(indexNode);
1539 if (indexReg != tgtReg)
1540 {
1541 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
1542 }
1543 }
1544 genProduceReg(arrOffset);
1545}
1546
1547//------------------------------------------------------------------------
1548// genCodeForShift: Generates the code sequence for a GenTree node that
1549// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
1550//
1551// Arguments:
1552// tree - the bit shift node (that specifies the type of bit shift to perform).
1553//
1554// Assumptions:
1555// a) All GenTrees are register allocated.
1556//
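// Notes:
//    A constant shift count is masked to the operand bit width below (immWidth - 1), so, for
//    example, a 64-bit shift by 65 on ARM64 is emitted as a shift by 1.
//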
1557void CodeGen::genCodeForShift(GenTree* tree)
1558{
1559 var_types targetType = tree->TypeGet();
1560 genTreeOps oper = tree->OperGet();
1561 instruction ins = genGetInsForOper(oper, targetType);
1562 emitAttr size = emitActualTypeSize(tree);
1563
1564 assert(tree->gtRegNum != REG_NA);
1565
1566 genConsumeOperands(tree->AsOp());
1567
1568 GenTree* operand = tree->gtGetOp1();
1569 GenTree* shiftBy = tree->gtGetOp2();
1570 if (!shiftBy->IsCnsIntOrI())
1571 {
1572 getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
1573 }
1574 else
1575 {
1576 unsigned immWidth = emitter::getBitWidth(size); // For ARM64, immWidth will be set to 32 or 64
1577 unsigned shiftByImm = (unsigned)shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
1578
1579 getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
1580 }
1581
1582 genProduceReg(tree);
1583}
1584
1585//------------------------------------------------------------------------
1586// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR.
1587//
1588// Arguments:
1589// tree - the node.
1590//
1591void CodeGen::genCodeForLclAddr(GenTree* tree)
1592{
1593 assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));
1594
1595 var_types targetType = tree->TypeGet();
1596 regNumber targetReg = tree->gtRegNum;
1597
1598 // Address of a local var.
1599 noway_assert(targetType == TYP_BYREF);
1600
1601 inst_RV_TT(INS_lea, targetReg, tree, 0, EA_BYREF);
1602 genProduceReg(tree);
1603}
1604
1605//------------------------------------------------------------------------
1606// genCodeForLclFld: Produce code for a GT_LCL_FLD node.
1607//
1608// Arguments:
1609// tree - the GT_LCL_FLD node
1610//
1611void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
1612{
1613 assert(tree->OperIs(GT_LCL_FLD));
1614
1615 var_types targetType = tree->TypeGet();
1616 regNumber targetReg = tree->gtRegNum;
1617 emitter* emit = getEmitter();
1618
1619 NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported");
1620 assert(targetReg != REG_NA);
1621
1622 emitAttr size = emitTypeSize(targetType);
1623 unsigned offs = tree->gtLclOffs;
1624 unsigned varNum = tree->gtLclNum;
1625 assert(varNum < compiler->lvaCount);
1626
1627 if (varTypeIsFloating(targetType) || varTypeIsSIMD(targetType))
1628 {
1629 emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs);
1630 }
1631 else
1632 {
1633#ifdef _TARGET_ARM64_
1634 size = EA_SET_SIZE(size, EA_8BYTE);
1635#endif // _TARGET_ARM64_
1636 emit->emitIns_R_S(ins_Move_Extend(targetType, false), size, targetReg, varNum, offs);
1637 }
1638
1639 genProduceReg(tree);
1640}
1641
1642//------------------------------------------------------------------------
1643// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node.
1644//
1645// Arguments:
1646// tree - the GT_INDEX_ADDR node
1647//
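// Notes:
//    The effective address is computed as  addr = base + index * gtElemSize + gtElemOffset,
//    using a scaled add when gtElemSize is a small power of two and a madd/mla (multiply-add)
//    otherwise, optionally preceded by a bounds check of the index against the array length.
//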
1648void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
1649{
1650 GenTree* const base = node->Arr();
1651 GenTree* const index = node->Index();
1652
1653 genConsumeReg(base);
1654 genConsumeReg(index);
1655
1656 // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers
1657 // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the
1658 // base register is multiply-used. As such, we need to mark the base register as containing a GC pointer until
1659 // we are finished generating the code for this node.
1660
1661 gcInfo.gcMarkRegPtrVal(base->gtRegNum, base->TypeGet());
1662 assert(!varTypeIsGC(index->TypeGet()));
1663
1664 const regNumber tmpReg = node->GetSingleTempReg();
1665
1666 // Generate the bounds check if necessary.
1667 if ((node->gtFlags & GTF_INX_RNGCHK) != 0)
1668 {
        // Create a GT_IND(GT_LEA) tree for the array length access and load the length into a register.
1670 GenTreeAddrMode arrLenAddr(base->TypeGet(), base, nullptr, 0, static_cast<unsigned>(node->gtLenOffset));
1671 arrLenAddr.gtRegNum = REG_NA;
1672 arrLenAddr.SetContained();
1673
1674 GenTreeIndir arrLen = indirForm(TYP_INT, &arrLenAddr);
1675 arrLen.gtRegNum = tmpReg;
1676 arrLen.ClearContained();
1677
1678 getEmitter()->emitInsLoadStoreOp(ins_Load(TYP_INT), emitTypeSize(TYP_INT), arrLen.gtRegNum, &arrLen);
1679
1680#ifdef _TARGET_64BIT_
1681 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case that the index
1682 // is a native int on a 64-bit platform, we will need to widen the array length and the compare.
1683 if (index->TypeGet() == TYP_I_IMPL)
1684 {
1685 // Extend the array length as needed.
1686 getEmitter()->emitIns_R_R(ins_Move_Extend(TYP_INT, true), EA_8BYTE, arrLen.gtRegNum, arrLen.gtRegNum);
1687 }
1688#endif
1689
1690 // Generate the range check.
1691 getEmitter()->emitInsBinary(INS_cmp, emitActualTypeSize(TYP_I_IMPL), index, &arrLen);
1692 genJumpToThrowHlpBlk(genJumpKindForOper(GT_GE, CK_UNSIGNED), SCK_RNGCHK_FAIL, node->gtIndRngFailBB);
1693 }
1694
1695 // Can we use a ScaledAdd instruction?
1696 //
1697 if (isPow2(node->gtElemSize) && (node->gtElemSize <= 32768))
1698 {
1699 DWORD scale;
1700 BitScanForward(&scale, node->gtElemSize);
1701
1702 // dest = base + index * scale
1703 genScaledAdd(emitActualTypeSize(node), node->gtRegNum, base->gtRegNum, index->gtRegNum, scale);
1704 }
1705 else // we have to load the element size and use a MADD (multiply-add) instruction
1706 {
1707 // tmpReg = element size
1708 CodeGen::genSetRegToIcon(tmpReg, (ssize_t)node->gtElemSize, TYP_INT);
1709
1710 // dest = index * tmpReg + base
1711 getEmitter()->emitIns_R_R_R_R(INS_MULADD, emitActualTypeSize(node), node->gtRegNum, index->gtRegNum, tmpReg,
1712 base->gtRegNum);
1713 }
1714
1715 // dest = dest + elemOffs
1716 getEmitter()->emitIns_R_R_I(INS_add, emitActualTypeSize(node), node->gtRegNum, node->gtRegNum, node->gtElemOffset);
1717
1718 gcInfo.gcMarkRegSetNpt(base->gtGetRegMask());
1719
1720 genProduceReg(node);
1721}
1722
1723//------------------------------------------------------------------------
1724// genCodeForIndir: Produce code for a GT_IND node.
1725//
1726// Arguments:
1727// tree - the GT_IND node
1728//
1729void CodeGen::genCodeForIndir(GenTreeIndir* tree)
1730{
1731 assert(tree->OperIs(GT_IND));
1732
1733 var_types targetType = tree->TypeGet();
1734 regNumber targetReg = tree->gtRegNum;
1735 emitter* emit = getEmitter();
1736 emitAttr attr = emitTypeSize(tree);
1737 instruction ins = ins_Load(targetType);
1738
1739#ifdef FEATURE_SIMD
1740 // Handling of Vector3 type values loaded through indirection.
1741 if (tree->TypeGet() == TYP_SIMD12)
1742 {
1743 genLoadIndTypeSIMD12(tree);
1744 return;
1745 }
1746#endif // FEATURE_SIMD
1747
1748 genConsumeAddress(tree->Addr());
1749 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
1750 {
1751 bool isAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);
1752
1753 assert((attr != EA_1BYTE) || isAligned);
1754
1755#ifdef _TARGET_ARM64_
1756 GenTree* addr = tree->Addr();
1757 bool useLoadAcquire = genIsValidIntReg(targetReg) && !addr->isContained() &&
1758 (varTypeIsUnsigned(targetType) || varTypeIsI(targetType)) &&
1759 !(tree->gtFlags & GTF_IND_UNALIGNED);
1760
1761 if (useLoadAcquire)
1762 {
1763 switch (EA_SIZE(attr))
1764 {
1765 case EA_1BYTE:
1766 assert(ins == INS_ldrb);
1767 ins = INS_ldarb;
1768 break;
1769 case EA_2BYTE:
1770 assert(ins == INS_ldrh);
1771 ins = INS_ldarh;
1772 break;
1773 case EA_4BYTE:
1774 case EA_8BYTE:
1775 assert(ins == INS_ldr);
1776 ins = INS_ldar;
1777 break;
1778 default:
1779 assert(false); // We should not get here
1780 }
1781 }
1782
1783 emit->emitInsLoadStoreOp(ins, attr, targetReg, tree);
1784
1785 if (!useLoadAcquire) // issue an INS_BARRIER_OSHLD after a volatile LdInd operation
1786 instGen_MemoryBarrier(INS_BARRIER_OSHLD);
1787#else
1788 emit->emitInsLoadStoreOp(ins, attr, targetReg, tree);
1789
1790 // issue a full memory barrier after a volatile LdInd operation
1791 instGen_MemoryBarrier();
1792#endif // _TARGET_ARM64_
1793 }
1794 else
1795 {
1796 emit->emitInsLoadStoreOp(ins, attr, targetReg, tree);
1797 }
1798
1799 genProduceReg(tree);
1800}
1801
1802// Generate code for a CpBlk node by means of the VM memcpy helper call.
1803// Preconditions:
1804// a) The size argument of the CpBlk is not an integer constant, or
1805// b) The size argument is a constant but is larger than CPBLK_UNROLL_LIMIT bytes.
1806void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
1807{
1808 // Make sure we got the arguments of the cpblk operation in the right registers
1809 unsigned blockSize = cpBlkNode->Size();
1810 GenTree* dstAddr = cpBlkNode->Addr();
1811 assert(!dstAddr->isContained());
1812
1813 genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
1814
1815#ifdef _TARGET_ARM64_
1816 if (blockSize != 0)
1817 {
1818 assert(blockSize > CPBLK_UNROLL_LIMIT);
1819 }
1820#endif // _TARGET_ARM64_
1821
1822 if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
1823 {
1824 // issue a full memory barrier before a volatile CpBlk operation
1825 instGen_MemoryBarrier();
1826 }
1827
1828 genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
1829
1830 if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
1831 {
1832#ifdef _TARGET_ARM64_
1833 // issue an INS_BARRIER_ISHLD after a volatile CpBlk operation
1834 instGen_MemoryBarrier(INS_BARRIER_ISHLD);
1835#else
1836 // issue a full memory barrier after a volatile CpBlk operation
1837 instGen_MemoryBarrier();
1838#endif // _TARGET_ARM64_
1839 }
1840}
1841
1842//----------------------------------------------------------------------------------
1843// genCodeForCpBlkUnroll: Generates CpBlk code by performing a loop unroll
1844//
1845// Arguments:
1846// cpBlkNode - Copy block node
1847//
1848// Return Value:
1849// None
1850//
1851// Assumption:
1852// The size argument of the CpBlk node is a constant and <= CPBLK_UNROLL_LIMIT bytes.
1853//
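// As a rough illustration (registers and offsets are chosen by LSRA and may differ),
// a 20-byte copy on ARM64 could unroll to something like:
//     ldp x12, x13, [srcReg]
//     stp x12, x13, [dstReg]
//     ldr w12, [srcReg, #16]
//     str w12, [dstReg, #16]
// while on ARM32 the copy proceeds one 4-byte register at a time.
//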
1854void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
1855{
1856 // Make sure we got the arguments of the cpblk operation in the right registers
1857 unsigned size = cpBlkNode->Size();
1858 GenTree* dstAddr = cpBlkNode->Addr();
1859 GenTree* source = cpBlkNode->Data();
1860 GenTree* srcAddr = nullptr;
1861
1862 assert((size != 0) && (size <= CPBLK_UNROLL_LIMIT));
1863
1864 emitter* emit = getEmitter();
1865
1866 if (dstAddr->isUsedFromReg())
1867 {
1868 genConsumeReg(dstAddr);
1869 }
1870
1871 if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
1872 {
1873 // issue a full memory barrier before a volatile CpBlkUnroll operation
1874 instGen_MemoryBarrier();
1875 }
1876
1877 if (source->gtOper == GT_IND)
1878 {
1879 srcAddr = source->gtGetOp1();
1880 if (srcAddr->isUsedFromReg())
1881 {
1882 genConsumeReg(srcAddr);
1883 }
1884 }
1885 else
1886 {
1887 noway_assert(source->IsLocal());
1888 // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
1889 // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
1890 if (source->OperGet() == GT_LCL_VAR)
1891 {
1892 source->SetOper(GT_LCL_VAR_ADDR);
1893 }
1894 else
1895 {
1896 assert(source->OperGet() == GT_LCL_FLD);
1897 source->SetOper(GT_LCL_FLD_ADDR);
1898 }
1899 srcAddr = source;
1900 }
1901
1902 unsigned offset = 0;
1903
1904 // Grab the integer temp register to emit the loads and stores.
1905 regNumber tmpReg = cpBlkNode->ExtractTempReg(RBM_ALLINT);
1906
1907#ifdef _TARGET_ARM64_
1908 if (size >= 2 * REGSIZE_BYTES)
1909 {
1910 regNumber tmp2Reg = cpBlkNode->ExtractTempReg(RBM_ALLINT);
1911
1912 size_t slots = size / (2 * REGSIZE_BYTES);
1913
1914 while (slots-- > 0)
1915 {
1916 // Load
1917 genCodeForLoadPairOffset(tmpReg, tmp2Reg, srcAddr, offset);
1918 // Store
1919 genCodeForStorePairOffset(tmpReg, tmp2Reg, dstAddr, offset);
1920 offset += 2 * REGSIZE_BYTES;
1921 }
1922 }
1923
1924 // Fill the remainder (15 bytes or less) if there's one.
1925 if ((size & 0xf) != 0)
1926 {
1927 if ((size & 8) != 0)
1928 {
1929 genCodeForLoadOffset(INS_ldr, EA_8BYTE, tmpReg, srcAddr, offset);
1930 genCodeForStoreOffset(INS_str, EA_8BYTE, tmpReg, dstAddr, offset);
1931 offset += 8;
1932 }
1933 if ((size & 4) != 0)
1934 {
1935 genCodeForLoadOffset(INS_ldr, EA_4BYTE, tmpReg, srcAddr, offset);
1936 genCodeForStoreOffset(INS_str, EA_4BYTE, tmpReg, dstAddr, offset);
1937 offset += 4;
1938 }
1939 if ((size & 2) != 0)
1940 {
1941 genCodeForLoadOffset(INS_ldrh, EA_2BYTE, tmpReg, srcAddr, offset);
1942 genCodeForStoreOffset(INS_strh, EA_2BYTE, tmpReg, dstAddr, offset);
1943 offset += 2;
1944 }
1945 if ((size & 1) != 0)
1946 {
1947 genCodeForLoadOffset(INS_ldrb, EA_1BYTE, tmpReg, srcAddr, offset);
1948 genCodeForStoreOffset(INS_strb, EA_1BYTE, tmpReg, dstAddr, offset);
1949 }
1950 }
1951#else // !_TARGET_ARM64_
1952 size_t slots = size / REGSIZE_BYTES;
1953 while (slots-- > 0)
1954 {
1955 genCodeForLoadOffset(INS_ldr, EA_4BYTE, tmpReg, srcAddr, offset);
1956 genCodeForStoreOffset(INS_str, EA_4BYTE, tmpReg, dstAddr, offset);
1957 offset += REGSIZE_BYTES;
1958 }
1959
1960 // Fill the remainder (3 bytes or less) if there's one.
1961 if ((size & 0x03) != 0)
1962 {
1963 if ((size & 2) != 0)
1964 {
1965 genCodeForLoadOffset(INS_ldrh, EA_2BYTE, tmpReg, srcAddr, offset);
1966 genCodeForStoreOffset(INS_strh, EA_2BYTE, tmpReg, dstAddr, offset);
1967 offset += 2;
1968 }
1969 if ((size & 1) != 0)
1970 {
1971 genCodeForLoadOffset(INS_ldrb, EA_1BYTE, tmpReg, srcAddr, offset);
1972 genCodeForStoreOffset(INS_strb, EA_1BYTE, tmpReg, dstAddr, offset);
1973 }
1974 }
1975#endif // !_TARGET_ARM64_
1976
1977 if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
1978 {
1979#ifdef _TARGET_ARM64_
1980 // issue an INS_BARRIER_ISHLD after a volatile CpBlkUnroll operation
1981 instGen_MemoryBarrier(INS_BARRIER_ISHLD);
1982#else
1983 // issue a full memory barrier after a volatile CpBlkUnroll operation
1984 instGen_MemoryBarrier();
1985#endif // !_TARGET_ARM64_
1986 }
1987}
1988
1989// Generates code for InitBlk by calling the VM memset helper function.
1990// Preconditions:
1991// a) The size argument of the InitBlk is not an integer constant, or
1992// b) The size argument of the InitBlk is larger than INITBLK_UNROLL_LIMIT bytes.
1993void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
1994{
1995 unsigned size = initBlkNode->Size();
1996 GenTree* dstAddr = initBlkNode->Addr();
1997 GenTree* initVal = initBlkNode->Data();
1998 if (initVal->OperIsInitVal())
1999 {
2000 initVal = initVal->gtGetOp1();
2001 }
2002
2003 assert(!dstAddr->isContained());
2004 assert(!initVal->isContained());
2005
2006#ifdef _TARGET_ARM64_
2007 if (size != 0)
2008 {
2009 assert((size > INITBLK_UNROLL_LIMIT) || !initVal->IsCnsIntOrI());
2010 }
2011#endif // _TARGET_ARM64_
2012
2013 genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
2014
2015 if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
2016 {
2017 // issue a full memory barrier before a volatile InitBlk operation
2018 instGen_MemoryBarrier();
2019 }
2020
2021 genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
2022}
2023
2024// Generate code for a load from some address + offset
2025// base: tree node which can be either a local address or an arbitrary node
2026// offset: distance from the base from which to load
2027void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
2028{
2029 emitter* emit = getEmitter();
2030
2031 if (base->OperIsLocalAddr())
2032 {
2033 if (base->gtOper == GT_LCL_FLD_ADDR)
2034 offset += base->gtLclFld.gtLclOffs;
2035 emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset);
2036 }
2037 else
2038 {
2039 emit->emitIns_R_R_I(ins, size, dst, base->gtRegNum, offset);
2040 }
2041}
2042
2043// Generate code for a store to some address + offset
2044// base: tree node which can be either a local address or an arbitrary node
2045// offset: distance from the base at which to store
2046void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
2047{
2048 emitter* emit = getEmitter();
2049
2050 if (base->OperIsLocalAddr())
2051 {
2052 if (base->gtOper == GT_LCL_FLD_ADDR)
2053 offset += base->gtLclFld.gtLclOffs;
2054 emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset);
2055 }
2056 else
2057 {
2058 emit->emitIns_R_R_I(ins, size, src, base->gtRegNum, offset);
2059 }
2060}
2061
2062//------------------------------------------------------------------------
2063// genRegCopy: Produce code for a GT_COPY node.
2064//
2065// Arguments:
2066// tree - the GT_COPY node
2067//
2068// Notes:
2069// This will copy the register(s) produced by this node's source, to
2070// the register(s) allocated to this GT_COPY node.
2071// It has some special handling for these cases:
2072// - when the source and target registers are in different register files
2073// (note that this is *not* a conversion).
2074// - when the source is a lclVar whose home location is being moved to a new
2075// register (rather than just being copied for temporary use).
2076//
2077void CodeGen::genRegCopy(GenTree* treeNode)
2078{
2079 assert(treeNode->OperGet() == GT_COPY);
2080 GenTree* op1 = treeNode->gtOp.gtOp1;
2081
2082 regNumber sourceReg = genConsumeReg(op1);
2083
2084 if (op1->IsMultiRegNode())
2085 {
2086 noway_assert(!op1->IsCopyOrReload());
2087 unsigned regCount = op1->GetMultiRegCount();
2088 for (unsigned i = 0; i < regCount; i++)
2089 {
2090 regNumber srcReg = op1->GetRegByIndex(i);
2091 regNumber tgtReg = treeNode->AsCopyOrReload()->GetRegNumByIdx(i);
2092 var_types regType = op1->GetRegTypeByIndex(i);
2093 inst_RV_RV(ins_Copy(regType), tgtReg, srcReg, regType);
2094 }
2095 }
2096 else
2097 {
2098 var_types targetType = treeNode->TypeGet();
2099 regNumber targetReg = treeNode->gtRegNum;
2100 assert(targetReg != REG_NA);
2101 assert(targetType != TYP_STRUCT);
2102
2103 // Check whether this node and the node from which we're copying the value have the same
2104 // register type.
2105 // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
2106 // register, in which case it is passed as an argument, or returned from a call,
2107 // in an integer register and must be copied if it's in a floating point register.
2108
2109 bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
2110 bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
2111 if (srcFltReg != tgtFltReg)
2112 {
2113#ifdef _TARGET_ARM64_
2114 inst_RV_RV(INS_fmov, targetReg, sourceReg, targetType);
2115#else // !_TARGET_ARM64_
2116 if (varTypeIsFloating(treeNode))
2117 {
2118 // A GT_COPY from 'int' to 'float' currently can't happen. It may become possible
2119 // once ARM SIMD support is implemented, per the comment above.
2120 NYI_ARM("genRegCopy from 'int' to 'float'");
2121 }
2122 else
2123 {
2124 assert(varTypeIsFloating(op1));
2125
2126 if (op1->TypeGet() == TYP_FLOAT)
2127 {
2128 inst_RV_RV(INS_vmov_f2i, targetReg, genConsumeReg(op1), targetType);
2129 }
2130 else
2131 {
2132 regNumber otherReg = (regNumber)treeNode->AsCopyOrReload()->gtOtherRegs[0];
2133 assert(otherReg != REG_NA);
2134 inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, genConsumeReg(op1), EA_8BYTE);
2135 }
2136 }
2137#endif // !_TARGET_ARM64_
2138 }
2139 else
2140 {
2141 inst_RV_RV(ins_Copy(targetType), targetReg, sourceReg, targetType);
2142 }
2143 }
2144
2145 if (op1->IsLocal())
2146 {
2147 // The lclVar will never be a def.
2148 // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
2149 // appropriately set the gcInfo for the copied value.
2150 // If not, there are two cases we need to handle:
2151 // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
2152 // will remain live in its original register.
2153 // genProduceReg() will appropriately set the gcInfo for the copied value,
2154 // and genConsumeReg will reset it.
2155 // - Otherwise, we need to update register info for the lclVar.
2156
2157 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
2158 assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
2159
2160 if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
2161 {
2162 LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
2163
2164 // If we didn't just spill it (in genConsumeReg, above), then update the register info
2165 if (varDsc->lvRegNum != REG_STK)
2166 {
2167 // The old location is dying
2168 genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
2169
2170 gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
2171
2172 genUpdateVarReg(varDsc, treeNode);
2173
2174 // The new location is going live
2175 genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
2176 }
2177 }
2178 }
2179
2180 genProduceReg(treeNode);
2181}
2182
2183//------------------------------------------------------------------------
2184// genCallInstruction: Produce code for a GT_CALL node
2185//
2186void CodeGen::genCallInstruction(GenTreeCall* call)
2187{
2188 gtCallTypes callType = (gtCallTypes)call->gtCallType;
2189
2190 IL_OFFSETX ilOffset = BAD_IL_OFFSET;
2191
2192 // all virtuals should have been expanded into a control expression
2193 assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
2194
2195 // Consume all the arg regs
2196 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
2197 {
2198 assert(list->OperIsList());
2199
2200 GenTree* argNode = list->Current();
2201
2202 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
2203 assert(curArgTabEntry);
2204
2205 // GT_RELOAD/GT_COPY use the child node
2206 argNode = argNode->gtSkipReloadOrCopy();
2207
2208 if (curArgTabEntry->regNum == REG_STK)
2209 continue;
2210
2211 // Deal with multi register passed struct args.
2212 if (argNode->OperGet() == GT_FIELD_LIST)
2213 {
2214 GenTreeArgList* argListPtr = argNode->AsArgList();
2215 unsigned iterationNum = 0;
2216 regNumber argReg = curArgTabEntry->regNum;
2217 for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
2218 {
2219 GenTree* putArgRegNode = argListPtr->gtOp.gtOp1;
2220 assert(putArgRegNode->gtOper == GT_PUTARG_REG);
2221
2222 genConsumeReg(putArgRegNode);
2223
2224 if (putArgRegNode->gtRegNum != argReg)
2225 {
2226 inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), true), argReg, putArgRegNode->gtRegNum);
2227 }
2228
2229 argReg = genRegArgNext(argReg);
2230
2231#if defined(_TARGET_ARM_)
2232 // A double register is modelled as an even-numbered single one
2233 if (putArgRegNode->TypeGet() == TYP_DOUBLE)
2234 {
2235 argReg = genRegArgNext(argReg);
2236 }
2237#endif // _TARGET_ARM_
2238 }
2239 }
2240#if FEATURE_ARG_SPLIT
2241 else if (curArgTabEntry->isSplit)
2242 {
2243 assert(curArgTabEntry->numRegs >= 1);
2244 genConsumeArgSplitStruct(argNode->AsPutArgSplit());
2245 for (unsigned idx = 0; idx < curArgTabEntry->numRegs; idx++)
2246 {
2247 regNumber argReg = (regNumber)((unsigned)curArgTabEntry->regNum + idx);
2248 regNumber allocReg = argNode->AsPutArgSplit()->GetRegNumByIdx(idx);
2249 if (argReg != allocReg)
2250 {
2251 inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, allocReg);
2252 }
2253 }
2254 }
2255#endif // FEATURE_ARG_SPLIT
2256 else
2257 {
2258 regNumber argReg = curArgTabEntry->regNum;
2259 genConsumeReg(argNode);
2260 if (argNode->gtRegNum != argReg)
2261 {
2262 inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, argNode->gtRegNum);
2263 }
2264 }
2265 }
2266
2267 // Insert a null check on "this" pointer if asked.
2268 if (call->NeedsNullCheck())
2269 {
2270 const regNumber regThis = genGetThisArgReg(call);
2271
2272#if defined(_TARGET_ARM_)
2273 const regNumber tmpReg = call->ExtractTempReg();
2274 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0);
2275#elif defined(_TARGET_ARM64_)
2276 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0);
2277#endif // _TARGET_*
2278 }
2279
2280 // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper
2281 // method.
2282 CORINFO_METHOD_HANDLE methHnd;
2283 GenTree* target = call->gtControlExpr;
2284 if (callType == CT_INDIRECT)
2285 {
2286 assert(target == nullptr);
2287 target = call->gtCallAddr;
2288 methHnd = nullptr;
2289 }
2290 else
2291 {
2292 methHnd = call->gtCallMethHnd;
2293 }
2294
2295 CORINFO_SIG_INFO* sigInfo = nullptr;
2296#ifdef DEBUG
2297 // Pass the call signature information down into the emitter so the emitter can associate
2298 // native call sites with the signatures they were generated from.
2299 if (callType != CT_HELPER)
2300 {
2301 sigInfo = call->callSig;
2302 }
2303#endif // DEBUG
2304
2305 // If fast tail call, then we are done. In this case we setup the args (both reg args
2306 // and stack args in incoming arg area) and call target. Epilog sequence would
2307 // generate "br <reg>".
2308 if (call->IsFastTailCall())
2309 {
2310 // Don't support fast tail calling JIT helpers
2311 assert(callType != CT_HELPER);
2312
2313 // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
2314 assert(target != nullptr);
2315
2316 genConsumeReg(target);
2317
2318 // Use IP0 on ARM64 and R12 on ARM32 as the call target register.
2319 if (target->gtRegNum != REG_FASTTAILCALL_TARGET)
2320 {
2321 inst_RV_RV(INS_mov, REG_FASTTAILCALL_TARGET, target->gtRegNum);
2322 }
2323
2324 return;
2325 }
2326
2327 // For a pinvoke to unmanaged code we emit a label to clear
2328 // the GC pointer state before the callsite.
2329 // We can't utilize the typical lazy killing of GC pointers
2330 // at (or inside) the callsite.
2331 if (compiler->killGCRefs(call))
2332 {
2333 genDefineTempLabel(genCreateTempLabel());
2334 }
2335
2336 // Determine return value size(s).
2337 ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
2338 emitAttr retSize = EA_PTRSIZE;
2339 emitAttr secondRetSize = EA_UNKNOWN;
2340
2341 if (call->HasMultiRegRetVal())
2342 {
2343 retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
2344 secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));
2345 }
2346 else
2347 {
2348 assert(!varTypeIsStruct(call));
2349
2350 if (call->gtType == TYP_REF)
2351 {
2352 retSize = EA_GCREF;
2353 }
2354 else if (call->gtType == TYP_BYREF)
2355 {
2356 retSize = EA_BYREF;
2357 }
2358 }
2359
2360 // We need to propagate the IL offset information to the call instruction, so we can emit
2361 // an IL to native mapping record for the call, to support managed return value debugging.
2362 // We don't want tail call helper calls that were converted from normal calls to get a record,
2363 // so we skip this hash table lookup logic in that case.
2364 if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
2365 {
2366 (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
2367 }
2368
2369 if (target != nullptr)
2370 {
2371 // A call target cannot be a contained indirection
2372 assert(!target->isContainedIndir());
2373
2374 genConsumeReg(target);
2375
2376 // We have already generated code for gtControlExpr evaluating it into a register.
2377 // We just need to emit "call reg" in this case.
2378 //
2379 assert(genIsValidIntReg(target->gtRegNum));
2380
2381 genEmitCall(emitter::EC_INDIR_R, methHnd,
2382 INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
2383 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, target->gtRegNum);
2384 }
2385 else
2386 {
2387 // Generate a direct call to a non-virtual user defined or helper method
2388 assert(callType == CT_HELPER || callType == CT_USER_FUNC);
2389
2390 void* addr = nullptr;
2391#ifdef FEATURE_READYTORUN_COMPILER
2392 if (call->gtEntryPoint.addr != NULL)
2393 {
2394 assert(call->gtEntryPoint.accessType == IAT_VALUE);
2395 addr = call->gtEntryPoint.addr;
2396 }
2397 else
2398#endif // FEATURE_READYTORUN_COMPILER
2399 if (callType == CT_HELPER)
2400 {
2401 CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
2402 noway_assert(helperNum != CORINFO_HELP_UNDEF);
2403
2404 void* pAddr = nullptr;
2405 addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
2406 assert(pAddr == nullptr);
2407 }
2408 else
2409 {
2410 // Direct call to a non-virtual user function.
2411 addr = call->gtDirectCallAddress;
2412 }
2413
2414 assert(addr != nullptr);
2415
2416// Non-virtual direct call to known addresses
2417#ifdef _TARGET_ARM_
2418 if (!arm_Valid_Imm_For_BL((ssize_t)addr))
2419 {
2420 regNumber tmpReg = call->GetSingleTempReg();
2421 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr);
2422 genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg);
2423 }
2424 else
2425#endif // _TARGET_ARM_
2426 {
2427 genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr,
2428 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
2429 }
2430
2431#if 0 && defined(_TARGET_ARM64_)
2432 // Use this path if you want to load an absolute call target using
2433 // a sequence of movs followed by an indirect call (blr instruction)
2434 // If this path is enabled, we need to ensure that REG_IP0 is assigned during Lowering.
2435
2436 // Load the call target address in x16
2437 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr);
2438
2439 // indirect call to constant address in IP0
2440 genEmitCall(emitter::EC_INDIR_R,
2441 methHnd,
2442 INDEBUG_LDISASM_COMMA(sigInfo)
2443 nullptr, //addr
2444 retSize,
2445 secondRetSize,
2446 ilOffset,
2447 REG_IP0);
2448#endif
2449 }
2450
2451 // if it was a pinvoke we may have needed to get the address of a label
2452 if (genPendingCallLabel)
2453 {
2454 assert(call->IsUnmanaged());
2455 genDefineTempLabel(genPendingCallLabel);
2456 genPendingCallLabel = nullptr;
2457 }
2458
2459 // Update GC info:
2460 // All Callee arg registers are trashed and no longer contain any GC pointers.
2461 // TODO-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
2462 // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
2463 // registers from RBM_CALLEE_TRASH
2464 assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
2465 assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
2466 gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
2467 gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
2468
2469 var_types returnType = call->TypeGet();
2470 if (returnType != TYP_VOID)
2471 {
2472 regNumber returnReg;
2473
2474 if (call->HasMultiRegRetVal())
2475 {
2476 assert(pRetTypeDesc != nullptr);
2477 unsigned regCount = pRetTypeDesc->GetReturnRegCount();
2478
2479 // If regs allocated to call node are different from ABI return
2480 // regs in which the call has returned its result, move the result
2481 // to regs allocated to call node.
2482 for (unsigned i = 0; i < regCount; ++i)
2483 {
2484 var_types regType = pRetTypeDesc->GetReturnRegType(i);
2485 returnReg = pRetTypeDesc->GetABIReturnReg(i);
2486 regNumber allocatedReg = call->GetRegNumByIdx(i);
2487 if (returnReg != allocatedReg)
2488 {
2489 inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
2490 }
2491 }
2492 }
2493 else
2494 {
2495#ifdef _TARGET_ARM_
2496 if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
2497 {
2498 // The CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
2499 // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
2500 returnReg = REG_PINVOKE_TCB;
2501 }
2502 else
2503#endif // _TARGET_ARM_
2504 if (varTypeIsFloating(returnType) && !compiler->opts.compUseSoftFP)
2505 {
2506 returnReg = REG_FLOATRET;
2507 }
2508 else
2509 {
2510 returnReg = REG_INTRET;
2511 }
2512
2513 if (call->gtRegNum != returnReg)
2514 {
2515#ifdef _TARGET_ARM_
2516 if (compiler->opts.compUseSoftFP && returnType == TYP_DOUBLE)
2517 {
2518 inst_RV_RV_RV(INS_vmov_i2d, call->gtRegNum, returnReg, genRegArgNext(returnReg), EA_8BYTE);
2519 }
2520 else if (compiler->opts.compUseSoftFP && returnType == TYP_FLOAT)
2521 {
2522 inst_RV_RV(INS_vmov_i2f, call->gtRegNum, returnReg, returnType);
2523 }
2524 else
2525#endif
2526 {
2527 inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
2528 }
2529 }
2530 }
2531
2532 genProduceReg(call);
2533 }
2534
2535 // If there is nothing next, that means the result is thrown away, so this value is not live.
2536 // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
2537 if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
2538 {
2539 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
2540 }
2541}
2542
2543// Produce code for a GT_JMP node.
2544// The arguments of the caller need to be transferred to the callee before exiting the caller.
2545// The actual jump to the callee is generated as part of the caller's epilog sequence.
2546// Therefore the codegen for GT_JMP only needs to ensure that the callee's arguments are set up correctly.
2547void CodeGen::genJmpMethod(GenTree* jmp)
2548{
2549 assert(jmp->OperGet() == GT_JMP);
2550 assert(compiler->compJmpOpUsed);
2551
2552 // If no arguments, nothing to do
2553 if (compiler->info.compArgsCount == 0)
2554 {
2555 return;
2556 }
2557
2558 // Make sure register arguments are in their initial registers
2559 // and stack arguments are put back as well.
2560 unsigned varNum;
2561 LclVarDsc* varDsc;
2562
2563 // First move any enregistered stack arguments back to the stack.
2564 // At the same time, any reg arg not in its correct register is moved back to its stack location.
2565 //
2566 // We are not strictly required to spill reg args that are not in the desired reg for a jmp call,
2567 // but that would require us to deal with circularity while moving values around. Spilling
2568 // to the stack keeps the implementation simple, which is not a bad trade-off given that jmp calls
2569 // are not frequent.
2570 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
2571 {
2572 varDsc = compiler->lvaTable + varNum;
2573
2574 if (varDsc->lvPromoted)
2575 {
2576 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
2577
2578 unsigned fieldVarNum = varDsc->lvFieldLclStart;
2579 varDsc = compiler->lvaTable + fieldVarNum;
2580 }
2581 noway_assert(varDsc->lvIsParam);
2582
2583 if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
2584 {
2585 // Skip reg args that are already in the right register for the jmp call.
2586 // If not, we will spill such args to their stack locations.
2587 //
2588 // If we need to generate a tail call profiler hook, then spill all
2589 // arg regs to free them up for the callback.
2590 if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
2591 continue;
2592 }
2593 else if (varDsc->lvRegNum == REG_STK)
2594 {
2595 // Skip args that are currently living on the stack.
2596 continue;
2597 }
2598
2599 // If we got here, it means we have either a reg argument that is not in its right register, or
2600 // a stack argument that is currently living in a register. In either case the following
2601 // assert should hold.
2602 assert(varDsc->lvRegNum != REG_STK);
2603 assert(varDsc->TypeGet() != TYP_STRUCT);
2604 var_types storeType = genActualType(varDsc->TypeGet());
2605 emitAttr storeSize = emitActualTypeSize(storeType);
2606
2607#ifdef _TARGET_ARM_
2608 if (varDsc->TypeGet() == TYP_LONG)
2609 {
2610 // long - at least the low half must be enregistered
2611 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);
2612
2613 // Is the upper half also enregistered?
2614 if (varDsc->lvOtherReg != REG_STK)
2615 {
2616 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
2617 }
2618 }
2619 else
2620#endif // _TARGET_ARM_
2621 {
2622 getEmitter()->emitIns_S_R(ins_Store(storeType), storeSize, varDsc->lvRegNum, varNum, 0);
2623 }
2624 // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
2625 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
2626 // Therefore manually update life of varDsc->lvRegNum.
2627 regMaskTP tempMask = genRegMask(varDsc->lvRegNum);
2628 regSet.RemoveMaskVars(tempMask);
2629 gcInfo.gcMarkRegSetNpt(tempMask);
2630 if (compiler->lvaIsGCTracked(varDsc))
2631 {
2632 VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
2633 }
2634 }
2635
2636#ifdef PROFILING_SUPPORTED
2637 // At this point all arg regs are free.
2638 // Emit tail call profiler callback.
2639 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
2640#endif
2641
2642 // Next, move any register arguments that are not currently in their incoming argument registers back into them.
2643 regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
2644 unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
2645 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
2646 {
2647 varDsc = compiler->lvaTable + varNum;
2648 if (varDsc->lvPromoted)
2649 {
2650 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
2651
2652 unsigned fieldVarNum = varDsc->lvFieldLclStart;
2653 varDsc = compiler->lvaTable + fieldVarNum;
2654 }
2655 noway_assert(varDsc->lvIsParam);
2656
2657 // Skip if arg not passed in a register.
2658 if (!varDsc->lvIsRegArg)
2659 continue;
2660
2661 // Register argument
2662 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
2663
2664 // Is register argument already in the right register?
2665 // If not load it from its stack location.
2666 regNumber argReg = varDsc->lvArgReg; // incoming arg register
2667 regNumber argRegNext = REG_NA;
2668
2669#ifdef _TARGET_ARM64_
2670 if (varDsc->lvRegNum != argReg)
2671 {
2672 var_types loadType = TYP_UNDEF;
2673 if (varTypeIsStruct(varDsc))
2674 {
2675 // Must be <= 16 bytes or else it wouldn't be passed in registers
2676 noway_assert(EA_SIZE_IN_BYTES(varDsc->lvSize()) <= MAX_PASS_MULTIREG_BYTES);
2677 loadType = compiler->getJitGCType(varDsc->lvGcLayout[0]);
2678 }
2679 else
2680 {
2681 loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
2682 }
2683 emitAttr loadSize = emitActualTypeSize(loadType);
2684 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0);
2685
2686 // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
2687 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
2688 // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
2689 // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
2690 regSet.AddMaskVars(genRegMask(argReg));
2691 gcInfo.gcMarkRegPtrVal(argReg, loadType);
2692
2693 if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
2694 {
2695 if (varDsc->lvIsHfa())
2696 {
2697 NYI_ARM64("CodeGen::genJmpMethod with multireg HFA arg");
2698 }
2699
2700 // Restore the second register.
2701 argRegNext = genRegArgNext(argReg);
2702
2703 loadType = compiler->getJitGCType(varDsc->lvGcLayout[1]);
2704 loadSize = emitActualTypeSize(loadType);
2705 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, TARGET_POINTER_SIZE);
2706
2707 regSet.AddMaskVars(genRegMask(argRegNext));
2708 gcInfo.gcMarkRegPtrVal(argRegNext, loadType);
2709 }
2710
2711 if (compiler->lvaIsGCTracked(varDsc))
2712 {
2713 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
2714 }
2715 }
2716
2717 // In case of a jmp call to a vararg method ensure only integer registers are passed.
2718 if (compiler->info.compIsVarArgs)
2719 {
2720 assert((genRegMask(argReg) & RBM_ARG_REGS) != RBM_NONE);
2721
2722 fixedIntArgMask |= genRegMask(argReg);
2723
2724 if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
2725 {
2726 assert(argRegNext != REG_NA);
2727 fixedIntArgMask |= genRegMask(argRegNext);
2728 }
2729
2730 if (argReg == REG_ARG_0)
2731 {
2732 assert(firstArgVarNum == BAD_VAR_NUM);
2733 firstArgVarNum = varNum;
2734 }
2735 }
2736#else
2737 bool twoParts = false;
2738 var_types loadType = TYP_UNDEF;
2739 if (varDsc->TypeGet() == TYP_LONG)
2740 {
2741 twoParts = true;
2742 }
2743 else if (varDsc->TypeGet() == TYP_DOUBLE)
2744 {
2745 if (compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP)
2746 {
2747 twoParts = true;
2748 }
2749 }
2750
2751 if (twoParts)
2752 {
2753 argRegNext = genRegArgNext(argReg);
2754
2755 if (varDsc->lvRegNum != argReg)
2756 {
2757 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, argReg, varNum, 0);
2758 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, argRegNext, varNum, REGSIZE_BYTES);
2759 }
2760
2761 if (compiler->info.compIsVarArgs)
2762 {
2763 fixedIntArgMask |= genRegMask(argReg);
2764 fixedIntArgMask |= genRegMask(argRegNext);
2765 }
2766 }
2767 else if (varDsc->lvIsHfaRegArg())
2768 {
2769 loadType = varDsc->GetHfaType();
2770 regNumber fieldReg = argReg;
2771 emitAttr loadSize = emitActualTypeSize(loadType);
2772 unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
2773
2774 for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)loadSize)
2775 {
2776 if (varDsc->lvRegNum != argReg)
2777 {
2778 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, fieldReg, varNum, ofs);
2779 }
2780 assert(genIsValidFloatReg(fieldReg)); // we don't use register tracking for FP
2781 fieldReg = regNextOfType(fieldReg, loadType);
2782 }
2783 }
2784 else if (varTypeIsStruct(varDsc))
2785 {
2786 regNumber slotReg = argReg;
2787 unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
2788
2789 for (unsigned ofs = 0; ofs < maxSize; ofs += REGSIZE_BYTES)
2790 {
2791 unsigned idx = ofs / REGSIZE_BYTES;
2792 loadType = compiler->getJitGCType(varDsc->lvGcLayout[idx]);
2793
2794 if (varDsc->lvRegNum != argReg)
2795 {
2796 emitAttr loadSize = emitActualTypeSize(loadType);
2797
2798 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, slotReg, varNum, ofs);
2799 }
2800
2801 regSet.AddMaskVars(genRegMask(slotReg));
2802 gcInfo.gcMarkRegPtrVal(slotReg, loadType);
2803 if (genIsValidIntReg(slotReg) && compiler->info.compIsVarArgs)
2804 {
2805 fixedIntArgMask |= genRegMask(slotReg);
2806 }
2807
2808 slotReg = genRegArgNext(slotReg);
2809 }
2810 }
2811 else
2812 {
2813 loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
2814
2815 if (varDsc->lvRegNum != argReg)
2816 {
2817 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
2818 }
2819
2820 regSet.AddMaskVars(genRegMask(argReg));
2821 gcInfo.gcMarkRegPtrVal(argReg, loadType);
2822
2823 if (genIsValidIntReg(argReg) && compiler->info.compIsVarArgs)
2824 {
2825 fixedIntArgMask |= genRegMask(argReg);
2826 }
2827 }
2828
2829 if (compiler->lvaIsGCTracked(varDsc))
2830 {
2831 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
2832 }
2833#endif
2834 }
2835
2836 // Jmp call to a vararg method - if the method's fixed arguments do not occupy all of the
2837 // integer argument registers, load the remaining integer arg registers from the corresponding
2838 // shadow stack slots. This is because we don't know the number and types of the non-fixed
2839 // params passed by the caller, so we have to assume the worst case: that the caller has passed
2840 // values in all of the integer arg registers.
2841 //
2842 // The caller could have passed gc-ref/byref type var args. Since these are var args,
2843 // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads the
2844 // remaining arg registers from shadow stack slots as non-gc interruptible.
2845 if (fixedIntArgMask != RBM_NONE)
2846 {
2847 assert(compiler->info.compIsVarArgs);
2848 assert(firstArgVarNum != BAD_VAR_NUM);
2849
2850 regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
2851 if (remainingIntArgMask != RBM_NONE)
2852 {
2853 getEmitter()->emitDisableGC();
2854 for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
2855 {
2856 regNumber argReg = intArgRegs[argNum];
2857 regMaskTP argRegMask = genRegMask(argReg);
2858
2859 if ((remainingIntArgMask & argRegMask) != 0)
2860 {
2861 remainingIntArgMask &= ~argRegMask;
2862 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, argReg, firstArgVarNum, argOffset);
2863 }
2864
2865 argOffset += REGSIZE_BYTES;
2866 }
2867 getEmitter()->emitEnableGC();
2868 }
2869 }
2870}
2871
2872//------------------------------------------------------------------------
2873// genIntCastOverflowCheck: Generate overflow checking code for an integer cast.
2874//
2875// Arguments:
2876// cast - The GT_CAST node
2877// desc - The cast description
2878// reg - The register containing the value to check
2879//
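// For example (illustrative only; actual registers depend on allocation), a
// CHECK_POSITIVE on a 4-byte source would emit roughly:
//     cmp  w0, #0
//     b.lt <overflow throw helper block>
//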
2880void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& desc, regNumber reg)
2881{
2882 switch (desc.CheckKind())
2883 {
2884 case GenIntCastDesc::CHECK_POSITIVE:
2885 getEmitter()->emitIns_R_I(INS_cmp, EA_ATTR(desc.CheckSrcSize()), reg, 0);
2886 genJumpToThrowHlpBlk(EJ_lt, SCK_OVERFLOW);
2887 break;
2888
2889#ifdef _TARGET_64BIT_
2890 case GenIntCastDesc::CHECK_UINT_RANGE:
2891 // We need to check if the value is not greater than 0xFFFFFFFF but this value
2892 // cannot be encoded in the immediate operand of CMP. Use TST instead to check
2893 // if the upper 32 bits are zero.
2894 getEmitter()->emitIns_R_I(INS_tst, EA_8BYTE, reg, 0xFFFFFFFF00000000LL);
2895 genJumpToThrowHlpBlk(EJ_ne, SCK_OVERFLOW);
2896 break;
2897
2898 case GenIntCastDesc::CHECK_POSITIVE_INT_RANGE:
2899 // We need to check if the value is not greater than 0x7FFFFFFF but this value
2900 // cannot be encoded in the immediate operand of CMP. Use TST instead to check
2901 // if the upper 33 bits are zero.
2902 getEmitter()->emitIns_R_I(INS_tst, EA_8BYTE, reg, 0xFFFFFFFF80000000LL);
2903 genJumpToThrowHlpBlk(EJ_ne, SCK_OVERFLOW);
2904 break;
2905
2906 case GenIntCastDesc::CHECK_INT_RANGE:
2907 {
2908 const regNumber tempReg = cast->GetSingleTempReg();
2909 assert(tempReg != reg);
2910 instGen_Set_Reg_To_Imm(EA_8BYTE, tempReg, INT32_MAX);
2911 getEmitter()->emitIns_R_R(INS_cmp, EA_8BYTE, reg, tempReg);
2912 genJumpToThrowHlpBlk(EJ_gt, SCK_OVERFLOW);
2913 instGen_Set_Reg_To_Imm(EA_8BYTE, tempReg, INT32_MIN);
2914 getEmitter()->emitIns_R_R(INS_cmp, EA_8BYTE, reg, tempReg);
2915 genJumpToThrowHlpBlk(EJ_lt, SCK_OVERFLOW);
2916 }
2917 break;
2918#endif
2919
2920 default:
2921 {
2922 assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE);
2923 const int castMaxValue = desc.CheckSmallIntMax();
2924 const int castMinValue = desc.CheckSmallIntMin();
2925
2926 // Values greater than 255 cannot be encoded in the immediate operand of CMP.
2927 // Replace (x > max) with (x >= max + 1) where max + 1 (a power of 2) can be
2928 // encoded. We could do this for all max values but on ARM32 "cmp r0, 255"
2929 // is better than "cmp r0, 256" because it has a shorter encoding.
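            // For example (illustrative), a checked cast to USHORT (max 65535, min 0) emits
            // "cmp w0, #0x10000" followed by an unsigned branch (b.hs) to the overflow block.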
2930 if (castMaxValue > 255)
2931 {
2932 assert((castMaxValue == 32767) || (castMaxValue == 65535));
2933 getEmitter()->emitIns_R_I(INS_cmp, EA_SIZE(desc.CheckSrcSize()), reg, castMaxValue + 1);
2934 genJumpToThrowHlpBlk((castMinValue == 0) ? EJ_hs : EJ_ge, SCK_OVERFLOW);
2935 }
2936 else
2937 {
2938 getEmitter()->emitIns_R_I(INS_cmp, EA_SIZE(desc.CheckSrcSize()), reg, castMaxValue);
2939 genJumpToThrowHlpBlk((castMinValue == 0) ? EJ_hi : EJ_gt, SCK_OVERFLOW);
2940 }
2941
2942 if (castMinValue != 0)
2943 {
2944 getEmitter()->emitIns_R_I(INS_cmp, EA_SIZE(desc.CheckSrcSize()), reg, castMinValue);
2945 genJumpToThrowHlpBlk(EJ_lt, SCK_OVERFLOW);
2946 }
2947 }
2948 break;
2949 }
2950}
2951
2952//------------------------------------------------------------------------
2953// genIntToIntCast: Generate code for an integer cast, with or without overflow check.
2954//
2955// Arguments:
2956// cast - The GT_CAST node
2957//
2958// Assumptions:
2959// The cast node is not a contained node and must have an assigned register.
2960// Neither the source nor target type can be a floating point type.
2961//
2962// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
2963//
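// For example (illustrative), a non-overflow cast from INT to USHORT typically emits just
// "uxth w0, w1", while a sign-extending cast from INT to LONG on ARM64 emits "sxtw x0, w1".
//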
2964void CodeGen::genIntToIntCast(GenTreeCast* cast)
2965{
2966 genConsumeRegs(cast->gtGetOp1());
2967
2968 const regNumber srcReg = cast->gtGetOp1()->gtRegNum;
2969 const regNumber dstReg = cast->gtRegNum;
2970
2971 assert(genIsValidIntReg(srcReg));
2972 assert(genIsValidIntReg(dstReg));
2973
2974 GenIntCastDesc desc(cast);
2975
2976 if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE)
2977 {
2978 genIntCastOverflowCheck(cast, desc, srcReg);
2979 }
2980
2981 if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg))
2982 {
2983 instruction ins;
2984 unsigned insSize;
2985
2986 switch (desc.ExtendKind())
2987 {
2988 case GenIntCastDesc::ZERO_EXTEND_SMALL_INT:
2989 ins = (desc.ExtendSrcSize() == 1) ? INS_uxtb : INS_uxth;
2990 insSize = 4;
2991 break;
2992 case GenIntCastDesc::SIGN_EXTEND_SMALL_INT:
2993 ins = (desc.ExtendSrcSize() == 1) ? INS_sxtb : INS_sxth;
2994 insSize = 4;
2995 break;
2996#ifdef _TARGET_64BIT_
2997 case GenIntCastDesc::ZERO_EXTEND_INT:
2998 ins = INS_mov;
2999 insSize = 4;
3000 break;
3001 case GenIntCastDesc::SIGN_EXTEND_INT:
3002 ins = INS_sxtw;
3003 insSize = 8;
3004 break;
3005#endif
3006 default:
3007 assert(desc.ExtendKind() == GenIntCastDesc::COPY);
3008 ins = INS_mov;
3009 insSize = desc.ExtendSrcSize();
3010 break;
3011 }
3012
3013 getEmitter()->emitIns_R_R(ins, EA_ATTR(insSize), dstReg, srcReg);
3014 }
3015
3016 genProduceReg(cast);
3017}
3018
3019//------------------------------------------------------------------------
3020// genFloatToFloatCast: Generate code for a cast between float and double
3021//
3022// Arguments:
3023// treeNode - The GT_CAST node
3024//
3025// Return Value:
3026// None.
3027//
3028// Assumptions:
3029// Cast is a non-overflow conversion.
3030// The treeNode must have an assigned register.
3031// The cast is between float and double.
3032//
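// For example (illustrative), a float-to-double cast emits "fcvt d0, s0" on ARM64
// and "vcvt.f64.f32 d0, s0" on ARM32.
//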
3033void CodeGen::genFloatToFloatCast(GenTree* treeNode)
3034{
3035 // float <--> double conversions are always non-overflow ones
3036 assert(treeNode->OperGet() == GT_CAST);
3037 assert(!treeNode->gtOverflow());
3038
3039 regNumber targetReg = treeNode->gtRegNum;
3040 assert(genIsValidFloatReg(targetReg));
3041
3042 GenTree* op1 = treeNode->gtOp.gtOp1;
3043 assert(!op1->isContained()); // Cannot be contained
3044 assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
3045
3046 var_types dstType = treeNode->CastToType();
3047 var_types srcType = op1->TypeGet();
3048 assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
3049
3050 genConsumeOperands(treeNode->AsOp());
3051
3052 // treeNode must be a reg
3053 assert(!treeNode->isContained());
3054
3055#if defined(_TARGET_ARM_)
3056
3057 if (srcType != dstType)
3058 {
3059 instruction insVcvt = (srcType == TYP_FLOAT) ? INS_vcvt_f2d // convert Float to Double
3060 : INS_vcvt_d2f; // convert Double to Float
3061
3062 getEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
3063 }
3064 else if (treeNode->gtRegNum != op1->gtRegNum)
3065 {
3066 getEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
3067 }
3068
3069#elif defined(_TARGET_ARM64_)
3070
3071 if (srcType != dstType)
3072 {
3073 insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double
3074 : INS_OPTS_D_TO_S; // convert Double to Single
3075
3076 getEmitter()->emitIns_R_R(INS_fcvt, emitActualTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
3077 }
3078 else if (treeNode->gtRegNum != op1->gtRegNum)
3079 {
3080 // If this is a double-to-double or float-to-float cast, emit a move instruction.
3081 getEmitter()->emitIns_R_R(INS_mov, emitActualTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
3082 }
3083
3084#endif // _TARGET_*
3085
3086 genProduceReg(treeNode);
3087}
3088
3089//------------------------------------------------------------------------
3090// genCreateAndStoreGCInfo: Create and record GC Info for the function.
3091//
3092void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
3093 unsigned prologSize,
3094 unsigned epilogSize DEBUGARG(void* codePtr))
3095{
3096 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
3097 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
3098 GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
3099 assert(gcInfoEncoder != nullptr);
3100
3101 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
3102 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
3103
3104 // We keep the call count for the second call to gcMakeRegPtrTable() below.
3105 unsigned callCnt = 0;
3106
3107 // First we figure out the encoder ID's for the stack slots and registers.
3108 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
3109
3110 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
3111 gcInfoEncoder->FinalizeSlotIds();
3112
3113 // Now we can actually use those slot ID's to declare live ranges.
3114 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
3115
3116#ifdef _TARGET_ARM64_
3117
3118 if (compiler->opts.compDbgEnC)
3119 {
3120 // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
3121 // which is:
3122 // -return address
3123 // -saved off RBP
3124 // -saved 'this' pointer and bool for synchronized methods
3125
3126 // 4 slots for RBP + return address + RSI + RDI
3127 int preservedAreaSize = 4 * REGSIZE_BYTES;
3128
3129 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
3130 {
3131 if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
3132 preservedAreaSize += REGSIZE_BYTES;
3133
3134 preservedAreaSize += 1; // bool for synchronized methods
3135 }
3136
3137 // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
3138 // frame
3139 gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
3140 }
3141
3142#endif // _TARGET_ARM64_
3143
3144 if (compiler->opts.IsReversePInvoke())
3145 {
3146 unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar;
3147 assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM && reversePInvokeFrameVarNumber < compiler->lvaRefCount);
3148 LclVarDsc& reversePInvokeFrameVar = compiler->lvaTable[reversePInvokeFrameVarNumber];
3149 gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar.lvStkOffs);
3150 }
3151
3152 gcInfoEncoder->Build();
3153
3154 // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
3155 // let's save the values anyway for debugging purposes
3156 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
3157 compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
3158}
3159
3160//-------------------------------------------------------------------------------------------
3161// genJumpKindsForTree: Determine the number and kinds of conditional branches
3162// necessary to implement the given GT_CMP node
3163//
3164// Arguments:
3165// cmpTree - (input) The GenTree relop node that was used to set the condition codes
3167// jmpKind[2] - (output) One or two conditional branch instructions
3168// jmpToTrueLabel[2] - (output) On ARM and ARM64 both branches will always branch to the true label
3169//
3170// Return Value:
3171// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
3172//
3173// Assumptions:
3174// At least one conditional branch instruction will be returned.
3175// Typically only one conditional branch is needed
3176// and the second jmpKind[] value is set to EJ_NONE
3177//
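// For example (illustrative), a floating-point GT_EQ with GTF_RELOP_NAN_UN set maps to two
// branches, both to the true label:
//     jmpKind[0] = EJ_eq   // taken when the operands compare equal (ordered)
//     jmpKind[1] = EJ_vs   // taken when either operand is a NaN (unordered)
//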
3178void CodeGen::genJumpKindsForTree(GenTree* cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
3179{
3180 // On ARM both branches will always branch to the true label
3181 jmpToTrueLabel[0] = true;
3182 jmpToTrueLabel[1] = true;
3183
3184 // For integer comparisons just use genJumpKindForOper
3185 if (!varTypeIsFloating(cmpTree->gtOp.gtOp1))
3186 {
3187 CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
3188 jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
3189 jmpKind[1] = EJ_NONE;
3190 }
3191 else // We have a Floating Point Compare operation
3192 {
3193 assert(cmpTree->OperIsCompare());
3194
3195 // For details on this mapping, see the ARM Condition Code table
3196 // at section A8.3 in the ARMv7 architecture manual or
3197 // at section C1.2.3 in the ARMV8 architecture manual.
3198
3199 // We must check the GTF_RELOP_NAN_UN to find out
3200 // if we need to branch when we have a NaN operand.
3201 //
3202 if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
3203 {
3204 // Must branch if we have a NaN, unordered
3205 switch (cmpTree->gtOper)
3206 {
3207 case GT_EQ:
3208 jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
3209 jmpKind[1] = EJ_vs; // branch or set when we have a NaN
3210 break;
3211
3212 case GT_NE:
3213 jmpKind[0] = EJ_ne; // branch or set when not equal (or have NaN's)
3214 jmpKind[1] = EJ_NONE;
3215 break;
3216
3217 case GT_LT:
3218 jmpKind[0] = EJ_lt; // branch or set when less than (or have NaN's)
3219 jmpKind[1] = EJ_NONE;
3220 break;
3221
3222 case GT_LE:
3223 jmpKind[0] = EJ_le; // branch or set when less than or equal (or have NaN's)
3224 jmpKind[1] = EJ_NONE;
3225 break;
3226
3227 case GT_GT:
3228 jmpKind[0] = EJ_hi; // branch or set when greater than (or have NaN's)
3229 jmpKind[1] = EJ_NONE;
3230 break;
3231
3232 case GT_GE:
3233 jmpKind[0] = EJ_hs; // branch or set when greater than or equal (or have NaN's)
3234 jmpKind[1] = EJ_NONE;
3235 break;
3236
3237 default:
3238 unreached();
3239 }
3240 }
3241 else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
3242 {
3243 // Do not branch if we have a NaN, unordered
3244 switch (cmpTree->gtOper)
3245 {
3246 case GT_EQ:
3247 jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
3248 jmpKind[1] = EJ_NONE;
3249 break;
3250
3251 case GT_NE:
3252 jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
3253 jmpKind[1] = EJ_lo; // branch or set when less than (and no NaN's)
3254 break;
3255
3256 case GT_LT:
3257 jmpKind[0] = EJ_lo; // branch or set when less than (and no NaN's)
3258 jmpKind[1] = EJ_NONE;
3259 break;
3260
3261 case GT_LE:
3262 jmpKind[0] = EJ_ls; // branch or set when less than or equal (and no NaN's)
3263 jmpKind[1] = EJ_NONE;
3264 break;
3265
3266 case GT_GT:
3267 jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
3268 jmpKind[1] = EJ_NONE;
3269 break;
3270
3271 case GT_GE:
3272 jmpKind[0] = EJ_ge; // branch or set when greater than or equal (and no NaN's)
3273 jmpKind[1] = EJ_NONE;
3274 break;
3275
3276 default:
3277 unreached();
3278 }
3279 }
3280 }
3281}
3282
3283//------------------------------------------------------------------------
3284// genCodeForJumpTrue: Generates code for jmpTrue statement.
3285//
3286// Arguments:
3287// tree - The GT_JTRUE tree node.
3288//
3289// Return Value:
3290// None
3291//
3292void CodeGen::genCodeForJumpTrue(GenTree* tree)
3293{
3294 GenTree* cmp = tree->gtOp.gtOp1;
3295 assert(cmp->OperIsCompare());
3296 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
3297
3298 // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
3299 // is governed by a flag NOT by the inherent type of the node
3300 emitJumpKind jumpKind[2];
3301 bool branchToTrueLabel[2];
3302 genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
3303 assert(jumpKind[0] != EJ_NONE);
3304
3305 // On ARM the branches will always branch to the true label
3306 assert(branchToTrueLabel[0]);
3307 inst_JMP(jumpKind[0], compiler->compCurBB->bbJumpDest);
3308
3309 if (jumpKind[1] != EJ_NONE)
3310 {
3311 // the second conditional branch always has to be to the true label
3312 assert(branchToTrueLabel[1]);
3313 inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
3314 }
3315}
3316
3317//------------------------------------------------------------------------
3318// genCodeForJcc: Produce code for a GT_JCC node.
3319//
3320// Arguments:
3321// tree - the node
3322//
3323void CodeGen::genCodeForJcc(GenTreeCC* tree)
3324{
3325 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
3326
3327 CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
3328 emitJumpKind jumpKind = genJumpKindForOper(tree->gtCondition, compareKind);
3329
3330 inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
3331}
3332
3333//------------------------------------------------------------------------
3334// genCodeForSetcc: Generates code for a GT_SETCC node.
3335//
3336// Arguments:
3337// setcc - the GT_SETCC node
3338//
3339// Assumptions:
3340// The condition represents an integer comparison. This code doesn't
3341// have the necessary logic to deal with floating point comparisons,
3342// in fact it doesn't even know if the comparison is integer or floating
3343// point because SETCC nodes do not have any operands.
3344//
3345
3346void CodeGen::genCodeForSetcc(GenTreeCC* setcc)
3347{
3348 regNumber dstReg = setcc->gtRegNum;
3349 CompareKind compareKind = setcc->IsUnsigned() ? CK_UNSIGNED : CK_SIGNED;
3350 emitJumpKind jumpKind = genJumpKindForOper(setcc->gtCondition, compareKind);
3351
3352 assert(genIsValidIntReg(dstReg));
3353 // Make sure nobody is setting GTF_RELOP_NAN_UN on this node as it is ignored.
3354 assert((setcc->gtFlags & GTF_RELOP_NAN_UN) == 0);
3355
3356#ifdef _TARGET_ARM64_
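    // On ARM64 a single conditional-set instruction suffices, e.g. (illustrative): "cset x0, gt".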
3357 inst_SET(jumpKind, dstReg);
3358#else
3359 // Emit code like this:
3360 // ...
3361 // bgt True
3362 // movs rD, #0
3363 // b Next
3364 // True:
3365 // movs rD, #1
3366 // Next:
3367 // ...
3368
3369 BasicBlock* labelTrue = genCreateTempLabel();
3370 getEmitter()->emitIns_J(emitter::emitJumpKindToIns(jumpKind), labelTrue);
3371
3372 getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(setcc->TypeGet()), dstReg, 0);
3373
3374 BasicBlock* labelNext = genCreateTempLabel();
3375 getEmitter()->emitIns_J(INS_b, labelNext);
3376
3377 genDefineTempLabel(labelTrue);
3378 getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(setcc->TypeGet()), dstReg, 1);
3379 genDefineTempLabel(labelNext);
3380#endif
3381
3382 genProduceReg(setcc);
3383}
3384
3385//------------------------------------------------------------------------
3386// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
3387//
3388// Arguments:
3389// tree - the node
3390//
3391void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
3392{
3393 assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
3394
3395 if (blkOp->OperIs(GT_STORE_OBJ) && blkOp->OperIsCopyBlkOp())
3396 {
3397 assert(blkOp->AsObj()->gtGcPtrCount != 0);
3398 genCodeForCpObj(blkOp->AsObj());
3399 return;
3400 }
3401
3402 if (blkOp->gtBlkOpGcUnsafe)
3403 {
3404 getEmitter()->emitDisableGC();
3405 }
3406 bool isCopyBlk = blkOp->OperIsCopyBlkOp();
3407
3408 switch (blkOp->gtBlkOpKind)
3409 {
3410 case GenTreeBlk::BlkOpKindHelper:
3411 if (isCopyBlk)
3412 {
3413 genCodeForCpBlk(blkOp);
3414 }
3415 else
3416 {
3417 genCodeForInitBlk(blkOp);
3418 }
3419 break;
3420
3421 case GenTreeBlk::BlkOpKindUnroll:
3422 if (isCopyBlk)
3423 {
3424 genCodeForCpBlkUnroll(blkOp);
3425 }
3426 else
3427 {
3428 genCodeForInitBlkUnroll(blkOp);
3429 }
3430 break;
3431
3432 default:
3433 unreached();
3434 }
3435
3436 if (blkOp->gtBlkOpGcUnsafe)
3437 {
3438 getEmitter()->emitEnableGC();
3439 }
3440}
3441
3442//------------------------------------------------------------------------
3443// genScaledAdd: A helper for genLeaInstruction.
3444//
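// For a non-zero scale this emits a single add with a shifted register operand, e.g.
// (illustrative): "add x0, x1, x2, LSL #3" on ARM64 or "add r0, r1, r2, LSL #3" on ARM32.
//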
void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale)
{
    emitter* emit = getEmitter();
    if (scale == 0)
    {
        // target = base + index
        emit->emitIns_R_R_R(INS_add, attr, targetReg, baseReg, indexReg);
    }
    else
    {
        // target = base + index<<scale
#if defined(_TARGET_ARM_)
        emit->emitIns_R_R_R_I(INS_add, attr, targetReg, baseReg, indexReg, scale, INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
#elif defined(_TARGET_ARM64_)
        emit->emitIns_R_R_R_I(INS_add, attr, targetReg, baseReg, indexReg, scale, INS_OPTS_LSL);
#endif
    }
}

//------------------------------------------------------------------------
// genLeaInstruction: Produce code for a GT_LEA node.
//
// Arguments:
//    lea - the node
//
void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
{
    genConsumeOperands(lea);
    emitter* emit   = getEmitter();
    emitAttr size   = emitTypeSize(lea);
    int      offset = lea->Offset();

    // On ARM we can only load addresses of the form:
    //
    //     [Base + index*scale]
    //     [Base + Offset]
    //     [Literal] (PC-Relative)
    //
    // So for a LEA node of the form [Base + Index*Scale + Offset] we will generate:
    //     destReg = baseReg + indexReg * scale;
    //     destReg = destReg + offset;
    //
    // TODO-ARM64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture
    //                addressing mode instruction. Currently we're 'cheating' by producing one or more
    //                instructions to generate the addressing mode, so we need to modify lowering to
    //                produce LEAs that have a 1:1 relationship to the ARM64 architecture.
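    //
    // As an illustrative example, for [Base + Index*8 + <large offset>] the large-offset
    // path below produces roughly:
    //     mov  rTmp, #<offset>
    //     add  rTmp, rTmp, rIndex, LSL #3
    //     add  rDst, rBase, rTmp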
    if (lea->Base() && lea->Index())
    {
        GenTree* memBase = lea->Base();
        GenTree* index   = lea->Index();

        DWORD scale;

        assert(isPow2(lea->gtScale));
        BitScanForward(&scale, lea->gtScale);

        assert(scale <= 4);

        if (offset != 0)
        {
            regNumber tmpReg = lea->GetSingleTempReg();

            // When generating fully interruptible code we have to use the "large offset" sequence
            // when calculating an EA_BYREF, as we can't report a byref that points outside of the object.
            //
            bool useLargeOffsetSeq = compiler->genInterruptible && (size == EA_BYREF);

            if (!useLargeOffsetSeq && emitter::emitIns_valid_imm_for_add(offset))
            {
                // Generate code to set tmpReg = base + index*scale
                genScaledAdd(size, tmpReg, memBase->gtRegNum, index->gtRegNum, scale);

                // Then compute target reg from [tmpReg + offset]
                emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, offset);
            }
            else // large offset sequence
            {
                noway_assert(tmpReg != index->gtRegNum);
                noway_assert(tmpReg != memBase->gtRegNum);

                // First load tmpReg with the offset constant
                //    rTmp = imm
                instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);

                // Then add the scaled index register
                //    rTmp = rTmp + index*scale
                genScaledAdd(EA_PTRSIZE, tmpReg, tmpReg, index->gtRegNum, scale);

                // Then compute target reg from [base + tmpReg]
                //    rDst = base + rTmp
                emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, tmpReg);
            }
        }
        else
        {
            // Compute target reg from [base + index*scale]
            genScaledAdd(size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum, scale);
        }
    }
    else if (lea->Base())
    {
        GenTree* memBase = lea->Base();

        if (emitter::emitIns_valid_imm_for_add(offset))
        {
            if (offset != 0)
            {
                // Then compute target reg from [memBase + offset]
                emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, memBase->gtRegNum, offset);
            }
            else // offset is zero
            {
                if (lea->gtRegNum != memBase->gtRegNum)
                {
                    emit->emitIns_R_R(INS_mov, size, lea->gtRegNum, memBase->gtRegNum);
                }
            }
        }
        else
        {
            // We require a tmpReg to hold the offset
            regNumber tmpReg = lea->GetSingleTempReg();

            // First load tmpReg with the large offset constant
            instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);

            // Then compute target reg from [memBase + tmpReg]
            emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, tmpReg);
        }
    }
    else if (lea->Index())
    {
        // If we encounter a GT_LEA node without a base it means it was created while
        // attempting to optimize an arbitrary arithmetic expression during lowering.
        // This is currently disabled on ARM64 since we need to adjust lowering to account
        // for the simpler instructions ARM64 supports.
        // TODO-ARM64-CQ: Fix this and let LEA optimize arithmetic trees too.
        assert(!"We shouldn't see a baseless address computation during CodeGen for ARM64");
    }

    genProduceReg(lea);
}

//------------------------------------------------------------------------
// isStructReturn: Returns whether the 'treeNode' is returning a struct.
//
// Arguments:
//    treeNode - The tree node to evaluate for being a struct return.
//
// Return Value:
//    Returns true if the 'treeNode' is a GT_RETURN node of type struct.
//    Otherwise returns false.
//
bool CodeGen::isStructReturn(GenTree* treeNode)
{
    // This method can be called for a 'treeNode' of either GT_RETURN or GT_RETFILT.
    // For GT_RETFILT, at the end of a finally block, the return value is always a bool or void.
    noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);

    return varTypeIsStruct(treeNode);
}

//------------------------------------------------------------------------
// genStructReturn: Generates code for returning a struct.
//
// Arguments:
//    treeNode - The GT_RETURN tree node.
//
// Return Value:
//    None
//
// Assumption:
//    op1 of the GT_RETURN node is either a GT_LCL_VAR or a multi-reg GT_CALL
//
void CodeGen::genStructReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN);
    assert(isStructReturn(treeNode));
    GenTree* op1 = treeNode->gtGetOp1();

    if (op1->OperGet() == GT_LCL_VAR)
    {
        GenTreeLclVarCommon* lclVar  = op1->AsLclVarCommon();
        LclVarDsc*           varDsc  = &(compiler->lvaTable[lclVar->gtLclNum]);
        var_types            lclType = genActualType(varDsc->TypeGet());

        assert(varTypeIsStruct(lclType));
        assert(varDsc->lvIsMultiRegRet);

        ReturnTypeDesc retTypeDesc;
        unsigned       regCount;

        retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
        regCount = retTypeDesc.GetReturnRegCount();

        assert(regCount >= 2);

        assert(varTypeIsSIMD(lclType) || op1->isContained());

        if (op1->isContained())
        {
            // Copy the var on the stack into the ABI return registers.
            // TODO: This could be optimized by coalescing two adjacent float loads into one double load.
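            // For example (illustrative), a struct of two doubles returned in d0/d1 produces two
            // loads from the local's stack slot: one at offset 0 into d0 and one at offset 8 into d1.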
            int offset = 0;
            for (unsigned i = 0; i < regCount; ++i)
            {
                var_types type = retTypeDesc.GetReturnRegType(i);
                regNumber reg  = retTypeDesc.GetABIReturnReg(i);
                getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
                offset += genTypeSize(type);
            }
        }
        else
        {
            // Handle the SIMD genStructReturn case
            NYI_ARM("SIMD genStructReturn");

#ifdef _TARGET_ARM64_
            genConsumeRegs(op1);
            regNumber src = op1->gtRegNum;

            // Treat the src register as a homogeneous vector with element size equal to the reg size.
            // Insert the pieces in order.
            for (unsigned i = 0; i < regCount; ++i)
            {
                var_types type = retTypeDesc.GetReturnRegType(i);
                regNumber reg  = retTypeDesc.GetABIReturnReg(i);
                if (varTypeIsFloating(type))
                {
                    // If the register piece is to be returned in a floating point register,
                    // use a vector mov element instruction:
                    //   mov reg[0], src[i]
                    // 'reg' is not treated as a vector here, so the piece lands in its first element,
                    // reg[0]; the upper bits of reg remain unchanged.
                    // For the case where src == reg, since we are only writing reg[0], as long as we iterate
                    // so that src[0] is consumed before writing reg[0], we do not need a temporary.
                    getEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(type), reg, src, 0, i);
                }
                else
                {
                    // If the register piece is to be returned in an integer register,
                    // use a vector mov to general purpose register instruction:
                    //   mov reg, src[i]
                    // This moves from `src[i]` to `reg`.
                    getEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(type), reg, src, i);
                }
            }
#endif // _TARGET_ARM64_
        }
    }
    else // op1 must be multi-reg GT_CALL
    {
        assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());

        genConsumeRegs(op1);

        GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
        GenTreeCall* call      = actualOp1->AsCall();

        ReturnTypeDesc* pRetTypeDesc;
        unsigned        regCount;
        unsigned        matchingCount = 0;

        pRetTypeDesc = call->GetReturnTypeDesc();
        regCount     = pRetTypeDesc->GetReturnRegCount();

        var_types regType[MAX_RET_REG_COUNT];
        regNumber returnReg[MAX_RET_REG_COUNT];
        regNumber allocatedReg[MAX_RET_REG_COUNT];
        regMaskTP srcRegsMask       = 0;
        regMaskTP dstRegsMask       = 0;
        bool      needToShuffleRegs = false; // Set to true if we have to move any registers

        for (unsigned i = 0; i < regCount; ++i)
        {
            regType[i]   = pRetTypeDesc->GetReturnRegType(i);
            returnReg[i] = pRetTypeDesc->GetABIReturnReg(i);

            regNumber reloadReg = REG_NA;
            if (op1->IsCopyOrReload())
            {
                // GT_COPY/GT_RELOAD will have a valid reg for those positions
                // that need to be copied or reloaded.
                reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
            }

            if (reloadReg != REG_NA)
            {
                allocatedReg[i] = reloadReg;
            }
            else
            {
                allocatedReg[i] = call->GetRegNumByIdx(i);
            }

            if (returnReg[i] == allocatedReg[i])
            {
                matchingCount++;
            }
            else // We need to move this value
            {
                // We want to move the value from allocatedReg[i] into returnReg[i],
                // so record these two registers in the src and dst masks.
                //
                srcRegsMask |= genRegMask(allocatedReg[i]);
                dstRegsMask |= genRegMask(returnReg[i]);

                needToShuffleRegs = true;
            }
        }

        if (needToShuffleRegs)
        {
            assert(matchingCount < regCount);

            unsigned  remainingRegCount = regCount - matchingCount;
            regMaskTP extraRegMask      = treeNode->gtRsvdRegs;
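            // Example (illustrative): if the call produced its values in {x1, x0} but the ABI return
            // registers are {x0, x1}, then dstRegsMask == srcRegsMask and no destination register is
            // free. The loop below breaks such a cycle by first moving the lowest 'src' register into
            // the reserved 'extra' register, after which the remaining moves can proceed.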

            while (remainingRegCount > 0)
            {
                // Set 'availableMask' to the 'dst' registers that are not currently holding 'src' registers
                //
                regMaskTP availableMask = dstRegsMask & ~srcRegsMask;

                regMaskTP dstMask;
                regNumber srcReg;
                regNumber dstReg;
                var_types curType   = TYP_UNKNOWN;
                regNumber freeUpReg = REG_NA;

                if (availableMask == 0)
                {
                    // Circular register dependencies:
                    // free up the lowest register in dstRegsMask by moving it to the 'extra' register.

                    assert(dstRegsMask == srcRegsMask);         // this has to be true for us to reach here
                    assert(extraRegMask != 0);                  // we require an 'extra' register
                    assert((extraRegMask & ~dstRegsMask) != 0); // it can't be part of dstRegsMask

                    availableMask = extraRegMask & ~dstRegsMask;

                    regMaskTP srcMask = genFindLowestBit(srcRegsMask);
                    freeUpReg         = genRegNumFromMask(srcMask);
                }

                dstMask = genFindLowestBit(availableMask);
                dstReg  = genRegNumFromMask(dstMask);
                srcReg  = REG_NA;

                if (freeUpReg != REG_NA)
                {
                    // We will free up srcReg by moving it to dstReg, which is an extra register.
                    //
                    srcReg = freeUpReg;

                    // Find the 'srcReg' and set 'curType', change allocatedReg[] to dstReg
                    // and add the new register mask bit to srcRegsMask.
                    //
                    for (unsigned i = 0; i < regCount; ++i)
                    {
                        if (allocatedReg[i] == srcReg)
                        {
                            curType         = regType[i];
                            allocatedReg[i] = dstReg;
                            srcRegsMask |= genRegMask(dstReg);
                        }
                    }
                }
                else // The normal case
                {
                    // Find the 'srcReg' and set 'curType'
                    //
                    for (unsigned i = 0; i < regCount; ++i)
                    {
                        if (returnReg[i] == dstReg)
                        {
                            srcReg  = allocatedReg[i];
                            curType = regType[i];
                        }
                    }
                    // After we perform this move we will have one fewer register to set up.
                    remainingRegCount--;
                }
                assert(curType != TYP_UNKNOWN);

                inst_RV_RV(ins_Copy(curType), dstReg, srcReg, curType);

                // Clear the appropriate bits in srcRegsMask and dstRegsMask
                srcRegsMask &= ~genRegMask(srcReg);
                dstRegsMask &= ~genRegMask(dstReg);

            } // while (remainingRegCount > 0)

        } // (needToShuffleRegs)

    } // op1 must be multi-reg GT_CALL
}

//------------------------------------------------------------------------
// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP.
//
// Notes:
//    On ARM64, this only does the probing; allocating the frame is done when
//    callee-saved registers are saved.
//
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
{
    assert(compiler->compGeneratingProlog);

    if (frameSize == 0)
    {
        return;
    }

    const target_size_t pageSize = compiler->eeGetPageSize();

    assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));

    if (frameSize < pageSize)
    {
#ifdef _TARGET_ARM_
        // Frame size is (0x0008..0x1000)
        inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
#endif // _TARGET_ARM_
    }
    else if (frameSize < compiler->getVeryLargeFrameSize())
    {
        // Frame size is (0x1000..0x3000)
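        //
        // Illustrative example (assuming a 0x1000 byte page): probe one page below SP by loading
        // from [sp - 0x1000], and, for frames of at least 0x2000 bytes, also probe [sp - 0x2000],
        // before SP is actually adjusted.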

        instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
        getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
        regSet.verifyRegUsed(initReg);
        *pInitRegZeroed = false; // The initReg does not contain zero

        if (frameSize >= 0x2000)
        {
            instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize);
            getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
            regSet.verifyRegUsed(initReg);
        }

#ifdef _TARGET_ARM64_
        compiler->unwindPadding();
#else  // !_TARGET_ARM64_
        instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
        compiler->unwindPadding();
        getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg);
#endif // !_TARGET_ARM64_
    }
    else
    {
        // Frame size >= 0x3000
        assert(frameSize >= compiler->getVeryLargeFrameSize());

        // Emit the following sequence to 'tickle' the pages.
        // Note it is important that the stack pointer not change until this is
        // complete, since the tickles could cause a stack overflow and we
        // need to be able to crawl the stack afterward (which means the
        // stack pointer needs to be known).

        instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);

        //
        // Can't have a label inside the ReJIT padding area
        //
        genPrologPadForReJit();

        // TODO-ARM64-Bug?: set the availMask properly!
        regMaskTP availMask =
            (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers
        availMask &= ~maskArgRegsLiveIn;   // Remove all of the incoming argument registers as they are currently live
        availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg

        regNumber rOffset = initReg;
        regNumber rLimit;
        regNumber rTemp;
        regMaskTP tempMask;

        // We pick the next lowest register number for rTemp
        noway_assert(availMask != RBM_NONE);
        tempMask = genFindLowestBit(availMask);
        rTemp    = genRegNumFromMask(tempMask);
        availMask &= ~tempMask;

        // We pick the next lowest register number for rLimit
        noway_assert(availMask != RBM_NONE);
        tempMask = genFindLowestBit(availMask);
        rLimit   = genRegNumFromMask(tempMask);
        availMask &= ~tempMask;

        // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0], which will always succeed. That
        // doesn't make sense.
        // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp.
        //
        //      mov rLimit, -frameSize
        // loop:
        //      ldr rTemp, [sp+rOffset]
        //      sub rOffset, 0x1000     // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding
        //      cmp rOffset, rLimit
        //      bhi loop                // unsigned compare; loop until rOffset reaches rLimit
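        //
        // Illustrative example (0x1000-byte pages, frameSize = 0x5000): the loop probes at SP-relative
        // offsets 0, -0x1000, -0x2000, -0x3000 and -0x4000, then exits once rOffset reaches rLimit
        // (-0x5000); SP itself is not changed until after the loop.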
        noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
        instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize);
        getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
        regSet.verifyRegUsed(rTemp);
#if defined(_TARGET_ARM_)
        getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize);
#elif defined(_TARGET_ARM64_)
        getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
#endif // _TARGET_ARM64_
        getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit);
        getEmitter()->emitIns_J(INS_bhi, NULL, -4);

        *pInitRegZeroed = false; // The initReg does not contain zero

        compiler->unwindPadding();

#ifdef _TARGET_ARM_
        inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL);
#endif // _TARGET_ARM_
    }

#ifdef _TARGET_ARM_
    compiler->unwindAllocStack(frameSize);

    if (!doubleAlignOrFramePointerUsed())
    {
        psiAdjustStackLevel(frameSize);
    }
#endif // _TARGET_ARM_
}

#endif // _TARGET_ARMARCH_