1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7XX XX
8XX Morph XX
9XX XX
10XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12*/
13
14#include "jitpch.h"
15#ifdef _MSC_VER
16#pragma hdrstop
17#endif
18
19#include "allocacheck.h" // for alloca
20
// Convert the given node into a call to the specified helper, passing
// the given argument list.
//
// Tries to fold constants and also adds an edge for the overflow exception.
// Returns the morphed tree.
26GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper)
27{
28 GenTree* result;
29
30 /* If the operand is a constant, we'll try to fold it */
31 if (oper->OperIsConst())
32 {
33 GenTree* oldTree = tree;
34
35 tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
36
37 if (tree != oldTree)
38 {
39 return fgMorphTree(tree);
40 }
41 else if (tree->OperKind() & GTK_CONST)
42 {
43 return fgMorphConst(tree);
44 }
45
46 // assert that oper is unchanged and that it is still a GT_CAST node
47 noway_assert(tree->gtCast.CastOp() == oper);
48 noway_assert(tree->gtOper == GT_CAST);
49 }
50 result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
51 assert(result == tree);
52 return result;
53}
54
55/*****************************************************************************
56 *
57 * Convert the given node into a call to the specified helper passing
58 * the given argument list.
59 */
60
61GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, GenTreeArgList* args, bool morphArgs)
62{
63 // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
64 tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);
65
66 tree->gtCall.gtCallType = CT_HELPER;
67 tree->gtCall.gtCallMethHnd = eeFindHelper(helper);
68 tree->gtCall.gtCallArgs = args;
69 tree->gtCall.gtCallObjp = nullptr;
70 tree->gtCall.gtCallLateArgs = nullptr;
71 tree->gtCall.fgArgInfo = nullptr;
72 tree->gtCall.gtRetClsHnd = nullptr;
73 tree->gtCall.gtCallMoreFlags = 0;
74 tree->gtCall.gtInlineCandidateInfo = nullptr;
75 tree->gtCall.gtControlExpr = nullptr;
76
77#if DEBUG
    // Helper calls are never inline candidates.
79
80 tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
81#endif // DEBUG
82
83#ifdef FEATURE_READYTORUN_COMPILER
84 tree->gtCall.gtEntryPoint.addr = nullptr;
85 tree->gtCall.gtEntryPoint.accessType = IAT_VALUE;
86#endif
87
88#ifndef _TARGET_64BIT_
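    // On 32-bit targets a TYP_LONG helper result comes back in more than one register,
    // so (re)initialize the call's return type descriptor for a long return here.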
89 if (varTypeIsLong(tree))
90 {
91 GenTreeCall* callNode = tree->AsCall();
92 ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
93 retTypeDesc->Reset();
94 retTypeDesc->InitializeLongReturnType(this);
95 callNode->ClearOtherRegs();
96 }
97#endif // !_TARGET_64BIT_
98
99 if (tree->OperMayThrow(this))
100 {
101 tree->gtFlags |= GTF_EXCEPT;
102 }
103 else
104 {
105 tree->gtFlags &= ~GTF_EXCEPT;
106 }
107 tree->gtFlags |= GTF_CALL;
108 if (args)
109 {
110 tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
111 }
112
113 /* Perform the morphing */
114
115 if (morphArgs)
116 {
117 tree = fgMorphArgs(tree->AsCall());
118 }
119
120 return tree;
121}
122
123/*****************************************************************************
124 *
125 * Morph a cast node (we perform some very simple transformations here).
126 */
127
128#ifdef _PREFAST_
129#pragma warning(push)
130#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
131#endif
132GenTree* Compiler::fgMorphCast(GenTree* tree)
133{
134 noway_assert(tree->gtOper == GT_CAST);
135 noway_assert(genTypeSize(TYP_I_IMPL) == TARGET_POINTER_SIZE);
136
137 /* The first sub-operand is the thing being cast */
138
139 GenTree* oper = tree->gtCast.CastOp();
140
141 if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
142 {
143 // Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
144 // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
145 // morphing code to see that type.
146 fgMorphImplicitByRefArgs(oper);
147 }
148
149 var_types srcType = genActualType(oper->TypeGet());
150
151 var_types dstType = tree->CastToType();
152 unsigned dstSize = genTypeSize(dstType);
153
154 // See if the cast has to be done in two steps. R -> I
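    // (Illustrative) e.g. on AMD64, CAST(ulong, floatVal) is first rewritten below as
    // CAST(ulong, CAST(double, floatVal)) and then turned into a call to the DBL2ULNG helper.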
155 if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
156 {
157 if (srcType == TYP_FLOAT
158#if defined(_TARGET_ARM64_)
159 // Arm64: src = float, dst is overflow conversion.
160 // This goes through helper and hence src needs to be converted to double.
161 && tree->gtOverflow()
162#elif defined(_TARGET_AMD64_)
163 // Amd64: src = float, dst = uint64 or overflow conversion.
164 // This goes through helper and hence src needs to be converted to double.
165 && (tree->gtOverflow() || (dstType == TYP_ULONG))
166#elif defined(_TARGET_ARM_)
167 // Arm: src = float, dst = int64/uint64 or overflow conversion.
168 && (tree->gtOverflow() || varTypeIsLong(dstType))
169#else
170 // x86: src = float, dst = uint32/int64/uint64 or overflow conversion.
171 && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT))
172#endif
173 )
174 {
175 oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
176 }
177
        // Do we need to do it in two steps: R -> I, then I -> smallType?
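        // (Illustrative) e.g. CAST(byte, dblVal) is performed as CAST(byte, CAST(int, dblVal))
        // on ARM64/AMD64, or via a TYP_I_IMPL intermediate cast on other targets.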
179 CLANG_FORMAT_COMMENT_ANCHOR;
180
181#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
182 if (dstSize < genTypeSize(TYP_INT))
183 {
184 oper = gtNewCastNodeL(TYP_INT, oper, tree->IsUnsigned(), TYP_INT);
185 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
186 tree->gtFlags &= ~GTF_UNSIGNED;
187 }
188#else
189 if (dstSize < TARGET_POINTER_SIZE)
190 {
191 oper = gtNewCastNodeL(TYP_I_IMPL, oper, false, TYP_I_IMPL);
192 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
193 }
194#endif
195 else
196 {
197 /* Note that if we need to use a helper call then we can not morph oper */
198 if (!tree->gtOverflow())
199 {
200#ifdef _TARGET_ARM64_ // On ARM64 All non-overflow checking conversions can be optimized
201 goto OPTIMIZECAST;
202#else
203 switch (dstType)
204 {
205 case TYP_INT:
206 goto OPTIMIZECAST;
207
208 case TYP_UINT:
209#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
210 goto OPTIMIZECAST;
211#else // _TARGET_X86_
212 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
213#endif // _TARGET_X86_
214
215 case TYP_LONG:
216#ifdef _TARGET_AMD64_
217 // SSE2 has instructions to convert a float/double directly to a long
218 goto OPTIMIZECAST;
219#else // !_TARGET_AMD64_
220 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
221#endif // !_TARGET_AMD64_
222
223 case TYP_ULONG:
224 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
225 default:
226 break;
227 }
228#endif // _TARGET_ARM64_
229 }
230 else
231 {
232 switch (dstType)
233 {
234 case TYP_INT:
235 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
236 case TYP_UINT:
237 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
238 case TYP_LONG:
239 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
240 case TYP_ULONG:
241 return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
242 default:
243 break;
244 }
245 }
246 noway_assert(!"Unexpected dstType");
247 }
248 }
249#ifndef _TARGET_64BIT_
250 // The code generation phase (for x86 & ARM32) does not handle casts
251 // directly from [u]long to anything other than [u]int. Insert an
252 // intermediate cast to native int.
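    // (Illustrative) e.g. CAST(short, longVal) becomes CAST(short, CAST(TYP_I_IMPL, longVal)).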
253 else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
254 {
255 oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL);
256 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
257 tree->gtFlags &= ~GTF_UNSIGNED;
258 }
259#endif //!_TARGET_64BIT_
260
261#ifdef _TARGET_ARM_
262 else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
263 !varTypeIsLong(oper->gtCast.CastOp()))
264 {
        // Optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long, because there is no long-to-float helper, so it must be
        // done in two steps. This happens semi-frequently because there is no IL 'conv.r4.un'.
268 oper->gtType = TYP_FLOAT;
269 oper->CastToType() = TYP_FLOAT;
270 return fgMorphTree(oper);
271 }
272 // converts long/ulong --> float/double casts into helper calls.
273 else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
274 {
275 if (dstType == TYP_FLOAT)
276 {
277 // there is only a double helper, so we
            // - change the dstType to double
279 // - insert a cast from double to float
280 // - recurse into the resulting tree
281 tree->CastToType() = TYP_DOUBLE;
282 tree->gtType = TYP_DOUBLE;
283
284 tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);
285
286 return fgMorphTree(tree);
287 }
288 if (tree->gtFlags & GTF_UNSIGNED)
289 return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
290 return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
291 }
292#endif //_TARGET_ARM_
293
294#ifdef _TARGET_AMD64_
295 // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversions as one-step operations:
297 // a) Long -> R4/R8
298 // b) U8 -> R8
299 //
    // The following conversions are performed as two-step operations using the above:
    // U4 -> R4/8 = U4 -> Long -> R4/8
302 // U8 -> R4 = U8 -> R8 -> R4
303 else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
304 {
305 srcType = genUnsignedType(srcType);
306
307 if (srcType == TYP_ULONG)
308 {
309 if (dstType == TYP_FLOAT)
310 {
311 // Codegen can handle U8 -> R8 conversion.
312 // U8 -> R4 = U8 -> R8 -> R4
                // - change the dstType to double
314 // - insert a cast from double to float
315 // - recurse into the resulting tree
316 tree->CastToType() = TYP_DOUBLE;
317 tree->gtType = TYP_DOUBLE;
318 tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);
319 return fgMorphTree(tree);
320 }
321 }
322 else if (srcType == TYP_UINT)
323 {
324 oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
325 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
326 tree->gtFlags &= ~GTF_UNSIGNED;
327 }
328 }
329#endif // _TARGET_AMD64_
330
331#ifdef _TARGET_X86_
332 // Do we have to do two step U4/8 -> R4/8 ?
333 else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
334 {
335 srcType = genUnsignedType(srcType);
336
337 if (srcType == TYP_ULONG)
338 {
339 return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
340 }
341 else if (srcType == TYP_UINT)
342 {
343 oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
344 oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
345 tree->gtFlags &= ~GTF_UNSIGNED;
346 return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
347 }
348 }
349 else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
350 {
351 return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
352 }
353#endif //_TARGET_X86_
354 else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
355 {
        // We are casting away GC information. We would like to just
        // change the type to int, but this gives the emitter fits because
        // it believes the variable is a GC variable at the beginning of the
        // instruction group and it is not turned non-GC by the code generator.
        // We fix this by copying the GC pointer to a non-GC pointer temp.
361 noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");
362
363 // We generate an assignment to an int and then do the cast from an int. With this we avoid
364 // the gc problem and we allow casts to bytes, longs, etc...
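        // (Illustrative) The result is roughly COMMA(ASG(tmpN, oper), CAST(dstType, LCL_VAR tmpN)).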
365 unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
366 oper->gtType = TYP_I_IMPL;
367 GenTree* asg = gtNewTempAssign(lclNum, oper);
368 oper->gtType = srcType;
369
370 // do the real cast
371 GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType);
372
373 // Generate the comma tree
374 oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
375
376 return fgMorphTree(oper);
377 }
378
379 // Look for narrowing casts ([u]long -> [u]int) and try to push them
380 // down into the operand before morphing it.
381 //
    // It doesn't matter whether this cast is from ulong or long (i.e. if
383 // GTF_UNSIGNED is set) because the transformation is only applied to
384 // overflow-insensitive narrowing casts, which always silently truncate.
385 //
386 // Note that casts from [u]long to small integer types are handled above.
387 if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
388 {
389 // As a special case, look for overflow-sensitive casts of an AND
390 // expression, and see if the second operand is a small constant. Since
391 // the result of an AND is bound by its smaller operand, it may be
392 // possible to prove that the cast won't overflow, which will in turn
393 // allow the cast's operand to be transformed.
394 if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
395 {
396 GenTree* andOp2 = oper->gtOp.gtOp2;
397
398 // Special case to the special case: AND with a casted int.
399 if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
400 {
401 // gtFoldExprConst will deal with whether the cast is signed or
402 // unsigned, or overflow-sensitive.
403 andOp2 = gtFoldExprConst(andOp2);
404 oper->gtOp.gtOp2 = andOp2;
405 }
406
407 // Look for a constant less than 2^{32} for a cast to uint, or less
408 // than 2^{31} for a cast to int.
409 int maxWidth = (dstType == TYP_UINT) ? 32 : 31;
410
411 if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
412 {
413 // This cast can't overflow.
414 tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
415 }
416 }
417
418 // Only apply this transformation during global morph,
419 // when neither the cast node nor the oper node may throw an exception
420 // based on the upper 32 bits.
421 //
422 if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
423 {
            // For these operations the lower 32 bits of the result depend only
            // upon the lower 32 bits of the operands.
426 //
427 bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG);
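            // (Illustrative) e.g. CAST(int, ADD(long x, long y)) can become
            // ADD(CAST(int, x), CAST(int, y)), evaluated as a 32-bit add.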
428
429 // For long LSH cast to int, there is a discontinuity in behavior
430 // when the shift amount is 32 or larger.
431 //
432 // CAST(INT, LSH(1LL, 31)) == LSH(1, 31)
433 // LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31)
434 //
435 // CAST(INT, LSH(1LL, 32)) == 0
436 // LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1
437 //
438 // So some extra validation is needed.
439 //
440 if (oper->OperIs(GT_LSH))
441 {
442 GenTree* shiftAmount = oper->gtOp.gtOp2;
443
444 // Expose constant value for shift, if possible, to maximize the number
445 // of cases we can handle.
446 shiftAmount = gtFoldExpr(shiftAmount);
447 oper->gtOp.gtOp2 = shiftAmount;
448
449#if DEBUG
450 // We may remorph the shift amount tree again later, so clear any morphed flag.
451 shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
452#endif // DEBUG
453
454 if (shiftAmount->IsIntegralConst())
455 {
456 const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue();
457
458 if ((shiftAmountValue >= 64) || (shiftAmountValue < 0))
459 {
460 // Shift amount is large enough or negative so result is undefined.
461 // Don't try to optimize.
462 assert(!canPushCast);
463 }
464 else if ((shiftAmountValue >= 32) && ((tree->gtFlags & GTF_ALL_EFFECT) == 0))
465 {
466 // Result of the shift is zero.
467 DEBUG_DESTROY_NODE(tree);
468 GenTree* zero = gtNewZeroConNode(TYP_INT);
469 return fgMorphTree(zero);
470 }
471 else
472 {
473 // Shift amount is positive and small enough that we can push the cast through.
474 canPushCast = true;
475 }
476 }
477 else
478 {
479 // Shift amount is unknown. We can't optimize this case.
480 assert(!canPushCast);
481 }
482 }
483
484 if (canPushCast)
485 {
486 DEBUG_DESTROY_NODE(tree);
487
488 // Insert narrowing casts for op1 and op2.
489 oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, false, dstType);
490 if (oper->gtOp.gtOp2 != nullptr)
491 {
492 oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, false, dstType);
493 }
494
495 // Clear the GT_MUL_64RSLT if it is set.
496 if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
497 {
498 oper->gtFlags &= ~GTF_MUL_64RSLT;
499 }
500
501 // The operation now produces a 32-bit result.
502 oper->gtType = TYP_INT;
503
504 // Remorph the new tree as the casts that we added may be folded away.
505 return fgMorphTree(oper);
506 }
507 }
508 }
509
510OPTIMIZECAST:
511 noway_assert(tree->gtOper == GT_CAST);
512
513 /* Morph the operand */
514 tree->gtCast.CastOp() = oper = fgMorphTree(oper);
515
516 /* Reset the call flag */
517 tree->gtFlags &= ~GTF_CALL;
518
519 /* Reset the assignment flag */
520 tree->gtFlags &= ~GTF_ASG;
521
522 /* unless we have an overflow cast, reset the except flag */
523 if (!tree->gtOverflow())
524 {
525 tree->gtFlags &= ~GTF_EXCEPT;
526 }
527
528 /* Just in case new side effects were introduced */
529 tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);
530
531 if (!gtIsActiveCSE_Candidate(tree) && !gtIsActiveCSE_Candidate(oper))
532 {
533 srcType = oper->TypeGet();
534
535 /* See if we can discard the cast */
536 if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
537 {
538 if (tree->IsUnsigned() && !varTypeIsUnsigned(srcType))
539 {
540 if (varTypeIsSmall(srcType))
541 {
542 // Small signed values are automatically sign extended to TYP_INT. If the cast is interpreting the
543 // resulting TYP_INT value as unsigned then the "sign" bits end up being "value" bits and srcType
544 // must be TYP_UINT, not the original small signed type. Otherwise "conv.ovf.i2.un(i1(-1))" is
545 // wrongly treated as a widening conversion from i1 to i2 when in fact it is a narrowing conversion
546 // from u4 to i2.
547 srcType = genActualType(srcType);
548 }
549
550 srcType = genUnsignedType(srcType);
551 }
552
553 if (srcType == dstType)
554 { // Certainly if they are identical it is pointless
555 goto REMOVE_CAST;
556 }
557
558 if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
559 {
560 unsigned varNum = oper->gtLclVarCommon.gtLclNum;
561 LclVarDsc* varDsc = &lvaTable[varNum];
562 if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
563 {
564 goto REMOVE_CAST;
565 }
566 }
567
568 bool unsignedSrc = varTypeIsUnsigned(srcType);
569 bool unsignedDst = varTypeIsUnsigned(dstType);
570 bool signsDiffer = (unsignedSrc != unsignedDst);
571 unsigned srcSize = genTypeSize(srcType);
572
            // For same-sized casts with the same signedness, or for
            // non-overflow casts, we discard them as well.
575 if (srcSize == dstSize)
576 {
577 /* This should have been handled above */
578 noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));
579
580 if (!signsDiffer)
581 {
582 goto REMOVE_CAST;
583 }
584
585 if (!tree->gtOverflow())
586 {
587 /* For small type casts, when necessary we force
588 the src operand to the dstType and allow the
589 implied load from memory to perform the casting */
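                    // (Illustrative) e.g. for CAST(ubyte, IND(byte)) we simply retype the
                    // indirection to ubyte below and drop the cast.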
590 if (varTypeIsSmall(srcType))
591 {
592 switch (oper->gtOper)
593 {
594 case GT_IND:
595 case GT_CLS_VAR:
596 case GT_LCL_FLD:
597 case GT_ARR_ELEM:
598 oper->gtType = dstType;
599 // We're changing the type here so we need to update the VN;
600 // in other cases we discard the cast without modifying oper
601 // so the VN doesn't change.
602 oper->SetVNsFromNode(tree);
603 goto REMOVE_CAST;
604 default:
605 break;
606 }
607 }
608 else
609 {
610 goto REMOVE_CAST;
611 }
612 }
613 }
614 else if (srcSize < dstSize) // widening cast
615 {
616 // Keep any long casts
617 if (dstSize == sizeof(int))
618 {
619 // Only keep signed to unsigned widening cast with overflow check
620 if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
621 {
622 goto REMOVE_CAST;
623 }
624 }
625
626 // Widening casts from unsigned or to signed can never overflow
627
628 if (unsignedSrc || !unsignedDst)
629 {
630 tree->gtFlags &= ~GTF_OVERFLOW;
631 if (!(oper->gtFlags & GTF_EXCEPT))
632 {
633 tree->gtFlags &= ~GTF_EXCEPT;
634 }
635 }
636 }
637 else // if (srcSize > dstSize)
638 {
639 // Try to narrow the operand of the cast and discard the cast
640 // Note: Do not narrow a cast that is marked as a CSE
641 // And do not narrow if the oper is marked as a CSE either
642 //
643 if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
644 optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
645 {
646 optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);
647
648 /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
649 if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
650 {
651 oper = oper->gtCast.CastOp();
652 }
653 goto REMOVE_CAST;
654 }
655 }
656 }
657
658 switch (oper->gtOper)
659 {
660 /* If the operand is a constant, we'll fold it */
661 case GT_CNS_INT:
662 case GT_CNS_LNG:
663 case GT_CNS_DBL:
664 case GT_CNS_STR:
665 {
666 GenTree* oldTree = tree;
667
668 tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
669
670 // Did we get a comma throw as a result of gtFoldExprConst?
671 if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
672 {
673 noway_assert(fgIsCommaThrow(tree));
674 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
675 fgMorphTreeDone(tree);
676 return tree;
677 }
678 else if (tree->gtOper != GT_CAST)
679 {
680 return tree;
681 }
682
683 noway_assert(tree->gtCast.CastOp() == oper); // unchanged
684 }
685 break;
686
687 case GT_CAST:
688 /* Check for two consecutive casts into the same dstType */
689 if (!tree->gtOverflow())
690 {
691 var_types dstType2 = oper->CastToType();
692 if (dstType == dstType2)
693 {
694 goto REMOVE_CAST;
695 }
696 }
697 break;
698
699 case GT_COMMA:
700 // Check for cast of a GT_COMMA with a throw overflow
                // Bug 110829: Since this optimization will bash the types,
                // neither oper nor commaOp2 can be CSE candidates.
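                // (Illustrative) e.g. CAST(long, COMMA(throw, x)) becomes COMMA(throw, 0L),
                // with the comma node retyped to TYP_LONG.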
703 if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
704 {
705 GenTree* commaOp2 = oper->gtOp.gtOp2;
706
707 if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
708 {
709 // need type of oper to be same as tree
710 if (tree->gtType == TYP_LONG)
711 {
712 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
713 commaOp2->gtIntConCommon.SetLngValue(0);
714 /* Change the types of oper and commaOp2 to TYP_LONG */
715 oper->gtType = commaOp2->gtType = TYP_LONG;
716 }
717 else if (varTypeIsFloating(tree->gtType))
718 {
719 commaOp2->ChangeOperConst(GT_CNS_DBL);
720 commaOp2->gtDblCon.gtDconVal = 0.0;
721 // Change the types of oper and commaOp2
722 oper->gtType = commaOp2->gtType = tree->gtType;
723 }
724 else
725 {
726 commaOp2->ChangeOperConst(GT_CNS_INT);
727 commaOp2->gtIntCon.gtIconVal = 0;
728 /* Change the types of oper and commaOp2 to TYP_INT */
729 oper->gtType = commaOp2->gtType = TYP_INT;
730 }
731 }
732
733 if (vnStore != nullptr)
734 {
735 fgValueNumberTreeConst(commaOp2);
736 }
737
738 /* Return the GT_COMMA node as the new tree */
739 return oper;
740 }
741 break;
742
743 default:
744 break;
745 } /* end switch (oper->gtOper) */
746 }
747
748 if (tree->gtOverflow())
749 {
750 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW);
751 }
752
753 return tree;
754
755REMOVE_CAST:
    /* Here we've eliminated the cast, so just return its operand */
757 assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate
758
759 DEBUG_DESTROY_NODE(tree);
760 return oper;
761}
762#ifdef _PREFAST_
763#pragma warning(pop)
764#endif
765
766/*****************************************************************************
767 *
768 * Perform an unwrap operation on a Proxy object
769 */
770
771GenTree* Compiler::fgUnwrapProxy(GenTree* objRef)
772{
773 assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));
774
775 CORINFO_EE_INFO* pInfo = eeGetEEInfo();
776 GenTree* addTree;
777
778 // Perform the unwrap:
779 //
780 // This requires two extra indirections.
781 // We mark these indirections as 'invariant' and
782 // the CSE logic will hoist them when appropriate.
783 //
784 // Note that each dereference is a GC pointer
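    // (Illustrative) The resulting tree is:
    //   IND(ADD(IND(ADD(objRef, offsetOfTransparentProxyRP)), offsetOfRealProxyServer))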
785
786 addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));
787
788 objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
789 objRef->gtFlags |= GTF_IND_INVARIANT;
790
791 addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));
792
793 objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
794 objRef->gtFlags |= GTF_IND_INVARIANT;
795
    // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
797 return objRef;
798}
799
800/*****************************************************************************
801 *
802 * Morph an argument list; compute the pointer argument count in the process.
803 *
804 * NOTE: This function can be called from any place in the JIT to perform re-morphing
805 * due to graph altering modifications such as copy / constant propagation
806 */
807
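//------------------------------------------------------------------------
// UpdateGT_LISTFlags: Recursively recompute GTF_ALL_EFFECT on a GT_LIST chain.
//
// Arguments:
//    tree - the head of the GT_LIST chain
//
// Return Value:
//    The updated flags of the given list node.
//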
808unsigned UpdateGT_LISTFlags(GenTree* tree)
809{
810 assert(tree->gtOper == GT_LIST);
811
812 unsigned flags = 0;
813 if (tree->gtOp.gtOp2)
814 {
815 flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
816 }
817
818 flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
819
820 tree->gtFlags &= ~GTF_ALL_EFFECT;
821 tree->gtFlags |= flags;
822
823 return tree->gtFlags;
824}
825
826#ifdef DEBUG
827void fgArgTabEntry::Dump()
828{
829 printf("fgArgTabEntry[arg %u", argNum);
830 printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper));
831 if (regNum != REG_STK)
832 {
833 printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s");
834 for (unsigned i = 0; i < numRegs; i++)
835 {
836 printf(" %s", getRegName(regNums[i]));
837 }
838 }
839 if (numSlots > 0)
840 {
841 printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
842 }
843 printf(", align=%u", alignment);
844 if (isLateArg())
845 {
846 printf(", lateArgInx=%u", lateArgInx);
847 }
848 if (isSplit)
849 {
850 printf(", isSplit");
851 }
852 if (needTmp)
853 {
854 printf(", tmpNum=V%02u", tmpNum);
855 }
856 if (needPlace)
857 {
858 printf(", needPlace");
859 }
860 if (isTmp)
861 {
862 printf(", isTmp");
863 }
864 if (processed)
865 {
866 printf(", processed");
867 }
868 if (isHfaRegArg)
869 {
870 printf(", isHfa");
871 }
872 if (isBackFilled)
873 {
874 printf(", isBackFilled");
875 }
876 if (isNonStandard)
877 {
878 printf(", isNonStandard");
879 }
880 if (isStruct)
881 {
882 printf(", isStruct");
883 }
884 printf("]\n");
885}
886#endif
887
888fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
889{
890 compiler = comp;
891 callTree = call;
892 argCount = 0; // filled in arg count, starts at zero
893 nextSlotNum = INIT_ARG_STACK_SLOT;
894 stkLevel = 0;
895#if defined(UNIX_X86_ABI)
896 alignmentDone = false;
897 stkSizeBytes = 0;
898 padStkAlign = 0;
899#endif
900#if FEATURE_FIXED_OUT_ARGS
901 outArgSize = 0;
902#endif
903
904 argTableSize = numArgs; // the allocated table size
905
906 hasRegArgs = false;
907 hasStackArgs = false;
908 argsComplete = false;
909 argsSorted = false;
910
911 if (argTableSize == 0)
912 {
913 argTable = nullptr;
914 }
915 else
916 {
917 argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
918 }
919}
920
921/*****************************************************************************
922 *
923 * fgArgInfo Copy Constructor
924 *
925 * This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  newCall is an exact copy of oldCall.
928 * We have to take care since the argument information
929 * in the argTable contains pointers that must point to the
930 * new arguments and not the old arguments.
931 */
932fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
933{
934 fgArgInfo* oldArgInfo = oldCall->gtCall.fgArgInfo;
935
936 compiler = oldArgInfo->compiler;
937 callTree = newCall;
938 argCount = 0; // filled in arg count, starts at zero
939 nextSlotNum = INIT_ARG_STACK_SLOT;
940 stkLevel = oldArgInfo->stkLevel;
941#if defined(UNIX_X86_ABI)
942 alignmentDone = oldArgInfo->alignmentDone;
943 stkSizeBytes = oldArgInfo->stkSizeBytes;
944 padStkAlign = oldArgInfo->padStkAlign;
945#endif
946#if FEATURE_FIXED_OUT_ARGS
947 outArgSize = oldArgInfo->outArgSize;
948#endif
949 argTableSize = oldArgInfo->argTableSize;
950 argsComplete = false;
951 argTable = nullptr;
952 if (argTableSize > 0)
953 {
954 argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize];
955 for (unsigned inx = 0; inx < argTableSize; inx++)
956 {
957 argTable[inx] = nullptr;
958 }
959 }
960
961 assert(oldArgInfo->argsComplete);
962
    // We create local, artificial GenTreeArgLists that include the gtCallObjp, if it exists, as the first
    // argument, so we can iterate over these argument lists more uniformly.
    // We need to provide a temporary non-null first argument to these constructors; if we use them, we'll replace it.
966 GenTreeArgList* newArgs;
967 GenTreeArgList newArgObjp(newCall, newCall->gtCallArgs);
968 GenTreeArgList* oldArgs;
969 GenTreeArgList oldArgObjp(oldCall, oldCall->gtCallArgs);
970
971 if (newCall->gtCallObjp == nullptr)
972 {
973 assert(oldCall->gtCallObjp == nullptr);
974 newArgs = newCall->gtCallArgs;
975 oldArgs = oldCall->gtCallArgs;
976 }
977 else
978 {
979 assert(oldCall->gtCallObjp != nullptr);
980 newArgObjp.Current() = newCall->gtCallArgs;
981 newArgs = &newArgObjp;
982 oldArgObjp.Current() = oldCall->gtCallObjp;
983 oldArgs = &oldArgObjp;
984 }
985
986 GenTree* newCurr;
987 GenTree* oldCurr;
988 GenTreeArgList* newParent = nullptr;
989 GenTreeArgList* oldParent = nullptr;
990 fgArgTabEntry** oldArgTable = oldArgInfo->argTable;
991 bool scanRegArgs = false;
992
993 while (newArgs)
994 {
995 /* Get hold of the next argument values for the oldCall and newCall */
996
997 newCurr = newArgs->Current();
998 oldCurr = oldArgs->Current();
999 if (newArgs != &newArgObjp)
1000 {
1001 newParent = newArgs;
1002 oldParent = oldArgs;
1003 }
1004 else
1005 {
1006 assert(newParent == nullptr && oldParent == nullptr);
1007 }
1008 newArgs = newArgs->Rest();
1009 oldArgs = oldArgs->Rest();
1010
1011 fgArgTabEntry* oldArgTabEntry = nullptr;
1012 fgArgTabEntry* newArgTabEntry = nullptr;
1013
1014 for (unsigned inx = 0; inx < argTableSize; inx++)
1015 {
1016 oldArgTabEntry = oldArgTable[inx];
1017
1018 if (oldArgTabEntry->parent == oldParent)
1019 {
1020 assert((oldParent == nullptr) == (newParent == nullptr));
1021
1022 // We have found the matching "parent" field in oldArgTabEntry
1023
1024 newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
1025
1026 // First block copy all fields
1027 //
1028 *newArgTabEntry = *oldArgTabEntry;
1029
1030 // Then update all GenTree* fields in the newArgTabEntry
1031 //
1032 newArgTabEntry->parent = newParent;
1033
1034 // The node field is likely to have been updated
1035 // to point at a node in the gtCallLateArgs list
1036 //
1037 if (oldArgTabEntry->node == oldCurr)
1038 {
1039 // node is not pointing into the gtCallLateArgs list
1040 newArgTabEntry->node = newCurr;
1041 }
1042 else
1043 {
1044 // node must be pointing into the gtCallLateArgs list
1045 //
1046 // We will fix this pointer up in the next loop
1047 //
1048 newArgTabEntry->node = nullptr; // For now we assign a NULL to this field
1049
1050 scanRegArgs = true;
1051 }
1052
1053 // Now initialize the proper element in the argTable array
1054 //
1055 argTable[inx] = newArgTabEntry;
1056 break;
1057 }
1058 }
1059 // We should have found the matching oldArgTabEntry and created the newArgTabEntry
1060 //
1061 assert(newArgTabEntry != nullptr);
1062 }
1063
1064 if (scanRegArgs)
1065 {
1066 newArgs = newCall->gtCallLateArgs;
1067 oldArgs = oldCall->gtCallLateArgs;
1068
1069 while (newArgs)
1070 {
1071 /* Get hold of the next argument values for the oldCall and newCall */
1072
1073 assert(newArgs->OperIsList());
1074
1075 newCurr = newArgs->Current();
1076 newArgs = newArgs->Rest();
1077
1078 assert(oldArgs->OperIsList());
1079
1080 oldCurr = oldArgs->Current();
1081 oldArgs = oldArgs->Rest();
1082
1083 fgArgTabEntry* oldArgTabEntry = nullptr;
1084 fgArgTabEntry* newArgTabEntry = nullptr;
1085
1086 for (unsigned inx = 0; inx < argTableSize; inx++)
1087 {
1088 oldArgTabEntry = oldArgTable[inx];
1089
1090 if (oldArgTabEntry->node == oldCurr)
1091 {
1092 // We have found the matching "node" field in oldArgTabEntry
1093
1094 newArgTabEntry = argTable[inx];
1095 assert(newArgTabEntry != nullptr);
1096
1097 // update the "node" GenTree* fields in the newArgTabEntry
1098 //
1099 assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field
1100
1101 newArgTabEntry->node = newCurr;
1102 break;
1103 }
1104 }
1105 }
1106 }
1107
1108 argCount = oldArgInfo->argCount;
1109 nextSlotNum = oldArgInfo->nextSlotNum;
1110 hasRegArgs = oldArgInfo->hasRegArgs;
1111 hasStackArgs = oldArgInfo->hasStackArgs;
1112 argsComplete = true;
1113 argsSorted = true;
1114}
1115
1116void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry)
1117{
1118 assert(argCount < argTableSize);
1119 argTable[argCount] = curArgTabEntry;
1120 argCount++;
1121}
1122
1123fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum,
1124 GenTree* node,
1125 GenTree* parent,
1126 regNumber regNum,
1127 unsigned numRegs,
1128 unsigned alignment,
1129 bool isStruct,
1130 bool isVararg /*=false*/)
1131{
1132 fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
1133
1134 // Any additional register numbers are set by the caller.
1135 // This is primarily because on ARM we don't yet know if it
1136 // will be split or if it is a double HFA, so the number of registers
1137 // may actually be less.
1138 curArgTabEntry->setRegNum(0, regNum);
1139
1140 curArgTabEntry->argNum = argNum;
1141 curArgTabEntry->node = node;
1142 curArgTabEntry->parent = parent;
1143 curArgTabEntry->slotNum = 0;
1144 curArgTabEntry->numRegs = numRegs;
1145 curArgTabEntry->numSlots = 0;
1146 curArgTabEntry->alignment = alignment;
1147 curArgTabEntry->lateArgInx = UINT_MAX;
1148 curArgTabEntry->tmpNum = BAD_VAR_NUM;
1149 curArgTabEntry->isSplit = false;
1150 curArgTabEntry->isTmp = false;
1151 curArgTabEntry->needTmp = false;
1152 curArgTabEntry->needPlace = false;
1153 curArgTabEntry->processed = false;
1154#ifdef FEATURE_HFA
1155 curArgTabEntry->_isHfaArg = false;
1156#endif
1157 curArgTabEntry->isBackFilled = false;
1158 curArgTabEntry->isNonStandard = false;
1159 curArgTabEntry->isStruct = isStruct;
1160 curArgTabEntry->isVararg = isVararg;
1161
1162 hasRegArgs = true;
1163 AddArg(curArgTabEntry);
1164 return curArgTabEntry;
1165}
1166
1167#if defined(UNIX_AMD64_ABI)
1168fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum,
1169 GenTree* node,
1170 GenTree* parent,
1171 regNumber regNum,
1172 unsigned numRegs,
1173 unsigned alignment,
1174 const bool isStruct,
1175 const bool isVararg,
1176 const regNumber otherRegNum,
1177 const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
1178{
1179 fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment, isStruct, isVararg);
1180 assert(curArgTabEntry != nullptr);
1181
1182 curArgTabEntry->isStruct = isStruct; // is this a struct arg
1183
1184 curArgTabEntry->checkIsStruct();
1185 assert(numRegs <= 2);
1186 if (numRegs == 2)
1187 {
1188 curArgTabEntry->setRegNum(1, otherRegNum);
1189 }
1190
1191 if (isStruct && structDescPtr != nullptr)
1192 {
1193 curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
1194 }
1195
1196 return curArgTabEntry;
1197}
1198#endif // defined(UNIX_AMD64_ABI)
1199
1200fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum,
1201 GenTree* node,
1202 GenTree* parent,
1203 unsigned numSlots,
1204 unsigned alignment,
1205 bool isStruct,
1206 bool isVararg /*=false*/)
1207{
1208 fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
1209
1210 nextSlotNum = roundUp(nextSlotNum, alignment);
1211
1212 curArgTabEntry->setRegNum(0, REG_STK);
1213 curArgTabEntry->argNum = argNum;
1214 curArgTabEntry->node = node;
1215 curArgTabEntry->parent = parent;
1216 curArgTabEntry->slotNum = nextSlotNum;
1217 curArgTabEntry->numRegs = 0;
1218 curArgTabEntry->numSlots = numSlots;
1219 curArgTabEntry->alignment = alignment;
1220 curArgTabEntry->lateArgInx = UINT_MAX;
1221 curArgTabEntry->tmpNum = BAD_VAR_NUM;
1222 curArgTabEntry->isSplit = false;
1223 curArgTabEntry->isTmp = false;
1224 curArgTabEntry->needTmp = false;
1225 curArgTabEntry->needPlace = false;
1226 curArgTabEntry->processed = false;
1227#ifdef FEATURE_HFA
1228 curArgTabEntry->_isHfaArg = false;
1229#endif
1230 curArgTabEntry->isBackFilled = false;
1231 curArgTabEntry->isNonStandard = false;
1232 curArgTabEntry->isStruct = isStruct;
1233 curArgTabEntry->isVararg = isVararg;
1234
1235 hasStackArgs = true;
1236 AddArg(curArgTabEntry);
1237
1238 nextSlotNum += numSlots;
1239 return curArgTabEntry;
1240}
1241
1242void fgArgInfo::RemorphReset()
1243{
1244 nextSlotNum = INIT_ARG_STACK_SLOT;
1245}
1246
1247//------------------------------------------------------------------------
1248// UpdateRegArg: Update the given fgArgTabEntry while morphing.
1249//
1250// Arguments:
1251// curArgTabEntry - the fgArgTabEntry to update.
1252// node - the tree node that defines the argument
//    reMorphing - a boolean value indicating whether we are remorphing the call
1254//
1255// Assumptions:
1256// This must have already been determined to be at least partially passed in registers.
1257//
1258void fgArgInfo::UpdateRegArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing)
1259{
1260 bool isLateArg = curArgTabEntry->isLateArg();
1261 // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa.
1262 assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) ||
1263 (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0)));
1264
1265 assert(curArgTabEntry->numRegs != 0);
1266
1267 if (curArgTabEntry->parent != nullptr)
1268 {
1269 assert(curArgTabEntry->parent->OperIsList());
1270 assert(curArgTabEntry->parent->Current() == node);
1271 }
1272
1273 if (curArgTabEntry->node != node)
1274 {
1275 if (reMorphing)
1276 {
1277 // Find the arg in the late args list.
1278 GenTree* argx = Compiler::gtArgNodeByLateArgInx(callTree, curArgTabEntry->lateArgInx);
1279 if (curArgTabEntry->node != argx)
1280 {
1281 curArgTabEntry->node = argx;
1282 }
1283 }
1284 else
1285 {
1286 assert(!isLateArg);
1287 curArgTabEntry->node = node;
1288 }
1289 }
1290}
1291
1292//------------------------------------------------------------------------
1293// UpdateStkArg: Update the given fgArgTabEntry while morphing.
1294//
1295// Arguments:
1296// curArgTabEntry - the fgArgTabEntry to update.
1297// node - the tree node that defines the argument
//    reMorphing - a boolean value indicating whether we are remorphing the call
1299//
1300// Assumptions:
1301// This must have already been determined to be passed on the stack.
1302//
1303void fgArgInfo::UpdateStkArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing)
1304{
1305 bool isLateArg = curArgTabEntry->isLateArg();
1306 // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa.
1307 assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) ||
1308 (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0)));
1309
1310 noway_assert(curArgTabEntry->parent != nullptr);
1311 assert((curArgTabEntry->regNum == REG_STK) || curArgTabEntry->isSplit);
1312 assert(curArgTabEntry->parent->OperIsList());
1313 assert(curArgTabEntry->parent->Current() == node);
1314 nextSlotNum = (unsigned)roundUp(nextSlotNum, curArgTabEntry->alignment);
1315 assert(curArgTabEntry->slotNum == nextSlotNum);
1316
1317 if (curArgTabEntry->node != node)
1318 {
1319#if FEATURE_FIXED_OUT_ARGS
1320 if (isLateArg)
1321 {
1322 GenTree* argx = nullptr;
1323 unsigned lateArgInx = curArgTabEntry->lateArgInx;
1324
1325 // Traverse the late argument list to find this argument so that we can update it.
1326 unsigned listInx = 0;
1327 for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), listInx++)
1328 {
1329 argx = list->Current();
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs.
1331 if (listInx == lateArgInx)
1332 {
1333 break;
1334 }
1335 }
1336 assert(listInx == lateArgInx);
1337 assert(lateArgInx == curArgTabEntry->lateArgInx);
1338
1339 if (curArgTabEntry->node != argx)
1340 {
1341 curArgTabEntry->node = argx;
1342 }
1343 }
1344 else
1345#endif // FEATURE_FIXED_OUT_ARGS
1346 {
1347 curArgTabEntry->node = node;
1348 }
1349 }
1350 nextSlotNum += curArgTabEntry->numSlots;
1351}
1352
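//------------------------------------------------------------------------
// SplitArg: Record that the given argument is split between registers and stack.
//
// Arguments:
//    argNum   - the argument number (position) of the arg to split
//    numRegs  - the number of registers used for the first portion of the arg
//    numSlots - the number of stack slots used for the remaining portion
//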
1353void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
1354{
1355 fgArgTabEntry* curArgTabEntry = nullptr;
1356 assert(argNum < argCount);
1357 for (unsigned inx = 0; inx < argCount; inx++)
1358 {
1359 curArgTabEntry = argTable[inx];
1360 if (curArgTabEntry->argNum == argNum)
1361 {
1362 break;
1363 }
1364 }
1365
1366 assert(numRegs > 0);
1367 assert(numSlots > 0);
1368
1369 if (argsComplete)
1370 {
1371 assert(curArgTabEntry->isSplit == true);
1372 assert(curArgTabEntry->numRegs == numRegs);
1373 assert(curArgTabEntry->numSlots == numSlots);
1374 assert(hasStackArgs == true);
1375 }
1376 else
1377 {
1378 curArgTabEntry->isSplit = true;
1379 curArgTabEntry->numRegs = numRegs;
1380 curArgTabEntry->numSlots = numSlots;
1381 hasStackArgs = true;
1382 }
1383 nextSlotNum += numSlots;
1384}
1385
1386//------------------------------------------------------------------------
1387// EvalToTmp: Replace the node in the given fgArgTabEntry with a temp
1388//
1389// Arguments:
1390// curArgTabEntry - the fgArgTabEntry for the argument
1391// tmpNum - the varNum for the temp
1392// newNode - the assignment of the argument value to the temp
1393//
1394// Notes:
1395// Although the name of this method is EvalToTmp, it doesn't actually create
1396// the temp or the copy.
1397//
1398void fgArgInfo::EvalToTmp(fgArgTabEntry* curArgTabEntry, unsigned tmpNum, GenTree* newNode)
1399{
1400 assert(curArgTabEntry->parent->Current() == newNode);
1401
1402 curArgTabEntry->node = newNode;
1403 curArgTabEntry->tmpNum = tmpNum;
1404 curArgTabEntry->isTmp = true;
1405}
1406
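//------------------------------------------------------------------------
// ArgsComplete: Called once all arguments have been added to the arg table.
//
// Notes:
//    Decides which arguments must be evaluated into temps (needTmp) and which
//    need placeholder nodes (needPlace) so that side effects and evaluation
//    order are preserved, then marks the argument table as complete.
//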
1407void fgArgInfo::ArgsComplete()
1408{
1409 bool hasStackArgs = false;
1410 bool hasStructRegArg = false;
1411
1412 for (unsigned curInx = 0; curInx < argCount; curInx++)
1413 {
1414 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1415 assert(curArgTabEntry != nullptr);
1416 GenTree* argx = curArgTabEntry->node;
1417
1418 if (curArgTabEntry->regNum == REG_STK)
1419 {
1420 hasStackArgs = true;
1421#if !FEATURE_FIXED_OUT_ARGS
1422 // On x86 we use push instructions to pass arguments:
1423 // The non-register arguments are evaluated and pushed in order
1424 // and they are never evaluated into temps
1425 //
1426 continue;
1427#endif
1428 }
1429#if FEATURE_ARG_SPLIT
1430 else if (curArgTabEntry->isSplit)
1431 {
1432 hasStructRegArg = true;
1433 hasStackArgs = true;
1434 }
1435#endif // FEATURE_ARG_SPLIT
1436 else // we have a register argument, next we look for a struct type.
1437 {
1438 if (varTypeIsStruct(argx) UNIX_AMD64_ABI_ONLY(|| curArgTabEntry->isStruct))
1439 {
1440 hasStructRegArg = true;
1441 }
1442 }
1443
        /* If the argument tree contains an assignment (GTF_ASG) then the argument and
           every earlier argument (except constants) must be evaluated into temps
1446 since there may be other arguments that follow and they may use the value being assigned.
1447
1448 EXAMPLE: ArgTab is "a, a=5, a"
1449 -> when we see the second arg "a=5"
1450 we know the first two arguments "a, a=5" have to be evaluated into temps
1451
           For the case of an assignment, we only know that there exists some assignment someplace
1453 in the tree. We don't know what is being assigned so we are very conservative here
1454 and assume that any local variable could have been assigned.
1455 */
1456
1457 if (argx->gtFlags & GTF_ASG)
1458 {
1459 // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
1460 // a tmp, then we need a temp in the late arg list.
1461 if ((argCount > 1) || argx->OperIsCopyBlkOp()
1462#ifdef FEATURE_FIXED_OUT_ARGS
1463 || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
1464 // that we only have late non-register args when that feature is on.
1465#endif // FEATURE_FIXED_OUT_ARGS
1466 )
1467 {
1468 curArgTabEntry->needTmp = true;
1469 }
1470
1471 // For all previous arguments, unless they are a simple constant
1472 // we require that they be evaluated into temps
1473 for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
1474 {
1475 fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
1476 assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
1477
1478 assert(prevArgTabEntry->node);
1479 if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
1480 {
1481 prevArgTabEntry->needTmp = true;
1482 }
1483 }
1484 }
1485
1486 bool treatLikeCall = ((argx->gtFlags & GTF_CALL) != 0);
1487#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw
        // (a call to a jit helper), then we need to treat it like a call (but only
1490 // if there are/were any stack args).
1491 // This means unnesting, sorting, etc. Technically this is overly
1492 // conservative, but I want to avoid as much special-case debug-only code
1493 // as possible, so leveraging the GTF_CALL flag is the easiest.
1494 //
1495 if (!treatLikeCall && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) && compiler->opts.compDbgCode &&
1496 (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
1497 {
1498 for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
1499 {
1500 if (otherInx == curInx)
1501 {
1502 continue;
1503 }
1504
1505 if (argTable[otherInx]->regNum == REG_STK)
1506 {
1507 treatLikeCall = true;
1508 break;
1509 }
1510 }
1511 }
1512#endif // FEATURE_FIXED_OUT_ARGS
1513
        /* If it contains a call (GTF_CALL) then the argument itself and everything before the call
1515 with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
1516 has to be kept in the right order since we will move the call to the first position)
1517
1518 For calls we don't have to be quite as conservative as we are with an assignment
1519 since the call won't be modifying any non-address taken LclVars.
1520 */
1521
1522 if (treatLikeCall)
1523 {
1524 if (argCount > 1) // If this is not the only argument
1525 {
1526 curArgTabEntry->needTmp = true;
1527 }
1528 else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
1529 {
1530 // Spill all arguments that are floating point calls
1531 curArgTabEntry->needTmp = true;
1532 }
1533
1534 // All previous arguments may need to be evaluated into temps
1535 for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
1536 {
1537 fgArgTabEntry* prevArgTabEntry = argTable[prevInx];
1538 assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
1539 assert(prevArgTabEntry->node);
1540
1541 // For all previous arguments, if they have any GTF_ALL_EFFECT
1542 // we require that they be evaluated into a temp
1543 if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
1544 {
1545 prevArgTabEntry->needTmp = true;
1546 }
1547#if FEATURE_FIXED_OUT_ARGS
1548 // Or, if they are stored into the FIXED_OUT_ARG area
1549 // we require that they be moved to the gtCallLateArgs
1550 // and replaced with a placeholder node
1551 else if (prevArgTabEntry->regNum == REG_STK)
1552 {
1553 prevArgTabEntry->needPlace = true;
1554 }
1555#if FEATURE_ARG_SPLIT
1556 else if (prevArgTabEntry->isSplit)
1557 {
1558 prevArgTabEntry->needPlace = true;
1559 }
1560#endif // _TARGET_ARM_
1561#endif
1562 }
1563 }
1564
1565#if FEATURE_MULTIREG_ARGS
1566 // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
1567 // with multiple indirections, so here we consider spilling it into a tmp LclVar.
1568 //
1569 CLANG_FORMAT_COMMENT_ANCHOR;
1570#ifdef _TARGET_ARM_
1571 bool isMultiRegArg = (curArgTabEntry->numRegs > 0) && (curArgTabEntry->numRegs + curArgTabEntry->numSlots > 1);
1572#else
1573 bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
1574#endif
1575
1576 if ((varTypeIsStruct(argx->TypeGet())) && (curArgTabEntry->needTmp == false))
1577 {
1578 if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
1579 {
1580 // Spill multireg struct arguments that have Assignments or Calls embedded in them
1581 curArgTabEntry->needTmp = true;
1582 }
1583 else
1584 {
1585 // We call gtPrepareCost to measure the cost of evaluating this tree
1586 compiler->gtPrepareCost(argx);
1587
1588 if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
1589 {
1590 // Spill multireg struct arguments that are expensive to evaluate twice
1591 curArgTabEntry->needTmp = true;
1592 }
1593#if defined(FEATURE_SIMD) && defined(_TARGET_ARM64_)
1594 else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet()))
1595 {
1596 // SIMD types do not need the optimization below due to their sizes
1597 if (argx->OperIsSIMDorSimdHWintrinsic() ||
1598 (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) &&
1599 argx->AsObj()->gtOp1->gtOp.gtOp1->OperIsSIMDorSimdHWintrinsic()))
1600 {
1601 curArgTabEntry->needTmp = true;
1602 }
1603 }
1604#endif
1605#ifndef _TARGET_ARM_
1606 // TODO-Arm: This optimization is not implemented for ARM32
1607 // so we skip this for ARM32 until it is ported to use RyuJIT backend
1608 //
1609 else if (argx->OperGet() == GT_OBJ)
1610 {
1611 GenTreeObj* argObj = argx->AsObj();
1612 CORINFO_CLASS_HANDLE objClass = argObj->gtClass;
1613 unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass);
1614 switch (structSize)
1615 {
1616 case 3:
1617 case 5:
1618 case 6:
1619 case 7:
1620 // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
1621 //
1622 if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
1623 {
                                // If we don't have a LclVar we need to read exactly 3, 5, 6, or 7 bytes.
                                // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
1626 //
1627 curArgTabEntry->needTmp = true;
1628 }
1629 break;
1630 case 11:
1631 case 13:
1632 case 14:
1633 case 15:
1634 // Spill any GT_OBJ multireg structs that are difficult to extract
1635 //
1636 // When we have a GT_OBJ of a struct with the above sizes we would need
1637 // to use 3 or 4 load instructions to load the exact size of this struct.
1638 // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
1639 // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
1640 // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
1641 // the argument.
1642 //
1643 curArgTabEntry->needTmp = true;
1644 break;
1645
1646 default:
1647 break;
1648 }
1649 }
1650#endif // !_TARGET_ARM_
1651 }
1652 }
1653#endif // FEATURE_MULTIREG_ARGS
1654 }
1655
1656 // We only care because we can't spill structs and qmarks involve a lot of spilling, but
1657 // if we don't have qmarks, then it doesn't matter.
    // So check for Qmarks globally once here, instead of inside the loop.
1659 //
1660 const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);
1661
1662#if FEATURE_FIXED_OUT_ARGS
1663
1664 // For Arm/x64 we only care because we can't reorder a register
1665 // argument that uses GT_LCLHEAP. This is an optimization to
1666 // save a check inside the below loop.
1667 //
1668 const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);
1669
1670#else
1671
1672 const bool hasStackArgsWeCareAbout = hasStackArgs;
1673
1674#endif // FEATURE_FIXED_OUT_ARGS
1675
1676 // If we have any stack args we have to force the evaluation
1677 // of any arguments passed in registers that might throw an exception
1678 //
    // Technically we are only required to handle the following two cases:
1680 // a GT_IND with GTF_IND_RNGCHK (only on x86) or
1681 // a GT_LCLHEAP node that allocates stuff on the stack
1682 //
1683 if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
1684 {
1685 for (unsigned curInx = 0; curInx < argCount; curInx++)
1686 {
1687 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1688 assert(curArgTabEntry != nullptr);
1689 GenTree* argx = curArgTabEntry->node;
1690
1691 // Examine the register args that are currently not marked needTmp
1692 //
1693 if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
1694 {
1695 if (hasStackArgsWeCareAbout)
1696 {
1697#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag.
                    // Thus we cannot reorder the argument after any stack-based argument.
1701 // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
1702 // check for it explicitly.)
1703 //
1704 if (argx->gtFlags & GTF_EXCEPT)
1705 {
1706 curArgTabEntry->needTmp = true;
1707 continue;
1708 }
1709#else
1710 // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
1711 //
1712 if (argx->gtFlags & GTF_EXCEPT)
1713 {
1714 assert(compiler->compLocallocUsed);
1715
1716 // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
1717 //
1718 if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
1719 {
1720 curArgTabEntry->needTmp = true;
1721 continue;
1722 }
1723 }
1724#endif
1725 }
1726 if (hasStructRegArgWeCareAbout)
1727 {
1728 // Returns true if a GT_QMARK node is encountered in the argx tree
1729 //
1730 if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
1731 {
1732 curArgTabEntry->needTmp = true;
1733 continue;
1734 }
1735 }
1736 }
1737 }
1738 }
1739
1740 argsComplete = true;
1741}
1742
1743void fgArgInfo::SortArgs()
1744{
1745 assert(argsComplete == true);
1746
1747#ifdef DEBUG
1748 if (compiler->verbose)
1749 {
1750 printf("\nSorting the arguments:\n");
1751 }
1752#endif
1753
1754 /* Shuffle the arguments around before we build the gtCallLateArgs list.
1755 The idea is to move all "simple" arguments like constants and local vars
1756 to the end of the table, and move the complex arguments towards the beginning
1757 of the table. This will help prevent registers from being spilled by
1758 allowing us to evaluate the more complex arguments before the simpler arguments.
1759 The argTable ends up looking like:
1760 +------------------------------------+ <--- argTable[argCount - 1]
1761 | constants |
1762 +------------------------------------+
1763 | local var / local field |
1764 +------------------------------------+
1765 | remaining arguments sorted by cost |
1766 +------------------------------------+
1767 | temps (argTable[].needTmp = true) |
1768 +------------------------------------+
1769 | args with calls (GTF_CALL) |
1770 +------------------------------------+ <--- argTable[0]
1771 */
1772
1773 /* Set the beginning and end for the new argument table */
1774 unsigned curInx;
1775 int regCount = 0;
1776 unsigned begTab = 0;
1777 unsigned endTab = argCount - 1;
1778 unsigned argsRemaining = argCount;
1779
1780 // First take care of arguments that are constants.
1781 // [We use a backward iterator pattern]
1782 //
1783 curInx = argCount;
1784 do
1785 {
1786 curInx--;
1787
1788 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1789
1790 if (curArgTabEntry->regNum != REG_STK)
1791 {
1792 regCount++;
1793 }
1794
1795 // Skip any already processed args
1796 //
1797 if (!curArgTabEntry->processed)
1798 {
1799 GenTree* argx = curArgTabEntry->node;
1800
1801 // put constants at the end of the table
1802 //
1803 if (argx->gtOper == GT_CNS_INT)
1804 {
1805 noway_assert(curInx <= endTab);
1806
1807 curArgTabEntry->processed = true;
1808
1809 // place curArgTabEntry at the endTab position by performing a swap
1810 //
1811 if (curInx != endTab)
1812 {
1813 argTable[curInx] = argTable[endTab];
1814 argTable[endTab] = curArgTabEntry;
1815 }
1816
1817 endTab--;
1818 argsRemaining--;
1819 }
1820 }
1821 } while (curInx > 0);
1822
1823 if (argsRemaining > 0)
1824 {
1825 // Next take care of arguments that are calls.
1826 // [We use a forward iterator pattern]
1827 //
1828 for (curInx = begTab; curInx <= endTab; curInx++)
1829 {
1830 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1831
1832 // Skip any already processed args
1833 //
1834 if (!curArgTabEntry->processed)
1835 {
1836 GenTree* argx = curArgTabEntry->node;
1837
1838 // put calls at the beginning of the table
1839 //
1840 if (argx->gtFlags & GTF_CALL)
1841 {
1842 curArgTabEntry->processed = true;
1843
1844 // place curArgTabEntry at the begTab position by performing a swap
1845 //
1846 if (curInx != begTab)
1847 {
1848 argTable[curInx] = argTable[begTab];
1849 argTable[begTab] = curArgTabEntry;
1850 }
1851
1852 begTab++;
1853 argsRemaining--;
1854 }
1855 }
1856 }
1857 }
1858
1859 if (argsRemaining > 0)
1860 {
        // Next take care of arguments that are temps.
1862 // These temps come before the arguments that are
1863 // ordinary local vars or local fields
1864 // since this will give them a better chance to become
1865 // enregistered into their actual argument register.
1866 // [We use a forward iterator pattern]
1867 //
1868 for (curInx = begTab; curInx <= endTab; curInx++)
1869 {
1870 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1871
1872 // Skip any already processed args
1873 //
1874 if (!curArgTabEntry->processed)
1875 {
1876 if (curArgTabEntry->needTmp)
1877 {
1878 curArgTabEntry->processed = true;
1879
1880 // place curArgTabEntry at the begTab position by performing a swap
1881 //
1882 if (curInx != begTab)
1883 {
1884 argTable[curInx] = argTable[begTab];
1885 argTable[begTab] = curArgTabEntry;
1886 }
1887
1888 begTab++;
1889 argsRemaining--;
1890 }
1891 }
1892 }
1893 }
1894
1895 if (argsRemaining > 0)
1896 {
1897 // Next take care of local var and local field arguments.
1898 // These are moved towards the end of the argument evaluation.
1899 // [We use a backward iterator pattern]
1900 //
1901 curInx = endTab + 1;
1902 do
1903 {
1904 curInx--;
1905
1906 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1907
1908 // Skip any already processed args
1909 //
1910 if (!curArgTabEntry->processed)
1911 {
1912 GenTree* argx = curArgTabEntry->node;
1913
1914 if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
1915 {
1916 noway_assert(curInx <= endTab);
1917
1918 curArgTabEntry->processed = true;
1919
1920 // place curArgTabEntry at the endTab position by performing a swap
1921 //
1922 if (curInx != endTab)
1923 {
1924 argTable[curInx] = argTable[endTab];
1925 argTable[endTab] = curArgTabEntry;
1926 }
1927
1928 endTab--;
1929 argsRemaining--;
1930 }
1931 }
1932 } while (curInx > begTab);
1933 }
1934
1935 // Finally, take care of all the remaining arguments.
1936 // Note that we fill in one arg at a time using a while loop.
1937 bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
1938 while (argsRemaining > 0)
1939 {
1940 /* Find the most expensive arg remaining and evaluate it next */
1941
1942 fgArgTabEntry* expensiveArgTabEntry = nullptr;
1943 unsigned expensiveArg = UINT_MAX;
1944 unsigned expensiveArgCost = 0;
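        // Scan the unprocessed entries and pick the one with the largest evaluation
        // cost (gtCostEx); this loop is effectively a selection sort over the
        // remaining arguments.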
1945
1946 // [We use a forward iterator pattern]
1947 //
1948 for (curInx = begTab; curInx <= endTab; curInx++)
1949 {
1950 fgArgTabEntry* curArgTabEntry = argTable[curInx];
1951
1952 // Skip any already processed args
1953 //
1954 if (!curArgTabEntry->processed)
1955 {
1956 GenTree* argx = curArgTabEntry->node;
1957
1958 // We should have already handled these kinds of args
1959 assert(argx->gtOper != GT_LCL_VAR);
1960 assert(argx->gtOper != GT_LCL_FLD);
1961 assert(argx->gtOper != GT_CNS_INT);
1962
1963 // This arg should either have no persistent side effects or be the last one in our table
1964 // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));
1965
1966 if (argsRemaining == 1)
1967 {
1968 // This is the last arg to place
1969 expensiveArg = curInx;
1970 expensiveArgTabEntry = curArgTabEntry;
1971 assert(begTab == endTab);
1972 break;
1973 }
1974 else
1975 {
1976 if (!costsPrepared)
1977 {
1978 /* We call gtPrepareCost to measure the cost of evaluating this tree */
1979 compiler->gtPrepareCost(argx);
1980 }
1981
1982 if (argx->gtCostEx > expensiveArgCost)
1983 {
1984 // Remember this arg as the most expensive one that we have yet seen
1985 expensiveArgCost = argx->gtCostEx;
1986 expensiveArg = curInx;
1987 expensiveArgTabEntry = curArgTabEntry;
1988 }
1989 }
1990 }
1991 }
1992
1993 noway_assert(expensiveArg != UINT_MAX);
1994
1995 // put the most expensive arg towards the beginning of the table
1996
1997 expensiveArgTabEntry->processed = true;
1998
1999 // place expensiveArgTabEntry at the begTab position by performing a swap
2000 //
2001 if (expensiveArg != begTab)
2002 {
2003 argTable[expensiveArg] = argTable[begTab];
2004 argTable[begTab] = expensiveArgTabEntry;
2005 }
2006
2007 begTab++;
2008 argsRemaining--;
2009
2010 costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
2011 }
2012
    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and argsRemaining should be zero
2015 assert(begTab == (endTab + 1));
2016 assert(argsRemaining == 0);
2017
2018#if !FEATURE_FIXED_OUT_ARGS
2019 // Finally build the regArgList
2020 //
2021 callTree->gtCall.regArgList = NULL;
2022 callTree->gtCall.regArgListCount = regCount;
2023
2024 unsigned regInx = 0;
2025 for (curInx = 0; curInx < argCount; curInx++)
2026 {
2027 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2028
2029 if (curArgTabEntry->regNum != REG_STK)
2030 {
2031 // Encode the argument register in the register mask
2032 //
2033 callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
2034 regInx++;
2035 }
2036 }
2037#endif // !FEATURE_FIXED_OUT_ARGS
2038
2039 argsSorted = true;
2040}
2041
2042#ifdef DEBUG
2043void fgArgInfo::Dump(Compiler* compiler)
2044{
2045 for (unsigned curInx = 0; curInx < ArgCount(); curInx++)
2046 {
2047 fgArgTabEntry* curArgEntry = ArgTable()[curInx];
2048 curArgEntry->Dump();
2049 }
2050}
2051#endif
2052
2053//------------------------------------------------------------------------------
// fgMakeTmpArgNode : Create the node that represents this argument in the late
//                    argument list. The argument must already have been evaluated
//                    into the temp identified by curArgTabEntry->tmpNum; referencing
//                    that temp here is what enforces the ordering of argument
//                    evaluation.
//
// Arguments:
//    curArgTabEntry - the argument table entry; its temp (tmpNum) must already exist
//
// Return Value:
//    the newly created temp var tree.
2063
2064GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry)
2065{
2066 unsigned tmpVarNum = curArgTabEntry->tmpNum;
2067 LclVarDsc* varDsc = &lvaTable[tmpVarNum];
2068 assert(varDsc->lvIsTemp);
2069 var_types type = varDsc->TypeGet();
2070
2071 // Create a copy of the temp to go into the late argument list
2072 GenTree* arg = gtNewLclvNode(tmpVarNum, type);
2073 GenTree* addrNode = nullptr;
2074
2075 if (varTypeIsStruct(type))
2076 {
2077
2078#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_ARM_)
2079
2080 // Can this type be passed as a primitive type?
2081 // If so, the following call will return the corresponding primitive type.
2082 // Otherwise, it will return TYP_UNKNOWN and we will pass it as a struct type.
2083
2084 bool passedAsPrimitive = false;
2085 if (curArgTabEntry->isSingleRegOrSlot())
2086 {
2087 CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
2088 var_types structBaseType =
2089 getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd, curArgTabEntry->isVararg);
2090
2091 if (structBaseType != TYP_UNKNOWN)
2092 {
2093 passedAsPrimitive = true;
2094#if defined(UNIX_AMD64_ABI)
2095 // TODO-Cleanup: This is inelegant, but eventually we'll track this in the fgArgTabEntry,
2096 // and otherwise we'd have to either modify getPrimitiveTypeForStruct() to take
2097 // a structDesc or call eeGetSystemVAmd64PassStructInRegisterDescriptor yet again.
2098 //
2099 if (genIsValidFloatReg(curArgTabEntry->regNum))
2100 {
2101 if (structBaseType == TYP_INT)
2102 {
2103 structBaseType = TYP_FLOAT;
2104 }
2105 else
2106 {
2107 assert(structBaseType == TYP_LONG);
2108 structBaseType = TYP_DOUBLE;
2109 }
2110 }
2111#endif
2112 type = structBaseType;
2113 }
2114 }
2115
2116 // If it is passed in registers, don't get the address of the var. Make it a
2117 // field instead. It will be loaded in registers with putarg_reg tree in lower.
2118 if (passedAsPrimitive)
2119 {
2120 arg->ChangeOper(GT_LCL_FLD);
2121 arg->gtType = type;
2122 }
2123 else
2124 {
2125 var_types addrType = TYP_BYREF;
2126 arg = gtNewOperNode(GT_ADDR, addrType, arg);
2127 addrNode = arg;
2128
2129#if FEATURE_MULTIREG_ARGS
2130#ifdef _TARGET_ARM64_
2131 assert(varTypeIsStruct(type));
2132 if (lvaIsMultiregStruct(varDsc, curArgTabEntry->isVararg))
2133 {
2134 // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
2135 // as that is how UNIX_AMD64_ABI works.
2136 // We will create a GT_OBJ for the argument below.
2137 // This will be passed by value in two registers.
2138 assert(addrNode != nullptr);
2139
2140 // Create an Obj of the temp to use it as a call argument.
2141 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2142
2143 // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
2144 // this is only to preserve former behavior (though some CSE'ing of struct
2145 // values can be pessimizing, so enabling this may require some additional tuning).
2146 arg->gtFlags |= GTF_DONT_CSE;
2147 }
2148#else
2149 // Always create an Obj of the temp to use it as a call argument.
2150 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
2151 arg->gtFlags |= GTF_DONT_CSE;
2152#endif // !_TARGET_ARM64_
2153#endif // FEATURE_MULTIREG_ARGS
2154 }
2155
2156#else // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_)
2157
    // On other targets, we pass the struct by value.
2159 assert(varTypeIsStruct(type));
2160
2161 addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
2162
2163 // Get a new Obj node temp to use it as a call argument.
2164 // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
2165 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
2166
2167#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_)
2168
2169 } // (varTypeIsStruct(type))
2170
2171 if (addrNode != nullptr)
2172 {
2173 assert(addrNode->gtOper == GT_ADDR);
2174
2175 // This will prevent this LclVar from being optimized away
2176 lvaSetVarAddrExposed(tmpVarNum);
2177
2178 // the child of a GT_ADDR is required to have this flag set
2179 addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
2180 }
2181
2182 return arg;
2183}
2184
2185//------------------------------------------------------------------------------
2186// EvalArgsToTemps : Create temp assignments and populate the LateArgs list.
2187
2188void fgArgInfo::EvalArgsToTemps()
2189{
2190 assert(argsSorted == true);
2191
2192 unsigned regArgInx = 0;
2193 // Now go through the argument table and perform the necessary evaluation into temps
2194 GenTreeArgList* tmpRegArgNext = nullptr;
2195 for (unsigned curInx = 0; curInx < argCount; curInx++)
2196 {
2197 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2198
2199 GenTree* argx = curArgTabEntry->node;
2200 GenTree* setupArg = nullptr;
2201 GenTree* defArg;
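        // 'setupArg' is the tree (if any) that replaces the argument in the early list
        // (gtCallArgs); 'defArg' is the node that goes into gtCallLateArgs.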
2202
2203#if !FEATURE_FIXED_OUT_ARGS
2204 // Only ever set for FEATURE_FIXED_OUT_ARGS
2205 assert(curArgTabEntry->needPlace == false);
2206
2207 // On x86 and other archs that use push instructions to pass arguments:
2208 // Only the register arguments need to be replaced with placeholder nodes.
2209 // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
2210 //
2211 if (curArgTabEntry->regNum == REG_STK)
2212 continue;
2213#endif
2214
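        // Arguments that ArgsComplete() marked as needing a temp either already have one
        // (isTmp), in which case we just build the late node from it, or get a new temp
        // assignment created here.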
2215 if (curArgTabEntry->needTmp)
2216 {
2217 if (curArgTabEntry->isTmp == true)
2218 {
2219 // Create a copy of the temp to go into the late argument list
2220 defArg = compiler->fgMakeTmpArgNode(curArgTabEntry);
2221
2222 // mark the original node as a late argument
2223 argx->gtFlags |= GTF_LATE_ARG;
2224 }
2225 else
2226 {
2227 // Create a temp assignment for the argument
2228 // Put the temp in the gtCallLateArgs list
2229 CLANG_FORMAT_COMMENT_ANCHOR;
2230
2231#ifdef DEBUG
2232 if (compiler->verbose)
2233 {
2234 printf("Argument with 'side effect'...\n");
2235 compiler->gtDispTree(argx);
2236 }
2237#endif
2238
2239#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
2240 noway_assert(argx->gtType != TYP_STRUCT);
2241#endif
2242
2243 unsigned tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2244 if (argx->gtOper == GT_MKREFANY)
2245 {
                    // For GT_MKREFANY, typically the actual struct copying does
                    // not have any side-effects and can be delayed. So instead
                    // of using a temp for the whole struct, we can just use a temp
                    // for the operand that has a side-effect.
2250 GenTree* operand;
2251 if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
2252 {
2253 operand = argx->gtOp.gtOp1;
2254
2255 // In the early argument evaluation, place an assignment to the temp
2256 // from the source operand of the mkrefany
2257 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2258
2259 // Replace the operand for the mkrefany with the new temp.
2260 argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2261 }
2262 else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
2263 {
2264 operand = argx->gtOp.gtOp2;
2265
2266 // In the early argument evaluation, place an assignment to the temp
2267 // from the source operand of the mkrefany
2268 setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
2269
2270 // Replace the operand for the mkrefany with the new temp.
2271 argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
2272 }
2273 }
2274
2275 if (setupArg != nullptr)
2276 {
2277 // Now keep the mkrefany for the late argument list
2278 defArg = argx;
2279
2280 // Clear the side-effect flags because now both op1 and op2 have no side-effects
2281 defArg->gtFlags &= ~GTF_ALL_EFFECT;
2282 }
2283 else
2284 {
2285 setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
2286
2287 LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
2288 var_types lclVarType = genActualType(argx->gtType);
2289 var_types scalarType = TYP_UNKNOWN;
2290
2291 if (setupArg->OperIsCopyBlkOp())
2292 {
2293 setupArg = compiler->fgMorphCopyBlock(setupArg);
2294#if defined(_TARGET_ARMARCH_)
2295 // This scalar LclVar widening step is only performed for ARM architectures.
2296 //
2297 CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
2298 unsigned structSize = varDsc->lvExactSize;
2299
2300 scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg);
2301#endif // _TARGET_ARMARCH_
2302 }
2303
2304 // scalarType can be set to a wider type for ARM architectures: (3 => 4) or (5,6,7 => 8)
2305 if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
2306 {
2307 // Create a GT_LCL_FLD using the wider type to go to the late argument list
2308 defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
2309 }
2310 else
2311 {
2312 // Create a copy of the temp to go to the late argument list
2313 defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
2314 }
2315
2316 curArgTabEntry->isTmp = true;
2317 curArgTabEntry->tmpNum = tmpVarNum;
2318
2319#ifdef _TARGET_ARM_
2320 // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
2321 // might have left holes in the used registers (see
2322 // fgAddSkippedRegsInPromotedStructArg).
2323 // Too bad we're not that smart for these intermediate temps...
2324 if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
2325 {
2326 regNumber argReg = curArgTabEntry->regNum;
2327 regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
2328 for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
2329 {
2330 argReg = genRegArgNext(argReg);
2331 allUsedRegs |= genRegMask(argReg);
2332 }
2333 }
2334#endif // _TARGET_ARM_
2335 }
2336
2337 /* mark the assignment as a late argument */
2338 setupArg->gtFlags |= GTF_LATE_ARG;
2339
2340#ifdef DEBUG
2341 if (compiler->verbose)
2342 {
2343 printf("\n Evaluate to a temp:\n");
2344 compiler->gtDispTree(setupArg);
2345 }
2346#endif
2347 }
2348 }
2349 else // curArgTabEntry->needTmp == false
2350 {
2351 // On x86 -
2352 // Only register args are replaced with placeholder nodes
2353 // and the stack based arguments are evaluated and pushed in order.
2354 //
2355 // On Arm/x64 - When needTmp is false and needPlace is false,
2356 // the non-register arguments are evaluated and stored in order.
2357 // When needPlace is true we have a nested call that comes after
2358 // this argument so we have to replace it in the gtCallArgs list
2359 // (the initial argument evaluation list) with a placeholder.
2360 //
2361 if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
2362 {
2363 continue;
2364 }
2365
2366 /* No temp needed - move the whole node to the gtCallLateArgs list */
2367
2368 /* The argument is deferred and put in the late argument list */
2369
2370 defArg = argx;
2371
2372 // Create a placeholder node to put in its place in gtCallLateArgs.
2373
2374 // For a struct type we also need to record the class handle of the arg.
2375 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
2376
2377#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
2378
2379 // All structs are either passed (and retyped) as integral types, OR they
2380 // are passed by reference.
2381 noway_assert(argx->gtType != TYP_STRUCT);
2382
2383#else // !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
2384
2385 if (varTypeIsStruct(defArg))
2386 {
2387 clsHnd = compiler->gtGetStructHandleIfPresent(defArg);
2388 noway_assert(clsHnd != NO_CLASS_HANDLE);
2389 }
2390
2391#endif // !(defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI))
2392
2393 setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
2394
2395 /* mark the placeholder node as a late argument */
2396 setupArg->gtFlags |= GTF_LATE_ARG;
2397
2398#ifdef DEBUG
2399 if (compiler->verbose)
2400 {
2401 if (curArgTabEntry->regNum == REG_STK)
2402 {
2403 printf("Deferred stack argument :\n");
2404 }
2405 else
2406 {
2407 printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
2408 }
2409
2410 compiler->gtDispTree(argx);
2411 printf("Replaced with placeholder node:\n");
2412 compiler->gtDispTree(setupArg);
2413 }
2414#endif
2415 }
2416
2417 if (setupArg != nullptr)
2418 {
2419 if (curArgTabEntry->parent)
2420 {
2421 GenTree* parent = curArgTabEntry->parent;
2422 /* a normal argument from the list */
2423 noway_assert(parent->OperIsList());
2424 noway_assert(parent->gtOp.gtOp1 == argx);
2425
2426 parent->gtFlags |= (setupArg->gtFlags & GTF_ALL_EFFECT);
2427
2428 parent->gtOp.gtOp1 = setupArg;
2429 }
2430 else
2431 {
2432 /* must be the gtCallObjp */
2433 noway_assert(callTree->gtCall.gtCallObjp == argx);
2434
2435 callTree->gtCall.gtCallObjp = setupArg;
2436 }
2437 }
2438
2439 /* deferred arg goes into the late argument list */
2440
2441 if (tmpRegArgNext == nullptr)
2442 {
2443 tmpRegArgNext = compiler->gtNewArgList(defArg);
2444 callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
2445 }
2446 else
2447 {
2448 noway_assert(tmpRegArgNext->OperIsList());
2449 noway_assert(tmpRegArgNext->Current());
2450 tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
2451
2452 tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
2453 tmpRegArgNext = tmpRegArgNext->Rest();
2454 }
2455
2456 tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT);
2457
2458 curArgTabEntry->node = defArg;
2459 curArgTabEntry->lateArgInx = regArgInx++;
2460 }
2461
2462#ifdef DEBUG
2463 if (compiler->verbose)
2464 {
2465 printf("\nShuffled argument table: ");
2466 for (unsigned curInx = 0; curInx < argCount; curInx++)
2467 {
2468 fgArgTabEntry* curArgTabEntry = argTable[curInx];
2469
2470 if (curArgTabEntry->regNum != REG_STK)
2471 {
2472 printf("%s ", getRegName(curArgTabEntry->regNum));
2473 }
2474 }
2475 printf("\n");
2476 }
2477#endif
2478}
2479
2480// Return a conservative estimate of the stack size in bytes.
2481// It will be used only on the intercepted-for-host code path to copy the arguments.
2482int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
2483{
2484
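    // Count the arguments; each argument beyond the first MAX_REG_ARG is assumed to
    // occupy one REGSIZE_BYTES stack slot.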
2485 int numArgs = 0;
2486 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
2487 {
2488 numArgs++;
2489 }
2490
2491 int numStkArgs;
2492 if (numArgs > MAX_REG_ARG)
2493 {
2494 numStkArgs = numArgs - MAX_REG_ARG;
2495 }
2496 else
2497 {
2498 numStkArgs = 0;
2499 }
2500
2501 return numStkArgs * REGSIZE_BYTES;
2502}
2503
2504//------------------------------------------------------------------------------
// fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
//                  otherwise insert a comma form temp
//
// Arguments:
//    pOp - a pointer to the operand we will be reusing; if it is not a local, it is
//          replaced with a comma expression that evaluates it to a temp and returns
//          the result
//
// Return Value:
//    A fresh GT_LCL_VAR node referencing the local or temp, which has not been used
2514//
2515// Assumption:
2516// The result tree MUST be added to the tree structure since the ref counts are
2517// already incremented.
2518
2519GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
2520{
2521 GenTree* tree = *pOp;
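    // A local variable can simply be cloned; anything else is evaluated into a new
    // temp via a comma expression so that it can safely be referenced more than once.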
2522 if (tree->IsLocal())
2523 {
2524 return gtClone(tree);
2525 }
2526 else
2527 {
2528 return fgInsertCommaFormTemp(pOp);
2529 }
2530}
2531
2532//------------------------------------------------------------------------------
2533// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
2534// and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
2535//
2536// Arguments:
2537// ppTree - a pointer to the child node we will be replacing with the comma expression that
2538// evaluates ppTree to a temp and returns the result
2539//
2540// structType - value type handle if the temp created is of TYP_STRUCT.
2541//
2542// Return Value:
2543// A fresh GT_LCL_VAR node referencing the temp which has not been used
2544//
2545
2546GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
2547{
2548 GenTree* subTree = *ppTree;
2549
2550 unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2551
2552 if (varTypeIsStruct(subTree))
2553 {
2554 assert(structType != nullptr);
2555 lvaSetStruct(lclNum, structType, false);
2556 }
2557
2558 // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
2559 // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
2560 // setting type of lcl vars created.
2561 GenTree* asg = gtNewTempAssign(lclNum, subTree);
2562
2563 GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2564
2565 GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
2566
2567 *ppTree = comma;
2568
2569 return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
2570}
2571
2572//------------------------------------------------------------------------
2573// fgInitArgInfo: Construct the fgArgInfo for the call with the fgArgEntry for each arg
2574//
2575// Arguments:
2576// callNode - the call for which we are generating the fgArgInfo
2577//
2578// Return Value:
2579// None
2580//
2581// Notes:
//    This method is idempotent: if the fgArgInfo has already been constructed, it
//    simply returns.
2584// This method only computes the arg table and arg entries for the call (the fgArgInfo),
2585// and makes no modification of the args themselves.
2586//
2587void Compiler::fgInitArgInfo(GenTreeCall* call)
2588{
2589 GenTree* args;
2590 GenTree* argx;
2591
2592 unsigned argIndex = 0;
2593 unsigned intArgRegNum = 0;
2594 unsigned fltArgRegNum = 0;
2595 unsigned argSlots = 0;
2596
2597 bool callHasRetBuffArg = call->HasRetBufArg();
2598 bool callIsVararg = call->IsVarargs();
2599
2600#ifdef _TARGET_ARM_
2601 regMaskTP argSkippedRegMask = RBM_NONE;
2602 regMaskTP fltArgSkippedRegMask = RBM_NONE;
2603#endif // _TARGET_ARM_
2604
2605#if defined(_TARGET_X86_)
2606 unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
2607#else
2608 const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
2609#endif
2610
2611 if (call->fgArgInfo != nullptr)
2612 {
2613 // We've already initialized and set the fgArgInfo.
2614 return;
2615 }
2616 JITDUMP("Initializing arg info for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper));
2617
2618 // At this point, we should never have gtCallLateArgs, as this needs to be done before those are determined.
2619 assert(call->gtCallLateArgs == nullptr);
2620
2621#ifdef _TARGET_UNIX_
2622 if (callIsVararg)
2623 {
        // Currently native varargs are not implemented on non-Windows targets.
        //
        // Note that some targets, like Arm64 Unix, should not need much work as
        // the ABI is the same, while others may only need small changes, such as
        // amd64 Unix, which just expects RAX to pass numFPArguments.
2629 NYI("Morphing Vararg call not yet implemented on non Windows targets.");
2630 }
2631#endif // _TARGET_UNIX_
2632
2633 // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2634 // following the normal calling convention or in the normal argument registers. We either mark existing
2635 // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
2636 // non-standard arguments into the argument list, below.
2637 class NonStandardArgs
2638 {
2639 struct NonStandardArg
2640 {
2641 regNumber reg; // The register to be assigned to this non-standard argument.
2642 GenTree* node; // The tree node representing this non-standard argument.
2643 // Note that this must be updated if the tree node changes due to morphing!
2644 };
2645
2646 ArrayStack<NonStandardArg> args;
2647
2648 public:
2649 NonStandardArgs(CompAllocator alloc) : args(alloc, 3) // We will have at most 3 non-standard arguments
2650 {
2651 }
2652
2653 //-----------------------------------------------------------------------------
2654 // Add: add a non-standard argument to the table of non-standard arguments
2655 //
2656 // Arguments:
2657 // node - a GenTree node that has a non-standard argument.
2658 // reg - the register to assign to this node.
2659 //
2660 // Return Value:
2661 // None.
2662 //
2663 void Add(GenTree* node, regNumber reg)
2664 {
2665 NonStandardArg nsa = {reg, node};
2666 args.Push(nsa);
2667 }
2668
2669 //-----------------------------------------------------------------------------
2670 // Find: Look for a GenTree* in the set of non-standard args.
2671 //
2672 // Arguments:
2673 // node - a GenTree node to look for
2674 //
2675 // Return Value:
2676 // The index of the non-standard argument (a non-negative, unique, stable number).
2677 // If the node is not a non-standard argument, return -1.
2678 //
2679 int Find(GenTree* node)
2680 {
2681 for (int i = 0; i < args.Height(); i++)
2682 {
2683 if (node == args.Index(i).node)
2684 {
2685 return i;
2686 }
2687 }
2688 return -1;
2689 }
2690
2691 //-----------------------------------------------------------------------------
2692 // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
2693 // set the register to use for the node.
2694 //
2695 // Arguments:
2696 // node - a GenTree node to look for
2697 // pReg - an OUT argument. *pReg is set to the non-standard register to use if
2698 // 'node' is found in the non-standard argument set.
2699 //
2700 // Return Value:
2701 // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
2702 // register to use.
2703 // 'false' otherwise (in this case, *pReg is unmodified).
2704 //
2705 bool FindReg(GenTree* node, regNumber* pReg)
2706 {
2707 for (int i = 0; i < args.Height(); i++)
2708 {
2709 NonStandardArg& nsa = args.IndexRef(i);
2710 if (node == nsa.node)
2711 {
2712 *pReg = nsa.reg;
2713 return true;
2714 }
2715 }
2716 return false;
2717 }
2718
2719 //-----------------------------------------------------------------------------
2720 // Replace: Replace the non-standard argument node at a given index. This is done when
2721 // the original node was replaced via morphing, but we need to continue to assign a
2722 // particular non-standard arg to it.
2723 //
2724 // Arguments:
2725 // index - the index of the non-standard arg. It must exist.
2726 // node - the new GenTree node.
2727 //
2728 // Return Value:
2729 // None.
2730 //
2731 void Replace(int index, GenTree* node)
2732 {
2733 args.IndexRef(index).node = node;
2734 }
2735
2736 } nonStandardArgs(getAllocator(CMK_ArrayStack));
2737
2738 // Count of args. On first morph, this is counted before we've filled in the arg table.
2739 // On remorph, we grab it from the arg table.
2740 unsigned numArgs = 0;
2741
2742 // First we need to count the args
2743 if (call->gtCallObjp)
2744 {
2745 numArgs++;
2746 }
2747 for (GenTree* args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
2748 {
2749 numArgs++;
2750 }
2751
2752 // Insert or mark non-standard args. These are either outside the normal calling convention, or
    // argument registers that don't follow the normal progression of argument registers in the calling
2754 // convention (such as for the ARM64 fixed return buffer argument x8).
2755 //
2756 // *********** NOTE *************
2757 // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
2758 // in the implementation of fast tail call.
2759 // *********** END NOTE *********
2760 CLANG_FORMAT_COMMENT_ANCHOR;
2761
2762#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
    // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention.
2764 // Set the argument registers correctly here.
2765 if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
2766 {
2767 GenTreeArgList* args = call->gtCallArgs;
2768 GenTree* arg1 = args->Current();
2769 assert(arg1 != nullptr);
2770 nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
2771 }
2772#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
2773#if defined(_TARGET_ARM_)
    // A non-standard calling convention that reuses the secure delegate invoke mechanism is used on ARM only.
    // Despite the name, it is not used for secure delegates; it is used for VSD delegate calls, where the VSD
    // custom calling convention ABI requires passing R4, a callee-saved register, with a special value. Since
    // R4 is a callee-saved register, its value needs to be preserved. Thus, the VM uses a secure delegate IL
    // stub, which preserves R4 and also sets up R4 correctly for the VSD call. The VM is simply reusing an
    // existing mechanism (the secure delegate IL stub) to achieve its goal for the delegate VSD call. See
    // COMDelegate::NeedsWrapperDelegate() in the VM for details.
2780 else if (call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV)
2781 {
2782 GenTree* arg = call->gtCallObjp;
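        // The delegate object ('this') is needed both as the call's object argument and
        // as the base of the indirection cell address computed below, so clone it if it
        // is a local; otherwise evaluate it into a temp.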
2783 if (arg->OperIsLocal())
2784 {
2785 arg = gtClone(arg, true);
2786 }
2787 else
2788 {
2789 GenTree* tmp = fgInsertCommaFormTemp(&arg);
2790 call->gtCallObjp = arg;
2791 call->gtFlags |= GTF_ASG;
2792 arg = tmp;
2793 }
2794 noway_assert(arg != nullptr);
2795
2796 GenTree* newArg = new (this, GT_ADDR)
2797 GenTreeAddrMode(TYP_BYREF, arg, nullptr, 0, eeGetEEInfo()->offsetOfSecureDelegateIndirectCell);
2798
2799 // Append newArg as the last arg
2800 GenTreeArgList** insertionPoint = &call->gtCallArgs;
2801 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2802 {
2803 }
2804 *insertionPoint = gtNewListNode(newArg, nullptr);
2805
2806 numArgs++;
2807 nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg());
2808 }
2809#endif // defined(_TARGET_ARM_)
2810#if defined(_TARGET_X86_)
2811 // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
2812 // hi part to be in EDX. This sets the argument registers up correctly.
2813 else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
2814 call->IsHelperCall(this, CORINFO_HELP_LRSZ))
2815 {
2816 GenTreeArgList* args = call->gtCallArgs;
2817 GenTree* arg1 = args->Current();
2818 assert(arg1 != nullptr);
2819 nonStandardArgs.Add(arg1, REG_LNGARG_LO);
2820
2821 args = args->Rest();
2822 GenTree* arg2 = args->Current();
2823 assert(arg2 != nullptr);
2824 nonStandardArgs.Add(arg2, REG_LNGARG_HI);
2825 }
2826#else // !_TARGET_X86_
2827 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
2828 // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
2829 // convention for x86/SSE.
2830
2831 // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
2832 //
2833 if (hasFixedRetBuffReg() && call->HasRetBufArg())
2834 {
2835 args = call->gtCallArgs;
2836 assert(args != nullptr);
2837 assert(args->OperIsList());
2838
2839 argx = call->gtCallArgs->Current();
2840
2841 // We don't increment numArgs here, since we already counted this argument above.
2842
2843 nonStandardArgs.Add(argx, theFixedRetBuffReg());
2844 }
2845
2846 // We are allowed to have a Fixed Return Buffer argument combined
2847 // with any of the remaining non-standard arguments
2848 //
2849 if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
2850 {
2851 assert(!call->gtCallCookie);
2852 // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
2853 // It will be used only on the intercepted-for-host code path to copy the arguments.
2854
2855 GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
2856 call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
2857 numArgs++;
2858
2859 nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
2860 }
2861 else if (call->IsVirtualStub())
2862 {
2863 if (!call->IsTailCallViaHelper())
2864 {
2865 GenTree* stubAddrArg = fgGetStubAddrArg(call);
2866 // And push the stub address onto the list of arguments
2867 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
2868
2869 numArgs++;
2870 nonStandardArgs.Add(stubAddrArg, stubAddrArg->gtRegNum);
2871 }
2872 else
2873 {
            // If it is a VSD call getting dispatched via the tail call helper,
            // fgMorphTailCall() would materialize the stub address as an additional
            // parameter added to the original arg list, and hence there is no need
            // to add it as a non-standard arg.
2878 }
2879 }
2880 else
2881#endif // !_TARGET_X86_
2882 if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
2883 {
2884 assert(!call->IsUnmanaged());
2885
2886 GenTree* arg = call->gtCallCookie;
2887 noway_assert(arg != nullptr);
2888 call->gtCallCookie = nullptr;
2889
2890#if defined(_TARGET_X86_)
2891 // x86 passes the cookie on the stack as the final argument to the call.
2892 GenTreeArgList** insertionPoint = &call->gtCallArgs;
2893 for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
2894 {
2895 }
2896 *insertionPoint = gtNewListNode(arg, nullptr);
2897#else // !defined(_TARGET_X86_)
2898 // All other architectures pass the cookie in a register.
2899 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2900#endif // defined(_TARGET_X86_)
2901
2902 nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
2903 numArgs++;
2904
2905 // put destination into R10/EAX
2906 arg = gtClone(call->gtCallAddr, true);
2907 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
2908 numArgs++;
2909
2910 nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
2911
2912 // finally change this call to a helper call
2913 call->gtCallType = CT_HELPER;
2914 call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
2915 }
2916#if defined(FEATURE_READYTORUN_COMPILER) && defined(_TARGET_ARMARCH_)
    // For arm, we dispatch the code the same way as VSD, using virtualStubParamInfo->GetReg()
    // for the indirection cell address, which is what ZapIndirectHelperThunk expects.
2919 if (call->IsR2RRelativeIndir())
2920 {
2921 assert(call->gtEntryPoint.addr != nullptr);
2922
2923 size_t addrValue = (size_t)call->gtEntryPoint.addr;
2924 GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR);
2925 indirectCellAddress->gtRegNum = REG_R2R_INDIRECT_PARAM;
2926
2927 // Push the stub address onto the list of arguments.
2928 call->gtCallArgs = gtNewListNode(indirectCellAddress, call->gtCallArgs);
2929
2930 numArgs++;
2931 nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->gtRegNum);
2932 }
2933
2934#endif // FEATURE_READYTORUN_COMPILER && _TARGET_ARMARCH_
2935
2936 // Allocate the fgArgInfo for the call node;
2937 //
2938 call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
2939
2940 // Add the 'this' argument value, if present.
2941 argx = call->gtCallObjp;
2942 if (argx != nullptr)
2943 {
2944 assert(argIndex == 0);
2945 assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
2946 assert(varTypeIsGC(argx) || (argx->gtType == TYP_I_IMPL));
2947
2948 // This is a register argument - put it in the table.
2949 call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1, false,
2950 callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(nullptr));
2951
2952 intArgRegNum++;
2953#ifdef WINDOWS_AMD64_ABI
2954 // Whenever we pass an integer register argument
2955 // we skip the corresponding floating point register argument
2956 fltArgRegNum++;
2957#endif // WINDOWS_AMD64_ABI
2958 argIndex++;
2959 argSlots++;
2960 }
2961
2962#ifdef _TARGET_X86_
2963 // Compute the maximum number of arguments that can be passed in registers.
2964 // For X86 we handle the varargs and unmanaged calling conventions
2965
2966 if (call->gtFlags & GTF_CALL_POP_ARGS)
2967 {
2968 noway_assert(intArgRegNum < MAX_REG_ARG);
2969 // No more register arguments for varargs (CALL_POP_ARGS)
2970 maxRegArgs = intArgRegNum;
2971
2972 // Add in the ret buff arg
2973 if (callHasRetBuffArg)
2974 maxRegArgs++;
2975 }
2976
2977 if (call->IsUnmanaged())
2978 {
2979 noway_assert(intArgRegNum == 0);
2980
2981 if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
2982 {
2983 noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
2984 call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
2985 call->gtCallArgs->gtOp.gtOp1->gtOper ==
2986 GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
2987 maxRegArgs = 1;
2988 }
2989 else
2990 {
2991 maxRegArgs = 0;
2992 }
2993
2994 // Add in the ret buff arg
2995 if (callHasRetBuffArg)
2996 maxRegArgs++;
2997 }
2998#endif // _TARGET_X86_
2999
3000 /* Morph the user arguments */
3001 CLANG_FORMAT_COMMENT_ANCHOR;
3002
3003#if defined(_TARGET_ARM_)
3004
3005 // The ARM ABI has a concept of back-filling of floating-point argument registers, according
3006 // to the "Procedure Call Standard for the ARM Architecture" document, especially
3007 // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
3008 // appear in a lower-numbered register than floating point argument N. That is, argument
3009 // register allocation is not strictly increasing. To support this, we need to keep track of unused
3010 // floating-point argument registers that we can back-fill. We only support 4-byte float and
3011 // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
3012 // only back-fill single registers, since there is no way with these types to create
3013 // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
3014 // available (with 16 FP argument registers). Consider this code:
3015 //
3016 // struct HFA { float x, y, z; }; // a three element HFA
3017 // void bar(float a1, // passed in f0
3018 // double a2, // passed in f2/f3; skip f1 for alignment
3019 // HFA a3, // passed in f4/f5/f6
3020 // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
3021 // HFA a5, // passed in f10/f11/f12
3022 // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
3023 // // slots
3024 // float a7, // passed in f1 (back-filled)
3025 // float a8, // passed in f7 (back-filled)
3026 // float a9, // passed in f13 (back-filled)
3027 // float a10) // passed on the stack in [OutArg+0]
3028 //
3029 // Note that if we ever support FP types with larger alignment requirements, then there could
3030 // be more than single register back-fills.
3031 //
    // Once we assign a floating-point argument to the stack, all subsequent FP arguments must go on the stack.
3033 // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
3034 // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
3035 // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
3036 // and prevent any additional floating-point arguments from going in registers.
3037
3038 bool anyFloatStackArgs = false;
3039
3040#endif // _TARGET_ARM_
3041
3042#ifdef UNIX_AMD64_ABI
3043 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
3044#endif // UNIX_AMD64_ABI
3045
3046 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3047 {
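        // For each argument: classify it (struct, HFA, etc.), compute its size and
        // alignment, decide whether it is passed in registers or on the stack, and
        // record an fgArgTabEntry for it in the call's fgArgInfo.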
3048 assert(args->OperIsList());
3049 argx = args->Current();
3050 fgArgTabEntry* argEntry = nullptr;
3051
3052 // Change the node to TYP_I_IMPL so we don't report GC info
3053 // NOTE: We deferred this from the importer because of the inliner.
3054
3055 if (argx->IsVarAddr())
3056 {
3057 argx->gtType = TYP_I_IMPL;
3058 }
3059
3060 // We should never have any ArgPlaceHolder nodes at this point.
3061 assert(!argx->IsArgPlaceHolderNode());
3062
3063 // Setup any HFA information about 'argx'
3064 bool isHfaArg = false;
3065 var_types hfaType = TYP_UNDEF;
3066 unsigned hfaSlots = 0;
3067
3068 bool passUsingFloatRegs;
3069 unsigned argAlign = 1;
3070 unsigned size = 0;
3071 CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
3072 bool isRegArg = false;
3073 bool isNonStandard = false;
3074 regNumber nonStdRegNum = REG_NA;
3075
3076#ifdef FEATURE_HFA
3077 hfaType = GetHfaType(argx);
3078 isHfaArg = varTypeIsFloating(hfaType);
3079
3080#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3081 // Make sure for vararg methods isHfaArg is not true.
3082 isHfaArg = callIsVararg ? false : isHfaArg;
3083#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
3084
3085 if (isHfaArg)
3086 {
3087 isHfaArg = true;
3088 hfaSlots = GetHfaCount(argx);
3089
            // If we have an HFA struct, it's possible we transition from a method that originally
            // only had integer types to one that now has FP types. We have to communicate this
            // through this flag since LSRA later on will use this flag to determine whether
            // or not to track the FP register set.
3094 //
3095 compFloatingPointUsed = true;
3096 }
3097#endif // FEATURE_HFA
3098
3099#ifdef _TARGET_ARM_
3100 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
3101 bool passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
3102
3103 // We don't use the "size" return value from InferOpSizeAlign().
3104 codeGen->InferOpSizeAlign(argx, &argAlign);
3105
3106 argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
3107 argAlign /= TARGET_POINTER_SIZE;
3108
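        // The ARM ABI requires 8-byte aligned arguments to start in an even-numbered
        // argument register (or at an 8-byte aligned stack offset), so skip a
        // register/slot here when necessary.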
3109 if (argAlign == 2)
3110 {
3111 if (passUsingFloatRegs)
3112 {
3113 if (fltArgRegNum % 2 == 1)
3114 {
3115 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3116 fltArgRegNum++;
3117 }
3118 }
3119 else if (passUsingIntRegs)
3120 {
3121 if (intArgRegNum % 2 == 1)
3122 {
3123 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3124 intArgRegNum++;
3125 }
3126 }
3127
3128 if (argSlots % 2 == 1)
3129 {
3130 argSlots++;
3131 }
3132 }
3133
3134#elif defined(_TARGET_ARM64_)
3135
3136 assert(!callIsVararg || !isHfaArg);
3137 passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
3138
3139#elif defined(_TARGET_AMD64_)
3140
3141 passUsingFloatRegs = varTypeIsFloating(argx);
3142
3143#elif defined(_TARGET_X86_)
3144
3145 passUsingFloatRegs = false;
3146
3147#else
3148#error Unsupported or unset target architecture
3149#endif // _TARGET_*
3150
3151 bool isBackFilled = false;
3152 unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
3153 var_types structBaseType = TYP_STRUCT;
3154 unsigned structSize = 0;
3155 bool passStructByRef = false;
3156
3157 bool isStructArg;
3158 GenTree* actualArg = argx->gtEffectiveVal(true /* Commas only */);
3159
3160 //
3161 // Figure out the size of the argument. This is either in number of registers, or number of
3162 // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
3163 // the stack.
3164 //
3165 isStructArg = varTypeIsStruct(argx);
3166 CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
3167 if (isStructArg)
3168 {
3169 objClass = gtGetStructHandle(argx);
3170 if (argx->TypeGet() == TYP_STRUCT)
3171 {
3172 // For TYP_STRUCT arguments we must have an OBJ, LCL_VAR or MKREFANY
3173 switch (actualArg->OperGet())
3174 {
3175 case GT_OBJ:
3176 // Get the size off the OBJ node.
3177 structSize = actualArg->AsObj()->gtBlkSize;
3178 assert(structSize == info.compCompHnd->getClassSize(objClass));
3179 break;
3180 case GT_LCL_VAR:
3181 structSize = lvaGetDesc(actualArg->AsLclVarCommon())->lvExactSize;
3182 break;
3183 case GT_MKREFANY:
3184 structSize = info.compCompHnd->getClassSize(objClass);
3185 break;
3186 default:
3187 BADCODE("illegal argument tree in fgInitArgInfo");
3188 break;
3189 }
3190 }
3191 else
3192 {
3193 structSize = genTypeSize(argx);
3194 assert(structSize == info.compCompHnd->getClassSize(objClass));
3195 }
3196 }
3197#if defined(_TARGET_AMD64_)
3198#ifdef UNIX_AMD64_ABI
3199 if (!isStructArg)
3200 {
3201 size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
3202 }
3203 else
3204 {
3205 size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
3206 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
3207 }
3208#else // !UNIX_AMD64_ABI
3209 size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot'
3210#endif // UNIX_AMD64_ABI
3211#elif defined(_TARGET_ARM64_)
3212 if (isStructArg)
3213 {
3214 if (isHfaArg)
3215 {
3216 // HFA structs are passed by value in multiple registers.
                // The "size" in registers may differ from the size in pointer-sized units.
3218 size = GetHfaCount(argx);
3219 }
3220 else
3221 {
3222 // Structs are either passed in 1 or 2 (64-bit) slots.
3223 // Structs that are the size of 2 pointers are passed by value in multiple registers,
3224 // if sufficient registers are available.
3225 // Structs that are larger than 2 pointers (except for HFAs) are passed by
3226 // reference (to a copy)
3227 size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
3228
3229 if (size > 2)
3230 {
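                        // Passed by reference to a copy; the argument itself occupies
                        // just one pointer-sized slot.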
3231 size = 1;
3232 }
3233 }
3234 // Note that there are some additional rules for multireg structs.
3235 // (i.e they cannot be split between registers and the stack)
3236 }
3237 else
3238 {
3239 size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
3240 }
3241#elif defined(_TARGET_ARM_) || defined(_TARGET_X86_)
3242 if (isStructArg)
3243 {
3244 size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
3245 }
3246 else
3247 {
3248 // The typical case.
3249 // Long/double type argument(s) will be modified as needed in Lowering.
3250 size = genTypeStSz(argx->gtType);
3251 }
3252#else
3253#error Unsupported or unset target architecture
3254#endif // _TARGET_XXX_
3255 if (isStructArg)
3256 {
            // We have an argument with a struct type, but it may be a child of a GT_COMMA
3258 GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/);
3259
3260 assert(args->OperIsList());
3261 assert(argx == args->Current());
3262
3263 unsigned originalSize = structSize;
3264 originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
3265 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3266
3267 structSize = originalSize;
3268
3269 structPassingKind howToPassStruct;
3270
3271 structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, originalSize);
3272
3273 bool passedInRegisters = false;
3274 passStructByRef = (howToPassStruct == SPK_ByReference);
3275
3276 if (howToPassStruct == SPK_PrimitiveType)
3277 {
3278// For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register.
3279// For ARM or AMD64/Windows only power-of-2 structs are passed in registers.
3280#if !defined(_TARGET_ARM64_) && !defined(UNIX_AMD64_ABI)
                if (isPow2(originalSize))
3282#endif // !_TARGET_ARM64_ && !UNIX_AMD64_ABI
3283 {
3284 passedInRegisters = true;
3285 }
3286#ifdef _TARGET_ARM_
3287 // TODO-CQ: getArgTypeForStruct should *not* return TYP_DOUBLE for a double struct,
3288 // or for a struct of two floats. This causes the struct to be address-taken.
3289 if (structBaseType == TYP_DOUBLE)
3290 {
3291 size = 2;
3292 }
3293 else
3294#endif // _TARGET_ARM_
3295 {
3296 size = 1;
3297 }
3298 }
3299 else if (passStructByRef)
3300 {
3301 size = 1;
3302 }
3303 }
3304
        // The 'size' value must now have been set. (The original value of zero is an invalid value.)
3306 assert(size != 0);
3307
3308 //
3309 // Figure out if the argument will be passed in a register.
3310 //
3311
3312 if (isRegParamType(genActualType(argx->TypeGet()))
3313#ifdef UNIX_AMD64_ABI
3314 && (!isStructArg || structDesc.passedInRegisters)
3315#endif
3316 )
3317 {
3318#ifdef _TARGET_ARM_
3319 if (passUsingFloatRegs)
3320 {
3321 // First, see if it can be back-filled
3322 if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
3323 (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
3324 (size == 1)) // The size to back-fill is one float register
3325 {
3326 // Back-fill the register.
3327 isBackFilled = true;
3328 regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
3329 fltArgSkippedRegMask &=
3330 ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
3331 nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
3332 assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
3333 }
3334
3335 // Does the entire float, double, or HFA fit in the FP arg registers?
3336 // Check if the last register needed is still in the argument register range.
3337 isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
3338
3339 if (!isRegArg)
3340 {
3341 anyFloatStackArgs = true;
3342 }
3343 }
3344 else
3345 {
3346 isRegArg = intArgRegNum < MAX_REG_ARG;
3347 }
3348#elif defined(_TARGET_ARM64_)
3349 if (passUsingFloatRegs)
3350 {
3351 // Check if the last register needed is still in the fp argument register range.
3352 isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
3353
3354 // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
3355 if (isHfaArg && !isRegArg)
3356 {
                    // Recompute 'size' so that it represents the number of stack slots rather than the number
                    // of registers.
3359 //
3360 unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
3361 size = roundupSize / TARGET_POINTER_SIZE;
3362
3363 // We also must update fltArgRegNum so that we no longer try to
3364 // allocate any new floating point registers for args
3365 // This prevents us from backfilling a subsequent arg into d7
3366 //
3367 fltArgRegNum = MAX_FLOAT_REG_ARG;
3368 }
3369 }
3370 else
3371 {
3372 // Check if the last register needed is still in the int argument register range.
3373 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3374
                // Did we run out of registers when we had a 16-byte struct (size == 2)?
3376 // (i.e we only have one register remaining but we needed two registers to pass this arg)
3377 // This prevents us from backfilling a subsequent arg into x7
3378 //
3379 if (!isRegArg && (size > 1))
3380 {
3381#if defined(_TARGET_WINDOWS_)
3382 // Arm64 windows native varargs allows splitting a 16 byte struct between stack
3383 // and the last general purpose register.
3384 if (callIsVararg)
3385 {
3386 // Override the decision and force a split.
                        isRegArg = (intArgRegNum + (size - 1)) <= maxRegArgs;
3388 }
3389 else
3390#endif // defined(_TARGET_WINDOWS_)
3391 {
3392 // We also must update intArgRegNum so that we no longer try to
3393 // allocate any new general purpose registers for args
3394 //
3395 intArgRegNum = maxRegArgs;
3396 }
3397 }
3398 }
3399#else // not _TARGET_ARM_ or _TARGET_ARM64_
3400
3401#if defined(UNIX_AMD64_ABI)
3402
            // Here a struct can be passed in registers following the classification of its members and size.
3404 // Now make sure there are actually enough registers to do so.
3405 if (isStructArg)
3406 {
3407 unsigned int structFloatRegs = 0;
3408 unsigned int structIntRegs = 0;
3409 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3410 {
3411 if (structDesc.IsIntegralSlot(i))
3412 {
3413 structIntRegs++;
3414 }
3415 else if (structDesc.IsSseSlot(i))
3416 {
3417 structFloatRegs++;
3418 }
3419 }
3420
3421 isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
3422 ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
3423 }
3424 else
3425 {
3426 if (passUsingFloatRegs)
3427 {
3428 isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
3429 }
3430 else
3431 {
3432 isRegArg = intArgRegNum < MAX_REG_ARG;
3433 }
3434 }
3435#else // !defined(UNIX_AMD64_ABI)
3436 isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
3437#endif // !defined(UNIX_AMD64_ABI)
3438#endif // _TARGET_ARM_
3439 }
3440 else
3441 {
3442 isRegArg = false;
3443 }
3444
        // If there are nonstandard args (outside the calling convention), they were inserted above
        // and noted in a table, so we can recognize them here and build their argInfo.
3447 //
3448 // They should not affect the placement of any other args or stack space required.
3449 // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
3450 isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
3451 if (isNonStandard)
3452 {
3453 isRegArg = (nonStdRegNum != REG_STK);
3454 }
3455#if defined(_TARGET_X86_)
3456 else if (call->IsTailCallViaHelper())
3457 {
3458 // We have already (before calling fgMorphArgs()) appended the 4 special args
3459 // required by the x86 tailcall helper. These args are required to go on the
3460 // stack. Force them to the stack here.
3461 assert(numArgs >= 4);
3462 if (argIndex >= numArgs - 4)
3463 {
3464 isRegArg = false;
3465 }
3466 }
3467#endif // defined(_TARGET_X86_)
3468
3469 // Now we know if the argument goes in registers or not and how big it is.
3470 CLANG_FORMAT_COMMENT_ANCHOR;
3471
3472#ifdef _TARGET_ARM_
3473 // If we ever allocate a floating point argument to the stack, then all
3474 // subsequent HFA/float/double arguments go on the stack.
3475 if (!isRegArg && passUsingFloatRegs)
3476 {
3477 for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
3478 {
3479 fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
3480 }
3481 }
3482
3483 // If we think we're going to split a struct between integer registers and the stack, check to
3484 // see if we've already assigned a floating-point arg to the stack.
3485 if (isRegArg && // We decided above to use a register for the argument
3486 !passUsingFloatRegs && // We're using integer registers
3487 (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
3488 anyFloatStackArgs) // We've already used the stack for a floating-point argument
3489 {
3490 isRegArg = false; // Change our mind; don't pass this struct partially in registers
3491
3492 // Skip the rest of the integer argument registers
3493 for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
3494 {
3495 argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
3496 }
3497 }
3498#endif // _TARGET_ARM_
3499
3500 // Now create the fgArgTabEntry.
3501 fgArgTabEntry* newArgEntry;
3502 if (isRegArg)
3503 {
3504 regNumber nextRegNum = REG_STK;
3505
3506#if defined(UNIX_AMD64_ABI)
3507 regNumber nextOtherRegNum = REG_STK;
3508 unsigned int structFloatRegs = 0;
3509 unsigned int structIntRegs = 0;
3510#endif // defined(UNIX_AMD64_ABI)
3511
3512 if (isNonStandard)
3513 {
3514 nextRegNum = nonStdRegNum;
3515 }
3516#if defined(UNIX_AMD64_ABI)
3517 else if (isStructArg && structDesc.passedInRegisters)
3518 {
3519 // It is a struct passed in registers. Assign the next available register.
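                // For example, a struct whose eightbytes were classified as { INTEGER, SSE } gets
                // the next free integer argument register for its first half and the next free
                // SSE argument register for its second half.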
3520 assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
3521 regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
3522 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
3523 {
3524 if (structDesc.IsIntegralSlot(i))
3525 {
3526 *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
3527 structIntRegs++;
3528 }
3529 else if (structDesc.IsSseSlot(i))
3530 {
3531 *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
3532 structFloatRegs++;
3533 }
3534 }
3535 }
3536#endif // defined(UNIX_AMD64_ABI)
3537 else
3538 {
3539 // fill in or update the argInfo table
3540 nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
3541 : genMapIntRegArgNumToRegNum(intArgRegNum);
3542 }
3543
3544#ifdef _TARGET_AMD64_
3545#ifndef UNIX_AMD64_ABI
3546 assert(size == 1);
3547#endif
3548#endif
3549
3550 // This is a register argument - put it in the table
3551 newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign, isStructArg,
3552 callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum)
3553 UNIX_AMD64_ABI_ONLY_ARG(&structDesc));
3554
3555 newArgEntry->SetIsBackFilled(isBackFilled);
3556 newArgEntry->isNonStandard = isNonStandard;
3557
3558 // Set up the next intArgRegNum and fltArgRegNum values.
3559 if (!isBackFilled)
3560 {
3561#if defined(UNIX_AMD64_ABI)
3562 if (isStructArg)
3563 {
3564 // For this case, we've already set the regNums in the argTabEntry
3565 intArgRegNum += structIntRegs;
3566 fltArgRegNum += structFloatRegs;
3567 }
3568 else
3569#endif // defined(UNIX_AMD64_ABI)
3570 {
3571 if (!isNonStandard)
3572 {
3573#if FEATURE_ARG_SPLIT
3574 // Check for a split (partially enregistered) struct
3575 if (!passUsingFloatRegs && ((intArgRegNum + size) > MAX_REG_ARG))
3576 {
3577 // This indicates a partial enregistration of a struct type
3578 assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
3579 (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
3580 unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum;
3581 assert((unsigned char)numRegsPartial == numRegsPartial);
3582 call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
3583 }
3584#endif // FEATURE_ARG_SPLIT
3585
3586 if (passUsingFloatRegs)
3587 {
3588 fltArgRegNum += size;
3589
3590#ifdef WINDOWS_AMD64_ABI
                        // On Windows x64 the argument registers are positional: passing this
                        // argument in a floating point register also consumes the corresponding
                        // integer argument register, so skip it.
3593 intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
3594#endif // WINDOWS_AMD64_ABI
3595 // No supported architecture supports partial structs using float registers.
3596 assert(fltArgRegNum <= MAX_FLOAT_REG_ARG);
3597 }
3598 else
3599 {
3600 // Increment intArgRegNum by 'size' registers
3601 intArgRegNum += size;
3602
3603#ifdef WINDOWS_AMD64_ABI
3604 fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
3605#endif // WINDOWS_AMD64_ABI
3606 }
3607 }
3608 }
3609 }
3610 }
3611 else // We have an argument that is not passed in a register
3612 {
3613 // This is a stack argument - put it in the table
3614 newArgEntry = call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign, isStructArg, callIsVararg);
3615#ifdef UNIX_AMD64_ABI
3616 // TODO-Amd64-Unix-CQ: This is temporary (see also in fgMorphArgs).
3617 if (structDesc.passedInRegisters)
3618 {
3619 newArgEntry->structDesc.CopyFrom(structDesc);
3620 }
3621#endif
3622 }
3623
3624#ifdef FEATURE_HFA
3625 if (isHfaArg)
3626 {
3627 newArgEntry->setHfaType(hfaType, hfaSlots);
3628 }
3629#endif // FEATURE_HFA
3630 newArgEntry->SetMultiRegNums();
3631
3632 noway_assert(newArgEntry != nullptr);
3633 if (newArgEntry->isStruct)
3634 {
3635 newArgEntry->passedByRef = passStructByRef;
3636 newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType;
3637 }
3638 else
3639 {
3640 newArgEntry->argType = argx->TypeGet();
3641 }
3642
3643 argSlots += size;
3644 } // end foreach argument loop
3645
3646#ifdef DEBUG
3647 if (verbose)
3648 {
3649 call->fgArgInfo->Dump(this);
3650 JITDUMP("\n");
3651 }
3652#endif
3653}
3654
3655//------------------------------------------------------------------------
3656// fgMorphArgs: Walk and transform (morph) the arguments of a call
3657//
3658// Arguments:
3659// callNode - the call for which we are doing the argument morphing
3660//
3661// Return Value:
3662// Like most morph methods, this method returns the morphed node,
3663// though in this case there are currently no scenarios where the
3664// node itself is re-created.
3665//
3666// Notes:
3667// This calls fgInitArgInfo to create the 'fgArgInfo' for the call.
3668// If it has already been created, that method will simply return.
3669//
3670// This method changes the state of the call node. It uses the existence
3671// of gtCallLateArgs (the late arguments list) to determine if it has
3672// already done the first round of morphing.
3673//
3674// The first time it is called (i.e. during global morphing), this method
3675// computes the "late arguments". This is when it determines which arguments
3676// need to be evaluated to temps prior to the main argument setup, and which
3677// can be directly evaluated into the argument location. It also creates a
3678// second argument list (gtCallLateArgs) that does the final placement of the
3679// arguments, e.g. into registers or onto the stack.
3680//
//    The "non-late arguments", aka the gtCallArgs, perform the in-order
//    evaluation of the arguments that might have side-effects, such as embedded
//    assignments, calls or possible throws. In these cases, the argument and all
//    earlier arguments must be evaluated to temps.
3685//
3686// On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
3687// if we have any nested calls, we need to defer the copying of the argument
3688// into the fixed argument area until after the call. If the argument did not
3689// otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
3690// replaced in the "early" arg list (gtCallArgs) with a placeholder node.
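//
//    For example (roughly), for a call such as Foo(Bar(), y) on a FEATURE_FIXED_OUT_ARGS target,
//    the nested call Bar() is evaluated into a temp by the early list (gtCallArgs), where a
//    placeholder node takes its place, and the late list (gtCallLateArgs) then moves the temp
//    and 'y' into their final registers or outgoing stack slots.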
3691
3692#ifdef _PREFAST_
3693#pragma warning(push)
3694#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3695#endif
3696GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
3697{
3698 GenTree* args;
3699 GenTree* argx;
3700
3701 unsigned flagsSummary = 0;
3702
3703 unsigned argIndex = 0;
3704 unsigned argSlots = 0;
3705
3706 bool reMorphing = call->AreArgsComplete();
3707
3708 // Set up the fgArgInfo.
3709 fgInitArgInfo(call);
3710 unsigned numArgs = call->fgArgInfo->ArgCount();
3711 JITDUMP("%sMorphing args for %d.%s:\n", (reMorphing) ? "Re" : "", call->gtTreeID, GenTree::OpName(call->gtOper));
3712
3713 // If we are remorphing, process the late arguments (which were determined by a previous caller).
3714 if (reMorphing)
3715 {
3716 // We need to reMorph the gtCallLateArgs early since that is what triggers
3717 // the expression folding and we need to have the final folded gtCallLateArgs
3718 // available when we call UpdateRegArg so that we correctly update the fgArgInfo
3719 // with the folded tree that represents the final optimized argument nodes.
3720 //
3721 if (call->gtCallLateArgs != nullptr)
3722 {
3723
3724 call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
3725 flagsSummary |= call->gtCallLateArgs->gtFlags;
3726 }
3727 assert(call->fgArgInfo != nullptr);
3728 }
3729 call->fgArgInfo->RemorphReset();
3730
3731 // First we morph the argument subtrees ('this' pointer, arguments, etc.).
3732 // During the first call to fgMorphArgs we also record the
3733 // information about late arguments we have in 'fgArgInfo'.
    // This information is used later to construct the gtCallLateArgs list.
3735
3736 // Process the 'this' argument value, if present.
3737 argx = call->gtCallObjp;
3738 if (argx)
3739 {
3740 fgArgTabEntry* thisArgEntry = call->fgArgInfo->GetArgEntry(0, reMorphing);
3741 argx = fgMorphTree(argx);
3742 call->gtCallObjp = argx;
3743 // This is a register argument - possibly update it in the table.
3744 call->fgArgInfo->UpdateRegArg(thisArgEntry, argx, reMorphing);
3745 flagsSummary |= argx->gtFlags;
3746 assert(argIndex == 0);
3747 argIndex++;
3748 argSlots++;
3749 }
3750
3751 // Note that this name is a bit of a misnomer - it indicates that there are struct args
3752 // that occupy more than a single slot that are passed by value (not necessarily in regs).
3753 bool hasMultiregStructArgs = false;
3754 for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
3755 {
3756 GenTree** parentArgx = &args->gtOp.gtOp1;
3757 fgArgTabEntry* argEntry = call->fgArgInfo->GetArgEntry(argIndex, reMorphing);
3758
3759 // Morph the arg node, and update the parent and argEntry pointers.
3760 argx = *parentArgx;
3761 argx = fgMorphTree(argx);
3762 *parentArgx = argx;
3763 assert(args->OperIsList());
3764 assert(argx == args->Current());
3765
3766 unsigned argAlign = argEntry->alignment;
3767 unsigned size = argEntry->getSize();
3768 CORINFO_CLASS_HANDLE copyBlkClass = NO_CLASS_HANDLE;
3769
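        // 'argAlign' is expressed in stack slots; a value of 2 means the argument needs
        // 2-slot alignment (e.g. 8 bytes on a 32-bit target), so pad argSlots to an even
        // slot count first.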
3770 if (argAlign == 2)
3771 {
3772 if (argSlots % 2 == 1)
3773 {
3774 argSlots++;
3775 }
3776 }
3777 if (argEntry->isNonStandard)
3778 {
3779 // We need to update the node field for this nonStandard arg here
3780 // as it may have been changed by the call to fgMorphTree.
3781 call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing);
3782 flagsSummary |= argx->gtFlags;
3783 continue;
3784 }
3785
3786 assert(size != 0);
3787 argSlots += argEntry->getSlotCount();
3788
3789 // lclVar address should have been retyped to TYP_I_IMPL.
        assert(!argx->IsVarAddr() || (argx->gtType == TYP_I_IMPL));
3791
3792 // Get information about this argument.
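        // (An HFA - Homogeneous Float Aggregate - is a struct whose fields are all the same
        // floating-point type, with at most four elements; on ARM/ARM64 such structs are
        // passed in floating-point registers.)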
3793 var_types hfaType = argEntry->hfaType;
3794 bool isHfaArg = (hfaType != TYP_UNDEF);
3795 bool isHfaRegArg = argEntry->isHfaRegArg;
3796 unsigned hfaSlots = argEntry->numRegs;
3797 bool passUsingFloatRegs = argEntry->isPassedInFloatRegisters();
3798 bool isBackFilled = argEntry->IsBackFilled();
3799 unsigned structSize = 0;
3800
3801 // Struct arguments may be morphed into a node that is not a struct type.
        // In such a case the fgArgTabEntry keeps track of whether the original node (before morphing)
3803 // was a struct and the struct classification.
3804 bool isStructArg = argEntry->isStruct;
3805
3806 GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/);
3807 if (isStructArg && varTypeIsStruct(argObj) && !argObj->OperIs(GT_ASG, GT_MKREFANY, GT_FIELD_LIST, GT_ARGPLACE))
3808 {
3809 CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(argObj);
3810 unsigned originalSize;
3811 if (argObj->TypeGet() == TYP_STRUCT)
3812 {
3813 if (argObj->OperIs(GT_OBJ))
3814 {
3815 // Get the size off the OBJ node.
3816 originalSize = argObj->AsObj()->gtBlkSize;
3817 assert(originalSize == info.compCompHnd->getClassSize(objClass));
3818 }
3819 else
3820 {
3821 // We have a BADCODE assert for this in fgInitArgInfo.
3822 assert(argObj->OperIs(GT_LCL_VAR));
3823 originalSize = lvaGetDesc(argObj->AsLclVarCommon())->lvExactSize;
3824 }
3825 }
3826 else
3827 {
3828 originalSize = genTypeSize(argx);
3829 assert(originalSize == info.compCompHnd->getClassSize(objClass));
3830 }
3831 unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
3832 var_types structBaseType = argEntry->argType;
3833
3834#ifndef _TARGET_X86_
3835 // First, handle the case where the argument is passed by reference.
3836 if (argEntry->passedByRef)
3837 {
3838 assert(size == 1);
3839 copyBlkClass = objClass;
3840#ifdef UNIX_AMD64_ABI
3841 assert(!"Structs are not passed by reference on x64/ux");
3842#endif // UNIX_AMD64_ABI
3843 }
3844 else
3845 {
3846 // This is passed by value.
3847 // Check to see if we can transform this into load of a primitive type.
3848 // 'size' must be the number of pointer sized items
3849 assert(size == roundupSize / TARGET_POINTER_SIZE);
3850
3851 structSize = originalSize;
3852 unsigned passingSize = originalSize;
3853
3854 // Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size.
3855 // When it can do this is platform-dependent:
3856 // - In general, it can be done for power of 2 structs that fit in a single register.
3857 // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field.
3858 // - This is irrelevant for X86, since structs are always passed by value on the stack.
3859
3860 GenTree** parentOfArgObj = parentArgx;
3861 GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
3862 bool canTransform = false;
3863
3864 if (structBaseType != TYP_STRUCT)
3865 {
3866 if (isPow2(passingSize))
3867 {
3868 canTransform = true;
3869 }
3870
3871#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
3872 // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can
3873 // only transform in that case if the arg is a local.
3874 // TODO-CQ: This transformation should be applicable in general, not just for the ARM64
3875 // or UNIX_AMD64_ABI cases where they will be passed in registers.
3876 else
3877 {
3878 canTransform = (lclVar != nullptr);
3879 passingSize = genTypeSize(structBaseType);
3880 }
3881#endif // _TARGET_ARM64_ || UNIX_AMD64_ABI
3882 }
3883
3884 if (!canTransform)
3885 {
3886#if defined(_TARGET_AMD64_)
3887#ifndef UNIX_AMD64_ABI
3888 // On Windows structs are always copied and passed by reference (handled above) unless they are
3889 // passed by value in a single register.
3890 assert(size == 1);
3891 copyBlkClass = objClass;
3892#else // UNIX_AMD64_ABI
3893 // On Unix, structs are always passed by value.
3894 // We only need a copy if we have one of the following:
3895 // - We have a lclVar that has been promoted and is passed in registers.
3896 // - The sizes don't match for a non-lclVar argument.
3897 // - We have a known struct type (e.g. SIMD) that requires multiple registers.
3898 // TODO-Amd64-Unix-CQ: The first case could and should be handled without copies.
3899 // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not
3900 // actually passed in registers.
3901 if (argEntry->isPassedInRegisters())
3902 {
3903 assert(argEntry->structDesc.passedInRegisters);
3904 if (lclVar != nullptr)
3905 {
3906 if (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT)
3907 {
3908 copyBlkClass = objClass;
3909 }
3910 }
3911 else if (argObj->OperIs(GT_OBJ))
3912 {
3913 if (passingSize != structSize)
3914 {
3915 copyBlkClass = objClass;
3916 }
3917 }
3918 else
3919 {
3920 // This should only be the case of a value directly producing a known struct type.
3921 assert(argObj->TypeGet() != TYP_STRUCT);
3922 if (argEntry->numRegs > 1)
3923 {
3924 copyBlkClass = objClass;
3925 }
3926 }
3927 }
3928#endif // UNIX_AMD64_ABI
3929#elif defined(_TARGET_ARM64_)
3930 if ((passingSize != structSize) && (lclVar == nullptr))
3931 {
3932 copyBlkClass = objClass;
3933 }
3934#endif
3935
3936#ifdef _TARGET_ARM_
3937 // TODO-1stClassStructs: Unify these conditions across targets.
3938 if (((lclVar != nullptr) &&
3939 (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT)) ||
3940 ((argObj->OperIs(GT_OBJ)) && (passingSize != structSize)))
3941 {
3942 copyBlkClass = objClass;
3943 }
3944
3945 if (structSize < TARGET_POINTER_SIZE)
3946 {
3947 copyBlkClass = objClass;
3948 }
3949#endif // _TARGET_ARM_
3950 }
3951 else
3952 {
                // We have a struct argument that fits in a register (or, on ARM, a register pair),
                // and it is either a power of 2 in size or a local.
3955 // Change our GT_OBJ into a GT_IND of the correct type.
3956 CLANG_FORMAT_COMMENT_ANCHOR;
3957
3958#ifdef _TARGET_ARM_
3959 assert((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2)));
3960#else
3961 assert(size == 1);
3962#endif
3963
3964 assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize));
3965
3966 if (argObj->OperIs(GT_OBJ))
3967 {
3968 argObj->ChangeOper(GT_IND);
3969
3970 // Now see if we can fold *(&X) into X
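                    // e.g. GT_IND(GT_ADDR(GT_LCL_VAR V02)) can simply become GT_LCL_VAR V02.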
3971 if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
3972 {
3973 GenTree* temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
3974
3975 // Keep the DONT_CSE flag in sync
3976 // (as the addr always marks it for its op1)
3977 temp->gtFlags &= ~GTF_DONT_CSE;
3978 temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
3979 DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
3980 DEBUG_DESTROY_NODE(argObj); // GT_IND
3981
3982 argObj = temp;
3983 *parentOfArgObj = temp;
3984
3985 // If the OBJ had been the top level node, we've now changed argx.
3986 if (parentOfArgObj == parentArgx)
3987 {
3988 argx = temp;
3989 }
3990 }
3991 }
3992 if (argObj->gtOper == GT_LCL_VAR)
3993 {
3994 unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
3995 LclVarDsc* varDsc = &lvaTable[lclNum];
3996
3997 if (varDsc->lvPromoted)
3998 {
3999 if (varDsc->lvFieldCnt == 1)
4000 {
4001 // get the first and only promoted field
4002 LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
4003 if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
4004 {
4005 // we will use the first and only promoted field
4006 argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
4007
4008 if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
4009 (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
4010 {
4011 // Just use the existing field's type
4012 argObj->gtType = fieldVarDsc->TypeGet();
4013 }
4014 else
4015 {
4016 // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
4017 // to a new type
4018 argObj->ChangeOper(GT_LCL_FLD);
4019 argObj->gtType = structBaseType;
4020 }
4021 assert(varTypeCanReg(argObj->TypeGet()));
4022 assert(copyBlkClass == NO_CLASS_HANDLE);
4023 }
4024 else
4025 {
4026 // use GT_LCL_FLD to swizzle the single field struct to a new type
4027 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
4028 argObj->ChangeOper(GT_LCL_FLD);
4029 argObj->gtType = structBaseType;
4030 }
4031 }
4032 else
4033 {
4034 // The struct fits into a single register, but it has been promoted into its
4035 // constituent fields, and so we have to re-assemble it
4036 copyBlkClass = objClass;
4037 }
4038 }
4039 else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
4040 {
4041 // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
4042 argObj->ChangeOper(GT_LCL_FLD);
4043 argObj->gtType = structBaseType;
4044 }
4045 }
4046 else
4047 {
4048 // Not a GT_LCL_VAR, so we can just change the type on the node
4049 argObj->gtType = structBaseType;
4050 }
4051 assert(varTypeCanReg(argObj->TypeGet()) ||
4052 ((copyBlkClass != NO_CLASS_HANDLE) && varTypeCanReg(structBaseType)));
4053
4054 size = 1;
4055 }
4056
4057#ifndef UNIX_AMD64_ABI
4058 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
4059 if (varTypeIsStruct(structBaseType) && !argEntry->passedByRef)
4060 {
4061 if (isHfaArg && passUsingFloatRegs)
4062 {
4063 size = argEntry->numRegs;
4064 }
4065 else
4066 {
4067 // If the valuetype size is not a multiple of TARGET_POINTER_SIZE,
4068 // we must copyblk to a temp before doing the obj to avoid
4069 // the obj reading memory past the end of the valuetype
4070 CLANG_FORMAT_COMMENT_ANCHOR;
4071
4072 if (roundupSize > originalSize)
4073 {
4074 copyBlkClass = objClass;
4075
4076 // There are a few special cases where we can omit using a CopyBlk
4077 // where we normally would need to use one.
4078
4079 if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
4080 {
4081 copyBlkClass = NO_CLASS_HANDLE;
4082 }
4083 }
4084
4085 size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
4086 }
4087 }
4088#endif // !UNIX_AMD64_ABI
4089 }
4090#endif // !_TARGET_X86_
4091 }
4092
4093 if (argEntry->isPassedInRegisters())
4094 {
4095 call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing);
4096 }
4097 else
4098 {
4099 call->fgArgInfo->UpdateStkArg(argEntry, argx, reMorphing);
4100 }
4101
4102 if (copyBlkClass != NO_CLASS_HANDLE)
4103 {
4104 fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass);
4105 }
4106
4107 if (argx->gtOper == GT_MKREFANY)
4108 {
4109 // 'Lower' the MKREFANY tree and insert it.
4110 noway_assert(!reMorphing);
4111
4112#ifdef _TARGET_X86_
4113
4114 // Build the mkrefany as a GT_FIELD_LIST
4115 GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
4116 GenTreeFieldList(argx->gtOp.gtOp1, OFFSETOF__CORINFO_TypedReference__dataPtr, TYP_BYREF, nullptr);
4117 (void)new (this, GT_FIELD_LIST)
4118 GenTreeFieldList(argx->gtOp.gtOp2, OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL, fieldList);
4119 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
4120 fp->node = fieldList;
4121 args->gtOp.gtOp1 = fieldList;
4122
4123#else // !_TARGET_X86_
4124
4125 // Get a new temp
4126 // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
4127 unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4128 lvaSetStruct(tmp, impGetRefAnyClass(), false);
4129
4130 // Build the mkrefany as a comma node:
4131 // (tmp.ptr=argx),(tmp.type=handle)
4132 GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__dataPtr);
4133 GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__type);
4134 destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
4135 destPtrSlot->gtFlags |= GTF_VAR_DEF;
4136 destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
4137 destTypeSlot->gtFlags |= GTF_VAR_DEF;
4138
4139 GenTree* asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
4140 GenTree* asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
4141 GenTree* asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
4142
4143 // Change the expression to "(tmp=val)"
4144 args->gtOp.gtOp1 = asg;
4145
4146 // EvalArgsToTemps will cause tmp to actually get loaded as the argument
4147 call->fgArgInfo->EvalToTmp(argEntry, tmp, asg);
4148 lvaSetVarAddrExposed(tmp);
4149#endif // !_TARGET_X86_
4150 }
4151
4152#if FEATURE_MULTIREG_ARGS
4153 if (isStructArg)
4154 {
4155 if (size > 1 || isHfaArg)
4156 {
4157 hasMultiregStructArgs = true;
4158 }
4159 }
4160#ifdef _TARGET_ARM_
4161 else if ((argEntry->argType == TYP_LONG) || (argEntry->argType == TYP_DOUBLE))
4162 {
4163 assert((argEntry->numRegs == 2) || (argEntry->numSlots == 2));
4164 }
4165#endif
4166 else
4167 {
4168 // We must have exactly one register or slot.
4169 assert(((argEntry->numRegs == 1) && (argEntry->numSlots == 0)) ||
4170 ((argEntry->numRegs == 0) && (argEntry->numSlots == 1)));
4171 }
4172#endif
4173
4174#if defined(_TARGET_X86_)
4175 if (isStructArg)
4176 {
4177 GenTree* lclNode = argx->OperIs(GT_LCL_VAR) ? argx : fgIsIndirOfAddrOfLocal(argx);
4178 if ((lclNode != nullptr) &&
4179 (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
4180 {
4181 // Make a GT_FIELD_LIST of the field lclVars.
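                // The result has the form FIELD_LIST(field0, FIELD_LIST(field1, ... nullptr)),
                // with each entry recording the field's offset and type.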
4182 GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon();
4183 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
4184 GenTreeFieldList* fieldList = nullptr;
4185 for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
4186 fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
4187 {
4188 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
4189 if (fieldList == nullptr)
4190 {
4191 lcl->SetLclNum(fieldLclNum);
4192 lcl->ChangeOper(GT_LCL_VAR);
4193 lcl->gtType = fieldVarDsc->lvType;
4194 fieldList = new (this, GT_FIELD_LIST)
4195 GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
4196 fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
4197 fp->node = fieldList;
4198 args->gtOp.gtOp1 = fieldList;
4199 }
4200 else
4201 {
4202 GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
4203 fieldList = new (this, GT_FIELD_LIST)
4204 GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
4205 }
4206 }
4207 }
4208 }
4209#endif // _TARGET_X86_
4210
4211 flagsSummary |= args->Current()->gtFlags;
4212
4213 } // end foreach argument loop
4214
4215 if (!reMorphing)
4216 {
4217 call->fgArgInfo->ArgsComplete();
4218 }
4219
4220 if (call->gtCallArgs)
4221 {
4222 UpdateGT_LISTFlags(call->gtCallArgs);
4223 }
4224
4225 /* Process the function address, if indirect call */
4226
4227 if (call->gtCallType == CT_INDIRECT)
4228 {
4229 call->gtCallAddr = fgMorphTree(call->gtCallAddr);
4230 }
4231
4232#if FEATURE_FIXED_OUT_ARGS
4233
4234 // Record the outgoing argument size. If the call is a fast tail
4235 // call, it will setup its arguments in incoming arg area instead
4236 // of the out-going arg area, so we don't need to track the
4237 // outgoing arg size.
4238 if (!call->IsFastTailCall())
4239 {
4240 unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
4241
4242#if defined(UNIX_AMD64_ABI)
4243 // This is currently required for the UNIX ABI to work correctly.
4244 opts.compNeedToAlignFrame = true;
4245#endif // UNIX_AMD64_ABI
4246
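        // The outgoing area is sized in whole stack slots: e.g. 6 outgoing slots on a 64-bit
        // target is 6 * 8 = 48 bytes, but never less than MIN_ARG_AREA_FOR_CALL.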
4247 const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
4248 call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
4249
4250#ifdef DEBUG
4251 if (verbose)
4252 {
4253 printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
4254 preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
4255 }
4256#endif
4257 }
4258#endif // FEATURE_FIXED_OUT_ARGS
4259
4260 // Clear the ASG and EXCEPT (if possible) flags on the call node
4261 call->gtFlags &= ~GTF_ASG;
4262 if (!call->OperMayThrow(this))
4263 {
4264 call->gtFlags &= ~GTF_EXCEPT;
4265 }
4266
4267 // Union in the side effect flags from the call's operands
4268 call->gtFlags |= flagsSummary & GTF_ALL_EFFECT;
4269
4270 // If the register arguments have already been determined
4271 // or we have no register arguments then we don't need to
4272 // call SortArgs() and EvalArgsToTemps()
4273 //
4274 // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
4275 // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
4276 // is added to make sure to call EvalArgsToTemp.
4277 if (!reMorphing && (call->fgArgInfo->HasRegArgs()))
4278 {
4279 // This is the first time that we morph this call AND it has register arguments.
4280 // Follow into the code below and do the 'defer or eval to temp' analysis.
4281
4282 call->fgArgInfo->SortArgs();
4283
4284 call->fgArgInfo->EvalArgsToTemps();
4285
4286 // We may have updated the arguments
4287 if (call->gtCallArgs)
4288 {
4289 UpdateGT_LISTFlags(call->gtCallArgs);
4290 }
4291 }
4292
4293 if (hasMultiregStructArgs)
4294 {
4295 fgMorphMultiregStructArgs(call);
4296 }
4297
4298#ifdef DEBUG
4299 if (verbose)
4300 {
4301 call->fgArgInfo->Dump(this);
4302 JITDUMP("\n");
4303 }
4304#endif
4305 return call;
4306}
4307#ifdef _PREFAST_
4308#pragma warning(pop)
4309#endif
4310
4311//-----------------------------------------------------------------------------
4312// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
4313// call fgMorphMultiregStructArg on each of them.
4314//
4315// Arguments:
//    call : a GenTreeCall node that has one or more TYP_STRUCT arguments
4317//
4318// Notes:
4319// We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types.
4320// It will ensure that the struct arguments are in the correct form.
4321// If this method fails to find any TYP_STRUCT arguments it will assert.
4322//
4323void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
4324{
4325 bool foundStructArg = false;
4326 unsigned initialFlags = call->gtFlags;
4327 unsigned flagsSummary = 0;
4328 fgArgInfo* allArgInfo = call->fgArgInfo;
4329
4330#ifdef _TARGET_X86_
4331 assert(!"Logic error: no MultiregStructArgs for X86");
4332#endif
4333#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
4334 assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
4335#endif
4336
4337 for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
4338 {
        // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
        // For such late args the gtCallArgs list contains the setup arg node (which evaluates the arg).
        // The tree from the gtCallLateArgs list is the one passed to the callee. The fgArgTabEntry contains
        // the mapping between the nodes in both lists: if the arg is not a late arg, fgArgEntry->node points
        // to the early arg itself; otherwise it points to the corresponding node in the late args list.
4344 bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
4345 fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
4346 assert(fgEntryPtr != nullptr);
4347 GenTree* argx = fgEntryPtr->node;
4348 GenTree* lateList = nullptr;
4349 GenTree* lateNode = nullptr;
4350
4351 if (isLateArg)
4352 {
4353 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
4354 {
4355 assert(list->OperIsList());
4356
4357 GenTree* argNode = list->Current();
4358 if (argx == argNode)
4359 {
4360 lateList = list;
4361 lateNode = argNode;
4362 break;
4363 }
4364 }
4365 assert(lateList != nullptr && lateNode != nullptr);
4366 }
4367
4368 GenTree* arg = argx;
4369
4370 if (!fgEntryPtr->isStruct)
4371 {
4372 continue;
4373 }
4374
4375 unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->numSlots);
4376 if ((size > 1) || fgEntryPtr->isHfaArg)
4377 {
4378 foundStructArg = true;
4379 if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST))
4380 {
4381 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
4382
4383 // Did we replace 'argx' with a new tree?
4384 if (arg != argx)
4385 {
4386 fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
4387
4388 // link the new arg node into either the late arg list or the gtCallArgs list
4389 if (isLateArg)
4390 {
4391 lateList->gtOp.gtOp1 = arg;
4392 }
4393 else
4394 {
4395 args->gtOp.gtOp1 = arg;
4396 }
4397 }
4398 }
4399 }
4400 }
4401
4402 // We should only call this method when we actually have one or more multireg struct args
4403 assert(foundStructArg);
4404
4405 // Update the flags
4406 call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
4407}
4408
4409//-----------------------------------------------------------------------------
4410// fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list,
4411// morph the argument as needed to be passed correctly.
4412//
4413// Arguments:
4414// arg - A GenTree node containing a TYP_STRUCT arg
4415// fgEntryPtr - the fgArgTabEntry information for the current 'arg'
4416//
4417// Notes:
4418// The arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT.
4419// If 'arg' is a lclVar passed on the stack, we will ensure that any lclVars that must be on the
4420// stack are marked as doNotEnregister, and then we return.
4421//
4422// If it is passed by register, we mutate the argument into the GT_FIELD_LIST form
4423// which is only used for struct arguments.
4424//
//    If arg is a LclVar, we check whether it is struct promoted with the right number of fields,
//    and if the fields are at the appropriate offsets we will use the struct promoted fields
//    in the GT_FIELD_LIST nodes that we create.
//    If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
//    we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
//    this also forces the struct to be stack allocated into the local frame.
//    For the GT_OBJ case we will clone the address expression and generate two (or more)
//    indirections.
//    Currently the implementation handles ARM/ARM64 and Unix AMD64, and will NYI for other architectures.
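//    For example (illustrative only), on ARM64 a 16-byte struct local that is not suitably
//    promoted would be rewritten from OBJ(ADDR(LCL_VAR V03)) into
//    FIELD_LIST(LCL_FLD V03 [+0], FIELD_LIST(LCL_FLD V03 [+8], nullptr)).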
4434//
4435GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr)
4436{
4437 assert(varTypeIsStruct(arg->TypeGet()));
4438
4439#if !defined(_TARGET_ARMARCH_) && !defined(UNIX_AMD64_ABI)
4440 NYI("fgMorphMultiregStructArg requires implementation for this target");
4441#endif
4442
4443#ifdef _TARGET_ARM_
4444 if ((fgEntryPtr->isSplit && fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4) ||
4445 (!fgEntryPtr->isSplit && fgEntryPtr->regNum == REG_STK))
4446#else
4447 if (fgEntryPtr->regNum == REG_STK)
4448#endif
4449 {
4450 GenTreeLclVarCommon* lcl = nullptr;
4451 GenTree* actualArg = arg->gtEffectiveVal();
4452
4453 if (actualArg->OperGet() == GT_OBJ)
4454 {
4455 if (actualArg->gtGetOp1()->OperIs(GT_ADDR) && actualArg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR))
4456 {
4457 lcl = actualArg->gtGetOp1()->gtGetOp1()->AsLclVarCommon();
4458 }
4459 }
4460 else if (actualArg->OperGet() == GT_LCL_VAR)
4461 {
4462 lcl = actualArg->AsLclVarCommon();
4463 }
4464 if (lcl != nullptr)
4465 {
4466 if (lvaGetPromotionType(lcl->gtLclNum) == PROMOTION_TYPE_INDEPENDENT)
4467 {
4468 arg = fgMorphLclArgToFieldlist(lcl);
4469 }
4470 else if (arg->TypeGet() == TYP_STRUCT)
4471 {
4472 // If this is a non-register struct, it must be referenced from memory.
4473 if (!actualArg->OperIs(GT_OBJ))
4474 {
4475 // Create an Obj of the temp to use it as a call argument.
4476 arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
4477 arg = gtNewObjNode(lvaGetStruct(lcl->gtLclNum), arg);
4478 }
4479 // Its fields will need to be accessed by address.
4480 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUG_ARG(DNER_IsStructArg));
4481 }
4482 }
4483
4484 return arg;
4485 }
4486
4487#if FEATURE_MULTIREG_ARGS
    // Examine 'arg' and set up argValue, objClass and structSize
4489 //
4490 CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg);
4491 GenTree* argValue = arg; // normally argValue will be arg, but see right below
4492 unsigned structSize = 0;
4493
4494 if (arg->OperGet() == GT_OBJ)
4495 {
4496 GenTreeObj* argObj = arg->AsObj();
4497 objClass = argObj->gtClass;
4498 structSize = argObj->Size();
4499 assert(structSize == info.compCompHnd->getClassSize(objClass));
4500
4501 // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR.
4502 GenTree* op1 = argObj->gtOp1;
4503 if (op1->OperGet() == GT_ADDR)
4504 {
4505 GenTree* underlyingTree = op1->gtOp.gtOp1;
4506
4507 // Only update to the same type.
4508 if (underlyingTree->OperIs(GT_LCL_VAR) && (underlyingTree->TypeGet() == argValue->TypeGet()) &&
4509 (objClass == gtGetStructHandleIfPresent(underlyingTree)))
4510 {
4511 argValue = underlyingTree;
4512 }
4513 }
4514 }
4515 else if (arg->OperGet() == GT_LCL_VAR)
4516 {
4517 GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
4518 unsigned varNum = varNode->gtLclNum;
4519 assert(varNum < lvaCount);
4520 LclVarDsc* varDsc = &lvaTable[varNum];
4521
4522 structSize = varDsc->lvExactSize;
4523 assert(structSize == info.compCompHnd->getClassSize(objClass));
4524 }
4525 else
4526 {
4527 objClass = gtGetStructHandleIfPresent(arg);
4528 structSize = info.compCompHnd->getClassSize(objClass);
4529 }
4530 noway_assert(objClass != NO_CLASS_HANDLE);
4531
4532 var_types hfaType = TYP_UNDEF;
4533 var_types elemType = TYP_UNDEF;
4534 unsigned elemCount = 0;
4535 unsigned elemSize = 0;
4536 var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
4537
4538 hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
4539 if (varTypeIsFloating(hfaType)
4540#if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4541 && !fgEntryPtr->isVararg
4542#endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4543 )
4544 {
4545 elemType = hfaType;
4546 elemSize = genTypeSize(elemType);
4547 elemCount = structSize / elemSize;
4548 assert(elemSize * elemCount == structSize);
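        // e.g. an HFA of three floats gives elemCount == 3 and type[] == { TYP_FLOAT, TYP_FLOAT, TYP_FLOAT }.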
4549 for (unsigned inx = 0; inx < elemCount; inx++)
4550 {
4551 type[inx] = elemType;
4552 }
4553 }
4554 else
4555 {
4556 assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE);
4557 BYTE gcPtrs[MAX_ARG_REG_COUNT];
4558 elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
4559 info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
4560
4561 for (unsigned inx = 0; inx < elemCount; inx++)
4562 {
4563#ifdef UNIX_AMD64_ABI
4564 if (gcPtrs[inx] == TYPE_GC_NONE)
4565 {
4566 type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx],
4567 fgEntryPtr->structDesc.eightByteSizes[inx]);
4568 }
4569 else
4570#endif // UNIX_AMD64_ABI
4571 {
4572 type[inx] = getJitGCType(gcPtrs[inx]);
4573 }
4574 }
4575
4576#ifndef UNIX_AMD64_ABI
4577 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4578 {
4579 elemSize = TARGET_POINTER_SIZE;
4580 // We can safely widen this to aligned bytes since we are loading from
4581 // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
4582 // lives in the stack frame or will be a promoted field.
4583 //
4584 structSize = elemCount * TARGET_POINTER_SIZE;
4585 }
4586 else // we must have a GT_OBJ
4587 {
4588 assert(argValue->OperGet() == GT_OBJ);
4589
4590 // We need to load the struct from an arbitrary address
4591 // and we can't read past the end of the structSize
4592 // We adjust the last load type here
4593 //
4594 unsigned remainingBytes = structSize % TARGET_POINTER_SIZE;
4595 unsigned lastElem = elemCount - 1;
4596 if (remainingBytes != 0)
4597 {
4598 switch (remainingBytes)
4599 {
4600 case 1:
4601 type[lastElem] = TYP_BYTE;
4602 break;
4603 case 2:
4604 type[lastElem] = TYP_SHORT;
4605 break;
4606#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
4607 case 4:
4608 type[lastElem] = TYP_INT;
4609 break;
4610#endif // (_TARGET_ARM64_) || (UNIX_AMD64_ABI)
4611 default:
4612 noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
4613 break;
4614 }
4615 }
4616 }
4617#endif // !UNIX_AMD64_ABI
4618 }
4619
4620 // We should still have a TYP_STRUCT
4621 assert(varTypeIsStruct(argValue->TypeGet()));
4622
4623 GenTreeFieldList* newArg = nullptr;
4624
4625 // Are we passing a struct LclVar?
4626 //
4627 if (argValue->OperGet() == GT_LCL_VAR)
4628 {
4629 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4630 unsigned varNum = varNode->gtLclNum;
4631 assert(varNum < lvaCount);
4632 LclVarDsc* varDsc = &lvaTable[varNum];
4633
        // At this point any TYP_STRUCT LclVar must be an aligned struct
        // or an HFA struct, both of which are passed by value.
4636 //
4637 assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
4638
4639 varDsc->lvIsMultiRegArg = true;
4640
4641#ifdef DEBUG
4642 if (verbose)
4643 {
4644 JITDUMP("Multireg struct argument V%02u : ", varNum);
4645 fgEntryPtr->Dump();
4646 }
4647#endif // DEBUG
4648
4649#ifndef UNIX_AMD64_ABI
4650 // This local variable must match the layout of the 'objClass' type exactly
4651 if (varDsc->lvIsHfa()
4652#if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4653 && !fgEntryPtr->isVararg
4654#endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4655 )
4656 {
4657 // We have a HFA struct
4658 noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
4659 noway_assert(elemSize == genTypeSize(elemType));
4660 noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
4661 noway_assert(elemSize * elemCount == varDsc->lvExactSize);
4662
4663 for (unsigned inx = 0; (inx < elemCount); inx++)
4664 {
4665 noway_assert(type[inx] == elemType);
4666 }
4667 }
4668 else
4669 {
4670#if defined(_TARGET_ARM64_)
4671 // We must have a 16-byte struct (non-HFA)
4672 noway_assert(elemCount == 2);
4673#elif defined(_TARGET_ARM_)
4674 noway_assert(elemCount <= 4);
4675#endif
4676
4677 for (unsigned inx = 0; inx < elemCount; inx++)
4678 {
4679 CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
4680
4681 // We setup the type[inx] value above using the GC info from 'objClass'
4682 // This GT_LCL_VAR must have the same GC layout info
4683 //
4684 if (currentGcLayoutType != TYPE_GC_NONE)
4685 {
4686 noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
4687 }
4688 else
4689 {
                    // We may have used a small type when we set up the type[inx] values above.
                    // We can safely widen this to TYP_I_IMPL.
4692 type[inx] = TYP_I_IMPL;
4693 }
4694 }
4695 }
4696#endif // !UNIX_AMD64_ABI
4697
4698#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
4699 // Is this LclVar a promoted struct with exactly 2 fields?
4700 // TODO-ARM64-CQ: Support struct promoted HFA types here
4701 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && (!varDsc->lvIsHfa()
4702#if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4703 && !fgEntryPtr->isVararg
4704#endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
4705 ))
4706 {
            // See if we have two promoted fields, one starting at offset 0 and the other at offset TARGET_POINTER_SIZE.
4708 unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
4709 unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
4710
4711 // Did we find the promoted fields at the necessary offsets?
4712 if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
4713 {
4714 LclVarDsc* loVarDsc = &lvaTable[loVarNum];
4715 LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
4716
4717 var_types loType = loVarDsc->lvType;
4718 var_types hiType = hiVarDsc->lvType;
4719
4720 if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
4721 {
4722 // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
4723 // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
4724 //
4725 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
4726 varNum);
4727 //
4728 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
4729 //
4730 }
4731 else
4732 {
                        // We can use the two struct promoted fields as the two arguments
4734
4735 GenTree* loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
4736 GenTree* hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
4737
4738 // Create a new tree for 'arg'
4739 // replace the existing LDOBJ(ADDR(LCLVAR))
4740 // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
4741 //
4742 newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
4743 (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
4744 }
4745 }
4746 }
4747 else
4748 {
4749 //
            // We will create a list of GT_LCL_FLD nodes to pass this struct
4751 //
4752 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4753 }
4754#elif defined(_TARGET_ARM_)
        // Is this LclVar a promoted struct whose field count exactly matches the element count?
4756 if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
4757 {
            // See if we can find a promoted field at each pointer-sized offset.
4759 unsigned varNums[4];
4760 bool hasBadVarNum = false;
4761 for (unsigned inx = 0; inx < elemCount; inx++)
4762 {
4763 varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
4764 if (varNums[inx] == BAD_VAR_NUM)
4765 {
4766 hasBadVarNum = true;
4767 break;
4768 }
4769 }
4770
4771 // Did we find the promoted fields at the necessary offsets?
4772 if (!hasBadVarNum)
4773 {
4774 LclVarDsc* varDscs[4];
4775 var_types varType[4];
4776 bool varIsFloat = false;
4777
4778 for (unsigned inx = 0; inx < elemCount; inx++)
4779 {
4780 varDscs[inx] = &lvaTable[varNums[inx]];
4781 varType[inx] = varDscs[inx]->lvType;
4782 if (varTypeIsFloating(varType[inx]))
4783 {
                        // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in
                        // the integer registers. So for now we will use GT_LCLFLD's to pass this struct
                        // (it won't be enregistered).
4787 //
4788 JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
4789 varNum);
4790 //
4791 // we call lvaSetVarDoNotEnregister and do the proper transformation below.
4792 //
4793 varIsFloat = true;
4794 break;
4795 }
4796 }
4797
4798 if (!varIsFloat)
4799 {
4800 newArg = fgMorphLclArgToFieldlist(varNode);
4801 }
4802 }
4803 }
4804 else
4805 {
4806 //
            // We will create a list of GT_LCL_FLD nodes to pass this struct
4808 //
4809 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4810 }
4811#endif // _TARGET_ARM_
4812 }
4813
    // If we didn't set newArg to a new GT_FIELD_LIST tree
4815 //
4816 if (newArg == nullptr)
4817 {
4818 if (fgEntryPtr->regNum == REG_STK)
4819 {
4820 // We leave this stack passed argument alone
4821 return arg;
4822 }
4823
        // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
        // Note that a GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
4826 //
4827 if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
4828 {
4829 GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
4830 unsigned varNum = varNode->gtLclNum;
4831 assert(varNum < lvaCount);
4832 LclVarDsc* varDsc = &lvaTable[varNum];
4833
4834 unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
4835 unsigned lastOffset = baseOffset + structSize;
4836
4837 // The allocated size of our LocalVar must be at least as big as lastOffset
4838 assert(varDsc->lvSize() >= lastOffset);
4839
4840 if (varDsc->lvStructGcCount > 0)
4841 {
4842 // alignment of the baseOffset is required
4843 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
4844#ifndef UNIX_AMD64_ABI
4845 noway_assert(elemSize == TARGET_POINTER_SIZE);
4846#endif
4847 unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
4848 const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
4849 for (unsigned inx = 0; (inx < elemCount); inx++)
4850 {
4851 // The GC information must match what we setup using 'objClass'
4852 if ((gcPtrs[baseIndex + inx] != TYPE_GC_NONE) || varTypeGCtype(type[inx]))
4853 {
4854 noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
4855 }
4856 }
4857 }
4858 else // this varDsc contains no GC pointers
4859 {
4860 for (unsigned inx = 0; inx < elemCount; inx++)
4861 {
4862 // The GC information must match what we setup using 'objClass'
4863 noway_assert(!varTypeIsGC(type[inx]));
4864 }
4865 }
4866
4867 //
            // We create a list of GT_LCL_FLD nodes to pass this struct
4869 //
4870 lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
4871
4872 // Create a new tree for 'arg'
4873 // replace the existing LDOBJ(ADDR(LCLVAR))
4874 // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
4875 //
4876 unsigned offset = baseOffset;
4877 GenTreeFieldList* listEntry = nullptr;
4878 for (unsigned inx = 0; inx < elemCount; inx++)
4879 {
4880 elemSize = genTypeSize(type[inx]);
4881 GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
4882 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
4883 if (newArg == nullptr)
4884 {
4885 newArg = listEntry;
4886 }
4887 offset += elemSize;
4888 }
4889 }
4890 // Are we passing a GT_OBJ struct?
4891 //
4892 else if (argValue->OperGet() == GT_OBJ)
4893 {
4894 GenTreeObj* argObj = argValue->AsObj();
4895 GenTree* baseAddr = argObj->gtOp1;
4896 var_types addrType = baseAddr->TypeGet();
4897
4898 if (baseAddr->OperGet() == GT_ADDR)
4899 {
4900 GenTree* addrTaken = baseAddr->gtOp.gtOp1;
4901 if (addrTaken->IsLocal())
4902 {
4903 GenTreeLclVarCommon* varNode = addrTaken->AsLclVarCommon();
4904 unsigned varNum = varNode->gtLclNum;
                    // We may be accessing a non-struct type (for example, a long) as a struct type.
4906 // Make sure lclVar lives on stack to make sure its fields are accessible by address.
4907 lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField));
4908 }
4909 }
4910
4911 // Create a new tree for 'arg'
4912 // replace the existing LDOBJ(EXPR)
4913 // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
4914 //
4915
4916 unsigned offset = 0;
4917 GenTreeFieldList* listEntry = nullptr;
4918 for (unsigned inx = 0; inx < elemCount; inx++)
4919 {
4920 elemSize = genTypeSize(type[inx]);
4921 GenTree* curAddr = baseAddr;
4922 if (offset != 0)
4923 {
4924 GenTree* baseAddrDup = gtCloneExpr(baseAddr);
4925 noway_assert(baseAddrDup != nullptr);
4926 curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
4927 }
4928 else
4929 {
4930 curAddr = baseAddr;
4931 }
4932 GenTree* curItem = gtNewIndir(type[inx], curAddr);
4933
4934 // For safety all GT_IND should have at least GT_GLOB_REF set.
4935 curItem->gtFlags |= GTF_GLOB_REF;
4936
4937 listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
4938 if (newArg == nullptr)
4939 {
4940 newArg = listEntry;
4941 }
4942 offset += elemSize;
4943 }
4944 }
4945 }
4946
4947#ifdef DEBUG
4948 // If we reach here we should have set newArg to something
4949 if (newArg == nullptr)
4950 {
4951 gtDispTree(argValue);
4952 assert(!"Missing case in fgMorphMultiregStructArg");
4953 }
4954#endif
4955
4956 noway_assert(newArg != nullptr);
4957 noway_assert(newArg->OperIsFieldList());
4958
4959 // We need to propagate any GTF_ALL_EFFECT flags from the end of the list back to the beginning.
4960 // This is verified in fgDebugCheckFlags().
4961
4962 ArrayStack<GenTree*> stack(getAllocator(CMK_ArrayStack));
4963 GenTree* tree;
4964 for (tree = newArg; (tree->gtGetOp2() != nullptr) && tree->gtGetOp2()->OperIsFieldList(); tree = tree->gtGetOp2())
4965 {
4966 stack.Push(tree);
4967 }
4968
4969 unsigned propFlags = (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
4970 tree->gtFlags |= propFlags;
4971
4972 while (!stack.Empty())
4973 {
4974 tree = stack.Pop();
4975 propFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
4976 propFlags |= (tree->gtGetOp2()->gtFlags & GTF_ALL_EFFECT);
4977 tree->gtFlags |= propFlags;
4978 }
4979
4980#ifdef DEBUG
4981 if (verbose)
4982 {
4983 printf("fgMorphMultiregStructArg created tree:\n");
4984 gtDispTree(newArg);
4985 }
4986#endif
4987
4988 arg = newArg; // consider calling fgMorphTree(newArg);
4989
4990#endif // FEATURE_MULTIREG_ARGS
4991
4992 return arg;
4993}
4994
4995//------------------------------------------------------------------------
4996// fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields
4997//
4998// Arguments:
4999// lcl - The GT_LCL_VAR node we will transform
5000//
5001// Return value:
5002// The new GT_FIELD_LIST that we have created.
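//
// Notes:
//    For example (hypothetical local numbers), a promoted struct local V05 with fields
//    V06 at offset 0 and V07 at offset 4 becomes
//    FIELD_LIST(V06 [+0], FIELD_LIST(V07 [+4], nullptr)).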
5003//
5004GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl)
5005{
5006 LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
5007 assert(varDsc->lvPromoted == true);
5008
5009 unsigned fieldCount = varDsc->lvFieldCnt;
5010 GenTreeFieldList* listEntry = nullptr;
5011 GenTreeFieldList* newArg = nullptr;
5012 unsigned fieldLclNum = varDsc->lvFieldLclStart;
5013
    // We can use the struct promoted fields as arguments
5015 for (unsigned i = 0; i < fieldCount; i++)
5016 {
5017 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
5018 GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
5019 listEntry = new (this, GT_FIELD_LIST)
5020 GenTreeFieldList(lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, listEntry);
5021 if (newArg == nullptr)
5022 {
5023 newArg = listEntry;
5024 }
5025 fieldLclNum++;
5026 }
5027 return newArg;
5028}
5029
5030//------------------------------------------------------------------------
5031// fgMakeOutgoingStructArgCopy: make a copy of a struct variable if necessary,
5032// to pass to a callee.
5033//
5034// Arguments:
5035// call - call being processed
5036// args - args for the call
//    argIndex     - arg being processed
5038// copyBlkClass - class handle for the struct
5039//
5040// Return value:
5041// tree that computes address of the outgoing arg
5042//
5043void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call,
5044 GenTree* args,
5045 unsigned argIndex,
5046 CORINFO_CLASS_HANDLE copyBlkClass)
5047{
5048 GenTree* argx = args->Current();
5049 noway_assert(argx->gtOper != GT_MKREFANY);
5050 fgArgTabEntry* argEntry = Compiler::gtArgEntryByNode(call, argx);
5051
5052 // If we're optimizing, see if we can avoid making a copy.
5053 //
5054 // We don't need a copy if this is the last use of an implicit by-ref local.
5055 //
5056 // We can't determine that all of the time, but if there is only
5057 // one use and the method has no loops, then this use must be the last.
5058 if (opts.OptimizationEnabled())
5059 {
5060 GenTreeLclVarCommon* lcl = nullptr;
5061
5062 if (argx->OperIsLocal())
5063 {
5064 lcl = argx->AsLclVarCommon();
5065 }
5066 else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
5067 {
5068 lcl = argx->AsObj()->Addr()->AsLclVarCommon();
5069 }
5070
5071 if (lcl != nullptr)
5072 {
5073 unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
5074 if (lvaIsImplicitByRefLocal(varNum))
5075 {
5076 LclVarDsc* varDsc = &lvaTable[varNum];
5077 // JIT_TailCall helper has an implicit assumption that all tail call arguments live
                // on the caller's frame. If an argument lives on the caller's caller's frame, it may get
5079 // overwritten if that frame is reused for the tail call. Therefore, we should always copy
5080 // struct parameters if they are passed as arguments to a tail call.
5081 if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt(RCS_EARLY) == 1) && !fgMightHaveLoop())
5082 {
5083 varDsc->setLvRefCnt(0, RCS_EARLY);
5084 args->gtOp.gtOp1 = lcl;
5085 argEntry->node = lcl;
5086
5087 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
5088 return;
5089 }
5090 }
5091 }
5092 }
5093
5094 if (fgOutgoingArgTemps == nullptr)
5095 {
5096 fgOutgoingArgTemps = hashBv::Create(this);
5097 }
5098
5099 unsigned tmp = 0;
5100 bool found = false;
5101
5102 // Attempt to find a local we have already used for an outgoing struct and reuse it.
5103 // We do not reuse within a statement.
5104 if (!opts.MinOpts())
5105 {
5106 indexType lclNum;
5107 FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
5108 {
5109 LclVarDsc* varDsc = &lvaTable[lclNum];
5110 if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
5111 !fgCurrentlyInUseArgTemps->testBit(lclNum))
5112 {
5113 tmp = (unsigned)lclNum;
5114 found = true;
5115 JITDUMP("reusing outgoing struct arg");
5116 break;
5117 }
5118 }
5119 NEXT_HBV_BIT_SET;
5120 }
5121
5122 // Create the CopyBlk tree and insert it.
5123 if (!found)
5124 {
5125 // Get a new temp
        // Here we don't need the unsafe value cls check, since the addr of this temp is used only in copyblk.
5127 tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5128 lvaSetStruct(tmp, copyBlkClass, false);
5129 if (call->IsVarargs())
5130 {
5131 lvaSetStructUsedAsVarArg(tmp);
5132 }
5133
5134 fgOutgoingArgTemps->setBit(tmp);
5135 }
5136
5137 fgCurrentlyInUseArgTemps->setBit(tmp);
5138
    // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
    // allocated on the stack and their address to be passed.
5141 if (lclVarIsSIMDType(tmp))
5142 {
5143 lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
5144 }
5145
5146 // Create a reference to the temp
5147 GenTree* dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
5148 dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
5149
5150 if (argx->gtOper == GT_OBJ)
5151 {
5152 argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
5153 argx->SetIndirExceptionFlags(this);
5154 }
5155 else
5156 {
5157 argx->gtFlags |= GTF_DONT_CSE;
5158 }
5159
5160 // Copy the valuetype to the temp
5161 unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
5162 GenTree* copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
5163 copyBlk = fgMorphCopyBlock(copyBlk);
5164
5165#if FEATURE_FIXED_OUT_ARGS
5166
    // Do the copy early, and evaluate the temp later (see EvalArgsToTemps).
    // On Unix, create a LCL_FLD for structs passed in more than one register (see fgMakeTmpArgNode).
5169 GenTree* arg = copyBlk;
5170
5171#else // FEATURE_FIXED_OUT_ARGS
5172
5173 // Structs are always on the stack, and thus never need temps
5174 // so we have to put the copy and temp all into one expression.
5175 argEntry->tmpNum = tmp;
5176 GenTree* arg = fgMakeTmpArgNode(argEntry);
5177
5178 // Change the expression to "(tmp=val),tmp"
5179 arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
5180
5181#endif // FEATURE_FIXED_OUT_ARGS
5182
5183 args->gtOp.gtOp1 = arg;
5184 call->fgArgInfo->EvalToTmp(argEntry, tmp, arg);
5185
5186 return;
5187}
5188
5189#ifdef _TARGET_ARM_
5190// See declaration for specification comment.
5191void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
5192 unsigned firstArgRegNum,
5193 regMaskTP* pArgSkippedRegMask)
5194{
5195 assert(varDsc->lvPromoted);
5196 // There's no way to do these calculations without breaking abstraction and assuming that
5197 // integer register arguments are consecutive ints. They are on ARM.
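    // Purely illustrative example: a promoted struct with a 4-byte field at offset 0 and a 4-byte
    // field at offset 8, passed starting in r1, would occupy r1 and r3, and the loop below would
    // record r2 in *pArgSkippedRegMask as a skipped argument register.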
5198
5199 // To start, figure out what register contains the last byte of the first argument.
5200 LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
5201 unsigned lastFldRegOfLastByte =
5202 (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5204
5205 // Now we're keeping track of the register that the last field ended in; see what registers
5206 // subsequent fields start in, and whether any are skipped.
5207 // (We assume here the invariant that the fields are sorted in offset order.)
5208 for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
5209 {
5210 unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
5211 LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
5212 unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
5213 assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
        // This loop enumerates the offsets of any registers skipped: it starts at the first register
        // after the one containing the last byte of the previous field, and stops just before the
        // register in which the current field starts.
5217 for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
5218 skippedRegOffsets++)
5219 {
5220 // If the register number would not be an arg reg, we're done.
5221 if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
5222 return;
5223 *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
5224 }
5225 lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
5226 }
5227}
5228
5229#endif // _TARGET_ARM_
5230
5231//****************************************************************************
5232// fgFixupStructReturn:
//    The companion to impFixupCallStructReturn. Now that the importer is done,
//    change the gtType to the precomputed native return type.
//    Requires that callNode currently has a struct type.
5236//
5237void Compiler::fgFixupStructReturn(GenTree* callNode)
5238{
5239 assert(varTypeIsStruct(callNode));
5240
5241 GenTreeCall* call = callNode->AsCall();
5242 bool callHasRetBuffArg = call->HasRetBufArg();
5243 bool isHelperCall = call->IsHelperCall();
5244
5245 // Decide on the proper return type for this call that currently returns a struct
5246 //
5247 CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
5248 Compiler::structPassingKind howToReturnStruct;
5249 var_types returnType;
5250
    // There are a couple of helper calls that say they return a TYP_STRUCT but they
    // expect this method to re-type the node to a TYP_REF (what is in call->gtReturnType)
5253 //
5254 // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
5255 // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
5256 // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
5257 //
5258 if (isHelperCall)
5259 {
5260 assert(!callHasRetBuffArg);
5261 assert(retClsHnd == NO_CLASS_HANDLE);
5262
5263 // Now that we are past the importer, re-type this node
5264 howToReturnStruct = SPK_PrimitiveType;
5265 returnType = (var_types)call->gtReturnType;
5266 }
5267 else
5268 {
5269 returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
5270 }
5271
5272 if (howToReturnStruct == SPK_ByReference)
5273 {
5274 assert(returnType == TYP_UNKNOWN);
5275 assert(callHasRetBuffArg);
5276 }
5277 else
5278 {
5279 assert(returnType != TYP_UNKNOWN);
5280
5281 if (!varTypeIsStruct(returnType))
5282 {
5283 // Widen the primitive type if necessary
5284 returnType = genActualType(returnType);
5285 }
5286 call->gtType = returnType;
5287 }
5288
5289#if FEATURE_MULTIREG_RET
    // Either we no longer have a struct, or, if we do, it is returned in registers or via a return buffer.
5291 assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
5292#else // !FEATURE_MULTIREG_RET
5293 // No more struct returns
5294 assert(call->TypeGet() != TYP_STRUCT);
5295#endif
5296
5297#if !defined(UNIX_AMD64_ABI)
5298 // If it was a struct return, it has been transformed into a call
5299 // with a return buffer (that returns TYP_VOID) or into a return
5300 // of a primitive/enregisterable type
5301 assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
5302#endif
5303}
5304
5305/*****************************************************************************
5306 *
5307 * A little helper used to rearrange nested commutative operations. The
5308 * effect is that nested associative, commutative operations are transformed
5309 * into a 'left-deep' tree, i.e. into something like this:
5310 *
5311 * (((a op b) op c) op d) op...
5312 */
5313
5314#if REARRANGE_ADDS
5315
5316void Compiler::fgMoveOpsLeft(GenTree* tree)
5317{
5318 GenTree* op1;
5319 GenTree* op2;
5320 genTreeOps oper;
5321
5322 do
5323 {
5324 op1 = tree->gtOp.gtOp1;
5325 op2 = tree->gtOp.gtOp2;
5326 oper = tree->OperGet();
5327
5328 noway_assert(GenTree::OperIsCommutative(oper));
5329 noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
5330 noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
5331 noway_assert(oper == op2->gtOper);
5332
5333 // Commutativity doesn't hold if overflow checks are needed
5334
5335 if (tree->gtOverflowEx() || op2->gtOverflowEx())
5336 {
5337 return;
5338 }
5339
5340 if (gtIsActiveCSE_Candidate(op2))
5341 {
5342 // If we have marked op2 as a CSE candidate,
5343 // we can't perform a commutative reordering
5344 // because any value numbers that we computed for op2
5345 // will be incorrect after performing a commutative reordering
5346 //
5347 return;
5348 }
5349
5350 if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
5351 {
5352 return;
5353 }
5354
5355 // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
5356 if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
5357 {
5358 return;
5359 }
5360
5361 if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
5362 {
5363 // We could deal with this, but we were always broken and just hit the assert
            // below regarding flags, which means it's not frequent, so we will just bail out.
5365 // See #195514
5366 return;
5367 }
5368
5369 noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
5370
5371 GenTree* ad1 = op2->gtOp.gtOp1;
5372 GenTree* ad2 = op2->gtOp.gtOp2;
5373
        // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a GT_INT.
        // We cannot reorder such GT_OR trees.
5376 //
5377 if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
5378 {
5379 break;
5380 }
5381
5382 // Don't split up a byref calculation and create a new byref. E.g.,
5383 // [byref]+ (ref, [int]+ (int, int)) => [byref]+ ([byref]+ (ref, int), int).
5384 // Doing this transformation could create a situation where the first
5385 // addition (that is, [byref]+ (ref, int) ) creates a byref pointer that
5386 // no longer points within the ref object. If a GC happens, the byref won't
5387 // get updated. This can happen, for instance, if one of the int components
5388 // is negative. It also requires the address generation be in a fully-interruptible
5389 // code region.
5390 //
5391 if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL)
5392 {
5393 assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD));
5394 break;
5395 }
5396
5397 /* Change "(x op (y op z))" to "(x op y) op z" */
5398 /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
5399
5400 GenTree* new_op1 = op2;
5401
5402 new_op1->gtOp.gtOp1 = op1;
5403 new_op1->gtOp.gtOp2 = ad1;
5404
5405 /* Change the flags. */
5406
        // Make sure we aren't throwing away any flags
5408 noway_assert((new_op1->gtFlags &
5409 ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
5410 GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
5411 GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
5412
5413 new_op1->gtFlags =
5414 (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
5415 (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
5416
        /* Retype new_op1 if it has become, or is no longer, a GC ptr. */
5418
5419 if (varTypeIsGC(op1->TypeGet()))
5420 {
5421 noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5422 oper == GT_ADD) || // byref(ref + (int+int))
5423 (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
5424 oper == GT_OR)); // int(gcref | int(gcref|intval))
5425
5426 new_op1->gtType = tree->gtType;
5427 }
5428 else if (varTypeIsGC(ad2->TypeGet()))
5429 {
            // Neither ad1 nor op1 is GC, so new_op1 isn't either.
5431 noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
5432 new_op1->gtType = TYP_I_IMPL;
5433 }
5434
        // If new_op1 is a new expression, assign it a new unique value number.
5436 // vnStore is null before the ValueNumber phase has run
5437 if (vnStore != nullptr)
5438 {
5439 // We can only keep the old value number on new_op1 if both op1 and ad2
5440 // have the same non-NoVN value numbers. Since op is commutative, comparing
5441 // only ad2 and op1 is enough.
5442 if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5443 (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
5444 (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
5445 {
5446 new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
5447 }
5448 }
5449
5450 tree->gtOp.gtOp1 = new_op1;
5451 tree->gtOp.gtOp2 = ad2;
5452
5453 /* If 'new_op1' is now the same nested op, process it recursively */
5454
5455 if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
5456 {
5457 fgMoveOpsLeft(new_op1);
5458 }
5459
        /* If 'ad2' is now the same nested op, process it as well.
         * Instead of recursing, we set up op1 and op2 for the next loop iteration.
         */
5463
5464 op1 = new_op1;
5465 op2 = ad2;
5466 } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
5467
5468 return;
5469}
5470
5471#endif
5472
5473/*****************************************************************************/
5474
5475void Compiler::fgSetRngChkTarget(GenTree* tree, bool delay)
5476{
5477 if (tree->OperIsBoundsCheck())
5478 {
5479 GenTreeBoundsChk* const boundsChk = tree->AsBoundsChk();
5480 BasicBlock* const failBlock = fgSetRngChkTargetInner(boundsChk->gtThrowKind, delay);
5481 if (failBlock != nullptr)
5482 {
5483 boundsChk->gtIndRngFailBB = gtNewCodeRef(failBlock);
5484 }
5485 }
5486 else if (tree->OperIs(GT_INDEX_ADDR))
5487 {
5488 GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr();
5489 BasicBlock* const failBlock = fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay);
5490 if (failBlock != nullptr)
5491 {
5492 indexAddr->gtIndRngFailBB = gtNewCodeRef(failBlock);
5493 }
5494 }
5495 else
5496 {
5497 noway_assert(tree->OperIs(GT_ARR_ELEM, GT_ARR_INDEX));
5498 fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay);
5499 }
5500}
5501
5502BasicBlock* Compiler::fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay)
5503{
5504 if (opts.MinOpts())
5505 {
5506 delay = false;
5507 }
5508
5509 if (!opts.compDbgCode)
5510 {
5511 if (!delay && !compIsForInlining())
5512 {
5513 // Create/find the appropriate "range-fail" label
5514 return fgRngChkTarget(compCurBB, kind);
5515 }
5516 }
5517
5518 return nullptr;
5519}
5520
5521/*****************************************************************************
5522 *
5523 * Expand a GT_INDEX node and fully morph the child operands
5524 *
 *  The original GT_INDEX node is bashed into the GT_IND node that accesses
5526 * the array element. We expand the GT_INDEX node into a larger tree that
5527 * evaluates the array base and index. The simplest expansion is a GT_COMMA
5528 * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag.
5529 * For complex array or index expressions one or more GT_COMMA assignments
5530 * are inserted so that we only evaluate the array or index expressions once.
5531 *
5532 * The fully expanded tree is then morphed. This causes gtFoldExpr to
5533 * perform local constant prop and reorder the constants in the tree and
5534 * fold them.
5535 *
5536 * We then parse the resulting array element expression in order to locate
5537 * and label the constants and variables that occur in the tree.
5538 */
5539
5540const int MAX_ARR_COMPLEXITY = 4;
5541const int MAX_INDEX_COMPLEXITY = 4;
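// If an array reference or index expression exceeds these node-count limits (see the
// gtComplexityExceeds checks in fgMorphArrayIndex below), it is evaluated into a temp so that the
// bounds check and the actual dereference see the same value.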
5542
5543GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
5544{
5545 noway_assert(tree->gtOper == GT_INDEX);
5546 GenTreeIndex* asIndex = tree->AsIndex();
5547
5548 var_types elemTyp = tree->TypeGet();
5549 unsigned elemSize = tree->gtIndex.gtIndElemSize;
5550 CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
5551
5552 noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
5553
5554#ifdef FEATURE_SIMD
5555 if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= maxSIMDStructBytes())
5556 {
5557 // If this is a SIMD type, this is the point at which we lose the type information,
5558 // so we need to set the correct type on the GT_IND.
5559 // (We don't care about the base type here, so we only check, but don't retain, the return value).
5560 unsigned simdElemSize = 0;
5561 if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
5562 {
5563 assert(simdElemSize == elemSize);
5564 elemTyp = getSIMDTypeForSize(elemSize);
5565 // This is the new type of the node.
5566 tree->gtType = elemTyp;
5567 // Now set elemStructType to null so that we don't confuse value numbering.
5568 elemStructType = nullptr;
5569 }
5570 }
5571#endif // FEATURE_SIMD
5572
    // Set up the array length's offset into lenOffs
    // and the first element's offset into elemOffs
5575 ssize_t lenOffs;
5576 ssize_t elemOffs;
5577 if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
5578 {
5579 lenOffs = OFFSETOF__CORINFO_String__stringLen;
5580 elemOffs = OFFSETOF__CORINFO_String__chars;
5581 tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
5582 }
5583 else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
5584 {
5585 lenOffs = OFFSETOF__CORINFO_Array__length;
5586 elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
5587 }
5588 else // We have a standard array
5589 {
5590 lenOffs = OFFSETOF__CORINFO_Array__length;
5591 elemOffs = OFFSETOF__CORINFO_Array__data;
5592 }
5593
5594 // In minopts, we expand GT_INDEX to GT_IND(GT_INDEX_ADDR) in order to minimize the size of the IR. As minopts
5595 // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down.
5596 // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion
5597 // performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in
5598 // minopts).
5599 //
5600 // When we *are* optimizing, we fully expand GT_INDEX to:
5601 // 1. Evaluate the array address expression and store the result in a temp if the expression is complex or
5602 // side-effecting.
5603 // 2. Evaluate the array index expression and store the result in a temp if the expression is complex or
5604 // side-effecting.
5605 // 3. Perform an explicit bounds check: GT_ARR_BOUNDS_CHK(index, GT_ARR_LENGTH(array))
5606 // 4. Compute the address of the element that will be accessed:
5607 // GT_ADD(GT_ADD(array, firstElementOffset), GT_MUL(index, elementSize))
5608 // 5. Dereference the address with a GT_IND.
5609 //
5610 // This expansion explicitly exposes the bounds check and the address calculation to the optimizer, which allows
5611 // for more straightforward bounds-check removal, CSE, etc.
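    // As a rough sketch, when optimizing, "a[i]" for a 4-byte element type becomes something like:
    //
    //     COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(a)),
    //           IND(ADD(a, ADD(MUL(i, 4), firstElemOffs))))
    //
    // possibly wrapped in further COMMAs that evaluate 'a' and 'i' into temps when those
    // expressions are complex or have side effects.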
5612 if (opts.MinOpts())
5613 {
5614 GenTree* const array = fgMorphTree(asIndex->Arr());
5615 GenTree* const index = fgMorphTree(asIndex->Index());
5616
5617 GenTreeIndexAddr* const indexAddr =
5618 new (this, GT_INDEX_ADDR) GenTreeIndexAddr(array, index, elemTyp, elemStructType, elemSize,
5619 static_cast<unsigned>(lenOffs), static_cast<unsigned>(elemOffs));
5620 indexAddr->gtFlags |= (array->gtFlags | index->gtFlags) & GTF_ALL_EFFECT;
5621
5622 // Mark the indirection node as needing a range check if necessary.
5623 // Note this will always be true unless JitSkipArrayBoundCheck() is used
5624 if ((indexAddr->gtFlags & GTF_INX_RNGCHK) != 0)
5625 {
5626 fgSetRngChkTarget(indexAddr);
5627 }
5628
5629 // Change `tree` into an indirection and return.
5630 tree->ChangeOper(GT_IND);
5631 GenTreeIndir* const indir = tree->AsIndir();
5632 indir->Addr() = indexAddr;
5633 indir->gtFlags = GTF_IND_ARR_INDEX | (indexAddr->gtFlags & GTF_ALL_EFFECT);
5634
5635#ifdef DEBUG
5636 indexAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5637#endif // DEBUG
5638
5639 return indir;
5640 }
5641
5642 GenTree* arrRef = asIndex->Arr();
5643 GenTree* index = asIndex->Index();
5644
5645 bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
5646 bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
5647
5648 GenTree* arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
5649 GenTree* indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
5650 GenTree* bndsChk = nullptr;
5651
5652 // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
5653 if (chkd)
5654 {
5655 GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression
5656 GenTree* index2 = nullptr;
5657
5658 // If the arrRef expression involves an assignment, a call or reads from global memory,
5659 // then we *must* allocate a temporary in which to "localize" those values,
5660 // to ensure that the same values are used in the bounds check and the actual
5661 // dereference.
5662 // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
5663 // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
5664 // complexity is not exposed. (Without that condition there are cases of local struct
5665 // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
5666 // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
5667 //
5668 if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
5669 gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
5670 {
5671 unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5672 arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
5673 arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5674 arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
5675 }
5676 else
5677 {
5678 arrRef2 = gtCloneExpr(arrRef);
5679 noway_assert(arrRef2 != nullptr);
5680 }
5681
5682 // If the index expression involves an assignment, a call or reads from global memory,
5683 // we *must* allocate a temporary in which to "localize" those values,
5684 // to ensure that the same values are used in the bounds check and the actual
5685 // dereference.
5686 // Also we allocate the temporary when the index is sufficiently complex/expensive.
5687 //
5688 if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
5689 (arrRef->OperGet() == GT_FIELD))
5690 {
5691 unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5692 indexDefn = gtNewTempAssign(indexTmpNum, index);
5693 index = gtNewLclvNode(indexTmpNum, index->TypeGet());
5694 index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
5695 }
5696 else
5697 {
5698 index2 = gtCloneExpr(index);
5699 noway_assert(index2 != nullptr);
5700 }
5701
5702 // Next introduce a GT_ARR_BOUNDS_CHECK node
5703 var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
5704
5705#ifdef _TARGET_64BIT_
        // The CLI Spec allows an array to be indexed by either an int32 or a native int. On a 64-bit
        // architecture this means the array index can potentially be a TYP_LONG, so in this case
        // the comparison will have to be widened to 64 bits.
5709 if (index->TypeGet() == TYP_I_IMPL)
5710 {
5711 bndsChkType = TYP_I_IMPL;
5712 }
5713#endif // _TARGET_64BIT_
5714
5715 GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs);
5716
5717 if (bndsChkType != TYP_INT)
5718 {
5719 arrLen = gtNewCastNode(bndsChkType, arrLen, false, bndsChkType);
5720 }
5721
5722 GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
5723 GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
5724
5725 bndsChk = arrBndsChk;
5726
5727 // Now we'll switch to using the second copies for arrRef and index
5728 // to compute the address expression
5729
5730 arrRef = arrRef2;
5731 index = index2;
5732 }
5733
5734 // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
5735
5736 GenTree* addr;
5737
5738#ifdef _TARGET_64BIT_
5739 // Widen 'index' on 64-bit targets
5740 if (index->TypeGet() != TYP_I_IMPL)
5741 {
5742 if (index->OperGet() == GT_CNS_INT)
5743 {
5744 index->gtType = TYP_I_IMPL;
5745 }
5746 else
5747 {
5748 index = gtNewCastNode(TYP_I_IMPL, index, false, TYP_I_IMPL);
5749 }
5750 }
5751#endif // _TARGET_64BIT_
5752
5753 /* Scale the index value if necessary */
5754 if (elemSize > 1)
5755 {
5756 GenTree* size = gtNewIconNode(elemSize, TYP_I_IMPL);
5757
5758 // Fix 392756 WP7 Crossgen
5759 //
5760 // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
5761 // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
5762 // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
5763 //
5764 size->gtFlags |= GTF_DONT_CSE;
5765
5766 /* Multiply by the array element size */
5767 addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
5768 }
5769 else
5770 {
5771 addr = index;
5772 }
5773
5774 // Be careful to only create the byref pointer when the full index expression is added to the array reference.
5775 // We don't want to create a partial byref address expression that doesn't include the full index offset:
5776 // a byref must point within the containing object. It is dangerous (especially when optimizations come into
5777 // play) to create a "partial" byref that doesn't point exactly to the correct object; there is risk that
5778 // the partial byref will not point within the object, and thus not get updated correctly during a GC.
5779 // This is mostly a risk in fully-interruptible code regions.
5780 //
5781 // NOTE: the tree form created here is pattern matched by optExtractArrIndex(), so changes here must
5782 // be reflected there.
5783
5784 /* Add the first element's offset */
5785
5786 GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5787
5788 addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns);
5789
5790 /* Add the object ref to the element's offset */
5791
5792 addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5793
5794#if SMALL_TREE_NODES
5795 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
5796#endif
5797
    // Change the original GT_INDEX node into a GT_IND node
5799 tree->SetOper(GT_IND);
5800
5801 // If the index node is a floating-point type, notify the compiler
5802 // we'll potentially use floating point registers at the time of codegen.
5803 if (varTypeIsFloating(tree->gtType))
5804 {
5805 this->compFloatingPointUsed = true;
5806 }
5807
5808 // We've now consumed the GTF_INX_RNGCHK, and the node
5809 // is no longer a GT_INDEX node.
5810 tree->gtFlags &= ~GTF_INX_RNGCHK;
5811
5812 tree->gtOp.gtOp1 = addr;
5813
5814 // This is an array index expression.
5815 tree->gtFlags |= GTF_IND_ARR_INDEX;
5816
5817 /* An indirection will cause a GPF if the address is null */
5818 tree->gtFlags |= GTF_EXCEPT;
5819
5820 if (nCSE)
5821 {
5822 tree->gtFlags |= GTF_DONT_CSE;
5823 }
5824
5825 // Store information about it.
5826 GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
5827
5828 // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
5829
5830 GenTree* indTree = tree;
5831
5832 // Did we create a bndsChk tree?
5833 if (bndsChk)
5834 {
5835 // Use a GT_COMMA node to prepend the array bound check
5836 //
5837 tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
5838
5839 /* Mark the indirection node as needing a range check */
5840 fgSetRngChkTarget(bndsChk);
5841 }
5842
5843 if (indexDefn != nullptr)
5844 {
5845 // Use a GT_COMMA node to prepend the index assignment
5846 //
5847 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
5848 }
5849 if (arrRefDefn != nullptr)
5850 {
        // Use a GT_COMMA node to prepend the arrRef assignment
5852 //
5853 tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
5854 }
5855
5856 // Currently we morph the tree to perform some folding operations prior
5857 // to attaching fieldSeq info and labeling constant array index contributions
5858 //
5859 fgMorphTree(tree);
5860
5861 // Ideally we just want to proceed to attaching fieldSeq info and labeling the
5862 // constant array index contributions, but the morphing operation may have changed
5863 // the 'tree' into something that now unconditionally throws an exception.
5864 //
    // In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
    // or it could be left unchanged. If it is unchanged then we should not return;
    // instead we should proceed to attaching fieldSeq info, etc...
5868 //
5869 GenTree* arrElem = tree->gtEffectiveVal();
5870
5871 if (fgIsCommaThrow(tree))
5872 {
5873 if ((arrElem != indTree) || // A new tree node may have been created
5874 (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
5875 {
5876 return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
5877 }
5878 }
5879
5880 assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
5881
5882 addr = arrElem->gtOp.gtOp1;
5883
5884 assert(addr->TypeGet() == TYP_BYREF);
5885
5886 GenTree* cnsOff = nullptr;
5887 if (addr->OperGet() == GT_ADD)
5888 {
5889 assert(addr->TypeGet() == TYP_BYREF);
5890 assert(addr->gtOp.gtOp1->TypeGet() == TYP_REF);
5891
5892 addr = addr->gtOp.gtOp2;
5893
5894 // Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
5895
5896 if (addr->gtOper == GT_CNS_INT)
5897 {
5898 cnsOff = addr;
5899 addr = nullptr;
5900 }
5901 else
5902 {
5903 if ((addr->OperGet() == GT_ADD) && (addr->gtOp.gtOp2->gtOper == GT_CNS_INT))
5904 {
5905 cnsOff = addr->gtOp.gtOp2;
5906 addr = addr->gtOp.gtOp1;
5907 }
5908
5909 // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
5910 addr->LabelIndex(this);
5911 }
5912 }
5913 else if (addr->OperGet() == GT_CNS_INT)
5914 {
5915 cnsOff = addr;
5916 }
5917
5918 FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
5919
5920 if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
5921 {
5922 // Assign it the [#FirstElem] field sequence
5923 //
5924 cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
5925 }
5926 else // We have folded the first element's offset with the index expression
5927 {
5928 // Build the [#ConstantIndex, #FirstElem] field sequence
5929 //
5930 FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
5931 FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
5932
5933 if (cnsOff == nullptr) // It must have folded into a zero offset
5934 {
5935 // Record in the general zero-offset map.
5936 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
5937 }
5938 else
5939 {
5940 cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
5941 }
5942 }
5943
5944 return tree;
5945}
5946
5947#ifdef _TARGET_X86_
5948/*****************************************************************************
5949 *
5950 * Wrap fixed stack arguments for varargs functions to go through varargs
5951 * cookie to access them, except for the cookie itself.
5952 *
5953 * Non-x86 platforms are allowed to access all arguments directly
5954 * so we don't need this code.
5955 *
5956 */
5957GenTree* Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
5958{
5959 /* For the fixed stack arguments of a varargs function, we need to go
5960 through the varargs cookies to access them, except for the
5961 cookie itself */
5962
5963 LclVarDsc* varDsc = &lvaTable[lclNum];
5964
5965 if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
5966 {
5967 // Create a node representing the local pointing to the base of the args
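        // The argument's address is then computed by subtracting (its stack offset, minus the callee
        // register-arg save area, plus the requested lclOffs) from that base; see the GT_SUB below.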
5968 GenTree* ptrArg =
5969 gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
5970 gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES +
5971 lclOffs));
5972
5973 // Access the argument through the local
5974 GenTree* tree;
5975 if (varTypeIsStruct(varType))
5976 {
5977 tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
5978 }
5979 else
5980 {
5981 tree = gtNewOperNode(GT_IND, varType, ptrArg);
5982 }
5983 tree->gtFlags |= GTF_IND_TGTANYWHERE;
5984
5985 if (varDsc->lvAddrExposed)
5986 {
5987 tree->gtFlags |= GTF_GLOB_REF;
5988 }
5989
5990 return fgMorphTree(tree);
5991 }
5992
    return nullptr;
5994}
5995#endif
5996
5997/*****************************************************************************
5998 *
5999 * Transform the given GT_LCL_VAR tree for code generation.
6000 */
6001
6002GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph)
6003{
6004 assert(tree->gtOper == GT_LCL_VAR);
6005
6006 unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
6007 var_types varType = lvaGetRealType(lclNum);
6008 LclVarDsc* varDsc = &lvaTable[lclNum];
6009
6010 if (varDsc->lvAddrExposed)
6011 {
6012 tree->gtFlags |= GTF_GLOB_REF;
6013 }
6014
6015#ifdef _TARGET_X86_
6016 if (info.compIsVarArgs)
6017 {
6018 GenTree* newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
6019 if (newTree != nullptr)
6020 {
6021 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
6022 {
6023 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
6024 }
6025 return newTree;
6026 }
6027 }
6028#endif // _TARGET_X86_
6029
6030 /* If not during the global morphing phase bail */
6031
6032 if (!fgGlobalMorph && !forceRemorph)
6033 {
6034 return tree;
6035 }
6036
6037 bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
6038
6039 noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
6040
6041 if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
6042 {
6043#if LOCAL_ASSERTION_PROP
6044 /* Assertion prop can tell us to omit adding a cast here */
6045 if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
6046 {
6047 return tree;
6048 }
6049#endif
6050 /* Small-typed arguments and aliased locals are normalized on load.
6051 Other small-typed locals are normalized on store.
           This also applies when running under the debugger, since the debugger could write to the variable.
6053 If this is one of the former, insert a narrowing cast on the load.
6054 ie. Convert: var-short --> cast-short(var-int) */
6055
6056 tree->gtType = TYP_INT;
6057 fgMorphTreeDone(tree);
6058 tree = gtNewCastNode(TYP_INT, tree, false, varType);
6059 fgMorphTreeDone(tree);
6060 return tree;
6061 }
6062
6063 return tree;
6064}
6065
6066/*****************************************************************************
6067 Grab a temp for big offset morphing.
6068 This method will grab a new temp if no temp of this "type" has been created.
6069 Or it will return the same cached one if it has been created.
6070*/
6071unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
6072{
6073 unsigned lclNum = fgBigOffsetMorphingTemps[type];
6074
6075 if (lclNum == BAD_VAR_NUM)
6076 {
6077 // We haven't created a temp for this kind of type. Create one now.
6078 lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6079 fgBigOffsetMorphingTemps[type] = lclNum;
6080 }
6081 else
6082 {
        // We'd better get the right type.
6084 noway_assert(lvaTable[lclNum].TypeGet() == type);
6085 }
6086
6087 noway_assert(lclNum != BAD_VAR_NUM);
6088 return lclNum;
6089}
6090
6091/*****************************************************************************
6092 *
6093 * Transform the given GT_FIELD tree for code generation.
6094 */
6095
6096GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac)
6097{
6098 assert(tree->gtOper == GT_FIELD);
6099
6100 CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
6101 unsigned fldOffset = tree->gtField.gtFldOffset;
6102 GenTree* objRef = tree->gtField.gtFldObj;
6103 bool fieldMayOverlap = false;
6104 bool objIsLocal = false;
6105
6106 if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR))
6107 {
        // Make sure we've checked whether 'objRef' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the
        // SIMD field rewrites are sensitive to.
6111 fgMorphImplicitByRefArgs(objRef);
6112 }
6113
6114 noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
6115 ((tree->gtFlags & GTF_GLOB_REF) != 0));
6116
6117 if (tree->gtField.gtFldMayOverlap)
6118 {
6119 fieldMayOverlap = true;
6120 // Reset the flag because we may reuse the node.
6121 tree->gtField.gtFldMayOverlap = false;
6122 }
6123
6124#ifdef FEATURE_SIMD
    // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6126 if (mac == nullptr)
6127 {
6128 GenTree* newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
6129 if (newTree != tree)
6130 {
6131 newTree = fgMorphSmpOp(newTree);
6132 return newTree;
6133 }
6134 }
6135 else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
6136 {
6137 GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
6138 if (lcl != nullptr)
6139 {
6140 lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
6141 }
6142 }
6143#endif
6144
6145 /* Is this an instance data member? */
6146
6147 if (objRef)
6148 {
6149 GenTree* addr;
6150 objIsLocal = objRef->IsLocal();
6151
6152 if (tree->gtFlags & GTF_IND_TLS_REF)
6153 {
6154 NO_WAY("instance field can not be a TLS ref.");
6155 }
6156
6157 /* We'll create the expression "*(objRef + mem_offs)" */
6158
6159 noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
6160
6161 // An optimization for Contextful classes:
6162 // we unwrap the proxy when we have a 'this reference'
6163 if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
6164 {
6165 objRef = fgUnwrapProxy(objRef);
6166 }
6167
6168 /*
6169 Now we have a tree like this:
6170
6171 +--------------------+
6172 | GT_FIELD | tree
6173 +----------+---------+
6174 |
6175 +--------------+-------------+
6176 | tree->gtField.gtFldObj |
6177 +--------------+-------------+
6178
6179
6180 We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6181
6182 +--------------------+
6183 | GT_IND/GT_OBJ | tree
6184 +---------+----------+
6185 |
6186 |
6187 +---------+----------+
6188 | GT_ADD | addr
6189 +---------+----------+
6190 |
6191 / \
6192 / \
6193 / \
6194 +-------------------+ +----------------------+
6195 | objRef | | fldOffset |
6196 | | | (when fldOffset !=0) |
6197 +-------------------+ +----------------------+
6198
6199
6200 or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
6201
6202
6203 +--------------------+
6204 | GT_IND/GT_OBJ | tree
6205 +----------+---------+
6206 |
6207 +----------+---------+
6208 | GT_COMMA | comma2
6209 +----------+---------+
6210 |
6211 / \
6212 / \
6213 / \
6214 / \
6215 +---------+----------+ +---------+----------+
6216 comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr
6217 +---------+----------+ +---------+----------+
6218 | |
6219 / \ / \
6220 / \ / \
6221 / \ / \
6222 +-----+-----+ +-----+-----+ +---------+ +-----------+
6223 asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset |
6224 +-----+-----+ +-----+-----+ +---------+ +-----------+
6225 | |
6226 / \ |
6227 / \ |
6228 / \ |
6229 +-----+-----+ +-----+-----+ +-----------+
6230 | tmpLcl | | objRef | | tmpLcl |
6231 +-----------+ +-----------+ +-----------+
6232
6233
6234 */
6235
6236 var_types objRefType = objRef->TypeGet();
6237
6238 GenTree* comma = nullptr;
6239
6240 // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
6241 // and thus is equivalent to a MACK_Ind with zero offset.
6242 MorphAddrContext defMAC(MACK_Ind);
6243 if (mac == nullptr)
6244 {
6245 mac = &defMAC;
6246 }
6247
6248 // This flag is set to enable the "conservative" style of explicit null-check insertion.
6249 // This means that we insert an explicit null check whenever we create byref by adding a
6250 // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
6251 // dereferenced). The alternative is "aggressive", which would not insert such checks (for
6252 // small offsets); in this plan, we would transfer some null-checking responsibility to
6253 // callee's of methods taking byref parameters. They would have to add explicit null checks
6254 // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
6255 // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
6256 // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
6257 // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
6258 // This is left here to point out how to implement it.
6259 CLANG_FORMAT_COMMENT_ANCHOR;
6260
6261#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
6262
6263 bool addExplicitNullCheck = false;
6264
6265 // Implicit byref locals are never null.
6266 if (!((objRef->gtOper == GT_LCL_VAR) && lvaIsImplicitByRefLocal(objRef->gtLclVarCommon.gtLclNum)))
6267 {
6268 // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
6269 // whose address is being taken is either a local or static variable, whose address is necessarily
6270 // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
6271 if (objRef->gtOper != GT_ADDR && (mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind))
6272 {
6273 if (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset))
6274 {
6275 addExplicitNullCheck = true;
6276 }
6277 else
6278 {
6279 // In R2R mode the field offset for some fields may change when the code
6280 // is loaded. So we can't rely on a zero offset here to suppress the null check.
6281 //
6282 // See GitHub issue #16454.
6283 bool fieldHasChangeableOffset = false;
6284
6285#ifdef FEATURE_READYTORUN_COMPILER
6286 fieldHasChangeableOffset = (tree->gtField.gtFieldLookup.addr != nullptr);
6287#endif
6288
6289#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
6290 addExplicitNullCheck = (mac->m_kind == MACK_Addr) &&
6291 ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset);
6292#else
6293 addExplicitNullCheck = (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
6294 ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset));
6295#endif
6296 }
6297 }
6298 }
6299
6300 if (addExplicitNullCheck)
6301 {
6302#ifdef DEBUG
6303 if (verbose)
6304 {
6305 printf("Before explicit null check morphing:\n");
6306 gtDispTree(tree);
6307 }
6308#endif
6309
6310 //
6311 // Create the "comma" subtree
6312 //
6313 GenTree* asg = nullptr;
6314 GenTree* nullchk;
6315
6316 unsigned lclNum;
6317
6318 if (objRef->gtOper != GT_LCL_VAR)
6319 {
6320 lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
6321
6322 // Create the "asg" node
6323 asg = gtNewTempAssign(lclNum, objRef);
6324 }
6325 else
6326 {
6327 lclNum = objRef->gtLclVarCommon.gtLclNum;
6328 }
6329
6330 // Create the "nullchk" node.
            // Make it TYP_BYTE so that we only dereference it for 1 byte.
6332 GenTree* lclVar = gtNewLclvNode(lclNum, objRefType);
6333 nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
6334
6335 nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
6336
6337 // An indirection will cause a GPF if the address is null.
6338 nullchk->gtFlags |= GTF_EXCEPT;
6339
6340 compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
6341 optMethodFlags |= OMF_HAS_NULLCHECK;
6342
6343 if (asg)
6344 {
6345 // Create the "comma" node.
6346 comma = gtNewOperNode(GT_COMMA,
6347 TYP_VOID, // We don't want to return anything from this "comma" node.
6348 // Set the type to TYP_VOID, so we can select "cmp" instruction
6349 // instead of "mov" instruction later on.
6350 asg, nullchk);
6351 }
6352 else
6353 {
6354 comma = nullchk;
6355 }
6356
6357 addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
6358 }
6359 else if (fldOffset == 0)
6360 {
6361 // Generate the "addr" node.
6362 addr = objRef;
6363 FieldSeqNode* fieldSeq =
6364 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6365 GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
6366 }
6367 else
6368 {
6369 addr = objRef;
6370 }
6371
6372#ifdef FEATURE_READYTORUN_COMPILER
6373 if (tree->gtField.gtFieldLookup.addr != nullptr)
6374 {
6375 GenTree* offsetNode = nullptr;
6376 if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
6377 {
6378 offsetNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)tree->gtField.gtFieldLookup.addr,
6379 GTF_ICON_FIELD_HDL, false);
6380 }
6381 else
6382 {
6383 noway_assert(!"unexpected accessType for R2R field access");
6384 }
6385
6386 var_types addType = (objRefType == TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF;
6387 addr = gtNewOperNode(GT_ADD, addType, addr, offsetNode);
6388 }
6389#endif
6390 if (fldOffset != 0)
6391 {
6392 // Generate the "addr" node.
6393 /* Add the member offset to the object's address */
6394 FieldSeqNode* fieldSeq =
6395 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6396 addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
6397 gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
6398 }
6399
6400 // Now let's set the "tree" as a GT_IND tree.
6401
6402 tree->SetOper(GT_IND);
6403 tree->gtOp.gtOp1 = addr;
6404
6405 tree->gtFlags &= (~GTF_EXCEPT | addr->gtFlags);
6406 tree->SetIndirExceptionFlags(this);
6407
6408 if (addExplicitNullCheck)
6409 {
6410 //
6411 // Create "comma2" node and link it to "tree".
6412 //
6413 GenTree* comma2;
6414 comma2 = gtNewOperNode(GT_COMMA,
6415 addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
6416 comma, addr);
6417 tree->gtOp.gtOp1 = comma2;
6418 }
6419
6420#ifdef DEBUG
6421 if (verbose)
6422 {
6423 if (addExplicitNullCheck)
6424 {
6425 printf("After adding explicit null check:\n");
6426 gtDispTree(tree);
6427 }
6428 }
6429#endif
6430 }
6431 else /* This is a static data member */
6432 {
6433 if (tree->gtFlags & GTF_IND_TLS_REF)
6434 {
6435 // Thread Local Storage static field reference
6436 //
6437 // Field ref is a TLS 'Thread-Local-Storage' reference
6438 //
6439 // Build this tree: IND(*) #
6440 // |
6441 // ADD(I_IMPL)
6442 // / \
6443 // / CNS(fldOffset)
6444 // /
6445 // /
6446 // /
6447 // IND(I_IMPL) == [Base of this DLL's TLS]
6448 // |
6449 // ADD(I_IMPL)
6450 // / \
6451 // / CNS(IdValue*4) or MUL
6452 // / / \
6453 // IND(I_IMPL) / CNS(4)
6454 // | /
6455 // CNS(TLS_HDL,0x2C) IND
6456 // |
6457 // CNS(pIdAddr)
6458 //
            //      # Denotes the original node
6460 //
6461 void** pIdAddr = nullptr;
6462 unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
6463
6464 //
            // If we can access the TLS DLL index ID value directly,
6466 // then pIdAddr will be NULL and
6467 // IdValue will be the actual TLS DLL index ID
6468 //
6469 GenTree* dllRef = nullptr;
6470 if (pIdAddr == nullptr)
6471 {
6472 if (IdValue != 0)
6473 {
6474 dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
6475 }
6476 }
6477 else
6478 {
6479 dllRef = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pIdAddr, GTF_ICON_STATIC_HDL, true);
6480
6481 // Next we multiply by 4
6482 dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
6483 }
6484
6485#define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
6486
6487 // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
6488
6489 GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
6490
6491 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6492 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6493 {
6494 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6495 tlsRef->gtFlags |= GTF_ICON_INITCLASS;
6496 }
6497
6498 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6499
6500 if (dllRef != nullptr)
6501 {
6502 /* Add the dllRef */
6503 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
6504 }
6505
6506 /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */
6507 tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
6508
6509 if (fldOffset != 0)
6510 {
6511 FieldSeqNode* fieldSeq =
6512 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6513 GenTree* fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
6514
6515 /* Add the TLS static field offset to the address */
6516
6517 tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
6518 }
6519
6520 // Final indirect to get to actual value of TLS static field
6521
6522 tree->SetOper(GT_IND);
6523 tree->gtOp.gtOp1 = tlsRef;
6524
6525 noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
6526 }
6527 else
6528 {
6529 // Normal static field reference
6530
6531 //
            // If we can access the static's address directly,
6533 // then pFldAddr will be NULL and
6534 // fldAddr will be the actual address of the static field
6535 //
6536 void** pFldAddr = nullptr;
6537 void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
6538
6539 if (pFldAddr == nullptr)
6540 {
6541#ifdef _TARGET_64BIT_
6542 if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
6543 {
                    // The address is not directly addressable, so force it into a
                    // constant so that we handle it properly.
6546
6547 GenTree* addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
6548 addr->gtType = TYP_I_IMPL;
6549 FieldSeqNode* fieldSeq =
6550 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6551 addr->gtIntCon.gtFieldSeq = fieldSeq;
6552 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6553 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6554 {
6555 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6556 addr->gtFlags |= GTF_ICON_INITCLASS;
6557 }
6558
6559 tree->SetOper(GT_IND);
6560 tree->gtOp.gtOp1 = addr;
6561
6562 return fgMorphSmpOp(tree);
6563 }
6564 else
6565#endif // _TARGET_64BIT_
6566 {
6567 // Only volatile or classinit could be set, and they map over
6568 noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
6569 static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
6570 static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
6571 tree->SetOper(GT_CLS_VAR);
6572 tree->gtClsVar.gtClsVarHnd = symHnd;
6573 FieldSeqNode* fieldSeq =
6574 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6575 tree->gtClsVar.gtFieldSeq = fieldSeq;
6576 }
6577
6578 return tree;
6579 }
6580 else
6581 {
6582 GenTree* addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
6583
6584 // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
6585 if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
6586 {
6587 tree->gtFlags &= ~GTF_FLD_INITCLASS;
6588 addr->gtFlags |= GTF_ICON_INITCLASS;
6589 }
6590
                // There are two cases here: either the static is RVA-based,
                // in which case the type of the FIELD node is not a GC type
                // and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
                // a GC type and the handle to it is a TYP_BYREF in the GC heap,
                // because handles to statics now go into the large object heap.
6596
6597 var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
6598 GenTree* op1 = gtNewOperNode(GT_IND, handleTyp, addr);
6599 op1->gtFlags |= GTF_IND_INVARIANT;
6600
6601 tree->SetOper(GT_IND);
6602 tree->gtOp.gtOp1 = op1;
6603 }
6604 }
6605 }
6606 noway_assert(tree->gtOper == GT_IND);
6607
6608 GenTree* res = fgMorphSmpOp(tree);
6609
6610 // If we have a struct type, this node would previously have been under a GT_ADDR,
6611 // and therefore would have been marked GTF_DONT_CSE.
6612 // TODO-1stClassStructs: revisit this.
6613 if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
6614 {
6615 res->gtFlags |= GTF_DONT_CSE;
6616 }
6617
6618 if (fldOffset == 0 && res->OperGet() == GT_IND)
6619 {
6620 GenTree* addr = res->gtOp.gtOp1;
6621 // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
6622 FieldSeqNode* fieldSeq =
6623 fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
6624 fgAddFieldSeqForZeroOffset(addr, fieldSeq);
6625 }
6626
6627 return res;
6628}
6629
6630//------------------------------------------------------------------------------
6631// fgMorphCallInline: attempt to inline a call
6632//
6633// Arguments:
6634// call - call expression to inline, inline candidate
6635// inlineResult - result tracking and reporting
6636//
6637// Notes:
6638// Attempts to inline the call.
6639//
6640// If successful, callee's IR is inserted in place of the call, and
6641// is marked with an InlineContext.
6642//
6643// If unsuccessful, the transformations done in anticipation of a
6644// possible inline are undone, and the candidate flag on the call
6645// is cleared.
6646
6647void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
6648{
6649 bool inliningFailed = false;
6650
6651 // Is this call an inline candidate?
6652 if (call->IsInlineCandidate())
6653 {
6654 // Attempt the inline
6655 fgMorphCallInlineHelper(call, inlineResult);
6656
6657 // We should have made up our minds one way or another....
6658 assert(inlineResult->IsDecided());
6659
6660 // If we failed to inline, we have a bit of work to do to cleanup
6661 if (inlineResult->IsFailure())
6662 {
6663
6664#ifdef DEBUG
6665
6666 // Before we do any cleanup, create a failing InlineContext to
6667 // capture details of the inlining attempt.
6668 m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
6669
6670#endif
6671
6672 inliningFailed = true;
6673
6674 // Clear the Inline Candidate flag so we can ensure later we tried
6675 // inlining all candidates.
6676 //
6677 call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
6678 }
6679 }
6680 else
6681 {
6682 // This wasn't an inline candidate. So it must be a GDV candidate.
6683 assert(call->IsGuardedDevirtualizationCandidate());
6684
6685 // We already know we can't inline this call, so don't even bother to try.
6686 inliningFailed = true;
6687 }
6688
6689 // If we failed to inline (or didn't even try), do some cleanup.
6690 if (inliningFailed)
6691 {
6692 if (call->gtReturnType != TYP_VOID)
6693 {
6694 JITDUMP("Inlining [%06u] failed, so bashing [%06u] to NOP\n", dspTreeID(call), dspTreeID(fgMorphStmt));
6695
6696 // Detach the GT_CALL tree from the original statement by
6697 // hanging a "nothing" node to it. Later the "nothing" node will be removed
6698 // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
6699
6700 noway_assert(fgMorphStmt->gtStmtExpr == call);
6701 fgMorphStmt->gtStmtExpr = gtNewNothingNode();
6702 }
6703 }
6704}
6705
6706/*****************************************************************************
6707 * Helper to attempt to inline a call
6708 * Sets success/failure in inline result
6709 * If success, modifies current method's IR with inlinee's IR
6710 * If failed, undoes any speculative modifications to current method
6711 */
6712
6713void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
6714{
6715 // Don't expect any surprises here.
6716 assert(result->IsCandidate());
6717
6718 if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
6719 {
6720 // For now, attributing this to call site, though it's really
6721 // more of a budget issue (lvaCount currently includes all
6722 // caller and prospective callee locals). We still might be
6723 // able to inline other callees into this caller, or inline
6724 // this callee in other callers.
6725 result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
6726 return;
6727 }
6728
6729 if (call->IsVirtual())
6730 {
6731 result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
6732 return;
6733 }
6734
6735 // Re-check this because guarded devirtualization may allow these through.
6736 if (gtIsRecursiveCall(call) && call->IsImplicitTailCall())
6737 {
6738 result->NoteFatal(InlineObservation::CALLSITE_IMPLICIT_REC_TAIL_CALL);
6739 return;
6740 }
6741
6742 // impMarkInlineCandidate() is expected not to mark tail prefixed calls
6743 // and recursive tail calls as inline candidates.
6744 noway_assert(!call->IsTailPrefixedCall());
6745 noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
6746
6747 /* If the caller's stack frame is marked, then we can't do any inlining. Period.
6748 Although we have checked this in impCanInline, it is possible that later IL instructions
6749 might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
6750 */
6751
6752 if (opts.compNeedSecurityCheck)
6753 {
6754 result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
6755 return;
6756 }
6757
6758 //
6759 // Calling inlinee's compiler to inline the method.
6760 //
6761
6762 unsigned startVars = lvaCount;
6763
6764#ifdef DEBUG
6765 if (verbose)
6766 {
6767 printf("Expanding INLINE_CANDIDATE in statement ");
6768 printTreeID(fgMorphStmt);
6769 printf(" in " FMT_BB ":\n", compCurBB->bbNum);
6770 gtDispTree(fgMorphStmt);
6771 if (call->IsImplicitTailCall())
6772 {
6773 printf("Note: candidate is implicit tail call\n");
6774 }
6775 }
6776#endif
6777
6778 impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
6779
6780 //
6781 // Invoke the compiler to inline the call.
6782 //
6783
6784 fgInvokeInlineeCompiler(call, result);
6785
6786 if (result->IsFailure())
6787 {
6788 // Undo some changes made in anticipation of inlining...
6789
6790 // Zero out the used locals
6791 memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
6792 for (unsigned i = startVars; i < lvaCount; i++)
6793 {
6794 new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(); // call the constructor.
6795 }
6796
6797 lvaCount = startVars;
6798
6799#ifdef DEBUG
6800 if (verbose)
6801 {
6802 // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
6803 }
6804#endif
6805
6806 return;
6807 }
6808
6809#ifdef DEBUG
6810 if (verbose)
6811 {
6812 // printf("After inlining lvaCount=%d.\n", lvaCount);
6813 }
6814#endif
6815}
6816
6817//------------------------------------------------------------------------
6818// fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp.
6819//
6820// Arguments:
6821// callee - The callee to check
6822//
6823// Return Value:
6824// Returns true or false based on whether the callee can be fastTailCalled
6825//
6826// Notes:
6827// This function is target specific and each target will make the fastTailCall
6828// decision differently. See the notes below.
6829//
6830//
6831// Windows Amd64:
6832// A fast tail call can be made whenever the number of callee arguments
6833// is larger than or equal to the number of caller arguments, or we have four
6834// or fewer callee arguments. This is because, on Windows AMD64, each
6835// argument uses exactly one register or one 8-byte stack slot. Thus, we only
6836// need to count arguments, and not be concerned with the size of each
6837// incoming or outgoing argument.
6838//
6839// Can fast tail call examples (amd64 Windows):
6840//
6841// -- Callee will have all register arguments --
6842// caller(int, int, int, int)
6843// callee(int, int, float, int)
6844//
6845// -- Callee requires stack space that is equal to the caller --
6846// caller(struct, struct, struct, struct, struct, struct)
6847// callee(int, int, int, int, int, int)
6848//
6849// -- Callee requires stack space that is less than the caller --
6850// caller(struct, double, struct, float, struct, struct)
6851// callee(int, int, int, int, int)
6852//
6853// -- Callee will have all register arguments --
6854// caller(int)
6855// callee(int, int, int, int)
6856//
6857// Cannot fast tail call examples (amd64 Windows):
6858//
6859// -- Callee requires stack space that is larger than the caller --
6860// caller(struct, double, struct, float, struct, struct)
6861// callee(int, int, int, int, int, double, double, double)
6862//
6863// Unix Amd64 && Arm64:
6864// A fastTailCall decision can be made whenever the callee's stack space is
6865// less than or equal to the caller's stack space. There are many permutations
6866// of when the caller and callee have different stack sizes if there are
6867// structs being passed to either the caller or callee.
6868//
6869// Exceptions:
6870// 1) If the callee has structs which cannot be enregistered it will be
6871// reported as cannot fast tail call. This is an implementation limitation
6872//       where only the callee is checked for non-enregisterable structs. This is
6873// tracked with https://github.com/dotnet/coreclr/issues/12644.
6874//
6875// 2) If the caller or callee has stack arguments and the callee has more
6876//       arguments than the caller, it will be reported as cannot fast tail call.
6877// This is due to a bug in LowerFastTailCall which assumes that
6878// nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This
6879// is tracked with https://github.com/dotnet/coreclr/issues/12468.
6880//
6881// 3) If the callee has a 9 to 16 byte struct argument and the callee has
6882// stack arguments, the decision will be to not fast tail call. This is
6883//       because before fgMorphArgs is done, it is unknown whether the struct
6884//       will be placed on the stack or enregistered. Therefore, the conservative
6885//       decision of do not fast tail call is taken. This limitation should be
6886// removed if/when fgMorphArgs no longer depends on fgCanFastTailCall.
6887//
6888// 4) Arm64 Only, if there are HFA arguments and the callee has stack
6889// arguments, the decision will be reported as cannot fast tail call.
6890//       This is because before fgMorphArgs is done, it is unknown whether the
6891//       struct will be placed on the stack or enregistered. Therefore, the
6892//       conservative decision of do not fast tail call is taken.
6893//
6894// Can fast tail call examples (amd64 Unix):
6895//
6896// -- Callee will have all register arguments --
6897// caller(int, int, int, int)
6898// callee(int, int, float, int)
6899//
6900// -- Callee requires stack space that is equal to the caller --
6901// caller({ int, int }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack
6902// space
6903// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
6904//
6905// -- Callee requires stack space that is less than the caller --
6906// caller({ int, int }, int, { int, int }, int, { int, int }, { int, int }) 6 int register arguments, 32 byte stack
6907// space
6908// callee(int, int, int, int, int, int, { int, int } ) // 6 int register arguments, 16 byte stack space
6909//
6910// -- Callee will have all register arguments --
6911// caller(int)
6912// callee(int, int, int, int)
6913//
6914// Cannot fast tail call examples (amd64 Unix):
6915//
6916// -- Callee requires stack space that is larger than the caller --
6917// caller(float, float, float, float, float, float, float, float) -- 8 float register arguments
6918// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
6919//
6920// -- Callee has structs which cannot be enregistered (Implementation Limitation) --
6921// caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register
6922// arguments, 24 byte stack space
6923// callee({ double, double, double }) -- 24 bytes stack space
6924//
6925// -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) --
6926// caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space
6927// callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space
6928//
6929// -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) --
6930// caller({ double, double, double, double, double, double }) // 48 byte stack
6931// callee(int, int) -- 2 int registers
6932
6933bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
6934{
6935#if FEATURE_FASTTAILCALL
6936    // Reaching here means that the return types of the caller and callee are tail call compatible.
6937 // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
6938 //
6939    // In the implicit tail call case, callSig may not be available, but it is guaranteed to be available
6940    // for explicit tail call cases. The reason callSig may not be available for an implicit tail call is
6941    // that the call node might be marked as an inline candidate and could fail to be inlined, in which case
6942    // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which
6943    // currently does not copy/set callSig.
6944 CLANG_FORMAT_COMMENT_ANCHOR;
6945
6946#ifdef DEBUG
6947 if (callee->IsTailPrefixedCall())
6948 {
6949 assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
6950 (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
6951 }
6952#endif
6953
6954 auto reportFastTailCallDecision = [this, callee](const char* msg, size_t callerStackSize, size_t calleeStackSize) {
6955#if DEBUG
6956 if ((JitConfig.JitReportFastTailCallDecisions()) == 1)
6957 {
6958 if (callee->gtCallType != CT_INDIRECT)
6959 {
6960 const char* methodName;
6961
6962 methodName = eeGetMethodFullName(callee->gtCallMethHnd);
6963
6964 printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ",
6965 info.compFullName, methodName);
6966 }
6967 else
6968 {
6969 printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- "
6970 "Decision: ",
6971 info.compFullName);
6972 }
6973
6974 if (callerStackSize != -1)
6975 {
6976 printf("%s (CallerStackSize: %d, CalleeStackSize: %d)\n\n", msg, callerStackSize, calleeStackSize);
6977 }
6978 else
6979 {
6980 printf("%s\n\n", msg);
6981 }
6982 }
6983 else
6984 {
6985 JITDUMP("[Fast tailcall decision]: %s\n", msg);
6986 }
6987#else
6988 (void)this;
6989 (void)callee;
6990#endif // DEBUG
6991 };
6992
6993 // Note on vararg methods:
6994    // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
6995    // But we can be sure that the incoming arg area of the vararg caller is sufficient to hold its
6996    // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
6997    // out-going area required for the callee is bounded by the caller's fixed argument space.
6998    //
6999    // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
7000 unsigned nCallerArgs = info.compArgsCount;
7001
7002 size_t callerArgRegCount = codeGen->intRegState.rsCalleeRegArgCount;
7003 size_t callerFloatArgRegCount = codeGen->floatRegState.rsCalleeRegArgCount;
7004
7005 // Count the callee args including implicit and hidden.
7006 // Note that GenericContext and VarargCookie are added by importer while
7007 // importing the call to gtCallArgs list along with explicit user args.
7008 size_t calleeArgRegCount = 0;
7009 size_t calleeFloatArgRegCount = 0;
7010
7011 if (callee->gtCallObjp) // thisPtr
7012 {
7013 ++calleeArgRegCount;
7014 }
7015
7016 if (callee->HasRetBufArg()) // RetBuf
7017 {
7018 // We don't increment calleeArgRegCount here, since it is already in callee->gtCallArgs.
7019
7020 // If callee has RetBuf param, caller too must have it.
7021 // Otherwise go the slow route.
7022 if (info.compRetBuffArg == BAD_VAR_NUM)
7023 {
7024 reportFastTailCallDecision("Callee has RetBuf but caller does not.", 0, 0);
7025 return false;
7026 }
7027 }
7028
7029    // Count user args while tracking whether any of them is a multi-byte param
7030    // that cannot be passed in a register. Note that we don't need to count
7031 // non-standard and secret params passed in registers (e.g. R10, R11) since
7032 // these won't contribute to out-going arg size.
7033 // For each struct arg, hasMultiByteStackArgs will track if it can be passed in registers.
7034    // If it cannot, we will break out of the loop and not fastTailCall. This is an implementation limitation
7035    // where only the callee is checked for non-enregisterable structs.
7036 // It is tracked with https://github.com/dotnet/coreclr/issues/12644.
7037 bool hasMultiByteStackArgs = false;
7038 bool hasTwoSlotSizedStruct = false;
7039 bool hasHfaArg = false;
7040 size_t nCalleeArgs = calleeArgRegCount; // Keep track of how many args we have.
7041 size_t calleeStackSize = 0;
7042 for (GenTree* args = callee->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
7043 {
7044 ++nCalleeArgs;
7045 assert(args->OperIsList());
7046 GenTree* argx = args->gtOp.gtOp1;
7047
7048 if (varTypeIsStruct(argx))
7049 {
7050 // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
7051 argx = argx->gtEffectiveVal(true /*commaOnly*/);
7052
7053 // Get the size of the struct and see if it is register passable.
7054 CORINFO_CLASS_HANDLE objClass = nullptr;
7055
7056 if (argx->OperGet() == GT_OBJ)
7057 {
7058 objClass = argx->AsObj()->gtClass;
7059 }
7060 else if (argx->IsLocal())
7061 {
7062 objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
7063 }
7064 if (objClass != nullptr)
7065 {
7066#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
7067
7068 unsigned typeSize = 0;
7069 // We should have already broken out of the loop if we've set hasMultiByteStackArgs to true.
7070 assert(!hasMultiByteStackArgs);
7071 hasMultiByteStackArgs =
7072 !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false, false);
7073
7074#if defined(UNIX_AMD64_ABI)
7075 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
7076
7077 assert(objClass != nullptr);
7078 eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
7079
7080 if (structDesc.passedInRegisters)
7081 {
7082 if (structDesc.eightByteCount == 2)
7083 {
7084 hasTwoSlotSizedStruct = true;
7085 }
7086
7087 for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
7088 {
7089 if (structDesc.IsIntegralSlot(i))
7090 {
7091 ++calleeArgRegCount;
7092 }
7093 else if (structDesc.IsSseSlot(i))
7094 {
7095 ++calleeFloatArgRegCount;
7096 }
7097 else
7098 {
7099 assert(false && "Invalid eightbyte classification type.");
7100 break;
7101 }
7102 }
7103 }
7104 else
7105 {
7106 calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE);
7107 hasMultiByteStackArgs = true;
7108 }
7109
7110#elif defined(_TARGET_ARM64_) // ARM64
7111 var_types hfaType = GetHfaType(argx);
7112 bool isHfaArg = varTypeIsFloating(hfaType);
7113 size_t size = 1;
7114
7115 if (isHfaArg)
7116 {
7117 hasHfaArg = true;
7118
7119 calleeFloatArgRegCount += GetHfaCount(argx);
7120 }
7121 else
7122 {
7123 // Structs are either passed in 1 or 2 (64-bit) slots
7124 size_t roundupSize = roundUp(typeSize, TARGET_POINTER_SIZE);
7125 size = roundupSize / TARGET_POINTER_SIZE;
7126
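                    // Note: on ARM64 a struct larger than two pointer-sized slots (16 bytes) is
                    // passed by reference, so it consumes only a single pointer-sized slot here.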
7127 if (size > 2)
7128 {
7129 size = 1;
7130 }
7132 else if (size == 2)
7133 {
7134 hasTwoSlotSizedStruct = true;
7135 }
7136
7137 calleeArgRegCount += size;
7138 }
7139
7140#elif defined(WINDOWS_AMD64_ABI)
7141
7142 ++calleeArgRegCount;
7143
7144#endif // UNIX_AMD64_ABI
7145
7146#else
7147 assert(!"Target platform ABI rules regarding passing struct type args in registers");
7148 unreached();
7149#endif //_TARGET_AMD64_ || _TARGET_ARM64_
7150 }
7151 else
7152 {
7153 hasMultiByteStackArgs = true;
7154 }
7155 }
7156 else
7157 {
7158 varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount;
7159 }
7160
7161 // We can break early on multiByte cases.
7162 if (hasMultiByteStackArgs)
7163 {
7164 break;
7165 }
7166 }
7167
7168 const unsigned maxRegArgs = MAX_REG_ARG;
7169
7170// Reaching here means that the callee has only those argument types which can be passed in
7171// a register and which, if passed on the stack, will occupy exactly one stack slot in the out-going arg area.
7172// If we are passing args on the stack for the callee and it has more args passed on the stack than
7173// the caller, then fast tail call cannot be performed.
7174//
7175// Note that the GC'ness of on stack args need not match since the arg setup area is marked
7176// as non-interruptible for fast tail calls.
7177
7178#ifdef WINDOWS_AMD64_ABI
7179 assert(calleeStackSize == 0);
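    // On Windows x64 each argument beyond the first MAX_REG_ARG (4) occupies exactly one 8-byte
    // stack slot. For example, a callee taking 6 int args and 2 float args would need
    // (6 + 2) - 4 = 4 stack slots, i.e. 32 bytes of out-going arg space.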
7180 size_t calleeStackSlots = ((calleeArgRegCount + calleeFloatArgRegCount) > maxRegArgs)
7181 ? (calleeArgRegCount + calleeFloatArgRegCount) - maxRegArgs
7182 : 0;
7183 calleeStackSize = calleeStackSlots * TARGET_POINTER_SIZE;
7184 size_t callerStackSize = info.compArgStackSize;
7185
7186 bool hasStackArgs = false;
7187
7188 if (callerStackSize > 0 || calleeStackSize > 0)
7189 {
7190 hasStackArgs = true;
7191 }
7192
7193 // Go the slow route, if it has multi-byte params. This is an implementation
7194    // limitation; see https://github.com/dotnet/coreclr/issues/12644.
7195 if (hasMultiByteStackArgs)
7196 {
7197 reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
7198 return false;
7199 }
7200
7201    // x64 Windows: If the caller or callee has arguments passed on the stack, then
7202    // make sure the callee does not have more arguments than the caller.
7203 if (hasStackArgs && (nCalleeArgs > nCallerArgs))
7204 {
7205 reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize,
7206 calleeStackSize);
7207 return false;
7208 }
7209
7210#elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)
7211
7212    // For *nix Amd64 and Arm64, check to see if all arguments for the callee
7213    // and caller are passed in registers. If not, ensure that the outgoing argument stack size
7214    // requirement for the callee is less than or equal to the caller's incoming argument stack space.
7215    //
7216    // Also, in the case that we have to pass arguments on the stack, make sure
7217    // that we are not dealing with structs that are >8 bytes.
7218
7219 bool hasStackArgs = false;
7220 size_t maxFloatRegArgs = MAX_FLOAT_REG_ARG;
7221
7222 size_t calleeIntStackArgCount = calleeArgRegCount > maxRegArgs ? calleeArgRegCount - maxRegArgs : 0;
7223 size_t calleeFloatStackArgCount =
7224 calleeFloatArgRegCount > maxFloatRegArgs ? calleeFloatArgRegCount - maxFloatRegArgs : 0;
7225
7226 size_t calleeStackArgCount = calleeIntStackArgCount + calleeFloatStackArgCount;
7227 size_t callerStackSize = info.compArgStackSize;
7228 calleeStackSize += calleeStackArgCount * TARGET_POINTER_SIZE;
7229
7230 if (callerStackSize > 0 || calleeStackSize > 0)
7231 {
7232 hasStackArgs = true;
7233 }
7234
7235 // Go the slow route, if it has multi-byte params. This is an implementation
7236    // limitation; see https://github.com/dotnet/coreclr/issues/12644.
7237 if (hasMultiByteStackArgs)
7238 {
7239 reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
7240 return false;
7241 }
7242
7243    // Callee has a >8 and <=16 byte struct and arguments that have to go on the stack. Do not fastTailCall.
7244 if (calleeStackSize > 0 && hasTwoSlotSizedStruct)
7245 {
7246 reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasTwoSlotSizedStruct",
7247 callerStackSize, calleeStackSize);
7248 return false;
7249 }
7250
7251    // Callee has an HFA struct and arguments that have to go on the stack. Do not fastTailCall.
7252 if (calleeStackSize > 0 && hasHfaArg)
7253 {
7254 reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasHfaArg", callerStackSize,
7255 calleeStackSize);
7256 return false;
7257 }
7258
7259 // TODO-AMD64-Unix
7260 // TODO-ARM64
7261 //
7262 // LowerFastTailCall currently assumes nCalleeArgs <= nCallerArgs. This is
7263 // not true in many cases on x64 linux, remove this pessimization when
7264 // LowerFastTailCall is fixed. See https://github.com/dotnet/coreclr/issues/12468
7265 // for more information.
7266 if (hasStackArgs && (nCalleeArgs > nCallerArgs))
7267 {
7268 reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize,
7269 calleeStackSize);
7270 return false;
7271 }
7272
7273 if (calleeStackSize > callerStackSize)
7274 {
7275 reportFastTailCallDecision("Will not fastTailCall calleeStackSize > callerStackSize", callerStackSize,
7276 calleeStackSize);
7277 return false;
7278 }
7279
7280#else
7281
7282 NYI("fastTailCall not supported on this Architecture.");
7283
7284#endif // WINDOWS_AMD64_ABI
7285
7286 reportFastTailCallDecision("Will fastTailCall", callerStackSize, calleeStackSize);
7287 return true;
7288#else // FEATURE_FASTTAILCALL
7289 return false;
7290#endif
7291}
7292
7293/*****************************************************************************
7294 *
7295 * Transform the given GT_CALL tree for tail call code generation.
7296 */
7297void Compiler::fgMorphTailCall(GenTreeCall* call, void* pfnCopyArgs)
7298{
7299 JITDUMP("fgMorphTailCall (before):\n");
7300 DISPTREE(call);
7301
7302 // The runtime requires that we perform a null check on the `this` argument before
7303 // tail calling to a virtual dispatch stub. This requirement is a consequence of limitations
7304 // in the runtime's ability to map an AV to a NullReferenceException if
7305    // the AV occurs in a dispatch stub that has an unmanaged caller.
7306 if (call->IsVirtualStub())
7307 {
7308 call->gtFlags |= GTF_CALL_NULLCHECK;
7309 }
7310
7311#if defined(_TARGET_ARM_)
7312 // For the helper-assisted tail calls, we need to push all the arguments
7313 // into a single list, and then add a few extra at the beginning
7314
7315 // Check for PInvoke call types that we don't handle in codegen yet.
7316 assert(!call->IsUnmanaged());
7317 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
7318
7319 // First move the this pointer (if any) onto the regular arg list
7320 GenTree* thisPtr = NULL;
7321 if (call->gtCallObjp)
7322 {
7323 GenTree* objp = call->gtCallObjp;
7324 call->gtCallObjp = NULL;
7325
7326 if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
7327 {
7328 thisPtr = gtClone(objp, true);
7329 var_types vt = objp->TypeGet();
7330 if (thisPtr == NULL)
7331 {
7332 // Too complex, so use a temp
7333 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7334 GenTree* asg = gtNewTempAssign(lclNum, objp);
7335 if (!call->IsVirtualVtable())
7336 {
7337 // Add an indirection to get the nullcheck
7338 GenTree* tmp = gtNewLclvNode(lclNum, vt);
7339 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7340 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7341 }
7342 objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7343 thisPtr = gtNewLclvNode(lclNum, vt);
7344 }
7345 else if (!call->IsVirtualVtable())
7346 {
7347 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7348 objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
7349 thisPtr = gtClone(thisPtr, true);
7350 }
7351
7352 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7353 }
7354
7355 call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
7356 }
7357
7358 // Add the extra VSD parameter if needed
7359 if (call->IsVirtualStub())
7360 {
7361 GenTree* stubAddrArg = fgGetStubAddrArg(call);
7362 // And push the stub address onto the list of arguments
7363 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7364 }
7365 else if (call->IsVirtualVtable())
7366 {
7367 noway_assert(thisPtr != NULL);
7368
7369 GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
7370 GenTree* vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7371 vtbl->gtFlags |= GTF_EXCEPT;
7372
7373 unsigned vtabOffsOfIndirection;
7374 unsigned vtabOffsAfterIndirection;
7375 bool isRelative;
7376 info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection,
7377 &isRelative);
7378
7379 /* Get the appropriate vtable chunk */
7380
7381 if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
7382 {
7383 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
7384
7385 GenTree* indOffTree = nullptr;
7386
7387 if (isRelative)
7388 {
7389 indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
7390 nullptr DEBUGARG("virtual table call"));
7391 }
7392
7393 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7394
7395 if (isRelative)
7396 {
7397 vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree);
7398 }
7399 }
7400
7401 /* Now the appropriate vtable slot */
7402
7403 add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
7404
7405 GenTree* indOffTree = nullptr;
7406
7407 if (isRelative)
7408 {
7409 indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
7410 nullptr DEBUGARG("virtual table call 2"));
7411 }
7412
7413 vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
7414
7415 if (isRelative)
7416 {
7417 vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree);
7418 }
7419
7420 // Switch this to a plain indirect call
7421 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
7422 assert(!call->IsVirtual());
7423 call->gtCallType = CT_INDIRECT;
7424
7425 call->gtCallAddr = vtbl;
7426 call->gtCallCookie = NULL;
7427 call->gtFlags |= GTF_EXCEPT;
7428 }
7429
7430 // Now inject a placeholder for the real call target that codegen will generate
7431 GenTree* arg = gtNewIconNode(0, TYP_I_IMPL);
7432 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7433
7434 // Lastly inject the pointer for the copy routine
7435 noway_assert(pfnCopyArgs != nullptr);
7436 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7437 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7438
7439 // It is now a varargs tail call
7440 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7441 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7442
7443#elif defined(_TARGET_XARCH_)
7444
7445 // For the helper-assisted tail calls, we need to push all the arguments
7446 // into a single list, and then add a few extra at the beginning or end.
7447 //
7448 // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
7449 //
7450 // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
7451 //
7452 // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
7453 // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
7454 // for callTarget here which will be replaced later with callTarget in tail call lowering.
7455 //
7456 // For x86, the tailcall helper is defined as:
7457 //
7458 // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
7459 // callTarget)
7460 //
7461 // Note that the special arguments are on the stack, whereas the function arguments follow
7462 // the normal convention: there might be register arguments in ECX and EDX. The stack will
7463 // look like (highest address at the top):
7464 // first normal stack argument
7465 // ...
7466 // last normal stack argument
7467 // numberOfOldStackArgs
7468 // numberOfNewStackArgs
7469 // flags
7470 // callTarget
7471 //
7472 // Each special arg is 4 bytes.
7473 //
7474 // 'flags' is a bitmask where:
7475 // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
7476 // callee-saved registers for tailcall functions. Note that the helper assumes
7477 // that the callee-saved registers live immediately below EBP, and must have been
7478 // pushed in this order: EDI, ESI, EBX.
7479 // 2 == call target is a virtual stub dispatch.
7480 //
7481 // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
7482 // on the custom calling convention.
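    //
    // As a rough sketch of the bitmask described above: a helper-assisted tail call whose target is a
    // virtual stub dispatch and which restores the callee-saved registers would pass flags = 3 (1 | 2),
    // while a non-VSD helper-assisted tail call would typically pass flags = 1.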
7483
7484 // Check for PInvoke call types that we don't handle in codegen yet.
7485 assert(!call->IsUnmanaged());
7486 assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
7487
7488 // Don't support tail calling helper methods
7489 assert(call->gtCallType != CT_HELPER);
7490
7491 // We come this route only for tail prefixed calls that cannot be dispatched as
7492 // fast tail calls
7493 assert(!call->IsImplicitTailCall());
7494 assert(!fgCanFastTailCall(call));
7495
7496 // First move the 'this' pointer (if any) onto the regular arg list. We do this because
7497 // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
7498 // and thus shift where the 'this' pointer will be passed to a later argument slot. In
7499 // addition, for all platforms, we are going to change the call into a helper call. Our code
7500 // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
7501 // do this transformation, we must explicitly create a null 'this' pointer check, if required,
7502 // since special 'this' pointer handling will no longer kick in.
7503 //
7504 // Some call types, such as virtual vtable calls, require creating a call address expression
7505 // that involves the "this" pointer. Lowering will sometimes create an embedded statement
7506 // to create a temporary that is assigned to the "this" pointer expression, and then use
7507 // that temp to create the call address expression. This temp creation embedded statement
7508 // will occur immediately before the "this" pointer argument, and then will be used for both
7509 // the "this" pointer argument as well as the call address expression. In the normal ordering,
7510 // the embedded statement establishing the "this" pointer temp will execute before both uses
7511 // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
7512 // normal call argument list, and insert a placeholder which will hold the call address
7513 // expression. For non-x86, things are ok, because the order of execution of these is not
7514 // altered. However, for x86, the call address expression is inserted as the *last* argument
7515 // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
7516 // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
7517 // for those cases where call lowering creates an embedded form temp of "this", we will
7518 // create a temp here, early, that will later get morphed correctly.
7519
7520 if (call->gtCallObjp)
7521 {
7522 GenTree* thisPtr = nullptr;
7523 GenTree* objp = call->gtCallObjp;
7524 call->gtCallObjp = nullptr;
7525
7526#ifdef _TARGET_X86_
7527 if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
7528 {
7529 // tmp = "this"
7530 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7531 GenTree* asg = gtNewTempAssign(lclNum, objp);
7532
7533 // COMMA(tmp = "this", tmp)
7534 var_types vt = objp->TypeGet();
7535 GenTree* tmp = gtNewLclvNode(lclNum, vt);
7536 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
7537
7538 objp = thisPtr;
7539 }
7540#endif // _TARGET_X86_
7541
7542 if (call->NeedsNullCheck())
7543 {
7544 // clone "this" if "this" has no side effects.
7545 if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
7546 {
7547 thisPtr = gtClone(objp, true);
7548 }
7549
7550 var_types vt = objp->TypeGet();
7551 if (thisPtr == nullptr)
7552 {
7553 // create a temp if either "this" has side effects or "this" is too complex to clone.
7554
7555 // tmp = "this"
7556 unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
7557 GenTree* asg = gtNewTempAssign(lclNum, objp);
7558
7559 // COMMA(tmp = "this", deref(tmp))
7560 GenTree* tmp = gtNewLclvNode(lclNum, vt);
7561 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
7562 asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
7563
7564 // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
7565 thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
7566 }
7567 else
7568 {
7569 // thisPtr = COMMA(deref("this"), "this")
7570 GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
7571 thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
7572 }
7573
7574 call->gtFlags &= ~GTF_CALL_NULLCHECK;
7575 }
7576 else
7577 {
7578 thisPtr = objp;
7579 }
7580
7581 // During rationalization tmp="this" and null check will
7582 // materialize as embedded stmts in right execution order.
7583 assert(thisPtr != nullptr);
7584 call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
7585 }
7586
7587#if defined(_TARGET_AMD64_)
7588
7589 // Add the extra VSD parameter to arg list in case of VSD calls.
7590 // Tail call arg copying thunk will move this extra VSD parameter
7591 // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
7592 // in Stublinkerx86.cpp for more details.
7593 if (call->IsVirtualStub())
7594 {
7595 GenTree* stubAddrArg = fgGetStubAddrArg(call);
7596 // And push the stub address onto the list of arguments
7597 call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
7598 }
7599
7600 // Now inject a placeholder for the real call target that Lower phase will generate.
7601 GenTree* arg = gtNewIconNode(0, TYP_I_IMPL);
7602 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7603
7604 // Inject the pointer for the copy routine to be used for struct copying
7605 noway_assert(pfnCopyArgs != nullptr);
7606 arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
7607 call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
7608
7609#else // !_TARGET_AMD64_
7610
7611 // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
7612 // append to the list.
7613 GenTreeArgList** ppArg = &call->gtCallArgs;
7614 for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
7615 {
7616 ppArg = (GenTreeArgList**)&args->gtOp2;
7617 }
7618 assert(ppArg != nullptr);
7619 assert(*ppArg == nullptr);
7620
7621 unsigned nOldStkArgsWords =
7622 (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
7623 GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
7624 *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
7625 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7626
7627 // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
7628 // The constant will be replaced.
7629 GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
7630 *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
7631 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7632
7633 // Inject a placeholder for the flags.
7634 // The constant will be replaced.
7635 GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
7636 *ppArg = gtNewListNode(arg1, nullptr);
7637 ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
7638
7639 // Inject a placeholder for the real call target that the Lowering phase will generate.
7640 // The constant will be replaced.
7641 GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
7642 *ppArg = gtNewListNode(arg0, nullptr);
7643
7644#endif // !_TARGET_AMD64_
7645
7646 // It is now a varargs tail call dispatched via helper.
7647 call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
7648 call->gtFlags &= ~GTF_CALL_POP_ARGS;
7649
7650#elif defined(_TARGET_ARM64_)
7651 NYI_ARM64("Tail calls via stub are unsupported on this platform.");
7652#endif // _TARGET_ARM64_
7653
7654    // This function is responsible for inserting an explicit null check when it is necessary.
7655 assert(!call->NeedsNullCheck());
7656
7657 JITDUMP("fgMorphTailCall (after):\n");
7658 DISPTREE(call);
7659}
7660
7661//------------------------------------------------------------------------
7662// fgGetStubAddrArg: Return the virtual stub address for the given call.
7663//
7664// Notes:
7665//    The JIT must place the address of the stub used to load the call target,
7666//    the "stub indirection cell", in a special call argument passed in a special register.
7667//
7668// Arguments:
7669// call - a call that needs virtual stub dispatching.
7670//
7671// Return Value:
7672//    addr tree with the register requirement set.
7673//
7674GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call)
7675{
7676 assert(call->IsVirtualStub());
7677 GenTree* stubAddrArg;
7678 if (call->gtCallType == CT_INDIRECT)
7679 {
7680 stubAddrArg = gtClone(call->gtCallAddr, true);
7681 }
7682 else
7683 {
7684 assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
7685 ssize_t addr = ssize_t(call->gtStubCallStubAddr);
7686 stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
7687 }
7688 assert(stubAddrArg != nullptr);
7689 stubAddrArg->gtRegNum = virtualStubParamInfo->GetReg();
7690 return stubAddrArg;
7691}
7692
7693//------------------------------------------------------------------------------
7694// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
7695//
7696//
7697// Arguments:
7698// block - basic block ending with a recursive fast tail call
7699// recursiveTailCall - recursive tail call to transform
7700//
7701// Notes:
7702// The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
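//
//    For example (a sketch): for a recursive method such as
//        static int F(int n, int acc) => (n <= 1) ? acc : F(n - 1, n * acc);
//    the recursive tail call F(n - 1, n * acc) is rewritten as assignments of the new argument values
//    (through temps) to the parameters n and acc, followed by an unconditional jump (BBJ_ALWAYS) back
//    to the start of the method body.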
7703
7704void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
7705{
7706 assert(recursiveTailCall->IsTailCallConvertibleToLoop());
7707 GenTree* last = block->lastStmt();
7708 assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
7709
7710 // Transform recursive tail call into a loop.
7711
7712 GenTree* earlyArgInsertionPoint = last;
7713 IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
7714
7715 // Hoist arg setup statement for the 'this' argument.
7716 GenTree* thisArg = recursiveTailCall->gtCallObjp;
7717 if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
7718 {
7719 GenTree* thisArgStmt = gtNewStmt(thisArg, callILOffset);
7720 fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
7721 }
7722
7723 // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
7724 // then the temps need to be assigned to the method parameters. This is done so that the caller
7725 // parameters are not re-assigned before call arguments depending on them are evaluated.
7726 // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
7727 // where the next temp or parameter assignment should be inserted.
7728
7729 // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
7730 // while the second call argument (const 1) doesn't.
7731 // Basic block before tail recursion elimination:
7732 // ***** BB04, stmt 1 (top level)
7733 // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
7734 // [000033] --C - G------ - \--* call void RecursiveMethod
7735 // [000030] ------------ | / --* const int - 1
7736 // [000031] ------------arg0 in rcx + --* +int
7737 // [000029] ------------ | \--* lclVar int V00 arg1
7738 // [000032] ------------arg1 in rdx \--* const int 1
7739 //
7740 //
7741 // Basic block after tail recursion elimination :
7742 // ***** BB04, stmt 1 (top level)
7743 // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7744 // [000030] ------------ | / --* const int - 1
7745 // [000031] ------------ | / --* +int
7746 // [000029] ------------ | | \--* lclVar int V00 arg1
7747 // [000050] - A---------- \--* = int
7748 // [000049] D------N---- \--* lclVar int V02 tmp0
7749 //
7750 // ***** BB04, stmt 2 (top level)
7751 // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7752 // [000052] ------------ | / --* lclVar int V02 tmp0
7753 // [000054] - A---------- \--* = int
7754 // [000053] D------N---- \--* lclVar int V00 arg0
7755
7756 // ***** BB04, stmt 3 (top level)
7757 // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
7758 // [000032] ------------ | / --* const int 1
7759 // [000057] - A---------- \--* = int
7760 // [000056] D------N---- \--* lclVar int V01 arg1
7761
7762 GenTree* tmpAssignmentInsertionPoint = last;
7763 GenTree* paramAssignmentInsertionPoint = last;
7764
7765 // Process early args. They may contain both setup statements for late args and actual args.
7766 // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
7767 // below has the correct second argument.
7768 int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
7769 for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
7770 (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
7771 {
7772 GenTree* earlyArg = earlyArgs->Current();
7773 if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
7774 {
7775 if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
7776 {
7777 // This is a setup node so we need to hoist it.
7778 GenTree* earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
7779 fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
7780 }
7781 else
7782 {
7783 // This is an actual argument that needs to be assigned to the corresponding caller parameter.
7784 fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
7785 GenTree* paramAssignStmt =
7786 fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
7787 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7788 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7789 {
7790 // All temp assignments will happen before the first param assignment.
7791 tmpAssignmentInsertionPoint = paramAssignStmt;
7792 }
7793 }
7794 }
7795 }
7796
7797 // Process late args.
7798 int lateArgIndex = 0;
7799 for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
7800 (lateArgIndex++, lateArgs = lateArgs->Rest()))
7801 {
7802 // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
7803 GenTree* lateArg = lateArgs->Current();
7804 fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
7805 GenTree* paramAssignStmt =
7806 fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
7807 tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
7808
7809 if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
7810 {
7811 // All temp assignments will happen before the first param assignment.
7812 tmpAssignmentInsertionPoint = paramAssignStmt;
7813 }
7814 }
7815
7816    // If the method has starg.s 0 or ldarga.s 0, a special local (lvaArg0Var) is created so that
7817 // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
7818 // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
7819 if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
7820 {
7821 var_types thisType = lvaTable[info.compThisArg].TypeGet();
7822 GenTree* arg0 = gtNewLclvNode(lvaArg0Var, thisType);
7823 GenTree* arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
7824 GenTree* arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
7825 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
7826 }
7827
7828 // If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog
7829 // but this loop can't include the prolog. Since we don't have liveness information, we insert zero-initialization
7830 // for all non-parameter IL locals as well as temp structs with GC fields.
7831 // Liveness phase will remove unnecessary initializations.
7832 if (info.compInitMem)
7833 {
7834 unsigned varNum;
7835 LclVarDsc* varDsc;
7836 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
7837 {
7838#if FEATURE_FIXED_OUT_ARGS
7839 if (varNum == lvaOutgoingArgSpaceVar)
7840 {
7841 continue;
7842 }
7843#endif // FEATURE_FIXED_OUT_ARGS
7844 if (!varDsc->lvIsParam)
7845 {
7846 var_types lclType = varDsc->TypeGet();
7847 bool isUserLocal = (varNum < info.compLocalsCount);
7848 bool structWithGCFields = ((lclType == TYP_STRUCT) && (varDsc->lvStructGcCount > 0));
7849 if (isUserLocal || structWithGCFields)
7850 {
7851 GenTree* lcl = gtNewLclvNode(varNum, lclType);
7852 GenTree* init = nullptr;
7853 if (varTypeIsStruct(lclType))
7854 {
7855 const bool isVolatile = false;
7856 const bool isCopyBlock = false;
7857 init = gtNewBlkOpNode(lcl, gtNewIconNode(0), varDsc->lvSize(), isVolatile, isCopyBlock);
7858 init = fgMorphInitBlock(init);
7859 }
7860 else
7861 {
7862 GenTree* zero = gtNewZeroConNode(genActualType(lclType));
7863 init = gtNewAssignNode(lcl, zero);
7864 }
7865 GenTree* initStmt = gtNewStmt(init, callILOffset);
7866 fgInsertStmtBefore(block, last, initStmt);
7867 }
7868 }
7869 }
7870 }
7871
7872 // Remove the call
7873 fgRemoveStmt(block, last);
7874
7875 // Set the loop edge. Ensure we have a scratch block and then target the
7876 // next block. Loop detection needs to see a pred out of the loop, so
7877 // mark the scratch block BBF_DONT_REMOVE to prevent empty block removal
7878 // on it.
7879 fgEnsureFirstBBisScratch();
7880 fgFirstBB->bbFlags |= BBF_DONT_REMOVE;
7881 block->bbJumpKind = BBJ_ALWAYS;
7882 block->bbJumpDest = fgFirstBB->bbNext;
7883 fgAddRefPred(block->bbJumpDest, block);
7884 block->bbFlags &= ~BBF_HAS_JMP;
7885}
7886
7887//------------------------------------------------------------------------------
7888// fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
7889//
7890//
7891// Arguments:
7892// arg - argument to assign
7893// argTabEntry - argument table entry corresponding to arg
7894//    block  -  basic block the call is in
7895// callILOffset - IL offset of the call
7896// tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
7897// paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
7898//
7899// Return Value:
7900// parameter assignment statement if one was inserted; nullptr otherwise.
7901
7902GenTree* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg,
7903 fgArgTabEntry* argTabEntry,
7904 BasicBlock* block,
7905 IL_OFFSETX callILOffset,
7906 GenTree* tmpAssignmentInsertionPoint,
7907 GenTree* paramAssignmentInsertionPoint)
7908{
7909 // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
7910 // some argument trees may reference parameters directly.
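    // For example (a sketch): for a recursive tail call F(b, a) inside F(a, b), assigning the parameters
    // directly ("a = b; b = a;") would read the already-overwritten value of 'a'; routing the arguments
    // through temps ("t0 = b; t1 = a; a = t0; b = t1;") preserves the original values.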
7911
7912 GenTree* argInTemp = nullptr;
7913 unsigned originalArgNum = argTabEntry->argNum;
7914 bool needToAssignParameter = true;
7915
7916 // TODO-CQ: enable calls with struct arguments passed in registers.
7917 noway_assert(!varTypeIsStruct(arg->TypeGet()));
7918
7919 if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
7920 {
7921 // The argument is already assigned to a temp or is a const.
7922 argInTemp = arg;
7923 }
7924 else if (arg->OperGet() == GT_LCL_VAR)
7925 {
7926 unsigned lclNum = arg->AsLclVar()->gtLclNum;
7927 LclVarDsc* varDsc = &lvaTable[lclNum];
7928 if (!varDsc->lvIsParam)
7929 {
7930 // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
7931 argInTemp = arg;
7932 }
7933 else if (lclNum == originalArgNum)
7934 {
7935 // The argument is the same parameter local that we were about to assign so
7936 // we can skip the assignment.
7937 needToAssignParameter = false;
7938 }
7939 }
7940
7941 // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
7942 // any caller parameters. Some common cases are handled above but we may be able to eliminate
7943 // more temp assignments.
7944
7945 GenTree* paramAssignStmt = nullptr;
7946 if (needToAssignParameter)
7947 {
7948 if (argInTemp == nullptr)
7949 {
7950 // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
7951 // TODO: we can avoid a temp assignment if we can prove that the argument tree
7952 // doesn't involve any caller parameters.
7953 unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
7954 lvaTable[tmpNum].lvType = arg->gtType;
7955 GenTree* tempSrc = arg;
7956 GenTree* tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
7957 GenTree* tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
7958 GenTree* tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
7959 fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
7960 argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
7961 }
7962
7963 // Now assign the temp to the parameter.
7964 LclVarDsc* paramDsc = lvaTable + originalArgNum;
7965 assert(paramDsc->lvIsParam);
7966 GenTree* paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
7967 GenTree* paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
7968 paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
7969
7970 fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
7971 }
7972 return paramAssignStmt;
7973}
7974
7975/*****************************************************************************
7976 *
7977 * Transform the given GT_CALL tree for code generation.
7978 */
7979
7980GenTree* Compiler::fgMorphCall(GenTreeCall* call)
7981{
7982 if (varTypeIsStruct(call))
7983 {
7984 fgFixupStructReturn(call);
7985 }
7986 if (call->CanTailCall())
7987 {
7988 // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
7989 assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
7990
7991 // It cannot be an inline candidate
7992 assert(!call->IsInlineCandidate());
7993
7994 const char* szFailReason = nullptr;
7995 bool hasStructParam = false;
7996 if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
7997 {
7998 szFailReason = "Might turn into an intrinsic";
7999 }
8000
8001 if (opts.compNeedSecurityCheck)
8002 {
8003 szFailReason = "Needs security check";
8004 }
8005 else if (compLocallocUsed || compLocallocOptimized)
8006 {
8007 szFailReason = "Localloc used";
8008 }
8009#ifdef _TARGET_AMD64_
8010 // Needed for Jit64 compat.
8011 // In future, enabling tail calls from methods that need GS cookie check
8012 // would require codegen side work to emit GS cookie check before a tail
8013 // call.
8014 else if (getNeedsGSSecurityCookie())
8015 {
8016 szFailReason = "GS Security cookie check";
8017 }
8018#endif
8019#ifdef DEBUG
8020 // DDB 99324: Just disable tailcall under compGcChecks stress mode.
8021 else if (opts.compGcChecks)
8022 {
8023 szFailReason = "GcChecks";
8024 }
8025#endif
8026#if FEATURE_TAILCALL_OPT
8027 else
8028 {
8029 // We are still not sure whether it can be a tail call. Because, when converting
8030 // a call to an implicit tail call, we must check that there are no locals with
8031 // their address taken. If this is the case, we have to assume that the address
8032 // has been leaked and the current stack frame must live until after the final
8033 // call.
8034
8035            // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
8036            // that lvHasLdAddrOp is much more conservative. We cannot just base the check on
8037            // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
8038            // during the morph stage. The reason for also checking lvAddrExposed is that in the case
8039            // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
8040            // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, ensures
8041            // we are never incorrect.
8042 //
8043 // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
8044 // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
8045 // is set. This avoids the need for iterating through all lcl vars of the current
8046 // method. Right now throughout the code base we are not consistently using 'set'
8047 // method to set lvHasLdAddrOp and lvAddrExposed flags.
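            //
            // For example, if a local's address escaped earlier in the method (e.g. "Use(&buffer);"
            // followed by an implicit tail call), removing the caller's frame at the tail call could
            // leave a dangling pointer, so the call must be dispatched as a normal call instead.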
8048 unsigned varNum;
8049 LclVarDsc* varDsc;
8050 bool hasAddrExposedVars = false;
8051 bool hasStructPromotedParam = false;
8052 bool hasPinnedVars = false;
8053
8054 for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
8055 {
8056                // If the call is marked as an explicit tail call we will skip the
8057 // following three hazard checks.
8058 // We still must check for any struct parameters and set 'hasStructParam'
8059 // so that we won't transform the recursive tail call into a loop.
8060 //
8061 if (call->IsImplicitTailCall())
8062 {
8063 if (varDsc->lvHasLdAddrOp)
8064 {
8065 hasAddrExposedVars = true;
8066 break;
8067 }
8068 if (varDsc->lvAddrExposed)
8069 {
8070 if (lvaIsImplicitByRefLocal(varNum))
8071 {
8072 // The address of the implicit-byref is a non-address use of the pointer parameter.
8073 }
8074 else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
8075 {
8076 // The address of the implicit-byref's field is likewise a non-address use of the pointer
8077 // parameter.
8078 }
8079 else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
8080 {
8081 // This temp was used for struct promotion bookkeeping. It will not be used, and will have
8082 // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
8083 assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
8084 assert(fgGlobalMorph);
8085 }
8086 else
8087 {
8088 hasAddrExposedVars = true;
8089 break;
8090 }
8091 }
8092 if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
8093 {
8094 hasStructPromotedParam = true;
8095 break;
8096 }
8097 if (varDsc->lvPinned)
8098 {
8099 // A tail call removes the method from the stack, which means the pinning
8100 // goes away for the callee. We can't allow that.
8101 hasPinnedVars = true;
8102 break;
8103 }
8104 }
8105 if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
8106 {
8107 hasStructParam = true;
8108 // This prevents transforming a recursive tail call into a loop
8109 // but doesn't prevent tail call optimization so we need to
8110 // look at the rest of parameters.
8111 continue;
8112 }
8113 }
8114
8115 if (hasAddrExposedVars)
8116 {
8117 szFailReason = "Local address taken";
8118 }
8119 if (hasStructPromotedParam)
8120 {
8121 szFailReason = "Has Struct Promoted Param";
8122 }
8123 if (hasPinnedVars)
8124 {
8125 szFailReason = "Has Pinned Vars";
8126 }
8127 }
8128#endif // FEATURE_TAILCALL_OPT
8129
8130 var_types callType = call->TypeGet();
8131
8132 // We have to ensure to pass the incoming retValBuf as the
8133 // outgoing one. Using a temp will not do as this function will
8134 // not regain control to do the copy.
8135
8136 if (info.compRetBuffArg != BAD_VAR_NUM)
8137 {
8138 noway_assert(callType == TYP_VOID);
8139 GenTree* retValBuf = call->gtCallArgs->gtOp.gtOp1;
8140 if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
8141 {
8142 szFailReason = "Need to copy return buffer";
8143 }
8144 }
8145
8146 // If this is an opportunistic tail call and cannot be dispatched as
8147 // fast tail call, go the non-tail call route. This is done for perf
8148        // reasons.
8149 //
8150 // Avoid the cost of determining whether can be dispatched as fast tail
8151 // call if we already know that tail call cannot be honored for other
8152 // reasons.
8153 bool canFastTailCall = false;
8154 if (szFailReason == nullptr)
8155 {
8156 canFastTailCall = fgCanFastTailCall(call);
8157 if (!canFastTailCall)
8158 {
8159 // Implicit or opportunistic tail calls are always dispatched via fast tail call
8160 // mechanism and never via tail call helper for perf.
8161 if (call->IsImplicitTailCall())
8162 {
8163 szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
8164 }
8165 else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
8166 {
8167 // If we are here, it means that the call is an explicitly ".tail" prefixed and cannot be
8168 // dispatched as a fast tail call.
8169
8170 // Methods with non-standard args will have indirection cell or cookie param passed
8171 // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
8172 // tail calling the target method and hence ".tail" prefix on such calls needs to be
8173 // ignored.
8174 //
8175 // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
8176 // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
8177                    // This is done by adding stubAddr as an additional arg before the original list of
8178 // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
8179 // in Stublinkerx86.cpp.
8180 szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
8181 "called via helper";
8182 }
8183#ifdef _TARGET_ARM64_
8184 else
8185 {
8186 // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
8187 // So, bail out if we can't make fast tail call.
8188 szFailReason = "Non-qualified fast tail call";
8189 }
8190#endif
8191 }
8192 }
8193
8194 // Clear these flags before calling fgMorphCall() to avoid recursion.
8195 bool isTailPrefixed = call->IsTailPrefixedCall();
8196 call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
8197
8198#if FEATURE_TAILCALL_OPT
8199 call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
8200#endif
8201
8202 if (szFailReason == nullptr)
8203 {
8204 if (!fgCheckStmtAfterTailCall())
8205 {
8206 szFailReason = "Unexpected statements after the tail call";
8207 }
8208 }
8209
8210 void* pfnCopyArgs = nullptr;
8211#if !defined(_TARGET_X86_)
8212 if (!canFastTailCall && szFailReason == nullptr)
8213 {
8214 pfnCopyArgs =
8215 info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, call->IsVirtualStub()
8216 ? CORINFO_TAILCALL_STUB_DISPATCH_ARG
8217 : CORINFO_TAILCALL_NORMAL);
8218 if (pfnCopyArgs == nullptr)
8219 {
8220 if (!info.compMatchedVM)
8221 {
8222 // If we don't have a matched VM, we won't get valid results when asking for a thunk.
8223 pfnCopyArgs = UlongToPtr(0xCA11CA11); // "callcall"
8224 }
8225 else
8226 {
8227 szFailReason = "TailCallCopyArgsThunk not available.";
8228 }
8229 }
8230 }
8231#endif // !_TARGET_X86_
8232
8233 if (szFailReason != nullptr)
8234 {
8235#ifdef DEBUG
8236 if (verbose)
8237 {
8238 printf("\nRejecting tail call late for call ");
8239 printTreeID(call);
8240 printf(": %s\n", szFailReason);
8241 }
8242#endif
8243
8244 // for non user funcs, we have no handles to report
8245 info.compCompHnd->reportTailCallDecision(nullptr,
8246 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
8247 isTailPrefixed, TAILCALL_FAIL, szFailReason);
8248
8249 goto NO_TAIL_CALL;
8250 }
8251
8252#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
8253 // We enable shared-ret tail call optimization for recursive calls even if
8254 // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
8255 if (gtIsRecursiveCall(call))
8256#endif
8257 {
8258 // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
8259 // but if the call falls through to a ret, and we are doing a tailcall, change it here.
8260 if (compCurBB->bbJumpKind != BBJ_RETURN)
8261 {
8262 compCurBB->bbJumpKind = BBJ_RETURN;
8263 }
8264 }
8265
8266 // Set this flag before calling fgMorphCall() to prevent inlining this call.
8267 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
8268
8269 bool fastTailCallToLoop = false;
8270#if FEATURE_TAILCALL_OPT
8271 // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
8272 // or return type is a struct that can be passed in a register.
8273 //
8274 // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
8275 // hidden generic context param or through keep alive thisptr), then while transforming a recursive
8276 // call to such a method requires that the generic context stored on stack slot be updated. Right now,
8277 // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
8278 // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
8279 // generic type parameters of both caller and callee generic method are the same.
8280 if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
8281 !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam &&
8282 !varTypeIsStruct(call->TypeGet()) && ((info.compClassAttr & CORINFO_FLG_MARSHAL_BYREF) == 0))
8283 {
8284 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
8285 fastTailCallToLoop = true;
8286 }
8287#endif
8288
8289 // Do some target-specific transformations (before we process the args, etc.)
8290 // This is needed only for tail prefixed calls that cannot be dispatched as
8291 // fast calls.
8292 if (!canFastTailCall)
8293 {
8294 fgMorphTailCall(call, pfnCopyArgs);
8295 }
8296
8297    // Implementation note: If we optimize the tailcall to do a direct jump
8298 // to the target function (after stomping on the return address, etc),
8299 // without using CORINFO_HELP_TAILCALL, we have to make certain that
8300 // we don't starve the hijacking logic (by stomping on the hijacked
8301 // return address etc).
8302
8303 // At this point, we are committed to do the tailcall.
8304 compTailCallUsed = true;
8305
8306 CorInfoTailCall tailCallResult;
8307
8308 if (fastTailCallToLoop)
8309 {
8310 tailCallResult = TAILCALL_RECURSIVE;
8311 }
8312 else if (canFastTailCall)
8313 {
8314 tailCallResult = TAILCALL_OPTIMIZED;
8315 }
8316 else
8317 {
8318 tailCallResult = TAILCALL_HELPER;
8319 }
8320
8321 // for non user funcs, we have no handles to report
8322 info.compCompHnd->reportTailCallDecision(nullptr,
8323 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
8324 isTailPrefixed, tailCallResult, nullptr);
8325
8326    // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
8327    // to avoid doing any extra work for the return value.
8328 call->gtType = TYP_VOID;
8329
8330#ifdef DEBUG
8331 if (verbose)
8332 {
8333 printf("\nGTF_CALL_M_TAILCALL bit set for call ");
8334 printTreeID(call);
8335 printf("\n");
8336 if (fastTailCallToLoop)
8337 {
8338 printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
8339 printTreeID(call);
8340 printf("\n");
8341 }
8342 }
8343#endif
8344
8345 GenTree* stmtExpr = fgMorphStmt->gtStmtExpr;
8346
8347#ifdef DEBUG
8348 // Tail call needs to be in one of the following IR forms
8349 // Either a call stmt or
8350 // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
8351 // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
8352 // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
8353 // In the above,
8354 // GT_CASTS may be nested.
8355 genTreeOps stmtOper = stmtExpr->gtOper;
8356 if (stmtOper == GT_CALL)
8357 {
8358 assert(stmtExpr == call);
8359 }
8360 else
8361 {
8362 assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
8363 GenTree* treeWithCall;
8364 if (stmtOper == GT_RETURN)
8365 {
8366 treeWithCall = stmtExpr->gtGetOp1();
8367 }
8368 else if (stmtOper == GT_COMMA)
8369 {
8370 // Second operation must be nop.
8371 assert(stmtExpr->gtGetOp2()->IsNothingNode());
8372 treeWithCall = stmtExpr->gtGetOp1();
8373 }
8374 else
8375 {
8376 treeWithCall = stmtExpr->gtGetOp2();
8377 }
8378
8379 // Peel off casts
8380 while (treeWithCall->gtOper == GT_CAST)
8381 {
8382 assert(!treeWithCall->gtOverflow());
8383 treeWithCall = treeWithCall->gtGetOp1();
8384 }
8385
8386 assert(treeWithCall == call);
8387 }
8388#endif
8389 GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
8390 // Remove all stmts after the call.
8391 while (nextMorphStmt != nullptr)
8392 {
8393 GenTreeStmt* stmtToRemove = nextMorphStmt;
8394 nextMorphStmt = stmtToRemove->gtNextStmt;
8395 fgRemoveStmt(compCurBB, stmtToRemove);
8396 }
8397
8398 fgMorphStmt->gtStmtExpr = call;
8399
8400 // Tail call via helper: The VM can't use return address hijacking if we're
8401 // not going to return and the helper doesn't have enough info to safely poll,
8402 // so we poll before the tail call, if the block isn't already safe. Since
8403    // tail call via helper is a slow mechanism, it doesn't matter whether we emit a
8404    // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
8405    // size increase if almost all methods are expected to be tail calls (e.g. F#).
8406 //
8407 // Note that we can avoid emitting GC-poll if we know that the current BB is
8408 // dominated by a Gc-SafePoint block. But we don't have dominator info at this
8409    // point. One option is to just add a placeholder node for GC-poll (e.g. GT_GCPOLL)
8410    // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
8411    // now it is not clear whether optimizing slow tail calls is worth the effort. As a
8412 // low cost check, we check whether the first and current basic blocks are
8413 // GC-SafePoints.
8414 //
8415 // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
8416 // is going to mark the method as fully interruptible if the block containing this tail
8417 // call is reachable without executing any call.
8418 if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
8419 !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
8420 {
8421 // We didn't insert a poll block, so we need to morph the call now
8422 // (Normally it will get morphed when we get to the split poll block)
8423 GenTree* temp = fgMorphCall(call);
8424 noway_assert(temp == call);
8425 }
8426
8427 // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
8428 // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
8429 //
8430 // Fast tail call: in case of fast tail calls, we need a jmp epilog and
8431    // hence mark it as BBJ_RETURN with the BBF_HAS_JMP flag set.
8432 noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
8433
8434 if (canFastTailCall)
8435 {
8436 compCurBB->bbFlags |= BBF_HAS_JMP;
8437 }
8438 else
8439 {
8440 compCurBB->bbJumpKind = BBJ_THROW;
8441 }
8442
8443    // For non-void calls, we return a placeholder which will be
8444 // used by the parent GT_RETURN node of this call.
8445
8446 GenTree* result = call;
8447 if (callType != TYP_VOID && info.compRetType != TYP_VOID)
8448 {
8449#ifdef FEATURE_HFA
8450 // Return a dummy node, as the return is already removed.
8451 if (callType == TYP_STRUCT)
8452 {
8453 // This is a HFA, use float 0.
8454 callType = TYP_FLOAT;
8455 }
8456#elif defined(UNIX_AMD64_ABI)
8457 // Return a dummy node, as the return is already removed.
8458 if (varTypeIsStruct(callType))
8459 {
8460 // This is a register-returned struct. Return a 0.
8461 // The actual return registers are hacked in lower and the register allocator.
8462 callType = TYP_INT;
8463 }
8464#endif
8465#ifdef FEATURE_SIMD
8466 // Return a dummy node, as the return is already removed.
8467 if (varTypeIsSIMD(callType))
8468 {
8469 callType = TYP_DOUBLE;
8470 }
8471#endif
8472 result = gtNewZeroConNode(genActualType(callType));
8473 result = fgMorphTree(result);
8474 }
8475
8476 return result;
8477 }
8478
8479NO_TAIL_CALL:
8480
8481 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
8482 (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
8483#ifdef FEATURE_READYTORUN_COMPILER
8484 || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
8485#endif
8486 ) &&
8487 (call == fgMorphStmt->gtStmtExpr))
8488 {
8489        // This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
8490 // Transform it into a null check.
8491
8492 GenTree* thisPtr = call->gtCallArgs->gtOp.gtOp1;
8493
8494 GenTree* nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
8495 nullCheck->gtFlags |= GTF_EXCEPT;
8496
8497 return fgMorphTree(nullCheck);
8498 }
8499
8500 noway_assert(call->gtOper == GT_CALL);
8501
8502 //
8503 // Only count calls once (only in the global morph phase)
8504 //
8505 if (fgGlobalMorph)
8506 {
8507 if (call->gtCallType == CT_INDIRECT)
8508 {
8509 optCallCount++;
8510 optIndirectCallCount++;
8511 }
8512 else if (call->gtCallType == CT_USER_FUNC)
8513 {
8514 optCallCount++;
8515 if (call->IsVirtual())
8516 {
8517 optIndirectCallCount++;
8518 }
8519 }
8520 }
8521
8522 // Couldn't inline - remember that this BB contains method calls
8523
8524 // If this is a 'regular' call, mark the basic block as
8525 // having a call (for computing full interruptibility).
8526 CLANG_FORMAT_COMMENT_ANCHOR;
8527
8528 if (IsGcSafePoint(call))
8529 {
8530 compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
8531 }
8532
8533 // Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag
8534 //
8535 // We need to do these before the arguments are morphed
8536 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
8537 {
8538 // See if this is foldable
8539 GenTree* optTree = gtFoldExprCall(call);
8540
8541 // If we optimized, morph the result
8542 if (optTree != call)
8543 {
8544 return fgMorphTree(optTree);
8545 }
8546 }
8547
8548    // Make sure that return buffer pointers, for structs containing GC pointers that aren't too large, point into the stack.
8549 GenTree* origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
8550 // copy-back).
8551 unsigned retValTmpNum = BAD_VAR_NUM;
8552 CORINFO_CLASS_HANDLE structHnd = nullptr;
8553 if (call->HasRetBufArg() &&
8554 call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
8555 {
8556        // We're enforcing the invariant that return buffer pointers (at least for
8557 // struct return types containing GC pointers) are never pointers into the heap.
8558 // The large majority of cases are address of local variables, which are OK.
8559 // Otherwise, allocate a local of the given struct type, pass its address,
8560 // then assign from that into the proper destination. (We don't need to do this
8561 // if we're passing the caller's ret buff arg to the callee, since the caller's caller
8562 // will maintain the same invariant.)
8563
8564 GenTree* dest = call->gtCallArgs->gtOp.gtOp1;
8565 assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
8566 if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
8567 {
8568 // We'll exempt helper calls from this, assuming that the helper implementation
8569 // follows the old convention, and does whatever barrier is required.
8570 if (call->gtCallType != CT_HELPER)
8571 {
8572 structHnd = call->gtRetClsHnd;
8573 if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
8574 !(dest->OperGet() == GT_LCL_VAR && dest->gtLclVar.gtLclNum == info.compRetBuffArg))
8575 {
8576 origDest = dest;
8577
8578 retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
8579 lvaSetStruct(retValTmpNum, structHnd, true);
8580 dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8581 }
8582 }
8583 }
8584
8585 call->gtCallArgs->gtOp.gtOp1 = dest;
8586 }
8587
8588 /* Process the "normal" argument list */
8589 call = fgMorphArgs(call);
8590 noway_assert(call->gtOper == GT_CALL);
8591
8592    // Morph a stelem.ref helper call that stores a null value into a plain array store that skips the helper.
8593 // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
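    // For example, a call to CORINFO_HELP_ARRADDR_ST(array, index, null) is rewritten below as an
    // ordinary 'array[index] = null' store, which does not need the helper's type check since a
    // null reference can be stored into any object array.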
8594 if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
8595 {
8596 GenTree* value = gtArgEntryByArgNum(call, 2)->node;
8597 if (value->IsIntegralConst(0))
8598 {
8599 assert(value->OperGet() == GT_CNS_INT);
8600
8601 GenTree* arr = gtArgEntryByArgNum(call, 0)->node;
8602 GenTree* index = gtArgEntryByArgNum(call, 1)->node;
8603
8604 // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
8605 // the spill trees as well if necessary.
8606 GenTreeOp* argSetup = nullptr;
8607 for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
8608 {
8609 GenTree* const arg = earlyArgs->Current();
8610 if (arg->OperGet() != GT_ASG)
8611 {
8612 continue;
8613 }
8614
8615 assert(arg != arr);
8616 assert(arg != index);
8617
8618 arg->gtFlags &= ~GTF_LATE_ARG;
8619
8620 GenTree* op1 = argSetup;
8621 if (op1 == nullptr)
8622 {
8623 op1 = gtNewNothingNode();
8624#if DEBUG
8625 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8626#endif // DEBUG
8627 }
8628
8629 argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
8630
8631#if DEBUG
8632 argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8633#endif // DEBUG
8634 }
8635
8636#ifdef DEBUG
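            // The argument trees were already morphed by fgMorphArgs above and so carry the
            // GTF_DEBUG_NODE_MORPHED flag; clear it so that morphing them again as part of the new
            // array store tree does not trip the "already morphed" debug check.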
8637 auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
8638 (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
8639 return WALK_CONTINUE;
8640 };
8641
8642 fgWalkTreePost(&arr, resetMorphedFlag);
8643 fgWalkTreePost(&index, resetMorphedFlag);
8644 fgWalkTreePost(&value, resetMorphedFlag);
8645#endif // DEBUG
8646
8647 GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
8648 GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
8649 GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value);
8650 arrStore->gtFlags |= GTF_ASG;
8651
8652 GenTree* result = fgMorphTree(arrStore);
8653 if (argSetup != nullptr)
8654 {
8655 result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
8656#if DEBUG
8657 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8658#endif // DEBUG
8659 }
8660
8661 return result;
8662 }
8663 }
8664
8665 // Optimize get_ManagedThreadId(get_CurrentThread)
8666 if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8667 info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
8668 {
8669 noway_assert(origDest == nullptr);
8670 noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
8671
8672 GenTree* innerCall = call->gtCallLateArgs->gtOp.gtOp1;
8673
8674 if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
8675 info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
8676 CORINFO_INTRINSIC_GetCurrentManagedThread)
8677 {
8678 // substitute expression with call to helper
8679 GenTree* newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT);
8680 JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
8681 return fgMorphTree(newCall);
8682 }
8683 }
8684
8685 if (origDest != nullptr)
8686 {
8687 GenTree* retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
8688 // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
8689 // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
8690 // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to
8691 // be correct.
8692 if (origDest->OperGet() == GT_ASG)
8693 {
8694 if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
8695 {
8696 GenTree* var = origDest->gtOp.gtOp1;
8697 origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
8698 gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
8699 }
8700 }
8701 GenTree* copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
8702 copyBlk = fgMorphTree(copyBlk);
8703 GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
8704#ifdef DEBUG
8705 result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
8706#endif
8707 return result;
8708 }
8709
8710 if (call->IsNoReturn())
8711 {
8712 //
8713 // If we know that the call does not return then we can set fgRemoveRestOfBlock
8714 // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
8715 // As a result the compiler won't need to preserve live registers across the call.
8716 //
8717    // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
8718 // Besides, the tail call code is part of the epilog and converting the block to
8719 // BBJ_THROW would result in the tail call being dropped as the epilog is generated
8720 // only for BBJ_RETURN blocks.
8721 //
8722 // Currently this doesn't work for non-void callees. Some of the code that handles
8723 // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
8724 // do not have this flag by default. We could add the flag here but the proper solution
8725 // would be to replace the return expression with a local var node during inlining
8726 // so the rest of the call tree stays in a separate statement. That statement can then
8727 // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
8728 //
8729
8730 if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
8731 {
8732 fgRemoveRestOfBlock = true;
8733 }
8734 }
8735
8736 return call;
8737}
8738
8739/*****************************************************************************
8740 *
8741 * Transform the given GTK_CONST tree for code generation.
8742 */
8743
8744GenTree* Compiler::fgMorphConst(GenTree* tree)
8745{
8746 assert(tree->OperKind() & GTK_CONST);
8747
8748 /* Clear any exception flags or other unnecessary flags
8749 * that may have been set before folding this node to a constant */
8750
8751 tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
8752
8753 if (tree->OperGet() != GT_CNS_STR)
8754 {
8755 return tree;
8756 }
8757
8758 // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
8759 // guarantee slow performance for that block. Instead cache the return value
8760    // of CORINFO_HELP_STRCNS and go to the cache first, giving reasonable perf.
8761
8762 if (compCurBB->bbJumpKind == BBJ_THROW)
8763 {
8764 CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
8765 if (helper != CORINFO_HELP_UNDEF)
8766 {
8767            // For unimportant blocks, we want to construct the string lazily
8768
8769 GenTreeArgList* args;
8770 if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
8771 {
8772 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
8773 }
8774 else
8775 {
8776 args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
8777 gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
8778 }
8779
8780 tree = gtNewHelperCallNode(helper, TYP_REF, args);
8781 return fgMorphTree(tree);
8782 }
8783 }
8784
8785 assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
8786
8787 LPVOID pValue;
8788 InfoAccessType iat =
8789 info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
8790
8791 tree = gtNewStringLiteralNode(iat, pValue);
8792
8793 return fgMorphTree(tree);
8794}
8795
8796/*****************************************************************************
8797 *
8798 * Transform the given GTK_LEAF tree for code generation.
8799 */
8800
8801GenTree* Compiler::fgMorphLeaf(GenTree* tree)
8802{
8803 assert(tree->OperKind() & GTK_LEAF);
8804
8805 if (tree->gtOper == GT_LCL_VAR)
8806 {
8807 const bool forceRemorph = false;
8808 return fgMorphLocalVar(tree, forceRemorph);
8809 }
8810#ifdef _TARGET_X86_
8811 else if (tree->gtOper == GT_LCL_FLD)
8812 {
8813 if (info.compIsVarArgs)
8814 {
8815 GenTree* newTree =
8816 fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
8817 if (newTree != nullptr)
8818 {
8819 if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
8820 {
8821 fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
8822 }
8823 return newTree;
8824 }
8825 }
8826 }
8827#endif // _TARGET_X86_
8828 else if (tree->gtOper == GT_FTN_ADDR)
8829 {
8830 CORINFO_CONST_LOOKUP addrInfo;
8831
8832#ifdef FEATURE_READYTORUN_COMPILER
8833 if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
8834 {
8835 addrInfo = tree->gtFptrVal.gtEntryPoint;
8836 }
8837 else
8838#endif
8839 {
8840 info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
8841 }
8842
8843 // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
8844 //
8845 tree->SetOper(GT_CNS_INT);
8846 tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
8847 tree->gtFlags |= GTF_ICON_FTN_ADDR;
8848
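        // addrInfo.accessType tells us how many indirections are needed to reach the actual code
        // address: IAT_VALUE is the address itself (wrapped in a NOP only to prevent constant
        // folding), IAT_PVALUE requires one load, and IAT_PPVALUE requires two loads, the first of
        // which reads an invariant cell.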
8849 switch (addrInfo.accessType)
8850 {
8851 case IAT_PPVALUE:
8852 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8853 tree->gtFlags |= GTF_IND_INVARIANT;
8854
8855 __fallthrough;
8856
8857 case IAT_PVALUE:
8858 tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
8859 break;
8860
8861 case IAT_VALUE:
8862 tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
8863 break;
8864
8865 default:
8866 noway_assert(!"Unknown addrInfo.accessType");
8867 }
8868
8869 return fgMorphTree(tree);
8870 }
8871
8872 return tree;
8873}
8874
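//------------------------------------------------------------------------
// fgAssignSetVarDef: If 'tree' defines a local, mark the local var node as a
//    definition point: GTF_VAR_DEF for an entire definition, and GTF_VAR_DEF
//    plus GTF_VAR_USEASG for a partial one.
//
// Arguments:
//    tree - The assignment node to examine
//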
8875void Compiler::fgAssignSetVarDef(GenTree* tree)
8876{
8877 GenTreeLclVarCommon* lclVarCmnTree;
8878 bool isEntire = false;
8879 if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
8880 {
8881 if (isEntire)
8882 {
8883 lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
8884 }
8885 else
8886 {
8887 // We consider partial definitions to be modeled as uses followed by definitions.
8888            // This captures the idea that preceding defs are not necessarily made redundant
8889 // by this definition.
8890 lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
8891 }
8892 }
8893}
8894
8895//------------------------------------------------------------------------
8896// fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
8897//
8898// Arguments:
8899// tree - The block assignment to be possibly morphed
8900//
8901// Return Value:
8902// The modified tree if successful, nullptr otherwise.
8903//
8904// Assumptions:
8905// 'tree' must be a block assignment.
8906//
8907// Notes:
8908// If successful, this method always returns the incoming tree, modifying only
8909// its arguments.
8910
8911GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
8912{
8913 // This must be a block assignment.
8914 noway_assert(tree->OperIsBlkOp());
8915 var_types asgType = tree->TypeGet();
8916
8917 GenTree* asg = tree;
8918 GenTree* dest = asg->gtGetOp1();
8919 GenTree* src = asg->gtGetOp2();
8920 unsigned destVarNum = BAD_VAR_NUM;
8921 LclVarDsc* destVarDsc = nullptr;
8922 GenTree* destLclVarTree = nullptr;
8923 bool isCopyBlock = asg->OperIsCopyBlkOp();
8924 bool isInitBlock = !isCopyBlock;
8925
8926 unsigned size;
8927 CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
8928#ifdef FEATURE_SIMD
8929 // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD/GT_HWIntrinsic)
8930    // The SIMD type in question could be Vector2f, which is 8 bytes in size.
8931    // The below check is to make sure that we don't turn that copyblk
8932    // into an assignment, since the rationalizer logic will transform the
8933    // copyblk appropriately. Otherwise, the transformation made in this
8934    // routine will bypass the rationalizer logic and we might end up with a
8935    // GT_ADDR(GT_SIMD/GT_HWIntrinsic) node post rationalization, leading to a noway assert
8936 // in codegen.
8937 // TODO-1stClassStructs: This is here to preserve old behavior.
8938 // It should be eliminated.
8939 if (src->OperIsSIMDorSimdHWintrinsic())
8940 {
8941 return nullptr;
8942 }
8943#endif
8944
8945 if (dest->gtEffectiveVal()->OperIsBlk())
8946 {
8947 GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
8948 size = lhsBlk->Size();
8949 if (impIsAddressInLocal(lhsBlk->Addr(), &destLclVarTree))
8950 {
8951 destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum;
8952 destVarDsc = &(lvaTable[destVarNum]);
8953 }
8954 if (lhsBlk->OperGet() == GT_OBJ)
8955 {
8956 clsHnd = lhsBlk->AsObj()->gtClass;
8957 }
8958 }
8959 else
8960 {
8961 // Is this an enregisterable struct that is already a simple assignment?
8962 // This can happen if we are re-morphing.
8963 if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
8964 {
8965 return tree;
8966 }
8967 noway_assert(dest->OperIsLocal());
8968 destLclVarTree = dest;
8969 destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum;
8970 destVarDsc = &(lvaTable[destVarNum]);
8971 if (isCopyBlock)
8972 {
8973 clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
8974 size = info.compCompHnd->getClassSize(clsHnd);
8975 }
8976 else
8977 {
8978 size = destVarDsc->lvExactSize;
8979 }
8980 }
8981
8982 //
8983 // See if we can do a simple transformation:
8984 //
8985 // GT_ASG <TYP_size>
8986 // / \
8987 // GT_IND GT_IND or CNS_INT
8988 // | |
8989 // [dest] [src]
8990 //
8991
8992 if (asgType == TYP_STRUCT)
8993 {
8994 if (size == REGSIZE_BYTES)
8995 {
8996 if (clsHnd == NO_CLASS_HANDLE)
8997 {
8998                // A register-sized cpblk can be treated as an integer assignment.
8999 asgType = TYP_I_IMPL;
9000 }
9001 else
9002 {
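                // We have a class handle: use the GC layout of this single register-sized slot to
                // choose between TYP_REF, TYP_BYREF and TYP_I_IMPL for the scalar assignment.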
9003 BYTE gcPtr;
9004 info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
9005 asgType = getJitGCType(gcPtr);
9006 }
9007 }
9008 else
9009 {
9010 switch (size)
9011 {
9012 case 1:
9013 asgType = TYP_BYTE;
9014 break;
9015 case 2:
9016 asgType = TYP_SHORT;
9017 break;
9018
9019#ifdef _TARGET_64BIT_
9020 case 4:
9021 asgType = TYP_INT;
9022 break;
9023#endif // _TARGET_64BIT_
9024 }
9025 }
9026 }
9027
9028 if ((destVarDsc != nullptr) && varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted)
9029 {
9030 // Let fgMorphCopyBlock handle it.
9031 return nullptr;
9032 }
9033
9034 GenTree* srcLclVarTree = nullptr;
9035 LclVarDsc* srcVarDsc = nullptr;
9036 if (isCopyBlock)
9037 {
9038 if (src->OperGet() == GT_LCL_VAR)
9039 {
9040 srcLclVarTree = src;
9041 srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
9042 }
9043 else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &srcLclVarTree))
9044 {
9045 srcVarDsc = &(lvaTable[srcLclVarTree->AsLclVarCommon()->gtLclNum]);
9046 }
9047 if ((srcVarDsc != nullptr) && varTypeIsStruct(srcLclVarTree) && srcVarDsc->lvPromoted)
9048 {
9049 // Let fgMorphCopyBlock handle it.
9050 return nullptr;
9051 }
9052 }
9053
9054 if (asgType != TYP_STRUCT)
9055 {
9056 noway_assert((size <= REGSIZE_BYTES) || varTypeIsSIMD(asgType));
9057
9058        // For initBlk, a non-constant source is not going to allow us to fiddle
9059        // with the bits to create a single assignment.
9060 // Nor do we (for now) support transforming an InitBlock of SIMD type.
9061 if (isInitBlock && (!src->IsConstInitVal() || varTypeIsSIMD(asgType)))
9062 {
9063 return nullptr;
9064 }
9065
9066 if (destVarDsc != nullptr)
9067 {
9068#if LOCAL_ASSERTION_PROP
9069 // Kill everything about dest
9070 if (optLocalAssertionProp)
9071 {
9072 if (optAssertionCount > 0)
9073 {
9074 fgKillDependentAssertions(destVarNum DEBUGARG(tree));
9075 }
9076 }
9077#endif // LOCAL_ASSERTION_PROP
9078
9079 // A previous incarnation of this code also required the local not to be
9080            // address-exposed (i.e. had its address taken). That seems orthogonal to the decision of whether
9081 // to do field-wise assignments: being address-exposed will cause it to be
9082 // "dependently" promoted, so it will be in the right memory location. One possible
9083 // further reason for avoiding field-wise stores is that the struct might have alignment-induced
9084 // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
9085            // concern, then we could compromise, and say that being address-exposed, or having fields that do not
9086            // completely cover the memory of the struct, prevents field-wise assignments. The same situation exists for the "src" decision.
9087 if (varTypeIsStruct(destLclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
9088 {
9089 // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
9090 return nullptr;
9091 }
9092 else if (!varTypeIsFloating(destLclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
9093 {
9094 // Use the dest local var directly, as well as its type.
9095 dest = destLclVarTree;
9096 asgType = destVarDsc->lvType;
9097
9098 // If the block operation had been a write to a local var of a small int type,
9099 // of the exact size of the small int type, and the var is NormalizeOnStore,
9100 // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
9101 // have done that normalization. If we're now making it into an assignment,
9102 // the NormalizeOnStore will work, and it can be a full def.
9103 if (destVarDsc->lvNormalizeOnStore())
9104 {
9105 dest->gtFlags &= (~GTF_VAR_USEASG);
9106 }
9107 }
9108 else
9109 {
9110 // Could be a non-promoted struct, or a floating point type local, or
9111 // an int subject to a partial write. Don't enregister.
9112 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
9113
9114 // Mark the local var tree as a definition point of the local.
9115 destLclVarTree->gtFlags |= GTF_VAR_DEF;
9116 if (size < destVarDsc->lvExactSize)
9117 { // If it's not a full-width assignment....
9118 destLclVarTree->gtFlags |= GTF_VAR_USEASG;
9119 }
9120
9121 if (dest == destLclVarTree)
9122 {
9123 dest = gtNewIndir(asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
9124 }
9125 }
9126 }
9127
9128 // Check to ensure we don't have a reducible *(& ... )
9129 if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
9130 {
9131            // If dest is an Indir or Block, and it has a child that is an Addr node
9132 //
9133 GenTree* addrNode = dest->AsIndir()->Addr(); // known to be a GT_ADDR
9134
9135 // Can we just remove the Ind(Addr(destOp)) and operate directly on 'destOp'?
9136 //
9137 GenTree* destOp = addrNode->gtGetOp1();
9138 var_types destOpType = destOp->TypeGet();
9139
9140 // We can if we have a primitive integer type and the sizes are exactly the same.
9141 //
9142 if ((varTypeIsIntegralOrI(destOp) && (size == genTypeSize(destOpType))))
9143 {
9144 dest = destOp;
9145 asgType = destOpType;
9146 }
9147 }
9148
9149 if (dest->gtEffectiveVal()->OperIsIndir())
9150 {
9151 // If we have no information about the destination, we have to assume it could
9152 // live anywhere (not just in the GC heap).
9153 // Mark the GT_IND node so that we use the correct write barrier helper in case
9154 // the field is a GC ref.
9155
9156 if (!fgIsIndirOfAddrOfLocal(dest))
9157 {
9158 dest->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
9159 tree->gtFlags |= GTF_GLOB_REF;
9160 }
9161
9162 dest->gtFlags &= (~GTF_EXCEPT | dest->AsIndir()->Addr()->gtFlags);
9163 dest->SetIndirExceptionFlags(this);
9164 tree->gtFlags |= (dest->gtFlags & GTF_EXCEPT);
9165 }
9166
9167 if (isCopyBlock)
9168 {
9169 if (srcVarDsc != nullptr)
9170 {
9171 // Handled above.
9172 assert(!varTypeIsStruct(srcLclVarTree) || !srcVarDsc->lvPromoted);
9173 if (!varTypeIsFloating(srcLclVarTree->TypeGet()) &&
9174 size == genTypeSize(genActualType(srcLclVarTree->TypeGet())))
9175 {
9176 // Use the src local var directly.
9177 src = srcLclVarTree;
9178 }
9179 else
9180 {
9181 // The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar))
9182                // or indir(lclVarAddr) in rational form, and liveness won't account for these uses. Therefore,
9183 // we have to mark this local as address exposed so we don't delete it as a dead store later on.
9184 unsigned lclVarNum = srcLclVarTree->gtLclVarCommon.gtLclNum;
9185 lvaTable[lclVarNum].lvAddrExposed = true;
9186 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
9187 GenTree* srcAddr;
9188 if (src == srcLclVarTree)
9189 {
9190 srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
9191 src = gtNewOperNode(GT_IND, asgType, srcAddr);
9192 }
9193 else
9194 {
9195 assert(src->OperIsIndir());
9196 }
9197 }
9198 }
9199
9200 if (src->OperIsIndir())
9201 {
9202 if (!fgIsIndirOfAddrOfLocal(src))
9203 {
9204 // If we have no information about the src, we have to assume it could
9205 // live anywhere (not just in the GC heap).
9206 // Mark the GT_IND node so that we use the correct write barrier helper in case
9207 // the field is a GC ref.
9208 src->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
9209 }
9210
9211 src->gtFlags &= (~GTF_EXCEPT | src->AsIndir()->Addr()->gtFlags);
9212 src->SetIndirExceptionFlags(this);
9213 }
9214 }
9215 else
9216 {
9217// InitBlk
9218#if FEATURE_SIMD
9219 if (varTypeIsSIMD(asgType))
9220 {
9221 assert(!isCopyBlock); // Else we would have returned the tree above.
9222 noway_assert(src->IsIntegralConst(0));
9223 noway_assert(destVarDsc != nullptr);
9224
9225 src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
9226 tree->gtOp.gtOp2 = src;
9227 return tree;
9228 }
9229 else
9230#endif
9231 {
9232 if (src->OperIsInitVal())
9233 {
9234 src = src->gtGetOp1();
9235 }
9236 assert(src->IsCnsIntOrI());
9237 // This will mutate the integer constant, in place, to be the correct
9238 // value for the type we are using in the assignment.
9239 src->AsIntCon()->FixupInitBlkValue(asgType);
9240 }
9241 }
9242
9243 // Ensure that the dest is setup appropriately.
9244 if (dest->gtEffectiveVal()->OperIsIndir())
9245 {
9246 dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
9247 }
9248
9249 // Ensure that the rhs is setup appropriately.
9250 if (isCopyBlock)
9251 {
9252 src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
9253 }
9254
9255 // Set the lhs and rhs on the assignment.
9256 if (dest != tree->gtOp.gtOp1)
9257 {
9258 asg->gtOp.gtOp1 = dest;
9259 }
9260 if (src != asg->gtOp.gtOp2)
9261 {
9262 asg->gtOp.gtOp2 = src;
9263 }
9264
9265 asg->ChangeType(asgType);
9266 dest->gtFlags |= GTF_DONT_CSE;
9267 asg->gtFlags &= ~GTF_EXCEPT;
9268 asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
9269 // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
9270 asg->gtFlags &= ~GTF_REVERSE_OPS;
9271
9272#ifdef DEBUG
9273 if (verbose)
9274 {
9275 printf("fgMorphOneAsgBlock (after):\n");
9276 gtDispTree(tree);
9277 }
9278#endif
9279 return tree;
9280 }
9281
9282 return nullptr;
9283}
9284
9285//------------------------------------------------------------------------
9286// fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
9287//
9288// Arguments:
9289// tree - a tree node with a gtOper of GT_INITBLK
9290// the child nodes for tree have already been Morphed
9291//
9292// Return Value:
9293//    We can return the original GT_INITBLK unmodified (least desirable, but always correct)
9294// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
9295// If we have performed struct promotion of the Dest() then we will try to
9296// perform a field by field assignment for each of the promoted struct fields
9297//
9298// Notes:
9299//    If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp.
9300//    If the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9301//    cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
9302
9303GenTree* Compiler::fgMorphInitBlock(GenTree* tree)
9304{
9305 // We must have the GT_ASG form of InitBlkOp.
9306 noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
9307#ifdef DEBUG
9308 bool morphed = false;
9309#endif // DEBUG
9310
9311 GenTree* asg = tree;
9312 GenTree* src = tree->gtGetOp2();
9313 GenTree* origDest = tree->gtGetOp1();
9314
9315 GenTree* dest = fgMorphBlkNode(origDest, true);
9316 if (dest != origDest)
9317 {
9318 tree->gtOp.gtOp1 = dest;
9319 }
9320 tree->gtType = dest->TypeGet();
9321 // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
9322 // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
9323 if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
9324 {
9325 src->gtType = TYP_INT;
9326 }
9327 JITDUMP("\nfgMorphInitBlock:");
9328
9329 GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree);
9330 if (oneAsgTree)
9331 {
9332 JITDUMP(" using oneAsgTree.\n");
9333 tree = oneAsgTree;
9334 }
9335 else
9336 {
9337 GenTree* destAddr = nullptr;
9338 GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src;
9339 GenTree* blockSize = nullptr;
9340 unsigned blockWidth = 0;
9341 FieldSeqNode* destFldSeq = nullptr;
9342 LclVarDsc* destLclVar = nullptr;
9343 bool destDoFldAsg = false;
9344 unsigned destLclNum = BAD_VAR_NUM;
9345 bool blockWidthIsConst = false;
9346 GenTreeLclVarCommon* lclVarTree = nullptr;
9347 if (dest->IsLocal())
9348 {
9349 lclVarTree = dest->AsLclVarCommon();
9350 }
9351 else
9352 {
9353 if (dest->OperIsBlk())
9354 {
9355 destAddr = dest->AsBlk()->Addr();
9356 blockWidth = dest->AsBlk()->gtBlkSize;
9357 }
9358 else
9359 {
9360 assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
9361 destAddr = dest->gtGetOp1();
9362 blockWidth = genTypeSize(dest->TypeGet());
9363 }
9364 }
9365 if (lclVarTree != nullptr)
9366 {
9367 destLclNum = lclVarTree->gtLclNum;
9368 destLclVar = &lvaTable[destLclNum];
9369 blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
9370 blockWidthIsConst = true;
9371 }
9372 else
9373 {
9374 if (dest->gtOper == GT_DYN_BLK)
9375 {
9376 // The size must be an integer type
9377 blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
9378 assert(varTypeIsIntegral(blockSize->gtType));
9379 }
9380 else
9381 {
9382 assert(blockWidth != 0);
9383 blockWidthIsConst = true;
9384 }
9385
9386 if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
9387 {
9388 destLclNum = lclVarTree->gtLclNum;
9389 destLclVar = &lvaTable[destLclNum];
9390 }
9391 }
9392 if (destLclNum != BAD_VAR_NUM)
9393 {
9394#if LOCAL_ASSERTION_PROP
9395 // Kill everything about destLclNum (and its field locals)
9396 if (optLocalAssertionProp)
9397 {
9398 if (optAssertionCount > 0)
9399 {
9400 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
9401 }
9402 }
9403#endif // LOCAL_ASSERTION_PROP
9404
9405 if (destLclVar->lvPromoted && blockWidthIsConst)
9406 {
9407 assert(initVal->OperGet() == GT_CNS_INT);
9408 noway_assert(varTypeIsStruct(destLclVar));
9409 noway_assert(!opts.MinOpts());
9410            if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
9411 {
9412 JITDUMP(" dest is address exposed");
9413 }
9414 else
9415 {
9416 if (blockWidth == destLclVar->lvExactSize)
9417 {
9418 JITDUMP(" (destDoFldAsg=true)");
9419 // We may decide later that a copyblk is required when this struct has holes
9420 destDoFldAsg = true;
9421 }
9422 else
9423 {
9424 JITDUMP(" with mismatched size");
9425 }
9426 }
9427 }
9428 }
9429
9430 // Can we use field by field assignment for the dest?
9431 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
9432 {
9433 JITDUMP(" dest contains holes");
9434 destDoFldAsg = false;
9435 }
9436
9437 JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
9438
9439 // If we're doing an InitBlock and we've transformed the dest to a non-Blk
9440 // we need to change it back.
9441 if (!destDoFldAsg && !dest->OperIsBlk())
9442 {
9443 noway_assert(blockWidth != 0);
9444 tree->gtOp.gtOp1 = origDest;
9445 tree->gtType = origDest->gtType;
9446 }
9447
9448 if (!destDoFldAsg && (destLclVar != nullptr))
9449 {
9450 // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
9451 if (!destLclVar->lvRegStruct)
9452 {
9453 // Mark it as DoNotEnregister.
9454 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
9455 }
9456 }
9457
9458 // Mark the dest struct as DoNotEnreg
9459        // when it is a LclVar struct and we are using a block op,
9460        // or the struct is not promoted.
9461 //
9462 if (!destDoFldAsg)
9463 {
9464 dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
9465 tree->gtOp.gtOp1 = dest;
9466 tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
9467 }
9468 else
9469 {
9470 // The initVal must be a constant of TYP_INT
9471 noway_assert(initVal->OperGet() == GT_CNS_INT);
9472 noway_assert(genActualType(initVal->gtType) == TYP_INT);
9473
9474 // The dest must be of a struct type.
9475 noway_assert(varTypeIsStruct(destLclVar));
9476
9477 //
9478 // Now, convert InitBlock to individual assignments
9479 //
9480
9481 tree = nullptr;
9482 INDEBUG(morphed = true);
9483
9484 GenTree* dest;
9485 GenTree* srcCopy;
9486 unsigned fieldLclNum;
9487 unsigned fieldCnt = destLclVar->lvFieldCnt;
9488
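            // For each promoted field, build a 'fieldLcl = initVal' assignment, replicating the
            // init byte pattern into a constant of the field's own type.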
9489 for (unsigned i = 0; i < fieldCnt; ++i)
9490 {
9491 fieldLclNum = destLclVar->lvFieldLclStart + i;
9492 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
9493
9494 noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
9495            // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9496 dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
9497
9498 srcCopy = gtCloneExpr(initVal);
9499 noway_assert(srcCopy != nullptr);
9500
9501 // need type of oper to be same as tree
9502 if (dest->gtType == TYP_LONG)
9503 {
9504 srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
9505 // copy and extend the value
9506 srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
9507 /* Change the types of srcCopy to TYP_LONG */
9508 srcCopy->gtType = TYP_LONG;
9509 }
9510 else if (varTypeIsFloating(dest->gtType))
9511 {
9512 srcCopy->ChangeOperConst(GT_CNS_DBL);
9513 // setup the bit pattern
9514 memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
9515 sizeof(srcCopy->gtDblCon.gtDconVal));
9516 /* Change the types of srcCopy to TYP_DOUBLE */
9517 srcCopy->gtType = TYP_DOUBLE;
9518 }
9519 else
9520 {
9521 noway_assert(srcCopy->gtOper == GT_CNS_INT);
9522 noway_assert(srcCopy->TypeGet() == TYP_INT);
9523 // setup the bit pattern
9524 memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
9525 sizeof(srcCopy->gtIntCon.gtIconVal));
9526 }
9527
9528 srcCopy->gtType = dest->TypeGet();
9529
9530 asg = gtNewAssignNode(dest, srcCopy);
9531
9532#if LOCAL_ASSERTION_PROP
9533 if (optLocalAssertionProp)
9534 {
9535 optAssertionGen(asg);
9536 }
9537#endif // LOCAL_ASSERTION_PROP
9538
9539 if (tree)
9540 {
9541 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
9542 }
9543 else
9544 {
9545 tree = asg;
9546 }
9547 }
9548 }
9549 }
9550
9551#ifdef DEBUG
9552 if (morphed)
9553 {
9554 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9555
9556 if (verbose)
9557 {
9558 printf("fgMorphInitBlock (after):\n");
9559 gtDispTree(tree);
9560 }
9561 }
9562#endif
9563
9564 return tree;
9565}
9566
9567//------------------------------------------------------------------------
9568// fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
9569//
9570// Arguments:
9571// tree - the node to be modified.
9572// type - the type of indirection to change it to.
9573//
9574// Return Value:
9575// Returns the node, modified in place.
9576//
9577// Notes:
9578// This doesn't really warrant a separate method, but is here to abstract
9579// the fact that these nodes can be modified in-place.
9580
9581GenTree* Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
9582{
9583 tree->SetOper(GT_IND);
9584 tree->gtType = type;
9585 return tree;
9586}
9587
9588//------------------------------------------------------------------------
9589// fgMorphGetStructAddr: Gets the address of a struct object
9590//
9591// Arguments:
9592// pTree - the parent's pointer to the struct object node
9593// clsHnd - the class handle for the struct type
9594// isRValue - true if this is a source (not dest)
9595//
9596// Return Value:
9597// Returns the address of the struct value, possibly modifying the existing tree to
9598// sink the address below any comma nodes (this is to canonicalize for value numbering).
9599// If this is a source, it will morph it to an GT_IND before taking its address,
9600// since it may not be remorphed (and we don't want blk nodes as rvalues).
9601
9602GenTree* Compiler::fgMorphGetStructAddr(GenTree** pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
9603{
9604 GenTree* addr;
9605 GenTree* tree = *pTree;
9606 // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
9607 // need to hang onto that for the purposes of value numbering.
9608 if (tree->OperIsIndir())
9609 {
9610 if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
9611 {
9612 addr = tree->gtOp.gtOp1;
9613 }
9614 else
9615 {
9616 if (isRValue && tree->OperIsBlk())
9617 {
9618 tree->ChangeOper(GT_IND);
9619 }
9620 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9621 }
9622 }
9623 else if (tree->gtOper == GT_COMMA)
9624 {
9625 // If this is a comma, we're going to "sink" the GT_ADDR below it.
9626 (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
9627 tree->gtType = TYP_BYREF;
9628 addr = tree;
9629 }
9630 else
9631 {
9632 switch (tree->gtOper)
9633 {
9634 case GT_LCL_FLD:
9635 case GT_LCL_VAR:
9636 case GT_INDEX:
9637 case GT_FIELD:
9638 case GT_ARR_ELEM:
9639 addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
9640 break;
9641 case GT_INDEX_ADDR:
9642 addr = tree;
9643 break;
9644 default:
9645 {
9646 // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
9647 // not going to use "temp"
9648 GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
9649 addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
9650 break;
9651 }
9652 }
9653 }
9654 *pTree = addr;
9655 return addr;
9656}
9657
9658//------------------------------------------------------------------------
9659// fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
9660//
9661// Arguments:
9662// tree - The struct type node
9663// isDest - True if this is the destination of the assignment
9664//
9665// Return Value:
9666// Returns the possibly-morphed node. The caller is responsible for updating
9667//    the parent of this node.
9668
9669GenTree* Compiler::fgMorphBlkNode(GenTree* tree, bool isDest)
9670{
9671 GenTree* handleTree = nullptr;
9672 GenTree* addr = nullptr;
9673 if (tree->OperIs(GT_COMMA))
9674 {
9675 // In order to CSE and value number array index expressions and bounds checks,
9676 // the commas in which they are contained need to match.
9677 // The pattern is that the COMMA should be the address expression.
9678 // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
9679 // TODO-1stClassStructs: Consider whether this can be improved.
9680 // Also consider whether some of this can be included in gtNewBlockVal (though note
9681 // that doing so may cause us to query the type system before we otherwise would).
9682 // Example:
9683 // before: [3] comma struct <- [2] comma struct <- [1] LCL_VAR struct
9684 // after: [3] comma byref <- [2] comma byref <- [4] addr byref <- [1] LCL_VAR struct
9685
9686 addr = tree;
9687 GenTree* effectiveVal = tree->gtEffectiveVal();
9688
9689 GenTreePtrStack commas(getAllocator(CMK_ArrayStack));
9690 for (GenTree* comma = tree; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2())
9691 {
9692 commas.Push(comma);
9693 }
9694
9695 GenTree* lastComma = commas.Top();
9696 noway_assert(lastComma->gtGetOp2() == effectiveVal);
9697 GenTree* effectiveValAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9698#ifdef DEBUG
9699 effectiveValAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9700#endif
9701 lastComma->gtOp.gtOp2 = effectiveValAddr;
9702
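        // The commas now produce the address rather than the struct value, so retype them as
        // TYP_BYREF and refresh their side-effect flags.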
9703 while (!commas.Empty())
9704 {
9705 GenTree* comma = commas.Pop();
9706 comma->gtType = TYP_BYREF;
9707 gtUpdateNodeSideEffects(comma);
9708 }
9709
9710 handleTree = effectiveVal;
9711 }
9712 else if (tree->OperIs(GT_IND) && tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR))
9713 {
9714 handleTree = tree;
9715 addr = tree->AsIndir()->Addr();
9716 }
9717
9718 if (addr != nullptr)
9719 {
9720 var_types structType = handleTree->TypeGet();
9721 if (structType == TYP_STRUCT)
9722 {
9723 CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(handleTree);
9724 if (structHnd == NO_CLASS_HANDLE)
9725 {
9726 tree = gtNewOperNode(GT_IND, structType, addr);
9727 }
9728 else
9729 {
9730 tree = gtNewObjNode(structHnd, addr);
9731 if (tree->OperGet() == GT_OBJ)
9732 {
9733 gtSetObjGcInfo(tree->AsObj());
9734 }
9735 }
9736 }
9737 else
9738 {
9739 tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
9740 }
9741
9742 gtUpdateNodeSideEffects(tree);
9743#ifdef DEBUG
9744 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
9745#endif
9746 }
9747
9748 if (!tree->OperIsBlk())
9749 {
9750 return tree;
9751 }
9752 GenTreeBlk* blkNode = tree->AsBlk();
9753 if (blkNode->OperGet() == GT_DYN_BLK)
9754 {
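        // If the dynamic size turns out to be a (non-zero) constant, convert the GT_DYN_BLK into a
        // GT_BLK with that fixed size.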
9755 if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
9756 {
9757 unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
9758 // A GT_BLK with size of zero is not supported,
9759 // so if we encounter such a thing we just leave it as a GT_DYN_BLK
9760 if (size != 0)
9761 {
9762 blkNode->AsDynBlk()->gtDynamicSize = nullptr;
9763 blkNode->ChangeOper(GT_BLK);
9764 blkNode->gtBlkSize = size;
9765 }
9766 else
9767 {
9768 return tree;
9769 }
9770 }
9771 else
9772 {
9773 return tree;
9774 }
9775 }
9776 if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
9777 (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
9778 {
9779 GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
9780 if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
9781 {
9782 lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
9783 }
9784 }
9785
9786 return tree;
9787}
9788
9789//------------------------------------------------------------------------
9790// fgMorphBlockOperand: Canonicalize an operand of a block assignment
9791//
9792// Arguments:
9793// tree - The block operand
9794// asgType - The type of the assignment
9795// blockWidth - The size of the block
9796// isDest - true iff this is the destination of the assignment
9797//
9798// Return Value:
9799// Returns the morphed block operand
9800//
9801// Notes:
9802// This does the following:
9803// - Ensures that a struct operand is a block node or lclVar.
9804// - Ensures that any COMMAs are above ADDR nodes.
9805// Although 'tree' WAS an operand of a block assignment, the assignment
9806// may have been retyped to be a scalar assignment.
9807
9808GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
9809{
9810 GenTree* effectiveVal = tree->gtEffectiveVal();
9811
9812 if (asgType != TYP_STRUCT)
9813 {
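        // The assignment has been retyped to a scalar: if the operand is IND(ADDR(x)) and 'x'
        // already has the right type, use 'x' directly; otherwise retype the indirection (or wrap
        // the operand in one) so that it matches asgType.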
9814 if (effectiveVal->OperIsIndir())
9815 {
9816 GenTree* addr = effectiveVal->AsIndir()->Addr();
9817 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
9818 {
9819 effectiveVal = addr->gtGetOp1();
9820 }
9821 else if (effectiveVal->OperIsBlk())
9822 {
9823 effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9824 }
9825 else
9826 {
9827 effectiveVal->gtType = asgType;
9828 }
9829 }
9830 else if (effectiveVal->TypeGet() != asgType)
9831 {
9832 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9833 effectiveVal = gtNewIndir(asgType, addr);
9834 }
9835 }
9836 else
9837 {
9838 GenTreeIndir* indirTree = nullptr;
9839 GenTreeLclVarCommon* lclNode = nullptr;
9840 bool needsIndirection = true;
9841
9842 if (effectiveVal->OperIsIndir())
9843 {
9844 indirTree = effectiveVal->AsIndir();
9845 GenTree* addr = effectiveVal->AsIndir()->Addr();
9846 if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
9847 {
9848 lclNode = addr->gtGetOp1()->AsLclVarCommon();
9849 }
9850 }
9851 else if (effectiveVal->OperGet() == GT_LCL_VAR)
9852 {
9853 lclNode = effectiveVal->AsLclVarCommon();
9854 }
9855#ifdef FEATURE_SIMD
9856 if (varTypeIsSIMD(asgType))
9857 {
9858 if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
9859 (indirTree->Addr()->gtGetOp1()->OperIsSIMDorSimdHWintrinsic()))
9860 {
9861 assert(!isDest);
9862 needsIndirection = false;
9863 effectiveVal = indirTree->Addr()->gtGetOp1();
9864 }
9865 if (effectiveVal->OperIsSIMDorSimdHWintrinsic())
9866 {
9867 needsIndirection = false;
9868 }
9869 }
9870#endif // FEATURE_SIMD
9871 if (lclNode != nullptr)
9872 {
9873 LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
9874 if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth) && (varDsc->lvType == asgType))
9875 {
9876 if (effectiveVal != lclNode)
9877 {
9878 JITDUMP("Replacing block node [%06d] with lclVar V%02u\n", dspTreeID(tree), lclNode->gtLclNum);
9879 effectiveVal = lclNode;
9880 }
9881 needsIndirection = false;
9882 }
9883 else
9884 {
9885 // This may be a lclVar that was determined to be address-exposed.
9886 effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
9887 }
9888 }
9889 if (needsIndirection)
9890 {
9891 if (indirTree != nullptr)
9892 {
9893 // We should never find a struct indirection on the lhs of an assignment.
9894 assert(!isDest || indirTree->OperIsBlk());
9895 if (!isDest && indirTree->OperIsBlk())
9896 {
9897 (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
9898 }
9899 }
9900 else
9901 {
9902 GenTree* newTree;
9903 GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
9904 if (isDest)
9905 {
9906 CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
9907 if (clsHnd == NO_CLASS_HANDLE)
9908 {
9909 newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
9910 }
9911 else
9912 {
9913 newTree = gtNewObjNode(clsHnd, addr);
9914 if (isDest && (newTree->OperGet() == GT_OBJ))
9915 {
9916 gtSetObjGcInfo(newTree->AsObj());
9917 }
9918 if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
9919 {
9920 // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
9921 // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
9922 // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
9923 // separately now to avoid excess diffs.
9924 newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
9925 }
9926 }
9927 }
9928 else
9929 {
9930 newTree = gtNewIndir(asgType, addr);
9931 }
9932 effectiveVal = newTree;
9933 }
9934 }
9935 }
9936 tree = effectiveVal;
9937 return tree;
9938}
9939
9940//------------------------------------------------------------------------
9941// fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
9942//
9943// Arguments:
9944// dest - the GT_OBJ or GT_STORE_OBJ
9945//
9946// Assumptions:
9947// The destination must be known (by the caller) to be on the stack.
9948//
9949// Notes:
9950// If we have a CopyObj with a dest on the stack, and its size is small enough
9951// to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
9952// GC Unsafe CopyBlk that is non-interruptible.
9953// This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
9954//
9955void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
9956{
9957#if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9958 assert(dest->gtGcPtrCount != 0);
9959 unsigned blockWidth = dest->AsBlk()->gtBlkSize;
9960#ifdef DEBUG
9961 bool destOnStack = false;
9962 GenTree* destAddr = dest->Addr();
9963 assert(destAddr->IsLocalAddrExpr() != nullptr);
9964#endif
9965 if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
9966 {
9967 genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
9968 dest->SetOper(newOper);
9969 dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
9970 }
9971#endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
9972}
9973
9974//------------------------------------------------------------------------
9975// fgMorphCopyBlock: Perform the Morphing of block copy
9976//
9977// Arguments:
9978// tree - a block copy (i.e. an assignment with a block op on the lhs).
9979//
9980// Return Value:
9981//    We can return the original block copy unmodified (least desirable, but always correct)
9982// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
9983// If we have performed struct promotion of the Source() or the Dest() then we will try to
9984// perform a field by field assignment for each of the promoted struct fields.
9985//
9986// Assumptions:
9987// The child nodes for tree have already been Morphed.
9988//
9989// Notes:
9990// If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
9991//    When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes,
9992//    and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
9993//    If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
9994//    cannot use a field by field assignment and must leave the original block copy unmodified.
9995
9996GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
9997{
9998 noway_assert(tree->OperIsCopyBlkOp());
9999
10000 JITDUMP("\nfgMorphCopyBlock:");
10001
10002 bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
10003
10004 GenTree* asg = tree;
10005 GenTree* rhs = asg->gtGetOp2();
10006 GenTree* dest = asg->gtGetOp1();
10007
10008#if FEATURE_MULTIREG_RET
10009 // If this is a multi-reg return, we will not do any morphing of this node.
10010 if (rhs->IsMultiRegCall())
10011 {
10012 assert(dest->OperGet() == GT_LCL_VAR);
10013 JITDUMP(" not morphing a multireg call return\n");
10014 return tree;
10015 }
10016#endif // FEATURE_MULTIREG_RET
10017
10018 // If we have an array index on the lhs, we need to create an obj node.
10019
10020 dest = fgMorphBlkNode(dest, true);
10021 if (dest != asg->gtGetOp1())
10022 {
10023 asg->gtOp.gtOp1 = dest;
10024 if (dest->IsLocal())
10025 {
10026 dest->gtFlags |= GTF_VAR_DEF;
10027 }
10028 }
10029 asg->gtType = dest->TypeGet();
10030 rhs = fgMorphBlkNode(rhs, false);
10031
10032 asg->gtOp.gtOp2 = rhs;
10033
10034 GenTree* oldTree = tree;
10035 GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree);
10036
10037 if (oneAsgTree)
10038 {
10039 JITDUMP(" using oneAsgTree.\n");
10040 tree = oneAsgTree;
10041 }
10042 else
10043 {
10044 unsigned blockWidth;
10045 bool blockWidthIsConst = false;
10046 GenTreeLclVarCommon* lclVarTree = nullptr;
10047 GenTreeLclVarCommon* srcLclVarTree = nullptr;
10048 unsigned destLclNum = BAD_VAR_NUM;
10049 LclVarDsc* destLclVar = nullptr;
10050 FieldSeqNode* destFldSeq = nullptr;
10051 bool destDoFldAsg = false;
10052 GenTree* destAddr = nullptr;
10053 GenTree* srcAddr = nullptr;
10054 bool destOnStack = false;
10055 bool hasGCPtrs = false;
10056
10057 JITDUMP("block assignment to morph:\n");
10058 DISPTREE(asg);
10059
10060 if (dest->IsLocal())
10061 {
10062 blockWidthIsConst = true;
10063 destOnStack = true;
10064 if (dest->gtOper == GT_LCL_VAR)
10065 {
10066 lclVarTree = dest->AsLclVarCommon();
10067 destLclNum = lclVarTree->gtLclNum;
10068 destLclVar = &lvaTable[destLclNum];
10069 if (destLclVar->lvType == TYP_STRUCT)
10070 {
10071 // It would be nice if lvExactSize always corresponded to the size of the struct,
10072 // but it doesn't always for the temps that the importer creates when it spills side
10073 // effects.
10074 // TODO-Cleanup: Determine when this happens, and whether it can be changed.
10075 blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
10076 }
10077 else
10078 {
10079 blockWidth = genTypeSize(destLclVar->lvType);
10080 }
10081 hasGCPtrs = destLclVar->lvStructGcCount != 0;
10082 }
10083 else
10084 {
10085 assert(dest->TypeGet() != TYP_STRUCT);
10086 assert(dest->gtOper == GT_LCL_FLD);
10087 blockWidth = genTypeSize(dest->TypeGet());
10088 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10089 destFldSeq = dest->AsLclFld()->gtFieldSeq;
10090 }
10091 }
10092 else
10093 {
10094 GenTree* effectiveDest = dest->gtEffectiveVal();
10095 if (effectiveDest->OperGet() == GT_IND)
10096 {
10097 assert(dest->TypeGet() != TYP_STRUCT);
10098 blockWidth = genTypeSize(effectiveDest->TypeGet());
10099 blockWidthIsConst = true;
10100 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
10101 {
10102 destAddr = dest->gtGetOp1();
10103 }
10104 }
10105 else
10106 {
10107 assert(effectiveDest->OperIsBlk());
10108 GenTreeBlk* blk = effectiveDest->AsBlk();
10109
10110 blockWidth = blk->gtBlkSize;
10111 blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
10112 if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
10113 {
10114 destAddr = blk->Addr();
10115 }
10116 }
10117 if (destAddr != nullptr)
10118 {
10119 noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
10120 if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
10121 {
10122 destOnStack = true;
10123 destLclNum = lclVarTree->gtLclNum;
10124 destLclVar = &lvaTable[destLclNum];
10125 }
10126 }
10127 }
10128
10129 if (destLclVar != nullptr)
10130 {
10131#if LOCAL_ASSERTION_PROP
10132 // Kill everything about destLclNum (and its field locals)
10133 if (optLocalAssertionProp)
10134 {
10135 if (optAssertionCount > 0)
10136 {
10137 fgKillDependentAssertions(destLclNum DEBUGARG(tree));
10138 }
10139 }
10140#endif // LOCAL_ASSERTION_PROP
10141
10142 if (destLclVar->lvPromoted && blockWidthIsConst)
10143 {
10144 noway_assert(varTypeIsStruct(destLclVar));
10145 noway_assert(!opts.MinOpts());
10146
10147 if (blockWidth == destLclVar->lvExactSize)
10148 {
10149 JITDUMP(" (destDoFldAsg=true)");
10150 // We may decide later that a copyblk is required when this struct has holes
10151 destDoFldAsg = true;
10152 }
10153 else
10154 {
10155 JITDUMP(" with mismatched dest size");
10156 }
10157 }
10158 }
10159
10160 FieldSeqNode* srcFldSeq = nullptr;
10161 unsigned srcLclNum = BAD_VAR_NUM;
10162 LclVarDsc* srcLclVar = nullptr;
10163 bool srcDoFldAsg = false;
10164
10165 if (rhs->IsLocal())
10166 {
10167 srcLclVarTree = rhs->AsLclVarCommon();
10168 srcLclNum = srcLclVarTree->gtLclNum;
10169 if (rhs->OperGet() == GT_LCL_FLD)
10170 {
10171 srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
10172 }
10173 }
10174 else if (rhs->OperIsIndir())
10175 {
10176 if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
10177 {
10178 srcLclNum = srcLclVarTree->gtLclNum;
10179 }
10180 else
10181 {
10182 srcAddr = rhs->gtOp.gtOp1;
10183 }
10184 }
10185
10186 if (srcLclNum != BAD_VAR_NUM)
10187 {
10188 srcLclVar = &lvaTable[srcLclNum];
10189
10190 if (srcLclVar->lvPromoted && blockWidthIsConst)
10191 {
10192 noway_assert(varTypeIsStruct(srcLclVar));
10193 noway_assert(!opts.MinOpts());
10194
10195 if (blockWidth == srcLclVar->lvExactSize)
10196 {
10197 JITDUMP(" (srcDoFldAsg=true)");
10198 // We may decide later that a copyblk is required when this struct has holes
10199 srcDoFldAsg = true;
10200 }
10201 else
10202 {
10203 JITDUMP(" with mismatched src size");
10204 }
10205 }
10206 }
10207
10208 // Check to see if we are doing a copy to/from the same local block.
10209 // If so, morph it to a nop.
10210 if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq) &&
10211 destFldSeq != FieldSeqStore::NotAField())
10212 {
10213 JITDUMP("Self-copy; replaced with a NOP.\n");
10214 GenTree* nop = gtNewNothingNode();
10215 INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
10216 return nop;
10217 }
10218
10219 // Check to see if we are required to do a copy block because the struct contains holes
10220 // and either the src or dest is externally visible
10221 //
10222 bool requiresCopyBlock = false;
10223 bool srcSingleLclVarAsg = false;
10224 bool destSingleLclVarAsg = false;
10225
10226 // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
10227 if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
10228 {
10229 requiresCopyBlock = true;
10230 }
10231
10232 // Can we use field by field assignment for the dest?
10233 if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
10234 {
10235 JITDUMP(" dest contains custom layout and contains holes");
10236 // C++ style CopyBlock with holes
10237 requiresCopyBlock = true;
10238 }
10239
10240 // Can we use field by field assignment for the src?
10241 if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
10242 {
10243 JITDUMP(" src contains custom layout and contains holes");
10244 // C++ style CopyBlock with holes
10245 requiresCopyBlock = true;
10246 }
10247
10248#if defined(_TARGET_ARM_)
10249 if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED))
10250 {
10251 JITDUMP(" rhs is unaligned");
10252 requiresCopyBlock = true;
10253 }
10254
10255 if (asg->gtFlags & GTF_BLK_UNALIGNED)
10256 {
10257 JITDUMP(" asg is unaligned");
10258 requiresCopyBlock = true;
10259 }
10260#endif // _TARGET_ARM_
10261
10262 if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
10263 {
10264 requiresCopyBlock = true;
10265 }
10266
10267 // Can't use field by field assignment if the src is a call.
10268 if (rhs->OperGet() == GT_CALL)
10269 {
10270 JITDUMP(" src is a call");
10271 // C++ style CopyBlock with holes
10272 requiresCopyBlock = true;
10273 }
10274
10275 // If we passed the above checks, then we will check these two
10276 if (!requiresCopyBlock)
10277 {
10278 // Are both dest and src promoted structs?
10279 if (destDoFldAsg && srcDoFldAsg)
10280 {
10281 // Both structs should be of the same type, or each have a single field of the same type.
10282 // If not we will use a copy block.
10283 if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
10284 lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
10285 {
10286 unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
10287 unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart;
10288 if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
10289 (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
10290 {
10291 requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
10292 JITDUMP(" with mismatched types");
10293 }
10294 }
10295 }
10296 // Are neither dest or src promoted structs?
10297 else if (!destDoFldAsg && !srcDoFldAsg)
10298 {
10299 requiresCopyBlock = true; // Leave as a CopyBlock
10300 JITDUMP(" with no promoted structs");
10301 }
10302 else if (destDoFldAsg)
10303 {
10304 // Match the following kinds of trees:
10305 // fgMorphTree BB01, stmt 9 (before)
10306 // [000052] ------------ const int 8
10307 // [000053] -A--G------- copyBlk void
10308 // [000051] ------------ addr byref
10309 // [000050] ------------ lclVar long V07 loc5
10310 // [000054] --------R--- <list> void
10311 // [000049] ------------ addr byref
10312 // [000048] ------------ lclVar struct(P) V06 loc4
10313 // long V06.h (offs=0x00) -> V17 tmp9
10314 // Yields this transformation
10315 // fgMorphCopyBlock (after):
10316 // [000050] ------------ lclVar long V07 loc5
10317 // [000085] -A---------- = long
10318 // [000083] D------N---- lclVar long V17 tmp9
10319 //
10320 if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
10321 (blockWidth == genTypeSize(srcLclVar->TypeGet())))
10322 {
10323 // Reject the following tree:
10324 // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
10325 //
10326 // fgMorphTree BB01, stmt 6 (before)
10327 // [000038] ------------- const int 4
10328 // [000039] -A--G-------- copyBlk void
10329 // [000037] ------------- addr byref
10330 // [000036] ------------- lclVar int V05 loc3
10331 // [000040] --------R---- <list> void
10332 // [000035] ------------- addr byref
10333 // [000034] ------------- lclVar struct(P) V04 loc2
10334 // float V04.f1 (offs=0x00) -> V13 tmp6
10335                    // As this would transform into
10336 // float V13 = int V05
10337 //
10338 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
10339 var_types destType = lvaTable[fieldLclNum].TypeGet();
10340 if (srcLclVar->TypeGet() == destType)
10341 {
10342 srcSingleLclVarAsg = true;
10343 }
10344 }
10345 }
10346 else
10347 {
10348 assert(srcDoFldAsg);
10349 // Check for the symmetric case (which happens for the _pointer field of promoted spans):
10350 //
10351 // [000240] -----+------ /--* lclVar struct(P) V18 tmp9
10352 // /--* byref V18._value (offs=0x00) -> V30 tmp21
10353 // [000245] -A------R--- * = struct (copy)
10354 // [000244] -----+------ \--* obj(8) struct
10355 // [000243] -----+------ \--* addr byref
10356 // [000242] D----+-N---- \--* lclVar byref V28 tmp19
10357 //
10358 if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
10359 (blockWidth == genTypeSize(destLclVar->TypeGet())))
10360 {
10361 // Check for type agreement
10362 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
10363 var_types srcType = lvaTable[fieldLclNum].TypeGet();
10364 if (destLclVar->TypeGet() == srcType)
10365 {
10366 destSingleLclVarAsg = true;
10367 }
10368 }
10369 }
10370 }
10371
10372        // If we require a copy block, then set both of the field assign bools to false.
10373 if (requiresCopyBlock)
10374 {
10375 // If a copy block is required then we won't do field by field assignments
10376 destDoFldAsg = false;
10377 srcDoFldAsg = false;
10378 }
10379
10380 JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
10381
10382 // Mark the dest/src structs as DoNotEnreg when they are not being fully referenced as the same type.
10383 //
10384 if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
10385 {
10386 if (!destLclVar->lvRegStruct || (destLclVar->lvType != dest->TypeGet()))
10387 {
10388 // Mark it as DoNotEnregister.
10389 lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
10390 }
10391 }
10392
10393 if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
10394 {
10395 if (!srcLclVar->lvRegStruct || (srcLclVar->lvType != dest->TypeGet()))
10396 {
10397 lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
10398 }
10399 }
10400
10401 if (requiresCopyBlock)
10402 {
10403 var_types asgType = dest->TypeGet();
10404 dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
10405 asg->gtOp.gtOp1 = dest;
10406 asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
10407
10408 // Note that the unrolling of CopyBlk is only implemented on some platforms.
10409 // Currently that includes x64 and ARM but not x86: the code generation for this
10410 // construct requires the ability to mark certain regions of the generated code
10411 // as non-interruptible, and the GC encoding for the latter platform does not
10412 // have this capability.
10413
10414            // If we have a CopyObj with a dest on the stack,
10415            // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
10416            // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
10417 // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
10418 //
10419 if (destOnStack && (dest->OperGet() == GT_OBJ))
10420 {
10421 fgMorphUnsafeBlk(dest->AsObj());
10422 }
10423
10424 // Eliminate the "OBJ or BLK" node on the rhs.
10425 rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
10426 asg->gtOp.gtOp2 = rhs;
10427
10428 // Formerly, liveness did not consider copyblk arguments of simple types as being
10429 // a use or def, so these variables were marked as address-exposed.
10430 // TODO-1stClassStructs: This should no longer be needed.
10431 if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
10432 {
10433 JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
10434 lvaTable[srcLclNum].lvAddrExposed = true;
10435 }
10436
10437 if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
10438 {
10439 JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
10440 lvaTable[destLclNum].lvAddrExposed = true;
10441 }
10442
10443 goto _Done;
10444 }
10445
10446 //
10447 // Otherwise we convert this CopyBlock into individual field by field assignments
10448 //
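        // The overall approach: if needed, spill the address of the non-promoted side to a temp,
        // then for each promoted field build a matching dest/src pair and chain the resulting
        // per-field assignments together with GT_COMMA nodes.
        //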
10449 tree = nullptr;
10450
10451 GenTree* src;
10452 GenTree* addrSpill = nullptr;
10453 unsigned addrSpillTemp = BAD_VAR_NUM;
10454 bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
10455
10456 unsigned fieldCnt = DUMMY_INIT(0);
10457
10458 if (destDoFldAsg && srcDoFldAsg)
10459 {
10460 // To do fieldwise assignments for both sides, they'd better be the same struct type!
10461 // All of these conditions were checked above...
10462 assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
10463 assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
10464
10465 fieldCnt = destLclVar->lvFieldCnt;
10466 goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
10467 // assignments.
10468 }
10469 else if (destDoFldAsg)
10470 {
10471 fieldCnt = destLclVar->lvFieldCnt;
10472 rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
10473 if (srcAddr == nullptr)
10474 {
10475 srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
10476 }
10477 }
10478 else
10479 {
10480 assert(srcDoFldAsg);
10481 fieldCnt = srcLclVar->lvFieldCnt;
10482 dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
10483 if (dest->OperIsBlk())
10484 {
10485 (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
10486 }
10487 destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
10488 }
10489
10490 if (destDoFldAsg)
10491 {
10492 noway_assert(!srcDoFldAsg);
10493 if (gtClone(srcAddr))
10494 {
10495                // srcAddr is a simple expression. No need to spill.
10496 noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10497 }
10498 else
10499 {
10500                // srcAddr is a complex expression. Clone and spill it (unless the destination is
10501 // a struct local that only has one field, in which case we'd only use the
10502 // address value once...)
10503 if (destLclVar->lvFieldCnt > 1)
10504 {
10505 addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
10506 noway_assert(addrSpill != nullptr);
10507 }
10508 }
10509 }
10510
10511 if (srcDoFldAsg)
10512 {
10513 noway_assert(!destDoFldAsg);
10514
10515 // If we're doing field-wise stores, to an address within a local, and we copy
10516 // the address into "addrSpill", do *not* declare the original local var node in the
10517 // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
10518 // field-wise assignments as an "indirect" assignment to the local.
10519 // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
10520 // we clone it.)
10521 if (lclVarTree != nullptr)
10522 {
10523 lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
10524 }
10525
10526 if (gtClone(destAddr))
10527 {
10528                // destAddr is a simple expression. No need to spill.
10529 noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
10530 }
10531 else
10532 {
10533                // destAddr is a complex expression. Clone and spill it (unless
10534 // the source is a struct local that only has one field, in which case we'd only
10535 // use the address value once...)
10536 if (srcLclVar->lvFieldCnt > 1)
10537 {
10538 addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
10539 noway_assert(addrSpill != nullptr);
10540 }
10541
10542 // TODO-CQ: this should be based on a more general
10543 // "BaseAddress" method, that handles fields of structs, before or after
10544 // morphing.
10545 if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
10546 {
10547 if (addrSpill->gtOp.gtOp1->IsLocal())
10548 {
10549 // We will *not* consider this to define the local, but rather have each individual field assign
10550 // be a definition.
10551 addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
10552 assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
10553 PROMOTION_TYPE_INDEPENDENT);
10554 addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
10555 // local stack frame
10556 }
10557 }
10558 }
10559 }
10560
10561 if (addrSpill != nullptr)
10562 {
10563 // Spill the (complex) address to a BYREF temp.
10564 // Note, at most one address may need to be spilled.
10565 addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
10566
10567 lvaTable[addrSpillTemp].lvType = TYP_BYREF;
10568
10569 if (addrSpillIsStackDest)
10570 {
10571 lvaTable[addrSpillTemp].lvStackByref = true;
10572 }
10573
10574 tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
10575
10576            // If we are assigning the address of a LclVar here,
10577            // liveness does not account for this kind of address-taken use.
10578 //
10579 // We have to mark this local as address exposed so
10580 // that we don't delete the definition for this LclVar
10581 // as a dead store later on.
10582 //
10583 if (addrSpill->OperGet() == GT_ADDR)
10584 {
10585 GenTree* addrOp = addrSpill->gtOp.gtOp1;
10586 if (addrOp->IsLocal())
10587 {
10588 unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
10589 lvaTable[lclVarNum].lvAddrExposed = true;
10590 lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
10591 }
10592 }
10593 }
10594
10595 _AssignFields:
10596
10597 // We may have allocated a temp above, and that may have caused the lvaTable to be expanded.
10598 // So, beyond this point we cannot rely on the old values of 'srcLclVar' and 'destLclVar'.
10599 for (unsigned i = 0; i < fieldCnt; ++i)
10600 {
10601 FieldSeqNode* curFieldSeq = nullptr;
10602 if (destDoFldAsg)
10603 {
10604 noway_assert(destLclNum != BAD_VAR_NUM);
10605 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10606 dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10607                // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
10608 if (destAddr != nullptr)
10609 {
10610 noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
10611 dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10612 }
10613 else
10614 {
10615 noway_assert(lclVarTree != nullptr);
10616 dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
10617 }
10618 // Don't CSE the lhs of an assignment.
10619 dest->gtFlags |= GTF_DONT_CSE;
10620 }
10621 else
10622 {
10623 noway_assert(srcDoFldAsg);
10624 noway_assert(srcLclNum != BAD_VAR_NUM);
10625 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10626
10627 if (destSingleLclVarAsg)
10628 {
10629 noway_assert(fieldCnt == 1);
10630 noway_assert(destLclVar != nullptr);
10631 noway_assert(addrSpill == nullptr);
10632
10633 dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
10634 }
10635 else
10636 {
10637 if (addrSpill)
10638 {
10639 assert(addrSpillTemp != BAD_VAR_NUM);
10640 dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10641 }
10642 else
10643 {
10644 dest = gtCloneExpr(destAddr);
10645 noway_assert(dest != nullptr);
10646
10647 // Is the address of a local?
10648 GenTreeLclVarCommon* lclVarTree = nullptr;
10649 bool isEntire = false;
10650 bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
10651 if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
10652 {
10653 lclVarTree->gtFlags |= GTF_VAR_DEF;
10654 if (!isEntire)
10655 {
10656 lclVarTree->gtFlags |= GTF_VAR_USEASG;
10657 }
10658 }
10659 }
10660
10661 GenTree* fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
10662 // Have to set the field sequence -- which means we need the field handle.
10663 CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
10664 CORINFO_FIELD_HANDLE fieldHnd =
10665 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10666 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10667 fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
10668
10669 dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
10670
10671 dest = gtNewIndir(lvaTable[fieldLclNum].TypeGet(), dest);
10672
10673 // !!! The destination could be on stack. !!!
10674 // This flag will let us choose the correct write barrier.
10675 dest->gtFlags |= GTF_IND_TGTANYWHERE;
10676 }
10677 }
10678
10679 if (srcDoFldAsg)
10680 {
10681 noway_assert(srcLclNum != BAD_VAR_NUM);
10682 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
10683 src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
10684
10685 noway_assert(srcLclVarTree != nullptr);
10686 src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
10687 // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
10688 // but they are when they are under a GT_ADDR.
10689 src->gtFlags |= GTF_DONT_CSE;
10690 }
10691 else
10692 {
10693 noway_assert(destDoFldAsg);
10694 noway_assert(destLclNum != BAD_VAR_NUM);
10695 unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
10696
10697 if (srcSingleLclVarAsg)
10698 {
10699 noway_assert(fieldCnt == 1);
10700 noway_assert(srcLclNum != BAD_VAR_NUM);
10701 noway_assert(addrSpill == nullptr);
10702
10703 src = gtNewLclvNode(srcLclNum, lvaGetDesc(srcLclNum)->TypeGet());
10704 }
10705 else
10706 {
10707 if (addrSpill)
10708 {
10709 assert(addrSpillTemp != BAD_VAR_NUM);
10710 src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
10711 }
10712 else
10713 {
10714 src = gtCloneExpr(srcAddr);
10715 noway_assert(src != nullptr);
10716 }
10717
10718 CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
10719 CORINFO_FIELD_HANDLE fieldHnd =
10720 info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
10721 curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
10722 var_types destType = lvaGetDesc(fieldLclNum)->lvType;
10723
10724 bool done = false;
10725 if (lvaGetDesc(fieldLclNum)->lvFldOffset == 0)
10726 {
10727 // If this is a full-width use of the src via a different type, we need to create a GT_LCL_FLD.
10728 // (Note that if it was the same type, 'srcSingleLclVarAsg' would be true.)
10729 if (srcLclNum != BAD_VAR_NUM)
10730 {
10731 noway_assert(srcLclVarTree != nullptr);
10732 assert(destType != TYP_STRUCT);
10733 unsigned destSize = genTypeSize(destType);
10734 srcLclVar = lvaGetDesc(srcLclNum);
10735 unsigned srcSize =
10736 (srcLclVar->lvType == TYP_STRUCT) ? srcLclVar->lvExactSize : genTypeSize(srcLclVar);
10737 if (destSize == srcSize)
10738 {
10739 srcLclVarTree->gtFlags |= GTF_VAR_CAST;
10740 srcLclVarTree->ChangeOper(GT_LCL_FLD);
10741 srcLclVarTree->gtType = destType;
10742 srcLclVarTree->AsLclFld()->gtFieldSeq = curFieldSeq;
10743 src = srcLclVarTree;
10744 done = true;
10745 }
10746 }
10747 }
10748 else // if (lvaGetDesc(fieldLclNum)->lvFldOffset != 0)
10749 {
10750 src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
10751 new (this, GT_CNS_INT)
10752 GenTreeIntCon(TYP_I_IMPL, lvaGetDesc(fieldLclNum)->lvFldOffset,
10753 curFieldSeq));
10754 }
10755 if (!done)
10756 {
10757 src = gtNewIndir(destType, src);
10758 }
10759 }
10760 }
10761
10762 noway_assert(dest->TypeGet() == src->TypeGet());
10763
10764 asg = gtNewAssignNode(dest, src);
10765
10766 // If we spilled the address, and we didn't do individual field assignments to promoted fields,
10767 // and it was of a local, ensure that the destination local variable has been marked as address
10768 // exposed. Neither liveness nor SSA are able to track this kind of indirect assignments.
10769 if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
10770 {
10771 noway_assert(lvaGetDesc(destLclNum)->lvAddrExposed);
10772 }
10773
10774#if LOCAL_ASSERTION_PROP
10775 if (optLocalAssertionProp)
10776 {
10777 optAssertionGen(asg);
10778 }
10779#endif // LOCAL_ASSERTION_PROP
10780
10781 if (tree)
10782 {
10783 tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
10784 }
10785 else
10786 {
10787 tree = asg;
10788 }
10789 }
10790 }
10791
10792 if (isLateArg)
10793 {
10794 tree->gtFlags |= GTF_LATE_ARG;
10795 }
10796
10797#ifdef DEBUG
10798 if (tree != oldTree)
10799 {
10800 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
10801 }
10802
10803 if (verbose)
10804 {
10805 printf("\nfgMorphCopyBlock (after):\n");
10806 gtDispTree(tree);
10807 }
10808#endif
10809
10810_Done:
10811 return tree;
10812}
10813
10814// Insert conversions and normalize the tree to make it amenable to register-based
10815// FP architectures.
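//
// For example, a TYP_DOUBLE arithmetic node with a TYP_FLOAT operand gets a cast to TYP_DOUBLE
// inserted on that operand, and a comparison of a TYP_FLOAT value with a TYP_DOUBLE value widens
// the TYP_FLOAT side to TYP_DOUBLE.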
10816GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
10817{
10818 if (tree->OperIsArithmetic())
10819 {
10820 if (varTypeIsFloating(tree))
10821 {
10822 GenTree* op1 = tree->gtOp.gtOp1;
10823 GenTree* op2 = tree->gtGetOp2();
10824
10825 assert(varTypeIsFloating(op1->TypeGet()) && varTypeIsFloating(op2->TypeGet()));
10826
10827 if (op1->TypeGet() != tree->TypeGet())
10828 {
10829 tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, false, tree->TypeGet());
10830 }
10831 if (op2->TypeGet() != tree->TypeGet())
10832 {
10833 tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, false, tree->TypeGet());
10834 }
10835 }
10836 }
10837 else if (tree->OperIsCompare())
10838 {
10839 GenTree* op1 = tree->gtOp.gtOp1;
10840
10841 if (varTypeIsFloating(op1))
10842 {
10843 GenTree* op2 = tree->gtGetOp2();
10844 assert(varTypeIsFloating(op2));
10845
10846 if (op1->TypeGet() != op2->TypeGet())
10847 {
10848                // both had better be floating; just one is bigger than the other
10849 if (op1->TypeGet() == TYP_FLOAT)
10850 {
10851 assert(op2->TypeGet() == TYP_DOUBLE);
10852 tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE);
10853 }
10854 else if (op2->TypeGet() == TYP_FLOAT)
10855 {
10856 assert(op1->TypeGet() == TYP_DOUBLE);
10857 tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE);
10858 }
10859 }
10860 }
10861 }
10862
10863 return tree;
10864}
10865
10866//--------------------------------------------------------------------------------------------------------------
10867// fgMorphRecognizeBoxNullable:
10868// Recognize this pattern:
10869//
10870// stmtExpr void (IL 0x000... ???)
10871// return int
10872// CNS_INT ref null
10873// EQ/NE/GT int
10874// CALL help ref HELPER.CORINFO_HELP_BOX_NULLABLE
10875// CNS_INT(h) long 0x7fed96836c8 class
10876// ADDR byref
10877// FIELD struct value
10878// LCL_VAR ref V00 this
10879//
10880// which comes from this code:
10881//
10882// return this.value==null;
10883//
10884// and transform it into
10885//
10886// stmtExpr void (IL 0x000... ???)
10887// return int
10888// CNS_INT ref null
10889// EQ/NE/GT int
10890// IND bool
10891// ADDR byref
10892// FIELD struct value
10893// LCL_VAR ref V00 this
10894//
10895// Arguments:
10896// compare - Compare tree to optimize.
10897//
10898// return value:
10899// A tree that has a call to CORINFO_HELP_BOX_NULLABLE optimized away if the pattern is found;
10900// the original tree otherwise.
10901//
10902
10903GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
10904{
10905 GenTree* op1 = compare->gtOp.gtOp1;
10906 GenTree* op2 = compare->gtOp.gtOp2;
10907 GenTree* opCns;
10908 GenTreeCall* opCall;
10909
10910 if (op1->IsCnsIntOrI() && op2->IsHelperCall())
10911 {
10912 opCns = op1;
10913 opCall = op2->AsCall();
10914 }
10915 else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
10916 {
10917 opCns = op2;
10918 opCall = op1->AsCall();
10919 }
10920 else
10921 {
10922 return compare;
10923 }
10924
10925 if (!opCns->IsIntegralConst(0))
10926 {
10927 return compare;
10928 }
10929
10930 if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
10931 {
10932 return compare;
10933 }
10934
10935 // Get the nullable struct argument
10936 GenTree* arg = opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
10937
10938 // Check for cases that are unsafe to optimize and return the unchanged tree
10939 if (arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || ((arg->gtFlags & GTF_LATE_ARG) != 0))
10940 {
10941 return compare;
10942 }
10943
10944 // Replace the box with an access of the nullable 'hasValue' field which is at the zero offset
10945 GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, arg);
10946
10947 if (opCall == op1)
10948 {
10949 compare->gtOp.gtOp1 = newOp;
10950 }
10951 else
10952 {
10953 compare->gtOp.gtOp2 = newOp;
10954 }
10955
10956 opCns->gtType = TYP_INT;
10957
10958 return compare;
10959}
10960
10961#ifdef FEATURE_SIMD
10962
10963//--------------------------------------------------------------------------------------------------------------
10964// getSIMDStructFromField:
10965//  Check whether the field belongs to a simd struct. If it does, return the GenTree* for
10966//  the struct node, along with the base type, field index and simd size. If it does not, return nullptr.
10967//  Usually, if the tree node comes from a simd lclvar that is not used in any SIMD intrinsic, we
10968//  should return nullptr, since in that case the SIMD struct should be treated as a regular struct.
10969//  However, if you want the simd struct node regardless, you can set ignoreUsedInSIMDIntrinsic
10970//  to true. Then the IsUsedInSIMDIntrinsic check is skipped, and the SIMD struct node is returned
10971//  whenever the struct is a SIMD struct.
10972//
10973// Arguments:
10974//       tree - GenTree*. This node will be checked to see whether it is a field that belongs to a simd
10975//              struct used in a simd intrinsic.
10976// pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut
10977// to simd lclvar's base type.
10978// indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut
10979// equals to the index number of this field.
10980// simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut
10981// equals to the simd struct size which this tree belongs to.
10982// ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
10983// the UsedInSIMDIntrinsic check.
10984//
10985// return value:
10986//  A GenTree* which points to the simd lclvar that the field belongs to. If the tree is not a simd
10987//  intrinsic related field, return nullptr.
10988//
10989
10990GenTree* Compiler::getSIMDStructFromField(GenTree* tree,
10991 var_types* pBaseTypeOut,
10992 unsigned* indexOut,
10993 unsigned* simdSizeOut,
10994 bool ignoreUsedInSIMDIntrinsic /*false*/)
10995{
10996 GenTree* ret = nullptr;
10997 if (tree->OperGet() == GT_FIELD)
10998 {
10999 GenTree* objRef = tree->gtField.gtFldObj;
11000 if (objRef != nullptr)
11001 {
11002 GenTree* obj = nullptr;
11003 if (objRef->gtOper == GT_ADDR)
11004 {
11005 obj = objRef->gtOp.gtOp1;
11006 }
11007 else if (ignoreUsedInSIMDIntrinsic)
11008 {
11009 obj = objRef;
11010 }
11011 else
11012 {
11013 return nullptr;
11014 }
11015
11016 if (isSIMDTypeLocal(obj))
11017 {
11018 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
11019 LclVarDsc* varDsc = &lvaTable[lclNum];
11020 if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
11021 {
11022 *simdSizeOut = varDsc->lvExactSize;
11023 *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
11024 ret = obj;
11025 }
11026 }
11027 else if (obj->OperGet() == GT_SIMD)
11028 {
11029 ret = obj;
11030 GenTreeSIMD* simdNode = obj->AsSIMD();
11031 *simdSizeOut = simdNode->gtSIMDSize;
11032 *pBaseTypeOut = simdNode->gtSIMDBaseType;
11033 }
11034#ifdef FEATURE_HW_INTRINSICS
11035 else if (obj->OperIsSimdHWIntrinsic())
11036 {
11037 ret = obj;
11038 GenTreeHWIntrinsic* simdNode = obj->AsHWIntrinsic();
11039 *simdSizeOut = simdNode->gtSIMDSize;
11040 *pBaseTypeOut = simdNode->gtSIMDBaseType;
11041 }
11042#endif // FEATURE_HW_INTRINSICS
11043 }
11044 }
11045 if (ret != nullptr)
11046 {
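        // The field's index within the SIMD vector is its byte offset divided by the element size.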
11047 unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
11048 *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
11049 }
11050 return ret;
11051}
11052
11053/*****************************************************************************
11054* If a read operation tries to access a simd struct field, then transform the
11055* operation into the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
11056* Otherwise, return the old tree.
11057* Argument:
11058*  tree - GenTree*. If this pointer points to a simd struct which is used for a simd
11059*         intrinsic, we will morph it into the simd intrinsic SIMDIntrinsicGetItem.
11060* Return:
11061*  A GenTree* which points to the new tree. If the tree is not for a simd intrinsic,
11062*  the original tree is returned.
11063*/
11064
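// For example, a read of the Y field of a Vector3 local that is used in SIMD intrinsics becomes
// SIMDIntrinsicGetItem(vec, 1), since the field's byte offset (4) divided by the element size (4) is 1.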
11065GenTree* Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTree* tree)
11066{
11067 unsigned index = 0;
11068 var_types baseType = TYP_UNKNOWN;
11069 unsigned simdSize = 0;
11070 GenTree* simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
11071 if (simdStructNode != nullptr)
11072 {
11073 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
11074 GenTree* op2 = gtNewIconNode(index);
11075 tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
11076#ifdef DEBUG
11077 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11078#endif
11079 }
11080 return tree;
11081}
11082
11083/*****************************************************************************
11084* Transform an assignment of a SIMD struct field to the SIMD intrinsic
11085* SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
11086* then return the old tree.
11087* Argument:
11088*  tree - GenTree*. If this pointer points to a simd struct which is used for a simd
11089*         intrinsic, we will morph it into a simd intrinsic set.
11090* Return:
11091*  A GenTree* which points to the new tree. If the tree is not for a simd intrinsic,
11092*  the original tree is returned.
11093*/
11094
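// For example, an assignment to the Y field of a Vector3 local used in SIMD intrinsics, "vec.Y = val",
// is rewritten as "vec = SIMDIntrinsicSetY(vec, val)".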
11095GenTree* Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTree* tree)
11096{
11097 assert(tree->OperGet() == GT_ASG);
11098 GenTree* op1 = tree->gtGetOp1();
11099 GenTree* op2 = tree->gtGetOp2();
11100
11101 unsigned index = 0;
11102 var_types baseType = TYP_UNKNOWN;
11103 unsigned simdSize = 0;
11104 GenTree* simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
11105 if (simdOp1Struct != nullptr)
11106 {
11107 // Generate the simd set intrinsic
11108 assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
11109
11110 SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
11111 switch (index)
11112 {
11113 case 0:
11114 simdIntrinsicID = SIMDIntrinsicSetX;
11115 break;
11116 case 1:
11117 simdIntrinsicID = SIMDIntrinsicSetY;
11118 break;
11119 case 2:
11120 simdIntrinsicID = SIMDIntrinsicSetZ;
11121 break;
11122 case 3:
11123 simdIntrinsicID = SIMDIntrinsicSetW;
11124 break;
11125 default:
11126 noway_assert(!"There is no set intrinsic for index bigger than 3");
11127 }
11128
11129 GenTree* target = gtClone(simdOp1Struct);
11130 assert(target != nullptr);
11131 var_types simdType = target->gtType;
11132 GenTree* simdTree = gtNewSIMDNode(simdType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
11133
11134 tree->gtOp.gtOp1 = target;
11135 tree->gtOp.gtOp2 = simdTree;
11136
11137 // fgMorphTree has already called fgMorphImplicitByRefArgs() on this assignment, but the source
11138 // and target have not yet been morphed.
11139 // Therefore, in case the source and/or target are now implicit byrefs, we need to call it again.
11140 if (fgMorphImplicitByRefArgs(tree))
11141 {
11142 if (tree->gtGetOp1()->OperIsBlk())
11143 {
11144 assert(tree->gtGetOp1()->TypeGet() == simdType);
11145 fgMorphBlkToInd(tree->gtGetOp1()->AsBlk(), simdType);
11146 }
11147 }
11148#ifdef DEBUG
11149 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11150#endif
11151 }
11152
11153 return tree;
11154}
11155
11156#endif // FEATURE_SIMD
11157
11158/*****************************************************************************
11159 *
11160 * Transform the given GTK_SMPOP tree for code generation.
11161 */
11162
11163#ifdef _PREFAST_
11164#pragma warning(push)
11165#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
11166#endif
11167GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
11168{
11169 ALLOCA_CHECK();
11170 assert(tree->OperKind() & GTK_SMPOP);
11171
11172 /* The steps in this function are :
11173 o Perform required preorder processing
11174 o Process the first, then second operand, if any
11175 o Perform required postorder morphing
11176 o Perform optional postorder morphing if optimizing
11177 */
11178
11179 bool isQmarkColon = false;
11180
11181#if LOCAL_ASSERTION_PROP
11182 AssertionIndex origAssertionCount = DUMMY_INIT(0);
11183 AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
11184
11185 AssertionIndex thenAssertionCount = DUMMY_INIT(0);
11186 AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
11187#endif
11188
11189 if (fgGlobalMorph)
11190 {
11191 tree = fgMorphForRegisterFP(tree);
11192 }
11193
11194 genTreeOps oper = tree->OperGet();
11195 var_types typ = tree->TypeGet();
11196 GenTree* op1 = tree->gtOp.gtOp1;
11197 GenTree* op2 = tree->gtGetOp2IfPresent();
11198
11199 /*-------------------------------------------------------------------------
11200 * First do any PRE-ORDER processing
11201 */
11202
11203 switch (oper)
11204 {
11205 // Some arithmetic operators need to use a helper call to the EE
11206 int helper;
11207
11208 case GT_ASG:
11209 tree = fgDoNormalizeOnStore(tree);
11210 /* fgDoNormalizeOnStore can change op2 */
11211 noway_assert(op1 == tree->gtOp.gtOp1);
11212 op2 = tree->gtOp.gtOp2;
11213
11214#ifdef FEATURE_SIMD
11215 {
11216 // We should check whether op2 should be assigned to a SIMD field or not.
11217                // If it is, we should translate the tree to a simd intrinsic.
11218 assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
11219 GenTree* newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
11220 typ = tree->TypeGet();
11221 op1 = tree->gtGetOp1();
11222 op2 = tree->gtGetOp2();
11223#ifdef DEBUG
11224 assert((tree == newTree) && (tree->OperGet() == oper));
11225 if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
11226 {
11227 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
11228 }
11229#endif // DEBUG
11230 }
11231#endif
11232
11233 // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
11234 // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
11235            // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type)
11236 // TODO-1stClassStructs: improve this.
11237 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
11238 {
11239 op1->gtFlags |= GTF_DONT_CSE;
11240 }
11241 break;
11242
11243 case GT_ADDR:
11244
11245 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
11246 op1->gtFlags |= GTF_DONT_CSE;
11247 break;
11248
11249 case GT_QMARK:
11250 case GT_JTRUE:
11251
11252 noway_assert(op1);
11253
11254 if (op1->OperKind() & GTK_RELOP)
11255 {
11256 noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
11257 /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
11258 not need to materialize the result as a 0 or 1. */
11259
11260 /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
11261 op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
11262
11263 // Request that the codegen for op1 sets the condition flags
11264 // when it generates the code for op1.
11265 //
11266 // Codegen for op1 must set the condition flags if
11267 // this method returns true.
11268 //
11269 op1->gtRequestSetFlags();
11270 }
11271 else
11272 {
11273 GenTree* effOp1 = op1->gtEffectiveVal();
11274 noway_assert((effOp1->gtOper == GT_CNS_INT) &&
11275 (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
11276 }
11277 break;
11278
11279 case GT_COLON:
11280#if LOCAL_ASSERTION_PROP
11281 if (optLocalAssertionProp)
11282#endif
11283 {
11284 isQmarkColon = true;
11285 }
11286 break;
11287
11288 case GT_INDEX:
11289 return fgMorphArrayIndex(tree);
11290
11291 case GT_CAST:
11292 return fgMorphCast(tree);
11293
11294 case GT_MUL:
11295
11296#ifndef _TARGET_64BIT_
11297 if (typ == TYP_LONG)
11298 {
11299                /* For (long)int1 * (long)int2, we don't actually do the
11300 casts, and just multiply the 32 bit values, which will
11301 give us the 64 bit result in edx:eax */
11302
11303 noway_assert(op2);
11304 if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
11305 genActualType(op1->CastFromType()) == TYP_INT && genActualType(op2->CastFromType()) == TYP_INT) &&
11306 !op1->gtOverflow() && !op2->gtOverflow())
11307 {
11308 // The casts have to be of the same signedness.
11309 if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
11310 {
11311 // We see if we can force an int constant to change its signedness
11312 GenTree* constOp;
11313 if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
11314 constOp = op1;
11315 else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
11316 constOp = op2;
11317 else
11318 goto NO_MUL_64RSLT;
11319
11320 if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
11321 constOp->gtFlags ^= GTF_UNSIGNED;
11322 else
11323 goto NO_MUL_64RSLT;
11324 }
11325
11326 // The only combination that can overflow
11327 if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
11328 goto NO_MUL_64RSLT;
11329
11330 /* Remaining combinations can never overflow during long mul. */
11331
11332 tree->gtFlags &= ~GTF_OVERFLOW;
11333
11334 /* Do unsigned mul only if the casts were unsigned */
11335
11336 tree->gtFlags &= ~GTF_UNSIGNED;
11337 tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
11338
11339 /* Since we are committing to GTF_MUL_64RSLT, we don't want
11340 the casts to be folded away. So morph the castees directly */
11341
11342 op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
11343 op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
11344
11345 // Propagate side effect flags up the tree
11346 op1->gtFlags &= ~GTF_ALL_EFFECT;
11347 op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11348 op2->gtFlags &= ~GTF_ALL_EFFECT;
11349 op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11350
11351 // If the GT_MUL can be altogether folded away, we should do that.
11352
11353 if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
11354 opts.OptEnabled(CLFLG_CONSTANTFOLD))
11355 {
11356 tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
11357 tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
11358 noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
11359 tree = gtFoldExprConst(tree);
11360 noway_assert(tree->OperIsConst());
11361 return tree;
11362 }
11363
11364 tree->gtFlags |= GTF_MUL_64RSLT;
11365
11366 // If op1 and op2 are unsigned casts, we need to do an unsigned mult
11367 tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
11368
11369 // Insert GT_NOP nodes for the cast operands so that they do not get folded
11370 // And propagate the new flags. We don't want to CSE the casts because
11371 // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
11372
11373 if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
11374 {
11375 op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
11376 op1->gtFlags &= ~GTF_ALL_EFFECT;
11377 op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11378 }
11379
11380 if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
11381 {
11382 op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
11383 op2->gtFlags &= ~GTF_ALL_EFFECT;
11384 op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
11385 }
11386
11387 op1->gtFlags |= GTF_DONT_CSE;
11388 op2->gtFlags |= GTF_DONT_CSE;
11389
11390 tree->gtFlags &= ~GTF_ALL_EFFECT;
11391 tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
11392
11393 goto DONE_MORPHING_CHILDREN;
11394 }
11395 else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
11396 {
11397 NO_MUL_64RSLT:
11398 if (tree->gtOverflow())
11399 helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
11400 else
11401 helper = CORINFO_HELP_LMUL;
11402
11403 goto USE_HELPER_FOR_ARITH;
11404 }
11405 else
11406 {
11407 /* We are seeing this node again. We have decided to use
11408 GTF_MUL_64RSLT, so leave it alone. */
11409
11410 assert(tree->gtIsValid64RsltMul());
11411 }
11412 }
11413#endif // !_TARGET_64BIT_
11414 break;
11415
11416 case GT_DIV:
11417
11418#ifndef _TARGET_64BIT_
11419 if (typ == TYP_LONG)
11420 {
11421 helper = CORINFO_HELP_LDIV;
11422 goto USE_HELPER_FOR_ARITH;
11423 }
11424
11425#if USE_HELPERS_FOR_INT_DIV
11426 if (typ == TYP_INT)
11427 {
11428 helper = CORINFO_HELP_DIV;
11429 goto USE_HELPER_FOR_ARITH;
11430 }
11431#endif
11432#endif // !_TARGET_64BIT_
11433
11434 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11435 {
11436 op2 = gtFoldExprConst(op2);
11437 }
11438 break;
11439
11440 case GT_UDIV:
11441
11442#ifndef _TARGET_64BIT_
11443 if (typ == TYP_LONG)
11444 {
11445 helper = CORINFO_HELP_ULDIV;
11446 goto USE_HELPER_FOR_ARITH;
11447 }
11448#if USE_HELPERS_FOR_INT_DIV
11449 if (typ == TYP_INT)
11450 {
11451 helper = CORINFO_HELP_UDIV;
11452 goto USE_HELPER_FOR_ARITH;
11453 }
11454#endif
11455#endif // _TARGET_64BIT_
11456 break;
11457
11458 case GT_MOD:
11459
11460 if (varTypeIsFloating(typ))
11461 {
11462 helper = CORINFO_HELP_DBLREM;
11463 noway_assert(op2);
11464 if (op1->TypeGet() == TYP_FLOAT)
11465 {
11466 if (op2->TypeGet() == TYP_FLOAT)
11467 {
11468 helper = CORINFO_HELP_FLTREM;
11469 }
11470 else
11471 {
11472 tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE);
11473 }
11474 }
11475 else if (op2->TypeGet() == TYP_FLOAT)
11476 {
11477 tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE);
11478 }
11479 goto USE_HELPER_FOR_ARITH;
11480 }
11481
11482 // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
11483 // A similar optimization for signed mod will not work for a negative perfectly divisible
11484 // HI-word. To make it correct, we would need to divide without the sign and then flip the
11485            // result sign after mod. This requires 18 opcodes + flow, making it not worth inlining.
11486 goto ASSIGN_HELPER_FOR_MOD;
11487
11488 case GT_UMOD:
11489
11490#ifdef _TARGET_ARMARCH_
11491//
11492// Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
11493//
11494#else // _TARGET_XARCH
11495 /* If this is an unsigned long mod with op2 which is a cast to long from a
11496 constant int, then don't morph to a call to the helper. This can be done
11497 faster inline using idiv.
11498 */
11499
11500 noway_assert(op2);
11501 if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
11502 ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
11503 ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
11504 {
11505 if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
11506 op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
11507 op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
11508 (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
11509 {
11510 tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
11511 noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
11512 }
11513
11514 if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
11515 op2->gtIntConCommon.LngValue() <= 0x3fffffff)
11516 {
11517 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
11518 noway_assert(op1->TypeGet() == TYP_LONG);
11519
11520 // Update flags for op1 morph
11521 tree->gtFlags &= ~GTF_ALL_EFFECT;
11522
11523 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
11524
11525 // If op1 is a constant, then do constant folding of the division operator
11526 if (op1->gtOper == GT_CNS_NATIVELONG)
11527 {
11528 tree = gtFoldExpr(tree);
11529 }
11530 return tree;
11531 }
11532 }
11533#endif // _TARGET_XARCH
11534
11535 ASSIGN_HELPER_FOR_MOD:
11536
11537            // For "val % 1", return 0 if op1 doesn't have any side effects
11538            // and we are not in the CSE phase; during that phase we cannot discard 'tree'
11539            // because it may contain CSE expressions that we haven't yet examined.
11540 //
11541 if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
11542 {
11543 if (op2->IsIntegralConst(1))
11544 {
11545 GenTree* zeroNode = gtNewZeroConNode(typ);
11546#ifdef DEBUG
11547 zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
11548#endif
11549 DEBUG_DESTROY_NODE(tree);
11550 return zeroNode;
11551 }
11552 }
11553
11554#ifndef _TARGET_64BIT_
11555 if (typ == TYP_LONG)
11556 {
11557 helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
11558 goto USE_HELPER_FOR_ARITH;
11559 }
11560
11561#if USE_HELPERS_FOR_INT_DIV
11562 if (typ == TYP_INT)
11563 {
11564 if (oper == GT_UMOD)
11565 {
11566 helper = CORINFO_HELP_UMOD;
11567 goto USE_HELPER_FOR_ARITH;
11568 }
11569 else if (oper == GT_MOD)
11570 {
11571 helper = CORINFO_HELP_MOD;
11572 goto USE_HELPER_FOR_ARITH;
11573 }
11574 }
11575#endif
11576#endif // !_TARGET_64BIT_
11577
11578 if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
11579 {
11580 op2 = gtFoldExprConst(op2);
11581 }
11582
11583#ifdef _TARGET_ARM64_
11584 // For ARM64 we don't have a remainder instruction,
11585 // The architecture manual suggests the following transformation to
11586        // generate code for such an operator:
11587 //
11588 // a % b = a - (a / b) * b;
11589 //
11590 // TODO: there are special cases where it can be done better, for example
11591        // when the modulo operation is unsigned and the divisor is an
11592 // integer constant power of two. In this case, we can make the transform:
11593 //
11594 // a % b = a & (b - 1);
11595 //
11596 // Lower supports it for all cases except when `a` is constant, but
11597 // in Morph we can't guarantee that `a` won't be transformed into a constant,
11598        // so we can't guarantee that lower will be able to do this optimization.
11599 {
11600 // Do "a % b = a - (a / b) * b" morph always, see TODO before this block.
11601 bool doMorphModToSubMulDiv = true;
11602
11603 if (doMorphModToSubMulDiv)
11604 {
11605 assert(!optValnumCSE_phase);
11606
11607 tree = fgMorphModToSubMulDiv(tree->AsOp());
11608 op1 = tree->gtOp.gtOp1;
11609 op2 = tree->gtOp.gtOp2;
11610 }
11611 }
11612#else // !_TARGET_ARM64_
11613 // If b is not a power of 2 constant then lowering replaces a % b
11614 // with a - (a / b) * b and applies magic division optimization to
11615 // a / b. The code may already contain an a / b expression (e.g.
11616 // x = a / 10; y = a % 10;) and then we end up with redundant code.
11617 // If we convert % to / here we give CSE the opportunity to eliminate
11618 // the redundant division. If there's no redundant division then
11619 // nothing is lost, lowering would have done this transform anyway.
11620
11621 if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
11622 {
11623 ssize_t divisorValue = op2->AsIntCon()->IconValue();
11624 size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
11625 : static_cast<size_t>(abs(divisorValue));
11626
11627 if (!isPow2(absDivisorValue))
11628 {
11629 tree = fgMorphModToSubMulDiv(tree->AsOp());
11630 op1 = tree->gtOp.gtOp1;
11631 op2 = tree->gtOp.gtOp2;
11632 }
11633 }
11634#endif // !_TARGET_ARM64_
11635 break;
11636
11637 USE_HELPER_FOR_ARITH:
11638 {
11639 // TODO: this comment is wrong now, do an appropriate fix.
11640 /* We have to morph these arithmetic operations into helper calls
11641 before morphing the arguments (preorder), else the arguments
11642 won't get correct values of fgPtrArgCntCur.
11643 However, try to fold the tree first in case we end up with a
11644 simple node which won't need a helper call at all */
11645
11646 noway_assert(tree->OperIsBinary());
11647
11648 GenTree* oldTree = tree;
11649
11650 tree = gtFoldExpr(tree);
11651
11652 // Were we able to fold it ?
11653 // Note that gtFoldExpr may return a non-leaf even if successful
11654 // e.g. for something like "expr / 1" - see also bug #290853
11655 if (tree->OperIsLeaf() || (oldTree != tree))
11656 {
11657 return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
11658 }
11659
11660 // Did we fold it into a comma node with throw?
11661 if (tree->gtOper == GT_COMMA)
11662 {
11663 noway_assert(fgIsCommaThrow(tree));
11664 return fgMorphTree(tree);
11665 }
11666 }
11667 return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
11668
11669 case GT_RETURN:
11670 // normalize small integer return values
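            // For example, if the method's declared return type is 'short', a cast to TYP_SHORT is
            // inserted here so that the caller can rely on the returned value being properly sign-extended.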
11671 if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) && (op1->TypeGet() != TYP_VOID) &&
11672 fgCastNeeded(op1, info.compRetType))
11673 {
11674 // Small-typed return values are normalized by the callee
11675 op1 = gtNewCastNode(TYP_INT, op1, false, info.compRetType);
11676
11677 // Propagate GTF_COLON_COND
11678 op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
11679
11680 tree->gtOp.gtOp1 = fgMorphCast(op1);
11681
11682 // Propagate side effect flags
11683 tree->gtFlags &= ~GTF_ALL_EFFECT;
11684 tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
11685
11686 return tree;
11687 }
11688 break;
11689
11690 case GT_EQ:
11691 case GT_NE:
11692 {
11693 GenTree* optimizedTree = gtFoldTypeCompare(tree);
11694
11695 if (optimizedTree != tree)
11696 {
11697 return fgMorphTree(optimizedTree);
11698 }
11699 }
11700
11701 __fallthrough;
11702
11703 case GT_GT:
11704
11705 // Try to optimize away calls to CORINFO_HELP_BOX_NULLABLE for GT_EQ, GT_NE, and unsigned GT_GT.
11706 if ((oper != GT_GT) || tree->IsUnsigned())
11707 {
11708 fgMorphRecognizeBoxNullable(tree);
11709 }
11710
11711 op1 = tree->gtOp.gtOp1;
11712 op2 = tree->gtGetOp2IfPresent();
11713
11714 break;
11715
11716 case GT_RUNTIMELOOKUP:
11717 return fgMorphTree(op1);
11718
11719#ifdef _TARGET_ARM_
11720 case GT_INTRINSIC:
11721 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
11722 {
11723 switch (tree->TypeGet())
11724 {
11725 case TYP_DOUBLE:
11726 return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
11727 case TYP_FLOAT:
11728 return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
11729 default:
11730 unreached();
11731 }
11732 }
11733 break;
11734#endif
11735 case GT_LIST:
11736 // Special handling for the arg list.
11737 return fgMorphArgList(tree->AsArgList(), mac);
11738
11739 default:
11740 break;
11741 }
11742
11743#if !CPU_HAS_FP_SUPPORT
11744 tree = fgMorphToEmulatedFP(tree);
11745#endif
11746
11747 /*-------------------------------------------------------------------------
11748 * Process the first operand, if any
11749 */
11750
11751 if (op1)
11752 {
11753
11754#if LOCAL_ASSERTION_PROP
11755 // If we are entering the "then" part of a Qmark-Colon we must
11756 // save the state of the current copy assignment table
11757 // so that we can restore this state when entering the "else" part
11758 if (isQmarkColon)
11759 {
11760 noway_assert(optLocalAssertionProp);
11761 if (optAssertionCount)
11762 {
11763 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11764 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
11765 origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
11766 origAssertionCount = optAssertionCount;
11767 memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
11768 }
11769 else
11770 {
11771 origAssertionCount = 0;
11772 origAssertionTab = nullptr;
11773 }
11774 }
11775#endif // LOCAL_ASSERTION_PROP
11776
11777 // We might need a new MorphAddressContext context. (These are used to convey
11778 // parent context about how addresses being calculated will be used; see the
11779 // specification comment for MorphAddrContext for full details.)
11780 // Assume it's an Ind context to start.
11781 MorphAddrContext subIndMac1(MACK_Ind);
11782 MorphAddrContext* subMac1 = mac;
11783 if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
11784 {
11785 switch (tree->gtOper)
11786 {
11787 case GT_ADDR:
11788 if (subMac1 == nullptr)
11789 {
11790 subMac1 = &subIndMac1;
11791 subMac1->m_kind = MACK_Addr;
11792 }
11793 break;
11794 case GT_COMMA:
11795 // In a comma, the incoming context only applies to the rightmost arg of the
11796 // comma list. The left arg (op1) gets a fresh context.
11797 subMac1 = nullptr;
11798 break;
11799 case GT_OBJ:
11800 case GT_BLK:
11801 case GT_DYN_BLK:
11802 case GT_IND:
11803 subMac1 = &subIndMac1;
11804 break;
11805 default:
11806 break;
11807 }
11808 }
11809
11810 // For additions, if we're in an IND context keep track of whether
11811 // all offsets added to the address are constant, and their sum.
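        // Downstream code uses the accumulated constant offset to decide whether an explicit
        // null check is needed for the eventual indirection (see the overflow handling just below).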
11812 if (tree->gtOper == GT_ADD && subMac1 != nullptr)
11813 {
11814 assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
11815 GenTree* otherOp = tree->gtOp.gtOp2;
11816            // Is the other operand a constant?
11817 if (otherOp->IsCnsIntOrI())
11818 {
11819 ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
11820 totalOffset += otherOp->gtIntConCommon.IconValue();
11821 if (totalOffset.IsOverflow())
11822 {
11823 // We will consider an offset so large as to overflow as "not a constant" --
11824 // we will do a null check.
11825 subMac1->m_allConstantOffsets = false;
11826 }
11827 else
11828 {
11829 subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11830 }
11831 }
11832 else
11833 {
11834 subMac1->m_allConstantOffsets = false;
11835 }
11836 }
11837
11838 // If gtOp1 is a GT_FIELD, we need to pass down the mac if
11839 // its parent is GT_ADDR, since the address of the field
11840 // is part of an ongoing address computation. Otherwise
11841 // op1 represents the value of the field and so any address
11842 // calculations it does are in a new context.
11843 if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
11844 {
11845 subMac1 = nullptr;
11846
11847 // The impact of this field's value to any ongoing
11848 // address computation is handled below when looking
11849 // at op2.
11850 }
11851
11852 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
11853
11854#if LOCAL_ASSERTION_PROP
11855 // If we are exiting the "then" part of a Qmark-Colon we must
11856 // save the state of the current copy assignment table
11857 // so that we can merge this state with the "else" part exit
11858 if (isQmarkColon)
11859 {
11860 noway_assert(optLocalAssertionProp);
11861 if (optAssertionCount)
11862 {
11863 noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
11864 unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
11865 thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
11866 thenAssertionCount = optAssertionCount;
11867 memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
11868 }
11869 else
11870 {
11871 thenAssertionCount = 0;
11872 thenAssertionTab = nullptr;
11873 }
11874 }
11875#endif // LOCAL_ASSERTION_PROP
11876
11877 /* Morphing along with folding and inlining may have changed the
11878 * side effect flags, so we have to reset them
11879 *
11880 * NOTE: Don't reset the exception flags on nodes that may throw */
11881
11882 assert(tree->gtOper != GT_CALL);
11883
11884 if (!tree->OperRequiresCallFlag(this))
11885 {
11886 tree->gtFlags &= ~GTF_CALL;
11887 }
11888
11889 /* Propagate the new flags */
11890 tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
11891
11892        // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
11893 // Similarly for clsVar
11894 if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
11895 {
11896 tree->gtFlags &= ~GTF_GLOB_REF;
11897 }
11898 } // if (op1)
11899
11900 /*-------------------------------------------------------------------------
11901 * Process the second operand, if any
11902 */
11903
11904 if (op2)
11905 {
11906
11907#if LOCAL_ASSERTION_PROP
11908 // If we are entering the "else" part of a Qmark-Colon we must
11909 // reset the state of the current copy assignment table
11910 if (isQmarkColon)
11911 {
11912 noway_assert(optLocalAssertionProp);
11913 optAssertionReset(0);
11914 if (origAssertionCount)
11915 {
11916 size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
11917 memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
11918 optAssertionReset(origAssertionCount);
11919 }
11920 }
11921#endif // LOCAL_ASSERTION_PROP
11922
11923 // We might need a new MorphAddressContext context to use in evaluating op2.
11924 // (These are used to convey parent context about how addresses being calculated
11925 // will be used; see the specification comment for MorphAddrContext for full details.)
11926 // Assume it's an Ind context to start.
11927 switch (tree->gtOper)
11928 {
11929 case GT_ADD:
11930 if (mac != nullptr && mac->m_kind == MACK_Ind)
11931 {
11932 GenTree* otherOp = tree->gtOp.gtOp1;
11933                    // Is the other operand a constant?
11934 if (otherOp->IsCnsIntOrI())
11935 {
11936 mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
11937 }
11938 else
11939 {
11940 mac->m_allConstantOffsets = false;
11941 }
11942 }
11943 break;
11944 default:
11945 break;
11946 }
11947
11948 // If gtOp2 is a GT_FIELD, we must be taking its value,
11949 // so it should evaluate its address in a new context.
11950 if (op2->gtOper == GT_FIELD)
11951 {
11952 // The impact of this field's value to any ongoing
11953 // address computation is handled above when looking
11954 // at op1.
11955 mac = nullptr;
11956 }
11957
11958 tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
11959
11960 /* Propagate the side effect flags from op2 */
11961
11962 tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
11963
11964#if LOCAL_ASSERTION_PROP
11965 // If we are exiting the "else" part of a Qmark-Colon we must
11966 // merge the state of the current copy assignment table with
11967 // that of the exit of the "then" part.
11968 if (isQmarkColon)
11969 {
11970 noway_assert(optLocalAssertionProp);
11971 // If either exit table has zero entries then
11972 // the merged table also has zero entries
11973 if (optAssertionCount == 0 || thenAssertionCount == 0)
11974 {
11975 optAssertionReset(0);
11976 }
11977 else
11978 {
11979 size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
11980 if ((optAssertionCount != thenAssertionCount) ||
11981 (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
11982 {
11983                    // Yes, they are different, so we have to find the merged set.
11984                    // Iterate over the copy assignment table, removing any entries
11985                    // that do not have an exact match in the thenAssertionTab.
11986 AssertionIndex index = 1;
11987 while (index <= optAssertionCount)
11988 {
11989 AssertionDsc* curAssertion = optGetAssertion(index);
11990
11991 for (unsigned j = 0; j < thenAssertionCount; j++)
11992 {
11993 AssertionDsc* thenAssertion = &thenAssertionTab[j];
11994
11995 // Do the left sides match?
11996 if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
11997 (curAssertion->assertionKind == thenAssertion->assertionKind))
11998 {
11999 // Do the right sides match?
12000 if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
12001 (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
12002 {
12003 goto KEEP;
12004 }
12005 else
12006 {
12007 goto REMOVE;
12008 }
12009 }
12010 }
12011 //
12012                        // If we fall out of the loop above, then we didn't find
12013                        // any matching entry in the thenAssertionTab, so the assertion
12014                        // must have been killed on that path; remove it here.
12015 //
12016 REMOVE:
12017                        // The entry at 'index' in optAssertionTabPrivate is to be removed
12018 CLANG_FORMAT_COMMENT_ANCHOR;
12019#ifdef DEBUG
12020 if (verbose)
12021 {
12022 printf("The QMARK-COLON ");
12023 printTreeID(tree);
12024 printf(" removes assertion candidate #%d\n", index);
12025 }
12026#endif
12027 optAssertionRemove(index);
12028 continue;
12029 KEEP:
12030                        // The entry at 'index' in optAssertionTabPrivate is to be kept
12031 index++;
12032 }
12033 }
12034 }
12035 }
12036#endif // LOCAL_ASSERTION_PROP
12037 } // if (op2)
12038
12039DONE_MORPHING_CHILDREN:
12040
12041 if (tree->OperMayThrow(this))
12042 {
12043 // Mark the tree node as potentially throwing an exception
12044 tree->gtFlags |= GTF_EXCEPT;
12045 }
12046 else
12047 {
12048 if (tree->OperIsIndirOrArrLength())
12049 {
12050 tree->gtFlags |= GTF_IND_NONFAULTING;
12051 }
12052 if (((op1 == nullptr) || ((op1->gtFlags & GTF_EXCEPT) == 0)) &&
12053 ((op2 == nullptr) || ((op2->gtFlags & GTF_EXCEPT) == 0)))
12054 {
12055 tree->gtFlags &= ~GTF_EXCEPT;
12056 }
12057 }
12058
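    // Recompute GTF_ASG and GTF_CALL the same way as GTF_EXCEPT above: the operator itself may
    // demand the flag; otherwise it is kept only if one of the (possibly absent) operands carries it.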
12059 if (tree->OperRequiresAsgFlag())
12060 {
12061 tree->gtFlags |= GTF_ASG;
12062 }
12063 else
12064 {
12065 if (((op1 == nullptr) || ((op1->gtFlags & GTF_ASG) == 0)) &&
12066 ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0)))
12067 {
12068 tree->gtFlags &= ~GTF_ASG;
12069 }
12070 }
12071
12072 if (tree->OperRequiresCallFlag(this))
12073 {
12074 tree->gtFlags |= GTF_CALL;
12075 }
12076 else
12077 {
12078 if (((op1 == nullptr) || ((op1->gtFlags & GTF_CALL) == 0)) &&
12079 ((op2 == nullptr) || ((op2->gtFlags & GTF_CALL) == 0)))
12080 {
12081 tree->gtFlags &= ~GTF_CALL;
12082 }
12083 }
12084 /*-------------------------------------------------------------------------
12085 * Now do POST-ORDER processing
12086 */
12087
12088 if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) && (op2 && !varTypeIsGC(op2->TypeGet())))
12089 {
12090 // The tree is really not GC but was marked as such. Now that the
12091 // children have been unmarked, unmark the tree too.
12092
12093        // Remember that GT_COMMA inherits its type only from op2
12094 if (tree->gtOper == GT_COMMA)
12095 {
12096 tree->gtType = genActualType(op2->TypeGet());
12097 }
12098 else
12099 {
12100 tree->gtType = genActualType(op1->TypeGet());
12101 }
12102 }
12103
12104 GenTree* oldTree = tree;
12105
12106 GenTree* qmarkOp1 = nullptr;
12107 GenTree* qmarkOp2 = nullptr;
12108
12109 if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
12110 {
12111 qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
12112 qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
12113 }
12114
12115 // Try to fold it, maybe we get lucky,
12116 tree = gtFoldExpr(tree);
12117
12118 if (oldTree != tree)
12119 {
12120 /* if gtFoldExpr returned op1 or op2 then we are done */
12121 if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
12122 {
12123 return tree;
12124 }
12125
12126 /* If we created a comma-throw tree then we need to morph op1 */
12127 if (fgIsCommaThrow(tree))
12128 {
12129 tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
12130 fgMorphTreeDone(tree);
12131 return tree;
12132 }
12133
12134 return tree;
12135 }
12136 else if (tree->OperKind() & GTK_CONST)
12137 {
12138 return tree;
12139 }
12140
12141 /* gtFoldExpr could have used setOper to change the oper */
12142 oper = tree->OperGet();
12143 typ = tree->TypeGet();
12144
12145 /* gtFoldExpr could have changed op1 and op2 */
12146 op1 = tree->gtOp.gtOp1;
12147 op2 = tree->gtGetOp2IfPresent();
12148
12149 // Do we have an integer compare operation?
12150 //
12151 if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
12152 {
12153 // Are we comparing against zero?
12154 //
12155 if (op2->IsIntegralConst(0))
12156 {
12157 // Request that the codegen for op1 sets the condition flags
12158 // when it generates the code for op1.
12159 //
12160 // Codegen for op1 must set the condition flags if
12161 // this method returns true.
12162 //
12163 op1->gtRequestSetFlags();
12164 }
12165 }
12166 /*-------------------------------------------------------------------------
12167 * Perform the required oper-specific postorder morphing
12168 */
12169
12170 GenTree* temp;
12171 GenTree* cns1;
12172 GenTree* cns2;
12173 size_t ival1, ival2;
12174 GenTree* lclVarTree;
12175 GenTree* effectiveOp1;
12176 FieldSeqNode* fieldSeq = nullptr;
12177
12178 switch (oper)
12179 {
12180 case GT_ASG:
12181
12182 lclVarTree = fgIsIndirOfAddrOfLocal(op1);
12183 if (lclVarTree != nullptr)
12184 {
12185 lclVarTree->gtFlags |= GTF_VAR_DEF;
12186 }
12187
12188 effectiveOp1 = op1->gtEffectiveVal();
12189
12190 if (effectiveOp1->OperIsConst())
12191 {
12192 op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
12193 tree->gtOp.gtOp1 = op1;
12194 }
12195
12196 /* If we are storing a small type, we might be able to omit a cast */
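            // For example, for "byteIND = CAST<byte>(intExpr)" the cast can be dropped, since the
            // byte-sized store truncates anyway; similarly, a compare feeding a byte-sized store can
            // produce its result directly as TYP_BYTE.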
12197 if ((effectiveOp1->gtOper == GT_IND) && varTypeIsSmall(effectiveOp1->TypeGet()))
12198 {
12199 if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
12200 {
12201 var_types castType = op2->CastToType();
12202
12203 // If we are performing a narrowing cast and
12204 // castType is larger or the same as op1's type
12205 // then we can discard the cast.
12206
12207 if (varTypeIsSmall(castType) && (genTypeSize(castType) >= genTypeSize(effectiveOp1->TypeGet())))
12208 {
12209 tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
12210 }
12211 }
12212 else if (op2->OperIsCompare() && varTypeIsByte(effectiveOp1->TypeGet()))
12213 {
12214 /* We don't need to zero extend the setcc instruction */
12215 op2->gtType = TYP_BYTE;
12216 }
12217 }
12218 // If we introduced a CSE we may need to undo the optimization above
12219 // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
12220            // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
12221 else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
12222 {
12223 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
12224 LclVarDsc* varDsc = &lvaTable[varNum];
12225
12226 /* We again need to zero extend the setcc instruction */
12227 op2->gtType = varDsc->TypeGet();
12228 }
12229 fgAssignSetVarDef(tree);
12230
12231 /* We can't CSE the LHS of an assignment */
12232            /* We also must set this in the pre-morphing phase, otherwise assertionProp doesn't see it */
12233 if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
12234 {
12235 op1->gtFlags |= GTF_DONT_CSE;
12236 }
12237 break;
12238
12239 case GT_EQ:
12240 case GT_NE:
12241
12242 /* Make sure we're allowed to do this */
12243
12244 if (optValnumCSE_phase)
12245 {
12246 // It is not safe to reorder/delete CSE's
12247 break;
12248 }
12249
12250 cns2 = op2;
12251
12252 /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
12253
12254 if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
12255 {
12256 op1 = tree->gtOp.gtOp1;
12257
12258 /* Since this can occur repeatedly we use a while loop */
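                /* For example, "((x + 3) - 5) == 10" is folded here in two iterations:
                   first to "(x + 3) == 15" and then to "x == 12" */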
12259
12260 while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) &&
12261 (op1->gtType == TYP_INT) && (op1->gtOverflow() == false))
12262 {
12263 /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
12264
12265 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
12266 ival2 = cns2->gtIntCon.gtIconVal;
12267
12268 if (op1->gtOper == GT_ADD)
12269 {
12270 ival2 -= ival1;
12271 }
12272 else
12273 {
12274 ival2 += ival1;
12275 }
12276 cns2->gtIntCon.gtIconVal = ival2;
12277
12278#ifdef _TARGET_64BIT_
12279 // we need to properly re-sign-extend or truncate as needed.
12280 cns2->AsIntCon()->TruncateOrSignExtend32();
12281#endif // _TARGET_64BIT_
12282
12283 op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12284 }
12285 }
12286
12287 //
12288 // Here we look for the following tree
12289 //
12290 // EQ/NE
12291 // / \
12292 // op1 CNS 0/1
12293 //
12294 ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
12295
12296 // cast to unsigned allows test for both 0 and 1
12297 if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
12298 {
12299 ival2 = (size_t)cns2->gtIntConCommon.IconValue();
12300 }
12301            // cast to UINT64 allows test for both 0 and 1
12302            else if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
12303 {
12304 ival2 = (size_t)cns2->gtIntConCommon.LngValue();
12305 }
12306
12307 if (ival2 != INT_MAX)
12308 {
12309 // If we don't have a comma and relop, we can't do this optimization
12310 //
12311 if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
12312 {
12313 // Here we look for the following transformation
12314 //
12315 // EQ/NE Possible REVERSE(RELOP)
12316 // / \ / \
12317 // COMMA CNS 0/1 -> COMMA relop_op2
12318 // / \ / \
12319 // x RELOP x relop_op1
12320 // / \
12321 // relop_op1 relop_op2
12322 //
12323 //
12324 //
12325 GenTree* comma = op1;
12326 GenTree* relop = comma->gtOp.gtOp2;
12327
12328 GenTree* relop_op1 = relop->gtOp.gtOp1;
12329
12330 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12331
12332 if (reverse)
12333 {
12334 gtReverseCond(relop);
12335 }
12336
12337 relop->gtOp.gtOp1 = comma;
12338 comma->gtOp.gtOp2 = relop_op1;
12339
12340 // Comma now has fewer nodes underneath it, so we need to regenerate its flags
12341 comma->gtFlags &= ~GTF_ALL_EFFECT;
12342 comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
12343 comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
12344
12345 noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
12346 noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
12347 relop->gtFlags |=
12348 tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
12349
12350 return relop;
12351 }
12352
12353 if (op1->gtOper == GT_COMMA)
12354 {
12355 // Here we look for the following tree
12356 // and when the LCL_VAR is a temp we can fold the tree:
12357 //
12358 // EQ/NE EQ/NE
12359 // / \ / \
12360 // COMMA CNS 0/1 -> RELOP CNS 0/1
12361 // / \ / \
12362 // ASG LCL_VAR
12363 // / \
12364 // LCL_VAR RELOP
12365 // / \
12366 //
12367
12368 GenTree* asg = op1->gtOp.gtOp1;
12369 GenTree* lcl = op1->gtOp.gtOp2;
12370
12371 /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
12372 if (asg->gtOper != GT_ASG)
12373 {
12374 goto SKIP;
12375 }
12376
12377 /* The right side of the comma must be a LCL_VAR temp */
12378 if (lcl->gtOper != GT_LCL_VAR)
12379 {
12380 goto SKIP;
12381 }
12382
12383 unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
12384 noway_assert(lclNum < lvaCount);
12385
12386 /* If the LCL_VAR is not a temp then bail, a temp has a single def */
12387 if (!lvaTable[lclNum].lvIsTemp)
12388 {
12389 goto SKIP;
12390 }
12391
12392#if FEATURE_ANYCSE
12393 /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
12394 // Fix 383856 X86/ARM ILGEN
12395 if (lclNumIsCSE(lclNum))
12396 {
12397 goto SKIP;
12398 }
12399#endif
12400
12401                    /* The destination of the assignment must be a LCL_VAR */
12402 if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
12403 {
12404 goto SKIP;
12405 }
12406
12407                    /* Both LCL_VARs must refer to the same local */
12408 if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
12409 {
12410 goto SKIP;
12411 }
12412
12413 /* If right side of asg is not a RELOP then skip */
12414 if (!asg->gtOp.gtOp2->OperIsCompare())
12415 {
12416 goto SKIP;
12417 }
12418
12419 LclVarDsc* varDsc = lvaTable + lclNum;
12420
12421 /* Set op1 to the right side of asg, (i.e. the RELOP) */
12422 op1 = asg->gtOp.gtOp2;
12423
12424 DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
12425 DEBUG_DESTROY_NODE(lcl);
12426 }
12427
12428 if (op1->OperIsCompare())
12429 {
12430 // Here we look for the following tree
12431 //
12432 // EQ/NE -> RELOP/!RELOP
12433 // / \ / \
12434 // RELOP CNS 0/1
12435 // / \
12436 //
12437 // Note that we will remove/destroy the EQ/NE node and move
12438                    // the RELOP up into its location.
12439
12440 /* Here we reverse the RELOP if necessary */
12441
12442 bool reverse = ((ival2 == 0) == (oper == GT_EQ));
12443
12444 if (reverse)
12445 {
12446 gtReverseCond(op1);
12447 }
12448
12449 /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
12450 op1->gtType = tree->gtType;
12451
12452 noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
12453 op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
12454
12455 DEBUG_DESTROY_NODE(tree);
12456 return op1;
12457 }
12458
12459 //
12460 // Now we check for a compare with the result of an '&' operator
12461 //
12462 // Here we look for the following transformation:
12463 //
12464 // EQ/NE EQ/NE
12465 // / \ / \
12466 // AND CNS 0/1 -> AND CNS 0
12467 // / \ / \
12468 // RSZ/RSH CNS 1 x CNS (1 << y)
12469 // / \
12470 // x CNS_INT +y
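                // For example, "((x >> 5) & 1) == 1" becomes "(x & 0x20) != 0"; when the original
                // constant is 1 the condition is reversed and the constant is set to 0.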
12471
12472 if (op1->gtOper == GT_AND)
12473 {
12474 GenTree* andOp = op1;
12475 GenTree* rshiftOp = andOp->gtOp.gtOp1;
12476
12477 if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
12478 {
12479 goto SKIP;
12480 }
12481
12482 if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
12483 {
12484 goto SKIP;
12485 }
12486
12487 ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
12488
12489 if (shiftAmount < 0)
12490 {
12491 goto SKIP;
12492 }
12493
12494 if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
12495 {
12496 goto SKIP;
12497 }
12498
12499 if (andOp->gtType == TYP_INT)
12500 {
12501 if (shiftAmount > 31)
12502 {
12503 goto SKIP;
12504 }
12505
12506 UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
12507
12508 andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
12509
12510 // Reverse the cond if necessary
12511 if (ival2 == 1)
12512 {
12513 gtReverseCond(tree);
12514 cns2->gtIntCon.gtIconVal = 0;
12515 oper = tree->gtOper;
12516 }
12517 }
12518 else if (andOp->gtType == TYP_LONG)
12519 {
12520 if (shiftAmount > 63)
12521 {
12522 goto SKIP;
12523 }
12524
12525 UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
12526
12527 andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
12528
12529 // Reverse the cond if necessary
12530 if (ival2 == 1)
12531 {
12532 gtReverseCond(tree);
12533 cns2->gtIntConCommon.SetLngValue(0);
12534 oper = tree->gtOper;
12535 }
12536 }
12537
12538 andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
12539
12540 DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
12541 DEBUG_DESTROY_NODE(rshiftOp);
12542 }
12543 } // END if (ival2 != INT_MAX)
12544
12545 SKIP:
12546 /* Now check for compares with small constant longs that can be cast to int */
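            /* For example, "(longVal & 0xFF) == 3" (with both operands TYP_LONG) is narrowed below:
               the operand under the AND is cast to int, the mask and the comparand become GT_CNS_INT,
               and the whole thing turns into an int comparison */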
12547
12548 if (!cns2->OperIsConst())
12549 {
12550 goto COMPARE;
12551 }
12552
12553 if (cns2->TypeGet() != TYP_LONG)
12554 {
12555 goto COMPARE;
12556 }
12557
12558 /* Is the constant 31 bits or smaller? */
12559
12560 if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
12561 {
12562 goto COMPARE;
12563 }
12564
12565            /* Is the first comparand a mask (AND) operation of type long? */
12566
12567 if (op1->gtOper != GT_AND)
12568 {
12569 /* Another interesting case: cast from int */
12570
12571 if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
12572 !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
12573 !op1->gtOverflow()) // cannot be an overflow checking cast
12574 {
12575 /* Simply make this into an integer comparison */
12576
12577 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
12578 tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
12579 }
12580
12581 goto COMPARE;
12582 }
12583
12584 noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
12585
12586 /* Is the result of the mask effectively an INT ? */
12587
12588 GenTree* andMask;
12589 andMask = op1->gtOp.gtOp2;
12590 if (andMask->gtOper != GT_CNS_NATIVELONG)
12591 {
12592 goto COMPARE;
12593 }
12594 if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
12595 {
12596 goto COMPARE;
12597 }
12598
12599 /* Now we know that we can cast gtOp.gtOp1 of AND to int */
12600
12601 op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, false, TYP_INT);
12602
12603 /* now replace the mask node (gtOp.gtOp2 of AND node) */
12604
12605 noway_assert(andMask == op1->gtOp.gtOp2);
12606
12607 ival1 = (int)andMask->gtIntConCommon.LngValue();
12608 andMask->SetOper(GT_CNS_INT);
12609 andMask->gtType = TYP_INT;
12610 andMask->gtIntCon.gtIconVal = ival1;
12611
12612 /* now change the type of the AND node */
12613
12614 op1->gtType = TYP_INT;
12615
12616 /* finally we replace the comparand */
12617
12618 ival2 = (int)cns2->gtIntConCommon.LngValue();
12619 cns2->SetOper(GT_CNS_INT);
12620 cns2->gtType = TYP_INT;
12621
12622 noway_assert(cns2 == op2);
12623 cns2->gtIntCon.gtIconVal = ival2;
12624
12625 goto COMPARE;
12626
12627 case GT_LT:
12628 case GT_LE:
12629 case GT_GE:
12630 case GT_GT:
12631
12632 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
12633 {
12634 if (op2->gtOper == GT_CNS_INT)
12635 {
12636 cns2 = op2;
12637 /* Check for "expr relop 1" */
12638 if (cns2->IsIntegralConst(1))
12639 {
12640 /* Check for "expr >= 1" */
12641 if (oper == GT_GE)
12642 {
12643 /* Change to "expr > 0" */
12644 oper = GT_GT;
12645 goto SET_OPER;
12646 }
12647 /* Check for "expr < 1" */
12648 else if (oper == GT_LT)
12649 {
12650 /* Change to "expr <= 0" */
12651 oper = GT_LE;
12652 goto SET_OPER;
12653 }
12654 }
12655 /* Check for "expr relop -1" */
12656 else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
12657 {
12658 /* Check for "expr <= -1" */
12659 if (oper == GT_LE)
12660 {
12661 /* Change to "expr < 0" */
12662 oper = GT_LT;
12663 goto SET_OPER;
12664 }
12665 /* Check for "expr > -1" */
12666 else if (oper == GT_GT)
12667 {
12668 /* Change to "expr >= 0" */
12669 oper = GT_GE;
12670
12671 SET_OPER:
12672 // IF we get here we should be changing 'oper'
12673 assert(tree->OperGet() != oper);
12674
12675 // Keep the old ValueNumber for 'tree' as the new expr
12676 // will still compute the same value as before
12677 tree->SetOper(oper, GenTree::PRESERVE_VN);
12678 cns2->gtIntCon.gtIconVal = 0;
12679
12680 // vnStore is null before the ValueNumber phase has run
12681 if (vnStore != nullptr)
12682 {
12683 // Update the ValueNumber for 'cns2', as we just changed it to 0
12684 fgValueNumberTreeConst(cns2);
12685 }
12686
12687 op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
12688 }
12689 }
12690 }
12691 }
12692 else // we have an unsigned comparison
12693 {
12694 if (op2->IsIntegralConst(0))
12695 {
12696 if ((oper == GT_GT) || (oper == GT_LE))
12697 {
12698 // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
12699                        // recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
12700                        // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
12701                        // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
12702 // occurs as a result of branch inversion.
12703 oper = (oper == GT_LE) ? GT_EQ : GT_NE;
12704 tree->SetOper(oper, GenTree::PRESERVE_VN);
12705 tree->gtFlags &= ~GTF_UNSIGNED;
12706 }
12707 }
12708 }
12709
12710 COMPARE:
12711
12712 noway_assert(tree->OperKind() & GTK_RELOP);
12713 break;
12714
12715 case GT_MUL:
12716
12717#ifndef _TARGET_64BIT_
12718 if (typ == TYP_LONG)
12719 {
12720 // This must be GTF_MUL_64RSLT
12721 assert(tree->gtIsValid64RsltMul());
12722 return tree;
12723 }
12724#endif // _TARGET_64BIT_
12725 goto CM_OVF_OP;
12726
12727 case GT_SUB:
12728
12729 if (tree->gtOverflow())
12730 {
12731 goto CM_OVF_OP;
12732 }
12733
12734 // TODO #4104: there are a lot of other places where
12735 // this condition is not checked before transformations.
12736 if (fgGlobalMorph)
12737 {
12738 /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
12739
12740 noway_assert(op2);
12741 if (op2->IsCnsIntOrI())
12742 {
12743 /* Negate the constant and change the node to be "+" */
12744
12745 op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
12746 oper = GT_ADD;
12747 tree->ChangeOper(oper);
12748 goto CM_ADD_OP;
12749 }
12750
12751 /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
12752
12753 noway_assert(op1);
12754 if (op1->IsCnsIntOrI())
12755 {
12756 noway_assert(varTypeIsIntOrI(tree));
12757
12758 // The type of the new GT_NEG node cannot just be op2->TypeGet().
12759 // Otherwise we may sign-extend incorrectly in cases where the GT_NEG
12760 // node ends up feeding directly into a cast, for example in
12761 // GT_CAST<ubyte>(GT_SUB(0, s_1.ubyte))
12762 tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2);
12763 fgMorphTreeDone(op2);
12764
12765 oper = GT_ADD;
12766 tree->ChangeOper(oper);
12767 goto CM_ADD_OP;
12768 }
12769
12770 /* No match - exit */
12771 }
12772 break;
12773
12774#ifdef _TARGET_ARM64_
12775 case GT_DIV:
12776 if (!varTypeIsFloating(tree->gtType))
12777 {
12778 // Codegen for this instruction needs to be able to throw two exceptions:
12779 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW);
12780 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO);
12781 }
12782 break;
12783 case GT_UDIV:
12784 // Codegen for this instruction needs to be able to throw one exception:
12785 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO);
12786 break;
12787#endif
12788
12789 case GT_ADD:
12790
12791 CM_OVF_OP:
12792 if (tree->gtOverflow())
12793 {
12794 tree->gtRequestSetFlags();
12795
12796                // Add the exception-throwing basic block to jump to on overflow
12797
12798 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW);
12799
12800 // We can't do any commutative morphing for overflow instructions
12801
12802 break;
12803 }
12804
12805 CM_ADD_OP:
12806
12807 case GT_OR:
12808 case GT_XOR:
12809 case GT_AND:
12810
12811 /* Commute any non-REF constants to the right */
12812
12813 noway_assert(op1);
12814 if (op1->OperIsConst() && (op1->gtType != TYP_REF))
12815 {
12816 // TODO-Review: We used to assert here that
12817 // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
12818 // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
12819 // and would sometimes hit this assertion. This may indicate a missed "remorph".
12820 // Task is to re-enable this assertion and investigate.
12821
12822 /* Swap the operands */
12823 tree->gtOp.gtOp1 = op2;
12824 tree->gtOp.gtOp2 = op1;
12825
12826 op1 = op2;
12827 op2 = tree->gtOp.gtOp2;
12828 }
12829
12830 /* See if we can fold GT_ADD nodes. */
12831
12832 if (oper == GT_ADD)
12833 {
12834 /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
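                /* e.g. "(a + 4) + (b + 6)" becomes "(a + b) + 10" */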
12835
12836 if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
12837 op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
12838 !op1->gtOverflow() && !op2->gtOverflow())
12839 {
12840 cns1 = op1->gtOp.gtOp2;
12841 cns2 = op2->gtOp.gtOp2;
12842 cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
12843#ifdef _TARGET_64BIT_
12844 if (cns1->TypeGet() == TYP_INT)
12845 {
12846 // we need to properly re-sign-extend or truncate after adding two int constants above
12847 cns1->AsIntCon()->TruncateOrSignExtend32();
12848 }
12849#endif //_TARGET_64BIT_
12850
12851 tree->gtOp.gtOp2 = cns1;
12852 DEBUG_DESTROY_NODE(cns2);
12853
12854 op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
12855 op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
12856 DEBUG_DESTROY_NODE(op2);
12857 op2 = tree->gtOp.gtOp2;
12858 }
12859
12860 if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
12861 {
12862 /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
12863 CLANG_FORMAT_COMMENT_ANCHOR;
12864
12865 if (op1->gtOper == GT_ADD && //
12866 !gtIsActiveCSE_Candidate(op1) && //
12867 !op1->gtOverflow() && //
12868 op1->gtOp.gtOp2->IsCnsIntOrI() && //
12869 (op1->gtOp.gtOp2->OperGet() == op2->OperGet()) && //
12870 (op1->gtOp.gtOp2->TypeGet() != TYP_REF) && // Don't fold REFs
12871 (op2->TypeGet() != TYP_REF)) // Don't fold REFs
12872 {
12873 cns1 = op1->gtOp.gtOp2;
12874 op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
12875 op2->gtIntConCommon.IconValue());
12876#ifdef _TARGET_64BIT_
12877 if (op2->TypeGet() == TYP_INT)
12878 {
12879 // we need to properly re-sign-extend or truncate after adding two int constants above
12880 op2->AsIntCon()->TruncateOrSignExtend32();
12881 }
12882#endif //_TARGET_64BIT_
12883
12884 if (cns1->OperGet() == GT_CNS_INT)
12885 {
12886 op2->gtIntCon.gtFieldSeq =
12887 GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
12888 }
12889 DEBUG_DESTROY_NODE(cns1);
12890
12891 tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
12892 DEBUG_DESTROY_NODE(op1);
12893 op1 = tree->gtOp.gtOp1;
12894 }
12895
12896 // Fold (x + 0).
12897
12898 if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
12899 {
12900
12901 // If this addition is adding an offset to a null pointer,
12902 // avoid the work and yield the null pointer immediately.
12903 // Dereferencing the pointer in either case will have the
12904 // same effect.
12905
12906 if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
12907 ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
12908 {
12909 op2->gtType = tree->gtType;
12910 DEBUG_DESTROY_NODE(op1);
12911 DEBUG_DESTROY_NODE(tree);
12912 return op2;
12913 }
12914
12915 // Remove the addition iff it won't change the tree type
12916 // to TYP_REF.
12917
12918 if (!gtIsActiveCSE_Candidate(op2) &&
12919 ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
12920 {
12921 if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
12922 (op2->gtIntCon.gtFieldSeq != nullptr) &&
12923 (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
12924 {
12925 fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
12926 }
12927
12928 DEBUG_DESTROY_NODE(op2);
12929 DEBUG_DESTROY_NODE(tree);
12930
12931 return op1;
12932 }
12933 }
12934 }
12935 }
12936 /* See if we can fold GT_MUL by const nodes */
12937 else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
12938 {
12939#ifndef _TARGET_64BIT_
12940 noway_assert(typ <= TYP_UINT);
12941#endif // _TARGET_64BIT_
12942 noway_assert(!tree->gtOverflow());
12943
12944 ssize_t mult = op2->gtIntConCommon.IconValue();
12945 bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12946 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
12947
12948 assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
12949
12950 if (mult == 0)
12951 {
12952 // We may be able to throw away op1 (unless it has side-effects)
12953
12954 if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
12955 {
12956 DEBUG_DESTROY_NODE(op1);
12957 DEBUG_DESTROY_NODE(tree);
12958 return op2; // Just return the "0" node
12959 }
12960
12961 // We need to keep op1 for the side-effects. Hang it off
12962 // a GT_COMMA node
12963
12964 tree->ChangeOper(GT_COMMA);
12965 return tree;
12966 }
12967
12968 size_t abs_mult = (mult >= 0) ? mult : -mult;
12969 size_t lowestBit = genFindLowestBit(abs_mult);
12970 bool changeToShift = false;
12971
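            // For example, "x * 8" becomes "x << 3", and "x * -8" becomes "(-x) << 3".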
12972 // is it a power of two? (positive or negative)
12973 if (abs_mult == lowestBit)
12974 {
12975 // if negative negate (min-int does not need negation)
12976 if (mult < 0 && mult != SSIZE_T_MIN)
12977 {
12978 // The type of the new GT_NEG node cannot just be op1->TypeGet().
12979 // Otherwise we may sign-extend incorrectly in cases where the GT_NEG
12980 // node ends up feeding directly a cast, for example in
12981 // GT_CAST<ubyte>(GT_MUL(-1, s_1.ubyte))
12982 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1);
12983 fgMorphTreeDone(op1);
12984 }
12985
12986 // If "op2" is a constant array index, the other multiplicand must be a constant.
12987 // Transfer the annotation to the other one.
12988 if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
12989 op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
12990 {
12991 assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
12992 GenTree* otherOp = op1;
12993 if (otherOp->OperGet() == GT_NEG)
12994 {
12995 otherOp = otherOp->gtOp.gtOp1;
12996 }
12997 assert(otherOp->OperGet() == GT_CNS_INT);
12998 assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
12999 otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
13000 }
13001
13002 if (abs_mult == 1)
13003 {
13004 DEBUG_DESTROY_NODE(op2);
13005 DEBUG_DESTROY_NODE(tree);
13006 return op1;
13007 }
13008
13009 /* Change the multiplication into a shift by log2(val) bits */
13010 op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
13011 changeToShift = true;
13012 }
13013#if LEA_AVAILABLE
13014 else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
13015 {
13016 int shift = genLog2(lowestBit);
13017 ssize_t factor = abs_mult >> shift;
13018
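                // For example, "x * 40": lowestBit is 8, so shift is 3 and factor is 5, and the
                // tree becomes "(x * 5) << 3", which the backend can typically emit as a scaled
                // LEA plus a shift.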
13019 if (factor == 3 || factor == 5 || factor == 9)
13020 {
13021 // if negative negate (min-int does not need negation)
13022 if (mult < 0 && mult != SSIZE_T_MIN)
13023 {
13024 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1);
13025 fgMorphTreeDone(op1);
13026 }
13027
13028 GenTree* factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
13029 if (op2IsConstIndex)
13030 {
13031 factorIcon->AsIntCon()->gtFieldSeq =
13032 GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
13033 }
13034
13035 // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
13036 tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
13037 fgMorphTreeDone(op1);
13038
13039 op2->gtIntConCommon.SetIconValue(shift);
13040 changeToShift = true;
13041 }
13042 }
13043#endif // LEA_AVAILABLE
13044 if (changeToShift)
13045 {
13046 // vnStore is null before the ValueNumber phase has run
13047 if (vnStore != nullptr)
13048 {
13049 // Update the ValueNumber for 'op2', as we just changed the constant
13050 fgValueNumberTreeConst(op2);
13051 }
13052 oper = GT_LSH;
13053 // Keep the old ValueNumber for 'tree' as the new expr
13054 // will still compute the same value as before
13055 tree->ChangeOper(oper, GenTree::PRESERVE_VN);
13056
13057 goto DONE_MORPHING_CHILDREN;
13058 }
13059 }
13060 else if (fgOperIsBitwiseRotationRoot(oper))
13061 {
13062 tree = fgRecognizeAndMorphBitwiseRotation(tree);
13063
13064 // fgRecognizeAndMorphBitwiseRotation may return a new tree
13065 oper = tree->OperGet();
13066 typ = tree->TypeGet();
13067 op1 = tree->gtOp.gtOp1;
13068 op2 = tree->gtOp.gtOp2;
13069 }
13070
13071 break;
13072
13073 case GT_NOT:
13074 case GT_NEG:
13075
13076 /* Any constant cases should have been folded earlier */
13077 noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
13078 break;
13079
13080 case GT_CKFINITE:
13081
13082 noway_assert(varTypeIsFloating(op1->TypeGet()));
13083
13084 fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN);
13085 break;
13086
13087 case GT_OBJ:
13088 // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
13089 // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
13090 // is a local or clsVar, even if it has been address-exposed.
13091 if (op1->OperGet() == GT_ADDR)
13092 {
13093 tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
13094 }
13095 break;
13096
13097 case GT_IND:
13098
13099 // Can not remove a GT_IND if it is currently a CSE candidate.
13100 if (gtIsActiveCSE_Candidate(tree))
13101 {
13102 break;
13103 }
13104
13105 bool foldAndReturnTemp;
13106 foldAndReturnTemp = false;
13107 temp = nullptr;
13108 ival1 = 0;
13109
13110 // Don't remove a volatile GT_IND, even if the address points to a local variable.
13111 if ((tree->gtFlags & GTF_IND_VOLATILE) == 0)
13112 {
13113 /* Try to Fold *(&X) into X */
13114 if (op1->gtOper == GT_ADDR)
13115 {
13116 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13117 if (gtIsActiveCSE_Candidate(op1))
13118 {
13119 break;
13120 }
13121
13122 temp = op1->gtOp.gtOp1; // X
13123
13124 // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
13125 // they are the *same* struct type. In fact, they almost certainly aren't. If the
13126 // address has an associated field sequence, that identifies this case; go through
13127 // the "lcl_fld" path rather than this one.
13128 FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
13129 if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
13130 {
13131 foldAndReturnTemp = true;
13132 }
13133 else if (temp->OperIsLocal())
13134 {
13135 unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
13136 LclVarDsc* varDsc = &lvaTable[lclNum];
13137
13138                        // We will try to optimize when we have a promoted struct with a zero lvFldOffset
13139 if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
13140 {
13141 noway_assert(varTypeIsStruct(varDsc));
13142
13143 // We will try to optimize when we have a single field struct that is being struct promoted
13144 if (varDsc->lvFieldCnt == 1)
13145 {
13146 unsigned lclNumFld = varDsc->lvFieldLclStart;
13147 // just grab the promoted field
13148 LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
13149
13150                                // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
13151 // is zero
13152 if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
13153 {
13154 // We can just use the existing promoted field LclNum
13155 temp->gtLclVarCommon.SetLclNum(lclNumFld);
13156 temp->gtType = fieldVarDsc->TypeGet();
13157
13158 foldAndReturnTemp = true;
13159 }
13160 }
13161 }
13162 // If the type of the IND (typ) is a "small int", and the type of the local has the
13163 // same width, then we can reduce to just the local variable -- it will be
13164 // correctly normalized, and signed/unsigned differences won't matter.
13165 //
13166 // The below transformation cannot be applied if the local var needs to be normalized on load.
13167 else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
13168 !lvaTable[lclNum].lvNormalizeOnLoad())
13169 {
13170 tree->gtType = typ = temp->TypeGet();
13171 foldAndReturnTemp = true;
13172 }
13173 else if (!varTypeIsStruct(typ) && (lvaTable[lclNum].lvType == typ) &&
13174 !lvaTable[lclNum].lvNormalizeOnLoad())
13175 {
13176 tree->gtType = typ = temp->TypeGet();
13177 foldAndReturnTemp = true;
13178 }
13179 else
13180 {
13181 // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
13182 // nullptr)
13183 assert(fieldSeq == nullptr);
13184 bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
13185 assert(b || fieldSeq == nullptr);
13186
13187 if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
13188 {
13189 // Append the field sequence, change the type.
13190 temp->AsLclFld()->gtFieldSeq =
13191 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13192 temp->gtType = typ;
13193
13194 foldAndReturnTemp = true;
13195 }
13196 }
13197                        // Otherwise we will fold this into a GT_LCL_FLD below
13198 // where we check (temp != nullptr)
13199 }
13200 else // !temp->OperIsLocal()
13201 {
13202 // We don't try to fold away the GT_IND/GT_ADDR for this case
13203 temp = nullptr;
13204 }
13205 }
13206 else if (op1->OperGet() == GT_ADD)
13207 {
13208 /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
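                    /* For example, IND(ADD(ADDR(localStruct), 8)) can become a GT_LCL_FLD of
                       localStruct at offset 8 -- see the (temp != nullptr) handling further below */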
13209
13210 if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
13211 opts.OptimizationEnabled())
13212 {
13213 // No overflow arithmetic with pointers
13214 noway_assert(!op1->gtOverflow());
13215
13216 temp = op1->gtOp.gtOp1->gtOp.gtOp1;
13217 if (!temp->OperIsLocal())
13218 {
13219 temp = nullptr;
13220 break;
13221 }
13222
13223 // Can not remove the GT_ADDR if it is currently a CSE candidate.
13224 if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
13225 {
13226 break;
13227 }
13228
13229 ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
13230 fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
13231
13232 // Does the address have an associated zero-offset field sequence?
13233 FieldSeqNode* addrFieldSeq = nullptr;
13234 if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
13235 {
13236 fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
13237 }
13238
13239 if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
13240 {
13241 noway_assert(!varTypeIsGC(temp->TypeGet()));
13242 foldAndReturnTemp = true;
13243 }
13244 else
13245 {
13246 // The emitter can't handle large offsets
13247 if (ival1 != (unsigned short)ival1)
13248 {
13249 break;
13250 }
13251
13252 // The emitter can get confused by invalid offsets
13253 if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
13254 {
13255 break;
13256 }
13257
13258#ifdef _TARGET_ARM_
13259 // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
13260 //
13261 if (varTypeIsFloating(typ))
13262 {
13263 if ((ival1 % emitTypeSize(typ)) != 0)
13264 {
13265 tree->gtFlags |= GTF_IND_UNALIGNED;
13266 break;
13267 }
13268 }
13269#endif
13270 }
13271 // Now we can fold this into a GT_LCL_FLD below
13272 // where we check (temp != nullptr)
13273 }
13274 }
13275 }
13276
13277 // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
13278 // - We may have a load of a local where the load has a different type than the local
13279 // - We may have a load of a local plus an offset
13280 //
13281 // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
13282 // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
13283 // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
13284 // out-of-bounds w.r.t. the local).
13285 if ((temp != nullptr) && !foldAndReturnTemp)
13286 {
13287 assert(temp->OperIsLocal());
13288
13289 const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum;
13290 LclVarDsc* const varDsc = &lvaTable[lclNum];
13291
13292 const var_types tempTyp = temp->TypeGet();
13293 const bool useExactSize = varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
13294 const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
13295
13296 // Make sure we do not enregister this lclVar.
13297 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
13298
13299 // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
13300 // a lclFld: the access represented by an lclFld node must begin at or after the start of the
13301 // lclVar and must not extend beyond the end of the lclVar.
13302 if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize))
13303 {
13304                    // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival1'
13305                    // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival1'
13306                    // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
13307 //
13308 if (temp->OperGet() == GT_LCL_FLD)
13309 {
13310 temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
13311 temp->AsLclFld()->gtFieldSeq =
13312 GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
13313 }
13314 else
13315 {
13316 temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
13317 temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
13318 if (fieldSeq != nullptr)
13319 { // If it does represent a field, note that.
13320 temp->AsLclFld()->gtFieldSeq = fieldSeq;
13321 }
13322 }
13323 temp->gtType = tree->gtType;
13324 foldAndReturnTemp = true;
13325 }
13326 }
13327
13328 if (foldAndReturnTemp)
13329 {
13330 assert(temp != nullptr);
13331 assert(temp->TypeGet() == typ);
13332 assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
13333
13334 // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
13335 // 'temp' because a GT_ADDR always marks it for its operand.
13336 temp->gtFlags &= ~GTF_DONT_CSE;
13337 temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
13338
13339 if (op1->OperGet() == GT_ADD)
13340 {
13341 DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
13342 DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
13343 }
13344 DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
13345 DEBUG_DESTROY_NODE(tree); // GT_IND
13346
13347 // If the result of the fold is a local var, we may need to perform further adjustments e.g. for
13348 // normalization.
13349 if (temp->OperIs(GT_LCL_VAR))
13350 {
13351#ifdef DEBUG
13352 // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
13353 // and the node in question must have this bit set (as it has already been morphed).
13354 temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
13355#endif // DEBUG
13356 const bool forceRemorph = true;
13357 temp = fgMorphLocalVar(temp, forceRemorph);
13358#ifdef DEBUG
13359                // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
13360 // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
13361 // returns.
13362 temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13363#endif // DEBUG
13364 }
13365
13366 return temp;
13367 }
13368
13369 // Only do this optimization when we are in the global optimizer. Doing this after value numbering
13370 // could result in an invalid value number for the newly generated GT_IND node.
13371 if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
13372 {
13373 // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
13374 // TBD: this transformation is currently necessary for correctness -- it might
13375 // be good to analyze the failures that result if we don't do this, and fix them
13376 // in other ways. Ideally, this should be optional.
13377 GenTree* commaNode = op1;
13378 unsigned treeFlags = tree->gtFlags;
13379 commaNode->gtType = typ;
13380 commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
13381 // dangerous, clear the GTF_REVERSE_OPS at
13382 // least.
13383#ifdef DEBUG
13384 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13385#endif
13386 while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
13387 {
13388 commaNode = commaNode->gtOp.gtOp2;
13389 commaNode->gtType = typ;
13390 commaNode->gtFlags =
13391 (treeFlags & ~GTF_REVERSE_OPS & ~GTF_ASG & ~GTF_CALL); // Bashing the GT_COMMA flags here is
13392 // dangerous, clear the GTF_REVERSE_OPS, GT_ASG, and GT_CALL at
13393 // least.
13394 commaNode->gtFlags |=
13395 ((commaNode->gtOp.gtOp1->gtFlags | commaNode->gtOp.gtOp2->gtFlags) & (GTF_ASG | GTF_CALL));
13396#ifdef DEBUG
13397 commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13398#endif
13399 }
13400 bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
13401 ArrayInfo arrInfo;
13402 if (wasArrIndex)
13403 {
13404 bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
13405 assert(b);
13406 GetArrayInfoMap()->Remove(tree);
13407 }
13408 tree = op1;
13409 GenTree* addr = commaNode->gtOp.gtOp2;
13410 op1 = gtNewIndir(typ, addr);
13411 // This is very conservative
13412 op1->gtFlags |= treeFlags & ~GTF_ALL_EFFECT & ~GTF_IND_NONFAULTING;
13413 op1->gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT);
13414
13415 if (wasArrIndex)
13416 {
13417 GetArrayInfoMap()->Set(op1, arrInfo);
13418 }
13419#ifdef DEBUG
13420 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13421#endif
13422 commaNode->gtOp.gtOp2 = op1;
13423 commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
13424 return tree;
13425 }
13426
13427 break;
13428
13429 case GT_ADDR:
13430
13431 // Can not remove op1 if it is currently a CSE candidate.
13432 if (gtIsActiveCSE_Candidate(op1))
13433 {
13434 break;
13435 }
13436
13437 if (op1->OperGet() == GT_IND)
13438 {
13439 if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
13440 {
13441 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13442 if (gtIsActiveCSE_Candidate(tree))
13443 {
13444 break;
13445 }
13446
13447 // Perform the transform ADDR(IND(...)) == (...).
13448 GenTree* addr = op1->gtOp.gtOp1;
13449
13450 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13451
13452 DEBUG_DESTROY_NODE(op1);
13453 DEBUG_DESTROY_NODE(tree);
13454
13455 return addr;
13456 }
13457 }
13458 else if (op1->OperGet() == GT_OBJ)
13459 {
13460 // Can not remove a GT_ADDR if it is currently a CSE candidate.
13461 if (gtIsActiveCSE_Candidate(tree))
13462 {
13463 break;
13464 }
13465
13466 // Perform the transform ADDR(OBJ(...)) == (...).
13467 GenTree* addr = op1->AsObj()->Addr();
13468
13469 noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
13470
13471 DEBUG_DESTROY_NODE(op1);
13472 DEBUG_DESTROY_NODE(tree);
13473
13474 return addr;
13475 }
13476 else if (op1->gtOper == GT_CAST)
13477 {
13478 GenTree* casting = op1->gtCast.CastOp();
13479 if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
13480 {
13481 DEBUG_DESTROY_NODE(op1);
13482 tree->gtOp.gtOp1 = op1 = casting;
13483 }
13484 }
13485 else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
13486 {
13487 // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
13488 // (Be sure to mark "z" as an l-value...)
13489
13490 GenTreePtrStack commas(getAllocator(CMK_ArrayStack));
13491 for (GenTree* comma = op1; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2())
13492 {
13493 commas.Push(comma);
13494 }
13495 GenTree* commaNode = commas.Top();
13496
13497 // The top-level addr might be annotated with a zeroOffset field.
13498 FieldSeqNode* zeroFieldSeq = nullptr;
13499 bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
13500 tree = op1;
13501 commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
13502
13503 // If the node we're about to put under a GT_ADDR is an indirection, it
13504 // doesn't need to be materialized, since we only want the addressing mode. Because
13505 // of this, this GT_IND is not a faulting indirection and we don't have to extract it
13506 // as a side effect.
13507 GenTree* commaOp2 = commaNode->gtOp.gtOp2;
13508 if (commaOp2->OperIsBlk())
13509 {
13510 commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
13511 }
13512 if (commaOp2->gtOper == GT_IND)
13513 {
13514 commaOp2->gtFlags |= GTF_IND_NONFAULTING;
13515 commaOp2->gtFlags &= ~GTF_EXCEPT;
13516 commaOp2->gtFlags |= (commaOp2->gtOp.gtOp1->gtFlags & GTF_EXCEPT);
13517 }
13518
13519 op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
13520
13521 if (isZeroOffset)
13522 {
13523 // Transfer the annotation to the new GT_ADDR node.
13524 GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
13525 }
13526 commaNode->gtOp.gtOp2 = op1;
13527 // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
13528 // might give op1 a type different from byref (like, say, native int). So now go back and give
13529 // all the comma nodes the type of op1.
13530 // TODO: the comma flag update below is conservative and can be improved.
13531 // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
13532            // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
13533
13534 while (!commas.Empty())
13535 {
13536 GenTree* comma = commas.Pop();
13537 comma->gtType = op1->gtType;
13538 comma->gtFlags |= op1->gtFlags;
13539#ifdef DEBUG
13540 comma->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
13541#endif
13542 gtUpdateNodeSideEffects(comma);
13543 }
13544
13545 return tree;
13546 }
13547
13548 /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
13549 op1->gtFlags |= GTF_DONT_CSE;
13550 break;
13551
13552 case GT_COLON:
13553 if (fgGlobalMorph)
13554 {
13555 /* Mark the nodes that are conditionally executed */
13556 fgWalkTreePre(&tree, gtMarkColonCond);
13557 }
13558 /* Since we're doing this postorder we clear this if it got set by a child */
13559 fgRemoveRestOfBlock = false;
13560 break;
13561
13562 case GT_COMMA:
13563
13564 /* Special case: trees that don't produce a value */
13565 if (op2->OperIs(GT_ASG) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2))
13566 {
13567 typ = tree->gtType = TYP_VOID;
13568 }
13569
13570 // If we are in the Valuenum CSE phase then don't morph away anything as these
13571 // nodes may have CSE defs/uses in them.
13572 //
13573 if (!optValnumCSE_phase)
13574 {
13575 // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
13576 // is all we need.
13577
13578 GenTree* op1SideEffects = nullptr;
13579 // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
13580 // hoisted expressions in loops.
13581 gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
13582 if (op1SideEffects)
13583 {
13584 // Replace the left hand side with the side effect list.
13585 tree->gtOp.gtOp1 = op1SideEffects;
13586 gtUpdateNodeSideEffects(tree);
13587 }
13588 else
13589 {
13590 op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13591 DEBUG_DESTROY_NODE(tree);
13592 DEBUG_DESTROY_NODE(op1);
13593 return op2;
13594 }
13595
13596 /* If the right operand is just a void nop node, throw it away */
13597 if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
13598 {
13599 op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
13600 DEBUG_DESTROY_NODE(tree);
13601 DEBUG_DESTROY_NODE(op2);
13602 return op1;
13603 }
13604 }
13605
13606 break;
13607
13608 case GT_JTRUE:
13609
13610 /* Special case if fgRemoveRestOfBlock is set to true */
13611 if (fgRemoveRestOfBlock)
13612 {
13613 if (fgIsCommaThrow(op1, true))
13614 {
13615 GenTree* throwNode = op1->gtOp.gtOp1;
13616 noway_assert(throwNode->gtType == TYP_VOID);
13617
13618 JITDUMP("Removing [%06d] GT_JTRUE as the block now unconditionally throws an exception.\n",
13619 dspTreeID(tree));
13620 DEBUG_DESTROY_NODE(tree);
13621
13622 return throwNode;
13623 }
13624
13625 noway_assert(op1->OperKind() & GTK_RELOP);
13626 noway_assert(op1->gtFlags & GTF_EXCEPT);
13627
13628 // We need to keep op1 for the side-effects. Hang it off
13629 // a GT_COMMA node
13630
13631 JITDUMP("Keeping side-effects by bashing [%06d] GT_JTRUE into a GT_COMMA.\n", dspTreeID(tree));
13632
13633 tree->ChangeOper(GT_COMMA);
13634 tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
13635
13636 // Additionally, since we're eliminating the JTRUE,
13637 // codegen won't like it if op1 is a RELOP of longs, floats, or doubles,
13638 // so we change it into a GT_COMMA as well.
13639 JITDUMP("Also bashing [%06d] (a relop) into a GT_COMMA.\n", dspTreeID(op1));
13640 op1->ChangeOper(GT_COMMA);
13641 op1->gtFlags &= ~GTF_UNSIGNED; // Clear the unsigned flag if it was set on the relop
13642 op1->gtType = op1->gtOp.gtOp1->gtType;
13643
13644 return tree;
13645 }
13646
13647 default:
13648 break;
13649 }
13650
13651 assert(oper == tree->gtOper);
13652
13653 // If we are in the Valuenum CSE phase then don't morph away anything as these
13654 // nodes may have CSE defs/uses in them.
13655 //
13656 if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
13657 {
13658 /* Check for op1 as a GT_COMMA with an unconditional throw node */
13659 if (op1 && fgIsCommaThrow(op1, true))
13660 {
13661 if ((op1->gtFlags & GTF_COLON_COND) == 0)
13662 {
13663 /* We can safely throw out the rest of the statements */
13664 fgRemoveRestOfBlock = true;
13665 }
13666
13667 GenTree* throwNode = op1->gtOp.gtOp1;
13668 noway_assert(throwNode->gtType == TYP_VOID);
13669
13670 if (oper == GT_COMMA)
13671 {
13672 /* Both tree and op1 are GT_COMMA nodes */
13673 /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
13674 tree->gtOp.gtOp1 = throwNode;
13675
13676 // Possibly reset the assignment flag
13677 if (((throwNode->gtFlags & GTF_ASG) == 0) && ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0)))
13678 {
13679 tree->gtFlags &= ~GTF_ASG;
13680 }
13681
13682 return tree;
13683 }
13684 else if (oper != GT_NOP)
13685 {
13686 if (genActualType(typ) == genActualType(op1->gtType))
13687 {
13688 /* The types match so, return the comma throw node as the new tree */
13689 return op1;
13690 }
13691 else
13692 {
13693 if (typ == TYP_VOID)
13694 {
13695 // Return the throw node
13696 return throwNode;
13697 }
13698 else
13699 {
13700 GenTree* commaOp2 = op1->gtOp.gtOp2;
13701
13702 // need type of oper to be same as tree
13703 if (typ == TYP_LONG)
13704 {
13705 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13706 commaOp2->gtIntConCommon.SetLngValue(0);
13707 /* Change the types of oper and commaOp2 to TYP_LONG */
13708 op1->gtType = commaOp2->gtType = TYP_LONG;
13709 }
13710 else if (varTypeIsFloating(typ))
13711 {
13712 commaOp2->ChangeOperConst(GT_CNS_DBL);
13713 commaOp2->gtDblCon.gtDconVal = 0.0;
13714 /* Change the types of oper and commaOp2 to TYP_DOUBLE */
13715 op1->gtType = commaOp2->gtType = TYP_DOUBLE;
13716 }
13717 else
13718 {
13719 commaOp2->ChangeOperConst(GT_CNS_INT);
13720 commaOp2->gtIntConCommon.SetIconValue(0);
13721 /* Change the types of oper and commaOp2 to TYP_INT */
13722 op1->gtType = commaOp2->gtType = TYP_INT;
13723 }
13724
13725 /* Return the GT_COMMA node as the new tree */
13726 return op1;
13727 }
13728 }
13729 }
13730 }
13731
13732 /* Check for op2 as a GT_COMMA with an unconditional throw */
13733
13734 if (op2 && fgIsCommaThrow(op2, true))
13735 {
13736 if ((op2->gtFlags & GTF_COLON_COND) == 0)
13737 {
13738 /* We can safely throw out the rest of the statements */
13739 fgRemoveRestOfBlock = true;
13740 }
13741
13742 // If op1 has no side-effects
13743 if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
13744 {
13745 // If tree is an asg node
13746 if (tree->OperIs(GT_ASG))
13747 {
13748 /* Return the throw node as the new tree */
13749 return op2->gtOp.gtOp1;
13750 }
13751
13752 if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
13753 {
13754 /* Return the throw node as the new tree */
13755 return op2->gtOp.gtOp1;
13756 }
13757
13758 // If tree is a comma node
13759 if (tree->OperGet() == GT_COMMA)
13760 {
13761 /* Return the throw node as the new tree */
13762 return op2->gtOp.gtOp1;
13763 }
13764
13765 /* for the shift nodes the type of op2 can differ from the tree type */
13766 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
13767 {
13768 noway_assert(GenTree::OperIsShiftOrRotate(oper));
13769
13770 GenTree* commaOp2 = op2->gtOp.gtOp2;
13771
13772 commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
13773 commaOp2->gtIntConCommon.SetLngValue(0);
13774
13775 /* Change the types of oper and commaOp2 to TYP_LONG */
13776 op2->gtType = commaOp2->gtType = TYP_LONG;
13777 }
13778
13779 if ((genActualType(typ) == TYP_INT) &&
13780 (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
13781 {
13782 // An example case is comparison (say GT_GT) of two longs or floating point values.
13783
13784 GenTree* commaOp2 = op2->gtOp.gtOp2;
13785
13786 commaOp2->ChangeOperConst(GT_CNS_INT);
13787 commaOp2->gtIntCon.gtIconVal = 0;
13788 /* Change the types of oper and commaOp2 to TYP_INT */
13789 op2->gtType = commaOp2->gtType = TYP_INT;
13790 }
13791
13792 if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
13793 {
13794 noway_assert(tree->OperGet() == GT_ADD);
13795
13796 GenTree* commaOp2 = op2->gtOp.gtOp2;
13797
13798 commaOp2->ChangeOperConst(GT_CNS_INT);
13799 commaOp2->gtIntCon.gtIconVal = 0;
13800 /* Change the types of oper and commaOp2 to TYP_BYREF */
13801 op2->gtType = commaOp2->gtType = TYP_BYREF;
13802 }
13803
13804 /* types should now match */
13805 noway_assert((genActualType(typ) == genActualType(op2->gtType)));
13806
13807 /* Return the GT_COMMA node as the new tree */
13808 return op2;
13809 }
13810 }
13811 }
13812
13813 /*-------------------------------------------------------------------------
13814 * Optional morphing is done if tree transformations are permitted
13815 */
13816
13817 if ((opts.compFlags & CLFLG_TREETRANS) == 0)
13818 {
13819 return tree;
13820 }
13821
13822 tree = fgMorphSmpOpOptional(tree->AsOp());
13823
13824 return tree;
13825}
13826#ifdef _PREFAST_
13827#pragma warning(pop)
13828#endif
13829
13830GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
13831{
13832 genTreeOps oper = tree->gtOper;
13833 GenTree* op1 = tree->gtOp1;
13834 GenTree* op2 = tree->gtOp2;
13835 var_types typ = tree->TypeGet();
13836
13837 if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
13838 {
13839 /* Swap the operands so that the more expensive one is 'op1' */
13840
13841 if (tree->gtFlags & GTF_REVERSE_OPS)
13842 {
13843 tree->gtOp1 = op2;
13844 tree->gtOp2 = op1;
13845
13846 op2 = op1;
13847 op1 = tree->gtOp1;
13848
13849 tree->gtFlags &= ~GTF_REVERSE_OPS;
13850 }
13851
13852 if (oper == op2->gtOper)
13853 {
13854 /* Reorder nested operators at the same precedence level to be
13855 left-recursive. For example, change "(a+(b+c))" to the
13856 equivalent expression "((a+b)+c)".
13857 */
13858
13859 /* Things are handled differently for floating-point operators */
13860
13861 if (!varTypeIsFloating(tree->TypeGet()))
13862 {
13863 fgMoveOpsLeft(tree);
13864 op1 = tree->gtOp1;
13865 op2 = tree->gtOp2;
13866 }
13867 }
13868 }
13869
13870#if REARRANGE_ADDS
13871
13872 /* Change "((x+icon)+y)" to "((x+y)+icon)"
13873 Don't reorder floating-point operations */
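// For example, ((x + 8) + y) becomes ((x + y) + 8).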
13874
13875 if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
13876 varTypeIsIntegralOrI(typ))
13877 {
13878 GenTree* ad2 = op1->gtOp.gtOp2;
13879
13880 if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
13881 {
13882 // This takes
13883 // + (tree)
13884 // / \
13885 // / \
13886 // / \
13887 // + (op1) op2
13888 // / \
13889 // \
13890 // ad2
13891 //
13892 // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
13893 // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), we have a TYP_INT node
13894 // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same
13895 // type as (tree).
13896 //
13897 // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
13898 // necessary
13899
13900 if (varTypeIsGC(op2->TypeGet()))
13901 {
13902 noway_assert(varTypeIsGC(typ));
13903 op1->gtType = typ;
13904 }
13905 tree->gtOp2 = ad2;
13906
13907 op1->gtOp.gtOp2 = op2;
13908 op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
13909
13910 op2 = tree->gtOp2;
13911 }
13912 }
13913
13914#endif
13915
13916 /*-------------------------------------------------------------------------
13917 * Perform optional oper-specific postorder morphing
13918 */
13919
13920 switch (oper)
13921 {
13922 case GT_ASG:
13923 if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
13924 {
13925 if (tree->OperIsCopyBlkOp())
13926 {
13927 return fgMorphCopyBlock(tree);
13928 }
13929 else
13930 {
13931 return fgMorphInitBlock(tree);
13932 }
13933 }
13934
13935 if (typ == TYP_LONG)
13936 {
13937 break;
13938 }
13939
13940 /* Make sure we're allowed to do this */
13941
13942 if (optValnumCSE_phase)
13943 {
13944 // It is not safe to reorder/delete CSE's
13945 break;
13946 }
13947
13948 if (op2->gtFlags & GTF_ASG)
13949 {
13950 break;
13951 }
13952
13953 if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
13954 {
13955 break;
13956 }
13957
13958 /* Special case: a cast that can be thrown away */
13959
13960 // TODO-Cleanup: fgMorphSmp does a similar optimization. However, it removes only
13961 // one cast and sometimes there is another one after it that gets removed by this
13962 // code. fgMorphSmp should be improved to remove all redundant casts so this code
13963 // can be removed.
13964
13965 if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
13966 {
13967 var_types srct;
13968 var_types cast;
13969 var_types dstt;
13970
13971 srct = op2->gtCast.CastOp()->TypeGet();
13972 cast = (var_types)op2->CastToType();
13973 dstt = op1->TypeGet();
13974
13975 /* Make sure these are all ints and precision is not lost */
13976
13977 if (genTypeSize(cast) >= genTypeSize(dstt) && dstt <= TYP_INT && srct <= TYP_INT)
13978 {
13979 op2 = tree->gtOp2 = op2->gtCast.CastOp();
13980 }
13981 }
13982
13983 break;
13984
13985 case GT_MUL:
13986
13987 /* Check for the case "(val + icon) * icon" */
13988
13989 if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
13990 {
13991 GenTree* add = op1->gtOp.gtOp2;
13992
13993 if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
13994 {
13995 if (tree->gtOverflow() || op1->gtOverflow())
13996 {
13997 break;
13998 }
13999
14000 ssize_t imul = op2->gtIntCon.gtIconVal;
14001 ssize_t iadd = add->gtIntCon.gtIconVal;
14002
14003 /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
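// For example, (x + 3) * 4 becomes (x * 4) + 12.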
14004
14005 oper = GT_ADD;
14006 tree->ChangeOper(oper);
14007
14008 op2->gtIntCon.gtIconVal = iadd * imul;
14009
14010 op1->ChangeOper(GT_MUL);
14011
14012 add->gtIntCon.gtIconVal = imul;
14013#ifdef _TARGET_64BIT_
14014 if (add->gtType == TYP_INT)
14015 {
14016 // we need to properly re-sign-extend or truncate after multiplying two int constants above
14017 add->AsIntCon()->TruncateOrSignExtend32();
14018 }
14019#endif //_TARGET_64BIT_
14020 }
14021 }
14022
14023 break;
14024
14025 case GT_DIV:
14026
14027 /* For "val / 1", just return "val" */
14028
14029 if (op2->IsIntegralConst(1))
14030 {
14031 DEBUG_DESTROY_NODE(tree);
14032 return op1;
14033 }
14034
14035 break;
14036
14037 case GT_LSH:
14038
14039 /* Check for the case "(val + icon) << icon" */
14040
14041 if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
14042 {
14043 GenTree* cns = op1->gtOp.gtOp2;
14044
14045 if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
14046 {
14047 ssize_t ishf = op2->gtIntConCommon.IconValue();
14048 ssize_t iadd = cns->gtIntConCommon.IconValue();
14049
14050 // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
14051
14052 /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
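// For example, (x + 3) << 2 becomes (x << 2) + 12.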
14053
14054 tree->ChangeOper(GT_ADD);
14055 ssize_t result = iadd << ishf;
14056 op2->gtIntConCommon.SetIconValue(result);
14057#ifdef _TARGET_64BIT_
14058 if (op1->gtType == TYP_INT)
14059 {
14060 op2->AsIntCon()->TruncateOrSignExtend32();
14061 }
14062#endif // _TARGET_64BIT_
14063
14064 // we are reusing the shift amount node here, but the type we want is that of the shift result
14065 op2->gtType = op1->gtType;
14066
14067 if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
14068 cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
14069 {
14070 assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
14071 op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
14072 }
14073
14074 op1->ChangeOper(GT_LSH);
14075
14076 cns->gtIntConCommon.SetIconValue(ishf);
14077 }
14078 }
14079
14080 break;
14081
14082 case GT_XOR:
14083
14084 if (!optValnumCSE_phase)
14085 {
14086 /* "x ^ -1" is "~x" */
14087
14088 if (op2->IsIntegralConst(-1))
14089 {
14090 tree->ChangeOper(GT_NOT);
14091 tree->gtOp2 = nullptr;
14092 DEBUG_DESTROY_NODE(op2);
14093 }
14094 else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
14095 {
14096 /* "binaryVal ^ 1" is "!binaryVal" */
14097 gtReverseCond(op1);
14098 DEBUG_DESTROY_NODE(op2);
14099 DEBUG_DESTROY_NODE(tree);
14100 return op1;
14101 }
14102 }
14103
14104 break;
14105
14106 case GT_INIT_VAL:
14107 // Initialization values for initBlk have special semantics - their lower
14108 // byte is used to fill the struct. However, we allow 0 as a "bare" value,
14109 // which enables them to get a VNForZero and be propagated.
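// For example, an init value of 1 fills every byte of the destination with 0x01, while an
// init value of 0 is left below as a bare zero constant.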
14110 if (op1->IsIntegralConst(0))
14111 {
14112 return op1;
14113 }
14114 break;
14115
14116 default:
14117 break;
14118 }
14119 return tree;
14120}
14121
14122//------------------------------------------------------------------------
14123// fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
14124 // (see ECMA III.3.55 and III.3.56).
14125//
14126// Arguments:
14127// tree - The GT_MOD/GT_UMOD tree to morph
14128//
14129// Returns:
14130// The morphed tree
14131//
14132// Notes:
14133// For ARM64 we don't have a remainder instruction so this transform is
14134// always done. For XARCH this transform is done if we know that magic
14135// division will be used, in that case this transform allows CSE to
14136// eliminate the redundant div from code like "x = a / 3; y = a % 3;".
14137//
14138 // This method will produce the above expression if 'a' and 'b' are
14139 // leaf nodes; otherwise, if either of them is not a leaf, it will spill
14140 // its value into a temporary variable. For example:
14141// (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
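// For example, with a == 7 and b == 3: 7 - (7 / 3) * 3 == 7 - 2 * 3 == 1, which equals 7 % 3.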
14142//
14143GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
14144{
14145 if (tree->OperGet() == GT_MOD)
14146 {
14147 tree->SetOper(GT_DIV);
14148 }
14149 else if (tree->OperGet() == GT_UMOD)
14150 {
14151 tree->SetOper(GT_UDIV);
14152 }
14153 else
14154 {
14155 noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
14156 }
14157
14158 var_types type = tree->gtType;
14159 GenTree* denominator = tree->gtOp2;
14160 GenTree* numerator = tree->gtOp1;
14161
14162 if (!numerator->OperIsLeaf())
14163 {
14164 numerator = fgMakeMultiUse(&tree->gtOp1);
14165 }
14166
14167 if (!denominator->OperIsLeaf())
14168 {
14169 denominator = fgMakeMultiUse(&tree->gtOp2);
14170 }
14171
14172 // The numerator and denominator may have been assigned to temps, in which case
14173 // their defining assignments are in the current tree. Therefore, we need to
14174 // set the execution order accordingly on the nodes we create.
14175 // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
14176 // be set to be evaluated in reverse order.
14177 //
14178 GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
14179 assert(!mul->IsReverseOp());
14180 GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
14181 sub->gtFlags |= GTF_REVERSE_OPS;
14182
14183#ifdef DEBUG
14184 sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
14185#endif
14186
14187 return sub;
14188}
14189
14190//------------------------------------------------------------------------------
14191// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
14192//
14193//
14194// Arguments:
14195// oper - Operation to check
14196//
14197// Return Value:
14198// True if the operation can be a root of a bitwise rotation tree; false otherwise.
14199
14200bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
14201{
14202 return (oper == GT_OR) || (oper == GT_XOR);
14203}
14204
14205//------------------------------------------------------------------------------
14206// fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
14207// an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
14208//
14209// Arguments:
14210// tree - tree to check for a rotation pattern
14211//
14212// Return Value:
14213// An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
14214//
14215// Assumption:
14216// The input is a GT_OR or a GT_XOR tree.
14217
14218GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree)
14219{
14220 //
14221 // Check for a rotation pattern, e.g.,
14222 //
14223 // OR ROL
14224 // / \ / \
14225 // LSH RSZ -> x y
14226 // / \ / \
14227 // x AND x AND
14228 // / \ / \
14229 // y 31 ADD 31
14230 // / \
14231 // NEG 32
14232 // |
14233 // y
14234 // The patterns recognized:
14235 // (x << (y & M)) op (x >>> ((-y + N) & M))
14236 // (x >>> ((-y + N) & M)) op (x << (y & M))
14237 //
14238 // (x << y) op (x >>> (-y + N))
14239 // (x >>> (-y + N)) op (x << y)
14240 //
14241 // (x >>> (y & M)) op (x << ((-y + N) & M))
14242 // (x << ((-y + N) & M)) op (x >>> (y & M))
14243 //
14244 // (x >>> y) op (x << (-y + N))
14245 // (x << (-y + N)) op (x >>> y)
14246 //
14247 // (x << c1) op (x >>> c2)
14248 // (x >>> c1) op (x << c2)
14249 //
14250 // where
14251 // c1 and c2 are const
14252 // c1 + c2 == bitsize(x)
14253 // N == bitsize(x)
14254 // M is const
14255 // M & (N - 1) == N - 1
14256 // op is either | or ^
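//
// For example, with a 32-bit x (N == 32), (x << (y & 31)) | (x >>> ((-y + 32) & 31)) is
// recognized as a rotate left of x by y, and (x << 3) | (x >>> 29) as a rotate left by 3.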
14257
14258 if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
14259 {
14260 // We can't do anything if the tree has assignments, calls, or volatile
14261 // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
14262 // thrown by the original tree will be thrown by the transformed tree as well.
14263 return tree;
14264 }
14265
14266 genTreeOps oper = tree->OperGet();
14267 assert(fgOperIsBitwiseRotationRoot(oper));
14268
14269 // Check if we have an LSH on one side of the OR and an RSZ on the other side.
14270 GenTree* op1 = tree->gtGetOp1();
14271 GenTree* op2 = tree->gtGetOp2();
14272 GenTree* leftShiftTree = nullptr;
14273 GenTree* rightShiftTree = nullptr;
14274 if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
14275 {
14276 leftShiftTree = op1;
14277 rightShiftTree = op2;
14278 }
14279 else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
14280 {
14281 leftShiftTree = op2;
14282 rightShiftTree = op1;
14283 }
14284 else
14285 {
14286 return tree;
14287 }
14288
14289 // Check if the trees representing the value to shift are identical.
14290 // We already checked that there are no side effects above.
14291 if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
14292 {
14293 GenTree* rotatedValue = leftShiftTree->gtGetOp1();
14294 var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
14295 ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
14296 noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
14297 GenTree* leftShiftIndex = leftShiftTree->gtGetOp2();
14298 GenTree* rightShiftIndex = rightShiftTree->gtGetOp2();
14299
14300 // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
14301 // shouldn't be masked for the transformation to be valid. If additional
14302 // higher bits are not masked, the transformation is still valid since the result
14303 // of MSIL shift instructions is unspecified if the shift amount is greater than or equal
14304 // to the width of the value being shifted.
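// For example, for a 32-bit rotation a mask of 0x3F is acceptable (its low five bits are all
// set), while a mask of 0xF is not.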
14305 ssize_t minimalMask = rotatedValueBitSize - 1;
14306 ssize_t leftShiftMask = -1;
14307 ssize_t rightShiftMask = -1;
14308
14309 if ((leftShiftIndex->OperGet() == GT_AND))
14310 {
14311 if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
14312 {
14313 leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14314 leftShiftIndex = leftShiftIndex->gtGetOp1();
14315 }
14316 else
14317 {
14318 return tree;
14319 }
14320 }
14321
14322 if ((rightShiftIndex->OperGet() == GT_AND))
14323 {
14324 if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
14325 {
14326 rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
14327 rightShiftIndex = rightShiftIndex->gtGetOp1();
14328 }
14329 else
14330 {
14331 return tree;
14332 }
14333 }
14334
14335 if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
14336 {
14337 // The shift index is over-masked, e.g., we have
14338 // something like (x << (y & 15)) or
14339 // (x >> ((32 - y) & 15)) with a 32-bit x.
14340 // The transformation is not valid.
14341 return tree;
14342 }
14343
14344 GenTree* shiftIndexWithAdd = nullptr;
14345 GenTree* shiftIndexWithoutAdd = nullptr;
14346 genTreeOps rotateOp = GT_NONE;
14347 GenTree* rotateIndex = nullptr;
14348
14349 if (leftShiftIndex->OperGet() == GT_ADD)
14350 {
14351 shiftIndexWithAdd = leftShiftIndex;
14352 shiftIndexWithoutAdd = rightShiftIndex;
14353 rotateOp = GT_ROR;
14354 }
14355 else if (rightShiftIndex->OperGet() == GT_ADD)
14356 {
14357 shiftIndexWithAdd = rightShiftIndex;
14358 shiftIndexWithoutAdd = leftShiftIndex;
14359 rotateOp = GT_ROL;
14360 }
14361
14362 if (shiftIndexWithAdd != nullptr)
14363 {
14364 if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
14365 {
14366 if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
14367 {
14368 if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
14369 {
14370 if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
14371 {
14372 // We found one of these patterns:
14373 // (x << (y & M)) | (x >>> ((-y + N) & M))
14374 // (x << y) | (x >>> (-y + N))
14375 // (x >>> (y & M)) | (x << ((-y + N) & M))
14376 // (x >>> y) | (x << (-y + N))
14377 // where N == bitsize(x), M is const, and
14378 // M & (N - 1) == N - 1
14379 CLANG_FORMAT_COMMENT_ANCHOR;
14380
14381#ifndef _TARGET_64BIT_
14382 if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
14383 {
14384 // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
14385 // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
14386 // to add helpers for GT_ROL and GT_ROR.
14387 return tree;
14388 }
14389#endif
14390
14391 rotateIndex = shiftIndexWithoutAdd;
14392 }
14393 }
14394 }
14395 }
14396 }
14397 else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
14398 {
14399 if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
14400 {
14401 // We found this pattern:
14402 // (x << c1) | (x >>> c2)
14403 // where c1 and c2 are const and c1 + c2 == bitsize(x)
14404 rotateOp = GT_ROL;
14405 rotateIndex = leftShiftIndex;
14406 }
14407 }
14408
14409 if (rotateIndex != nullptr)
14410 {
14411 noway_assert(GenTree::OperIsRotate(rotateOp));
14412
14413 unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
14414
14415 // We can use the same tree only during global morph; reusing the tree in a later morph
14416 // may invalidate value numbers.
14417 if (fgGlobalMorph)
14418 {
14419 tree->gtOp.gtOp1 = rotatedValue;
14420 tree->gtOp.gtOp2 = rotateIndex;
14421 tree->ChangeOper(rotateOp);
14422
14423 unsigned childFlags = 0;
14424 for (GenTree* op : tree->Operands())
14425 {
14426 childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
14427 }
14428
14429 // The parent's flags should be a superset of its operands' flags
14430 noway_assert((inputTreeEffects & childFlags) == childFlags);
14431 }
14432 else
14433 {
14434 tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
14435 noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
14436 }
14437
14438 return tree;
14439 }
14440 }
14441 return tree;
14442}
14443
14444#if !CPU_HAS_FP_SUPPORT
14445GenTree* Compiler::fgMorphToEmulatedFP(GenTree* tree)
14446{
14447
14448 genTreeOps oper = tree->OperGet();
14449 var_types typ = tree->TypeGet();
14450 GenTree* op1 = tree->gtOp.gtOp1;
14451 GenTree* op2 = tree->gtGetOp2IfPresent();
14452
14453 /*
14454 We have to use helper calls for all FP operations:
14455
14456 FP operators that operate on FP values
14457 casts to and from FP
14458 comparisons of FP values
14459 */
14460
14461 if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
14462 {
14463 int helper;
14464 GenTree* args;
14465
14466 /* Not all FP operations need helper calls */
14467
14468 switch (oper)
14469 {
14470 case GT_ASG:
14471 case GT_IND:
14472 case GT_LIST:
14473 case GT_ADDR:
14474 case GT_COMMA:
14475 return tree;
14476 }
14477
14478#ifdef DEBUG
14479
14480 /* If the result isn't FP, it better be a compare or cast */
14481
14482 if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
14483 gtDispTree(tree);
14484
14485 noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
14486#endif
14487
14488 /* Keep track of how many arguments we're passing */
14489
14490 /* Is this a binary operator? */
14491
14492 if (op2)
14493 {
14494 /* What kind of an operator do we have? */
14495
14496 switch (oper)
14497 {
14498 case GT_ADD:
14499 helper = CPX_R4_ADD;
14500 break;
14501 case GT_SUB:
14502 helper = CPX_R4_SUB;
14503 break;
14504 case GT_MUL:
14505 helper = CPX_R4_MUL;
14506 break;
14507 case GT_DIV:
14508 helper = CPX_R4_DIV;
14509 break;
14510 // case GT_MOD: helper = CPX_R4_REM; break;
14511
14512 case GT_EQ:
14513 helper = CPX_R4_EQ;
14514 break;
14515 case GT_NE:
14516 helper = CPX_R4_NE;
14517 break;
14518 case GT_LT:
14519 helper = CPX_R4_LT;
14520 break;
14521 case GT_LE:
14522 helper = CPX_R4_LE;
14523 break;
14524 case GT_GE:
14525 helper = CPX_R4_GE;
14526 break;
14527 case GT_GT:
14528 helper = CPX_R4_GT;
14529 break;
14530
14531 default:
14532#ifdef DEBUG
14533 gtDispTree(tree);
14534#endif
14535 noway_assert(!"unexpected FP binary op");
14536 break;
14537 }
14538
14539 args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
14540 }
14541 else
14542 {
14543 switch (oper)
14544 {
14545 case GT_RETURN:
14546 return tree;
14547
14548 case GT_CAST:
14549 noway_assert(!"FP cast");
14550
14551 case GT_NEG:
14552 helper = CPX_R4_NEG;
14553 break;
14554
14555 default:
14556#ifdef DEBUG
14557 gtDispTree(tree);
14558#endif
14559 noway_assert(!"unexpected FP unary op");
14560 break;
14561 }
14562
14563 args = gtNewArgList(tree->gtOp.gtOp1);
14564 }
14565
14566 /* If we have double result/operands, modify the helper */
14567
14568 if (typ == TYP_DOUBLE)
14569 {
14570 static_assert_no_msg(CPX_R4_NEG + 1 == CPX_R8_NEG);
14571 static_assert_no_msg(CPX_R4_ADD + 1 == CPX_R8_ADD);
14572 static_assert_no_msg(CPX_R4_SUB + 1 == CPX_R8_SUB);
14573 static_assert_no_msg(CPX_R4_MUL + 1 == CPX_R8_MUL);
14574 static_assert_no_msg(CPX_R4_DIV + 1 == CPX_R8_DIV);
14575
14576 helper++;
14577 }
14578 else
14579 {
14580 noway_assert(tree->OperIsCompare());
14581
14582 static_assert_no_msg(CPX_R4_EQ + 1 == CPX_R8_EQ);
14583 static_assert_no_msg(CPX_R4_NE + 1 == CPX_R8_NE);
14584 static_assert_no_msg(CPX_R4_LT + 1 == CPX_R8_LT);
14585 static_assert_no_msg(CPX_R4_LE + 1 == CPX_R8_LE);
14586 static_assert_no_msg(CPX_R4_GE + 1 == CPX_R8_GE);
14587 static_assert_no_msg(CPX_R4_GT + 1 == CPX_R8_GT);
14588 }
14589
14590 tree = fgMorphIntoHelperCall(tree, helper, args);
14591
14592 return tree;
14593
14594 case GT_RETURN:
14595
14596 if (op1)
14597 {
14598
14599 if (compCurBB == genReturnBB)
14600 {
14601 /* This is the 'exitCrit' call at the exit label */
14602
14603 noway_assert(op1->gtType == TYP_VOID);
14604 noway_assert(op2 == 0);
14605
14606 tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
14607
14608 return tree;
14609 }
14610
14611 /* This is a (real) return value -- check its type */
14612 CLANG_FORMAT_COMMENT_ANCHOR;
14613
14614#ifdef DEBUG
14615 if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
14616 {
14617 bool allowMismatch = false;
14618
14619 // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
14620 if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
14621 (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
14622 allowMismatch = true;
14623
14624 if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
14625 allowMismatch = true;
14626
14627 if (!allowMismatch)
14628 NO_WAY("Return type mismatch");
14629 }
14630#endif
14631 }
14632 break;
14633 }
14634 return tree;
14635}
14636#endif
14637
14638/*****************************************************************************
14639 *
14640 * Transform the given tree for code generation and return an equivalent tree.
14641 */
14642
14643GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac)
14644{
14645 assert(tree);
14646 assert(tree->gtOper != GT_STMT);
14647
14648#ifdef DEBUG
14649 if (verbose)
14650 {
14651 if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
14652 {
14653 noway_assert(!"JitBreakMorphTree hit");
14654 }
14655 }
14656#endif
14657
14658#ifdef DEBUG
14659 int thisMorphNum = 0;
14660 if (verbose && treesBeforeAfterMorph)
14661 {
14662 thisMorphNum = morphNum++;
14663 printf("\nfgMorphTree (before %d):\n", thisMorphNum);
14664 gtDispTree(tree);
14665 }
14666#endif
14667
14668 if (fgGlobalMorph)
14669 {
14670 // Apply any rewrites for implicit byref arguments before morphing the
14671 // tree.
14672
14673 if (fgMorphImplicitByRefArgs(tree))
14674 {
14675#ifdef DEBUG
14676 if (verbose && treesBeforeAfterMorph)
14677 {
14678 printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum);
14679 gtDispTree(tree);
14680 }
14681#endif
14682 }
14683 }
14684
14685/*-------------------------------------------------------------------------
14686 * fgMorphTree() can potentially replace a tree with another, and the
14687 * caller has to store the return value correctly.
14688 * Turn this on to always make a copy of "tree" here to shake out
14689 * hidden/unupdated references.
14690 */
14691
14692#ifdef DEBUG
14693
14694 if (compStressCompile(STRESS_GENERIC_CHECK, 0))
14695 {
14696 GenTree* copy;
14697
14698#ifdef SMALL_TREE_NODES
14699 if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
14700 {
14701 copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
14702 }
14703 else
14704#endif
14705 {
14706 copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
14707 }
14708
14709 copy->ReplaceWith(tree, this);
14710
14711#if defined(LATE_DISASM)
14712 // GT_CNS_INT is considered small, so ReplaceWith() won't copy all fields
14713 if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
14714 {
14715 copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
14716 }
14717#endif
14718
14719 DEBUG_DESTROY_NODE(tree);
14720 tree = copy;
14721 }
14722#endif // DEBUG
14723
14724 if (fgGlobalMorph)
14725 {
14726 /* Ensure that we haven't morphed this node already */
14727 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
14728
14729#if LOCAL_ASSERTION_PROP
14730 /* Before morphing the tree, we try to propagate any active assertions */
14731 if (optLocalAssertionProp)
14732 {
14733 /* Do we have any active assertions? */
14734
14735 if (optAssertionCount > 0)
14736 {
14737 GenTree* newTree = tree;
14738 while (newTree != nullptr)
14739 {
14740 tree = newTree;
14741 /* newTree is non-Null if we propagated an assertion */
14742 newTree = optAssertionProp(apFull, tree, nullptr);
14743 }
14744 assert(tree != nullptr);
14745 }
14746 }
14747 PREFAST_ASSUME(tree != nullptr);
14748#endif
14749 }
14750
14751 /* Save the original un-morphed tree for fgMorphTreeDone */
14752
14753 GenTree* oldTree = tree;
14754
14755 /* Figure out what kind of a node we have */
14756
14757 unsigned kind = tree->OperKind();
14758
14759 /* Is this a constant node? */
14760
14761 if (kind & GTK_CONST)
14762 {
14763 tree = fgMorphConst(tree);
14764 goto DONE;
14765 }
14766
14767 /* Is this a leaf node? */
14768
14769 if (kind & GTK_LEAF)
14770 {
14771 tree = fgMorphLeaf(tree);
14772 goto DONE;
14773 }
14774
14775 /* Is it a 'simple' unary/binary operator? */
14776
14777 if (kind & GTK_SMPOP)
14778 {
14779 tree = fgMorphSmpOp(tree, mac);
14780 goto DONE;
14781 }
14782
14783 /* See what kind of a special operator we have here */
14784
14785 switch (tree->OperGet())
14786 {
14787 case GT_FIELD:
14788 tree = fgMorphField(tree, mac);
14789 break;
14790
14791 case GT_CALL:
14792 if (tree->OperMayThrow(this))
14793 {
14794 tree->gtFlags |= GTF_EXCEPT;
14795 }
14796 else
14797 {
14798 tree->gtFlags &= ~GTF_EXCEPT;
14799 }
14800 tree = fgMorphCall(tree->AsCall());
14801 break;
14802
14803 case GT_ARR_BOUNDS_CHECK:
14804#ifdef FEATURE_SIMD
14805 case GT_SIMD_CHK:
14806#endif // FEATURE_SIMD
14807#ifdef FEATURE_HW_INTRINSICS
14808 case GT_HW_INTRINSIC_CHK:
14809#endif // FEATURE_HW_INTRINSICS
14810 {
14811 fgSetRngChkTarget(tree);
14812
14813 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
14814 bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
14815 bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
14816 // If the index is a comma(throw, x), just return that.
14817 if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
14818 {
14819 tree = bndsChk->gtIndex;
14820 }
14821
14822 bndsChk->gtFlags &= ~GTF_CALL;
14823
14824 // Propagate effects flags upwards
14825 bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
14826 bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
14827
14828 // Otherwise, we don't change the tree.
14829 }
14830 break;
14831
14832 case GT_ARR_ELEM:
14833 tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
14834
14835 unsigned dim;
14836 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
14837 {
14838 tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
14839 }
14840
14841 tree->gtFlags &= ~GTF_CALL;
14842
14843 tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
14844
14845 for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
14846 {
14847 tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
14848 }
14849
14850 if (fgGlobalMorph)
14851 {
14852 fgSetRngChkTarget(tree, false);
14853 }
14854 break;
14855
14856 case GT_ARR_OFFSET:
14857 tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
14858 tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
14859 tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
14860
14861 tree->gtFlags &= ~GTF_CALL;
14862 tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
14863 tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
14864 tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
14865 if (fgGlobalMorph)
14866 {
14867 fgSetRngChkTarget(tree, false);
14868 }
14869 break;
14870
14871 case GT_CMPXCHG:
14872 tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
14873 tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
14874 tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
14875
14876 tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL);
14877
14878 tree->gtFlags |= tree->gtCmpXchg.gtOpLocation->gtFlags & GTF_ALL_EFFECT;
14879 tree->gtFlags |= tree->gtCmpXchg.gtOpValue->gtFlags & GTF_ALL_EFFECT;
14880 tree->gtFlags |= tree->gtCmpXchg.gtOpComparand->gtFlags & GTF_ALL_EFFECT;
14881 break;
14882
14883 case GT_STORE_DYN_BLK:
14884 case GT_DYN_BLK:
14885 if (tree->OperGet() == GT_STORE_DYN_BLK)
14886 {
14887 tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
14888 }
14889 tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
14890 tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
14891
14892 tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL);
14893 tree->SetIndirExceptionFlags(this);
14894
14895 if (tree->OperGet() == GT_STORE_DYN_BLK)
14896 {
14897 tree->gtFlags |= tree->gtDynBlk.Data()->gtFlags & GTF_ALL_EFFECT;
14898 }
14899 tree->gtFlags |= tree->gtDynBlk.Addr()->gtFlags & GTF_ALL_EFFECT;
14900 tree->gtFlags |= tree->gtDynBlk.gtDynamicSize->gtFlags & GTF_ALL_EFFECT;
14901 break;
14902
14903 case GT_INDEX_ADDR:
14904 GenTreeIndexAddr* indexAddr;
14905 indexAddr = tree->AsIndexAddr();
14906 indexAddr->Index() = fgMorphTree(indexAddr->Index());
14907 indexAddr->Arr() = fgMorphTree(indexAddr->Arr());
14908
14909 tree->gtFlags &= ~GTF_CALL;
14910
14911 tree->gtFlags |= indexAddr->Index()->gtFlags & GTF_ALL_EFFECT;
14912 tree->gtFlags |= indexAddr->Arr()->gtFlags & GTF_ALL_EFFECT;
14913 break;
14914
14915 default:
14916#ifdef DEBUG
14917 gtDispTree(tree);
14918#endif
14919 noway_assert(!"unexpected operator");
14920 }
14921DONE:
14922
14923 fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
14924
14925 return tree;
14926}
14927
14928#if LOCAL_ASSERTION_PROP
14929//------------------------------------------------------------------------
14930// fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
14931//
14932// Arguments:
14933// lclNum - The varNum of the lclVar for which we're killing assertions.
14934// tree - (DEBUG only) the tree responsible for killing its assertions.
14935//
14936void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* tree))
14937{
14938 /* All dependent assertions are killed here */
14939
14940 ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
14941
14942 if (killed)
14943 {
14944 AssertionIndex index = optAssertionCount;
14945 while (killed && (index > 0))
14946 {
14947 if (BitVecOps::IsMember(apTraits, killed, index - 1))
14948 {
14949#ifdef DEBUG
14950 AssertionDsc* curAssertion = optGetAssertion(index);
14951 noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
14952 ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
14953 if (verbose)
14954 {
14955 printf("\nThe assignment ");
14956 printTreeID(tree);
14957 printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
14958 optPrintAssertion(curAssertion);
14959 }
14960#endif
14961 // Remove this bit from the killed mask
14962 BitVecOps::RemoveElemD(apTraits, killed, index - 1);
14963
14964 optAssertionRemove(index);
14965 }
14966
14967 index--;
14968 }
14969
14970 // killed mask should now be zero
14971 noway_assert(BitVecOps::IsEmpty(apTraits, killed));
14972 }
14973}
14974//------------------------------------------------------------------------
14975// fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
14976//
14977// Arguments:
14978// lclNum - The varNum of the lclVar for which we're killing assertions.
14979// tree - (DEBUG only) the tree responsible for killing its assertions.
14980//
14981// Notes:
14982// For structs and struct fields, it will invalidate the children and parent
14983// respectively.
14984// Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
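//    For example, if V02 is a promoted struct with field locals V03 and V04 (illustrative
//    numbers only), an assignment to V02 kills the assertions that mention V03 or V04 as well
//    as those that mention V02, and an assignment to V03 also kills the assertions that
//    mention its parent V02.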
14985//
14986void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree))
14987{
14988 LclVarDsc* varDsc = &lvaTable[lclNum];
14989
14990 if (varDsc->lvPromoted)
14991 {
14992 noway_assert(varTypeIsStruct(varDsc));
14993
14994 // Kill the field locals.
14995 for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
14996 {
14997 fgKillDependentAssertionsSingle(i DEBUGARG(tree));
14998 }
14999
15000 // Kill the struct local itself.
15001 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15002 }
15003 else if (varDsc->lvIsStructField)
15004 {
15005 // Kill the field local.
15006 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15007
15008 // Kill the parent struct.
15009 fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
15010 }
15011 else
15012 {
15013 fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
15014 }
15015}
15016#endif // LOCAL_ASSERTION_PROP
15017
15018/*****************************************************************************
15019 *
15020 * This function is called to complete the morphing of a tree node
15021 * It should only be called once for each node.
15022 * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
15023 * to enforce the invariant that each node is only morphed once.
15024 * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
15025 * by an equivalent tree.
15026 *
15027 */
15028
15029void Compiler::fgMorphTreeDone(GenTree* tree,
15030 GenTree* oldTree /* == NULL */
15031 DEBUGARG(int morphNum))
15032{
15033#ifdef DEBUG
15034 if (verbose && treesBeforeAfterMorph)
15035 {
15036 printf("\nfgMorphTree (after %d):\n", morphNum);
15037 gtDispTree(tree);
15038 printf(""); // in our logic this causes a flush
15039 }
15040#endif
15041
15042 if (!fgGlobalMorph)
15043 {
15044 return;
15045 }
15046
15047 if ((oldTree != nullptr) && (oldTree != tree))
15048 {
15049 /* Ensure that we have morphed this node */
15050 assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
15051
15052#ifdef DEBUG
15053 TransferTestDataToNode(oldTree, tree);
15054#endif
15055 }
15056 else
15057 {
15058 // Ensure that we haven't morphed this node already
15059 assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
15060 }
15061
15062 if (tree->OperKind() & GTK_CONST)
15063 {
15064 goto DONE;
15065 }
15066
15067#if LOCAL_ASSERTION_PROP
15068
15069 if (!optLocalAssertionProp)
15070 {
15071 goto DONE;
15072 }
15073
15074 /* Do we have any active assertions? */
15075
15076 if (optAssertionCount > 0)
15077 {
15078 /* Is this an assignment to a local variable */
15079 GenTreeLclVarCommon* lclVarTree = nullptr;
15080 if (tree->DefinesLocal(this, &lclVarTree))
15081 {
15082 unsigned lclNum = lclVarTree->gtLclNum;
15083 noway_assert(lclNum < lvaCount);
15084 fgKillDependentAssertions(lclNum DEBUGARG(tree));
15085 }
15086 }
15087
15088 /* If this tree makes a new assertion - make it available */
15089 optAssertionGen(tree);
15090
15091#endif // LOCAL_ASSERTION_PROP
15092
15093DONE:;
15094
15095#ifdef DEBUG
15096 /* Mark this node as being morphed */
15097 tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
15098#endif
15099}
15100
15101/*****************************************************************************
15102 *
15103 * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
15104 * Returns true if we modified the flow graph
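*  For example, a BBJ_COND block whose JTRUE condition folds to a non-zero constant becomes a
*  BBJ_ALWAYS to its jump target, while a zero constant turns it into a BBJ_NONE fall-through;
*  in both cases the edge to the not-taken successor is removed.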
15105 */
15106
15107bool Compiler::fgFoldConditional(BasicBlock* block)
15108{
15109 bool result = false;
15110
15111 // We don't want to make any code unreachable
15112 if (opts.OptimizationDisabled())
15113 {
15114 return false;
15115 }
15116
15117 if (block->bbJumpKind == BBJ_COND)
15118 {
15119 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15120
15121 GenTree* stmt = block->bbTreeList->gtPrev;
15122
15123 noway_assert(stmt->gtNext == nullptr);
15124
15125 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15126 {
15127 noway_assert(fgRemoveRestOfBlock);
15128
15129 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15130 fgConvertBBToThrowBB(block);
15131
15132 /* Remove 'block' from the predecessor list of 'block->bbNext' */
15133 fgRemoveRefPred(block->bbNext, block);
15134
15135 /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
15136 fgRemoveRefPred(block->bbJumpDest, block);
15137
15138#ifdef DEBUG
15139 if (verbose)
15140 {
15141 printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
15142 printf(FMT_BB " becomes a BBJ_THROW\n", block->bbNum);
15143 }
15144#endif
15145 goto DONE_COND;
15146 }
15147
15148 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
15149
15150 /* Did we fold the conditional */
15151
15152 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15153 GenTree* cond;
15154 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15155
15156 if (cond->OperKind() & GTK_CONST)
15157 {
15158 /* Yupee - we folded the conditional!
15159 * Remove the conditional statement */
15160
15161 noway_assert(cond->gtOper == GT_CNS_INT);
15162 noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
15163
15164 /* remove the statement from bbTreeList - No need to update
15165 * the reference counts since there are no lcl vars */
15166 fgRemoveStmt(block, stmt);
15167
15168 // block is a BBJ_COND that we are folding the conditional for
15169 // bTaken is the path that will always be taken from block
15170 // bNotTaken is the path that will never be taken from block
15171 //
15172 BasicBlock* bTaken;
15173 BasicBlock* bNotTaken;
15174
15175 if (cond->gtIntCon.gtIconVal != 0)
15176 {
15177 /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
15178 block->bbJumpKind = BBJ_ALWAYS;
15179 bTaken = block->bbJumpDest;
15180 bNotTaken = block->bbNext;
15181 }
15182 else
15183 {
15184 /* Unmark the loop if we are removing a backwards branch */
15185 /* dest block must also be marked as a loop head and */
15186 /* We must be able to reach the backedge block */
15187 if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
15188 fgReachable(block->bbJumpDest, block))
15189 {
15190 optUnmarkLoopBlocks(block->bbJumpDest, block);
15191 }
15192
15193 /* JTRUE 0 - transform the basic block into a BBJ_NONE */
15194 block->bbJumpKind = BBJ_NONE;
15195 noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
15196 bTaken = block->bbNext;
15197 bNotTaken = block->bbJumpDest;
15198 }
15199
15200 if (fgHaveValidEdgeWeights)
15201 {
15202 // We are removing an edge from block to bNotTaken
15203 // and we have already computed the edge weights, so
15204 // we will try to adjust some of the weights
15205 //
15206 flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
15207 BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
15208
15209 // We examine the taken edge (block -> bTaken)
15210 // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
15211 // else if bTaken has valid profile weight and block does not we try to adjust block's weight
15212 // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
15213 //
15214 if (block->hasProfileWeight())
15215 {
15216 // The edge weights for (block -> bTaken) are 100% of block's weight
15217 edgeTaken->flEdgeWeightMin = block->bbWeight;
15218 edgeTaken->flEdgeWeightMax = block->bbWeight;
15219
15220 if (!bTaken->hasProfileWeight())
15221 {
15222 if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
15223 {
15224 // Update the weight of bTaken
15225 bTaken->inheritWeight(block);
15226 bUpdated = bTaken;
15227 }
15228 }
15229 }
15230 else if (bTaken->hasProfileWeight())
15231 {
15232 if (bTaken->countOfInEdges() == 1)
15233 {
15234 // There is only one in edge to bTaken
15235 edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
15236 edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
15237
15238 // Update the weight of block
15239 block->inheritWeight(bTaken);
15240 bUpdated = block;
15241 }
15242 }
15243
15244 if (bUpdated != nullptr)
15245 {
15246 flowList* edge;
15247 // Now fix the weights of the edges out of 'bUpdated'
15248 switch (bUpdated->bbJumpKind)
15249 {
15250 case BBJ_NONE:
15251 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15252 edge->flEdgeWeightMax = bUpdated->bbWeight;
15253 break;
15254 case BBJ_COND:
15255 edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
15256 edge->flEdgeWeightMax = bUpdated->bbWeight;
15257 __fallthrough;
15258 case BBJ_ALWAYS:
15259 edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
15260 edge->flEdgeWeightMax = bUpdated->bbWeight;
15261 break;
15262 default:
15263 // We don't handle BBJ_SWITCH
15264 break;
15265 }
15266 }
15267 }
15268
15269 /* modify the flow graph */
15270
15271 /* Remove 'block' from the predecessor list of 'bNotTaken' */
15272 fgRemoveRefPred(bNotTaken, block);
15273
15274#ifdef DEBUG
15275 if (verbose)
15276 {
15277 printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
15278 printf(FMT_BB " becomes a %s", block->bbNum,
15279 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15280 if (block->bbJumpKind == BBJ_ALWAYS)
15281 {
15282 printf(" to " FMT_BB, block->bbJumpDest->bbNum);
15283 }
15284 printf("\n");
15285 }
15286#endif
15287
15288 /* if the block was a loop condition we may have to modify
15289 * the loop table */
15290
15291 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
15292 {
15293 /* Some loops may already have been removed by
15294 * loop unrolling or conditional folding */
15295
15296 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
15297 {
15298 continue;
15299 }
15300
15301 /* We are only interested in the loop bottom */
15302
15303 if (optLoopTable[loopNum].lpBottom == block)
15304 {
15305 if (cond->gtIntCon.gtIconVal == 0)
15306 {
15307 /* This was a bogus loop (condition always false)
15308 * Remove the loop from the table */
15309
15310 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
15311#ifdef DEBUG
15312 if (verbose)
15313 {
15314 printf("Removing loop L%02u (from " FMT_BB " to " FMT_BB ")\n\n", loopNum,
15315 optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
15316 }
15317#endif
15318 }
15319 }
15320 }
15321 DONE_COND:
15322 result = true;
15323 }
15324 }
15325 else if (block->bbJumpKind == BBJ_SWITCH)
15326 {
15327 noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
15328
15329 GenTree* stmt = block->bbTreeList->gtPrev;
15330
15331 noway_assert(stmt->gtNext == nullptr);
15332
15333 if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
15334 {
15335 noway_assert(fgRemoveRestOfBlock);
15336
15337 /* Unconditional throw - transform the basic block into a BBJ_THROW */
15338 fgConvertBBToThrowBB(block);
15339
15340 /* update the flow graph */
15341
15342 unsigned jumpCnt = block->bbJumpSwt->bbsCount;
15343 BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
15344
15345 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15346 {
15347 BasicBlock* curJump = *jumpTab;
15348
15349 /* Remove 'block' from the predecessor list of 'curJump' */
15350 fgRemoveRefPred(curJump, block);
15351 }
15352
15353#ifdef DEBUG
15354 if (verbose)
15355 {
15356 printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
15357 printf(FMT_BB " becomes a BBJ_THROW\n", block->bbNum);
15358 }
15359#endif
15360 goto DONE_SWITCH;
15361 }
15362
15363 noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
15364
15365 /* Did we fold the conditional */
15366
15367 noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
15368 GenTree* cond;
15369 cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
15370
15371 if (cond->OperKind() & GTK_CONST)
15372 {
15373 /* Yupee - we folded the conditional!
15374 * Remove the conditional statement */
15375
15376 noway_assert(cond->gtOper == GT_CNS_INT);
15377
15378 /* remove the statement from bbTreeList - No need to update
15379 * the reference counts since there are no lcl vars */
15380 fgRemoveStmt(block, stmt);
15381
15382 /* modify the flow graph */
15383
15384 /* Find the actual jump target */
15385 unsigned switchVal;
15386 switchVal = (unsigned)cond->gtIntCon.gtIconVal;
15387 unsigned jumpCnt;
15388 jumpCnt = block->bbJumpSwt->bbsCount;
15389 BasicBlock** jumpTab;
15390 jumpTab = block->bbJumpSwt->bbsDstTab;
15391 bool foundVal;
15392 foundVal = false;
15393
15394 for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
15395 {
15396 BasicBlock* curJump = *jumpTab;
15397
15398 assert(curJump->countOfInEdges() > 0);
15399
15400 // If val matches switchVal or we are at the last entry and
15401 // we never found the switch value then set the new jump dest
15402
15403 if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
15404 {
15405 if (curJump != block->bbNext)
15406 {
15407 /* transform the basic block into a BBJ_ALWAYS */
15408 block->bbJumpKind = BBJ_ALWAYS;
15409 block->bbJumpDest = curJump;
15410
15411 // We only need a GC poll when jumping backwards, so clear the flag for a forward jump.
15412 if (curJump->bbNum > block->bbNum)
15413 {
15414 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15415 }
15416 }
15417 else
15418 {
15419 /* transform the basic block into a BBJ_NONE */
15420 block->bbJumpKind = BBJ_NONE;
15421 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
15422 }
15423 foundVal = true;
15424 }
15425 else
15426 {
15427 /* Remove 'block' from the predecessor list of 'curJump' */
15428 fgRemoveRefPred(curJump, block);
15429 }
15430 }
15431#ifdef DEBUG
15432 if (verbose)
15433 {
15434 printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
15435 printf(FMT_BB " becomes a %s", block->bbNum,
15436 block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
15437 if (block->bbJumpKind == BBJ_ALWAYS)
15438 {
15439 printf(" to " FMT_BB, block->bbJumpDest->bbNum);
15440 }
15441 printf("\n");
15442 }
15443#endif
15444 DONE_SWITCH:
15445 result = true;
15446 }
15447 }
15448 return result;
15449}
15450
15451//*****************************************************************************
15452//
15453// Morphs a single statement in a block.
15454// Can be called anytime, unlike fgMorphStmts() which should only be called once.
15455//
15456// Returns true if 'stmt' was removed from the block.
15457// Returns false if 'stmt' is still in the block (even if other statements were removed).
15458//
15459
15460bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
15461{
15462 assert(block != nullptr);
15463 assert(stmt != nullptr);
15464
15465 compCurBB = block;
15466 compCurStmt = stmt;
15467
15468 GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
15469
15470 // Bug 1106830 - During the CSE phase we can't just remove
15471 // morph->gtOp.gtOp2 as it could contain CSE expressions.
15472 // This leads to a noway_assert in OptCSE.cpp when
    // searching for the removed CSE ref (using gtFindLink).
15474 //
15475 if (!optValnumCSE_phase)
15476 {
15477 // Check for morph as a GT_COMMA with an unconditional throw
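        // For example (illustrative): a morph result of the form
        //     GT_COMMA(call to a throw helper, <now-unreachable value>)
        // is reduced below to just the throwing helper call.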
15478 if (fgIsCommaThrow(morph, true))
15479 {
15480#ifdef DEBUG
15481 if (verbose)
15482 {
15483 printf("Folding a top-level fgIsCommaThrow stmt\n");
15484 printf("Removing op2 as unreachable:\n");
15485 gtDispTree(morph->gtOp.gtOp2);
15486 printf("\n");
15487 }
15488#endif
15489 // Use the call as the new stmt
15490 morph = morph->gtOp.gtOp1;
15491 noway_assert(morph->gtOper == GT_CALL);
15492 }
15493
15494 // we can get a throw as a statement root
15495 if (fgIsThrow(morph))
15496 {
15497#ifdef DEBUG
15498 if (verbose)
15499 {
15500 printf("We have a top-level fgIsThrow stmt\n");
15501 printf("Removing the rest of block as unreachable:\n");
15502 }
15503#endif
15504 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15505 fgRemoveRestOfBlock = true;
15506 }
15507 }
15508
15509 stmt->gtStmtExpr = morph;
15510
15511 // Can the entire tree be removed?
15512 bool removedStmt = false;
15513
15514 // Defer removing statements during CSE so we don't inadvertently remove any CSE defs.
15515 if (!optValnumCSE_phase)
15516 {
15517 removedStmt = fgCheckRemoveStmt(block, stmt);
15518 }
15519
    // Or is this the last statement of a conditional branch that was just folded?
15521 if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
15522 {
15523 if (fgFoldConditional(block))
15524 {
15525 if (block->bbJumpKind != BBJ_THROW)
15526 {
15527 removedStmt = true;
15528 }
15529 }
15530 }
15531
15532 if (!removedStmt)
15533 {
        // Have to re-do the evaluation order since, for example, some later code does not expect constants as op1
15535 gtSetStmtInfo(stmt);
15536
15537 // Have to re-link the nodes for this statement
15538 fgSetStmtSeq(stmt);
15539 }
15540
15541#ifdef DEBUG
15542 if (verbose)
15543 {
15544 printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
15545 gtDispTree(morph);
15546 printf("\n");
15547 }
15548#endif
15549
15550 if (fgRemoveRestOfBlock)
15551 {
15552 // Remove the rest of the stmts in the block
15553 for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
15554 {
15555 fgRemoveStmt(block, stmt);
15556 }
15557
        // The rest of the block has been removed, and this block will now always throw an exception.

        // Update successors of block
15561 fgRemoveBlockAsPred(block);
15562
        // For compDbgCode, we prepend an empty BB as the firstBB; it is BBJ_NONE.
        // We should not convert it to a ThrowBB.
15565 if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
15566 {
15567 // Convert block to a throw bb
15568 fgConvertBBToThrowBB(block);
15569 }
15570
15571#ifdef DEBUG
15572 if (verbose)
15573 {
15574 printf("\n%s Block " FMT_BB " becomes a throw block.\n", msg, block->bbNum);
15575 }
15576#endif
15577 fgRemoveRestOfBlock = false;
15578 }
15579
15580 return removedStmt;
15581}
15582
15583/*****************************************************************************
15584 *
15585 * Morph the statements of the given block.
15586 * This function should be called just once for a block. Use fgMorphBlockStmt()
15587 * for reentrant calls.
15588 */
15589
15590void Compiler::fgMorphStmts(BasicBlock* block, bool* lnot, bool* loadw)
15591{
15592 fgRemoveRestOfBlock = false;
15593
15594 /* Make the current basic block address available globally */
15595
15596 compCurBB = block;
15597
15598 *lnot = *loadw = false;
15599
15600 fgCurrentlyInUseArgTemps = hashBv::Create(this);
15601
15602 GenTreeStmt* stmt = block->firstStmt();
15603 GenTree* prev = nullptr;
15604 for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
15605 {
15606 assert(stmt->gtOper == GT_STMT);
15607
15608 if (fgRemoveRestOfBlock)
15609 {
15610 fgRemoveStmt(block, stmt);
15611 continue;
15612 }
15613#ifdef FEATURE_SIMD
15614 if (opts.OptimizationEnabled() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT &&
15615 stmt->gtStmtExpr->OperGet() == GT_ASG)
15616 {
15617 fgMorphCombineSIMDFieldAssignments(block, stmt);
15618 }
15619#endif
15620
15621 fgMorphStmt = stmt;
15622 compCurStmt = stmt;
15623 GenTree* tree = stmt->gtStmtExpr;
15624
15625#ifdef DEBUG
15626 compCurStmtNum++;
15627 if (stmt == block->bbTreeList)
15628 {
15629 block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
15630 }
15631
15632 unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
15633
15634 if (verbose)
15635 {
15636 printf("\nfgMorphTree " FMT_BB ", stmt %d (before)\n", block->bbNum, compCurStmtNum);
15637 gtDispTree(tree);
15638 }
15639#endif
15640
15641 /* Morph this statement tree */
15642
15643 GenTree* morph = fgMorphTree(tree);
15644
15645 // mark any outgoing arg temps as free so we can reuse them in the next statement.
15646
15647 fgCurrentlyInUseArgTemps->ZeroAll();
15648
        // Has fgMorphStmt been sneakily changed?
15650
15651 if (stmt->gtStmtExpr != tree)
15652 {
            /* This must be a tail call. Ignore 'morph' and carry on with
               the tail-call node */
15655
15656 morph = stmt->gtStmtExpr;
15657 noway_assert(compTailCallUsed);
15658 noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
15659 noway_assert(stmt->gtNextStmt == nullptr);
15660
15661 GenTreeCall* call = morph->AsCall();
15662 // Could either be
15663 // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15664 // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15665 // a jmp.
15666 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15667 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15668 (compCurBB->bbFlags & BBF_HAS_JMP)));
15669 }
15670 else if (block != compCurBB)
15671 {
            /* This must be a tail call that caused a GCPoll to get
               injected. We haven't actually morphed the call yet,
               but the flag still got set; clear it here... */
15675 CLANG_FORMAT_COMMENT_ANCHOR;
15676
15677#ifdef DEBUG
15678 tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
15679#endif
15680
15681 noway_assert(compTailCallUsed);
15682 noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
15683 noway_assert(stmt->gtNextStmt == nullptr);
15684
15685 GenTreeCall* call = morph->AsCall();
15686
15687 // Could either be
15688 // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
15689 // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
15690 // a jmp.
15691 noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
15692 (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
15693 (compCurBB->bbFlags & BBF_HAS_JMP)));
15694 }
15695
15696#ifdef DEBUG
15697 if (compStressCompile(STRESS_CLONE_EXPR, 30))
15698 {
15699 // Clone all the trees to stress gtCloneExpr()
15700
15701 if (verbose)
15702 {
15703 printf("\nfgMorphTree (stressClone from):\n");
15704 gtDispTree(morph);
15705 }
15706
15707 morph = gtCloneExpr(morph);
15708 noway_assert(morph);
15709
15710 if (verbose)
15711 {
15712 printf("\nfgMorphTree (stressClone to):\n");
15713 gtDispTree(morph);
15714 }
15715 }
15716
        /* If the hash value changed, we modified the tree during morphing */
15718 if (verbose)
15719 {
15720 unsigned newHash = gtHashValue(morph);
15721 if (newHash != oldHash)
15722 {
15723 printf("\nfgMorphTree " FMT_BB ", stmt %d (after)\n", block->bbNum, compCurStmtNum);
15724 gtDispTree(morph);
15725 }
15726 }
15727#endif
15728
15729 /* Check for morph as a GT_COMMA with an unconditional throw */
15730 if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
15731 {
15732 /* Use the call as the new stmt */
15733 morph = morph->gtOp.gtOp1;
15734 noway_assert(morph->gtOper == GT_CALL);
15735 noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
15736
15737 fgRemoveRestOfBlock = true;
15738 }
15739
15740 stmt->gtStmtExpr = tree = morph;
15741
15742 if (fgRemoveRestOfBlock)
15743 {
15744 continue;
15745 }
15746
15747 /* Has the statement been optimized away */
15748
15749 if (fgCheckRemoveStmt(block, stmt))
15750 {
15751 continue;
15752 }
15753
15754 /* Check if this block ends with a conditional branch that can be folded */
15755
15756 if (fgFoldConditional(block))
15757 {
15758 continue;
15759 }
15760
15761 if (ehBlockHasExnFlowDsc(block))
15762 {
15763 continue;
15764 }
15765 }
15766
15767 if (fgRemoveRestOfBlock)
15768 {
15769 if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
15770 {
15771 GenTree* first = block->bbTreeList;
15772 noway_assert(first);
15773 GenTree* last = first->gtPrev;
15774 noway_assert(last && last->gtNext == nullptr);
15775 GenTree* lastStmt = last->gtStmt.gtStmtExpr;
15776
15777 if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
15778 ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
15779 {
15780 GenTree* op1 = lastStmt->gtOp.gtOp1;
15781
15782 if (op1->OperKind() & GTK_RELOP)
15783 {
15784 /* Unmark the comparison node with GTF_RELOP_JMP_USED */
15785 op1->gtFlags &= ~GTF_RELOP_JMP_USED;
15786 }
15787
15788 last->gtStmt.gtStmtExpr = fgMorphTree(op1);
15789 }
15790 }
15791
15792 /* Mark block as a BBJ_THROW block */
15793 fgConvertBBToThrowBB(block);
15794 }
15795
15796#if FEATURE_FASTTAILCALL
15797 GenTree* recursiveTailCall = nullptr;
15798 if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
15799 {
15800 fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
15801 }
15802#endif
15803
15804#ifdef DEBUG
15805 compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
15806#endif
15807
    // Reset this back so that it doesn't leak out and impact other blocks
15809 fgRemoveRestOfBlock = false;
15810}
15811
15812/*****************************************************************************
15813 *
15814 * Morph the blocks of the method.
15815 * Returns true if the basic block list is modified.
15816 * This function should be called just once.
15817 */
15818
15819void Compiler::fgMorphBlocks()
15820{
15821#ifdef DEBUG
15822 if (verbose)
15823 {
15824 printf("\n*************** In fgMorphBlocks()\n");
15825 }
15826#endif
15827
    /* Since fgMorphTree can be called after various optimizations to re-arrange
     * the nodes, we need a global flag to signal whether we are in the one-pass
     * global morphing */
15831
15832 fgGlobalMorph = true;
15833
15834#if LOCAL_ASSERTION_PROP
15835 //
15836 // Local assertion prop is enabled if we are optimized
15837 //
15838 optLocalAssertionProp = opts.OptimizationEnabled();
15839
15840 if (optLocalAssertionProp)
15841 {
15842 //
15843 // Initialize for local assertion prop
15844 //
15845 optAssertionInit(true);
15846 }
15847#elif ASSERTION_PROP
15848 //
15849 // If LOCAL_ASSERTION_PROP is not set
15850 // and we have global assertion prop
15851 // then local assertion prop is always off
15852 //
15853 optLocalAssertionProp = false;
15854
15855#endif
15856
15857 /*-------------------------------------------------------------------------
15858 * Process all basic blocks in the function
15859 */
15860
15861 BasicBlock* block = fgFirstBB;
15862 noway_assert(block);
15863
15864#ifdef DEBUG
15865 compCurStmtNum = 0;
15866#endif
15867
15868 do
15869 {
15870#if OPT_BOOL_OPS
15871 bool lnot = false;
15872#endif
15873
15874 bool loadw = false;
15875
15876#ifdef DEBUG
15877 if (verbose)
15878 {
15879 printf("\nMorphing " FMT_BB " of '%s'\n", block->bbNum, info.compFullName);
15880 }
15881#endif
15882
15883#if LOCAL_ASSERTION_PROP
15884 if (optLocalAssertionProp)
15885 {
15886 //
15887 // Clear out any currently recorded assertion candidates
15888 // before processing each basic block,
15889 // also we must handle QMARK-COLON specially
15890 //
15891 optAssertionReset(0);
15892 }
15893#endif
15894
15895 /* Process all statement trees in the basic block */
15896
15897 fgMorphStmts(block, &lnot, &loadw);
15898
15899 /* Are we using a single return block? */
15900
15901 if (block->bbJumpKind == BBJ_RETURN)
15902 {
15903 if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
15904 {
15905
                // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
                // For example, a method returning void could have an empty block with jump kind BBJ_RETURN.
                // Such blocks do materialize as part of inlining.
15909 //
15910 // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
15911 // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
15912 // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
15913 // is BAD_VAR_NUM.
15914 //
15915 // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
15916
15917 GenTree* last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
15918 GenTree* ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
15919
15920 if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0))
15921 {
15922 // This return was generated during epilog merging, so leave it alone
15923 }
15924 else
15925 {
15926 /* We'll jump to the genReturnBB */
15927 CLANG_FORMAT_COMMENT_ANCHOR;
15928
15929#if !defined(_TARGET_X86_)
15930 if (info.compFlags & CORINFO_FLG_SYNCH)
15931 {
15932 fgConvertSyncReturnToLeave(block);
15933 }
15934 else
15935#endif // !_TARGET_X86_
15936 {
15937 block->bbJumpKind = BBJ_ALWAYS;
15938 block->bbJumpDest = genReturnBB;
15939 fgReturnCount--;
15940 }
15941 if (genReturnLocal != BAD_VAR_NUM)
15942 {
                        // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
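                        // For example (illustrative): a block ending in "return expr" now ends with
                        //     genReturnLocal = expr
                        // and jumps to genReturnBB, which performs the single GT_RETURN of genReturnLocal.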
15944
15945 // Method must be returning a value other than TYP_VOID.
15946 noway_assert(compMethodHasRetVal());
15947
15948 // This block must be ending with a GT_RETURN
15949 noway_assert(last != nullptr);
15950 noway_assert(last->gtOper == GT_STMT);
15951 noway_assert(last->gtNext == nullptr);
15952 noway_assert(ret != nullptr);
15953
15954 // GT_RETURN must have non-null operand as the method is returning the value assigned to
15955 // genReturnLocal
15956 noway_assert(ret->OperGet() == GT_RETURN);
15957 noway_assert(ret->gtGetOp1() != nullptr);
15958
15959 GenTree* pAfterStatement = last;
15960 IL_OFFSETX offset = last->AsStmt()->gtStmtILoffsx;
15961 GenTree* tree =
15962 gtNewTempAssign(genReturnLocal, ret->gtGetOp1(), &pAfterStatement, offset, block);
15963 if (tree->OperIsCopyBlkOp())
15964 {
15965 tree = fgMorphCopyBlock(tree);
15966 }
15967
15968 if (pAfterStatement == last)
15969 {
15970 last->gtStmt.gtStmtExpr = tree;
15971 }
15972 else
15973 {
15974 // gtNewTempAssign inserted additional statements after last
15975 fgRemoveStmt(block, last);
15976 last = fgInsertStmtAfter(block, pAfterStatement, gtNewStmt(tree, offset));
15977 }
15978
15979 // make sure that copy-prop ignores this assignment.
15980 last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
15981 }
15982 else if (ret != nullptr && ret->OperGet() == GT_RETURN)
15983 {
15984 // This block ends with a GT_RETURN
15985 noway_assert(last != nullptr);
15986 noway_assert(last->gtOper == GT_STMT);
15987 noway_assert(last->gtNext == nullptr);
15988
15989 // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn
15990 // block
15991 noway_assert(ret->TypeGet() == TYP_VOID);
15992 noway_assert(ret->gtGetOp1() == nullptr);
15993
15994 fgRemoveStmt(block, last);
15995 }
15996#ifdef DEBUG
15997 if (verbose)
15998 {
15999 printf("morph " FMT_BB " to point at onereturn. New block is\n", block->bbNum);
16000 fgTableDispBasicBlock(block);
16001 }
16002#endif
16003 }
16004 }
16005 }
16006 block = block->bbNext;
16007 } while (block);
16008
16009 /* We are done with the global morphing phase */
16010
16011 fgGlobalMorph = false;
16012
16013#ifdef DEBUG
16014 if (verboseTrees)
16015 {
16016 fgDispBasicBlocks(true);
16017 }
16018#endif
16019}
16020
16021/*****************************************************************************
16022 *
16023 * Make some decisions about the kind of code to generate.
16024 */
16025
16026void Compiler::fgSetOptions()
16027{
16028#ifdef DEBUG
    /* Should we force fully interruptible code? */
16030 if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
16031 {
16032 noway_assert(!codeGen->isGCTypeFixed());
16033 genInterruptible = true;
16034 }
16035#endif
16036
16037 if (opts.compDbgCode)
16038 {
16039 assert(!codeGen->isGCTypeFixed());
16040 genInterruptible = true; // debugging is easier this way ...
16041 }
16042
16043 /* Assume we won't need an explicit stack frame if this is allowed */
16044
16045 // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
16046 // the callee-saved registers.
16047 noway_assert(!compTailCallUsed || !compLocallocUsed);
16048
16049 if (compLocallocUsed)
16050 {
16051 codeGen->setFramePointerRequired(true);
16052 }
16053
16054#ifdef _TARGET_X86_
16055
16056 if (compTailCallUsed)
16057 codeGen->setFramePointerRequired(true);
16058
16059#endif // _TARGET_X86_
16060
16061 if (!opts.genFPopt)
16062 {
16063 codeGen->setFramePointerRequired(true);
16064 }
16065
16066 // Assert that the EH table has been initialized by now. Note that
16067 // compHndBBtabAllocCount never decreases; it is a high-water mark
16068 // of table allocation. In contrast, compHndBBtabCount does shrink
16069 // if we delete a dead EH region, and if it shrinks to zero, the
16070 // table pointer compHndBBtab is unreliable.
16071 assert(compHndBBtabAllocCount >= info.compXcptnsCount);
16072
16073#ifdef _TARGET_X86_
16074
16075 // Note: this case, and the !X86 case below, should both use the
16076 // !X86 path. This would require a few more changes for X86 to use
16077 // compHndBBtabCount (the current number of EH clauses) instead of
16078 // info.compXcptnsCount (the number of EH clauses in IL), such as
16079 // in ehNeedsShadowSPslots(). This is because sometimes the IL has
16080 // an EH clause that we delete as statically dead code before we
16081 // get here, leaving no EH clauses left, and thus no requirement
16082 // to use a frame pointer because of EH. But until all the code uses
16083 // the same test, leave info.compXcptnsCount here.
16084 if (info.compXcptnsCount > 0)
16085 {
16086 codeGen->setFramePointerRequiredEH(true);
16087 }
16088
16089#else // !_TARGET_X86_
16090
16091 if (compHndBBtabCount > 0)
16092 {
16093 codeGen->setFramePointerRequiredEH(true);
16094 }
16095
16096#endif // _TARGET_X86_
16097
16098#ifdef UNIX_X86_ABI
16099 if (info.compXcptnsCount > 0)
16100 {
16101 assert(!codeGen->isGCTypeFixed());
16102 // Enforce fully interruptible codegen for funclet unwinding
16103 genInterruptible = true;
16104 }
16105#endif // UNIX_X86_ABI
16106
16107 if (info.compCallUnmanaged)
16108 {
16109 codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
16110 }
16111
16112 if (info.compPublishStubParam)
16113 {
16114 codeGen->setFramePointerRequiredGCInfo(true);
16115 }
16116
16117 if (opts.compNeedSecurityCheck)
16118 {
16119 codeGen->setFramePointerRequiredGCInfo(true);
16120
16121#ifndef JIT32_GCENCODER
16122
        // The decoder only reports objects in frames with exceptions if the frame
        // is fully interruptible.
        // Even if there is no catch or other way to resume execution in this frame,
        // the VM requires the security object to remain alive until later, so
        // frames with security objects must be fully interruptible.
16128 genInterruptible = true;
16129
16130#endif // JIT32_GCENCODER
16131 }
16132
16133 if (compIsProfilerHookNeeded())
16134 {
16135 codeGen->setFramePointerRequired(true);
16136 }
16137
16138 if (info.compIsVarArgs)
16139 {
16140 // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
16141 codeGen->setFramePointerRequiredGCInfo(true);
16142 }
16143
16144 if (lvaReportParamTypeArg())
16145 {
16146 codeGen->setFramePointerRequiredGCInfo(true);
16147 }
16148
16149 // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
16150}
16151
16152/*****************************************************************************/
16153
16154GenTree* Compiler::fgInitThisClass()
16155{
16156 noway_assert(!compIsForInlining());
16157
16158 CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
16159
16160 if (!kind.needsRuntimeLookup)
16161 {
16162 return fgGetSharedCCtor(info.compClassHnd);
16163 }
16164 else
16165 {
16166#ifdef FEATURE_READYTORUN_COMPILER
16167 // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
16168 if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
16169 {
16170 CORINFO_RESOLVED_TOKEN resolvedToken;
16171 memset(&resolvedToken, 0, sizeof(resolvedToken));
16172
16173 // We are in a shared method body, but maybe we don't need a runtime lookup after all.
16174 // This covers the case of a generic method on a non-generic type.
16175 if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
16176 {
16177 resolvedToken.hClass = info.compClassHnd;
16178 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
16179 }
16180
16181 // We need a runtime lookup.
16182 GenTree* ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
16183
16184 // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
16185 // base of the class that owns the method being compiled". If we're in this method, it means we're not
16186 // inlining and there's no ambiguity.
16187 return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
16188 gtNewArgList(ctxTree), &kind);
16189 }
16190#endif
16191
        // Collectible types require that, for shared generic code, if we use the generic context parameter,
        // then we report it. (This is a conservative approach; we could detect some cases, particularly when
        // the context parameter is "this", where we don't need the eager reporting logic.)
16195 lvaGenericsContextUseCount++;
16196
16197 switch (kind.runtimeLookupKind)
16198 {
16199 case CORINFO_LOOKUP_THISOBJ:
                // This code takes a 'this' pointer, but we need to pass the static method desc to get the right
                // point in the hierarchy.
16202 {
16203 GenTree* vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
16204 // Vtable pointer of this object
16205 vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
16206 vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
16207 GenTree* methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
16208
16209 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, gtNewArgList(vtTree, methodHnd));
16210 }
16211
16212 case CORINFO_LOOKUP_CLASSPARAM:
16213 {
16214 GenTree* vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16215 return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, gtNewArgList(vtTree));
16216 }
16217
16218 case CORINFO_LOOKUP_METHODPARAM:
16219 {
16220 GenTree* methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
16221 return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID,
16222 gtNewArgList(gtNewIconNode(0), methHndTree));
16223 }
16224 }
16225 }
16226
16227 noway_assert(!"Unknown LOOKUP_KIND");
16228 UNREACHABLE();
16229}
16230
16231#ifdef DEBUG
16232/*****************************************************************************
16233 *
 * Tree walk callback to make sure no GT_QMARK nodes are present in the tree;
 * fgCheckQmarkAllowedForm asserts on any GT_QMARK it finds.
16236 */
16237Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTree** tree, fgWalkData* data)
16238{
16239 if ((*tree)->OperGet() == GT_QMARK)
16240 {
16241 fgCheckQmarkAllowedForm(*tree);
16242 }
16243 return WALK_CONTINUE;
16244}
16245
16246void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
16247{
16248 assert(tree->OperGet() == GT_QMARK);
16249 assert(!"Qmarks beyond morph disallowed.");
16250}
16251
16252/*****************************************************************************
16253 *
16254 * Verify that the importer has created GT_QMARK nodes in a way we can
16255 * process them. The following is allowed:
16256 *
16257 * 1. A top level qmark. Top level qmark is of the form:
16258 * a) (bool) ? (void) : (void) OR
16259 * b) V0N = (bool) ? (type) : (type)
16260 *
16261 * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
16262 * of either op1 of colon or op2 of colon but not a child of any other
16263 * operator.
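 *
 * For example (illustrative): "V03 = c1 ? (c2 ? x : y) : z" is allowed, since the inner
 * qmark is a direct child of the outer colon, whereas a qmark nested under any other
 * operator (say, an add) trips the assert in fgCheckQmarkAllowedForm.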
16264 */
16265void Compiler::fgPreExpandQmarkChecks(GenTree* expr)
16266{
16267 GenTree* topQmark = fgGetTopLevelQmark(expr);
16268
16269 // If the top level Qmark is null, then scan the tree to make sure
16270 // there are no qmarks within it.
16271 if (topQmark == nullptr)
16272 {
16273 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
16274 }
16275 else
16276 {
        // We could probably expand the cond node also, but we don't think the extra effort is necessary,
        // so let's just assert that the cond node of a top level qmark doesn't have further top level qmarks.
16279 fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
16280
16281 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
16282 fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
16283 }
16284}
16285#endif // DEBUG
16286
16287/*****************************************************************************
16288 *
 * Get the top level GT_QMARK node in a given "expr"; return NULL if no such
 * node is present. If the top level GT_QMARK node is assigned to a
 * GT_LCL_VAR, then return the lcl node in ppDst.
16292 *
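 * For example (illustrative): for an expression of the form
 *     GT_ASG(GT_LCL_VAR V05, GT_QMARK(cond, GT_COLON(thenExpr, elseExpr)))
 * this returns the GT_QMARK node and stores the V05 GT_LCL_VAR node in *ppDst.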
16293 */
16294GenTree* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */)
16295{
16296 if (ppDst != nullptr)
16297 {
16298 *ppDst = nullptr;
16299 }
16300
16301 GenTree* topQmark = nullptr;
16302 if (expr->gtOper == GT_QMARK)
16303 {
16304 topQmark = expr;
16305 }
16306 else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16307 {
16308 topQmark = expr->gtOp.gtOp2;
16309 if (ppDst != nullptr)
16310 {
16311 *ppDst = expr->gtOp.gtOp1;
16312 }
16313 }
16314 return topQmark;
16315}
16316
16317/*********************************************************************************
16318 *
16319 * For a castclass helper call,
16320 * Importer creates the following tree:
16321 * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
16322 *
16323 * This method splits the qmark expression created by the importer into the
16324 * following blocks: (block, asg, cond1, cond2, helper, remainder)
 * Notice that op1 is the result for both the conditions. So we coalesce these
 * assignments into a single block instead of two blocks, which would result in a nested diamond.
16327 *
16328 * +---------->-----------+
16329 * | | |
16330 * ^ ^ v
16331 * | | |
16332 * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
16333 *
16334 * We expect to achieve the following codegen:
16335 * mov rsi, rdx tmp = op1 // asgBlock
16336 * test rsi, rsi goto skip if tmp == null ? // cond1Block
16337 * je SKIP
16338 * mov rcx, 0x76543210 cns = op2 // cond2Block
16339 * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
16340 * je SKIP
16341 * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
16342 * mov rsi, rax
16343 * SKIP: // remainderBlock
16344 * tmp has the result.
16345 *
16346 */
16347void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTree* stmt)
16348{
16349#ifdef DEBUG
16350 if (verbose)
16351 {
16352 printf("\nExpanding CastInstOf qmark in " FMT_BB " (before)\n", block->bbNum);
16353 fgDispBasicBlocks(block, block, true);
16354 }
16355#endif // DEBUG
16356
16357 GenTree* expr = stmt->gtStmt.gtStmtExpr;
16358
16359 GenTree* dst = nullptr;
16360 GenTree* qmark = fgGetTopLevelQmark(expr, &dst);
16361 noway_assert(dst != nullptr);
16362
16363 assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
16364
16365 // Get cond, true, false exprs for the qmark.
16366 GenTree* condExpr = qmark->gtGetOp1();
16367 GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
16368 GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16369
16370 // Get cond, true, false exprs for the nested qmark.
16371 GenTree* nestedQmark = falseExpr;
16372 GenTree* cond2Expr;
16373 GenTree* true2Expr;
16374 GenTree* false2Expr;
16375
16376 if (nestedQmark->gtOper == GT_QMARK)
16377 {
16378 cond2Expr = nestedQmark->gtGetOp1();
16379 true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
16380 false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
16381
16382 assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
16383 cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
16384 }
16385 else
16386 {
        // This is a rare case that arises when we are doing minopts and encounter isinst of null:
        // gtFoldExpr was still able to optimize away part of the tree (but not all),
        // so it does not match our pattern.
16390
16391 // Rather than write code to handle this case, just fake up some nodes to make it match the common
16392 // case. Synthesize a comparison that is always true, and for the result-on-true, use the
16393 // entire subtree we expected to be the nested question op.
16394
16395 cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
16396 true2Expr = nestedQmark;
16397 false2Expr = gtNewIconNode(0, TYP_I_IMPL);
16398 }
16399 assert(false2Expr->OperGet() == trueExpr->OperGet());
16400
16401 // Clear flags as they are now going to be part of JTRUE.
16402 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16403 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16404
16405 // Create the chain of blocks. See method header comment.
16406 // The order of blocks after this is the following:
16407 // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
16408 //
16409 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16411 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16412 // remainderBlock will still be GC safe.
16413 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16414 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16415 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
16416
16417 BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
16418 BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
16419 BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
16420 BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
16421
16422 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16423
16424 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16425 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16426 if ((block->bbFlags & BBF_INTERNAL) == 0)
16427 {
16428 helperBlock->bbFlags &= ~BBF_INTERNAL;
16429 cond2Block->bbFlags &= ~BBF_INTERNAL;
16430 cond1Block->bbFlags &= ~BBF_INTERNAL;
16431 asgBlock->bbFlags &= ~BBF_INTERNAL;
16432 helperBlock->bbFlags |= BBF_IMPORTED;
16433 cond2Block->bbFlags |= BBF_IMPORTED;
16434 cond1Block->bbFlags |= BBF_IMPORTED;
16435 asgBlock->bbFlags |= BBF_IMPORTED;
16436 }
16437
16438 // Chain the flow correctly.
16439 fgAddRefPred(asgBlock, block);
16440 fgAddRefPred(cond1Block, asgBlock);
16441 fgAddRefPred(cond2Block, cond1Block);
16442 fgAddRefPred(helperBlock, cond2Block);
16443 fgAddRefPred(remainderBlock, helperBlock);
16444 fgAddRefPred(remainderBlock, cond1Block);
16445 fgAddRefPred(remainderBlock, cond2Block);
16446
16447 cond1Block->bbJumpDest = remainderBlock;
16448 cond2Block->bbJumpDest = remainderBlock;
16449
16450 // Set the weights; some are guesses.
16451 asgBlock->inheritWeight(block);
16452 cond1Block->inheritWeight(block);
16453 cond2Block->inheritWeightPercentage(cond1Block, 50);
16454 helperBlock->inheritWeightPercentage(cond2Block, 50);
16455
16456 // Append cond1 as JTRUE to cond1Block
16457 GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
16458 GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16459 fgInsertStmtAtEnd(cond1Block, jmpStmt);
16460
16461 // Append cond2 as JTRUE to cond2Block
16462 jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
16463 jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16464 fgInsertStmtAtEnd(cond2Block, jmpStmt);
16465
16466 // AsgBlock should get tmp = op1 assignment.
16467 trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
16468 GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
16469 fgInsertStmtAtEnd(asgBlock, trueStmt);
16470
16471 // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper.
16472 gtReverseCond(cond2Expr);
16473 GenTree* helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
16474 GenTree* helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
16475 fgInsertStmtAtEnd(helperBlock, helperStmt);
16476
16477 // Finally remove the nested qmark stmt.
16478 fgRemoveStmt(block, stmt);
16479
16480#ifdef DEBUG
16481 if (verbose)
16482 {
16483 printf("\nExpanding CastInstOf qmark in " FMT_BB " (after)\n", block->bbNum);
16484 fgDispBasicBlocks(block, remainderBlock, true);
16485 }
16486#endif // DEBUG
16487}
16488
16489/*****************************************************************************
16490 *
16491 * Expand a statement with a top level qmark node. There are three cases, based
16492 * on whether the qmark has both "true" and "false" arms, or just one of them.
16493 *
16494 * S0;
16495 * C ? T : F;
16496 * S1;
16497 *
16498 * Generates ===>
16499 *
16500 * bbj_always
16501 * +---->------+
16502 * false | |
16503 * S0 -->-- ~C -->-- T F -->-- S1
16504 * | |
16505 * +--->--------+
16506 * bbj_cond(true)
16507 *
16508 * -----------------------------------------
16509 *
16510 * S0;
16511 * C ? T : NOP;
16512 * S1;
16513 *
16514 * Generates ===>
16515 *
16516 * false
16517 * S0 -->-- ~C -->-- T -->-- S1
16518 * | |
16519 * +-->-------------+
16520 * bbj_cond(true)
16521 *
16522 * -----------------------------------------
16523 *
16524 * S0;
16525 * C ? NOP : F;
16526 * S1;
16527 *
16528 * Generates ===>
16529 *
16530 * false
16531 * S0 -->-- C -->-- F -->-- S1
16532 * | |
16533 * +-->------------+
16534 * bbj_cond(true)
16535 *
 * If the qmark assigns to a variable, then the "then" and "else" results are
 * assigned directly into that variable in the corresponding blocks.
16538 */
16539void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTree* stmt)
16540{
16541 GenTree* expr = stmt->gtStmt.gtStmtExpr;
16542
16543 // Retrieve the Qmark node to be expanded.
16544 GenTree* dst = nullptr;
16545 GenTree* qmark = fgGetTopLevelQmark(expr, &dst);
16546 if (qmark == nullptr)
16547 {
16548 return;
16549 }
16550
16551 if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
16552 {
16553 fgExpandQmarkForCastInstOf(block, stmt);
16554 return;
16555 }
16556
16557#ifdef DEBUG
16558 if (verbose)
16559 {
16560 printf("\nExpanding top-level qmark in " FMT_BB " (before)\n", block->bbNum);
16561 fgDispBasicBlocks(block, block, true);
16562 }
16563#endif // DEBUG
16564
16565 // Retrieve the operands.
16566 GenTree* condExpr = qmark->gtGetOp1();
16567 GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
16568 GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
16569
16570 assert(condExpr->gtFlags & GTF_RELOP_QMARK);
16571 condExpr->gtFlags &= ~GTF_RELOP_QMARK;
16572
16573 assert(!varTypeIsFloating(condExpr->TypeGet()));
16574
16575 bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
16576 bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
16577 assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
16578
16579 // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
16580 // block ... condBlock ... elseBlock ... remainderBlock
16581 //
16582 // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
16584 // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
16585 // remainderBlock will still be GC safe.
16586 unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
16587 BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
16588 fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
16589
16590 BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
16591 BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
16592
16593 // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
16594 // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
16595 if ((block->bbFlags & BBF_INTERNAL) == 0)
16596 {
16597 condBlock->bbFlags &= ~BBF_INTERNAL;
16598 elseBlock->bbFlags &= ~BBF_INTERNAL;
16599 condBlock->bbFlags |= BBF_IMPORTED;
16600 elseBlock->bbFlags |= BBF_IMPORTED;
16601 }
16602
16603 remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
16604
16605 condBlock->inheritWeight(block);
16606
16607 fgAddRefPred(condBlock, block);
16608 fgAddRefPred(elseBlock, condBlock);
16609 fgAddRefPred(remainderBlock, elseBlock);
16610
16611 BasicBlock* thenBlock = nullptr;
16612 if (hasTrueExpr && hasFalseExpr)
16613 {
16614 // bbj_always
16615 // +---->------+
16616 // false | |
16617 // S0 -->-- ~C -->-- T F -->-- S1
16618 // | |
16619 // +--->--------+
16620 // bbj_cond(true)
16621 //
16622 gtReverseCond(condExpr);
16623 condBlock->bbJumpDest = elseBlock;
16624
16625 thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
16626 thenBlock->bbJumpDest = remainderBlock;
16627 if ((block->bbFlags & BBF_INTERNAL) == 0)
16628 {
16629 thenBlock->bbFlags &= ~BBF_INTERNAL;
16630 thenBlock->bbFlags |= BBF_IMPORTED;
16631 }
16632
16633 elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
16634
16635 fgAddRefPred(thenBlock, condBlock);
16636 fgAddRefPred(remainderBlock, thenBlock);
16637
16638 thenBlock->inheritWeightPercentage(condBlock, 50);
16639 elseBlock->inheritWeightPercentage(condBlock, 50);
16640 }
16641 else if (hasTrueExpr)
16642 {
16643 // false
16644 // S0 -->-- ~C -->-- T -->-- S1
16645 // | |
16646 // +-->-------------+
16647 // bbj_cond(true)
16648 //
16649 gtReverseCond(condExpr);
16650 condBlock->bbJumpDest = remainderBlock;
16651 fgAddRefPred(remainderBlock, condBlock);
16652 // Since we have no false expr, use the one we'd already created.
16653 thenBlock = elseBlock;
16654 elseBlock = nullptr;
16655
16656 thenBlock->inheritWeightPercentage(condBlock, 50);
16657 }
16658 else if (hasFalseExpr)
16659 {
16660 // false
16661 // S0 -->-- C -->-- F -->-- S1
16662 // | |
16663 // +-->------------+
16664 // bbj_cond(true)
16665 //
16666 condBlock->bbJumpDest = remainderBlock;
16667 fgAddRefPred(remainderBlock, condBlock);
16668
16669 elseBlock->inheritWeightPercentage(condBlock, 50);
16670 }
16671
16672 GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
16673 GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
16674 fgInsertStmtAtEnd(condBlock, jmpStmt);
16675
16676 // Remove the original qmark statement.
16677 fgRemoveStmt(block, stmt);
16678
    // Since we have a top level qmark, either we have a dst, in which case we
    // assign the "then" and "else" results into it, or the qmark is void and we
    // don't bother assigning.
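    // For example (illustrative): "V05 = cond ? T : F" ends up as "V05 = T" in thenBlock
    // and "V05 = F" in elseBlock, while a void qmark just inserts its arms as statements
    // without any assignment.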
16682 unsigned lclNum = BAD_VAR_NUM;
16683 if (dst != nullptr)
16684 {
16685 assert(dst->gtOper == GT_LCL_VAR);
16686 lclNum = dst->gtLclVar.gtLclNum;
16687 }
16688 else
16689 {
16690 assert(qmark->TypeGet() == TYP_VOID);
16691 }
16692
16693 if (hasTrueExpr)
16694 {
16695 if (dst != nullptr)
16696 {
16697 trueExpr = gtNewTempAssign(lclNum, trueExpr);
16698 }
16699 GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
16700 fgInsertStmtAtEnd(thenBlock, trueStmt);
16701 }
16702
16703 // Assign the falseExpr into the dst or tmp, insert in elseBlock
16704 if (hasFalseExpr)
16705 {
16706 if (dst != nullptr)
16707 {
16708 falseExpr = gtNewTempAssign(lclNum, falseExpr);
16709 }
16710 GenTree* falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
16711 fgInsertStmtAtEnd(elseBlock, falseStmt);
16712 }
16713
16714#ifdef DEBUG
16715 if (verbose)
16716 {
16717 printf("\nExpanding top-level qmark in " FMT_BB " (after)\n", block->bbNum);
16718 fgDispBasicBlocks(block, remainderBlock, true);
16719 }
16720#endif // DEBUG
16721}
16722
16723/*****************************************************************************
16724 *
16725 * Expand GT_QMARK nodes from the flow graph into basic blocks.
16726 *
16727 */
16728
16729void Compiler::fgExpandQmarkNodes()
16730{
16731 if (compQmarkUsed)
16732 {
16733 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
16734 {
16735 for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
16736 {
16737 GenTree* expr = stmt->gtStmt.gtStmtExpr;
16738#ifdef DEBUG
16739 fgPreExpandQmarkChecks(expr);
16740#endif
16741 fgExpandQmarkStmt(block, stmt);
16742 }
16743 }
16744#ifdef DEBUG
16745 fgPostExpandQmarkChecks();
16746#endif
16747 }
16748 compQmarkRationalized = true;
16749}
16750
16751#ifdef DEBUG
16752/*****************************************************************************
16753 *
16754 * Make sure we don't have any more GT_QMARK nodes.
16755 *
16756 */
16757void Compiler::fgPostExpandQmarkChecks()
16758{
16759 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
16760 {
16761 for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
16762 {
16763 GenTree* expr = stmt->gtStmt.gtStmtExpr;
16764 fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
16765 }
16766 }
16767}
16768#endif
16769
16770/*****************************************************************************
16771 *
16772 * Transform all basic blocks for codegen.
16773 */
16774
16775void Compiler::fgMorph()
16776{
16777 noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
16778
16779 fgOutgoingArgTemps = nullptr;
16780
16781#ifdef DEBUG
16782 if (verbose)
16783 {
16784 printf("*************** In fgMorph()\n");
16785 }
16786 if (verboseTrees)
16787 {
16788 fgDispBasicBlocks(true);
16789 }
16790#endif // DEBUG
16791
16792 // Insert call to class constructor as the first basic block if
16793 // we were asked to do so.
16794 if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
16795 impTokenLookupContextHandle /* context */) &
16796 CORINFO_INITCLASS_USE_HELPER)
16797 {
16798 fgEnsureFirstBBisScratch();
16799 fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
16800 }
16801
16802#ifdef DEBUG
16803 if (opts.compGcChecks)
16804 {
16805 for (unsigned i = 0; i < info.compArgsCount; i++)
16806 {
16807 if (lvaTable[i].TypeGet() == TYP_REF)
16808 {
16809 // confirm that the argument is a GC pointer (for debugging (GC stress))
16810 GenTree* op = gtNewLclvNode(i, TYP_REF);
16811 GenTreeArgList* args = gtNewArgList(op);
16812 op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, args);
16813
16814 fgEnsureFirstBBisScratch();
16815 fgInsertStmtAtEnd(fgFirstBB, op);
16816 }
16817 }
16818 }
16819#endif // DEBUG
16820
16821#if defined(DEBUG) && defined(_TARGET_XARCH_)
16822 if (opts.compStackCheckOnRet)
16823 {
16824 lvaReturnSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnSpCheck"));
16825 lvaTable[lvaReturnSpCheck].lvType = TYP_I_IMPL;
16826 }
16827#endif // defined(DEBUG) && defined(_TARGET_XARCH_)
16828
16829#if defined(DEBUG) && defined(_TARGET_X86_)
16830 if (opts.compStackCheckOnCall)
16831 {
16832 lvaCallSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallSpCheck"));
16833 lvaTable[lvaCallSpCheck].lvType = TYP_I_IMPL;
16834 }
16835#endif // defined(DEBUG) && defined(_TARGET_X86_)
16836
16837 /* Filter out unimported BBs */
16838
16839 fgRemoveEmptyBlocks();
16840
16841#ifdef DEBUG
16842 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
16843 fgDebugCheckBBlist(false, false);
16844#endif // DEBUG
16845
16846 EndPhase(PHASE_MORPH_INIT);
16847
16848 /* Inline */
16849 fgInline();
16850#if 0
16851 JITDUMP("trees after inlining\n");
16852 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
16853#endif
16854
16855 RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
16856
16857 EndPhase(PHASE_MORPH_INLINE);
16858
16859 // Transform each GT_ALLOCOBJ node into either an allocation helper call or
16860 // local variable allocation on the stack.
16861 ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS
16862
16863// TODO-ObjectStackAllocation: Enable the optimization for architectures using
16864// JIT32_GCENCODER (i.e., x86).
16865#ifndef JIT32_GCENCODER
16866 if (JitConfig.JitObjectStackAllocation() && opts.OptimizationEnabled())
16867 {
16868 objectAllocator.EnableObjectStackAllocation();
16869 }
16870#endif // JIT32_GCENCODER
16871
16872 objectAllocator.Run();
16873
16874 /* Add any internal blocks/trees we may need */
16875
16876 fgAddInternal();
16877
16878#ifdef DEBUG
16879 /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
16880 fgDebugCheckBBlist(false, false);
16881 /* Inliner could clone some trees. */
16882 fgDebugCheckNodesUniqueness();
16883#endif // DEBUG
16884
16885 fgRemoveEmptyTry();
16886
16887 EndPhase(PHASE_EMPTY_TRY);
16888
16889 fgRemoveEmptyFinally();
16890
16891 EndPhase(PHASE_EMPTY_FINALLY);
16892
16893 fgMergeFinallyChains();
16894
16895 EndPhase(PHASE_MERGE_FINALLY_CHAINS);
16896
16897 fgCloneFinally();
16898
16899 EndPhase(PHASE_CLONE_FINALLY);
16900
16901 fgUpdateFinallyTargetFlags();
16902
16903 /* For x64 and ARM64 we need to mark irregular parameters */
16904
16905 lvaRefCountState = RCS_EARLY;
16906 fgMarkImplicitByRefArgs();
16907
16908 /* Promote struct locals if necessary */
16909 fgPromoteStructs();
16910
    /* Now is the time to figure out which locals are address-taken. */
16912 fgMarkAddressExposedLocals();
16913
16914 EndPhase(PHASE_STR_ADRLCL);
16915
16916 /* Apply the type update to implicit byref parameters; also choose (based on address-exposed
16917 analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */
16918 fgRetypeImplicitByRefArgs();
16919
16920#ifdef DEBUG
    /* Now that address-taken and implicit byref locals are marked, we can safely apply stress. */
16922 lvaStressLclFld();
16923 fgStress64RsltMul();
16924#endif // DEBUG
16925
16926 EndPhase(PHASE_MORPH_IMPBYREF);
16927
16928 /* Morph the trees in all the blocks of the method */
16929
16930 fgMorphBlocks();
16931
16932 /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */
16933 fgMarkDemotedImplicitByRefArgs();
16934 lvaRefCountState = RCS_INVALID;
16935
16936 EndPhase(PHASE_MORPH_GLOBAL);
16937
16938#if 0
16939 JITDUMP("trees after fgMorphBlocks\n");
16940 DBEXEC(VERBOSE, fgDispBasicBlocks(true));
16941#endif
16942
16943#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
16944 if (fgNeedToAddFinallyTargetBits)
16945 {
16946 // We previously wiped out the BBF_FINALLY_TARGET bits due to some morphing; add them back.
16947 fgAddFinallyTargetFlags();
16948 fgNeedToAddFinallyTargetBits = false;
16949 }
16950#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
16951
16952 /* Decide the kind of code we want to generate */
16953
16954 fgSetOptions();
16955
16956 fgExpandQmarkNodes();
16957
16958#ifdef DEBUG
16959 compCurBB = nullptr;
16960#endif // DEBUG
16961}
16962
16963/*****************************************************************************
16964 *
16965 * Promoting struct locals
16966 */
16967void Compiler::fgPromoteStructs()
16968{
16969#ifdef DEBUG
16970 if (verbose)
16971 {
16972 printf("*************** In fgPromoteStructs()\n");
16973 }
16974#endif // DEBUG
16975
16976 if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
16977 {
16978 JITDUMP(" promotion opt flag not enabled\n");
16979 return;
16980 }
16981
16982 if (fgNoStructPromotion)
16983 {
16984 JITDUMP(" promotion disabled by JitNoStructPromotion\n");
16985 return;
16986 }
16987
16988#if 0
16989 // The code in this #if has been useful in debugging struct promotion issues, by
    // allowing selective enablement of the struct promotion optimization according to
16991 // method hash.
16992#ifdef DEBUG
16993 unsigned methHash = info.compMethodHash();
16994 char* lostr = getenv("structpromohashlo");
16995 unsigned methHashLo = 0;
16996 if (lostr != NULL)
16997 {
16998 sscanf_s(lostr, "%x", &methHashLo);
16999 }
17000 char* histr = getenv("structpromohashhi");
17001 unsigned methHashHi = UINT32_MAX;
17002 if (histr != NULL)
17003 {
17004 sscanf_s(histr, "%x", &methHashHi);
17005 }
17006 if (methHash < methHashLo || methHash > methHashHi)
17007 {
17008 return;
17009 }
17010 else
17011 {
17012 printf("Promoting structs for method %s, hash = 0x%x.\n",
17013 info.compFullName, info.compMethodHash());
17014 printf(""); // in our logic this causes a flush
17015 }
17016#endif // DEBUG
17017#endif // 0
17018
17019 if (info.compIsVarArgs)
17020 {
17021 JITDUMP(" promotion disabled because of varargs\n");
17022 return;
17023 }
17024
17025#ifdef DEBUG
17026 if (verbose)
17027 {
17028 printf("\nlvaTable before fgPromoteStructs\n");
17029 lvaTableDump();
17030 }
17031#endif // DEBUG
17032
17033 // The lvaTable might grow as we grab temps. Make a local copy here.
17034 unsigned startLvaCount = lvaCount;
17035
17036 //
    // Loop through the original lvaTable, looking for struct locals to be promoted.
17038 //
17039 lvaStructPromotionInfo structPromotionInfo;
17040 bool tooManyLocalsReported = false;
17041
17042 for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
17043 {
17044 // Whether this var got promoted
17045 bool promotedVar = false;
17046 LclVarDsc* varDsc = &lvaTable[lclNum];
17047
17048 // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
17049 // its fields. Instead, we will attempt to enregister the entire struct.
17050 if (varDsc->lvIsSIMDType() && (varDsc->lvIsUsedInSIMDIntrinsic() || isOpaqueSIMDLclVar(varDsc)))
17051 {
17052 varDsc->lvRegStruct = true;
17053 }
17054 // Don't promote if we have reached the tracking limit.
17055 else if (lvaHaveManyLocals())
17056 {
            // Print the message the first time we detect this condition
17058 if (!tooManyLocalsReported)
17059 {
17060 JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
17061 }
17062 tooManyLocalsReported = true;
17063 }
17064 else if (varTypeIsStruct(varDsc))
17065 {
17066 assert(structPromotionHelper != nullptr);
17067 promotedVar = structPromotionHelper->TryPromoteStructVar(lclNum);
17068 }
17069
17070 if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
17071 {
17072 // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
17073 // we will treat it as a reg struct.
17074 varDsc->lvRegStruct = true;
17075 }
17076 }
17077
17078#ifdef _TARGET_ARM_
17079 if (structPromotionHelper->GetRequiresScratchVar())
17080 {
17081 // Ensure that the scratch variable is allocated, in case we
17082 // pass a promoted struct as an argument.
17083 if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
17084 {
17085 lvaPromotedStructAssemblyScratchVar =
17086 lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
17087 lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
17088 }
17089 }
17090#endif // _TARGET_ARM_
17091
17092#ifdef DEBUG
17093 if (verbose)
17094 {
17095 printf("\nlvaTable after fgPromoteStructs\n");
17096 lvaTableDump();
17097 }
17098#endif // DEBUG
17099}
17100
17101void Compiler::fgMorphStructField(GenTree* tree, GenTree* parent)
17102{
17103 noway_assert(tree->OperGet() == GT_FIELD);
17104
17105 GenTreeField* field = tree->AsField();
17106 GenTree* objRef = field->gtFldObj;
17107 GenTree* obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
17108 noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
17109
17110 /* Is this an instance data member? */
17111
17112 if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
17113 {
17114 unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
17115 const LclVarDsc* varDsc = &lvaTable[lclNum];
17116
17117 if (varTypeIsStruct(obj))
17118 {
17119 if (varDsc->lvPromoted)
17120 {
17121 // Promoted struct
17122 unsigned fldOffset = field->gtFldOffset;
17123 unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17124
17125 if (fieldLclIndex == BAD_VAR_NUM)
17126 {
17127 // Access a promoted struct's field with an offset that doesn't correspond to any field.
17128 // It can happen if the struct was cast to another struct with different offsets.
17129 return;
17130 }
17131
17132 const LclVarDsc* fieldDsc = &lvaTable[fieldLclIndex];
17133 var_types fieldType = fieldDsc->TypeGet();
17134
17135 assert(fieldType != TYP_STRUCT); // promoted LCL_VAR can't have a struct type.
17136 if (tree->TypeGet() != fieldType)
17137 {
17138 if (tree->TypeGet() != TYP_STRUCT)
17139 {
                        // This is going to be an incorrect instruction promotion.
                        // For example, when we try to read an int as a long.
17142 return;
17143 }
17144
17145 if (field->gtFldHnd != fieldDsc->lvFieldHnd)
17146 {
17147 CORINFO_CLASS_HANDLE fieldTreeClass = nullptr, fieldDscClass = nullptr;
17148
17149 CorInfoType fieldTreeType = info.compCompHnd->getFieldType(field->gtFldHnd, &fieldTreeClass);
17150 CorInfoType fieldDscType = info.compCompHnd->getFieldType(fieldDsc->lvFieldHnd, &fieldDscClass);
17151 if (fieldTreeType != fieldDscType || fieldTreeClass != fieldDscClass)
17152 {
                            // Accessing the promoted field with a different class handle; can't check that the types match.
17154 return;
17155 }
17156 // Access the promoted field as a field of a non-promoted struct with the same class handle.
17157 }
17158#ifdef DEBUG
17159 else if (tree->TypeGet() == TYP_STRUCT)
17160 {
                        // The field tree accesses it as a struct, but the promoted lcl var for the field
                        // says that it has another type. This can happen only if struct promotion faked the
                        // field type for a struct with a single scalar field aligned at its natural boundary.
17164 assert(structPromotionHelper != nullptr);
17165 structPromotionHelper->CheckRetypedAsScalar(field->gtFldHnd, fieldType);
17166 }
17167#endif // DEBUG
17168 }
17169
17170 tree->SetOper(GT_LCL_VAR);
17171 tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
17172 tree->gtType = fieldType;
17173 tree->gtFlags &= GTF_NODE_MASK;
17174 tree->gtFlags &= ~GTF_GLOB_REF;
17175
17176 if (parent->gtOper == GT_ASG)
17177 {
17178 if (parent->gtOp.gtOp1 == tree)
17179 {
17180 tree->gtFlags |= GTF_VAR_DEF;
17181 tree->gtFlags |= GTF_DONT_CSE;
17182 }
17183
17184 // Promotion of struct containing struct fields where the field
17185 // is a struct with a single pointer sized scalar type field: in
17186 // this case struct promotion uses the type of the underlying
17187 // scalar field as the type of struct field instead of recursively
17188 // promoting. This can lead to a case where we have a block-asgn
17189 // with its RHS replaced with a scalar type. Mark RHS value as
17190 // DONT_CSE so that assertion prop will not do const propagation.
17191 // The reason this is required is that if RHS of a block-asg is a
17192 // constant, then it is interpreted as init-block incorrectly.
17193 //
17194 // TODO - This can also be avoided if we implement recursive struct
17195 // promotion, tracked by #10019.
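                    // For example (illustrative): with "struct Wrap { void* p; }" as a field of a
                    // promoted struct, the Wrap field is retyped to TYP_I_IMPL, so the RHS of a
                    // block-asg like "someWrap = outer.wrapField" becomes that scalar field local.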
17196 if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
17197 {
17198 tree->gtFlags |= GTF_DONT_CSE;
17199 }
17200 }
17201#ifdef DEBUG
17202 if (verbose)
17203 {
17204 printf("Replacing the field in promoted struct with local var V%02u\n", fieldLclIndex);
17205 }
17206#endif // DEBUG
17207 }
17208 }
17209 else
17210 {
17211 // Normed struct
17212 // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
17213 // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
17214 // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
17215 // there is one extremely rare case where that won't be true. An enum type is a special value type
            // that contains exactly one element of a primitive integer type (which, for CLS programs, is named
17217 // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
17218 // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
17219 // ldfld. For example:
17220 //
17221 // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
17222 // {
17223 // .field public specialname rtspecialname int16 value__
17224 // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
17225 // }
17226 // .method public hidebysig static void Main() cil managed
17227 // {
17228 // .locals init (valuetype mynamespace.e_t V_0)
17229 // ...
17230 // ldloca.s V_0
17231 // ldflda int16 mynamespace.e_t::value__
17232 // ...
17233 // }
17234 //
17235 // Normally, compilers will not generate the ldflda, since it is superfluous.
17236 //
17237 // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
17238 // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
17239 // mismatch like this, don't do this morphing. The local var may end up getting marked as
17240 // address taken, and the appropriate SHORT load will be done from memory in that case.
17241
17242 if (tree->TypeGet() == obj->TypeGet())
17243 {
17244 tree->ChangeOper(GT_LCL_VAR);
17245 tree->gtLclVarCommon.SetLclNum(lclNum);
17246 tree->gtFlags &= GTF_NODE_MASK;
17247
17248 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17249 {
17250 tree->gtFlags |= GTF_VAR_DEF;
17251 tree->gtFlags |= GTF_DONT_CSE;
17252 }
17253#ifdef DEBUG
17254 if (verbose)
17255 {
17256 printf("Replacing the field in normed struct with local var V%02u\n", lclNum);
17257 }
17258#endif // DEBUG
17259 }
17260 }
17261 }
17262}
17263
17264void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent)
17265{
17266 noway_assert(tree->OperGet() == GT_LCL_FLD);
17267
17268 unsigned lclNum = tree->gtLclFld.gtLclNum;
17269 LclVarDsc* varDsc = &lvaTable[lclNum];
17270
17271 if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
17272 {
17273 // Promoted struct
17274 unsigned fldOffset = tree->gtLclFld.gtLclOffs;
17275 unsigned fieldLclIndex = 0;
17276 LclVarDsc* fldVarDsc = nullptr;
17277
17278 if (fldOffset != BAD_VAR_NUM)
17279 {
17280 fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
17281 noway_assert(fieldLclIndex != BAD_VAR_NUM);
17282 fldVarDsc = &lvaTable[fieldLclIndex];
17283 }
17284
17285 if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
17286#ifdef _TARGET_X86_
17287 && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
17288#endif
17289 )
17290 {
17291 // There is an existing sub-field we can use.
17292 tree->gtLclFld.SetLclNum(fieldLclIndex);
17293
17294 // The field must be an enregisterable type; otherwise it would not be a promoted field.
17295 // The tree type may not match, e.g. for return types that have been morphed, but both
17296 // must be enregisterable types.
17297 // TODO-Cleanup: varTypeCanReg should presumably return true for SIMD types, but
17298 // there may be places where that would violate existing assumptions.
17299 var_types treeType = tree->TypeGet();
17300 var_types fieldType = fldVarDsc->TypeGet();
17301 assert((varTypeCanReg(treeType) || varTypeIsSIMD(treeType)) &&
17302 (varTypeCanReg(fieldType) || varTypeIsSIMD(fieldType)));
17303
17304 tree->ChangeOper(GT_LCL_VAR);
17305 assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
17306 tree->gtType = fldVarDsc->TypeGet();
17307#ifdef DEBUG
17308 if (verbose)
17309 {
17310 printf("Replacing the GT_LCL_FLD in promoted struct with local var V%02u\n", fieldLclIndex);
17311 }
17312#endif // DEBUG
17313
17314 if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
17315 {
17316 tree->gtFlags |= GTF_VAR_DEF;
17317 tree->gtFlags |= GTF_DONT_CSE;
17318 }
17319 }
17320 else
17321 {
            // There is no existing field that has all the parts that we need,
            // so we must ensure that the struct lives in memory.
17324 lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
17325
17326#ifdef DEBUG
17327 // We can't convert this guy to a float because he really does have his
            // address taken.
17329 varDsc->lvKeepType = 1;
17330#endif // DEBUG
17331 }
17332 }
17333}
17334
17335//------------------------------------------------------------------------
17336// fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference";
17337// i.e. which the ABI requires to be passed by making a copy in the caller and
17338// passing its address to the callee. Mark their `LclVarDsc`s such that
17339// `lvaIsImplicitByRefLocal` will return true for them.
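//
// Notes:
//    For example, on Windows x64 a struct argument whose size is not 1, 2, 4 or 8 bytes
//    (e.g. a 12-byte struct) is passed by making a copy in the caller and passing a pointer
//    to that copy, so its parameter local gets marked here as an implicit byref.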
17340
17341void Compiler::fgMarkImplicitByRefArgs()
17342{
17343#if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)
17344#ifdef DEBUG
17345 if (verbose)
17346 {
        printf("\n*************** In fgMarkImplicitByRefArgs()\n");
17348 }
17349#endif // DEBUG
17350
17351 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
17352 {
17353 LclVarDsc* varDsc = &lvaTable[lclNum];
17354
17355 if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
17356 {
17357 size_t size;
17358
17359 if (varDsc->lvSize() > REGSIZE_BYTES)
17360 {
17361 size = varDsc->lvSize();
17362 }
17363 else
17364 {
17365 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17366 size = info.compCompHnd->getClassSize(typeHnd);
17367 }
17368
17369#if defined(_TARGET_AMD64_)
17370 if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
17371#elif defined(_TARGET_ARM64_)
17372 if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc, this->info.compIsVarArgs))
17373#endif
17374 {
                // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local,
                // so I am now using it to indicate that this is one of the weird implicit
                // by-ref locals.
                // The address-taken cleanup will look for references to locals marked like
                // this, and transform them appropriately.
17380 varDsc->lvIsTemp = 1;
17381
17382 // Clear the ref count field; fgMarkAddressTakenLocals will increment it per
17383 // appearance of implicit-by-ref param so that call arg morphing can do an
17384 // optimization for single-use implicit-by-ref params whose single use is as
17385 // an outgoing call argument.
17386 varDsc->setLvRefCnt(0, RCS_EARLY);
17387 }
17388 }
17389 }
17390
17391#endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_
17392}
17393
17394//------------------------------------------------------------------------
17395// fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
17396// struct to pointer). Also choose (based on address-exposed analysis)
17397// which struct promotions of implicit byrefs to keep or discard.
17398// For those which are kept, insert the appropriate initialization code.
17399// For those which are to be discarded, annotate the promoted field locals
17400// so that fgMorphImplicitByRefArgs will know to rewrite their appearances
17401// using indirections off the pointer parameters.
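//
// Notes:
//    After this phase an implicit byref parameter is typed as TYP_BYREF; if its
//    promotion is kept, a new promoted struct temp is created and initialized
//    from the parameter at method entry, and the promoted field locals are
//    re-parented to that temp.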
17402
17403void Compiler::fgRetypeImplicitByRefArgs()
17404{
17405#if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)
17406#ifdef DEBUG
17407 if (verbose)
17408 {
17409 printf("\n*************** In fgRetypeImplicitByRefArgs()\n");
17410 }
17411#endif // DEBUG
17412
17413 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
17414 {
17415 LclVarDsc* varDsc = &lvaTable[lclNum];
17416
17417 if (lvaIsImplicitByRefLocal(lclNum))
17418 {
17419 size_t size;
17420
17421 if (varDsc->lvSize() > REGSIZE_BYTES)
17422 {
17423 size = varDsc->lvSize();
17424 }
17425 else
17426 {
17427 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
17428 size = info.compCompHnd->getClassSize(typeHnd);
17429 }
17430
17431 if (varDsc->lvPromoted)
17432 {
17433 // This implicit-by-ref was promoted; create a new temp to represent the
17434 // promoted struct before rewriting this parameter as a pointer.
17435 unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
17436 lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true);
17437 if (info.compIsVarArgs)
17438 {
17439 lvaSetStructUsedAsVarArg(newLclNum);
17440 }
17441
17442 // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
17443 varDsc = &lvaTable[lclNum];
17444
17445 // Copy the struct promotion annotations to the new temp.
17446 LclVarDsc* newVarDsc = &lvaTable[newLclNum];
17447 newVarDsc->lvPromoted = true;
17448 newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
17449 newVarDsc->lvFieldCnt = varDsc->lvFieldCnt;
17450 newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
17451 newVarDsc->lvCustomLayout = varDsc->lvCustomLayout;
17452#ifdef DEBUG
17453 newVarDsc->lvKeepType = true;
17454#endif // DEBUG
17455
17456 // Propagate address-taken-ness and do-not-enregister-ness.
17457 newVarDsc->lvAddrExposed = varDsc->lvAddrExposed;
17458 newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
17459#ifdef DEBUG
17460 newVarDsc->lvLclBlockOpAddr = varDsc->lvLclBlockOpAddr;
17461 newVarDsc->lvLclFieldExpr = varDsc->lvLclFieldExpr;
17462 newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
17463 newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
17464 newVarDsc->lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall;
17465#endif // DEBUG
17466
17467 // If the promotion is dependent, the promoted temp would just be committed
17468 // to memory anyway, so we'll rewrite its appearances to be indirections
17469 // through the pointer parameter, the same as we'd do for this
17470 // parameter if it weren't promoted at all (otherwise the initialization
17471 // of the new temp would just be a needless memcpy at method entry).
17472 bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
17473 (varDsc->lvRefCnt(RCS_EARLY) <= varDsc->lvFieldCnt);
17474
17475 if (!undoPromotion)
17476 {
17477 // Insert IR that initializes the temp from the parameter.
17478 // LHS is a simple reference to the temp.
17479 fgEnsureFirstBBisScratch();
17480 GenTree* lhs = gtNewLclvNode(newLclNum, varDsc->lvType);
17481 // RHS is an indirection (using GT_OBJ) off the parameter.
17482 GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF);
17483 GenTree* rhs = gtNewBlockVal(addr, (unsigned)size);
17484 GenTree* assign = gtNewAssignNode(lhs, rhs);
17485 fgInsertStmtAtBeg(fgFirstBB, assign);
17486 }
17487
17488 // Update the locals corresponding to the promoted fields.
17489 unsigned fieldLclStart = varDsc->lvFieldLclStart;
17490 unsigned fieldCount = varDsc->lvFieldCnt;
17491 unsigned fieldLclStop = fieldLclStart + fieldCount;
17492
17493 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
17494 {
17495 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
17496
17497 if (undoPromotion)
17498 {
17499 // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs
17500 // will know to rewrite appearances of this local.
17501 assert(fieldVarDsc->lvParentLcl == lclNum);
17502 }
17503 else
17504 {
17505 // Set the new parent.
17506 fieldVarDsc->lvParentLcl = newLclNum;
                        // Clear the ref count field; it is used to communicate the number of references
17508 // to the implicit byref parameter when morphing calls that pass the implicit byref
17509 // out as an outgoing argument value, but that doesn't pertain to this field local
17510 // which is now a field of a non-arg local.
17511 fieldVarDsc->setLvRefCnt(0, RCS_EARLY);
17512 }
17513
17514 fieldVarDsc->lvIsParam = false;
17515 // The fields shouldn't inherit any register preferences from
17516 // the parameter which is really a pointer to the struct.
17517 fieldVarDsc->lvIsRegArg = false;
17518 fieldVarDsc->lvIsMultiRegArg = false;
17519 fieldVarDsc->lvSetIsHfaRegArg(false);
17520 fieldVarDsc->lvArgReg = REG_NA;
17521#if FEATURE_MULTIREG_ARGS
17522 fieldVarDsc->lvOtherArgReg = REG_NA;
17523#endif
17524 }
17525
17526 // Hijack lvFieldLclStart to record the new temp number.
17527 // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
17528 varDsc->lvFieldLclStart = newLclNum;
17529 // Go ahead and clear lvFieldCnt -- either we're promoting
17530 // a replacement temp or we're not promoting this arg, and
17531 // in either case the parameter is now a pointer that doesn't
17532 // have these fields.
17533 varDsc->lvFieldCnt = 0;
17534
17535 // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs
17536 // whether references to the struct should be rewritten as
17537 // indirections off the pointer (not promoted) or references
17538 // to the new struct local (promoted).
17539 varDsc->lvPromoted = !undoPromotion;
17540 }
17541 else
17542 {
17543 // The "undo promotion" path above clears lvPromoted for args that struct
17544 // promotion wanted to promote but that aren't considered profitable to
17545 // rewrite. It hijacks lvFieldLclStart to communicate to
17546 // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left
17547 // on such args for fgMorphImplicitByRefArgs to consult in the interim.
17548 // Here we have an arg that was simply never promoted, so make sure it doesn't
17549 // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs
17550 // and fgMarkDemotedImplicitByRefArgs.
17551 assert(varDsc->lvFieldLclStart == 0);
17552 }
17553
17554 // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
17555 varDsc->lvType = TYP_BYREF;
17556
            // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF,
            // make sure that the following flag is not set, as it would force SSA to
            // exclude tracking/enregistering these LclVars. (see SsaBuilder::IncludeInSsa)
17560 //
17561 varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
17562
17563 // The struct parameter may have had its address taken, but the pointer parameter
17564 // cannot -- any uses of the struct parameter's address are uses of the pointer
17565 // parameter's value, and there's no way for the MSIL to reference the pointer
17566 // parameter's address. So clear the address-taken bit for the parameter.
17567 varDsc->lvAddrExposed = 0;
17568 varDsc->lvDoNotEnregister = 0;
17569
17570#ifdef DEBUG
17571 // This should not be converted to a double in stress mode,
17572 // because it is really a pointer
17573 varDsc->lvKeepType = 1;
17574
17575 if (verbose)
17576 {
17577 printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
17578 }
17579#endif // DEBUG
17580 }
17581 }
17582
17583#endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_
17584}
17585
17586//------------------------------------------------------------------------
17587// fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion
17588// asked to promote. Appearances of these have now been rewritten
17589// (by fgMorphImplicitByRefArgs) using indirections from the pointer
17590// parameter or references to the promotion temp, as appropriate.
17591
17592void Compiler::fgMarkDemotedImplicitByRefArgs()
17593{
17594#if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_)
17595
17596 for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
17597 {
17598 LclVarDsc* varDsc = &lvaTable[lclNum];
17599
17600 if (lvaIsImplicitByRefLocal(lclNum))
17601 {
17602 if (varDsc->lvPromoted)
17603 {
17604 // The parameter is simply a pointer now, so clear lvPromoted. It was left set
17605 // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that
17606 // appearances of this arg needed to be rewritten to a new promoted struct local.
17607 varDsc->lvPromoted = false;
17608
17609 // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs
17610 // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one.
17611 varDsc->lvFieldLclStart = 0;
17612 }
17613 else if (varDsc->lvFieldLclStart != 0)
17614 {
17615 // We created new temps to represent a promoted struct corresponding to this
17616 // parameter, but decided not to go through with the promotion and have
17617 // rewritten all uses as indirections off the pointer parameter.
                // We stashed the new struct temp's local number in lvFieldLclStart; make
17619 // note of that and clear the annotation.
17620 unsigned structLclNum = varDsc->lvFieldLclStart;
17621 varDsc->lvFieldLclStart = 0;
17622
17623 // Clear the arg's ref count; this was set during address-taken analysis so that
17624 // call morphing could identify single-use implicit byrefs; we're done with
17625 // that, and want it to be in its default state of zero when we go to set
17626 // real ref counts for all variables.
17627 varDsc->setLvRefCnt(0, RCS_EARLY);
17628
17629 // The temp struct is now unused; set flags appropriately so that we
17630 // won't allocate space for it on the stack.
17631 LclVarDsc* structVarDsc = &lvaTable[structLclNum];
17632 structVarDsc->setLvRefCnt(0, RCS_EARLY);
17633 structVarDsc->lvAddrExposed = false;
17634#ifdef DEBUG
17635 structVarDsc->lvUnusedStruct = true;
17636#endif // DEBUG
17637
17638 unsigned fieldLclStart = structVarDsc->lvFieldLclStart;
17639 unsigned fieldCount = structVarDsc->lvFieldCnt;
17640 unsigned fieldLclStop = fieldLclStart + fieldCount;
17641
17642 for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
17643 {
17644 // Fix the pointer to the parent local.
17645 LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
17646 assert(fieldVarDsc->lvParentLcl == lclNum);
17647 fieldVarDsc->lvParentLcl = structLclNum;
17648
17649 // The field local is now unused; set flags appropriately so that
17650 // we won't allocate stack space for it.
17651 fieldVarDsc->setLvRefCnt(0, RCS_EARLY);
17652 fieldVarDsc->lvAddrExposed = false;
17653 }
17654 }
17655 }
17656 }
17657
17658#endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_
17659}
17660
17661/*****************************************************************************
17662 *
 *  Morph irregular parameters: for x64 and ARM64 this means turning them into
 *  byrefs and adding extra indirs.
17665 */
17666bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree)
17667{
17668#if (!defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)) && !defined(_TARGET_ARM64_)
17669
17670 return false;
17671
17672#else // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_
17673
17674 bool changed = false;
17675
17676 // Implicit byref morphing needs to know if the reference to the parameter is a
17677 // child of GT_ADDR or not, so this method looks one level down and does the
17678 // rewrite whenever a child is a reference to an implicit byref parameter.
17679 if (tree->gtOper == GT_ADDR)
17680 {
17681 if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
17682 {
17683 GenTree* morphedTree = fgMorphImplicitByRefArgs(tree, true);
17684 changed = (morphedTree != nullptr);
17685 assert(!changed || (morphedTree == tree));
17686 }
17687 }
17688 else
17689 {
17690 for (GenTree** pTree : tree->UseEdges())
17691 {
17692 GenTree* childTree = *pTree;
17693 if (childTree->gtOper == GT_LCL_VAR)
17694 {
17695 GenTree* newChildTree = fgMorphImplicitByRefArgs(childTree, false);
17696 if (newChildTree != nullptr)
17697 {
17698 changed = true;
17699 *pTree = newChildTree;
17700 }
17701 }
17702 }
17703 }
17704
17705 return changed;
17706#endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_
17707}
17708
17709GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr)
17710{
17711 assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
17712 assert(isAddr == (tree->gtOper == GT_ADDR));
17713
17714 GenTree* lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
17715 unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
17716 LclVarDsc* lclVarDsc = &lvaTable[lclNum];
17717
17718 CORINFO_FIELD_HANDLE fieldHnd;
17719 unsigned fieldOffset = 0;
17720 var_types fieldRefType = TYP_UNKNOWN;
17721
17722 if (lvaIsImplicitByRefLocal(lclNum))
17723 {
17724 // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
17725 // re-invoke the traversal to mark address-taken locals.
17726 // So, we may encounter a tree that has already been transformed to TYP_BYREF.
17727 // If we do, leave it as-is.
17728 if (!varTypeIsStruct(lclVarTree))
17729 {
17730 assert(lclVarTree->TypeGet() == TYP_BYREF);
17731
17732 return nullptr;
17733 }
17734 else if (lclVarDsc->lvPromoted)
17735 {
17736 // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this
17737 // arg. Rewrite this to refer to the new local.
17738 assert(lclVarDsc->lvFieldLclStart != 0);
17739 lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart);
17740 return tree;
17741 }
17742
17743 fieldHnd = nullptr;
17744 }
17745 else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl))
17746 {
17747 // This was a field reference to an implicit-by-reference struct parameter that was
17748 // dependently promoted; update it to a field reference off the pointer.
17749 // Grab the field handle from the struct field lclVar.
17750 fieldHnd = lclVarDsc->lvFieldHnd;
17751 fieldOffset = lclVarDsc->lvFldOffset;
17752 assert(fieldHnd != nullptr);
17753 // Update lclNum/lclVarDsc to refer to the parameter
17754 lclNum = lclVarDsc->lvParentLcl;
17755 lclVarDsc = &lvaTable[lclNum];
17756 fieldRefType = lclVarTree->TypeGet();
17757 }
17758 else
17759 {
        // We only need to transform the 'marked' implicit by ref parameters
17761 return nullptr;
17762 }
17763
17764 // This is no longer a def of the lclVar, even if it WAS a def of the struct.
17765 lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
17766
17767 if (isAddr)
17768 {
17769 if (fieldHnd == nullptr)
17770 {
17771 // change &X into just plain X
17772 tree->ReplaceWith(lclVarTree, this);
17773 tree->gtType = TYP_BYREF;
17774 }
17775 else
17776 {
17777 // change &(X.f) [i.e. GT_ADDR of local for promoted arg field]
17778 // into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param]
17779 lclVarTree->gtLclVarCommon.SetLclNum(lclNum);
17780 lclVarTree->gtType = TYP_BYREF;
17781 tree->gtOp.gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset);
17782 }
17783
17784#ifdef DEBUG
17785 if (verbose)
17786 {
17787 printf("Replacing address of implicit by ref struct parameter with byref:\n");
17788 }
17789#endif // DEBUG
17790 }
17791 else
17792 {
17793 // Change X into OBJ(X) or FIELD(X, f)
17794 var_types structType = tree->gtType;
17795 tree->gtType = TYP_BYREF;
17796
17797 if (fieldHnd)
17798 {
17799 tree->gtLclVarCommon.SetLclNum(lclNum);
17800 tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset);
17801 }
17802 else
17803 {
17804 tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
17805 }
17806
17807 if (structType == TYP_STRUCT)
17808 {
17809 gtSetObjGcInfo(tree->AsObj());
17810 }
17811
17812 // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
17813 // we could remove TGTANYWHERE
17814 tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
17815
17816#ifdef DEBUG
17817 if (verbose)
17818 {
17819 printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
17820 }
17821#endif // DEBUG
17822 }
17823
17824#ifdef DEBUG
17825 if (verbose)
17826 {
17827 gtDispTree(tree);
17828 }
17829#endif // DEBUG
17830
17831 return tree;
17832}
17833
17834class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
17835{
17836 // During tree traversal every GenTree node produces a "value" that represents:
17837 // - the memory location associated with a local variable, including an offset
17838 // accumulated from GT_LCL_FLD and GT_FIELD nodes.
    //   - the address of a local variable memory location, including an offset as well.
17840 // - an unknown value - the result of a node we don't know how to process. This
17841 // also includes the result of TYP_VOID nodes (or any other nodes that don't
17842 // actually produce values in IR) in order to support the invariant that every
17843 // node produces a value.
17844 //
17845 // The existence of GT_ADDR nodes and their use together with GT_FIELD to form
17846 // FIELD/ADDR/FIELD/ADDR/LCL_VAR sequences complicate things a bit. A typical
17847 // GT_FIELD node acts like an indirection and should produce an unknown value,
17848 // local address analysis doesn't know or care what value the field stores.
17849 // But a GT_FIELD can also be used as an operand for a GT_ADDR node and then
    // the GT_FIELD node does not perform an indirection; it just represents a
17851 // location, similar to GT_LCL_VAR and GT_LCL_FLD.
17852 //
    // To avoid this issue, the semantics of GT_FIELD (and, for simplicity's sake, of any other
    // indirection) nodes deviate slightly from the IR semantics - an indirection does not
    // actually produce an unknown value but a location value, if the indirection's address
    // operand is an address value.
17857 //
17858 // The actual indirection is performed when the indirection's user node is processed:
17859 // - A GT_ADDR user turns the location value produced by the indirection back
17860 // into an address value.
17861 // - Any other user node performs the indirection and produces an unknown value.
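    //
    // For example, a tree like IND(ADDR(LCL_VAR V00)) is evaluated bottom-up as:
    //   - LCL_VAR V00 produces LOCATION(V00, 0)
    //   - ADDR(...) turns that into ADDRESS(V00, 0)
    //   - IND(...) turns the address back into LOCATION(V00, 0), a pattern that
    //     morph is expected to fold to a direct local access.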
17862 //
17863 class Value
17864 {
17865 GenTree* m_node;
17866 unsigned m_lclNum;
17867 unsigned m_offset;
17868 bool m_address;
17869 INDEBUG(bool m_consumed;)
17870
17871 public:
17872 // Produce an unknown value associated with the specified node.
17873 Value(GenTree* node)
17874 : m_node(node)
17875 , m_lclNum(BAD_VAR_NUM)
17876 , m_offset(0)
17877 , m_address(false)
17878#ifdef DEBUG
17879 , m_consumed(false)
17880#endif // DEBUG
17881 {
17882 }
17883
17884 // Get the node that produced this value.
17885 GenTree* Node() const
17886 {
17887 return m_node;
17888 }
17889
17890 // Does this value represent a location?
17891 bool IsLocation() const
17892 {
17893 return (m_lclNum != BAD_VAR_NUM) && !m_address;
17894 }
17895
17896 // Does this value represent the address of a location?
17897 bool IsAddress() const
17898 {
17899 assert((m_lclNum != BAD_VAR_NUM) || !m_address);
17900
17901 return m_address;
17902 }
17903
17904 // Get the location's variable number.
17905 unsigned LclNum() const
17906 {
17907 assert(IsLocation() || IsAddress());
17908
17909 return m_lclNum;
17910 }
17911
17912 // Get the location's byte offset.
17913 unsigned Offset() const
17914 {
17915 assert(IsLocation() || IsAddress());
17916
17917 return m_offset;
17918 }
17919
17920 //------------------------------------------------------------------------
17921 // Location: Produce a location value.
17922 //
17923 // Arguments:
17924 // lclNum - the local variable number
17925 // offset - the byte offset of the location (used for GT_LCL_FLD nodes)
17926 //
17927 // Notes:
17928 // - (lclnum, offset) => LOCATION(lclNum, offset)
17929 //
17930 void Location(unsigned lclNum, unsigned offset = 0)
17931 {
17932 assert(!IsLocation() && !IsAddress());
17933
17934 m_lclNum = lclNum;
17935 m_offset = offset;
17936 }
17937
17938 //------------------------------------------------------------------------
17939 // Address: Produce an address value from a location value.
17940 //
17941 // Arguments:
17942 // val - the input value
17943 //
17944 // Notes:
17945 // - LOCATION(lclNum, offset) => ADDRESS(lclNum, offset)
17946 // - ADDRESS(lclNum, offset) => invalid, we should never encounter something like ADDR(ADDR(...))
17947 // - UNKNOWN => UNKNOWN
17948 //
17949 void Address(Value& val)
17950 {
17951 assert(!IsLocation() && !IsAddress());
17952 assert(!val.IsAddress());
17953
17954 if (val.IsLocation())
17955 {
17956 m_address = true;
17957 m_lclNum = val.m_lclNum;
17958 m_offset = val.m_offset;
17959 }
17960
17961 INDEBUG(val.Consume();)
17962 }
17963
17964 //------------------------------------------------------------------------
17965 // Field: Produce a location value from an address value.
17966 //
17967 // Arguments:
17968 // val - the input value
17969 // offset - the offset to add to the existing location offset
17970 //
17971 // Return Value:
17972 // `true` if the value was consumed. `false` if the input value
        //    cannot be consumed because it is itself a location or because
17974 // the offset overflowed. In this case the caller is expected
17975 // to escape the input value.
17976 //
17977 // Notes:
17978 // - LOCATION(lclNum, offset) => not representable, must escape
17979 // - ADDRESS(lclNum, offset) => LOCATION(lclNum, offset + field.Offset)
17980 // if the offset overflows then location is not representable, must escape
17981 // - UNKNOWN => UNKNOWN
17982 //
17983 bool Field(Value& val, unsigned offset)
17984 {
17985 assert(!IsLocation() && !IsAddress());
17986
17987 if (val.IsLocation())
17988 {
17989 return false;
17990 }
17991
17992 if (val.IsAddress())
17993 {
17994 ClrSafeInt<unsigned> newOffset = ClrSafeInt<unsigned>(val.m_offset) + ClrSafeInt<unsigned>(offset);
17995
17996 if (newOffset.IsOverflow())
17997 {
17998 return false;
17999 }
18000
18001 m_lclNum = val.m_lclNum;
18002 m_offset = newOffset.Value();
18003 }
18004
18005 INDEBUG(val.Consume();)
18006 return true;
18007 }
18008
18009 //------------------------------------------------------------------------
18010 // Indir: Produce a location value from an address value.
18011 //
18012 // Arguments:
18013 // val - the input value
18014 //
18015 // Return Value:
18016 // `true` if the value was consumed. `false` if the input value
        //    cannot be consumed because it is itself a location. In this
18018 // case the caller is expected to escape the input value.
18019 //
18020 // Notes:
18021 // - LOCATION(lclNum, offset) => not representable, must escape
18022 // - ADDRESS(lclNum, offset) => LOCATION(lclNum, offset)
18023 // - UNKNOWN => UNKNOWN
18024 //
18025 bool Indir(Value& val)
18026 {
18027 assert(!IsLocation() && !IsAddress());
18028
18029 if (val.IsLocation())
18030 {
18031 return false;
18032 }
18033
18034 if (val.IsAddress())
18035 {
18036 m_lclNum = val.m_lclNum;
18037 m_offset = val.m_offset;
18038 }
18039
18040 INDEBUG(val.Consume();)
18041 return true;
18042 }
18043
18044#ifdef DEBUG
18045 void Consume()
18046 {
18047 assert(!m_consumed);
18048 // Mark the value as consumed so that PopValue can ensure that values
18049 // aren't popped from the stack without being processed appropriately.
18050 m_consumed = true;
18051 }
18052
18053 bool IsConsumed()
18054 {
18055 return m_consumed;
18056 }
18057#endif // DEBUG
18058 };
18059
18060 ArrayStack<Value> m_valueStack;
18061 INDEBUG(bool m_stmtModified;)
18062
18063public:
18064 enum
18065 {
18066 DoPreOrder = true,
18067 DoPostOrder = true,
18068 ComputeStack = true,
18069 DoLclVarsOnly = false,
18070 UseExecutionOrder = false,
18071 };
18072
18073 LocalAddressVisitor(Compiler* comp)
18074 : GenTreeVisitor<LocalAddressVisitor>(comp), m_valueStack(comp->getAllocator(CMK_LocalAddressVisitor))
18075 {
18076 }
18077
18078 void VisitStmt(GenTreeStmt* stmt)
18079 {
18080#ifdef DEBUG
18081 if (m_compiler->verbose)
18082 {
18083 printf("LocalAddressVisitor visiting statement:\n");
18084 m_compiler->gtDispTree(stmt);
18085 m_stmtModified = false;
18086 }
18087#endif // DEBUG
18088
18089 WalkTree(&stmt->gtStmtExpr, nullptr);
18090
        // We could have something like STMT(IND(ADDR(LCL_VAR))) so we need to escape
18092 // the location here. This doesn't seem to happen often, if ever. The importer
18093 // tends to wrap such a tree in a COMMA.
18094 if (TopValue(0).IsLocation())
18095 {
18096 EscapeLocation(TopValue(0), stmt);
18097 }
18098 else
18099 {
18100 // If we have an address on the stack then we don't need to do anything.
18101 // The address tree isn't actually used and it will be discarded during
18102 // morphing. So just mark any value as consumed to keep PopValue happy.
18103 INDEBUG(TopValue(0).Consume();)
18104 }
18105
18106 PopValue();
18107 assert(m_valueStack.Empty());
18108
18109#ifdef DEBUG
18110 if (m_compiler->verbose)
18111 {
18112 if (m_stmtModified)
18113 {
18114 printf("LocalAddressVisitor modified statement:\n");
18115 m_compiler->gtDispTree(stmt);
18116 }
18117
18118 printf("\n");
18119 }
18120#endif // DEBUG
18121 }
18122
    // Morph promoted struct fields and count implicit byref argument occurrences.
18124 // Also create and push the value produced by the visited node. This is done here
18125 // rather than in PostOrderVisit because it makes it easy to handle nodes with an
18126 // arbitrary number of operands - just pop values until the value corresponding
18127 // to the visited node is encountered.
18128 fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
18129 {
18130 GenTree* node = *use;
18131
18132 if (node->OperIs(GT_FIELD))
18133 {
18134 MorphStructField(node, user);
18135 }
18136 else if (node->OperIs(GT_LCL_FLD))
18137 {
18138 MorphLocalField(node, user);
18139 }
18140
18141 if (node->OperIsLocal())
18142 {
18143 unsigned lclNum = node->AsLclVarCommon()->GetLclNum();
18144
18145 LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
18146 if (varDsc->lvIsStructField)
18147 {
18148 // Promoted field, increase counter for the parent lclVar.
18149 assert(!m_compiler->lvaIsImplicitByRefLocal(lclNum));
18150 unsigned parentLclNum = varDsc->lvParentLcl;
18151 UpdateEarlyRefCountForImplicitByRef(parentLclNum);
18152 }
18153 else
18154 {
18155 UpdateEarlyRefCountForImplicitByRef(lclNum);
18156 }
18157 }
18158
18159 PushValue(node);
18160
18161 return Compiler::WALK_CONTINUE;
18162 }
18163
18164 // Evaluate a node. Since this is done in postorder, the node's operands have already been
18165 // evaluated and are available on the value stack. The value produced by the visited node
18166 // is left on the top of the evaluation stack.
18167 fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
18168 {
18169 GenTree* node = *use;
18170
18171 switch (node->OperGet())
18172 {
18173 case GT_LCL_VAR:
18174 assert(TopValue(0).Node() == node);
18175
18176 TopValue(0).Location(node->AsLclVar()->GetLclNum());
18177 break;
18178
18179 case GT_LCL_FLD:
18180 assert(TopValue(0).Node() == node);
18181
18182 TopValue(0).Location(node->AsLclFld()->GetLclNum(), node->AsLclFld()->gtLclOffs);
18183 break;
18184
18185 case GT_ADDR:
18186 assert(TopValue(1).Node() == node);
18187 assert(TopValue(0).Node() == node->gtGetOp1());
18188
18189 TopValue(1).Address(TopValue(0));
18190 PopValue();
18191 break;
18192
18193 case GT_FIELD:
18194 if (node->AsField()->gtFldObj != nullptr)
18195 {
18196 assert(TopValue(1).Node() == node);
18197 assert(TopValue(0).Node() == node->AsField()->gtFldObj);
18198
18199 if (!TopValue(1).Field(TopValue(0), node->AsField()->gtFldOffset))
18200 {
18201 // Either the address comes from a location value (e.g. FIELD(IND(...)))
18202 // or the field offset has overflowed.
18203 EscapeValue(TopValue(0), node);
18204 }
18205
18206 PopValue();
18207 }
18208 else
18209 {
18210 assert(TopValue(0).Node() == node);
18211 }
18212 break;
18213
18214 case GT_OBJ:
18215 case GT_BLK:
18216 case GT_IND:
18217 assert(TopValue(1).Node() == node);
18218 assert(TopValue(0).Node() == node->gtGetOp1());
18219
18220 if ((node->gtFlags & GTF_IND_VOLATILE) != 0)
18221 {
18222 // Volatile indirections must not be removed so the address,
18223 // if any, must be escaped.
18224 EscapeValue(TopValue(0), node);
18225 }
18226 else if (!TopValue(1).Indir(TopValue(0)))
18227 {
18228 // If the address comes from another indirection (e.g. IND(IND(...))
18229 // then we need to escape the location.
18230 EscapeLocation(TopValue(0), node);
18231 }
18232
18233 PopValue();
18234 break;
18235
18236 case GT_DYN_BLK:
18237 assert(TopValue(2).Node() == node);
18238 assert(TopValue(1).Node() == node->AsDynBlk()->Addr());
18239 assert(TopValue(0).Node() == node->AsDynBlk()->gtDynamicSize);
18240
18241 // The block size may be the result of an indirection so we need
18242 // to escape the location that may be associated with it.
18243 EscapeValue(TopValue(0), node);
18244
18245 if (!TopValue(2).Indir(TopValue(1)))
18246 {
18247 // If the address comes from another indirection (e.g. DYN_BLK(IND(...))
18248 // then we need to escape the location.
18249 EscapeLocation(TopValue(1), node);
18250 }
18251
18252 PopValue();
18253 PopValue();
18254 break;
18255
18256 default:
18257 while (TopValue(0).Node() != node)
18258 {
18259 EscapeValue(TopValue(0), node);
18260 PopValue();
18261 }
18262 break;
18263 }
18264
18265 assert(TopValue(0).Node() == node);
18266 return Compiler::WALK_CONTINUE;
18267 }
18268
18269private:
18270 void PushValue(GenTree* node)
18271 {
18272 m_valueStack.Push(node);
18273 }
18274
18275 Value& TopValue(unsigned index)
18276 {
18277 return m_valueStack.IndexRef(index);
18278 }
18279
18280 void PopValue()
18281 {
18282 assert(TopValue(0).IsConsumed());
18283 m_valueStack.Pop();
18284 }
18285
18286 //------------------------------------------------------------------------
18287 // EscapeValue: Process an escaped value
18288 //
18289 // Arguments:
18290 // val - the escaped address value
18291 // user - the node that uses the escaped value
18292 //
18293 void EscapeValue(Value& val, GenTree* user)
18294 {
18295 if (val.IsLocation())
18296 {
18297 EscapeLocation(val, user);
18298 }
18299 else if (val.IsAddress())
18300 {
18301 EscapeAddress(val, user);
18302 }
18303 else
18304 {
18305 INDEBUG(val.Consume();)
18306 }
18307 }
18308
18309 //------------------------------------------------------------------------
18310 // EscapeAddress: Process an escaped address value
18311 //
18312 // Arguments:
18313 // val - the escaped address value
18314 // user - the node that uses the address value
18315 //
18316 void EscapeAddress(Value& val, GenTree* user)
18317 {
18318 assert(val.IsAddress());
18319
18320 LclVarDsc* varDsc = m_compiler->lvaGetDesc(val.LclNum());
18321
18322 // In general we don't know how an exposed struct field address will be used - it may be used to
18323 // access only that specific field or it may be used to access other fields in the same struct
        // by using pointer/ref arithmetic. It seems reasonable to make an exception for the "this" arg
        // of calls - it would be highly unusual for a struct member method to attempt to access memory
18326 // beyond "this" instance. And calling struct member methods is common enough that attempting to
18327 // mark the entire struct as address exposed results in CQ regressions.
18328 bool isThisArg = user->IsCall() && (val.Node() == user->AsCall()->gtCallObjp);
18329 bool exposeParentLcl = varDsc->lvIsStructField && !isThisArg;
18330
18331 m_compiler->lvaSetVarAddrExposed(exposeParentLcl ? varDsc->lvParentLcl : val.LclNum());
18332
18333#ifdef _TARGET_64BIT_
18334 // If the address of a variable is passed in a call and the allocation size of the variable
18335 // is 32 bits we will quirk the size to 64 bits. Some PInvoke signatures incorrectly specify
18336 // a ByRef to an INT32 when they actually write a SIZE_T or INT64. There are cases where
        // overwriting these extra 4 bytes corrupts some data (such as a saved register), which leads
        // to an A/V, whereas previously the JIT64 codegen did not.
18339 if (!varDsc->lvIsParam && !varDsc->lvIsStructField && (genActualType(varDsc->TypeGet()) == TYP_INT))
18340 {
18341 // TODO-Cleanup: This should simply check if the user is a call node, not if a call ancestor exists.
18342 if (Compiler::gtHasCallOnStack(&m_ancestors))
18343 {
18344 varDsc->lvQuirkToLong = true;
18345 JITDUMP("Adding a quirk for the storage size of V%02u of type %s", val.LclNum(),
18346 varTypeName(varDsc->TypeGet()));
18347 }
18348 }
18349#endif // _TARGET_64BIT_
18350
18351 INDEBUG(val.Consume();)
18352 }
18353
18354 //------------------------------------------------------------------------
18355 // EscapeLocation: Process an escaped location value
18356 //
18357 // Arguments:
18358 // val - the escaped location value
18359 // user - the node that uses the location value
18360 //
18361 // Notes:
18362 // Unlike EscapeAddress, this does not necessarily mark the lclvar associated
18363 // with the value as address exposed. This is needed only if the indirection
18364 // is wider than the lclvar.
18365 //
18366 void EscapeLocation(Value& val, GenTree* user)
18367 {
18368 assert(val.IsLocation());
18369
18370 GenTree* node = val.Node();
18371
18372 if (node->OperIs(GT_LCL_VAR, GT_LCL_FLD))
18373 {
18374 // If the location is accessed directly then we don't need to do anything.
18375
18376 assert(node->AsLclVarCommon()->GetLclNum() == val.LclNum());
18377 }
18378 else
18379 {
18380 // Otherwise it must be accessed through some kind of indirection. Usually this is
18381 // something like IND(ADDR(LCL_VAR)), global morph will change it to GT_LCL_VAR or
18382 // GT_LCL_FLD so the lclvar does not need to be address exposed.
18383 //
18384 // However, it is possible for the indirection to be wider than the lclvar
18385 // (e.g. *(long*)&int32Var) or to have a field offset that pushes the indirection
18386 // past the end of the lclvar memory location. In such cases morph doesn't do
18387 // anything so the lclvar needs to be address exposed.
18388 //
18389 // More importantly, if the lclvar is a promoted struct field then the parent lclvar
18390 // also needs to be address exposed so we get dependent struct promotion. Code like
18391 // *(long*)&int32Var has undefined behavior and it's practically useless but reading,
18392 // say, 2 consecutive Int32 struct fields as Int64 has more practical value.
18393
18394 LclVarDsc* varDsc = m_compiler->lvaGetDesc(val.LclNum());
18395 unsigned indirSize = GetIndirSize(node, user);
18396 bool isWide;
18397
18398 if (indirSize == 0)
18399 {
18400 // If we can't figure out the indirection size then treat it as a wide indirection.
18401 isWide = true;
18402 }
18403 else
18404 {
18405 ClrSafeInt<unsigned> endOffset = ClrSafeInt<unsigned>(val.Offset()) + ClrSafeInt<unsigned>(indirSize);
18406
18407 if (endOffset.IsOverflow())
18408 {
18409 isWide = true;
18410 }
18411 else if (varDsc->TypeGet() == TYP_STRUCT)
18412 {
18413 isWide = (endOffset.Value() > varDsc->lvExactSize);
18414 }
18415 else
18416 {
18417 // For small int types use the real type size, not the stack slot size.
18418 // Morph does manage to transform `*(int*)&byteVar` into just byteVar where
18419 // the LCL_VAR node has type TYP_INT. But such code is simply bogus and
18420 // there's no reason to attempt to optimize it. It makes more sense to
18421 // mark the variable address exposed in such circumstances.
18422 //
18423 // Same for "small" SIMD types - SIMD8/12 have 8/12 bytes, even if the
18424 // stack location may have 16 bytes.
18425 //
18426 // For TYP_BLK variables the type size is 0 so they're always address
18427 // exposed.
18428 isWide = (endOffset.Value() > genTypeSize(varDsc->TypeGet()));
18429 }
18430 }
18431
18432 if (isWide)
18433 {
18434 m_compiler->lvaSetVarAddrExposed(varDsc->lvIsStructField ? varDsc->lvParentLcl : val.LclNum());
18435 }
18436 }
18437
18438 INDEBUG(val.Consume();)
18439 }
18440
18441 //------------------------------------------------------------------------
18442 // GetIndirSize: Return the size (in bytes) of an indirection node.
18443 //
18444 // Arguments:
18445 // indir - the indirection node
18446 // user - the node that uses the indirection
18447 //
18448 // Notes:
18449 // This returns 0 for indirection of unknown size, typically GT_DYN_BLK.
18450 // GT_IND nodes that have type TYP_STRUCT are expected to only appears
    //    GT_IND nodes that have type TYP_STRUCT are expected to appear only
18452 // Otherwise 0 is returned as well.
18453 //
18454 unsigned GetIndirSize(GenTree* indir, GenTree* user)
18455 {
18456 assert(indir->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_DYN_BLK, GT_FIELD));
18457
18458 if (indir->TypeGet() != TYP_STRUCT)
18459 {
18460 return genTypeSize(indir->TypeGet());
18461 }
18462
18463 // A struct indir that is the RHS of an assignment needs special casing:
18464 // - It can be a GT_IND of type TYP_STRUCT, in which case the size is given by the LHS.
18465 // - It can be a GT_OBJ that has a correct size, but different than the size of the LHS.
18466 // The LHS size takes precedence.
18467 // Just take the LHS size in all cases.
18468 if (user->OperIs(GT_ASG) && (indir == user->gtGetOp2()))
18469 {
18470 indir = user->gtGetOp1();
18471
18472 if (indir->TypeGet() != TYP_STRUCT)
18473 {
18474 return genTypeSize(indir->TypeGet());
18475 }
18476
18477 // The LHS may be a LCL_VAR/LCL_FLD, these are not indirections so we need to handle them here.
18478 // It can also be a GT_INDEX, this is an indirection but it never applies to lclvar addresses
18479 // so it needs to be handled here as well.
18480
18481 switch (indir->OperGet())
18482 {
18483 case GT_LCL_VAR:
18484 return m_compiler->lvaGetDesc(indir->AsLclVar())->lvExactSize;
18485 case GT_LCL_FLD:
18486 return genTypeSize(indir->TypeGet());
18487 case GT_INDEX:
18488 return indir->AsIndex()->gtIndElemSize;
18489 default:
18490 break;
18491 }
18492 }
18493
18494 switch (indir->OperGet())
18495 {
18496 case GT_FIELD:
18497 return m_compiler->info.compCompHnd->getClassSize(
18498 m_compiler->info.compCompHnd->getFieldClass(indir->AsField()->gtFldHnd));
18499 case GT_BLK:
18500 case GT_OBJ:
18501 return indir->AsBlk()->gtBlkSize;
18502 default:
18503 assert(indir->OperIs(GT_IND, GT_DYN_BLK));
18504 return 0;
18505 }
18506 }
18507
18508 //------------------------------------------------------------------------
18509 // MorphStructField: Replaces a GT_FIELD based promoted/normed struct field access
18510 // (e.g. FIELD(ADDR(LCL_VAR))) with a GT_LCL_VAR that references the struct field.
18511 //
18512 // Arguments:
18513 // node - the GT_FIELD node
18514 // user - the node that uses the field
18515 //
18516 // Notes:
18517 // This does not do anything if the field access does not denote
18518 // a promoted/normed struct field.
18519 //
18520 void MorphStructField(GenTree* node, GenTree* user)
18521 {
18522 assert(node->OperIs(GT_FIELD));
18523 // TODO-Cleanup: Move fgMorphStructField implementation here, it's not used anywhere else.
18524 m_compiler->fgMorphStructField(node, user);
18525 INDEBUG(m_stmtModified |= node->OperIs(GT_LCL_VAR);)
18526 }
18527
18528 //------------------------------------------------------------------------
18529 // MorphLocalField: Replaces a GT_LCL_FLD based promoted struct field access
18530 // with a GT_LCL_VAR that references the struct field.
18531 //
18532 // Arguments:
18533 // node - the GT_LCL_FLD node
18534 // user - the node that uses the field
18535 //
18536 // Notes:
    //    This does not do anything if the field access does not involve
    //    a promoted struct local.
    //    If the GT_LCL_FLD offset does not have a corresponding promoted struct
    //    field then no transformation is done and the struct local's enregistration
    //    is disabled.
18542 //
18543 void MorphLocalField(GenTree* node, GenTree* user)
18544 {
18545 assert(node->OperIs(GT_LCL_FLD));
18546 // TODO-Cleanup: Move fgMorphLocalField implementation here, it's not used anywhere else.
18547 m_compiler->fgMorphLocalField(node, user);
18548 INDEBUG(m_stmtModified |= node->OperIs(GT_LCL_VAR);)
18549 }
18550
18551 //------------------------------------------------------------------------
18552 // UpdateEarlyRefCountForImplicitByRef: updates the ref count for implicit byref params.
18553 //
18554 // Arguments:
18555 // lclNum - the local number to update the count for.
18556 //
18557 // Notes:
18558 // fgMakeOutgoingStructArgCopy checks the ref counts for implicit byref params when it decides
18559 // if it's legal to elide certain copies of them;
18560 // fgRetypeImplicitByRefArgs checks the ref counts when it decides to undo promotions.
18561 //
18562 void UpdateEarlyRefCountForImplicitByRef(unsigned lclNum)
18563 {
18564 if (!m_compiler->lvaIsImplicitByRefLocal(lclNum))
18565 {
18566 return;
18567 }
18568 LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
18569 JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for V%02d\n", varDsc->lvRefCnt(RCS_EARLY),
18570 varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum);
18571 varDsc->incLvRefCnt(1, RCS_EARLY);
18572 }
18573};
18574
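
//------------------------------------------------------------------------
// fgAddFieldSeqForZeroOffset: Record that the given address tree also refers to a
//    field at offset 0 by attaching the field sequence to it.
//
// Arguments:
//    op1      - the address tree (must be TYP_BYREF, TYP_I_IMPL or TYP_REF)
//    fieldSeq - the field sequence to record
//
// Notes:
//    If op1 is an ADDR of a LCL_FLD, an ADD with a constant operand, or a
//    constant, the sequence is appended to the field sequence already carried
//    by that node; otherwise it is recorded in the zero-offset field map.
//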
18575void Compiler::fgAddFieldSeqForZeroOffset(GenTree* op1, FieldSeqNode* fieldSeq)
18576{
18577 assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
18578
18579 switch (op1->OperGet())
18580 {
18581 case GT_ADDR:
18582 if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
18583 {
18584 GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
18585 lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
18586 }
18587 break;
18588
18589 case GT_ADD:
18590 if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
18591 {
18592 FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
18593 if (op1Fs != nullptr)
18594 {
18595 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18596 op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
18597 }
18598 }
18599 else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
18600 {
18601 FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
18602 if (op2Fs != nullptr)
18603 {
18604 op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
18605 op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
18606 }
18607 }
18608 break;
18609
18610 case GT_CNS_INT:
18611 {
18612 FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
18613 if (op1Fs != nullptr)
18614 {
18615 op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
18616 op1->gtIntCon.gtFieldSeq = op1Fs;
18617 }
18618 }
18619 break;
18620
18621 default:
18622 // Record in the general zero-offset map.
18623 GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
18624 break;
18625 }
18626}
18627
18628//------------------------------------------------------------------------
18629// fgMarkAddressExposedLocals: Traverses the entire method and marks address
18630// exposed locals.
18631//
18632// Notes:
18633// Trees such as IND(ADDR(LCL_VAR)), that morph is expected to fold
18634// to just LCL_VAR, do not result in the involved local being marked
18635// address exposed.
18636//
18637void Compiler::fgMarkAddressExposedLocals()
18638{
18639#ifdef DEBUG
18640 if (verbose)
18641 {
18642 printf("\n*************** In fgMarkAddressExposedLocals()\n");
18643 }
18644#endif // DEBUG
18645
18646 LocalAddressVisitor visitor(this);
18647
18648 for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
18649 {
18650 // Make the current basic block address available globally
18651 compCurBB = block;
18652
18653 for (GenTree* stmt = block->bbTreeList; stmt != nullptr; stmt = stmt->gtNext)
18654 {
18655 visitor.VisitStmt(stmt->AsStmt());
18656 }
18657 }
18658}
18659
18660#ifdef FEATURE_SIMD
18661
18662//-----------------------------------------------------------------------------------
18663// fgMorphCombineSIMDFieldAssignments:
//  If the RHS of the input stmt is a read of the SIMD vector's X field, then this function
//  will keep reading the next few stmts based on the vector size (2, 3, 4).
//  If the LHSs of the next stmts are located contiguously, and the RHSs are also located
//  contiguously, then we replace those statements with a single copyblk.
18668//
// Arguments:
//    block - BasicBlock*. The block to which stmt belongs
//    stmt  - GenTreeStmt*. The stmt node we want to check
18672//
// Return Value:
//    true if this function successfully optimized the stmts, otherwise false.
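//
// Notes:
//    For example (illustrative sketch), a sequence of assignments such as
//        dst.x = src.X; dst.y = src.Y; dst.z = src.Z; dst.w = src.W;
//    where both the destinations and the sources are laid out contiguously in
//    memory can be replaced by a single SIMD-sized copy block from src to dst.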
18676
18677bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTree* stmt)
18678{
18679
18680 noway_assert(stmt->gtOper == GT_STMT);
18681 GenTree* tree = stmt->gtStmt.gtStmtExpr;
18682 assert(tree->OperGet() == GT_ASG);
18683
18684 GenTree* originalLHS = tree->gtOp.gtOp1;
18685 GenTree* prevLHS = tree->gtOp.gtOp1;
18686 GenTree* prevRHS = tree->gtOp.gtOp2;
18687 unsigned index = 0;
18688 var_types baseType = TYP_UNKNOWN;
18689 unsigned simdSize = 0;
18690 GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
18691
18692 if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
18693 {
18694 // if the RHS is not from a SIMD vector field X, then there is no need to check further.
18695 return false;
18696 }
18697
18698 var_types simdType = getSIMDTypeForSize(simdSize);
18699 int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
18700 int remainingAssignments = assignmentsCount;
18701 GenTree* curStmt = stmt->gtNext;
18702 GenTree* lastStmt = stmt;
18703
18704 while (curStmt != nullptr && remainingAssignments > 0)
18705 {
18706 GenTree* exp = curStmt->gtStmt.gtStmtExpr;
18707 if (exp->OperGet() != GT_ASG)
18708 {
18709 break;
18710 }
18711 GenTree* curLHS = exp->gtGetOp1();
18712 GenTree* curRHS = exp->gtGetOp2();
18713
18714 if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
18715 {
18716 break;
18717 }
18718
18719 remainingAssignments--;
18720 prevLHS = curLHS;
18721 prevRHS = curRHS;
18722
18723 lastStmt = curStmt;
18724 curStmt = curStmt->gtNext;
18725 }
18726
18727 if (remainingAssignments > 0)
18728 {
        // If there are assignments left over, then the stmts are not assigning to
        // contiguous memory locations from the same vector, so we cannot combine them.
18732 return false;
18733 }
18734#ifdef DEBUG
18735 if (verbose)
18736 {
18737 printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
18738 printf("From " FMT_BB ", stmt", block->bbNum);
18739 printTreeID(stmt);
18740 printf(" to stmt");
18741 printTreeID(lastStmt);
18742 printf("\n");
18743 }
18744#endif
18745
18746 for (int i = 0; i < assignmentsCount; i++)
18747 {
18748 fgRemoveStmt(block, stmt->gtNext);
18749 }
18750
18751 GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
18752 if (simdStructNode->OperIsLocal())
18753 {
18754 setLclRelatedToSIMDIntrinsic(simdStructNode);
18755 }
18756 GenTree* copyBlkAddr = copyBlkDst;
18757 if (copyBlkAddr->gtOper == GT_LEA)
18758 {
18759 copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
18760 }
18761 GenTreeLclVarCommon* localDst = nullptr;
18762 if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
18763 {
18764 setLclRelatedToSIMDIntrinsic(localDst);
18765 }
18766
18767 if (simdStructNode->TypeGet() == TYP_BYREF)
18768 {
18769 assert(simdStructNode->OperIsLocal());
18770 assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
18771 simdStructNode = gtNewIndir(simdType, simdStructNode);
18772 }
18773 else
18774 {
18775 assert(varTypeIsSIMD(simdStructNode));
18776 }
18777
18778#ifdef DEBUG
18779 if (verbose)
18780 {
18781 printf("\n" FMT_BB " stmt", block->bbNum);
18782 printTreeID(stmt);
18783 printf("(before)\n");
18784 gtDispTree(stmt);
18785 }
18786#endif
18787
18788 // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
18789 GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
18790 blkNode->gtType = simdType;
18791 tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
18792 false, // not volatile
18793 true); // copyBlock
18794
18795 stmt->gtStmt.gtStmtExpr = tree;
18796
18797 // Since we generated a new address node which didn't exist before,
18798 // we should expose this address manually here.
18799 LocalAddressVisitor visitor(this);
18800 visitor.VisitStmt(stmt->AsStmt());
18801
18802#ifdef DEBUG
18803 if (verbose)
18804 {
18805 printf("\nReplaced " FMT_BB " stmt", block->bbNum);
18806 printTreeID(stmt);
18807 printf("(after)\n");
18808 gtDispTree(stmt);
18809 }
18810#endif
18811 return true;
18812}
18813
18814#endif // FEATURE_SIMD
18815
18816#if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
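// SkipNopStmts: skip any GT_NOP statements in the statement list,
//               returning the first non-nop statement (or nullptr if none remains).
//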
18817GenTreeStmt* SkipNopStmts(GenTreeStmt* stmt)
18818{
    while ((stmt != nullptr) && stmt->gtStmtExpr->IsNothingNode())
18820 {
18821 stmt = stmt->gtNextStmt;
18822 }
18823 return stmt;
18824}
18825
18826#endif // !FEATURE_CORECLR && _TARGET_AMD64_
18827
18828//------------------------------------------------------------------------
18829// fgCheckStmtAfterTailCall: check that statements after the tail call stmt
//    candidate are in one of the expected forms described below.
18831//
18832// Return Value:
18833// 'true' if stmts are in the expected form, else 'false'.
18834//
18835bool Compiler::fgCheckStmtAfterTailCall()
18836{
18837
18838 // For void calls, we would have created a GT_CALL in the stmt list.
18839 // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
18840 // For calls returning structs, we would have a void call, followed by a void return.
    // For debuggable code, it would be an assignment of the call to a temp.
    // We want to get rid of any of these extra trees and just leave
    // the call.
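    // For example (illustrative): "return (int)M(x);" yields GT_RETURN(GT_CAST(GT_CALL)),
    // while a struct-returning tail call is a void call followed by a separate return
    // statement (possibly returning the buffer local).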
18844 GenTreeStmt* callStmt = fgMorphStmt;
18845
18846 GenTreeStmt* nextMorphStmt = callStmt->gtNextStmt;
18847
18848#if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
18849 // Legacy Jit64 Compat:
18850 // There could be any number of GT_NOPs between tail call and GT_RETURN.
18851 // That is tail call pattern could be one of the following:
18852 // 1) tail.call, nop*, ret
18853 // 2) tail.call, nop*, pop, nop*, ret
18854 // 3) var=tail.call, nop*, ret(var)
18855 // 4) var=tail.call, nop*, pop, ret
18856 // 5) comma(tail.call, nop), nop*, ret
18857 //
18858 // See impIsTailCallILPattern() for details on tail call IL patterns
18859 // that are supported.
18860 GenTree* callExpr = callStmt->gtStmtExpr;
18861
18862 if (callExpr->gtOper != GT_RETURN)
18863 {
18864 // First skip all GT_NOPs after the call
18865 nextMorphStmt = SkipNopStmts(nextMorphStmt);
18866
18867 // Check to see if there is a pop.
18868 // Since tail call is honored, we can get rid of the stmt corresponding to pop.
18869 if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
18870 {
18871 // Note that pop opcode may or may not result in a new stmt (for details see
18872 // impImportBlockCode()). Hence, it is not possible to assert about the IR
18873 // form generated by pop but pop tree must be side-effect free so that we can
18874 // delete it safely.
18875 GenTreeStmt* popStmt = nextMorphStmt;
18876
18877 // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
18878 // the constituent nodes.
18879 GenTree* popExpr = popStmt->gtStmtExpr;
18880 bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
18881 if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
18882 {
18883 isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
18884 ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
18885 }
18886 noway_assert(isSideEffectFree);
18887
18888 nextMorphStmt = popStmt->gtNextStmt;
18889 }
18890
18891 // Next skip any GT_NOP nodes after the pop
18892 nextMorphStmt = SkipNopStmts(nextMorphStmt);
18893 }
18894#endif // !FEATURE_CORECLR && _TARGET_AMD64_
18895
    // Check that the remaining stmts in the block match one of the following patterns:
18897 // 1) ret(void)
18898 // 2) ret(cast*(callResultLclVar))
18899 // 3) lclVar = callResultLclVar, the actual ret(lclVar) in another block
18900 if (nextMorphStmt != nullptr)
18901 {
18902 GenTree* callExpr = callStmt->gtStmtExpr;
18903 if (callExpr->gtOper != GT_ASG)
18904 {
18905 // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar),
            // where lclVar was the return buffer in the call for structs or simd.
18907 GenTreeStmt* retStmt = nextMorphStmt;
18908 GenTree* retExpr = retStmt->gtStmtExpr;
18909 noway_assert(retExpr->gtOper == GT_RETURN);
18910
18911 nextMorphStmt = retStmt->gtNextStmt;
18912 }
18913 else
18914 {
18915 noway_assert(callExpr->gtGetOp1()->OperIsLocal());
18916 unsigned callResultLclNumber = callExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
18917
18918#if FEATURE_TAILCALL_OPT_SHARED_RETURN
18919
18920 // We can have a move from the call result to an lvaInlineeReturnSpillTemp.
18921 // However, we can't check that this assignment was created there.
18922 if (nextMorphStmt->gtStmtExpr->gtOper == GT_ASG)
18923 {
18924 GenTreeStmt* moveStmt = nextMorphStmt;
18925 GenTree* moveExpr = nextMorphStmt->gtStmtExpr;
18926 noway_assert(moveExpr->gtGetOp1()->OperIsLocal() && moveExpr->gtGetOp2()->OperIsLocal());
18927
18928 unsigned srcLclNum = moveExpr->gtGetOp2()->AsLclVarCommon()->gtLclNum;
18929 noway_assert(srcLclNum == callResultLclNumber);
18930 unsigned dstLclNum = moveExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
18931 callResultLclNumber = dstLclNum;
18932
18933 nextMorphStmt = moveStmt->gtNextStmt;
18934 }
18935 if (nextMorphStmt != nullptr)
18936#endif
18937 {
18938 GenTreeStmt* retStmt = nextMorphStmt;
18939 GenTree* retExpr = nextMorphStmt->gtStmtExpr;
18940 noway_assert(retExpr->gtOper == GT_RETURN);
18941
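                // Strip any casts between the return and the local (pattern 2:
                // ret(cast*(callResultLclVar))); an overflow cast is not expected here.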
18942 GenTree* treeWithLcl = retExpr->gtGetOp1();
18943 while (treeWithLcl->gtOper == GT_CAST)
18944 {
18945 noway_assert(!treeWithLcl->gtOverflow());
18946 treeWithLcl = treeWithLcl->gtGetOp1();
18947 }
18948
18949 noway_assert(callResultLclNumber == treeWithLcl->AsLclVarCommon()->gtLclNum);
18950
18951 nextMorphStmt = retStmt->gtNextStmt;
18952 }
18953 }
18954 }
18955 return nextMorphStmt == nullptr;
18956}
18957
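// The side-effect flags tracked by fgMorphArgList; their union must cover
// GTF_ALL_EFFECT (asserted at the start of that method).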
18958static const int numberOfTrackedFlags = 5;
18959static const unsigned trackedFlags[numberOfTrackedFlags] = {GTF_ASG, GTF_CALL, GTF_EXCEPT, GTF_GLOB_REF,
18960 GTF_ORDER_SIDEEFF};
18961
18962//------------------------------------------------------------------------
18963// fgMorphArgList: morph argument list tree without recursion.
18964//
18965// Arguments:
18966// args - argument list tree to morph;
18967// mac - morph address context, used to morph children.
18968//
18969// Return Value:
18970// morphed argument list.
18971//
18972GenTreeArgList* Compiler::fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac)
18973{
    // Use a non-recursive algorithm: morph each actual list value, remember the
    // last list node that carries each tracked effect flag, then propagate each
    // flag to all list nodes up to and including that last node in a second pass.
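    // For example (illustrative): for an argument list (a, call(), b) where only
    // the call carries GTF_CALL, the second list node is recorded as the last one
    // with GTF_CALL, so the second pass sets GTF_CALL on list nodes 1 and 2 but
    // not on node 3.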
18977 assert((trackedFlags[0] | trackedFlags[1] | trackedFlags[2] | trackedFlags[3] | trackedFlags[4]) == GTF_ALL_EFFECT);
18978
18979 GenTree* memorizedLastNodes[numberOfTrackedFlags] = {nullptr};
18980
18981 for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest())
18982 {
18983 // Morph actual list values.
18984 GenTree*& arg = listNode->Current();
18985 arg = fgMorphTree(arg, mac);
18986
18987 // Remember the last list node with each flag.
18988 for (int i = 0; i < numberOfTrackedFlags; ++i)
18989 {
18990 if ((arg->gtFlags & trackedFlags[i]) != 0)
18991 {
18992 memorizedLastNodes[i] = listNode;
18993 }
18994 }
18995 }
18996
18997 for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest())
18998 {
18999 // Clear all old effects from the list node.
19000 listNode->gtFlags &= ~GTF_ALL_EFFECT;
19001
19002 // Spread each flag to all list nodes (to the prefix) before the memorized last node.
19003 for (int i = 0; i < numberOfTrackedFlags; ++i)
19004 {
19005 if (memorizedLastNodes[i] != nullptr)
19006 {
19007 listNode->gtFlags |= trackedFlags[i];
19008 }
19009 if (listNode == memorizedLastNodes[i])
19010 {
19011 memorizedLastNodes[i] = nullptr;
19012 }
19013 }
19014 }
19015
19016 return args;
19017}
19018