1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX Morph XX |
9 | XX XX |
10 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
11 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
12 | */ |
13 | |
14 | #include "jitpch.h" |
15 | #ifdef _MSC_VER |
16 | #pragma hdrstop |
17 | #endif |
18 | |
19 | #include "allocacheck.h" // for alloca |
20 | |
21 | // Convert the given node into a call to the specified helper passing |
22 | // the given argument list. |
23 | // |
24 | // Tries to fold constants and, for overflow casts, also adds an edge for the overflow exception. |
25 | // Returns the morphed tree. |
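// For example (illustrative only): a GT_CAST(double -> ulong) that cannot be expanded inline is
// rewritten in place as CALL CORINFO_HELP_DBL2ULNG(oper); because the original node is reused,
// its value number is preserved.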
26 | GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper) |
27 | { |
28 | GenTree* result; |
29 | |
30 | /* If the operand is a constant, we'll try to fold it */ |
31 | if (oper->OperIsConst()) |
32 | { |
33 | GenTree* oldTree = tree; |
34 | |
35 | tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...) |
36 | |
37 | if (tree != oldTree) |
38 | { |
39 | return fgMorphTree(tree); |
40 | } |
41 | else if (tree->OperKind() & GTK_CONST) |
42 | { |
43 | return fgMorphConst(tree); |
44 | } |
45 | |
46 | // assert that oper is unchanged and that it is still a GT_CAST node |
47 | noway_assert(tree->gtCast.CastOp() == oper); |
48 | noway_assert(tree->gtOper == GT_CAST); |
49 | } |
50 | result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper)); |
51 | assert(result == tree); |
52 | return result; |
53 | } |
54 | |
55 | /***************************************************************************** |
56 | * |
57 | * Convert the given node into a call to the specified helper passing |
58 | * the given argument list. |
59 | */ |
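// For example (illustrative): fgMorphCastIntoHelper above relies on this to turn an
// overflow-checking double -> int cast into CALL CORINFO_HELP_DBL2INT_OVF(oper) in place.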
60 | |
61 | GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, GenTreeArgList* args, bool morphArgs) |
62 | { |
63 | // The helper call ought to be semantically equivalent to the original node, so preserve its VN. |
64 | tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN); |
65 | |
66 | tree->gtCall.gtCallType = CT_HELPER; |
67 | tree->gtCall.gtCallMethHnd = eeFindHelper(helper); |
68 | tree->gtCall.gtCallArgs = args; |
69 | tree->gtCall.gtCallObjp = nullptr; |
70 | tree->gtCall.gtCallLateArgs = nullptr; |
71 | tree->gtCall.fgArgInfo = nullptr; |
72 | tree->gtCall.gtRetClsHnd = nullptr; |
73 | tree->gtCall.gtCallMoreFlags = 0; |
74 | tree->gtCall.gtInlineCandidateInfo = nullptr; |
75 | tree->gtCall.gtControlExpr = nullptr; |
76 | |
77 | #if DEBUG |
78 | // Helper calls are never candidates. |
79 | |
80 | tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER; |
81 | #endif // DEBUG |
82 | |
83 | #ifdef FEATURE_READYTORUN_COMPILER |
84 | tree->gtCall.gtEntryPoint.addr = nullptr; |
85 | tree->gtCall.gtEntryPoint.accessType = IAT_VALUE; |
86 | #endif |
87 | |
88 | #ifndef _TARGET_64BIT_ |
89 | if (varTypeIsLong(tree)) |
90 | { |
91 | GenTreeCall* callNode = tree->AsCall(); |
92 | ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc(); |
93 | retTypeDesc->Reset(); |
94 | retTypeDesc->InitializeLongReturnType(this); |
95 | callNode->ClearOtherRegs(); |
96 | } |
97 | #endif // !_TARGET_64BIT_ |
98 | |
99 | if (tree->OperMayThrow(this)) |
100 | { |
101 | tree->gtFlags |= GTF_EXCEPT; |
102 | } |
103 | else |
104 | { |
105 | tree->gtFlags &= ~GTF_EXCEPT; |
106 | } |
107 | tree->gtFlags |= GTF_CALL; |
108 | if (args) |
109 | { |
110 | tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT); |
111 | } |
112 | |
113 | /* Perform the morphing */ |
114 | |
115 | if (morphArgs) |
116 | { |
117 | tree = fgMorphArgs(tree->AsCall()); |
118 | } |
119 | |
120 | return tree; |
121 | } |
122 | |
123 | /***************************************************************************** |
124 | * |
125 | * Morph a cast node (we perform some very simple transformations here). |
126 | */ |
127 | |
128 | #ifdef _PREFAST_ |
129 | #pragma warning(push) |
130 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
131 | #endif |
132 | GenTree* Compiler::fgMorphCast(GenTree* tree) |
133 | { |
134 | noway_assert(tree->gtOper == GT_CAST); |
135 | noway_assert(genTypeSize(TYP_I_IMPL) == TARGET_POINTER_SIZE); |
136 | |
137 | /* The first sub-operand is the thing being cast */ |
138 | |
139 | GenTree* oper = tree->gtCast.CastOp(); |
140 | |
141 | if (fgGlobalMorph && (oper->gtOper == GT_ADDR)) |
142 | { |
143 | // Make sure we've checked if 'oper' is an address of an implicit-byref parameter. |
144 | // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast |
145 | // morphing code to see that type. |
146 | fgMorphImplicitByRefArgs(oper); |
147 | } |
148 | |
149 | var_types srcType = genActualType(oper->TypeGet()); |
150 | |
151 | var_types dstType = tree->CastToType(); |
152 | unsigned dstSize = genTypeSize(dstType); |
153 | |
154 | // See if the cast has to be done in two steps. R -> I |
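// For example (illustrative, AMD64): CAST(ulong <- float x) is first rewritten as
// CAST(ulong <- CAST(double <- x)) because the conversion helper only takes a double source.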
155 | if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) |
156 | { |
157 | if (srcType == TYP_FLOAT |
158 | #if defined(_TARGET_ARM64_) |
159 | // Arm64: src = float, dst is overflow conversion. |
160 | // This goes through helper and hence src needs to be converted to double. |
161 | && tree->gtOverflow() |
162 | #elif defined(_TARGET_AMD64_) |
163 | // Amd64: src = float, dst = uint64 or overflow conversion. |
164 | // This goes through helper and hence src needs to be converted to double. |
165 | && (tree->gtOverflow() || (dstType == TYP_ULONG)) |
166 | #elif defined(_TARGET_ARM_) |
167 | // Arm: src = float, dst = int64/uint64 or overflow conversion. |
168 | && (tree->gtOverflow() || varTypeIsLong(dstType)) |
169 | #else |
170 | // x86: src = float, dst = uint32/int64/uint64 or overflow conversion. |
171 | && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT)) |
172 | #endif |
173 | ) |
174 | { |
175 | oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE); |
176 | } |
177 | |
178 | // Do we need to do it in two steps: R -> I, then I -> smallType? |
179 | CLANG_FORMAT_COMMENT_ANCHOR; |
180 | |
181 | #if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_) |
182 | if (dstSize < genTypeSize(TYP_INT)) |
183 | { |
184 | oper = gtNewCastNodeL(TYP_INT, oper, tree->IsUnsigned(), TYP_INT); |
185 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
186 | tree->gtFlags &= ~GTF_UNSIGNED; |
187 | } |
188 | #else |
189 | if (dstSize < TARGET_POINTER_SIZE) |
190 | { |
191 | oper = gtNewCastNodeL(TYP_I_IMPL, oper, false, TYP_I_IMPL); |
192 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
193 | } |
194 | #endif |
195 | else |
196 | { |
197 | /* Note that if we need to use a helper call then we can not morph oper */ |
198 | if (!tree->gtOverflow()) |
199 | { |
200 | #ifdef _TARGET_ARM64_ // On ARM64 All non-overflow checking conversions can be optimized |
201 | goto OPTIMIZECAST; |
202 | #else |
203 | switch (dstType) |
204 | { |
205 | case TYP_INT: |
206 | goto OPTIMIZECAST; |
207 | |
208 | case TYP_UINT: |
209 | #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_) |
210 | goto OPTIMIZECAST; |
211 | #else // _TARGET_X86_ |
212 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); |
213 | #endif // _TARGET_X86_ |
214 | |
215 | case TYP_LONG: |
216 | #ifdef _TARGET_AMD64_ |
217 | // SSE2 has instructions to convert a float/double directly to a long |
218 | goto OPTIMIZECAST; |
219 | #else // !_TARGET_AMD64_ |
220 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); |
221 | #endif // !_TARGET_AMD64_ |
222 | |
223 | case TYP_ULONG: |
224 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); |
225 | default: |
226 | break; |
227 | } |
228 | #endif // _TARGET_ARM64_ |
229 | } |
230 | else |
231 | { |
232 | switch (dstType) |
233 | { |
234 | case TYP_INT: |
235 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper); |
236 | case TYP_UINT: |
237 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper); |
238 | case TYP_LONG: |
239 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper); |
240 | case TYP_ULONG: |
241 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper); |
242 | default: |
243 | break; |
244 | } |
245 | } |
246 | noway_assert(!"Unexpected dstType"); |
247 | } |
248 | } |
249 | #ifndef _TARGET_64BIT_ |
250 | // The code generation phase (for x86 & ARM32) does not handle casts |
251 | // directly from [u]long to anything other than [u]int. Insert an |
252 | // intermediate cast to native int. |
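// For example (illustrative, x86/ARM32): CAST(short <- long x) becomes
// CAST(short <- CAST(int <- long x)), so only the inner cast operates on the long operand.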
253 | else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType)) |
254 | { |
255 | oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL); |
256 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
257 | tree->gtFlags &= ~GTF_UNSIGNED; |
258 | } |
259 | #endif //!_TARGET_64BIT_ |
260 | |
261 | #ifdef _TARGET_ARM_ |
262 | else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) && |
263 | !varTypeIsLong(oper->gtCast.CastOp())) |
264 | { |
265 | // optimization: conv.r4(conv.r8(d)) -> conv.r4(d) |
266 | // except when the ultimate source is a long because there is no long-to-float helper, so it must be done in two steps. |
267 | // This happens semi-frequently because there is no IL 'conv.r4.un' |
268 | oper->gtType = TYP_FLOAT; |
269 | oper->CastToType() = TYP_FLOAT; |
270 | return fgMorphTree(oper); |
271 | } |
272 | // converts long/ulong --> float/double casts into helper calls. |
273 | else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType)) |
274 | { |
275 | if (dstType == TYP_FLOAT) |
276 | { |
277 | // there is only a double helper, so we |
278 | // - change the dsttype to double |
279 | // - insert a cast from double to float |
280 | // - recurse into the resulting tree |
281 | tree->CastToType() = TYP_DOUBLE; |
282 | tree->gtType = TYP_DOUBLE; |
283 | |
284 | tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); |
285 | |
286 | return fgMorphTree(tree); |
287 | } |
288 | if (tree->gtFlags & GTF_UNSIGNED) |
289 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); |
290 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); |
291 | } |
292 | #endif //_TARGET_ARM_ |
293 | |
294 | #ifdef _TARGET_AMD64_ |
295 | // Do we have to do two step U4/8 -> R4/8 ? |
296 | // Codegen supports the following conversions as one-step operations |
297 | // a) Long -> R4/R8 |
298 | // b) U8 -> R8 |
299 | // |
300 | // The following conversions are performed as two-step operations using the above. |
301 | // U4 -> R4/8 = U4-> Long -> R4/8 |
302 | // U8 -> R4 = U8 -> R8 -> R4 |
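// For example (illustrative): CAST(float <- uint x) becomes CAST(float <- CAST(long <- uint x)),
// and CAST(float <- ulong x) becomes CAST(float <- CAST(double <- ulong x)).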
303 | else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) |
304 | { |
305 | srcType = genUnsignedType(srcType); |
306 | |
307 | if (srcType == TYP_ULONG) |
308 | { |
309 | if (dstType == TYP_FLOAT) |
310 | { |
311 | // Codegen can handle U8 -> R8 conversion. |
312 | // U8 -> R4 = U8 -> R8 -> R4 |
313 | // - change the dsttype to double |
314 | // - insert a cast from double to float |
315 | // - recurse into the resulting tree |
316 | tree->CastToType() = TYP_DOUBLE; |
317 | tree->gtType = TYP_DOUBLE; |
318 | tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); |
319 | return fgMorphTree(tree); |
320 | } |
321 | } |
322 | else if (srcType == TYP_UINT) |
323 | { |
324 | oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); |
325 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
326 | tree->gtFlags &= ~GTF_UNSIGNED; |
327 | } |
328 | } |
329 | #endif // _TARGET_AMD64_ |
330 | |
331 | #ifdef _TARGET_X86_ |
332 | // Do we have to do two step U4/8 -> R4/8 ? |
333 | else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) |
334 | { |
335 | srcType = genUnsignedType(srcType); |
336 | |
337 | if (srcType == TYP_ULONG) |
338 | { |
339 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); |
340 | } |
341 | else if (srcType == TYP_UINT) |
342 | { |
343 | oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); |
344 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
345 | tree->gtFlags &= ~GTF_UNSIGNED; |
346 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); |
347 | } |
348 | } |
349 | else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType)) |
350 | { |
351 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); |
352 | } |
353 | #endif //_TARGET_X86_ |
354 | else if (varTypeIsGC(srcType) != varTypeIsGC(dstType)) |
355 | { |
356 | // We are casting away GC information. We would like to just |
357 | // change the type to int, however this gives the emitter fits because |
358 | // it believes the variable is a GC variable at the beginning of the |
359 | // instruction group, but it is not turned non-gc by the code generator. |
360 | // We fix this by copying the GC pointer to a non-gc pointer temp. |
361 | noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?"); |
362 | |
363 | // We generate an assignment to an int and then do the cast from an int. With this we avoid |
364 | // the gc problem and we allow casts to bytes, longs, etc... |
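// Illustrative shape of the result: COMMA(ASG(tmpN, oper), CAST(dstType <- LCL_VAR<TYP_I_IMPL> tmpN)),
// where tmpN is a freshly grabbed non-GC temp.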
365 | unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC")); |
366 | oper->gtType = TYP_I_IMPL; |
367 | GenTree* asg = gtNewTempAssign(lclNum, oper); |
368 | oper->gtType = srcType; |
369 | |
370 | // do the real cast |
371 | GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType); |
372 | |
373 | // Generate the comma tree |
374 | oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast); |
375 | |
376 | return fgMorphTree(oper); |
377 | } |
378 | |
379 | // Look for narrowing casts ([u]long -> [u]int) and try to push them |
380 | // down into the operand before morphing it. |
381 | // |
382 | // It doesn't matter if this cast is from ulong or long (i.e. if |
383 | // GTF_UNSIGNED is set) because the transformation is only applied to |
384 | // overflow-insensitive narrowing casts, which always silently truncate. |
385 | // |
386 | // Note that casts from [u]long to small integer types are handled above. |
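// For example (illustrative): CAST(int <- ADD(long a, long b)) can become
// ADD.int(CAST(int <- a), CAST(int <- b)), since the low 32 bits of the sum depend only on
// the low 32 bits of its operands.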
387 | if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT))) |
388 | { |
389 | // As a special case, look for overflow-sensitive casts of an AND |
390 | // expression, and see if the second operand is a small constant. Since |
391 | // the result of an AND is bound by its smaller operand, it may be |
392 | // possible to prove that the cast won't overflow, which will in turn |
393 | // allow the cast's operand to be transformed. |
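// For example (illustrative): CAST.ovf(uint <- AND(x, 0xFF)) can never overflow because the
// AND result is at most 0xFF, so the overflow check (and GTF_EXCEPT) can be removed.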
394 | if (tree->gtOverflow() && (oper->OperGet() == GT_AND)) |
395 | { |
396 | GenTree* andOp2 = oper->gtOp.gtOp2; |
397 | |
398 | // Special case to the special case: AND with a casted int. |
399 | if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT)) |
400 | { |
401 | // gtFoldExprConst will deal with whether the cast is signed or |
402 | // unsigned, or overflow-sensitive. |
403 | andOp2 = gtFoldExprConst(andOp2); |
404 | oper->gtOp.gtOp2 = andOp2; |
405 | } |
406 | |
407 | // Look for a constant less than 2^{32} for a cast to uint, or less |
408 | // than 2^{31} for a cast to int. |
409 | int maxWidth = (dstType == TYP_UINT) ? 32 : 31; |
410 | |
411 | if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0)) |
412 | { |
413 | // This cast can't overflow. |
414 | tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT); |
415 | } |
416 | } |
417 | |
418 | // Only apply this transformation during global morph, |
419 | // when neither the cast node nor the oper node may throw an exception |
420 | // based on the upper 32 bits. |
421 | // |
422 | if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx()) |
423 | { |
424 | // For these operations the lower 32 bits of the result only depends |
425 | // upon the lower 32 bits of the operands. |
426 | // |
427 | bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG); |
428 | |
429 | // For long LSH cast to int, there is a discontinuity in behavior |
430 | // when the shift amount is 32 or larger. |
431 | // |
432 | // CAST(INT, LSH(1LL, 31)) == LSH(1, 31) |
433 | // LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31) |
434 | // |
435 | // CAST(INT, LSH(1LL, 32)) == 0 |
436 | // LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1 |
437 | // |
438 | // So some extra validation is needed. |
439 | // |
440 | if (oper->OperIs(GT_LSH)) |
441 | { |
442 | GenTree* shiftAmount = oper->gtOp.gtOp2; |
443 | |
444 | // Expose constant value for shift, if possible, to maximize the number |
445 | // of cases we can handle. |
446 | shiftAmount = gtFoldExpr(shiftAmount); |
447 | oper->gtOp.gtOp2 = shiftAmount; |
448 | |
449 | #if DEBUG |
450 | // We may remorph the shift amount tree again later, so clear any morphed flag. |
451 | shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
452 | #endif // DEBUG |
453 | |
454 | if (shiftAmount->IsIntegralConst()) |
455 | { |
456 | const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue(); |
457 | |
458 | if ((shiftAmountValue >= 64) || (shiftAmountValue < 0)) |
459 | { |
460 | // Shift amount is large enough or negative so result is undefined. |
461 | // Don't try to optimize. |
462 | assert(!canPushCast); |
463 | } |
464 | else if ((shiftAmountValue >= 32) && ((tree->gtFlags & GTF_ALL_EFFECT) == 0)) |
465 | { |
466 | // Result of the shift is zero. |
467 | DEBUG_DESTROY_NODE(tree); |
468 | GenTree* zero = gtNewZeroConNode(TYP_INT); |
469 | return fgMorphTree(zero); |
470 | } |
471 | else |
472 | { |
473 | // Shift amount is positive and small enough that we can push the cast through. |
474 | canPushCast = true; |
475 | } |
476 | } |
477 | else |
478 | { |
479 | // Shift amount is unknown. We can't optimize this case. |
480 | assert(!canPushCast); |
481 | } |
482 | } |
483 | |
484 | if (canPushCast) |
485 | { |
486 | DEBUG_DESTROY_NODE(tree); |
487 | |
488 | // Insert narrowing casts for op1 and op2. |
489 | oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, false, dstType); |
490 | if (oper->gtOp.gtOp2 != nullptr) |
491 | { |
492 | oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, false, dstType); |
493 | } |
494 | |
495 | // Clear the GT_MUL_64RSLT if it is set. |
496 | if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT)) |
497 | { |
498 | oper->gtFlags &= ~GTF_MUL_64RSLT; |
499 | } |
500 | |
501 | // The operation now produces a 32-bit result. |
502 | oper->gtType = TYP_INT; |
503 | |
504 | // Remorph the new tree as the casts that we added may be folded away. |
505 | return fgMorphTree(oper); |
506 | } |
507 | } |
508 | } |
509 | |
510 | OPTIMIZECAST: |
511 | noway_assert(tree->gtOper == GT_CAST); |
512 | |
513 | /* Morph the operand */ |
514 | tree->gtCast.CastOp() = oper = fgMorphTree(oper); |
515 | |
516 | /* Reset the call flag */ |
517 | tree->gtFlags &= ~GTF_CALL; |
518 | |
519 | /* Reset the assignment flag */ |
520 | tree->gtFlags &= ~GTF_ASG; |
521 | |
522 | /* unless we have an overflow cast, reset the except flag */ |
523 | if (!tree->gtOverflow()) |
524 | { |
525 | tree->gtFlags &= ~GTF_EXCEPT; |
526 | } |
527 | |
528 | /* Just in case new side effects were introduced */ |
529 | tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT); |
530 | |
531 | if (!gtIsActiveCSE_Candidate(tree) && !gtIsActiveCSE_Candidate(oper)) |
532 | { |
533 | srcType = oper->TypeGet(); |
534 | |
535 | /* See if we can discard the cast */ |
536 | if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType)) |
537 | { |
538 | if (tree->IsUnsigned() && !varTypeIsUnsigned(srcType)) |
539 | { |
540 | if (varTypeIsSmall(srcType)) |
541 | { |
542 | // Small signed values are automatically sign extended to TYP_INT. If the cast is interpreting the |
543 | // resulting TYP_INT value as unsigned then the "sign" bits end up being "value" bits and srcType |
544 | // must be TYP_UINT, not the original small signed type. Otherwise "conv.ovf.i2.un(i1(-1))" is |
545 | // wrongly treated as a widening conversion from i1 to i2 when in fact it is a narrowing conversion |
546 | // from u4 to i2. |
547 | srcType = genActualType(srcType); |
548 | } |
549 | |
550 | srcType = genUnsignedType(srcType); |
551 | } |
552 | |
553 | if (srcType == dstType) |
554 | { // Certainly if they are identical it is pointless |
555 | goto REMOVE_CAST; |
556 | } |
557 | |
558 | if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType)) |
559 | { |
560 | unsigned varNum = oper->gtLclVarCommon.gtLclNum; |
561 | LclVarDsc* varDsc = &lvaTable[varNum]; |
562 | if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore()) |
563 | { |
564 | goto REMOVE_CAST; |
565 | } |
566 | } |
567 | |
568 | bool unsignedSrc = varTypeIsUnsigned(srcType); |
569 | bool unsignedDst = varTypeIsUnsigned(dstType); |
570 | bool signsDiffer = (unsignedSrc != unsignedDst); |
571 | unsigned srcSize = genTypeSize(srcType); |
572 | |
573 | // For same-sized casts we can also discard the cast when the signs |
574 | // match, or when the cast does not check for overflow |
575 | if (srcSize == dstSize) |
576 | { |
577 | /* This should have been handled above */ |
578 | noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType)); |
579 | |
580 | if (!signsDiffer) |
581 | { |
582 | goto REMOVE_CAST; |
583 | } |
584 | |
585 | if (!tree->gtOverflow()) |
586 | { |
587 | /* For small type casts, when necessary we force |
588 | the src operand to the dstType and allow the |
589 | implied load from memory to perform the casting */ |
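// For example (illustrative): CAST(ubyte <- IND.byte(addr)) can become IND.ubyte(addr);
// the small-typed load then performs the zero-extension and the cast node is removed.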
590 | if (varTypeIsSmall(srcType)) |
591 | { |
592 | switch (oper->gtOper) |
593 | { |
594 | case GT_IND: |
595 | case GT_CLS_VAR: |
596 | case GT_LCL_FLD: |
597 | case GT_ARR_ELEM: |
598 | oper->gtType = dstType; |
599 | // We're changing the type here so we need to update the VN; |
600 | // in other cases we discard the cast without modifying oper |
601 | // so the VN doesn't change. |
602 | oper->SetVNsFromNode(tree); |
603 | goto REMOVE_CAST; |
604 | default: |
605 | break; |
606 | } |
607 | } |
608 | else |
609 | { |
610 | goto REMOVE_CAST; |
611 | } |
612 | } |
613 | } |
614 | else if (srcSize < dstSize) // widening cast |
615 | { |
616 | // Keep any long casts |
617 | if (dstSize == sizeof(int)) |
618 | { |
619 | // Only keep a signed-to-unsigned widening cast that has an overflow check |
620 | if (!tree->gtOverflow() || !unsignedDst || unsignedSrc) |
621 | { |
622 | goto REMOVE_CAST; |
623 | } |
624 | } |
625 | |
626 | // Widening casts from unsigned or to signed can never overflow |
627 | |
628 | if (unsignedSrc || !unsignedDst) |
629 | { |
630 | tree->gtFlags &= ~GTF_OVERFLOW; |
631 | if (!(oper->gtFlags & GTF_EXCEPT)) |
632 | { |
633 | tree->gtFlags &= ~GTF_EXCEPT; |
634 | } |
635 | } |
636 | } |
637 | else // if (srcSize > dstSize) |
638 | { |
639 | // Try to narrow the operand of the cast and discard the cast |
640 | // Note: Do not narrow a cast that is marked as a CSE |
641 | // And do not narrow if the oper is marked as a CSE either |
642 | // |
643 | if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) && |
644 | optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false)) |
645 | { |
646 | optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true); |
647 | |
648 | /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */ |
649 | if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType())) |
650 | { |
651 | oper = oper->gtCast.CastOp(); |
652 | } |
653 | goto REMOVE_CAST; |
654 | } |
655 | } |
656 | } |
657 | |
658 | switch (oper->gtOper) |
659 | { |
660 | /* If the operand is a constant, we'll fold it */ |
661 | case GT_CNS_INT: |
662 | case GT_CNS_LNG: |
663 | case GT_CNS_DBL: |
664 | case GT_CNS_STR: |
665 | { |
666 | GenTree* oldTree = tree; |
667 | |
668 | tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...) |
669 | |
670 | // Did we get a comma throw as a result of gtFoldExprConst? |
671 | if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA)) |
672 | { |
673 | noway_assert(fgIsCommaThrow(tree)); |
674 | tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1); |
675 | fgMorphTreeDone(tree); |
676 | return tree; |
677 | } |
678 | else if (tree->gtOper != GT_CAST) |
679 | { |
680 | return tree; |
681 | } |
682 | |
683 | noway_assert(tree->gtCast.CastOp() == oper); // unchanged |
684 | } |
685 | break; |
686 | |
687 | case GT_CAST: |
688 | /* Check for two consecutive casts into the same dstType */ |
689 | if (!tree->gtOverflow()) |
690 | { |
691 | var_types dstType2 = oper->CastToType(); |
692 | if (dstType == dstType2) |
693 | { |
694 | goto REMOVE_CAST; |
695 | } |
696 | } |
697 | break; |
698 | |
699 | case GT_COMMA: |
700 | // Check for a cast of a GT_COMMA with an overflow throw |
701 | // Bug 110829: Since this optimization will bash the types, |
702 | // neither oper nor commaOp2 can be a CSE candidate |
703 | if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate |
704 | { |
705 | GenTree* commaOp2 = oper->gtOp.gtOp2; |
706 | |
707 | if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate |
708 | { |
709 | // need type of oper to be same as tree |
710 | if (tree->gtType == TYP_LONG) |
711 | { |
712 | commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); |
713 | commaOp2->gtIntConCommon.SetLngValue(0); |
714 | /* Change the types of oper and commaOp2 to TYP_LONG */ |
715 | oper->gtType = commaOp2->gtType = TYP_LONG; |
716 | } |
717 | else if (varTypeIsFloating(tree->gtType)) |
718 | { |
719 | commaOp2->ChangeOperConst(GT_CNS_DBL); |
720 | commaOp2->gtDblCon.gtDconVal = 0.0; |
721 | // Change the types of oper and commaOp2 |
722 | oper->gtType = commaOp2->gtType = tree->gtType; |
723 | } |
724 | else |
725 | { |
726 | commaOp2->ChangeOperConst(GT_CNS_INT); |
727 | commaOp2->gtIntCon.gtIconVal = 0; |
728 | /* Change the types of oper and commaOp2 to TYP_INT */ |
729 | oper->gtType = commaOp2->gtType = TYP_INT; |
730 | } |
731 | } |
732 | |
733 | if (vnStore != nullptr) |
734 | { |
735 | fgValueNumberTreeConst(commaOp2); |
736 | } |
737 | |
738 | /* Return the GT_COMMA node as the new tree */ |
739 | return oper; |
740 | } |
741 | break; |
742 | |
743 | default: |
744 | break; |
745 | } /* end switch (oper->gtOper) */ |
746 | } |
747 | |
748 | if (tree->gtOverflow()) |
749 | { |
750 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); |
751 | } |
752 | |
753 | return tree; |
754 | |
755 | REMOVE_CAST: |
756 | /* Here we've eliminated the cast, so just return its operand */ |
757 | assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate |
758 | |
759 | DEBUG_DESTROY_NODE(tree); |
760 | return oper; |
761 | } |
762 | #ifdef _PREFAST_ |
763 | #pragma warning(pop) |
764 | #endif |
765 | |
766 | /***************************************************************************** |
767 | * |
768 | * Perform an unwrap operation on a Proxy object |
769 | */ |
770 | |
771 | GenTree* Compiler::fgUnwrapProxy(GenTree* objRef) |
772 | { |
773 | assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef)); |
774 | |
775 | CORINFO_EE_INFO* pInfo = eeGetEEInfo(); |
776 | GenTree* addTree; |
777 | |
778 | // Perform the unwrap: |
779 | // |
780 | // This requires two extra indirections. |
781 | // We mark these indirections as 'invariant' and |
782 | // the CSE logic will hoist them when appropriate. |
783 | // |
784 | // Note that each dereference is a GC pointer |
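// Illustrative shape of the result (the field offsets come from the EE):
// realThis = IND(IND(this + offsetOfTransparentProxyRP) + offsetOfRealProxyServer)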
785 | |
786 | addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL)); |
787 | |
788 | objRef = gtNewOperNode(GT_IND, TYP_REF, addTree); |
789 | objRef->gtFlags |= GTF_IND_INVARIANT; |
790 | |
791 | addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL)); |
792 | |
793 | objRef = gtNewOperNode(GT_IND, TYP_REF, addTree); |
794 | objRef->gtFlags |= GTF_IND_INVARIANT; |
795 | |
796 | // objRef now holds the 'real this' reference (i.e. the unwrapped proxy) |
797 | return objRef; |
798 | } |
799 | |
800 | /***************************************************************************** |
801 | * |
802 | * Morph an argument list; compute the pointer argument count in the process. |
803 | * |
804 | * NOTE: This function can be called from any place in the JIT to perform re-morphing |
805 | * due to graph altering modifications such as copy / constant propagation |
806 | */ |
807 | |
808 | unsigned UpdateGT_LISTFlags(GenTree* tree) |
809 | { |
810 | assert(tree->gtOper == GT_LIST); |
811 | |
812 | unsigned flags = 0; |
813 | if (tree->gtOp.gtOp2) |
814 | { |
815 | flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2); |
816 | } |
817 | |
818 | flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
819 | |
820 | tree->gtFlags &= ~GTF_ALL_EFFECT; |
821 | tree->gtFlags |= flags; |
822 | |
823 | return tree->gtFlags; |
824 | } |
825 | |
826 | #ifdef DEBUG |
827 | void fgArgTabEntry::Dump() |
828 | { |
829 | printf("fgArgTabEntry[arg %u", argNum); |
830 | printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper)); |
831 | if (regNum != REG_STK) |
832 | { |
833 | printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s"); |
834 | for (unsigned i = 0; i < numRegs; i++) |
835 | { |
836 | printf(" %s", getRegName(regNums[i])); |
837 | } |
838 | } |
839 | if (numSlots > 0) |
840 | { |
841 | printf(", numSlots=%u, slotNum=%u", numSlots, slotNum); |
842 | } |
843 | printf(", align=%u", alignment); |
844 | if (isLateArg()) |
845 | { |
846 | printf(", lateArgInx=%u", lateArgInx); |
847 | } |
848 | if (isSplit) |
849 | { |
850 | printf(", isSplit"); |
851 | } |
852 | if (needTmp) |
853 | { |
854 | printf(", tmpNum=V%02u", tmpNum); |
855 | } |
856 | if (needPlace) |
857 | { |
858 | printf(", needPlace"); |
859 | } |
860 | if (isTmp) |
861 | { |
862 | printf(", isTmp"); |
863 | } |
864 | if (processed) |
865 | { |
866 | printf(", processed"); |
867 | } |
868 | if (isHfaRegArg) |
869 | { |
870 | printf(", isHfa"); |
871 | } |
872 | if (isBackFilled) |
873 | { |
874 | printf(", isBackFilled"); |
875 | } |
876 | if (isNonStandard) |
877 | { |
878 | printf(", isNonStandard"); |
879 | } |
880 | if (isStruct) |
881 | { |
882 | printf(", isStruct"); |
883 | } |
884 | printf("]\n"); |
885 | } |
886 | #endif |
887 | |
888 | fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs) |
889 | { |
890 | compiler = comp; |
891 | callTree = call; |
892 | argCount = 0; // filled in arg count, starts at zero |
893 | nextSlotNum = INIT_ARG_STACK_SLOT; |
894 | stkLevel = 0; |
895 | #if defined(UNIX_X86_ABI) |
896 | alignmentDone = false; |
897 | stkSizeBytes = 0; |
898 | padStkAlign = 0; |
899 | #endif |
900 | #if FEATURE_FIXED_OUT_ARGS |
901 | outArgSize = 0; |
902 | #endif |
903 | |
904 | argTableSize = numArgs; // the allocated table size |
905 | |
906 | hasRegArgs = false; |
907 | hasStackArgs = false; |
908 | argsComplete = false; |
909 | argsSorted = false; |
910 | |
911 | if (argTableSize == 0) |
912 | { |
913 | argTable = nullptr; |
914 | } |
915 | else |
916 | { |
917 | argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize]; |
918 | } |
919 | } |
920 | |
921 | /***************************************************************************** |
922 | * |
923 | * fgArgInfo Copy Constructor |
924 | * |
925 | * This method needs to act like a copy constructor for fgArgInfo. |
926 | * The newCall needs to have its fgArgInfo initialized such that |
927 | * it is an exact copy of the oldCall's fgArgInfo. |
928 | * We have to take care since the argument information |
929 | * in the argTable contains pointers that must point to the |
930 | * new arguments and not the old arguments. |
931 | */ |
932 | fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall) |
933 | { |
934 | fgArgInfo* oldArgInfo = oldCall->gtCall.fgArgInfo; |
935 | |
936 | compiler = oldArgInfo->compiler; |
937 | callTree = newCall; |
938 | argCount = 0; // filled in arg count, starts at zero |
939 | nextSlotNum = INIT_ARG_STACK_SLOT; |
940 | stkLevel = oldArgInfo->stkLevel; |
941 | #if defined(UNIX_X86_ABI) |
942 | alignmentDone = oldArgInfo->alignmentDone; |
943 | stkSizeBytes = oldArgInfo->stkSizeBytes; |
944 | padStkAlign = oldArgInfo->padStkAlign; |
945 | #endif |
946 | #if FEATURE_FIXED_OUT_ARGS |
947 | outArgSize = oldArgInfo->outArgSize; |
948 | #endif |
949 | argTableSize = oldArgInfo->argTableSize; |
950 | argsComplete = false; |
951 | argTable = nullptr; |
952 | if (argTableSize > 0) |
953 | { |
954 | argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize]; |
955 | for (unsigned inx = 0; inx < argTableSize; inx++) |
956 | { |
957 | argTable[inx] = nullptr; |
958 | } |
959 | } |
960 | |
961 | assert(oldArgInfo->argsComplete); |
962 | |
963 | // We create local, artificial GenTreeArgLists that include the gtCallObjp, if it exists, as the first argument, |
964 | // so we can iterate over these argument lists more uniformly. |
965 | // We need to provide temporary non-null first arguments to these constructors: if we use them, we'll replace them below. |
966 | GenTreeArgList* newArgs; |
967 | GenTreeArgList newArgObjp(newCall, newCall->gtCallArgs); |
968 | GenTreeArgList* oldArgs; |
969 | GenTreeArgList oldArgObjp(oldCall, oldCall->gtCallArgs); |
970 | |
971 | if (newCall->gtCallObjp == nullptr) |
972 | { |
973 | assert(oldCall->gtCallObjp == nullptr); |
974 | newArgs = newCall->gtCallArgs; |
975 | oldArgs = oldCall->gtCallArgs; |
976 | } |
977 | else |
978 | { |
979 | assert(oldCall->gtCallObjp != nullptr); |
980 | newArgObjp.Current() = newCall->gtCallArgs; |
981 | newArgs = &newArgObjp; |
982 | oldArgObjp.Current() = oldCall->gtCallObjp; |
983 | oldArgs = &oldArgObjp; |
984 | } |
985 | |
986 | GenTree* newCurr; |
987 | GenTree* oldCurr; |
988 | GenTreeArgList* newParent = nullptr; |
989 | GenTreeArgList* oldParent = nullptr; |
990 | fgArgTabEntry** oldArgTable = oldArgInfo->argTable; |
991 | bool scanRegArgs = false; |
992 | |
993 | while (newArgs) |
994 | { |
995 | /* Get hold of the next argument values for the oldCall and newCall */ |
996 | |
997 | newCurr = newArgs->Current(); |
998 | oldCurr = oldArgs->Current(); |
999 | if (newArgs != &newArgObjp) |
1000 | { |
1001 | newParent = newArgs; |
1002 | oldParent = oldArgs; |
1003 | } |
1004 | else |
1005 | { |
1006 | assert(newParent == nullptr && oldParent == nullptr); |
1007 | } |
1008 | newArgs = newArgs->Rest(); |
1009 | oldArgs = oldArgs->Rest(); |
1010 | |
1011 | fgArgTabEntry* oldArgTabEntry = nullptr; |
1012 | fgArgTabEntry* newArgTabEntry = nullptr; |
1013 | |
1014 | for (unsigned inx = 0; inx < argTableSize; inx++) |
1015 | { |
1016 | oldArgTabEntry = oldArgTable[inx]; |
1017 | |
1018 | if (oldArgTabEntry->parent == oldParent) |
1019 | { |
1020 | assert((oldParent == nullptr) == (newParent == nullptr)); |
1021 | |
1022 | // We have found the matching "parent" field in oldArgTabEntry |
1023 | |
1024 | newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; |
1025 | |
1026 | // First block copy all fields |
1027 | // |
1028 | *newArgTabEntry = *oldArgTabEntry; |
1029 | |
1030 | // Then update all GenTree* fields in the newArgTabEntry |
1031 | // |
1032 | newArgTabEntry->parent = newParent; |
1033 | |
1034 | // The node field is likely to have been updated |
1035 | // to point at a node in the gtCallLateArgs list |
1036 | // |
1037 | if (oldArgTabEntry->node == oldCurr) |
1038 | { |
1039 | // node is not pointing into the gtCallLateArgs list |
1040 | newArgTabEntry->node = newCurr; |
1041 | } |
1042 | else |
1043 | { |
1044 | // node must be pointing into the gtCallLateArgs list |
1045 | // |
1046 | // We will fix this pointer up in the next loop |
1047 | // |
1048 | newArgTabEntry->node = nullptr; // For now we assign a NULL to this field |
1049 | |
1050 | scanRegArgs = true; |
1051 | } |
1052 | |
1053 | // Now initialize the proper element in the argTable array |
1054 | // |
1055 | argTable[inx] = newArgTabEntry; |
1056 | break; |
1057 | } |
1058 | } |
1059 | // We should have found the matching oldArgTabEntry and created the newArgTabEntry |
1060 | // |
1061 | assert(newArgTabEntry != nullptr); |
1062 | } |
1063 | |
1064 | if (scanRegArgs) |
1065 | { |
1066 | newArgs = newCall->gtCallLateArgs; |
1067 | oldArgs = oldCall->gtCallLateArgs; |
1068 | |
1069 | while (newArgs) |
1070 | { |
1071 | /* Get hold of the next argument values for the oldCall and newCall */ |
1072 | |
1073 | assert(newArgs->OperIsList()); |
1074 | |
1075 | newCurr = newArgs->Current(); |
1076 | newArgs = newArgs->Rest(); |
1077 | |
1078 | assert(oldArgs->OperIsList()); |
1079 | |
1080 | oldCurr = oldArgs->Current(); |
1081 | oldArgs = oldArgs->Rest(); |
1082 | |
1083 | fgArgTabEntry* oldArgTabEntry = nullptr; |
1084 | fgArgTabEntry* newArgTabEntry = nullptr; |
1085 | |
1086 | for (unsigned inx = 0; inx < argTableSize; inx++) |
1087 | { |
1088 | oldArgTabEntry = oldArgTable[inx]; |
1089 | |
1090 | if (oldArgTabEntry->node == oldCurr) |
1091 | { |
1092 | // We have found the matching "node" field in oldArgTabEntry |
1093 | |
1094 | newArgTabEntry = argTable[inx]; |
1095 | assert(newArgTabEntry != nullptr); |
1096 | |
1097 | // update the "node" GenTree* fields in the newArgTabEntry |
1098 | // |
1099 | assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field |
1100 | |
1101 | newArgTabEntry->node = newCurr; |
1102 | break; |
1103 | } |
1104 | } |
1105 | } |
1106 | } |
1107 | |
1108 | argCount = oldArgInfo->argCount; |
1109 | nextSlotNum = oldArgInfo->nextSlotNum; |
1110 | hasRegArgs = oldArgInfo->hasRegArgs; |
1111 | hasStackArgs = oldArgInfo->hasStackArgs; |
1112 | argsComplete = true; |
1113 | argsSorted = true; |
1114 | } |
1115 | |
1116 | void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry) |
1117 | { |
1118 | assert(argCount < argTableSize); |
1119 | argTable[argCount] = curArgTabEntry; |
1120 | argCount++; |
1121 | } |
1122 | |
1123 | fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, |
1124 | GenTree* node, |
1125 | GenTree* parent, |
1126 | regNumber regNum, |
1127 | unsigned numRegs, |
1128 | unsigned alignment, |
1129 | bool isStruct, |
1130 | bool isVararg /*=false*/) |
1131 | { |
1132 | fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; |
1133 | |
1134 | // Any additional register numbers are set by the caller. |
1135 | // This is primarily because on ARM we don't yet know if it |
1136 | // will be split or if it is a double HFA, so the number of registers |
1137 | // may actually be less. |
1138 | curArgTabEntry->setRegNum(0, regNum); |
1139 | |
1140 | curArgTabEntry->argNum = argNum; |
1141 | curArgTabEntry->node = node; |
1142 | curArgTabEntry->parent = parent; |
1143 | curArgTabEntry->slotNum = 0; |
1144 | curArgTabEntry->numRegs = numRegs; |
1145 | curArgTabEntry->numSlots = 0; |
1146 | curArgTabEntry->alignment = alignment; |
1147 | curArgTabEntry->lateArgInx = UINT_MAX; |
1148 | curArgTabEntry->tmpNum = BAD_VAR_NUM; |
1149 | curArgTabEntry->isSplit = false; |
1150 | curArgTabEntry->isTmp = false; |
1151 | curArgTabEntry->needTmp = false; |
1152 | curArgTabEntry->needPlace = false; |
1153 | curArgTabEntry->processed = false; |
1154 | #ifdef FEATURE_HFA |
1155 | curArgTabEntry->_isHfaArg = false; |
1156 | #endif |
1157 | curArgTabEntry->isBackFilled = false; |
1158 | curArgTabEntry->isNonStandard = false; |
1159 | curArgTabEntry->isStruct = isStruct; |
1160 | curArgTabEntry->isVararg = isVararg; |
1161 | |
1162 | hasRegArgs = true; |
1163 | AddArg(curArgTabEntry); |
1164 | return curArgTabEntry; |
1165 | } |
1166 | |
1167 | #if defined(UNIX_AMD64_ABI) |
1168 | fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, |
1169 | GenTree* node, |
1170 | GenTree* parent, |
1171 | regNumber regNum, |
1172 | unsigned numRegs, |
1173 | unsigned alignment, |
1174 | const bool isStruct, |
1175 | const bool isVararg, |
1176 | const regNumber otherRegNum, |
1177 | const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr) |
1178 | { |
1179 | fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment, isStruct, isVararg); |
1180 | assert(curArgTabEntry != nullptr); |
1181 | |
1182 | curArgTabEntry->isStruct = isStruct; // is this a struct arg |
1183 | |
1184 | curArgTabEntry->checkIsStruct(); |
1185 | assert(numRegs <= 2); |
1186 | if (numRegs == 2) |
1187 | { |
1188 | curArgTabEntry->setRegNum(1, otherRegNum); |
1189 | } |
1190 | |
1191 | if (isStruct && structDescPtr != nullptr) |
1192 | { |
1193 | curArgTabEntry->structDesc.CopyFrom(*structDescPtr); |
1194 | } |
1195 | |
1196 | return curArgTabEntry; |
1197 | } |
1198 | #endif // defined(UNIX_AMD64_ABI) |
1199 | |
1200 | fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, |
1201 | GenTree* node, |
1202 | GenTree* parent, |
1203 | unsigned numSlots, |
1204 | unsigned alignment, |
1205 | bool isStruct, |
1206 | bool isVararg /*=false*/) |
1207 | { |
1208 | fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; |
1209 | |
1210 | nextSlotNum = roundUp(nextSlotNum, alignment); |
1211 | |
1212 | curArgTabEntry->setRegNum(0, REG_STK); |
1213 | curArgTabEntry->argNum = argNum; |
1214 | curArgTabEntry->node = node; |
1215 | curArgTabEntry->parent = parent; |
1216 | curArgTabEntry->slotNum = nextSlotNum; |
1217 | curArgTabEntry->numRegs = 0; |
1218 | curArgTabEntry->numSlots = numSlots; |
1219 | curArgTabEntry->alignment = alignment; |
1220 | curArgTabEntry->lateArgInx = UINT_MAX; |
1221 | curArgTabEntry->tmpNum = BAD_VAR_NUM; |
1222 | curArgTabEntry->isSplit = false; |
1223 | curArgTabEntry->isTmp = false; |
1224 | curArgTabEntry->needTmp = false; |
1225 | curArgTabEntry->needPlace = false; |
1226 | curArgTabEntry->processed = false; |
1227 | #ifdef FEATURE_HFA |
1228 | curArgTabEntry->_isHfaArg = false; |
1229 | #endif |
1230 | curArgTabEntry->isBackFilled = false; |
1231 | curArgTabEntry->isNonStandard = false; |
1232 | curArgTabEntry->isStruct = isStruct; |
1233 | curArgTabEntry->isVararg = isVararg; |
1234 | |
1235 | hasStackArgs = true; |
1236 | AddArg(curArgTabEntry); |
1237 | |
1238 | nextSlotNum += numSlots; |
1239 | return curArgTabEntry; |
1240 | } |
1241 | |
1242 | void fgArgInfo::RemorphReset() |
1243 | { |
1244 | nextSlotNum = INIT_ARG_STACK_SLOT; |
1245 | } |
1246 | |
1247 | //------------------------------------------------------------------------ |
1248 | // UpdateRegArg: Update the given fgArgTabEntry while morphing. |
1249 | // |
1250 | // Arguments: |
1251 | // curArgTabEntry - the fgArgTabEntry to update. |
1252 | // node - the tree node that defines the argument |
1253 | // reMorphing - a boolean value indicating whether we are remorphing the call |
1254 | // |
1255 | // Assumptions: |
1256 | // This must have already been determined to be at least partially passed in registers. |
1257 | // |
1258 | void fgArgInfo::UpdateRegArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing) |
1259 | { |
1260 | bool isLateArg = curArgTabEntry->isLateArg(); |
1261 | // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa. |
1262 | assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) || |
1263 | (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0))); |
1264 | |
1265 | assert(curArgTabEntry->numRegs != 0); |
1266 | |
1267 | if (curArgTabEntry->parent != nullptr) |
1268 | { |
1269 | assert(curArgTabEntry->parent->OperIsList()); |
1270 | assert(curArgTabEntry->parent->Current() == node); |
1271 | } |
1272 | |
1273 | if (curArgTabEntry->node != node) |
1274 | { |
1275 | if (reMorphing) |
1276 | { |
1277 | // Find the arg in the late args list. |
1278 | GenTree* argx = Compiler::gtArgNodeByLateArgInx(callTree, curArgTabEntry->lateArgInx); |
1279 | if (curArgTabEntry->node != argx) |
1280 | { |
1281 | curArgTabEntry->node = argx; |
1282 | } |
1283 | } |
1284 | else |
1285 | { |
1286 | assert(!isLateArg); |
1287 | curArgTabEntry->node = node; |
1288 | } |
1289 | } |
1290 | } |
1291 | |
1292 | //------------------------------------------------------------------------ |
1293 | // UpdateStkArg: Update the given fgArgTabEntry while morphing. |
1294 | // |
1295 | // Arguments: |
1296 | // curArgTabEntry - the fgArgTabEntry to update. |
1297 | // node - the tree node that defines the argument |
1298 | // reMorphing - a boolean value indicating whether we are remorphing the call |
1299 | // |
1300 | // Assumptions: |
1301 | // This must have already been determined to be passed on the stack. |
1302 | // |
1303 | void fgArgInfo::UpdateStkArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing) |
1304 | { |
1305 | bool isLateArg = curArgTabEntry->isLateArg(); |
1306 | // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa. |
1307 | assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) || |
1308 | (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0))); |
1309 | |
1310 | noway_assert(curArgTabEntry->parent != nullptr); |
1311 | assert((curArgTabEntry->regNum == REG_STK) || curArgTabEntry->isSplit); |
1312 | assert(curArgTabEntry->parent->OperIsList()); |
1313 | assert(curArgTabEntry->parent->Current() == node); |
1314 | nextSlotNum = (unsigned)roundUp(nextSlotNum, curArgTabEntry->alignment); |
1315 | assert(curArgTabEntry->slotNum == nextSlotNum); |
1316 | |
1317 | if (curArgTabEntry->node != node) |
1318 | { |
1319 | #if FEATURE_FIXED_OUT_ARGS |
1320 | if (isLateArg) |
1321 | { |
1322 | GenTree* argx = nullptr; |
1323 | unsigned lateArgInx = curArgTabEntry->lateArgInx; |
1324 | |
1325 | // Traverse the late argument list to find this argument so that we can update it. |
1326 | unsigned listInx = 0; |
1327 | for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), listInx++) |
1328 | { |
1329 | argx = list->Current(); |
1330 | assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs |
1331 | if (listInx == lateArgInx) |
1332 | { |
1333 | break; |
1334 | } |
1335 | } |
1336 | assert(listInx == lateArgInx); |
1337 | assert(lateArgInx == curArgTabEntry->lateArgInx); |
1338 | |
1339 | if (curArgTabEntry->node != argx) |
1340 | { |
1341 | curArgTabEntry->node = argx; |
1342 | } |
1343 | } |
1344 | else |
1345 | #endif // FEATURE_FIXED_OUT_ARGS |
1346 | { |
1347 | curArgTabEntry->node = node; |
1348 | } |
1349 | } |
1350 | nextSlotNum += curArgTabEntry->numSlots; |
1351 | } |
1352 | |
1353 | void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots) |
1354 | { |
1355 | fgArgTabEntry* curArgTabEntry = nullptr; |
1356 | assert(argNum < argCount); |
1357 | for (unsigned inx = 0; inx < argCount; inx++) |
1358 | { |
1359 | curArgTabEntry = argTable[inx]; |
1360 | if (curArgTabEntry->argNum == argNum) |
1361 | { |
1362 | break; |
1363 | } |
1364 | } |
1365 | |
1366 | assert(numRegs > 0); |
1367 | assert(numSlots > 0); |
1368 | |
1369 | if (argsComplete) |
1370 | { |
1371 | assert(curArgTabEntry->isSplit == true); |
1372 | assert(curArgTabEntry->numRegs == numRegs); |
1373 | assert(curArgTabEntry->numSlots == numSlots); |
1374 | assert(hasStackArgs == true); |
1375 | } |
1376 | else |
1377 | { |
1378 | curArgTabEntry->isSplit = true; |
1379 | curArgTabEntry->numRegs = numRegs; |
1380 | curArgTabEntry->numSlots = numSlots; |
1381 | hasStackArgs = true; |
1382 | } |
1383 | nextSlotNum += numSlots; |
1384 | } |
1385 | |
1386 | //------------------------------------------------------------------------ |
1387 | // EvalToTmp: Replace the node in the given fgArgTabEntry with a temp |
1388 | // |
1389 | // Arguments: |
1390 | // curArgTabEntry - the fgArgTabEntry for the argument |
1391 | // tmpNum - the varNum for the temp |
1392 | // newNode - the assignment of the argument value to the temp |
1393 | // |
1394 | // Notes: |
1395 | // Although the name of this method is EvalToTmp, it doesn't actually create |
1396 | // the temp or the copy. |
1397 | // |
1398 | void fgArgInfo::EvalToTmp(fgArgTabEntry* curArgTabEntry, unsigned tmpNum, GenTree* newNode) |
1399 | { |
1400 | assert(curArgTabEntry->parent->Current() == newNode); |
1401 | |
1402 | curArgTabEntry->node = newNode; |
1403 | curArgTabEntry->tmpNum = tmpNum; |
1404 | curArgTabEntry->isTmp = true; |
1405 | } |
1406 | |
1407 | void fgArgInfo::ArgsComplete() |
1408 | { |
1409 | bool hasStackArgs = false; |
1410 | bool hasStructRegArg = false; |
1411 | |
1412 | for (unsigned curInx = 0; curInx < argCount; curInx++) |
1413 | { |
1414 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
1415 | assert(curArgTabEntry != nullptr); |
1416 | GenTree* argx = curArgTabEntry->node; |
1417 | |
1418 | if (curArgTabEntry->regNum == REG_STK) |
1419 | { |
1420 | hasStackArgs = true; |
1421 | #if !FEATURE_FIXED_OUT_ARGS |
1422 | // On x86 we use push instructions to pass arguments: |
1423 | // The non-register arguments are evaluated and pushed in order |
1424 | // and they are never evaluated into temps |
1425 | // |
1426 | continue; |
1427 | #endif |
1428 | } |
1429 | #if FEATURE_ARG_SPLIT |
1430 | else if (curArgTabEntry->isSplit) |
1431 | { |
1432 | hasStructRegArg = true; |
1433 | hasStackArgs = true; |
1434 | } |
1435 | #endif // FEATURE_ARG_SPLIT |
1436 | else // we have a register argument, next we look for a struct type. |
1437 | { |
1438 | if (varTypeIsStruct(argx) UNIX_AMD64_ABI_ONLY(|| curArgTabEntry->isStruct)) |
1439 | { |
1440 | hasStructRegArg = true; |
1441 | } |
1442 | } |
1443 | |
1444 | /* If the argument tree contains an assignment (GTF_ASG) then the argument |
1445 | and every earlier argument (except constants) must be evaluated into temps |
1446 | since there may be other arguments that follow and they may use the value being assigned. |
1447 | |
1448 | EXAMPLE: ArgTab is "a, a=5, a" |
1449 | -> when we see the second arg "a=5" |
1450 | we know the first two arguments "a, a=5" have to be evaluated into temps |
1451 | |
1452 | For the case of an assignment, we only know that there exists some assignment somewhere |
1453 | in the tree. We don't know what is being assigned so we are very conservative here |
1454 | and assume that any local variable could have been assigned. |
1455 | */ |
1456 | |
1457 | if (argx->gtFlags & GTF_ASG) |
1458 | { |
1459 | // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to |
1460 | // a tmp, then we need a temp in the late arg list. |
1461 | if ((argCount > 1) || argx->OperIsCopyBlkOp() |
1462 | #ifdef FEATURE_FIXED_OUT_ARGS |
1463 | || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property |
1464 | // that we only have late non-register args when that feature is on. |
1465 | #endif // FEATURE_FIXED_OUT_ARGS |
1466 | ) |
1467 | { |
1468 | curArgTabEntry->needTmp = true; |
1469 | } |
1470 | |
1471 | // For all previous arguments, unless they are a simple constant |
1472 | // we require that they be evaluated into temps |
1473 | for (unsigned prevInx = 0; prevInx < curInx; prevInx++) |
1474 | { |
1475 | fgArgTabEntry* prevArgTabEntry = argTable[prevInx]; |
1476 | assert(prevArgTabEntry->argNum < curArgTabEntry->argNum); |
1477 | |
1478 | assert(prevArgTabEntry->node); |
1479 | if (prevArgTabEntry->node->gtOper != GT_CNS_INT) |
1480 | { |
1481 | prevArgTabEntry->needTmp = true; |
1482 | } |
1483 | } |
1484 | } |
1485 | |
1486 | bool treatLikeCall = ((argx->gtFlags & GTF_CALL) != 0); |
1487 | #if FEATURE_FIXED_OUT_ARGS |
1488 | // Like calls, if this argument has a tree that will do an inline throw |
1489 | // (a call to a jit helper), then we need to treat it like a call (but only |
1490 | // if there are/were any stack args). |
1491 | // This means unnesting, sorting, etc. Technically this is overly |
1492 | // conservative, but I want to avoid as much special-case debug-only code |
1493 | // as possible, so leveraging the GTF_CALL flag is the easiest. |
1494 | // |
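// For example (illustrative): under debuggable codegen, an argument such as a[i] whose range
// check expands to a throw-helper call is treated like a call here whenever some other
// argument is passed on the stack.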
1495 | if (!treatLikeCall && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) && compiler->opts.compDbgCode && |
1496 | (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT)) |
1497 | { |
1498 | for (unsigned otherInx = 0; otherInx < argCount; otherInx++) |
1499 | { |
1500 | if (otherInx == curInx) |
1501 | { |
1502 | continue; |
1503 | } |
1504 | |
1505 | if (argTable[otherInx]->regNum == REG_STK) |
1506 | { |
1507 | treatLikeCall = true; |
1508 | break; |
1509 | } |
1510 | } |
1511 | } |
1512 | #endif // FEATURE_FIXED_OUT_ARGS |
1513 | |
1514 | /* If the argument contains a call (GTF_CALL) then the argument and everything before the call |
1515 | with a GLOB_EFFECT must be evaluated into temps (this is because everything with SIDE_EFFECT |
1516 | has to be kept in the right order since we will move the call to the first position) |
1517 | |
1518 | For calls we don't have to be quite as conservative as we are with an assignment |
1519 | since the call won't be modifying any non-address taken LclVars. |
1520 | */ |
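// For example (illustrative): in Call(obj.field, Helper(), y), the Helper() argument is forced
// to a temp, and obj.field (which carries side-effect flags from its indirection) is forced to
// a temp as well, so the original left-to-right evaluation order is preserved.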
1521 | |
1522 | if (treatLikeCall) |
1523 | { |
1524 | if (argCount > 1) // If this is not the only argument |
1525 | { |
1526 | curArgTabEntry->needTmp = true; |
1527 | } |
1528 | else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL)) |
1529 | { |
1530 | // Spill all arguments that are floating point calls |
1531 | curArgTabEntry->needTmp = true; |
1532 | } |
1533 | |
1534 | // All previous arguments may need to be evaluated into temps |
1535 | for (unsigned prevInx = 0; prevInx < curInx; prevInx++) |
1536 | { |
1537 | fgArgTabEntry* prevArgTabEntry = argTable[prevInx]; |
1538 | assert(prevArgTabEntry->argNum < curArgTabEntry->argNum); |
1539 | assert(prevArgTabEntry->node); |
1540 | |
1541 | // For all previous arguments, if they have any GTF_ALL_EFFECT |
1542 | // we require that they be evaluated into a temp |
1543 | if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0) |
1544 | { |
1545 | prevArgTabEntry->needTmp = true; |
1546 | } |
1547 | #if FEATURE_FIXED_OUT_ARGS |
1548 | // Or, if they are stored into the FIXED_OUT_ARG area |
1549 | // we require that they be moved to the gtCallLateArgs |
1550 | // and replaced with a placeholder node |
1551 | else if (prevArgTabEntry->regNum == REG_STK) |
1552 | { |
1553 | prevArgTabEntry->needPlace = true; |
1554 | } |
1555 | #if FEATURE_ARG_SPLIT |
1556 | else if (prevArgTabEntry->isSplit) |
1557 | { |
1558 | prevArgTabEntry->needPlace = true; |
1559 | } |
1560 | #endif // FEATURE_ARG_SPLIT |
1561 | #endif // FEATURE_FIXED_OUT_ARGS |
1562 | } |
1563 | } |
1564 | |
1565 | #if FEATURE_MULTIREG_ARGS |
1566 | // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST |
1567 | // with multiple indirections, so here we consider spilling it into a tmp LclVar. |
1568 | // |
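// For example (illustrative): a 16-byte struct passed in two registers whose GT_OBJ source is
// costly to evaluate is copied once into a temp LclVar, so the per-register loads read from the
// temp instead of re-evaluating the source address expression.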
1569 | CLANG_FORMAT_COMMENT_ANCHOR; |
1570 | #ifdef _TARGET_ARM_ |
1571 | bool isMultiRegArg = (curArgTabEntry->numRegs > 0) && (curArgTabEntry->numRegs + curArgTabEntry->numSlots > 1); |
1572 | #else |
1573 | bool isMultiRegArg = (curArgTabEntry->numRegs > 1); |
1574 | #endif |
1575 | |
1576 | if ((varTypeIsStruct(argx->TypeGet())) && (curArgTabEntry->needTmp == false)) |
1577 | { |
1578 | if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)) |
1579 | { |
1580 | // Spill multireg struct arguments that have Assignments or Calls embedded in them |
1581 | curArgTabEntry->needTmp = true; |
1582 | } |
1583 | else |
1584 | { |
1585 | // We call gtPrepareCost to measure the cost of evaluating this tree |
1586 | compiler->gtPrepareCost(argx); |
1587 | |
1588 | if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX))) |
1589 | { |
1590 | // Spill multireg struct arguments that are expensive to evaluate twice |
1591 | curArgTabEntry->needTmp = true; |
1592 | } |
1593 | #if defined(FEATURE_SIMD) && defined(_TARGET_ARM64_) |
1594 | else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet())) |
1595 | { |
1596 | // SIMD types do not need the optimization below due to their sizes |
1597 | if (argx->OperIsSIMDorSimdHWintrinsic() || |
1598 | (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) && |
1599 | argx->AsObj()->gtOp1->gtOp.gtOp1->OperIsSIMDorSimdHWintrinsic())) |
1600 | { |
1601 | curArgTabEntry->needTmp = true; |
1602 | } |
1603 | } |
1604 | #endif |
1605 | #ifndef _TARGET_ARM_ |
1606 | // TODO-Arm: This optimization is not implemented for ARM32 |
1607 | // so we skip this for ARM32 until it is ported to use RyuJIT backend |
1608 | // |
1609 | else if (argx->OperGet() == GT_OBJ) |
1610 | { |
1611 | GenTreeObj* argObj = argx->AsObj(); |
1612 | CORINFO_CLASS_HANDLE objClass = argObj->gtClass; |
1613 | unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass); |
1614 | switch (structSize) |
1615 | { |
1616 | case 3: |
1617 | case 5: |
1618 | case 6: |
1619 | case 7: |
1620 | // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes |
1621 | // |
1622 | if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar? |
1623 | { |
// If we don't have a LclVar we need to read exactly 3, 5, 6 or 7 bytes.
// For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
1626 | // |
1627 | curArgTabEntry->needTmp = true; |
1628 | } |
1629 | break; |
1630 | case 11: |
1631 | case 13: |
1632 | case 14: |
1633 | case 15: |
1634 | // Spill any GT_OBJ multireg structs that are difficult to extract |
1635 | // |
1636 | // When we have a GT_OBJ of a struct with the above sizes we would need |
1637 | // to use 3 or 4 load instructions to load the exact size of this struct. |
1638 | // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence |
1639 | // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp. |
1640 | // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing |
1641 | // the argument. |
1642 | // |
1643 | curArgTabEntry->needTmp = true; |
1644 | break; |
1645 | |
1646 | default: |
1647 | break; |
1648 | } |
1649 | } |
1650 | #endif // !_TARGET_ARM_ |
1651 | } |
1652 | } |
1653 | #endif // FEATURE_MULTIREG_ARGS |
1654 | } |
1655 | |
// We only care about this because we can't spill structs, and QMARKs involve a lot of
// spilling; if we don't have any QMARKs then it doesn't matter.
// So check for QMARKs globally once here, instead of inside the loop.
1659 | // |
1660 | const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed); |
1661 | |
1662 | #if FEATURE_FIXED_OUT_ARGS |
1663 | |
1664 | // For Arm/x64 we only care because we can't reorder a register |
1665 | // argument that uses GT_LCLHEAP. This is an optimization to |
1666 | // save a check inside the below loop. |
1667 | // |
1668 | const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed); |
1669 | |
1670 | #else |
1671 | |
1672 | const bool hasStackArgsWeCareAbout = hasStackArgs; |
1673 | |
1674 | #endif // FEATURE_FIXED_OUT_ARGS |
1675 | |
1676 | // If we have any stack args we have to force the evaluation |
1677 | // of any arguments passed in registers that might throw an exception |
1678 | // |
// Technically we are only required to handle the following two cases:
1680 | // a GT_IND with GTF_IND_RNGCHK (only on x86) or |
1681 | // a GT_LCLHEAP node that allocates stuff on the stack |
1682 | // |
1683 | if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout) |
1684 | { |
1685 | for (unsigned curInx = 0; curInx < argCount; curInx++) |
1686 | { |
1687 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
1688 | assert(curArgTabEntry != nullptr); |
1689 | GenTree* argx = curArgTabEntry->node; |
1690 | |
1691 | // Examine the register args that are currently not marked needTmp |
1692 | // |
1693 | if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK)) |
1694 | { |
1695 | if (hasStackArgsWeCareAbout) |
1696 | { |
1697 | #if !FEATURE_FIXED_OUT_ARGS |
// On x86 we previously recorded a stack depth of zero when
// morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag.
// Thus we cannot reorder the argument after any stack-based argument.
// (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
// check for it explicitly.)
1703 | // |
1704 | if (argx->gtFlags & GTF_EXCEPT) |
1705 | { |
1706 | curArgTabEntry->needTmp = true; |
1707 | continue; |
1708 | } |
1709 | #else |
1710 | // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP |
1711 | // |
1712 | if (argx->gtFlags & GTF_EXCEPT) |
1713 | { |
1714 | assert(compiler->compLocallocUsed); |
1715 | |
1716 | // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree |
1717 | // |
1718 | if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT) |
1719 | { |
1720 | curArgTabEntry->needTmp = true; |
1721 | continue; |
1722 | } |
1723 | } |
1724 | #endif |
1725 | } |
1726 | if (hasStructRegArgWeCareAbout) |
1727 | { |
1728 | // Returns true if a GT_QMARK node is encountered in the argx tree |
1729 | // |
1730 | if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT) |
1731 | { |
1732 | curArgTabEntry->needTmp = true; |
1733 | continue; |
1734 | } |
1735 | } |
1736 | } |
1737 | } |
1738 | } |
1739 | |
1740 | argsComplete = true; |
1741 | } |
1742 | |
1743 | void fgArgInfo::SortArgs() |
1744 | { |
1745 | assert(argsComplete == true); |
1746 | |
1747 | #ifdef DEBUG |
1748 | if (compiler->verbose) |
1749 | { |
printf("\nSorting the arguments:\n");
1751 | } |
1752 | #endif |
1753 | |
1754 | /* Shuffle the arguments around before we build the gtCallLateArgs list. |
1755 | The idea is to move all "simple" arguments like constants and local vars |
1756 | to the end of the table, and move the complex arguments towards the beginning |
1757 | of the table. This will help prevent registers from being spilled by |
1758 | allowing us to evaluate the more complex arguments before the simpler arguments. |
1759 | The argTable ends up looking like: |
1760 | +------------------------------------+ <--- argTable[argCount - 1] |
1761 | | constants | |
1762 | +------------------------------------+ |
1763 | | local var / local field | |
1764 | +------------------------------------+ |
1765 | | remaining arguments sorted by cost | |
1766 | +------------------------------------+ |
1767 | | temps (argTable[].needTmp = true) | |
1768 | +------------------------------------+ |
1769 | | args with calls (GTF_CALL) | |
1770 | +------------------------------------+ <--- argTable[0] |
1771 | */ |
1772 | |
1773 | /* Set the beginning and end for the new argument table */ |
1774 | unsigned curInx; |
1775 | int regCount = 0; |
1776 | unsigned begTab = 0; |
1777 | unsigned endTab = argCount - 1; |
1778 | unsigned argsRemaining = argCount; |
1779 | |
1780 | // First take care of arguments that are constants. |
1781 | // [We use a backward iterator pattern] |
1782 | // |
1783 | curInx = argCount; |
1784 | do |
1785 | { |
1786 | curInx--; |
1787 | |
1788 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
1789 | |
1790 | if (curArgTabEntry->regNum != REG_STK) |
1791 | { |
1792 | regCount++; |
1793 | } |
1794 | |
1795 | // Skip any already processed args |
1796 | // |
1797 | if (!curArgTabEntry->processed) |
1798 | { |
1799 | GenTree* argx = curArgTabEntry->node; |
1800 | |
1801 | // put constants at the end of the table |
1802 | // |
1803 | if (argx->gtOper == GT_CNS_INT) |
1804 | { |
1805 | noway_assert(curInx <= endTab); |
1806 | |
1807 | curArgTabEntry->processed = true; |
1808 | |
1809 | // place curArgTabEntry at the endTab position by performing a swap |
1810 | // |
1811 | if (curInx != endTab) |
1812 | { |
1813 | argTable[curInx] = argTable[endTab]; |
1814 | argTable[endTab] = curArgTabEntry; |
1815 | } |
1816 | |
1817 | endTab--; |
1818 | argsRemaining--; |
1819 | } |
1820 | } |
1821 | } while (curInx > 0); |
1822 | |
1823 | if (argsRemaining > 0) |
1824 | { |
1825 | // Next take care of arguments that are calls. |
1826 | // [We use a forward iterator pattern] |
1827 | // |
1828 | for (curInx = begTab; curInx <= endTab; curInx++) |
1829 | { |
1830 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
1831 | |
1832 | // Skip any already processed args |
1833 | // |
1834 | if (!curArgTabEntry->processed) |
1835 | { |
1836 | GenTree* argx = curArgTabEntry->node; |
1837 | |
1838 | // put calls at the beginning of the table |
1839 | // |
1840 | if (argx->gtFlags & GTF_CALL) |
1841 | { |
1842 | curArgTabEntry->processed = true; |
1843 | |
1844 | // place curArgTabEntry at the begTab position by performing a swap |
1845 | // |
1846 | if (curInx != begTab) |
1847 | { |
1848 | argTable[curInx] = argTable[begTab]; |
1849 | argTable[begTab] = curArgTabEntry; |
1850 | } |
1851 | |
1852 | begTab++; |
1853 | argsRemaining--; |
1854 | } |
1855 | } |
1856 | } |
1857 | } |
1858 | |
1859 | if (argsRemaining > 0) |
1860 | { |
// Next take care of arguments that are temps.
1862 | // These temps come before the arguments that are |
1863 | // ordinary local vars or local fields |
1864 | // since this will give them a better chance to become |
1865 | // enregistered into their actual argument register. |
1866 | // [We use a forward iterator pattern] |
1867 | // |
1868 | for (curInx = begTab; curInx <= endTab; curInx++) |
1869 | { |
1870 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
1871 | |
1872 | // Skip any already processed args |
1873 | // |
1874 | if (!curArgTabEntry->processed) |
1875 | { |
1876 | if (curArgTabEntry->needTmp) |
1877 | { |
1878 | curArgTabEntry->processed = true; |
1879 | |
1880 | // place curArgTabEntry at the begTab position by performing a swap |
1881 | // |
1882 | if (curInx != begTab) |
1883 | { |
1884 | argTable[curInx] = argTable[begTab]; |
1885 | argTable[begTab] = curArgTabEntry; |
1886 | } |
1887 | |
1888 | begTab++; |
1889 | argsRemaining--; |
1890 | } |
1891 | } |
1892 | } |
1893 | } |
1894 | |
1895 | if (argsRemaining > 0) |
1896 | { |
1897 | // Next take care of local var and local field arguments. |
1898 | // These are moved towards the end of the argument evaluation. |
1899 | // [We use a backward iterator pattern] |
1900 | // |
1901 | curInx = endTab + 1; |
1902 | do |
1903 | { |
1904 | curInx--; |
1905 | |
1906 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
1907 | |
1908 | // Skip any already processed args |
1909 | // |
1910 | if (!curArgTabEntry->processed) |
1911 | { |
1912 | GenTree* argx = curArgTabEntry->node; |
1913 | |
1914 | if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD)) |
1915 | { |
1916 | noway_assert(curInx <= endTab); |
1917 | |
1918 | curArgTabEntry->processed = true; |
1919 | |
1920 | // place curArgTabEntry at the endTab position by performing a swap |
1921 | // |
1922 | if (curInx != endTab) |
1923 | { |
1924 | argTable[curInx] = argTable[endTab]; |
1925 | argTable[endTab] = curArgTabEntry; |
1926 | } |
1927 | |
1928 | endTab--; |
1929 | argsRemaining--; |
1930 | } |
1931 | } |
1932 | } while (curInx > begTab); |
1933 | } |
1934 | |
1935 | // Finally, take care of all the remaining arguments. |
1936 | // Note that we fill in one arg at a time using a while loop. |
1937 | bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop |
1938 | while (argsRemaining > 0) |
1939 | { |
1940 | /* Find the most expensive arg remaining and evaluate it next */ |
1941 | |
1942 | fgArgTabEntry* expensiveArgTabEntry = nullptr; |
1943 | unsigned expensiveArg = UINT_MAX; |
1944 | unsigned expensiveArgCost = 0; |
1945 | |
1946 | // [We use a forward iterator pattern] |
1947 | // |
1948 | for (curInx = begTab; curInx <= endTab; curInx++) |
1949 | { |
1950 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
1951 | |
1952 | // Skip any already processed args |
1953 | // |
1954 | if (!curArgTabEntry->processed) |
1955 | { |
1956 | GenTree* argx = curArgTabEntry->node; |
1957 | |
1958 | // We should have already handled these kinds of args |
1959 | assert(argx->gtOper != GT_LCL_VAR); |
1960 | assert(argx->gtOper != GT_LCL_FLD); |
1961 | assert(argx->gtOper != GT_CNS_INT); |
1962 | |
1963 | // This arg should either have no persistent side effects or be the last one in our table |
1964 | // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); |
1965 | |
1966 | if (argsRemaining == 1) |
1967 | { |
1968 | // This is the last arg to place |
1969 | expensiveArg = curInx; |
1970 | expensiveArgTabEntry = curArgTabEntry; |
1971 | assert(begTab == endTab); |
1972 | break; |
1973 | } |
1974 | else |
1975 | { |
1976 | if (!costsPrepared) |
1977 | { |
1978 | /* We call gtPrepareCost to measure the cost of evaluating this tree */ |
1979 | compiler->gtPrepareCost(argx); |
1980 | } |
1981 | |
1982 | if (argx->gtCostEx > expensiveArgCost) |
1983 | { |
1984 | // Remember this arg as the most expensive one that we have yet seen |
1985 | expensiveArgCost = argx->gtCostEx; |
1986 | expensiveArg = curInx; |
1987 | expensiveArgTabEntry = curArgTabEntry; |
1988 | } |
1989 | } |
1990 | } |
1991 | } |
1992 | |
1993 | noway_assert(expensiveArg != UINT_MAX); |
1994 | |
1995 | // put the most expensive arg towards the beginning of the table |
1996 | |
1997 | expensiveArgTabEntry->processed = true; |
1998 | |
1999 | // place expensiveArgTabEntry at the begTab position by performing a swap |
2000 | // |
2001 | if (expensiveArg != begTab) |
2002 | { |
2003 | argTable[expensiveArg] = argTable[begTab]; |
2004 | argTable[begTab] = expensiveArgTabEntry; |
2005 | } |
2006 | |
2007 | begTab++; |
2008 | argsRemaining--; |
2009 | |
2010 | costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop |
2011 | } |
2012 | |
// The table should now be completely filled and thus begTab should now be adjacent to endTab
// and argsRemaining should be zero
2015 | assert(begTab == (endTab + 1)); |
2016 | assert(argsRemaining == 0); |
2017 | |
2018 | #if !FEATURE_FIXED_OUT_ARGS |
2019 | // Finally build the regArgList |
2020 | // |
2021 | callTree->gtCall.regArgList = NULL; |
2022 | callTree->gtCall.regArgListCount = regCount; |
2023 | |
2024 | unsigned regInx = 0; |
2025 | for (curInx = 0; curInx < argCount; curInx++) |
2026 | { |
2027 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
2028 | |
2029 | if (curArgTabEntry->regNum != REG_STK) |
2030 | { |
// Record the argument register in the regArgList
2032 | // |
2033 | callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum; |
2034 | regInx++; |
2035 | } |
2036 | } |
2037 | #endif // !FEATURE_FIXED_OUT_ARGS |
2038 | |
2039 | argsSorted = true; |
2040 | } |
2041 | |
2042 | #ifdef DEBUG |
2043 | void fgArgInfo::Dump(Compiler* compiler) |
2044 | { |
2045 | for (unsigned curInx = 0; curInx < ArgCount(); curInx++) |
2046 | { |
2047 | fgArgTabEntry* curArgEntry = ArgTable()[curInx]; |
2048 | curArgEntry->Dump(); |
2049 | } |
2050 | } |
2051 | #endif |
2052 | |
2053 | //------------------------------------------------------------------------------ |
2054 | // fgMakeTmpArgNode : This function creates a tmp var only if needed. |
2055 | // We need this to be done in order to enforce ordering |
2056 | // of the evaluation of arguments. |
2057 | // |
2058 | // Arguments: |
2059 | // curArgTabEntry |
2060 | // |
2061 | // Return Value: |
2062 | // the newly created temp var tree. |
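//
// Notes:
//    Depending on the target and on how the argument is passed, the result is a GT_LCL_VAR,
//    a GT_LCL_FLD retyped to the primitive type used to pass the struct, or a GT_OBJ
//    (or bare GT_ADDR) of the temp for structs passed by value in multiple registers
//    or by reference.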
2063 | |
2064 | GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) |
2065 | { |
2066 | unsigned tmpVarNum = curArgTabEntry->tmpNum; |
2067 | LclVarDsc* varDsc = &lvaTable[tmpVarNum]; |
2068 | assert(varDsc->lvIsTemp); |
2069 | var_types type = varDsc->TypeGet(); |
2070 | |
2071 | // Create a copy of the temp to go into the late argument list |
2072 | GenTree* arg = gtNewLclvNode(tmpVarNum, type); |
2073 | GenTree* addrNode = nullptr; |
2074 | |
2075 | if (varTypeIsStruct(type)) |
2076 | { |
2077 | |
2078 | #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_ARM_) |
2079 | |
2080 | // Can this type be passed as a primitive type? |
2081 | // If so, the following call will return the corresponding primitive type. |
2082 | // Otherwise, it will return TYP_UNKNOWN and we will pass it as a struct type. |
2083 | |
2084 | bool passedAsPrimitive = false; |
2085 | if (curArgTabEntry->isSingleRegOrSlot()) |
2086 | { |
2087 | CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle(); |
2088 | var_types structBaseType = |
2089 | getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd, curArgTabEntry->isVararg); |
2090 | |
2091 | if (structBaseType != TYP_UNKNOWN) |
2092 | { |
2093 | passedAsPrimitive = true; |
2094 | #if defined(UNIX_AMD64_ABI) |
2095 | // TODO-Cleanup: This is inelegant, but eventually we'll track this in the fgArgTabEntry, |
2096 | // and otherwise we'd have to either modify getPrimitiveTypeForStruct() to take |
2097 | // a structDesc or call eeGetSystemVAmd64PassStructInRegisterDescriptor yet again. |
2098 | // |
2099 | if (genIsValidFloatReg(curArgTabEntry->regNum)) |
2100 | { |
2101 | if (structBaseType == TYP_INT) |
2102 | { |
2103 | structBaseType = TYP_FLOAT; |
2104 | } |
2105 | else |
2106 | { |
2107 | assert(structBaseType == TYP_LONG); |
2108 | structBaseType = TYP_DOUBLE; |
2109 | } |
2110 | } |
2111 | #endif |
2112 | type = structBaseType; |
2113 | } |
2114 | } |
2115 | |
// If it is passed in registers, don't get the address of the var. Make it a
// field instead. It will be loaded into registers with a putarg_reg node in Lowering.
2118 | if (passedAsPrimitive) |
2119 | { |
2120 | arg->ChangeOper(GT_LCL_FLD); |
2121 | arg->gtType = type; |
2122 | } |
2123 | else |
2124 | { |
2125 | var_types addrType = TYP_BYREF; |
2126 | arg = gtNewOperNode(GT_ADDR, addrType, arg); |
2127 | addrNode = arg; |
2128 | |
2129 | #if FEATURE_MULTIREG_ARGS |
2130 | #ifdef _TARGET_ARM64_ |
2131 | assert(varTypeIsStruct(type)); |
2132 | if (lvaIsMultiregStruct(varDsc, curArgTabEntry->isVararg)) |
2133 | { |
2134 | // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD); |
2135 | // as that is how UNIX_AMD64_ABI works. |
2136 | // We will create a GT_OBJ for the argument below. |
2137 | // This will be passed by value in two registers. |
2138 | assert(addrNode != nullptr); |
2139 | |
2140 | // Create an Obj of the temp to use it as a call argument. |
2141 | arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); |
2142 | |
2143 | // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here; |
2144 | // this is only to preserve former behavior (though some CSE'ing of struct |
2145 | // values can be pessimizing, so enabling this may require some additional tuning). |
2146 | arg->gtFlags |= GTF_DONT_CSE; |
2147 | } |
2148 | #else |
2149 | // Always create an Obj of the temp to use it as a call argument. |
2150 | arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); |
2151 | arg->gtFlags |= GTF_DONT_CSE; |
2152 | #endif // !_TARGET_ARM64_ |
2153 | #endif // FEATURE_MULTIREG_ARGS |
2154 | } |
2155 | |
2156 | #else // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_) |
2157 | |
// On other targets, we pass the struct by value.
2159 | assert(varTypeIsStruct(type)); |
2160 | |
2161 | addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg); |
2162 | |
2163 | // Get a new Obj node temp to use it as a call argument. |
2164 | // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object. |
2165 | arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode); |
2166 | |
2167 | #endif // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_) |
2168 | |
2169 | } // (varTypeIsStruct(type)) |
2170 | |
2171 | if (addrNode != nullptr) |
2172 | { |
2173 | assert(addrNode->gtOper == GT_ADDR); |
2174 | |
2175 | // This will prevent this LclVar from being optimized away |
2176 | lvaSetVarAddrExposed(tmpVarNum); |
2177 | |
2178 | // the child of a GT_ADDR is required to have this flag set |
2179 | addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE; |
2180 | } |
2181 | |
2182 | return arg; |
2183 | } |
2184 | |
2185 | //------------------------------------------------------------------------------ |
2186 | // EvalArgsToTemps : Create temp assignments and populate the LateArgs list. |
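//
// Notes:
//    Arguments marked needTmp get an assignment to a temp placed in the early list
//    (gtCallArgs) and a use of that temp appended to gtCallLateArgs. Arguments that
//    don't need a temp are either left in place (stack args that don't need a
//    placeholder) or moved to gtCallLateArgs with a GT_ARGPLACE placeholder node
//    left in their original position in gtCallArgs.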
2187 | |
2188 | void fgArgInfo::EvalArgsToTemps() |
2189 | { |
2190 | assert(argsSorted == true); |
2191 | |
2192 | unsigned regArgInx = 0; |
2193 | // Now go through the argument table and perform the necessary evaluation into temps |
2194 | GenTreeArgList* tmpRegArgNext = nullptr; |
2195 | for (unsigned curInx = 0; curInx < argCount; curInx++) |
2196 | { |
2197 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
2198 | |
2199 | GenTree* argx = curArgTabEntry->node; |
2200 | GenTree* setupArg = nullptr; |
2201 | GenTree* defArg; |
2202 | |
2203 | #if !FEATURE_FIXED_OUT_ARGS |
2204 | // Only ever set for FEATURE_FIXED_OUT_ARGS |
2205 | assert(curArgTabEntry->needPlace == false); |
2206 | |
2207 | // On x86 and other archs that use push instructions to pass arguments: |
2208 | // Only the register arguments need to be replaced with placeholder nodes. |
2209 | // Stacked arguments are evaluated and pushed (or stored into the stack) in order. |
2210 | // |
2211 | if (curArgTabEntry->regNum == REG_STK) |
2212 | continue; |
2213 | #endif |
2214 | |
2215 | if (curArgTabEntry->needTmp) |
2216 | { |
2217 | if (curArgTabEntry->isTmp == true) |
2218 | { |
2219 | // Create a copy of the temp to go into the late argument list |
2220 | defArg = compiler->fgMakeTmpArgNode(curArgTabEntry); |
2221 | |
2222 | // mark the original node as a late argument |
2223 | argx->gtFlags |= GTF_LATE_ARG; |
2224 | } |
2225 | else |
2226 | { |
2227 | // Create a temp assignment for the argument |
2228 | // Put the temp in the gtCallLateArgs list |
2229 | CLANG_FORMAT_COMMENT_ANCHOR; |
2230 | |
2231 | #ifdef DEBUG |
2232 | if (compiler->verbose) |
2233 | { |
printf("Argument with 'side effect'...\n");
2235 | compiler->gtDispTree(argx); |
2236 | } |
2237 | #endif |
2238 | |
2239 | #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) |
2240 | noway_assert(argx->gtType != TYP_STRUCT); |
2241 | #endif |
2242 | |
unsigned tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
2244 | if (argx->gtOper == GT_MKREFANY) |
2245 | { |
// For GT_MKREFANY, typically the actual struct copying does
// not have any side-effects and can be delayed. So instead
// of using a temp for the whole struct, we can just use a temp
// for the operand that has a side-effect.
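// For example, for mkrefany(op1, op2) where only op1 has side effects, the early
// list gets "tmp = op1" and the late list keeps mkrefany(LCL_VAR tmp, op2).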
2250 | GenTree* operand; |
2251 | if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0) |
2252 | { |
2253 | operand = argx->gtOp.gtOp1; |
2254 | |
2255 | // In the early argument evaluation, place an assignment to the temp |
2256 | // from the source operand of the mkrefany |
2257 | setupArg = compiler->gtNewTempAssign(tmpVarNum, operand); |
2258 | |
2259 | // Replace the operand for the mkrefany with the new temp. |
2260 | argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet()); |
2261 | } |
2262 | else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0) |
2263 | { |
2264 | operand = argx->gtOp.gtOp2; |
2265 | |
2266 | // In the early argument evaluation, place an assignment to the temp |
2267 | // from the source operand of the mkrefany |
2268 | setupArg = compiler->gtNewTempAssign(tmpVarNum, operand); |
2269 | |
2270 | // Replace the operand for the mkrefany with the new temp. |
2271 | argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet()); |
2272 | } |
2273 | } |
2274 | |
2275 | if (setupArg != nullptr) |
2276 | { |
2277 | // Now keep the mkrefany for the late argument list |
2278 | defArg = argx; |
2279 | |
2280 | // Clear the side-effect flags because now both op1 and op2 have no side-effects |
2281 | defArg->gtFlags &= ~GTF_ALL_EFFECT; |
2282 | } |
2283 | else |
2284 | { |
2285 | setupArg = compiler->gtNewTempAssign(tmpVarNum, argx); |
2286 | |
2287 | LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum; |
2288 | var_types lclVarType = genActualType(argx->gtType); |
2289 | var_types scalarType = TYP_UNKNOWN; |
2290 | |
2291 | if (setupArg->OperIsCopyBlkOp()) |
2292 | { |
2293 | setupArg = compiler->fgMorphCopyBlock(setupArg); |
2294 | #if defined(_TARGET_ARMARCH_) |
2295 | // This scalar LclVar widening step is only performed for ARM architectures. |
2296 | // |
2297 | CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum); |
2298 | unsigned structSize = varDsc->lvExactSize; |
2299 | |
2300 | scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg); |
2301 | #endif // _TARGET_ARMARCH_ |
2302 | } |
2303 | |
2304 | // scalarType can be set to a wider type for ARM architectures: (3 => 4) or (5,6,7 => 8) |
2305 | if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) |
2306 | { |
2307 | // Create a GT_LCL_FLD using the wider type to go to the late argument list |
2308 | defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0); |
2309 | } |
2310 | else |
2311 | { |
2312 | // Create a copy of the temp to go to the late argument list |
2313 | defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType); |
2314 | } |
2315 | |
2316 | curArgTabEntry->isTmp = true; |
2317 | curArgTabEntry->tmpNum = tmpVarNum; |
2318 | |
2319 | #ifdef _TARGET_ARM_ |
2320 | // Previously we might have thought the local was promoted, and thus the 'COPYBLK' |
2321 | // might have left holes in the used registers (see |
2322 | // fgAddSkippedRegsInPromotedStructArg). |
2323 | // Too bad we're not that smart for these intermediate temps... |
2324 | if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1)) |
2325 | { |
2326 | regNumber argReg = curArgTabEntry->regNum; |
2327 | regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum); |
2328 | for (unsigned i = 1; i < curArgTabEntry->numRegs; i++) |
2329 | { |
2330 | argReg = genRegArgNext(argReg); |
2331 | allUsedRegs |= genRegMask(argReg); |
2332 | } |
2333 | } |
2334 | #endif // _TARGET_ARM_ |
2335 | } |
2336 | |
2337 | /* mark the assignment as a late argument */ |
2338 | setupArg->gtFlags |= GTF_LATE_ARG; |
2339 | |
2340 | #ifdef DEBUG |
2341 | if (compiler->verbose) |
2342 | { |
printf("\n Evaluate to a temp:\n");
2344 | compiler->gtDispTree(setupArg); |
2345 | } |
2346 | #endif |
2347 | } |
2348 | } |
2349 | else // curArgTabEntry->needTmp == false |
2350 | { |
2351 | // On x86 - |
2352 | // Only register args are replaced with placeholder nodes |
2353 | // and the stack based arguments are evaluated and pushed in order. |
2354 | // |
2355 | // On Arm/x64 - When needTmp is false and needPlace is false, |
2356 | // the non-register arguments are evaluated and stored in order. |
2357 | // When needPlace is true we have a nested call that comes after |
2358 | // this argument so we have to replace it in the gtCallArgs list |
2359 | // (the initial argument evaluation list) with a placeholder. |
2360 | // |
2361 | if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false)) |
2362 | { |
2363 | continue; |
2364 | } |
2365 | |
2366 | /* No temp needed - move the whole node to the gtCallLateArgs list */ |
2367 | |
2368 | /* The argument is deferred and put in the late argument list */ |
2369 | |
2370 | defArg = argx; |
2371 | |
2372 | // Create a placeholder node to put in its place in gtCallLateArgs. |
2373 | |
2374 | // For a struct type we also need to record the class handle of the arg. |
2375 | CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; |
2376 | |
2377 | #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) |
2378 | |
2379 | // All structs are either passed (and retyped) as integral types, OR they |
2380 | // are passed by reference. |
2381 | noway_assert(argx->gtType != TYP_STRUCT); |
2382 | |
2383 | #else // !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI) |
2384 | |
2385 | if (varTypeIsStruct(defArg)) |
2386 | { |
2387 | clsHnd = compiler->gtGetStructHandleIfPresent(defArg); |
2388 | noway_assert(clsHnd != NO_CLASS_HANDLE); |
2389 | } |
2390 | |
2391 | #endif // !(defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) |
2392 | |
2393 | setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd); |
2394 | |
2395 | /* mark the placeholder node as a late argument */ |
2396 | setupArg->gtFlags |= GTF_LATE_ARG; |
2397 | |
2398 | #ifdef DEBUG |
2399 | if (compiler->verbose) |
2400 | { |
2401 | if (curArgTabEntry->regNum == REG_STK) |
2402 | { |
printf("Deferred stack argument:\n");
}
else
{
printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
}

compiler->gtDispTree(argx);
printf("Replaced with placeholder node:\n");
2412 | compiler->gtDispTree(setupArg); |
2413 | } |
2414 | #endif |
2415 | } |
2416 | |
2417 | if (setupArg != nullptr) |
2418 | { |
2419 | if (curArgTabEntry->parent) |
2420 | { |
2421 | GenTree* parent = curArgTabEntry->parent; |
2422 | /* a normal argument from the list */ |
2423 | noway_assert(parent->OperIsList()); |
2424 | noway_assert(parent->gtOp.gtOp1 == argx); |
2425 | |
2426 | parent->gtFlags |= (setupArg->gtFlags & GTF_ALL_EFFECT); |
2427 | |
2428 | parent->gtOp.gtOp1 = setupArg; |
2429 | } |
2430 | else |
2431 | { |
2432 | /* must be the gtCallObjp */ |
2433 | noway_assert(callTree->gtCall.gtCallObjp == argx); |
2434 | |
2435 | callTree->gtCall.gtCallObjp = setupArg; |
2436 | } |
2437 | } |
2438 | |
2439 | /* deferred arg goes into the late argument list */ |
2440 | |
2441 | if (tmpRegArgNext == nullptr) |
2442 | { |
2443 | tmpRegArgNext = compiler->gtNewArgList(defArg); |
2444 | callTree->gtCall.gtCallLateArgs = tmpRegArgNext; |
2445 | } |
2446 | else |
2447 | { |
2448 | noway_assert(tmpRegArgNext->OperIsList()); |
2449 | noway_assert(tmpRegArgNext->Current()); |
2450 | tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg); |
2451 | |
2452 | tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT); |
2453 | tmpRegArgNext = tmpRegArgNext->Rest(); |
2454 | } |
2455 | |
2456 | tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT); |
2457 | |
2458 | curArgTabEntry->node = defArg; |
2459 | curArgTabEntry->lateArgInx = regArgInx++; |
2460 | } |
2461 | |
2462 | #ifdef DEBUG |
2463 | if (compiler->verbose) |
2464 | { |
printf("\nShuffled argument table: ");
for (unsigned curInx = 0; curInx < argCount; curInx++)
{
fgArgTabEntry* curArgTabEntry = argTable[curInx];

if (curArgTabEntry->regNum != REG_STK)
{
printf("%s ", getRegName(curArgTabEntry->regNum));
}
}
printf("\n");
2476 | } |
2477 | #endif |
2478 | } |
2479 | |
2480 | // Return a conservative estimate of the stack size in bytes. |
2481 | // It will be used only on the intercepted-for-host code path to copy the arguments. |
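// The estimate simply counts the argument list entries and charges one REGSIZE_BYTES slot
// for each entry beyond MAX_REG_ARG; it does not account for structs occupying multiple slots.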
2482 | int Compiler::fgEstimateCallStackSize(GenTreeCall* call) |
2483 | { |
2484 | |
2485 | int numArgs = 0; |
2486 | for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest()) |
2487 | { |
2488 | numArgs++; |
2489 | } |
2490 | |
2491 | int numStkArgs; |
2492 | if (numArgs > MAX_REG_ARG) |
2493 | { |
2494 | numStkArgs = numArgs - MAX_REG_ARG; |
2495 | } |
2496 | else |
2497 | { |
2498 | numStkArgs = 0; |
2499 | } |
2500 | |
2501 | return numStkArgs * REGSIZE_BYTES; |
2502 | } |
2503 | |
2504 | //------------------------------------------------------------------------------ |
// fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
//                  otherwise insert a comma form temp.
//
// Arguments:
//    pOp - a pointer to the child node we will be replacing with the comma expression that
//          evaluates *pOp to a temp and returns the result
2511 | // |
2512 | // Return Value: |
2513 | // A fresh GT_LCL_VAR node referencing the temp which has not been used |
2514 | // |
2515 | // Assumption: |
2516 | // The result tree MUST be added to the tree structure since the ref counts are |
2517 | // already incremented. |
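//
// Notes:
//    For example, given a GT_LCL_VAR "x" this simply returns a clone of "x"; for a
//    non-local tree such as "a + b" it rewrites *pOp to COMMA(ASG(tmp, a + b), tmp)
//    via fgInsertCommaFormTemp and returns a fresh use of the temp.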
2518 | |
2519 | GenTree* Compiler::fgMakeMultiUse(GenTree** pOp) |
2520 | { |
2521 | GenTree* tree = *pOp; |
2522 | if (tree->IsLocal()) |
2523 | { |
2524 | return gtClone(tree); |
2525 | } |
2526 | else |
2527 | { |
2528 | return fgInsertCommaFormTemp(pOp); |
2529 | } |
2530 | } |
2531 | |
2532 | //------------------------------------------------------------------------------ |
2533 | // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree, |
2534 | // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl) |
2535 | // |
2536 | // Arguments: |
2537 | // ppTree - a pointer to the child node we will be replacing with the comma expression that |
2538 | // evaluates ppTree to a temp and returns the result |
2539 | // |
2540 | // structType - value type handle if the temp created is of TYP_STRUCT. |
2541 | // |
2542 | // Return Value: |
2543 | // A fresh GT_LCL_VAR node referencing the temp which has not been used |
2544 | // |
2545 | |
2546 | GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/) |
2547 | { |
2548 | GenTree* subTree = *ppTree; |
2549 | |
unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
2551 | |
2552 | if (varTypeIsStruct(subTree)) |
2553 | { |
2554 | assert(structType != nullptr); |
2555 | lvaSetStruct(lclNum, structType, false); |
2556 | } |
2557 | |
// If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
// The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() when
// setting the type of the lcl var nodes created below.
2561 | GenTree* asg = gtNewTempAssign(lclNum, subTree); |
2562 | |
2563 | GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET); |
2564 | |
2565 | GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load); |
2566 | |
2567 | *ppTree = comma; |
2568 | |
2569 | return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET); |
2570 | } |
2571 | |
2572 | //------------------------------------------------------------------------ |
2573 | // fgInitArgInfo: Construct the fgArgInfo for the call with the fgArgEntry for each arg |
2574 | // |
2575 | // Arguments: |
2576 | // callNode - the call for which we are generating the fgArgInfo |
2577 | // |
2578 | // Return Value: |
2579 | // None |
2580 | // |
2581 | // Notes: |
2582 | // This method is idempotent in that it checks whether the fgArgInfo has already been |
2583 | // constructed, and just returns. |
2584 | // This method only computes the arg table and arg entries for the call (the fgArgInfo), |
2585 | // and makes no modification of the args themselves. |
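//    Existing args may be marked as non-standard (e.g. the ret-buff arg that must go in the
//    fixed return buffer register), and extra non-standard args (e.g. the PInvoke cookie or
//    the VSD stub address) may be inserted into gtCallArgs before the arg table is built.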
2586 | // |
2587 | void Compiler::fgInitArgInfo(GenTreeCall* call) |
2588 | { |
2589 | GenTree* args; |
2590 | GenTree* argx; |
2591 | |
2592 | unsigned argIndex = 0; |
2593 | unsigned intArgRegNum = 0; |
2594 | unsigned fltArgRegNum = 0; |
2595 | unsigned argSlots = 0; |
2596 | |
2597 | bool callHasRetBuffArg = call->HasRetBufArg(); |
2598 | bool callIsVararg = call->IsVarargs(); |
2599 | |
2600 | #ifdef _TARGET_ARM_ |
2601 | regMaskTP argSkippedRegMask = RBM_NONE; |
2602 | regMaskTP fltArgSkippedRegMask = RBM_NONE; |
2603 | #endif // _TARGET_ARM_ |
2604 | |
2605 | #if defined(_TARGET_X86_) |
2606 | unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated |
2607 | #else |
2608 | const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number |
2609 | #endif |
2610 | |
2611 | if (call->fgArgInfo != nullptr) |
2612 | { |
2613 | // We've already initialized and set the fgArgInfo. |
2614 | return; |
2615 | } |
JITDUMP("Initializing arg info for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper));
2617 | |
2618 | // At this point, we should never have gtCallLateArgs, as this needs to be done before those are determined. |
2619 | assert(call->gtCallLateArgs == nullptr); |
2620 | |
2621 | #ifdef _TARGET_UNIX_ |
2622 | if (callIsVararg) |
2623 | { |
// Currently native varargs are not implemented on non-Windows targets.
//
// Note that some targets like Arm64 Unix should not need much work as
// the ABI is the same, while other targets may only need small changes,
// such as amd64 Unix, which just expects RAX to pass numFPArguments.
NYI("Morphing Vararg call not yet implemented on non-Windows targets.");
2630 | } |
2631 | #endif // _TARGET_UNIX_ |
2632 | |
2633 | // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed |
2634 | // following the normal calling convention or in the normal argument registers. We either mark existing |
2635 | // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the |
2636 | // non-standard arguments into the argument list, below. |
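// The table is used as follows: Add() records a node/register pair as each non-standard arg
// is set up, and later FindReg() is queried when building the arg table to see whether an
// argument must be passed in a specific (non-standard) register.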
2637 | class NonStandardArgs |
2638 | { |
2639 | struct NonStandardArg |
2640 | { |
2641 | regNumber reg; // The register to be assigned to this non-standard argument. |
2642 | GenTree* node; // The tree node representing this non-standard argument. |
2643 | // Note that this must be updated if the tree node changes due to morphing! |
2644 | }; |
2645 | |
2646 | ArrayStack<NonStandardArg> args; |
2647 | |
2648 | public: |
2649 | NonStandardArgs(CompAllocator alloc) : args(alloc, 3) // We will have at most 3 non-standard arguments |
2650 | { |
2651 | } |
2652 | |
2653 | //----------------------------------------------------------------------------- |
2654 | // Add: add a non-standard argument to the table of non-standard arguments |
2655 | // |
2656 | // Arguments: |
2657 | // node - a GenTree node that has a non-standard argument. |
2658 | // reg - the register to assign to this node. |
2659 | // |
2660 | // Return Value: |
2661 | // None. |
2662 | // |
2663 | void Add(GenTree* node, regNumber reg) |
2664 | { |
2665 | NonStandardArg nsa = {reg, node}; |
2666 | args.Push(nsa); |
2667 | } |
2668 | |
2669 | //----------------------------------------------------------------------------- |
2670 | // Find: Look for a GenTree* in the set of non-standard args. |
2671 | // |
2672 | // Arguments: |
2673 | // node - a GenTree node to look for |
2674 | // |
2675 | // Return Value: |
2676 | // The index of the non-standard argument (a non-negative, unique, stable number). |
2677 | // If the node is not a non-standard argument, return -1. |
2678 | // |
2679 | int Find(GenTree* node) |
2680 | { |
2681 | for (int i = 0; i < args.Height(); i++) |
2682 | { |
2683 | if (node == args.Index(i).node) |
2684 | { |
2685 | return i; |
2686 | } |
2687 | } |
2688 | return -1; |
2689 | } |
2690 | |
2691 | //----------------------------------------------------------------------------- |
2692 | // FindReg: Look for a GenTree node in the non-standard arguments set. If found, |
2693 | // set the register to use for the node. |
2694 | // |
2695 | // Arguments: |
2696 | // node - a GenTree node to look for |
2697 | // pReg - an OUT argument. *pReg is set to the non-standard register to use if |
2698 | // 'node' is found in the non-standard argument set. |
2699 | // |
2700 | // Return Value: |
2701 | // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the |
2702 | // register to use. |
2703 | // 'false' otherwise (in this case, *pReg is unmodified). |
2704 | // |
2705 | bool FindReg(GenTree* node, regNumber* pReg) |
2706 | { |
2707 | for (int i = 0; i < args.Height(); i++) |
2708 | { |
2709 | NonStandardArg& nsa = args.IndexRef(i); |
2710 | if (node == nsa.node) |
2711 | { |
2712 | *pReg = nsa.reg; |
2713 | return true; |
2714 | } |
2715 | } |
2716 | return false; |
2717 | } |
2718 | |
2719 | //----------------------------------------------------------------------------- |
2720 | // Replace: Replace the non-standard argument node at a given index. This is done when |
2721 | // the original node was replaced via morphing, but we need to continue to assign a |
2722 | // particular non-standard arg to it. |
2723 | // |
2724 | // Arguments: |
2725 | // index - the index of the non-standard arg. It must exist. |
2726 | // node - the new GenTree node. |
2727 | // |
2728 | // Return Value: |
2729 | // None. |
2730 | // |
2731 | void Replace(int index, GenTree* node) |
2732 | { |
2733 | args.IndexRef(index).node = node; |
2734 | } |
2735 | |
2736 | } nonStandardArgs(getAllocator(CMK_ArrayStack)); |
2737 | |
2738 | // Count of args. On first morph, this is counted before we've filled in the arg table. |
2739 | // On remorph, we grab it from the arg table. |
2740 | unsigned numArgs = 0; |
2741 | |
2742 | // First we need to count the args |
2743 | if (call->gtCallObjp) |
2744 | { |
2745 | numArgs++; |
2746 | } |
2747 | for (GenTree* args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2) |
2748 | { |
2749 | numArgs++; |
2750 | } |
2751 | |
2752 | // Insert or mark non-standard args. These are either outside the normal calling convention, or |
2753 | // arguments registers that don't follow the normal progression of argument registers in the calling |
2754 | // convention (such as for the ARM64 fixed return buffer argument x8). |
2755 | // |
2756 | // *********** NOTE ************* |
2757 | // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments |
2758 | // in the implementation of fast tail call. |
2759 | // *********** END NOTE ********* |
2760 | CLANG_FORMAT_COMMENT_ANCHOR; |
2761 | |
2762 | #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
// The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
// Set the argument registers correctly here.
2765 | if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME)) |
2766 | { |
2767 | GenTreeArgList* args = call->gtCallArgs; |
2768 | GenTree* arg1 = args->Current(); |
2769 | assert(arg1 != nullptr); |
2770 | nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME); |
2771 | } |
2772 | #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
2773 | #if defined(_TARGET_ARM_) |
2774 | // A non-standard calling convention using secure delegate invoke is used on ARM, only, but not for secure |
2775 | // delegates. It is used for VSD delegate calls where the VSD custom calling convention ABI requires passing |
2776 | // R4, a callee-saved register, with a special value. Since R4 is a callee-saved register, its value needs |
2777 | // to be preserved. Thus, the VM uses a secure delegate IL stub, which preserves R4 and also sets up R4 |
2778 | // correctly for the VSD call. The VM is simply reusing an existing mechanism (secure delegate IL stub) |
2779 | // to achieve its goal for delegate VSD call. See COMDelegate::NeedsWrapperDelegate() in the VM for details. |
2780 | else if (call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV) |
2781 | { |
2782 | GenTree* arg = call->gtCallObjp; |
2783 | if (arg->OperIsLocal()) |
2784 | { |
2785 | arg = gtClone(arg, true); |
2786 | } |
2787 | else |
2788 | { |
2789 | GenTree* tmp = fgInsertCommaFormTemp(&arg); |
2790 | call->gtCallObjp = arg; |
2791 | call->gtFlags |= GTF_ASG; |
2792 | arg = tmp; |
2793 | } |
2794 | noway_assert(arg != nullptr); |
2795 | |
2796 | GenTree* newArg = new (this, GT_ADDR) |
2797 | GenTreeAddrMode(TYP_BYREF, arg, nullptr, 0, eeGetEEInfo()->offsetOfSecureDelegateIndirectCell); |
2798 | |
2799 | // Append newArg as the last arg |
2800 | GenTreeArgList** insertionPoint = &call->gtCallArgs; |
2801 | for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest()) |
2802 | { |
2803 | } |
2804 | *insertionPoint = gtNewListNode(newArg, nullptr); |
2805 | |
2806 | numArgs++; |
2807 | nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg()); |
2808 | } |
2809 | #endif // defined(_TARGET_ARM_) |
2810 | #if defined(_TARGET_X86_) |
2811 | // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the |
2812 | // hi part to be in EDX. This sets the argument registers up correctly. |
2813 | else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) || |
2814 | call->IsHelperCall(this, CORINFO_HELP_LRSZ)) |
2815 | { |
2816 | GenTreeArgList* args = call->gtCallArgs; |
2817 | GenTree* arg1 = args->Current(); |
2818 | assert(arg1 != nullptr); |
2819 | nonStandardArgs.Add(arg1, REG_LNGARG_LO); |
2820 | |
2821 | args = args->Rest(); |
2822 | GenTree* arg2 = args->Current(); |
2823 | assert(arg2 != nullptr); |
2824 | nonStandardArgs.Add(arg2, REG_LNGARG_HI); |
2825 | } |
2826 | #else // !_TARGET_X86_ |
2827 | // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed. |
2828 | // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling |
2829 | // convention for x86/SSE. |
2830 | |
2831 | // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it |
2832 | // |
2833 | if (hasFixedRetBuffReg() && call->HasRetBufArg()) |
2834 | { |
2835 | args = call->gtCallArgs; |
2836 | assert(args != nullptr); |
2837 | assert(args->OperIsList()); |
2838 | |
2839 | argx = call->gtCallArgs->Current(); |
2840 | |
2841 | // We don't increment numArgs here, since we already counted this argument above. |
2842 | |
2843 | nonStandardArgs.Add(argx, theFixedRetBuffReg()); |
2844 | } |
2845 | |
2846 | // We are allowed to have a Fixed Return Buffer argument combined |
2847 | // with any of the remaining non-standard arguments |
2848 | // |
2849 | if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers()) |
2850 | { |
2851 | assert(!call->gtCallCookie); |
2852 | // Add a conservative estimate of the stack size in a special parameter (r11) at the call site. |
2853 | // It will be used only on the intercepted-for-host code path to copy the arguments. |
2854 | |
2855 | GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call)); |
2856 | call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs); |
2857 | numArgs++; |
2858 | |
2859 | nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM); |
2860 | } |
2861 | else if (call->IsVirtualStub()) |
2862 | { |
2863 | if (!call->IsTailCallViaHelper()) |
2864 | { |
2865 | GenTree* stubAddrArg = fgGetStubAddrArg(call); |
2866 | // And push the stub address onto the list of arguments |
2867 | call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs); |
2868 | |
2869 | numArgs++; |
2870 | nonStandardArgs.Add(stubAddrArg, stubAddrArg->gtRegNum); |
2871 | } |
2872 | else |
2873 | { |
2874 | // If it is a VSD call getting dispatched via tail call helper, |
2875 | // fgMorphTailCall() would materialize stub addr as an additional |
2876 | // parameter added to the original arg list and hence no need to |
2877 | // add as a non-standard arg. |
2878 | } |
2879 | } |
2880 | else |
2881 | #endif // !_TARGET_X86_ |
2882 | if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr)) |
2883 | { |
2884 | assert(!call->IsUnmanaged()); |
2885 | |
2886 | GenTree* arg = call->gtCallCookie; |
2887 | noway_assert(arg != nullptr); |
2888 | call->gtCallCookie = nullptr; |
2889 | |
2890 | #if defined(_TARGET_X86_) |
2891 | // x86 passes the cookie on the stack as the final argument to the call. |
2892 | GenTreeArgList** insertionPoint = &call->gtCallArgs; |
2893 | for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest()) |
2894 | { |
2895 | } |
2896 | *insertionPoint = gtNewListNode(arg, nullptr); |
2897 | #else // !defined(_TARGET_X86_) |
2898 | // All other architectures pass the cookie in a register. |
2899 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
2900 | #endif // defined(_TARGET_X86_) |
2901 | |
2902 | nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM); |
2903 | numArgs++; |
2904 | |
2905 | // put destination into R10/EAX |
2906 | arg = gtClone(call->gtCallAddr, true); |
2907 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
2908 | numArgs++; |
2909 | |
2910 | nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM); |
2911 | |
2912 | // finally change this call to a helper call |
2913 | call->gtCallType = CT_HELPER; |
2914 | call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI); |
2915 | } |
2916 | #if defined(FEATURE_READYTORUN_COMPILER) && defined(_TARGET_ARMARCH_) |
// For arm, we dispatch the call the same way as a VSD call, using virtualStubParamInfo->GetReg()
// for the indirection cell address, which ZapIndirectHelperThunk expects.
2919 | if (call->IsR2RRelativeIndir()) |
2920 | { |
2921 | assert(call->gtEntryPoint.addr != nullptr); |
2922 | |
2923 | size_t addrValue = (size_t)call->gtEntryPoint.addr; |
2924 | GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR); |
2925 | indirectCellAddress->gtRegNum = REG_R2R_INDIRECT_PARAM; |
2926 | |
2927 | // Push the stub address onto the list of arguments. |
2928 | call->gtCallArgs = gtNewListNode(indirectCellAddress, call->gtCallArgs); |
2929 | |
2930 | numArgs++; |
2931 | nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->gtRegNum); |
2932 | } |
2933 | |
2934 | #endif // FEATURE_READYTORUN_COMPILER && _TARGET_ARMARCH_ |
2935 | |
2936 | // Allocate the fgArgInfo for the call node; |
2937 | // |
2938 | call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs); |
2939 | |
2940 | // Add the 'this' argument value, if present. |
2941 | argx = call->gtCallObjp; |
2942 | if (argx != nullptr) |
2943 | { |
2944 | assert(argIndex == 0); |
2945 | assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT); |
2946 | assert(varTypeIsGC(argx) || (argx->gtType == TYP_I_IMPL)); |
2947 | |
2948 | // This is a register argument - put it in the table. |
2949 | call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1, false, |
2950 | callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(nullptr)); |
2951 | |
2952 | intArgRegNum++; |
2953 | #ifdef WINDOWS_AMD64_ABI |
2954 | // Whenever we pass an integer register argument |
2955 | // we skip the corresponding floating point register argument |
2956 | fltArgRegNum++; |
2957 | #endif // WINDOWS_AMD64_ABI |
2958 | argIndex++; |
2959 | argSlots++; |
2960 | } |
2961 | |
2962 | #ifdef _TARGET_X86_ |
2963 | // Compute the maximum number of arguments that can be passed in registers. |
2964 | // For X86 we handle the varargs and unmanaged calling conventions |
2965 | |
2966 | if (call->gtFlags & GTF_CALL_POP_ARGS) |
2967 | { |
2968 | noway_assert(intArgRegNum < MAX_REG_ARG); |
2969 | // No more register arguments for varargs (CALL_POP_ARGS) |
2970 | maxRegArgs = intArgRegNum; |
2971 | |
2972 | // Add in the ret buff arg |
2973 | if (callHasRetBuffArg) |
2974 | maxRegArgs++; |
2975 | } |
2976 | |
2977 | if (call->IsUnmanaged()) |
2978 | { |
2979 | noway_assert(intArgRegNum == 0); |
2980 | |
2981 | if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL) |
2982 | { |
2983 | noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL || |
2984 | call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF || |
2985 | call->gtCallArgs->gtOp.gtOp1->gtOper == |
2986 | GT_NOP); // the arg was already morphed to a register (fgMorph called twice) |
2987 | maxRegArgs = 1; |
2988 | } |
2989 | else |
2990 | { |
2991 | maxRegArgs = 0; |
2992 | } |
2993 | |
2994 | // Add in the ret buff arg |
2995 | if (callHasRetBuffArg) |
2996 | maxRegArgs++; |
2997 | } |
2998 | #endif // _TARGET_X86_ |
2999 | |
3000 | /* Morph the user arguments */ |
3001 | CLANG_FORMAT_COMMENT_ANCHOR; |
3002 | |
3003 | #if defined(_TARGET_ARM_) |
3004 | |
3005 | // The ARM ABI has a concept of back-filling of floating-point argument registers, according |
3006 | // to the "Procedure Call Standard for the ARM Architecture" document, especially |
3007 | // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can |
3008 | // appear in a lower-numbered register than floating point argument N. That is, argument |
3009 | // register allocation is not strictly increasing. To support this, we need to keep track of unused |
3010 | // floating-point argument registers that we can back-fill. We only support 4-byte float and |
3011 | // 8-byte double types, and one to four element HFAs composed of these types. With this, we will |
3012 | // only back-fill single registers, since there is no way with these types to create |
3013 | // an alignment hole greater than one register. However, there can be up to 3 back-fill slots |
3014 | // available (with 16 FP argument registers). Consider this code: |
3015 | // |
3016 | // struct HFA { float x, y, z; }; // a three element HFA |
3017 | // void bar(float a1, // passed in f0 |
3018 | // double a2, // passed in f2/f3; skip f1 for alignment |
3019 | // HFA a3, // passed in f4/f5/f6 |
3020 | // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot |
3021 | // HFA a5, // passed in f10/f11/f12 |
3022 | // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill |
3023 | // // slots |
3024 | // float a7, // passed in f1 (back-filled) |
3025 | // float a8, // passed in f7 (back-filled) |
3026 | // float a9, // passed in f13 (back-filled) |
3027 | // float a10) // passed on the stack in [OutArg+0] |
3028 | // |
3029 | // Note that if we ever support FP types with larger alignment requirements, then there could |
3030 | // be more than single register back-fills. |
3031 | // |
// Once we assign a floating-point argument to the stack, all subsequent floating-point arguments must go on the stack.
3033 | // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling |
3034 | // continues only so long as no VFP CPRC has been allocated to a slot on the stack." |
3035 | // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack |
3036 | // and prevent any additional floating-point arguments from going in registers. |
3037 | |
3038 | bool anyFloatStackArgs = false; |
3039 | |
3040 | #endif // _TARGET_ARM_ |
3041 | |
3042 | #ifdef UNIX_AMD64_ABI |
3043 | SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; |
3044 | #endif // UNIX_AMD64_ABI |
3045 | |
3046 | for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++) |
3047 | { |
3048 | assert(args->OperIsList()); |
3049 | argx = args->Current(); |
3050 | fgArgTabEntry* argEntry = nullptr; |
3051 | |
3052 | // Change the node to TYP_I_IMPL so we don't report GC info |
3053 | // NOTE: We deferred this from the importer because of the inliner. |
3054 | |
3055 | if (argx->IsVarAddr()) |
3056 | { |
3057 | argx->gtType = TYP_I_IMPL; |
3058 | } |
3059 | |
3060 | // We should never have any ArgPlaceHolder nodes at this point. |
3061 | assert(!argx->IsArgPlaceHolderNode()); |
3062 | |
3063 | // Setup any HFA information about 'argx' |
3064 | bool isHfaArg = false; |
3065 | var_types hfaType = TYP_UNDEF; |
3066 | unsigned hfaSlots = 0; |
3067 | |
3068 | bool passUsingFloatRegs; |
3069 | unsigned argAlign = 1; |
3070 | unsigned size = 0; |
3071 | CORINFO_CLASS_HANDLE copyBlkClass = nullptr; |
3072 | bool isRegArg = false; |
3073 | bool isNonStandard = false; |
3074 | regNumber nonStdRegNum = REG_NA; |
3075 | |
3076 | #ifdef FEATURE_HFA |
3077 | hfaType = GetHfaType(argx); |
3078 | isHfaArg = varTypeIsFloating(hfaType); |
3079 | |
3080 | #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) |
3081 | // Make sure for vararg methods isHfaArg is not true. |
3082 | isHfaArg = callIsVararg ? false : isHfaArg; |
3083 | #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) |
3084 | |
3085 | if (isHfaArg) |
3086 | { |
3087 | isHfaArg = true; |
3088 | hfaSlots = GetHfaCount(argx); |
3089 | |
// If we have an HFA struct, it's possible that we transition from a method that originally
// only had integer types to one that now starts having FP types. We have to communicate
// this through this flag, since LSRA will later use it to determine whether
// or not to track the FP register set.
3094 | // |
3095 | compFloatingPointUsed = true; |
3096 | } |
3097 | #endif // FEATURE_HFA |
3098 | |
3099 | #ifdef _TARGET_ARM_ |
3100 | passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP; |
3101 | bool passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG); |
3102 | |
3103 | // We don't use the "size" return value from InferOpSizeAlign(). |
3104 | codeGen->InferOpSizeAlign(argx, &argAlign); |
3105 | |
3106 | argAlign = roundUp(argAlign, TARGET_POINTER_SIZE); |
3107 | argAlign /= TARGET_POINTER_SIZE; |
3108 | |
3109 | if (argAlign == 2) |
3110 | { |
3111 | if (passUsingFloatRegs) |
3112 | { |
3113 | if (fltArgRegNum % 2 == 1) |
3114 | { |
3115 | fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); |
3116 | fltArgRegNum++; |
3117 | } |
3118 | } |
3119 | else if (passUsingIntRegs) |
3120 | { |
3121 | if (intArgRegNum % 2 == 1) |
3122 | { |
3123 | argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); |
3124 | intArgRegNum++; |
3125 | } |
3126 | } |
3127 | |
3128 | if (argSlots % 2 == 1) |
3129 | { |
3130 | argSlots++; |
3131 | } |
3132 | } |
3133 | |
3134 | #elif defined(_TARGET_ARM64_) |
3135 | |
3136 | assert(!callIsVararg || !isHfaArg); |
3137 | passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)); |
3138 | |
3139 | #elif defined(_TARGET_AMD64_) |
3140 | |
3141 | passUsingFloatRegs = varTypeIsFloating(argx); |
3142 | |
3143 | #elif defined(_TARGET_X86_) |
3144 | |
3145 | passUsingFloatRegs = false; |
3146 | |
3147 | #else |
3148 | #error Unsupported or unset target architecture |
3149 | #endif // _TARGET_* |
3150 | |
3151 | bool isBackFilled = false; |
3152 | unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use |
3153 | var_types structBaseType = TYP_STRUCT; |
3154 | unsigned structSize = 0; |
3155 | bool passStructByRef = false; |
3156 | |
3157 | bool isStructArg; |
3158 | GenTree* actualArg = argx->gtEffectiveVal(true /* Commas only */); |
3159 | |
3160 | // |
3161 | // Figure out the size of the argument. This is either in number of registers, or number of |
3162 | // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and |
3163 | // the stack. |
3164 | // |
3165 | isStructArg = varTypeIsStruct(argx); |
3166 | CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE; |
3167 | if (isStructArg) |
3168 | { |
3169 | objClass = gtGetStructHandle(argx); |
3170 | if (argx->TypeGet() == TYP_STRUCT) |
3171 | { |
3172 | // For TYP_STRUCT arguments we must have an OBJ, LCL_VAR or MKREFANY |
3173 | switch (actualArg->OperGet()) |
3174 | { |
3175 | case GT_OBJ: |
3176 | // Get the size off the OBJ node. |
3177 | structSize = actualArg->AsObj()->gtBlkSize; |
3178 | assert(structSize == info.compCompHnd->getClassSize(objClass)); |
3179 | break; |
3180 | case GT_LCL_VAR: |
3181 | structSize = lvaGetDesc(actualArg->AsLclVarCommon())->lvExactSize; |
3182 | break; |
3183 | case GT_MKREFANY: |
3184 | structSize = info.compCompHnd->getClassSize(objClass); |
3185 | break; |
3186 | default: |
3187 | BADCODE("illegal argument tree in fgInitArgInfo" ); |
3188 | break; |
3189 | } |
3190 | } |
3191 | else |
3192 | { |
3193 | structSize = genTypeSize(argx); |
3194 | assert(structSize == info.compCompHnd->getClassSize(objClass)); |
3195 | } |
3196 | } |
3197 | #if defined(_TARGET_AMD64_) |
3198 | #ifdef UNIX_AMD64_ABI |
3199 | if (!isStructArg) |
3200 | { |
3201 | size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' |
3202 | } |
3203 | else |
3204 | { |
3205 | size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; |
3206 | eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); |
3207 | } |
3208 | #else // !UNIX_AMD64_ABI |
3209 | size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' |
3210 | #endif // UNIX_AMD64_ABI |
3211 | #elif defined(_TARGET_ARM64_) |
3212 | if (isStructArg) |
3213 | { |
3214 | if (isHfaArg) |
3215 | { |
3216 | // HFA structs are passed by value in multiple registers. |
3217 | // The "size" in registers may differ the size in pointer-sized units. |
3218 | size = GetHfaCount(argx); |
3219 | } |
3220 | else |
3221 | { |
3222 | // Structs are either passed in 1 or 2 (64-bit) slots. |
3223 | // Structs that are the size of 2 pointers are passed by value in multiple registers, |
3224 | // if sufficient registers are available. |
3225 | // Structs that are larger than 2 pointers (except for HFAs) are passed by |
3226 | // reference (to a copy) |
3227 | size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; |
3228 | |
3229 | if (size > 2) |
3230 | { |
3231 | size = 1; |
3232 | } |
3233 | } |
// Note that there are some additional rules for multireg structs
// (i.e., they cannot be split between registers and the stack).
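// For example, a 16-byte struct occupies two registers (size == 2) when two are available,
// while a 24-byte non-HFA struct is passed by reference to a copy, so its size here is 1.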
3236 | } |
3237 | else |
3238 | { |
3239 | size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot' |
3240 | } |
3241 | #elif defined(_TARGET_ARM_) || defined(_TARGET_X86_) |
3242 | if (isStructArg) |
3243 | { |
3244 | size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; |
3245 | } |
3246 | else |
3247 | { |
3248 | // The typical case. |
3249 | // Long/double type argument(s) will be modified as needed in Lowering. |
3250 | size = genTypeStSz(argx->gtType); |
3251 | } |
3252 | #else |
3253 | #error Unsupported or unset target architecture |
3254 | #endif // _TARGET_XXX_ |
3255 | if (isStructArg) |
3256 | { |
// We have an argument with a struct type, but it may be a child of a GT_COMMA
3258 | GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/); |
3259 | |
3260 | assert(args->OperIsList()); |
3261 | assert(argx == args->Current()); |
3262 | |
3263 | unsigned originalSize = structSize; |
3264 | originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize); |
3265 | unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE); |
3266 | |
3267 | structSize = originalSize; |
3268 | |
3269 | structPassingKind howToPassStruct; |
3270 | |
3271 | structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, originalSize); |
3272 | |
3273 | bool passedInRegisters = false; |
3274 | passStructByRef = (howToPassStruct == SPK_ByReference); |
3275 | |
3276 | if (howToPassStruct == SPK_PrimitiveType) |
3277 | { |
3278 | // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register. |
3279 | // For ARM or AMD64/Windows only power-of-2 structs are passed in registers. |
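// For example, an 8-byte struct containing two ints can be passed as a single TYP_LONG in one register.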
3280 | #if !defined(_TARGET_ARM64_) && !defined(UNIX_AMD64_ABI) |
if (isPow2(originalSize))
3282 | #endif // !_TARGET_ARM64_ && !UNIX_AMD64_ABI |
3283 | { |
3284 | passedInRegisters = true; |
3285 | } |
3286 | #ifdef _TARGET_ARM_ |
3287 | // TODO-CQ: getArgTypeForStruct should *not* return TYP_DOUBLE for a double struct, |
3288 | // or for a struct of two floats. This causes the struct to be address-taken. |
3289 | if (structBaseType == TYP_DOUBLE) |
3290 | { |
3291 | size = 2; |
3292 | } |
3293 | else |
3294 | #endif // _TARGET_ARM_ |
3295 | { |
3296 | size = 1; |
3297 | } |
3298 | } |
3299 | else if (passStructByRef) |
3300 | { |
3301 | size = 1; |
3302 | } |
3303 | } |
3304 | |
// The 'size' value must have been set by now (the original value of zero is an invalid value).
3306 | assert(size != 0); |
3307 | |
3308 | // |
3309 | // Figure out if the argument will be passed in a register. |
3310 | // |
3311 | |
3312 | if (isRegParamType(genActualType(argx->TypeGet())) |
3313 | #ifdef UNIX_AMD64_ABI |
3314 | && (!isStructArg || structDesc.passedInRegisters) |
3315 | #endif |
3316 | ) |
3317 | { |
3318 | #ifdef _TARGET_ARM_ |
3319 | if (passUsingFloatRegs) |
3320 | { |
3321 | // First, see if it can be back-filled |
3322 | if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet) |
3323 | (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot? |
3324 | (size == 1)) // The size to back-fill is one float register |
3325 | { |
3326 | // Back-fill the register. |
3327 | isBackFilled = true; |
3328 | regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); |
3329 | fltArgSkippedRegMask &= |
3330 | ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask |
3331 | nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask)); |
3332 | assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG); |
3333 | } |
3334 | |
3335 | // Does the entire float, double, or HFA fit in the FP arg registers? |
3336 | // Check if the last register needed is still in the argument register range. |
3337 | isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG; |
3338 | |
3339 | if (!isRegArg) |
3340 | { |
3341 | anyFloatStackArgs = true; |
3342 | } |
3343 | } |
3344 | else |
3345 | { |
3346 | isRegArg = intArgRegNum < MAX_REG_ARG; |
3347 | } |
3348 | #elif defined(_TARGET_ARM64_) |
3349 | if (passUsingFloatRegs) |
3350 | { |
3351 | // Check if the last register needed is still in the fp argument register range. |
3352 | isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; |
3353 | |
3354 | // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers? |
3355 | if (isHfaArg && !isRegArg) |
3356 | { |
// recompute the 'size' so that it represents the number of stack slots rather than the number of
3358 | // registers |
3359 | // |
3360 | unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE); |
3361 | size = roundupSize / TARGET_POINTER_SIZE; |
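// For example, an HFA of three floats needed 3 FP registers, but on the stack it
// occupies roundUp(12, 8) / 8 == 2 slots.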
3362 | |
3363 | // We also must update fltArgRegNum so that we no longer try to |
3364 | // allocate any new floating point registers for args |
3365 | // This prevents us from backfilling a subsequent arg into d7 |
3366 | // |
3367 | fltArgRegNum = MAX_FLOAT_REG_ARG; |
3368 | } |
3369 | } |
3370 | else |
3371 | { |
3372 | // Check if the last register needed is still in the int argument register range. |
3373 | isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; |
3374 | |
// Did we run out of registers when we had a 16-byte struct (size == 2)?
// (i.e., we only have one register remaining but we needed two registers to pass this arg)
3377 | // This prevents us from backfilling a subsequent arg into x7 |
3378 | // |
3379 | if (!isRegArg && (size > 1)) |
3380 | { |
3381 | #if defined(_TARGET_WINDOWS_) |
3382 | // Arm64 windows native varargs allows splitting a 16 byte struct between stack |
3383 | // and the last general purpose register. |
3384 | if (callIsVararg) |
3385 | { |
3386 | // Override the decision and force a split. |
isRegArg = (intArgRegNum + (size - 1)) <= maxRegArgs;
3388 | } |
3389 | else |
3390 | #endif // defined(_TARGET_WINDOWS_) |
3391 | { |
3392 | // We also must update intArgRegNum so that we no longer try to |
3393 | // allocate any new general purpose registers for args |
3394 | // |
3395 | intArgRegNum = maxRegArgs; |
3396 | } |
3397 | } |
3398 | } |
3399 | #else // not _TARGET_ARM_ or _TARGET_ARM64_ |
3400 | |
3401 | #if defined(UNIX_AMD64_ABI) |
3402 | |
// Here a struct can be passed in registers, following the classification of its members and its size.
3404 | // Now make sure there are actually enough registers to do so. |
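// For example, struct { double d; long l; } is classified as one SSE eightbyte plus one INTEGER
// eightbyte, so it needs one free XMM argument register and one free integer argument register.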
3405 | if (isStructArg) |
3406 | { |
3407 | unsigned int structFloatRegs = 0; |
3408 | unsigned int structIntRegs = 0; |
3409 | for (unsigned int i = 0; i < structDesc.eightByteCount; i++) |
3410 | { |
3411 | if (structDesc.IsIntegralSlot(i)) |
3412 | { |
3413 | structIntRegs++; |
3414 | } |
3415 | else if (structDesc.IsSseSlot(i)) |
3416 | { |
3417 | structFloatRegs++; |
3418 | } |
3419 | } |
3420 | |
3421 | isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) && |
3422 | ((intArgRegNum + structIntRegs) <= MAX_REG_ARG); |
3423 | } |
3424 | else |
3425 | { |
3426 | if (passUsingFloatRegs) |
3427 | { |
3428 | isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG; |
3429 | } |
3430 | else |
3431 | { |
3432 | isRegArg = intArgRegNum < MAX_REG_ARG; |
3433 | } |
3434 | } |
3435 | #else // !defined(UNIX_AMD64_ABI) |
3436 | isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; |
3437 | #endif // !defined(UNIX_AMD64_ABI) |
3438 | #endif // _TARGET_ARM_ |
3439 | } |
3440 | else |
3441 | { |
3442 | isRegArg = false; |
3443 | } |
3444 | |
// If there are nonstandard args (outside the calling convention), they were inserted above
// and noted in a table so we can recognize them here and build their argInfo.
3447 | // |
3448 | // They should not affect the placement of any other args or stack space required. |
3449 | // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. |
3450 | isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum); |
3451 | if (isNonStandard) |
3452 | { |
3453 | isRegArg = (nonStdRegNum != REG_STK); |
3454 | } |
3455 | #if defined(_TARGET_X86_) |
3456 | else if (call->IsTailCallViaHelper()) |
3457 | { |
3458 | // We have already (before calling fgMorphArgs()) appended the 4 special args |
3459 | // required by the x86 tailcall helper. These args are required to go on the |
3460 | // stack. Force them to the stack here. |
3461 | assert(numArgs >= 4); |
3462 | if (argIndex >= numArgs - 4) |
3463 | { |
3464 | isRegArg = false; |
3465 | } |
3466 | } |
3467 | #endif // defined(_TARGET_X86_) |
3468 | |
3469 | // Now we know if the argument goes in registers or not and how big it is. |
3470 | CLANG_FORMAT_COMMENT_ANCHOR; |
3471 | |
3472 | #ifdef _TARGET_ARM_ |
3473 | // If we ever allocate a floating point argument to the stack, then all |
3474 | // subsequent HFA/float/double arguments go on the stack. |
3475 | if (!isRegArg && passUsingFloatRegs) |
3476 | { |
3477 | for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum) |
3478 | { |
3479 | fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); |
3480 | } |
3481 | } |
3482 | |
3483 | // If we think we're going to split a struct between integer registers and the stack, check to |
3484 | // see if we've already assigned a floating-point arg to the stack. |
3485 | if (isRegArg && // We decided above to use a register for the argument |
3486 | !passUsingFloatRegs && // We're using integer registers |
3487 | (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack |
3488 | anyFloatStackArgs) // We've already used the stack for a floating-point argument |
3489 | { |
3490 | isRegArg = false; // Change our mind; don't pass this struct partially in registers |
3491 | |
3492 | // Skip the rest of the integer argument registers |
3493 | for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum) |
3494 | { |
3495 | argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); |
3496 | } |
3497 | } |
3498 | #endif // _TARGET_ARM_ |
3499 | |
3500 | // Now create the fgArgTabEntry. |
3501 | fgArgTabEntry* newArgEntry; |
3502 | if (isRegArg) |
3503 | { |
3504 | regNumber nextRegNum = REG_STK; |
3505 | |
3506 | #if defined(UNIX_AMD64_ABI) |
3507 | regNumber nextOtherRegNum = REG_STK; |
3508 | unsigned int structFloatRegs = 0; |
3509 | unsigned int structIntRegs = 0; |
3510 | #endif // defined(UNIX_AMD64_ABI) |
3511 | |
3512 | if (isNonStandard) |
3513 | { |
3514 | nextRegNum = nonStdRegNum; |
3515 | } |
3516 | #if defined(UNIX_AMD64_ABI) |
3517 | else if (isStructArg && structDesc.passedInRegisters) |
3518 | { |
3519 | // It is a struct passed in registers. Assign the next available register. |
3520 | assert((structDesc.eightByteCount <= 2) && "Too many eightbytes." ); |
3521 | regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum}; |
3522 | for (unsigned int i = 0; i < structDesc.eightByteCount; i++) |
3523 | { |
3524 | if (structDesc.IsIntegralSlot(i)) |
3525 | { |
3526 | *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs); |
3527 | structIntRegs++; |
3528 | } |
3529 | else if (structDesc.IsSseSlot(i)) |
3530 | { |
3531 | *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs); |
3532 | structFloatRegs++; |
3533 | } |
3534 | } |
3535 | } |
3536 | #endif // defined(UNIX_AMD64_ABI) |
3537 | else |
3538 | { |
3539 | // fill in or update the argInfo table |
3540 | nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) |
3541 | : genMapIntRegArgNumToRegNum(intArgRegNum); |
3542 | } |
3543 | |
3544 | #ifdef _TARGET_AMD64_ |
3545 | #ifndef UNIX_AMD64_ABI |
3546 | assert(size == 1); |
3547 | #endif |
3548 | #endif |
3549 | |
3550 | // This is a register argument - put it in the table |
3551 | newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign, isStructArg, |
3552 | callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) |
3553 | UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); |
3554 | |
3555 | newArgEntry->SetIsBackFilled(isBackFilled); |
3556 | newArgEntry->isNonStandard = isNonStandard; |
3557 | |
3558 | // Set up the next intArgRegNum and fltArgRegNum values. |
3559 | if (!isBackFilled) |
3560 | { |
3561 | #if defined(UNIX_AMD64_ABI) |
3562 | if (isStructArg) |
3563 | { |
3564 | // For this case, we've already set the regNums in the argTabEntry |
3565 | intArgRegNum += structIntRegs; |
3566 | fltArgRegNum += structFloatRegs; |
3567 | } |
3568 | else |
3569 | #endif // defined(UNIX_AMD64_ABI) |
3570 | { |
3571 | if (!isNonStandard) |
3572 | { |
3573 | #if FEATURE_ARG_SPLIT |
3574 | // Check for a split (partially enregistered) struct |
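// For example, on ARM a 20-byte struct that starts at r2 uses r2/r3 for its first 8 bytes
// and places the remaining 12 bytes on the stack (numRegsPartial == 2).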
3575 | if (!passUsingFloatRegs && ((intArgRegNum + size) > MAX_REG_ARG)) |
3576 | { |
3577 | // This indicates a partial enregistration of a struct type |
3578 | assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() || |
3579 | (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG))); |
3580 | unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum; |
3581 | assert((unsigned char)numRegsPartial == numRegsPartial); |
3582 | call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); |
3583 | } |
3584 | #endif // FEATURE_ARG_SPLIT |
3585 | |
3586 | if (passUsingFloatRegs) |
3587 | { |
3588 | fltArgRegNum += size; |
3589 | |
3590 | #ifdef WINDOWS_AMD64_ABI |
// Whenever we pass a floating-point register argument
// we skip the corresponding integer register argument
3593 | intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG); |
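// For example, for f(float a, int b, float c): 'a' goes in XMM0, 'b' goes in RDX (RCX is
// skipped), and 'c' goes in XMM2 (XMM1 is skipped).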
3594 | #endif // WINDOWS_AMD64_ABI |
3595 | // No supported architecture supports partial structs using float registers. |
3596 | assert(fltArgRegNum <= MAX_FLOAT_REG_ARG); |
3597 | } |
3598 | else |
3599 | { |
3600 | // Increment intArgRegNum by 'size' registers |
3601 | intArgRegNum += size; |
3602 | |
3603 | #ifdef WINDOWS_AMD64_ABI |
3604 | fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG); |
3605 | #endif // WINDOWS_AMD64_ABI |
3606 | } |
3607 | } |
3608 | } |
3609 | } |
3610 | } |
3611 | else // We have an argument that is not passed in a register |
3612 | { |
3613 | // This is a stack argument - put it in the table |
3614 | newArgEntry = call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign, isStructArg, callIsVararg); |
3615 | #ifdef UNIX_AMD64_ABI |
3616 | // TODO-Amd64-Unix-CQ: This is temporary (see also in fgMorphArgs). |
3617 | if (structDesc.passedInRegisters) |
3618 | { |
3619 | newArgEntry->structDesc.CopyFrom(structDesc); |
3620 | } |
3621 | #endif |
3622 | } |
3623 | |
3624 | #ifdef FEATURE_HFA |
3625 | if (isHfaArg) |
3626 | { |
3627 | newArgEntry->setHfaType(hfaType, hfaSlots); |
3628 | } |
3629 | #endif // FEATURE_HFA |
3630 | newArgEntry->SetMultiRegNums(); |
3631 | |
3632 | noway_assert(newArgEntry != nullptr); |
3633 | if (newArgEntry->isStruct) |
3634 | { |
3635 | newArgEntry->passedByRef = passStructByRef; |
3636 | newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; |
3637 | } |
3638 | else |
3639 | { |
3640 | newArgEntry->argType = argx->TypeGet(); |
3641 | } |
3642 | |
3643 | argSlots += size; |
3644 | } // end foreach argument loop |
3645 | |
3646 | #ifdef DEBUG |
3647 | if (verbose) |
3648 | { |
3649 | call->fgArgInfo->Dump(this); |
3650 | JITDUMP("\n" ); |
3651 | } |
3652 | #endif |
3653 | } |
3654 | |
3655 | //------------------------------------------------------------------------ |
3656 | // fgMorphArgs: Walk and transform (morph) the arguments of a call |
3657 | // |
3658 | // Arguments: |
3659 | // callNode - the call for which we are doing the argument morphing |
3660 | // |
3661 | // Return Value: |
3662 | // Like most morph methods, this method returns the morphed node, |
3663 | // though in this case there are currently no scenarios where the |
3664 | // node itself is re-created. |
3665 | // |
3666 | // Notes: |
3667 | // This calls fgInitArgInfo to create the 'fgArgInfo' for the call. |
3668 | // If it has already been created, that method will simply return. |
3669 | // |
3670 | // This method changes the state of the call node. It uses the existence |
3671 | // of gtCallLateArgs (the late arguments list) to determine if it has |
3672 | // already done the first round of morphing. |
3673 | // |
3674 | // The first time it is called (i.e. during global morphing), this method |
3675 | // computes the "late arguments". This is when it determines which arguments |
3676 | // need to be evaluated to temps prior to the main argument setup, and which |
3677 | // can be directly evaluated into the argument location. It also creates a |
3678 | // second argument list (gtCallLateArgs) that does the final placement of the |
3679 | // arguments, e.g. into registers or onto the stack. |
3680 | // |
3681 | // The "non-late arguments", aka the gtCallArgs, are doing the in-order |
3682 | // evaluation of the arguments that might have side-effects, such as embedded |
3683 | // assignments, calls or possible throws. In these cases, it and earlier |
3684 | // arguments must be evaluated to temps. |
3685 | // |
3686 | // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS), |
3687 | // if we have any nested calls, we need to defer the copying of the argument |
3688 | // into the fixed argument area until after the call. If the argument did not |
3689 | // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and |
3690 | // replaced in the "early" arg list (gtCallArgs) with a placeholder node. |
3691 | |
3692 | #ifdef _PREFAST_ |
3693 | #pragma warning(push) |
3694 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
3695 | #endif |
3696 | GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) |
3697 | { |
3698 | GenTree* args; |
3699 | GenTree* argx; |
3700 | |
3701 | unsigned flagsSummary = 0; |
3702 | |
3703 | unsigned argIndex = 0; |
3704 | unsigned argSlots = 0; |
3705 | |
3706 | bool reMorphing = call->AreArgsComplete(); |
3707 | |
3708 | // Set up the fgArgInfo. |
3709 | fgInitArgInfo(call); |
3710 | unsigned numArgs = call->fgArgInfo->ArgCount(); |
3711 | JITDUMP("%sMorphing args for %d.%s:\n" , (reMorphing) ? "Re" : "" , call->gtTreeID, GenTree::OpName(call->gtOper)); |
3712 | |
3713 | // If we are remorphing, process the late arguments (which were determined by a previous caller). |
3714 | if (reMorphing) |
3715 | { |
3716 | // We need to reMorph the gtCallLateArgs early since that is what triggers |
3717 | // the expression folding and we need to have the final folded gtCallLateArgs |
3718 | // available when we call UpdateRegArg so that we correctly update the fgArgInfo |
3719 | // with the folded tree that represents the final optimized argument nodes. |
3720 | // |
3721 | if (call->gtCallLateArgs != nullptr) |
3722 | { |
3723 | |
3724 | call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList(); |
3725 | flagsSummary |= call->gtCallLateArgs->gtFlags; |
3726 | } |
3727 | assert(call->fgArgInfo != nullptr); |
3728 | } |
3729 | call->fgArgInfo->RemorphReset(); |
3730 | |
3731 | // First we morph the argument subtrees ('this' pointer, arguments, etc.). |
3732 | // During the first call to fgMorphArgs we also record the |
3733 | // information about late arguments we have in 'fgArgInfo'. |
// This information is used later to construct the gtCallLateArgs.
3735 | |
3736 | // Process the 'this' argument value, if present. |
3737 | argx = call->gtCallObjp; |
3738 | if (argx) |
3739 | { |
3740 | fgArgTabEntry* thisArgEntry = call->fgArgInfo->GetArgEntry(0, reMorphing); |
3741 | argx = fgMorphTree(argx); |
3742 | call->gtCallObjp = argx; |
3743 | // This is a register argument - possibly update it in the table. |
3744 | call->fgArgInfo->UpdateRegArg(thisArgEntry, argx, reMorphing); |
3745 | flagsSummary |= argx->gtFlags; |
3746 | assert(argIndex == 0); |
3747 | argIndex++; |
3748 | argSlots++; |
3749 | } |
3750 | |
3751 | // Note that this name is a bit of a misnomer - it indicates that there are struct args |
3752 | // that occupy more than a single slot that are passed by value (not necessarily in regs). |
3753 | bool hasMultiregStructArgs = false; |
3754 | for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++) |
3755 | { |
3756 | GenTree** parentArgx = &args->gtOp.gtOp1; |
3757 | fgArgTabEntry* argEntry = call->fgArgInfo->GetArgEntry(argIndex, reMorphing); |
3758 | |
3759 | // Morph the arg node, and update the parent and argEntry pointers. |
3760 | argx = *parentArgx; |
3761 | argx = fgMorphTree(argx); |
3762 | *parentArgx = argx; |
3763 | assert(args->OperIsList()); |
3764 | assert(argx == args->Current()); |
3765 | |
3766 | unsigned argAlign = argEntry->alignment; |
3767 | unsigned size = argEntry->getSize(); |
3768 | CORINFO_CLASS_HANDLE copyBlkClass = NO_CLASS_HANDLE; |
3769 | |
3770 | if (argAlign == 2) |
3771 | { |
3772 | if (argSlots % 2 == 1) |
3773 | { |
3774 | argSlots++; |
3775 | } |
3776 | } |
3777 | if (argEntry->isNonStandard) |
3778 | { |
3779 | // We need to update the node field for this nonStandard arg here |
3780 | // as it may have been changed by the call to fgMorphTree. |
3781 | call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing); |
3782 | flagsSummary |= argx->gtFlags; |
3783 | continue; |
3784 | } |
3785 | |
3786 | assert(size != 0); |
3787 | argSlots += argEntry->getSlotCount(); |
3788 | |
3789 | // lclVar address should have been retyped to TYP_I_IMPL. |
assert(!argx->IsVarAddr() || (argx->gtType == TYP_I_IMPL));
3791 | |
3792 | // Get information about this argument. |
3793 | var_types hfaType = argEntry->hfaType; |
3794 | bool isHfaArg = (hfaType != TYP_UNDEF); |
3795 | bool isHfaRegArg = argEntry->isHfaRegArg; |
3796 | unsigned hfaSlots = argEntry->numRegs; |
3797 | bool passUsingFloatRegs = argEntry->isPassedInFloatRegisters(); |
3798 | bool isBackFilled = argEntry->IsBackFilled(); |
3799 | unsigned structSize = 0; |
3800 | |
3801 | // Struct arguments may be morphed into a node that is not a struct type. |
3802 | // In such case the fgArgTabEntry keeps track of whether the original node (before morphing) |
3803 | // was a struct and the struct classification. |
3804 | bool isStructArg = argEntry->isStruct; |
3805 | |
3806 | GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/); |
3807 | if (isStructArg && varTypeIsStruct(argObj) && !argObj->OperIs(GT_ASG, GT_MKREFANY, GT_FIELD_LIST, GT_ARGPLACE)) |
3808 | { |
3809 | CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(argObj); |
3810 | unsigned originalSize; |
3811 | if (argObj->TypeGet() == TYP_STRUCT) |
3812 | { |
3813 | if (argObj->OperIs(GT_OBJ)) |
3814 | { |
3815 | // Get the size off the OBJ node. |
3816 | originalSize = argObj->AsObj()->gtBlkSize; |
3817 | assert(originalSize == info.compCompHnd->getClassSize(objClass)); |
3818 | } |
3819 | else |
3820 | { |
3821 | // We have a BADCODE assert for this in fgInitArgInfo. |
3822 | assert(argObj->OperIs(GT_LCL_VAR)); |
3823 | originalSize = lvaGetDesc(argObj->AsLclVarCommon())->lvExactSize; |
3824 | } |
3825 | } |
3826 | else |
3827 | { |
3828 | originalSize = genTypeSize(argx); |
3829 | assert(originalSize == info.compCompHnd->getClassSize(objClass)); |
3830 | } |
3831 | unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE); |
3832 | var_types structBaseType = argEntry->argType; |
3833 | |
3834 | #ifndef _TARGET_X86_ |
3835 | // First, handle the case where the argument is passed by reference. |
3836 | if (argEntry->passedByRef) |
3837 | { |
3838 | assert(size == 1); |
3839 | copyBlkClass = objClass; |
3840 | #ifdef UNIX_AMD64_ABI |
3841 | assert(!"Structs are not passed by reference on x64/ux" ); |
3842 | #endif // UNIX_AMD64_ABI |
3843 | } |
3844 | else |
3845 | { |
3846 | // This is passed by value. |
3847 | // Check to see if we can transform this into load of a primitive type. |
3848 | // 'size' must be the number of pointer sized items |
3849 | assert(size == roundupSize / TARGET_POINTER_SIZE); |
3850 | |
3851 | structSize = originalSize; |
3852 | unsigned passingSize = originalSize; |
3853 | |
3854 | // Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size. |
// Whether we can do this is platform-dependent:
3856 | // - In general, it can be done for power of 2 structs that fit in a single register. |
3857 | // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field. |
3858 | // - This is irrelevant for X86, since structs are always passed by value on the stack. |
3859 | |
3860 | GenTree** parentOfArgObj = parentArgx; |
3861 | GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj); |
3862 | bool canTransform = false; |
3863 | |
3864 | if (structBaseType != TYP_STRUCT) |
3865 | { |
3866 | if (isPow2(passingSize)) |
3867 | { |
3868 | canTransform = true; |
3869 | } |
3870 | |
3871 | #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI) |
3872 | // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can |
3873 | // only transform in that case if the arg is a local. |
3874 | // TODO-CQ: This transformation should be applicable in general, not just for the ARM64 |
3875 | // or UNIX_AMD64_ABI cases where they will be passed in registers. |
3876 | else |
3877 | { |
3878 | canTransform = (lclVar != nullptr); |
3879 | passingSize = genTypeSize(structBaseType); |
3880 | } |
3881 | #endif // _TARGET_ARM64_ || UNIX_AMD64_ABI |
3882 | } |
3883 | |
3884 | if (!canTransform) |
3885 | { |
3886 | #if defined(_TARGET_AMD64_) |
3887 | #ifndef UNIX_AMD64_ABI |
3888 | // On Windows structs are always copied and passed by reference (handled above) unless they are |
3889 | // passed by value in a single register. |
3890 | assert(size == 1); |
3891 | copyBlkClass = objClass; |
3892 | #else // UNIX_AMD64_ABI |
3893 | // On Unix, structs are always passed by value. |
3894 | // We only need a copy if we have one of the following: |
3895 | // - We have a lclVar that has been promoted and is passed in registers. |
3896 | // - The sizes don't match for a non-lclVar argument. |
3897 | // - We have a known struct type (e.g. SIMD) that requires multiple registers. |
3898 | // TODO-Amd64-Unix-CQ: The first case could and should be handled without copies. |
3899 | // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not |
3900 | // actually passed in registers. |
3901 | if (argEntry->isPassedInRegisters()) |
3902 | { |
3903 | assert(argEntry->structDesc.passedInRegisters); |
3904 | if (lclVar != nullptr) |
3905 | { |
3906 | if (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT) |
3907 | { |
3908 | copyBlkClass = objClass; |
3909 | } |
3910 | } |
3911 | else if (argObj->OperIs(GT_OBJ)) |
3912 | { |
3913 | if (passingSize != structSize) |
3914 | { |
3915 | copyBlkClass = objClass; |
3916 | } |
3917 | } |
3918 | else |
3919 | { |
3920 | // This should only be the case of a value directly producing a known struct type. |
3921 | assert(argObj->TypeGet() != TYP_STRUCT); |
3922 | if (argEntry->numRegs > 1) |
3923 | { |
3924 | copyBlkClass = objClass; |
3925 | } |
3926 | } |
3927 | } |
3928 | #endif // UNIX_AMD64_ABI |
3929 | #elif defined(_TARGET_ARM64_) |
3930 | if ((passingSize != structSize) && (lclVar == nullptr)) |
3931 | { |
3932 | copyBlkClass = objClass; |
3933 | } |
3934 | #endif |
3935 | |
3936 | #ifdef _TARGET_ARM_ |
3937 | // TODO-1stClassStructs: Unify these conditions across targets. |
3938 | if (((lclVar != nullptr) && |
3939 | (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT)) || |
3940 | ((argObj->OperIs(GT_OBJ)) && (passingSize != structSize))) |
3941 | { |
3942 | copyBlkClass = objClass; |
3943 | } |
3944 | |
3945 | if (structSize < TARGET_POINTER_SIZE) |
3946 | { |
3947 | copyBlkClass = objClass; |
3948 | } |
3949 | #endif // _TARGET_ARM_ |
3950 | } |
3951 | else |
3952 | { |
3953 | // We have a struct argument that's less than pointer size, and it is either a power of 2, |
3954 | // or a local. |
3955 | // Change our GT_OBJ into a GT_IND of the correct type. |
3956 | CLANG_FORMAT_COMMENT_ANCHOR; |
3957 | |
3958 | #ifdef _TARGET_ARM_ |
3959 | assert((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2))); |
3960 | #else |
3961 | assert(size == 1); |
3962 | #endif |
3963 | |
3964 | assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize)); |
3965 | |
3966 | if (argObj->OperIs(GT_OBJ)) |
3967 | { |
3968 | argObj->ChangeOper(GT_IND); |
3969 | |
3970 | // Now see if we can fold *(&X) into X |
3971 | if (argObj->gtOp.gtOp1->gtOper == GT_ADDR) |
3972 | { |
3973 | GenTree* temp = argObj->gtOp.gtOp1->gtOp.gtOp1; |
3974 | |
3975 | // Keep the DONT_CSE flag in sync |
3976 | // (as the addr always marks it for its op1) |
3977 | temp->gtFlags &= ~GTF_DONT_CSE; |
3978 | temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE); |
3979 | DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR |
3980 | DEBUG_DESTROY_NODE(argObj); // GT_IND |
3981 | |
3982 | argObj = temp; |
3983 | *parentOfArgObj = temp; |
3984 | |
3985 | // If the OBJ had been the top level node, we've now changed argx. |
3986 | if (parentOfArgObj == parentArgx) |
3987 | { |
3988 | argx = temp; |
3989 | } |
3990 | } |
3991 | } |
3992 | if (argObj->gtOper == GT_LCL_VAR) |
3993 | { |
3994 | unsigned lclNum = argObj->gtLclVarCommon.gtLclNum; |
3995 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
3996 | |
3997 | if (varDsc->lvPromoted) |
3998 | { |
3999 | if (varDsc->lvFieldCnt == 1) |
4000 | { |
4001 | // get the first and only promoted field |
4002 | LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart]; |
4003 | if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize) |
4004 | { |
4005 | // we will use the first and only promoted field |
4006 | argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart); |
4007 | |
4008 | if (varTypeCanReg(fieldVarDsc->TypeGet()) && |
4009 | (genTypeSize(fieldVarDsc->TypeGet()) == originalSize)) |
4010 | { |
4011 | // Just use the existing field's type |
4012 | argObj->gtType = fieldVarDsc->TypeGet(); |
4013 | } |
4014 | else |
4015 | { |
4016 | // Can't use the existing field's type, so use GT_LCL_FLD to swizzle |
4017 | // to a new type |
4018 | argObj->ChangeOper(GT_LCL_FLD); |
4019 | argObj->gtType = structBaseType; |
4020 | } |
4021 | assert(varTypeCanReg(argObj->TypeGet())); |
4022 | assert(copyBlkClass == NO_CLASS_HANDLE); |
4023 | } |
4024 | else |
4025 | { |
4026 | // use GT_LCL_FLD to swizzle the single field struct to a new type |
4027 | lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); |
4028 | argObj->ChangeOper(GT_LCL_FLD); |
4029 | argObj->gtType = structBaseType; |
4030 | } |
4031 | } |
4032 | else |
4033 | { |
4034 | // The struct fits into a single register, but it has been promoted into its |
4035 | // constituent fields, and so we have to re-assemble it |
4036 | copyBlkClass = objClass; |
4037 | } |
4038 | } |
4039 | else if (!varTypeIsIntegralOrI(varDsc->TypeGet())) |
4040 | { |
4041 | // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD |
4042 | argObj->ChangeOper(GT_LCL_FLD); |
4043 | argObj->gtType = structBaseType; |
4044 | } |
4045 | } |
4046 | else |
4047 | { |
4048 | // Not a GT_LCL_VAR, so we can just change the type on the node |
4049 | argObj->gtType = structBaseType; |
4050 | } |
4051 | assert(varTypeCanReg(argObj->TypeGet()) || |
4052 | ((copyBlkClass != NO_CLASS_HANDLE) && varTypeCanReg(structBaseType))); |
4053 | |
4054 | size = 1; |
4055 | } |
4056 | |
4057 | #ifndef UNIX_AMD64_ABI |
4058 | // We still have a struct unless we converted the GT_OBJ into a GT_IND above... |
4059 | if (varTypeIsStruct(structBaseType) && !argEntry->passedByRef) |
4060 | { |
4061 | if (isHfaArg && passUsingFloatRegs) |
4062 | { |
4063 | size = argEntry->numRegs; |
4064 | } |
4065 | else |
4066 | { |
4067 | // If the valuetype size is not a multiple of TARGET_POINTER_SIZE, |
4068 | // we must copyblk to a temp before doing the obj to avoid |
4069 | // the obj reading memory past the end of the valuetype |
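// For example, on ARM64 a 12-byte struct would be loaded as two 8-byte slots, reading
// 4 bytes past the end of the original value unless it is first copied to a padded temp.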
4070 | CLANG_FORMAT_COMMENT_ANCHOR; |
4071 | |
4072 | if (roundupSize > originalSize) |
4073 | { |
4074 | copyBlkClass = objClass; |
4075 | |
4076 | // There are a few special cases where we can omit using a CopyBlk |
4077 | // where we normally would need to use one. |
4078 | |
4079 | if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar? |
4080 | { |
4081 | copyBlkClass = NO_CLASS_HANDLE; |
4082 | } |
4083 | } |
4084 | |
4085 | size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items |
4086 | } |
4087 | } |
4088 | #endif // !UNIX_AMD64_ABI |
4089 | } |
4090 | #endif // !_TARGET_X86_ |
4091 | } |
4092 | |
4093 | if (argEntry->isPassedInRegisters()) |
4094 | { |
4095 | call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing); |
4096 | } |
4097 | else |
4098 | { |
4099 | call->fgArgInfo->UpdateStkArg(argEntry, argx, reMorphing); |
4100 | } |
4101 | |
4102 | if (copyBlkClass != NO_CLASS_HANDLE) |
4103 | { |
4104 | fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass); |
4105 | } |
4106 | |
4107 | if (argx->gtOper == GT_MKREFANY) |
4108 | { |
4109 | // 'Lower' the MKREFANY tree and insert it. |
4110 | noway_assert(!reMorphing); |
4111 | |
4112 | #ifdef _TARGET_X86_ |
4113 | |
4114 | // Build the mkrefany as a GT_FIELD_LIST |
4115 | GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) |
4116 | GenTreeFieldList(argx->gtOp.gtOp1, OFFSETOF__CORINFO_TypedReference__dataPtr, TYP_BYREF, nullptr); |
4117 | (void)new (this, GT_FIELD_LIST) |
4118 | GenTreeFieldList(argx->gtOp.gtOp2, OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL, fieldList); |
4119 | fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx); |
4120 | fp->node = fieldList; |
4121 | args->gtOp.gtOp1 = fieldList; |
4122 | |
4123 | #else // !_TARGET_X86_ |
4124 | |
4125 | // Get a new temp |
4126 | // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany |
unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
4128 | lvaSetStruct(tmp, impGetRefAnyClass(), false); |
4129 | |
4130 | // Build the mkrefany as a comma node: |
4131 | // (tmp.ptr=argx),(tmp.type=handle) |
4132 | GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__dataPtr); |
4133 | GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__type); |
4134 | destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField()); |
4135 | destPtrSlot->gtFlags |= GTF_VAR_DEF; |
4136 | destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField()); |
4137 | destTypeSlot->gtFlags |= GTF_VAR_DEF; |
4138 | |
4139 | GenTree* asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1); |
4140 | GenTree* asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2); |
4141 | GenTree* asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot); |
4142 | |
4143 | // Change the expression to "(tmp=val)" |
4144 | args->gtOp.gtOp1 = asg; |
4145 | |
4146 | // EvalArgsToTemps will cause tmp to actually get loaded as the argument |
4147 | call->fgArgInfo->EvalToTmp(argEntry, tmp, asg); |
4148 | lvaSetVarAddrExposed(tmp); |
4149 | #endif // !_TARGET_X86_ |
4150 | } |
4151 | |
4152 | #if FEATURE_MULTIREG_ARGS |
4153 | if (isStructArg) |
4154 | { |
4155 | if (size > 1 || isHfaArg) |
4156 | { |
4157 | hasMultiregStructArgs = true; |
4158 | } |
4159 | } |
4160 | #ifdef _TARGET_ARM_ |
4161 | else if ((argEntry->argType == TYP_LONG) || (argEntry->argType == TYP_DOUBLE)) |
4162 | { |
4163 | assert((argEntry->numRegs == 2) || (argEntry->numSlots == 2)); |
4164 | } |
4165 | #endif |
4166 | else |
4167 | { |
4168 | // We must have exactly one register or slot. |
4169 | assert(((argEntry->numRegs == 1) && (argEntry->numSlots == 0)) || |
4170 | ((argEntry->numRegs == 0) && (argEntry->numSlots == 1))); |
4171 | } |
4172 | #endif |
4173 | |
4174 | #if defined(_TARGET_X86_) |
4175 | if (isStructArg) |
4176 | { |
4177 | GenTree* lclNode = argx->OperIs(GT_LCL_VAR) ? argx : fgIsIndirOfAddrOfLocal(argx); |
4178 | if ((lclNode != nullptr) && |
4179 | (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT)) |
4180 | { |
4181 | // Make a GT_FIELD_LIST of the field lclVars. |
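// For example, a promoted struct { int x; int y; } local becomes a two-entry field list:
// (lclVar for x, offset 0, TYP_INT) followed by (lclVar for y, offset 4, TYP_INT).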
4182 | GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon(); |
4183 | LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]); |
4184 | GenTreeFieldList* fieldList = nullptr; |
4185 | for (unsigned fieldLclNum = varDsc->lvFieldLclStart; |
4186 | fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum) |
4187 | { |
4188 | LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; |
4189 | if (fieldList == nullptr) |
4190 | { |
4191 | lcl->SetLclNum(fieldLclNum); |
4192 | lcl->ChangeOper(GT_LCL_VAR); |
4193 | lcl->gtType = fieldVarDsc->lvType; |
4194 | fieldList = new (this, GT_FIELD_LIST) |
4195 | GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr); |
4196 | fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx); |
4197 | fp->node = fieldList; |
4198 | args->gtOp.gtOp1 = fieldList; |
4199 | } |
4200 | else |
4201 | { |
4202 | GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType); |
4203 | fieldList = new (this, GT_FIELD_LIST) |
4204 | GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList); |
4205 | } |
4206 | } |
4207 | } |
4208 | } |
4209 | #endif // _TARGET_X86_ |
4210 | |
4211 | flagsSummary |= args->Current()->gtFlags; |
4212 | |
4213 | } // end foreach argument loop |
4214 | |
4215 | if (!reMorphing) |
4216 | { |
4217 | call->fgArgInfo->ArgsComplete(); |
4218 | } |
4219 | |
4220 | if (call->gtCallArgs) |
4221 | { |
4222 | UpdateGT_LISTFlags(call->gtCallArgs); |
4223 | } |
4224 | |
4225 | /* Process the function address, if indirect call */ |
4226 | |
4227 | if (call->gtCallType == CT_INDIRECT) |
4228 | { |
4229 | call->gtCallAddr = fgMorphTree(call->gtCallAddr); |
4230 | } |
4231 | |
4232 | #if FEATURE_FIXED_OUT_ARGS |
4233 | |
4234 | // Record the outgoing argument size. If the call is a fast tail |
4235 | // call, it will setup its arguments in incoming arg area instead |
4236 | // of the out-going arg area, so we don't need to track the |
4237 | // outgoing arg size. |
4238 | if (!call->IsFastTailCall()) |
4239 | { |
4240 | unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum(); |
4241 | |
4242 | #if defined(UNIX_AMD64_ABI) |
4243 | // This is currently required for the UNIX ABI to work correctly. |
4244 | opts.compNeedToAlignFrame = true; |
4245 | #endif // UNIX_AMD64_ABI |
4246 | |
4247 | const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES; |
4248 | call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL)); |
4249 | |
4250 | #ifdef DEBUG |
4251 | if (verbose) |
4252 | { |
4253 | printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n" , argSlots, |
4254 | preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize); |
4255 | } |
4256 | #endif |
4257 | } |
4258 | #endif // FEATURE_FIXED_OUT_ARGS |
4259 | |
4260 | // Clear the ASG and EXCEPT (if possible) flags on the call node |
4261 | call->gtFlags &= ~GTF_ASG; |
4262 | if (!call->OperMayThrow(this)) |
4263 | { |
4264 | call->gtFlags &= ~GTF_EXCEPT; |
4265 | } |
4266 | |
4267 | // Union in the side effect flags from the call's operands |
4268 | call->gtFlags |= flagsSummary & GTF_ALL_EFFECT; |
4269 | |
4270 | // If the register arguments have already been determined |
4271 | // or we have no register arguments then we don't need to |
4272 | // call SortArgs() and EvalArgsToTemps() |
4273 | // |
4274 | // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch |
4275 | // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy |
4276 | // is added to make sure to call EvalArgsToTemp. |
4277 | if (!reMorphing && (call->fgArgInfo->HasRegArgs())) |
4278 | { |
4279 | // This is the first time that we morph this call AND it has register arguments. |
4280 | // Follow into the code below and do the 'defer or eval to temp' analysis. |
4281 | |
4282 | call->fgArgInfo->SortArgs(); |
4283 | |
4284 | call->fgArgInfo->EvalArgsToTemps(); |
4285 | |
4286 | // We may have updated the arguments |
4287 | if (call->gtCallArgs) |
4288 | { |
4289 | UpdateGT_LISTFlags(call->gtCallArgs); |
4290 | } |
4291 | } |
4292 | |
4293 | if (hasMultiregStructArgs) |
4294 | { |
4295 | fgMorphMultiregStructArgs(call); |
4296 | } |
4297 | |
4298 | #ifdef DEBUG |
4299 | if (verbose) |
4300 | { |
4301 | call->fgArgInfo->Dump(this); |
4302 | JITDUMP("\n" ); |
4303 | } |
4304 | #endif |
4305 | return call; |
4306 | } |
4307 | #ifdef _PREFAST_ |
4308 | #pragma warning(pop) |
4309 | #endif |
4310 | |
4311 | //----------------------------------------------------------------------------- |
4312 | // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and |
4313 | // call fgMorphMultiregStructArg on each of them. |
4314 | // |
4315 | // Arguments: |
// call : a GenTreeCall node that has one or more TYP_STRUCT arguments
4317 | // |
4318 | // Notes: |
4319 | // We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types. |
4320 | // It will ensure that the struct arguments are in the correct form. |
4321 | // If this method fails to find any TYP_STRUCT arguments it will assert. |
4322 | // |
4323 | void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) |
4324 | { |
4325 | bool foundStructArg = false; |
4326 | unsigned initialFlags = call->gtFlags; |
4327 | unsigned flagsSummary = 0; |
4328 | fgArgInfo* allArgInfo = call->fgArgInfo; |
4329 | |
4330 | #ifdef _TARGET_X86_ |
4331 | assert(!"Logic error: no MultiregStructArgs for X86" ); |
4332 | #endif |
4333 | #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) |
4334 | assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI" ); |
4335 | #endif |
4336 | |
4337 | for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2) |
4338 | { |
4339 | // For late arguments the arg tree that is overridden is in the gtCallLateArgs list. |
4340 | // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.) |
4341 | // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping |
// between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to the
// argument itself; otherwise it points to the node in the late args list.
4344 | bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0; |
4345 | fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1); |
4346 | assert(fgEntryPtr != nullptr); |
4347 | GenTree* argx = fgEntryPtr->node; |
4348 | GenTree* lateList = nullptr; |
4349 | GenTree* lateNode = nullptr; |
4350 | |
4351 | if (isLateArg) |
4352 | { |
4353 | for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext()) |
4354 | { |
4355 | assert(list->OperIsList()); |
4356 | |
4357 | GenTree* argNode = list->Current(); |
4358 | if (argx == argNode) |
4359 | { |
4360 | lateList = list; |
4361 | lateNode = argNode; |
4362 | break; |
4363 | } |
4364 | } |
4365 | assert(lateList != nullptr && lateNode != nullptr); |
4366 | } |
4367 | |
4368 | GenTree* arg = argx; |
4369 | |
4370 | if (!fgEntryPtr->isStruct) |
4371 | { |
4372 | continue; |
4373 | } |
4374 | |
4375 | unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->numSlots); |
4376 | if ((size > 1) || fgEntryPtr->isHfaArg) |
4377 | { |
4378 | foundStructArg = true; |
4379 | if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) |
4380 | { |
4381 | arg = fgMorphMultiregStructArg(arg, fgEntryPtr); |
4382 | |
4383 | // Did we replace 'argx' with a new tree? |
4384 | if (arg != argx) |
4385 | { |
4386 | fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node |
4387 | |
4388 | // link the new arg node into either the late arg list or the gtCallArgs list |
4389 | if (isLateArg) |
4390 | { |
4391 | lateList->gtOp.gtOp1 = arg; |
4392 | } |
4393 | else |
4394 | { |
4395 | args->gtOp.gtOp1 = arg; |
4396 | } |
4397 | } |
4398 | } |
4399 | } |
4400 | } |
4401 | |
4402 | // We should only call this method when we actually have one or more multireg struct args |
4403 | assert(foundStructArg); |
4404 | |
4405 | // Update the flags |
4406 | call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); |
4407 | } |
4408 | |
4409 | //----------------------------------------------------------------------------- |
4410 | // fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list, |
4411 | // morph the argument as needed to be passed correctly. |
4412 | // |
4413 | // Arguments: |
4414 | // arg - A GenTree node containing a TYP_STRUCT arg |
4415 | // fgEntryPtr - the fgArgTabEntry information for the current 'arg' |
4416 | // |
4417 | // Notes: |
4418 | // The arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT. |
4419 | // If 'arg' is a lclVar passed on the stack, we will ensure that any lclVars that must be on the |
4420 | // stack are marked as doNotEnregister, and then we return. |
4421 | // |
4422 | // If it is passed by register, we mutate the argument into the GT_FIELD_LIST form |
4423 | // which is only used for struct arguments. |
4424 | // |
// If arg is a LclVar we check if it is struct promoted and has the right number of fields,
// and if they are at the appropriate offsets, we will use the struct promoted fields
// in the GT_FIELD_LIST nodes that we create.
// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
// we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
// this also forces the struct to be stack allocated into the local frame.
// For the GT_OBJ case we will clone the address expression and generate two (or more)
// indirections.
// Currently the implementation handles ARM64/ARM and UNIX_AMD64_ABI; other architectures
// hit the NYI assert below.
4434 | // |
4435 | GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) |
4436 | { |
4437 | assert(varTypeIsStruct(arg->TypeGet())); |
4438 | |
4439 | #if !defined(_TARGET_ARMARCH_) && !defined(UNIX_AMD64_ABI) |
4440 | NYI("fgMorphMultiregStructArg requires implementation for this target" ); |
4441 | #endif |
4442 | |
4443 | #ifdef _TARGET_ARM_ |
4444 | if ((fgEntryPtr->isSplit && fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4) || |
4445 | (!fgEntryPtr->isSplit && fgEntryPtr->regNum == REG_STK)) |
4446 | #else |
4447 | if (fgEntryPtr->regNum == REG_STK) |
4448 | #endif |
4449 | { |
4450 | GenTreeLclVarCommon* lcl = nullptr; |
4451 | GenTree* actualArg = arg->gtEffectiveVal(); |
4452 | |
4453 | if (actualArg->OperGet() == GT_OBJ) |
4454 | { |
4455 | if (actualArg->gtGetOp1()->OperIs(GT_ADDR) && actualArg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR)) |
4456 | { |
4457 | lcl = actualArg->gtGetOp1()->gtGetOp1()->AsLclVarCommon(); |
4458 | } |
4459 | } |
4460 | else if (actualArg->OperGet() == GT_LCL_VAR) |
4461 | { |
4462 | lcl = actualArg->AsLclVarCommon(); |
4463 | } |
4464 | if (lcl != nullptr) |
4465 | { |
4466 | if (lvaGetPromotionType(lcl->gtLclNum) == PROMOTION_TYPE_INDEPENDENT) |
4467 | { |
4468 | arg = fgMorphLclArgToFieldlist(lcl); |
4469 | } |
4470 | else if (arg->TypeGet() == TYP_STRUCT) |
4471 | { |
4472 | // If this is a non-register struct, it must be referenced from memory. |
4473 | if (!actualArg->OperIs(GT_OBJ)) |
4474 | { |
4475 | // Create an Obj of the temp to use it as a call argument. |
4476 | arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); |
4477 | arg = gtNewObjNode(lvaGetStruct(lcl->gtLclNum), arg); |
4478 | } |
4479 | // Its fields will need to be accessed by address. |
4480 | lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUG_ARG(DNER_IsStructArg)); |
4481 | } |
4482 | } |
4483 | |
4484 | return arg; |
4485 | } |
4486 | |
4487 | #if FEATURE_MULTIREG_ARGS |
// Examine 'arg' and set up argValue, objClass, and structSize
4489 | // |
4490 | CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); |
4491 | GenTree* argValue = arg; // normally argValue will be arg, but see right below |
4492 | unsigned structSize = 0; |
4493 | |
4494 | if (arg->OperGet() == GT_OBJ) |
4495 | { |
4496 | GenTreeObj* argObj = arg->AsObj(); |
4497 | objClass = argObj->gtClass; |
4498 | structSize = argObj->Size(); |
4499 | assert(structSize == info.compCompHnd->getClassSize(objClass)); |
4500 | |
4501 | // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR. |
4502 | GenTree* op1 = argObj->gtOp1; |
4503 | if (op1->OperGet() == GT_ADDR) |
4504 | { |
4505 | GenTree* underlyingTree = op1->gtOp.gtOp1; |
4506 | |
4507 | // Only update to the same type. |
4508 | if (underlyingTree->OperIs(GT_LCL_VAR) && (underlyingTree->TypeGet() == argValue->TypeGet()) && |
4509 | (objClass == gtGetStructHandleIfPresent(underlyingTree))) |
4510 | { |
4511 | argValue = underlyingTree; |
4512 | } |
4513 | } |
4514 | } |
4515 | else if (arg->OperGet() == GT_LCL_VAR) |
4516 | { |
4517 | GenTreeLclVarCommon* varNode = arg->AsLclVarCommon(); |
4518 | unsigned varNum = varNode->gtLclNum; |
4519 | assert(varNum < lvaCount); |
4520 | LclVarDsc* varDsc = &lvaTable[varNum]; |
4521 | |
4522 | structSize = varDsc->lvExactSize; |
4523 | assert(structSize == info.compCompHnd->getClassSize(objClass)); |
4524 | } |
4525 | else |
4526 | { |
4527 | objClass = gtGetStructHandleIfPresent(arg); |
4528 | structSize = info.compCompHnd->getClassSize(objClass); |
4529 | } |
4530 | noway_assert(objClass != NO_CLASS_HANDLE); |
4531 | |
4532 | var_types hfaType = TYP_UNDEF; |
4533 | var_types elemType = TYP_UNDEF; |
4534 | unsigned elemCount = 0; |
4535 | unsigned elemSize = 0; |
4536 | var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0 |
4537 | |
4538 | hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF |
4539 | if (varTypeIsFloating(hfaType) |
4540 | #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
4541 | && !fgEntryPtr->isVararg |
4542 | #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
4543 | ) |
4544 | { |
4545 | elemType = hfaType; |
4546 | elemSize = genTypeSize(elemType); |
4547 | elemCount = structSize / elemSize; |
4548 | assert(elemSize * elemCount == structSize); |
4549 | for (unsigned inx = 0; inx < elemCount; inx++) |
4550 | { |
4551 | type[inx] = elemType; |
4552 | } |
4553 | } |
4554 | else |
4555 | { |
4556 | assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE); |
4557 | BYTE gcPtrs[MAX_ARG_REG_COUNT]; |
4558 | elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; |
4559 | info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); |
4560 | |
4561 | for (unsigned inx = 0; inx < elemCount; inx++) |
4562 | { |
4563 | #ifdef UNIX_AMD64_ABI |
4564 | if (gcPtrs[inx] == TYPE_GC_NONE) |
4565 | { |
4566 | type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx], |
4567 | fgEntryPtr->structDesc.eightByteSizes[inx]); |
4568 | } |
4569 | else |
4570 | #endif // UNIX_AMD64_ABI |
4571 | { |
4572 | type[inx] = getJitGCType(gcPtrs[inx]); |
4573 | } |
4574 | } |
4575 | |
4576 | #ifndef UNIX_AMD64_ABI |
4577 | if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) |
4578 | { |
4579 | elemSize = TARGET_POINTER_SIZE; |
4580 | // We can safely widen this to aligned bytes since we are loading from |
4581 | // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and |
4582 | // lives in the stack frame or will be a promoted field. |
4583 | // |
4584 | structSize = elemCount * TARGET_POINTER_SIZE; |
4585 | } |
4586 | else // we must have a GT_OBJ |
4587 | { |
4588 | assert(argValue->OperGet() == GT_OBJ); |
4589 | |
4590 | // We need to load the struct from an arbitrary address |
4591 | // and we can't read past the end of the structSize |
4592 | // We adjust the last load type here |
4593 | // |
4594 | unsigned remainingBytes = structSize % TARGET_POINTER_SIZE; |
4595 | unsigned lastElem = elemCount - 1; |
4596 | if (remainingBytes != 0) |
4597 | { |
4598 | switch (remainingBytes) |
4599 | { |
4600 | case 1: |
4601 | type[lastElem] = TYP_BYTE; |
4602 | break; |
4603 | case 2: |
4604 | type[lastElem] = TYP_SHORT; |
4605 | break; |
4606 | #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI) |
4607 | case 4: |
4608 | type[lastElem] = TYP_INT; |
4609 | break; |
4610 | #endif // (_TARGET_ARM64_) || (UNIX_AMD64_ABI) |
4611 | default: |
4612 | noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg" ); |
4613 | break; |
4614 | } |
4615 | } |
4616 | } |
4617 | #endif // !UNIX_AMD64_ABI |
4618 | } |
4619 | |
4620 | // We should still have a TYP_STRUCT |
4621 | assert(varTypeIsStruct(argValue->TypeGet())); |
4622 | |
4623 | GenTreeFieldList* newArg = nullptr; |
4624 | |
4625 | // Are we passing a struct LclVar? |
4626 | // |
4627 | if (argValue->OperGet() == GT_LCL_VAR) |
4628 | { |
4629 | GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); |
4630 | unsigned varNum = varNode->gtLclNum; |
4631 | assert(varNum < lvaCount); |
4632 | LclVarDsc* varDsc = &lvaTable[varNum]; |
4633 | |
// At this point any TYP_STRUCT LclVar must be an aligned struct
// or an HFA struct, both of which are passed by value.
4636 | // |
4637 | assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa()); |
4638 | |
4639 | varDsc->lvIsMultiRegArg = true; |
4640 | |
4641 | #ifdef DEBUG |
4642 | if (verbose) |
4643 | { |
4644 | JITDUMP("Multireg struct argument V%02u : " , varNum); |
4645 | fgEntryPtr->Dump(); |
4646 | } |
4647 | #endif // DEBUG |
4648 | |
4649 | #ifndef UNIX_AMD64_ABI |
4650 | // This local variable must match the layout of the 'objClass' type exactly |
4651 | if (varDsc->lvIsHfa() |
4652 | #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
4653 | && !fgEntryPtr->isVararg |
4654 | #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
4655 | ) |
4656 | { |
4657 | // We have a HFA struct |
4658 | noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE)); |
4659 | noway_assert(elemSize == genTypeSize(elemType)); |
4660 | noway_assert(elemCount == (varDsc->lvExactSize / elemSize)); |
4661 | noway_assert(elemSize * elemCount == varDsc->lvExactSize); |
4662 | |
4663 | for (unsigned inx = 0; (inx < elemCount); inx++) |
4664 | { |
4665 | noway_assert(type[inx] == elemType); |
4666 | } |
4667 | } |
4668 | else |
4669 | { |
4670 | #if defined(_TARGET_ARM64_) |
4671 | // We must have a 16-byte struct (non-HFA) |
4672 | noway_assert(elemCount == 2); |
4673 | #elif defined(_TARGET_ARM_) |
4674 | noway_assert(elemCount <= 4); |
4675 | #endif |
4676 | |
4677 | for (unsigned inx = 0; inx < elemCount; inx++) |
4678 | { |
4679 | CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx]; |
4680 | |
// We set up the type[inx] value above using the GC info from 'objClass',
// so this GT_LCL_VAR must have the same GC layout info.
4683 | // |
4684 | if (currentGcLayoutType != TYPE_GC_NONE) |
4685 | { |
4686 | noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType)); |
4687 | } |
4688 | else |
4689 | { |
// We may have used a small type when we set up the type[inx] values above.
// We can safely widen this to TYP_I_IMPL.
4692 | type[inx] = TYP_I_IMPL; |
4693 | } |
4694 | } |
4695 | } |
4696 | #endif // !UNIX_AMD64_ABI |
4697 | |
4698 | #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI) |
4699 | // Is this LclVar a promoted struct with exactly 2 fields? |
4700 | // TODO-ARM64-CQ: Support struct promoted HFA types here |
4701 | if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && (!varDsc->lvIsHfa() |
4702 | #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
4703 | && !fgEntryPtr->isVararg |
4704 | #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
4705 | )) |
4706 | { |
// See if we have two promoted fields that start at offsets 0 and TARGET_POINTER_SIZE.
4708 | unsigned loVarNum = lvaGetFieldLocal(varDsc, 0); |
4709 | unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE); |
4710 | |
4711 | // Did we find the promoted fields at the necessary offsets? |
4712 | if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM)) |
4713 | { |
4714 | LclVarDsc* loVarDsc = &lvaTable[loVarNum]; |
4715 | LclVarDsc* hiVarDsc = &lvaTable[hiVarNum]; |
4716 | |
4717 | var_types loType = loVarDsc->lvType; |
4718 | var_types hiType = hiVarDsc->lvType; |
4719 | |
4720 | if (varTypeIsFloating(loType) || varTypeIsFloating(hiType)) |
4721 | { |
4722 | // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer |
4723 | // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered) |
4724 | // |
4725 | JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n" , |
4726 | varNum); |
4727 | // |
4728 | // we call lvaSetVarDoNotEnregister and do the proper transformation below. |
4729 | // |
4730 | } |
4731 | else |
4732 | { |
// We can use the struct's promoted fields as the two arguments
4734 | |
4735 | GenTree* loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum); |
4736 | GenTree* hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum); |
4737 | |
4738 | // Create a new tree for 'arg' |
4739 | // replace the existing LDOBJ(ADDR(LCLVAR)) |
4740 | // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr)) |
4741 | // |
4742 | newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr); |
4743 | (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg); |
4744 | } |
4745 | } |
4746 | } |
4747 | else |
4748 | { |
4749 | // |
// We will create a list of GT_LCL_FLD nodes to pass this struct
4751 | // |
lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField));
4753 | } |
4754 | #elif defined(_TARGET_ARM_) |
4755 | // Is this LclVar a promoted struct with exactly same size? |
4756 | if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa()) |
4757 | { |
4758 | // See if we have promoted fields? |
4759 | unsigned varNums[4]; |
4760 | bool hasBadVarNum = false; |
4761 | for (unsigned inx = 0; inx < elemCount; inx++) |
4762 | { |
4763 | varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx); |
4764 | if (varNums[inx] == BAD_VAR_NUM) |
4765 | { |
4766 | hasBadVarNum = true; |
4767 | break; |
4768 | } |
4769 | } |
4770 | |
4771 | // Did we find the promoted fields at the necessary offsets? |
4772 | if (!hasBadVarNum) |
4773 | { |
4774 | LclVarDsc* varDscs[4]; |
4775 | var_types varType[4]; |
4776 | bool varIsFloat = false; |
4777 | |
4778 | for (unsigned inx = 0; inx < elemCount; inx++) |
4779 | { |
4780 | varDscs[inx] = &lvaTable[varNums[inx]]; |
4781 | varType[inx] = varDscs[inx]->lvType; |
4782 | if (varTypeIsFloating(varType[inx])) |
4783 | { |
// TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the
// integer registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be
// enregistered)
//
JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
varNum);
4790 | // |
4791 | // we call lvaSetVarDoNotEnregister and do the proper transformation below. |
4792 | // |
4793 | varIsFloat = true; |
4794 | break; |
4795 | } |
4796 | } |
4797 | |
4798 | if (!varIsFloat) |
4799 | { |
4800 | newArg = fgMorphLclArgToFieldlist(varNode); |
4801 | } |
4802 | } |
4803 | } |
4804 | else |
4805 | { |
4806 | // |
// We will create a list of GT_LCL_FLD nodes to pass this struct
4808 | // |
lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField));
4810 | } |
4811 | #endif // _TARGET_ARM_ |
4812 | } |
4813 | |
// If we didn't set newArg to a new FIELD_LIST node tree
4815 | // |
4816 | if (newArg == nullptr) |
4817 | { |
4818 | if (fgEntryPtr->regNum == REG_STK) |
4819 | { |
4820 | // We leave this stack passed argument alone |
4821 | return arg; |
4822 | } |
4823 | |
// Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
// Note that a GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
4826 | // |
4827 | if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) |
4828 | { |
4829 | GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); |
4830 | unsigned varNum = varNode->gtLclNum; |
4831 | assert(varNum < lvaCount); |
4832 | LclVarDsc* varDsc = &lvaTable[varNum]; |
4833 | |
4834 | unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0; |
4835 | unsigned lastOffset = baseOffset + structSize; |
4836 | |
4837 | // The allocated size of our LocalVar must be at least as big as lastOffset |
4838 | assert(varDsc->lvSize() >= lastOffset); |
4839 | |
4840 | if (varDsc->lvStructGcCount > 0) |
4841 | { |
4842 | // alignment of the baseOffset is required |
4843 | noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0); |
4844 | #ifndef UNIX_AMD64_ABI |
4845 | noway_assert(elemSize == TARGET_POINTER_SIZE); |
4846 | #endif |
4847 | unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE; |
4848 | const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable |
4849 | for (unsigned inx = 0; (inx < elemCount); inx++) |
4850 | { |
4851 | // The GC information must match what we setup using 'objClass' |
4852 | if ((gcPtrs[baseIndex + inx] != TYPE_GC_NONE) || varTypeGCtype(type[inx])) |
4853 | { |
4854 | noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx])); |
4855 | } |
4856 | } |
4857 | } |
4858 | else // this varDsc contains no GC pointers |
4859 | { |
4860 | for (unsigned inx = 0; inx < elemCount; inx++) |
4861 | { |
4862 | // The GC information must match what we setup using 'objClass' |
4863 | noway_assert(!varTypeIsGC(type[inx])); |
4864 | } |
4865 | } |
4866 | |
4867 | // |
// We create a list of GT_LCL_FLD nodes to pass this struct
4869 | // |
lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField));
4871 | |
4872 | // Create a new tree for 'arg' |
4873 | // replace the existing LDOBJ(ADDR(LCLVAR)) |
4874 | // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...) |
4875 | // |
4876 | unsigned offset = baseOffset; |
4877 | GenTreeFieldList* listEntry = nullptr; |
4878 | for (unsigned inx = 0; inx < elemCount; inx++) |
4879 | { |
4880 | elemSize = genTypeSize(type[inx]); |
4881 | GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); |
4882 | listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry); |
4883 | if (newArg == nullptr) |
4884 | { |
4885 | newArg = listEntry; |
4886 | } |
4887 | offset += elemSize; |
4888 | } |
4889 | } |
4890 | // Are we passing a GT_OBJ struct? |
4891 | // |
4892 | else if (argValue->OperGet() == GT_OBJ) |
4893 | { |
4894 | GenTreeObj* argObj = argValue->AsObj(); |
4895 | GenTree* baseAddr = argObj->gtOp1; |
4896 | var_types addrType = baseAddr->TypeGet(); |
4897 | |
4898 | if (baseAddr->OperGet() == GT_ADDR) |
4899 | { |
4900 | GenTree* addrTaken = baseAddr->gtOp.gtOp1; |
4901 | if (addrTaken->IsLocal()) |
4902 | { |
4903 | GenTreeLclVarCommon* varNode = addrTaken->AsLclVarCommon(); |
4904 | unsigned varNum = varNode->gtLclNum; |
// We may be accessing a non-struct type (for example, long) as a struct type.
// Make sure the lclVar lives on the stack so that its fields are accessible by address.
4907 | lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField)); |
4908 | } |
4909 | } |
4910 | |
4911 | // Create a new tree for 'arg' |
4912 | // replace the existing LDOBJ(EXPR) |
4913 | // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...) |
4914 | // |
4915 | |
4916 | unsigned offset = 0; |
4917 | GenTreeFieldList* listEntry = nullptr; |
4918 | for (unsigned inx = 0; inx < elemCount; inx++) |
4919 | { |
4920 | elemSize = genTypeSize(type[inx]); |
4921 | GenTree* curAddr = baseAddr; |
4922 | if (offset != 0) |
4923 | { |
4924 | GenTree* baseAddrDup = gtCloneExpr(baseAddr); |
4925 | noway_assert(baseAddrDup != nullptr); |
4926 | curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); |
4927 | } |
4928 | else |
4929 | { |
4930 | curAddr = baseAddr; |
4931 | } |
4932 | GenTree* curItem = gtNewIndir(type[inx], curAddr); |
4933 | |
4934 | // For safety all GT_IND should have at least GT_GLOB_REF set. |
4935 | curItem->gtFlags |= GTF_GLOB_REF; |
4936 | |
4937 | listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry); |
4938 | if (newArg == nullptr) |
4939 | { |
4940 | newArg = listEntry; |
4941 | } |
4942 | offset += elemSize; |
4943 | } |
4944 | } |
4945 | } |
4946 | |
4947 | #ifdef DEBUG |
4948 | // If we reach here we should have set newArg to something |
4949 | if (newArg == nullptr) |
4950 | { |
4951 | gtDispTree(argValue); |
4952 | assert(!"Missing case in fgMorphMultiregStructArg" ); |
4953 | } |
4954 | #endif |
4955 | |
4956 | noway_assert(newArg != nullptr); |
4957 | noway_assert(newArg->OperIsFieldList()); |
4958 | |
4959 | // We need to propagate any GTF_ALL_EFFECT flags from the end of the list back to the beginning. |
4960 | // This is verified in fgDebugCheckFlags(). |
4961 | |
4962 | ArrayStack<GenTree*> stack(getAllocator(CMK_ArrayStack)); |
4963 | GenTree* tree; |
4964 | for (tree = newArg; (tree->gtGetOp2() != nullptr) && tree->gtGetOp2()->OperIsFieldList(); tree = tree->gtGetOp2()) |
4965 | { |
4966 | stack.Push(tree); |
4967 | } |
4968 | |
4969 | unsigned propFlags = (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
4970 | tree->gtFlags |= propFlags; |
4971 | |
4972 | while (!stack.Empty()) |
4973 | { |
4974 | tree = stack.Pop(); |
4975 | propFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
4976 | propFlags |= (tree->gtGetOp2()->gtFlags & GTF_ALL_EFFECT); |
4977 | tree->gtFlags |= propFlags; |
4978 | } |
4979 | |
4980 | #ifdef DEBUG |
4981 | if (verbose) |
4982 | { |
4983 | printf("fgMorphMultiregStructArg created tree:\n" ); |
4984 | gtDispTree(newArg); |
4985 | } |
4986 | #endif |
4987 | |
4988 | arg = newArg; // consider calling fgMorphTree(newArg); |
4989 | |
4990 | #endif // FEATURE_MULTIREG_ARGS |
4991 | |
4992 | return arg; |
4993 | } |
4994 | |
4995 | //------------------------------------------------------------------------ |
4996 | // fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields |
4997 | // |
4998 | // Arguments: |
4999 | // lcl - The GT_LCL_VAR node we will transform |
5000 | // |
5001 | // Return value: |
5002 | // The new GT_FIELD_LIST that we have created. |
5003 | // |
5004 | GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) |
5005 | { |
5006 | LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]); |
5007 | assert(varDsc->lvPromoted == true); |
5008 | |
5009 | unsigned fieldCount = varDsc->lvFieldCnt; |
5010 | GenTreeFieldList* listEntry = nullptr; |
5011 | GenTreeFieldList* newArg = nullptr; |
5012 | unsigned fieldLclNum = varDsc->lvFieldLclStart; |
5013 | |
// We can use the struct's promoted fields as the arguments
5015 | for (unsigned i = 0; i < fieldCount; i++) |
5016 | { |
5017 | LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; |
5018 | GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType); |
5019 | listEntry = new (this, GT_FIELD_LIST) |
5020 | GenTreeFieldList(lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, listEntry); |
5021 | if (newArg == nullptr) |
5022 | { |
5023 | newArg = listEntry; |
5024 | } |
5025 | fieldLclNum++; |
5026 | } |
5027 | return newArg; |
5028 | } |
5029 | |
5030 | //------------------------------------------------------------------------ |
5031 | // fgMakeOutgoingStructArgCopy: make a copy of a struct variable if necessary, |
5032 | // to pass to a callee. |
5033 | // |
5034 | // Arguments: |
5035 | // call - call being processed |
5036 | // args - args for the call |
//    argIndex - arg being processed
//    copyBlkClass - class handle for the struct
//
// Return value:
//    None; the arg table entry and the arg list are updated to refer to the copy (or the original local).
5042 | // |
5043 | void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, |
5044 | GenTree* args, |
5045 | unsigned argIndex, |
5046 | CORINFO_CLASS_HANDLE copyBlkClass) |
5047 | { |
5048 | GenTree* argx = args->Current(); |
5049 | noway_assert(argx->gtOper != GT_MKREFANY); |
5050 | fgArgTabEntry* argEntry = Compiler::gtArgEntryByNode(call, argx); |
5051 | |
5052 | // If we're optimizing, see if we can avoid making a copy. |
5053 | // |
5054 | // We don't need a copy if this is the last use of an implicit by-ref local. |
5055 | // |
5056 | // We can't determine that all of the time, but if there is only |
5057 | // one use and the method has no loops, then this use must be the last. |
5058 | if (opts.OptimizationEnabled()) |
5059 | { |
5060 | GenTreeLclVarCommon* lcl = nullptr; |
5061 | |
5062 | if (argx->OperIsLocal()) |
5063 | { |
5064 | lcl = argx->AsLclVarCommon(); |
5065 | } |
5066 | else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal()) |
5067 | { |
5068 | lcl = argx->AsObj()->Addr()->AsLclVarCommon(); |
5069 | } |
5070 | |
5071 | if (lcl != nullptr) |
5072 | { |
5073 | unsigned varNum = lcl->AsLclVarCommon()->GetLclNum(); |
5074 | if (lvaIsImplicitByRefLocal(varNum)) |
5075 | { |
5076 | LclVarDsc* varDsc = &lvaTable[varNum]; |
5077 | // JIT_TailCall helper has an implicit assumption that all tail call arguments live |
// on the caller's frame. If an argument lives on the caller's caller's frame, it may get
5079 | // overwritten if that frame is reused for the tail call. Therefore, we should always copy |
5080 | // struct parameters if they are passed as arguments to a tail call. |
5081 | if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt(RCS_EARLY) == 1) && !fgMightHaveLoop()) |
5082 | { |
5083 | varDsc->setLvRefCnt(0, RCS_EARLY); |
5084 | args->gtOp.gtOp1 = lcl; |
5085 | argEntry->node = lcl; |
5086 | |
5087 | JITDUMP("did not have to make outgoing copy for V%2d" , varNum); |
5088 | return; |
5089 | } |
5090 | } |
5091 | } |
5092 | } |
5093 | |
5094 | if (fgOutgoingArgTemps == nullptr) |
5095 | { |
5096 | fgOutgoingArgTemps = hashBv::Create(this); |
5097 | } |
5098 | |
5099 | unsigned tmp = 0; |
5100 | bool found = false; |
5101 | |
5102 | // Attempt to find a local we have already used for an outgoing struct and reuse it. |
5103 | // We do not reuse within a statement. |
5104 | if (!opts.MinOpts()) |
5105 | { |
5106 | indexType lclNum; |
5107 | FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps) |
5108 | { |
5109 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
5110 | if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) && |
5111 | !fgCurrentlyInUseArgTemps->testBit(lclNum)) |
5112 | { |
5113 | tmp = (unsigned)lclNum; |
5114 | found = true; |
5115 | JITDUMP("reusing outgoing struct arg" ); |
5116 | break; |
5117 | } |
5118 | } |
5119 | NEXT_HBV_BIT_SET; |
5120 | } |
5121 | |
5122 | // Create the CopyBlk tree and insert it. |
5123 | if (!found) |
5124 | { |
5125 | // Get a new temp |
// Here we don't need the unsafe value class check, since the address of this temp is used only in the copy block.
tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
5128 | lvaSetStruct(tmp, copyBlkClass, false); |
5129 | if (call->IsVarargs()) |
5130 | { |
5131 | lvaSetStructUsedAsVarArg(tmp); |
5132 | } |
5133 | |
5134 | fgOutgoingArgTemps->setBit(tmp); |
5135 | } |
5136 | |
5137 | fgCurrentlyInUseArgTemps->setBit(tmp); |
5138 | |
// TYP_SIMD structs should not be enregistered, since the ABI requires them to be
// allocated on the stack and their address to be passed.
5141 | if (lclVarIsSIMDType(tmp)) |
5142 | { |
5143 | lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct)); |
5144 | } |
5145 | |
5146 | // Create a reference to the temp |
5147 | GenTree* dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType); |
5148 | dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction. |
5149 | |
5150 | if (argx->gtOper == GT_OBJ) |
5151 | { |
5152 | argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT); |
5153 | argx->SetIndirExceptionFlags(this); |
5154 | } |
5155 | else |
5156 | { |
5157 | argx->gtFlags |= GTF_DONT_CSE; |
5158 | } |
5159 | |
5160 | // Copy the valuetype to the temp |
5161 | unsigned size = info.compCompHnd->getClassSize(copyBlkClass); |
5162 | GenTree* copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */); |
5163 | copyBlk = fgMorphCopyBlock(copyBlk); |
5164 | |
5165 | #if FEATURE_FIXED_OUT_ARGS |
5166 | |
// Do the copy early, and evaluate the temp later (see EvalArgsToTemps).
// On Unix, create a LCL_FLD for structs passed in more than one register. See fgMakeTmpArgNode.
5169 | GenTree* arg = copyBlk; |
5170 | |
5171 | #else // FEATURE_FIXED_OUT_ARGS |
5172 | |
5173 | // Structs are always on the stack, and thus never need temps |
5174 | // so we have to put the copy and temp all into one expression. |
5175 | argEntry->tmpNum = tmp; |
5176 | GenTree* arg = fgMakeTmpArgNode(argEntry); |
5177 | |
5178 | // Change the expression to "(tmp=val),tmp" |
5179 | arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); |
5180 | |
5181 | #endif // FEATURE_FIXED_OUT_ARGS |
5182 | |
5183 | args->gtOp.gtOp1 = arg; |
5184 | call->fgArgInfo->EvalToTmp(argEntry, tmp, arg); |
5185 | |
5186 | return; |
5187 | } |
5188 | |
5189 | #ifdef _TARGET_ARM_ |
5190 | // See declaration for specification comment. |
5191 | void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc, |
5192 | unsigned firstArgRegNum, |
5193 | regMaskTP* pArgSkippedRegMask) |
5194 | { |
5195 | assert(varDsc->lvPromoted); |
5196 | // There's no way to do these calculations without breaking abstraction and assuming that |
5197 | // integer register arguments are consecutive ints. They are on ARM. |
5198 | |
5199 | // To start, figure out what register contains the last byte of the first argument. |
5200 | LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart]; |
5201 | unsigned lastFldRegOfLastByte = |
5202 | (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE; |
5203 | ; |
5204 | |
5205 | // Now we're keeping track of the register that the last field ended in; see what registers |
5206 | // subsequent fields start in, and whether any are skipped. |
5207 | // (We assume here the invariant that the fields are sorted in offset order.) |
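// For example (illustrative): a promoted struct { int a; double b; } with 'a' at offset 0
// and 'b' at offset 8, passed starting in r0, leaves r1 unused; the loop below adds r1 to
// *pArgSkippedRegMask.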
5208 | for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++) |
5209 | { |
5210 | unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset; |
5211 | LclVarDsc* fldVarDsc = &lvaTable[fldVarNum]; |
5212 | unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE; |
5213 | assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields. |
// This loop enumerates the offsets of any registers that were skipped: start at the
// first register after the one containing the last byte of the previous field, and
// stop just before the first register of the current field.
5217 | for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset; |
5218 | skippedRegOffsets++) |
5219 | { |
5220 | // If the register number would not be an arg reg, we're done. |
5221 | if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG) |
5222 | return; |
5223 | *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets)); |
5224 | } |
5225 | lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE; |
5226 | } |
5227 | } |
5228 | |
5229 | #endif // _TARGET_ARM_ |
5230 | |
5231 | //**************************************************************************** |
5232 | // fgFixupStructReturn: |
5233 | // The companion to impFixupCallStructReturn. Now that the importer is done |
5234 | // change the gtType to the precomputed native return type |
5235 | // requires that callNode currently has a struct type |
5236 | // |
5237 | void Compiler::fgFixupStructReturn(GenTree* callNode) |
5238 | { |
5239 | assert(varTypeIsStruct(callNode)); |
5240 | |
5241 | GenTreeCall* call = callNode->AsCall(); |
5242 | bool callHasRetBuffArg = call->HasRetBufArg(); |
5243 | bool isHelperCall = call->IsHelperCall(); |
5244 | |
5245 | // Decide on the proper return type for this call that currently returns a struct |
5246 | // |
5247 | CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd; |
5248 | Compiler::structPassingKind howToReturnStruct; |
5249 | var_types returnType; |
5250 | |
5251 | // There are a couple of Helper Calls that say they return a TYP_STRUCT but they |
5252 | // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType) |
5253 | // |
5254 | // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD |
5255 | // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD |
5256 | // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL |
5257 | // |
5258 | if (isHelperCall) |
5259 | { |
5260 | assert(!callHasRetBuffArg); |
5261 | assert(retClsHnd == NO_CLASS_HANDLE); |
5262 | |
5263 | // Now that we are past the importer, re-type this node |
5264 | howToReturnStruct = SPK_PrimitiveType; |
5265 | returnType = (var_types)call->gtReturnType; |
5266 | } |
5267 | else |
5268 | { |
5269 | returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct); |
5270 | } |
5271 | |
5272 | if (howToReturnStruct == SPK_ByReference) |
5273 | { |
5274 | assert(returnType == TYP_UNKNOWN); |
5275 | assert(callHasRetBuffArg); |
5276 | } |
5277 | else |
5278 | { |
5279 | assert(returnType != TYP_UNKNOWN); |
5280 | |
5281 | if (!varTypeIsStruct(returnType)) |
5282 | { |
5283 | // Widen the primitive type if necessary |
5284 | returnType = genActualType(returnType); |
5285 | } |
5286 | call->gtType = returnType; |
5287 | } |
5288 | |
5289 | #if FEATURE_MULTIREG_RET |
5290 | // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer. |
5291 | assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg); |
5292 | #else // !FEATURE_MULTIREG_RET |
5293 | // No more struct returns |
5294 | assert(call->TypeGet() != TYP_STRUCT); |
5295 | #endif |
5296 | |
5297 | #if !defined(UNIX_AMD64_ABI) |
5298 | // If it was a struct return, it has been transformed into a call |
5299 | // with a return buffer (that returns TYP_VOID) or into a return |
5300 | // of a primitive/enregisterable type |
5301 | assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID)); |
5302 | #endif |
5303 | } |
5304 | |
5305 | /***************************************************************************** |
5306 | * |
5307 | * A little helper used to rearrange nested commutative operations. The |
5308 | * effect is that nested associative, commutative operations are transformed |
5309 | * into a 'left-deep' tree, i.e. into something like this: |
5310 | * |
5311 | * (((a op b) op c) op d) op... |
5312 | */ |
5313 | |
5314 | #if REARRANGE_ADDS |
5315 | |
5316 | void Compiler::fgMoveOpsLeft(GenTree* tree) |
5317 | { |
5318 | GenTree* op1; |
5319 | GenTree* op2; |
5320 | genTreeOps oper; |
5321 | |
5322 | do |
5323 | { |
5324 | op1 = tree->gtOp.gtOp1; |
5325 | op2 = tree->gtOp.gtOp2; |
5326 | oper = tree->OperGet(); |
5327 | |
5328 | noway_assert(GenTree::OperIsCommutative(oper)); |
5329 | noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL); |
5330 | noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder); |
5331 | noway_assert(oper == op2->gtOper); |
5332 | |
5333 | // Commutativity doesn't hold if overflow checks are needed |
5334 | |
5335 | if (tree->gtOverflowEx() || op2->gtOverflowEx()) |
5336 | { |
5337 | return; |
5338 | } |
5339 | |
5340 | if (gtIsActiveCSE_Candidate(op2)) |
5341 | { |
5342 | // If we have marked op2 as a CSE candidate, |
5343 | // we can't perform a commutative reordering |
5344 | // because any value numbers that we computed for op2 |
5345 | // will be incorrect after performing a commutative reordering |
5346 | // |
5347 | return; |
5348 | } |
5349 | |
5350 | if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT)) |
5351 | { |
5352 | return; |
5353 | } |
5354 | |
5355 | // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators |
5356 | if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)) |
5357 | { |
5358 | return; |
5359 | } |
5360 | |
5361 | if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN) |
5362 | { |
5363 | // We could deal with this, but we were always broken and just hit the assert |
// below regarding flags, which means it's not frequent, so we will just bail out.
5365 | // See #195514 |
5366 | return; |
5367 | } |
5368 | |
5369 | noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx()); |
5370 | |
5371 | GenTree* ad1 = op2->gtOp.gtOp1; |
5372 | GenTree* ad2 = op2->gtOp.gtOp2; |
5373 | |
// Compiler::optOptimizeBools() can create GT_OR of two GC pointers yielding a GT_INT.
// We cannot reorder such GT_OR trees.
5376 | // |
5377 | if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet())) |
5378 | { |
5379 | break; |
5380 | } |
5381 | |
5382 | // Don't split up a byref calculation and create a new byref. E.g., |
5383 | // [byref]+ (ref, [int]+ (int, int)) => [byref]+ ([byref]+ (ref, int), int). |
5384 | // Doing this transformation could create a situation where the first |
5385 | // addition (that is, [byref]+ (ref, int) ) creates a byref pointer that |
5386 | // no longer points within the ref object. If a GC happens, the byref won't |
5387 | // get updated. This can happen, for instance, if one of the int components |
5388 | // is negative. It also requires the address generation be in a fully-interruptible |
5389 | // code region. |
5390 | // |
5391 | if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL) |
5392 | { |
5393 | assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD)); |
5394 | break; |
5395 | } |
5396 | |
5397 | /* Change "(x op (y op z))" to "(x op y) op z" */ |
5398 | /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */ |
5399 | |
5400 | GenTree* new_op1 = op2; |
5401 | |
5402 | new_op1->gtOp.gtOp1 = op1; |
5403 | new_op1->gtOp.gtOp2 = ad1; |
5404 | |
5405 | /* Change the flags. */ |
5406 | |
// Make sure we aren't throwing away any flags
5408 | noway_assert((new_op1->gtFlags & |
5409 | ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag. |
5410 | GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated |
5411 | GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0); |
5412 | |
5413 | new_op1->gtFlags = |
5414 | (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag. |
5415 | (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT); |
5416 | |
/* Retype new_op1: it may have become, or ceased to be, a GC pointer. */
5418 | |
5419 | if (varTypeIsGC(op1->TypeGet())) |
5420 | { |
5421 | noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && |
5422 | oper == GT_ADD) || // byref(ref + (int+int)) |
5423 | (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && |
5424 | oper == GT_OR)); // int(gcref | int(gcref|intval)) |
5425 | |
5426 | new_op1->gtType = tree->gtType; |
5427 | } |
5428 | else if (varTypeIsGC(ad2->TypeGet())) |
5429 | { |
// Neither ad1 nor op1 is a GC pointer, so new_op1 isn't either.
5431 | noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL); |
5432 | new_op1->gtType = TYP_I_IMPL; |
5433 | } |
5434 | |
// If new_op1 is a new expression, assign it a new unique value number.
// (vnStore is null before the value numbering phase has run.)
5437 | if (vnStore != nullptr) |
5438 | { |
5439 | // We can only keep the old value number on new_op1 if both op1 and ad2 |
5440 | // have the same non-NoVN value numbers. Since op is commutative, comparing |
5441 | // only ad2 and op1 is enough. |
5442 | if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) || |
5443 | (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) || |
5444 | (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal())) |
5445 | { |
5446 | new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet())); |
5447 | } |
5448 | } |
5449 | |
5450 | tree->gtOp.gtOp1 = new_op1; |
5451 | tree->gtOp.gtOp2 = ad2; |
5452 | |
5453 | /* If 'new_op1' is now the same nested op, process it recursively */ |
5454 | |
5455 | if ((ad1->gtOper == oper) && !ad1->gtOverflowEx()) |
5456 | { |
5457 | fgMoveOpsLeft(new_op1); |
5458 | } |
5459 | |
/* If 'ad2' is now the same nested op, process it as well.
* Instead of recursion, we set up op1 and op2 for the next loop iteration.
*/
5463 | |
5464 | op1 = new_op1; |
5465 | op2 = ad2; |
5466 | } while ((op2->gtOper == oper) && !op2->gtOverflowEx()); |
5467 | |
5468 | return; |
5469 | } |
5470 | |
5471 | #endif |
5472 | |
5473 | /*****************************************************************************/ |
5474 | |
5475 | void Compiler::fgSetRngChkTarget(GenTree* tree, bool delay) |
5476 | { |
5477 | if (tree->OperIsBoundsCheck()) |
5478 | { |
5479 | GenTreeBoundsChk* const boundsChk = tree->AsBoundsChk(); |
5480 | BasicBlock* const failBlock = fgSetRngChkTargetInner(boundsChk->gtThrowKind, delay); |
5481 | if (failBlock != nullptr) |
5482 | { |
5483 | boundsChk->gtIndRngFailBB = gtNewCodeRef(failBlock); |
5484 | } |
5485 | } |
5486 | else if (tree->OperIs(GT_INDEX_ADDR)) |
5487 | { |
5488 | GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr(); |
5489 | BasicBlock* const failBlock = fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay); |
5490 | if (failBlock != nullptr) |
5491 | { |
5492 | indexAddr->gtIndRngFailBB = gtNewCodeRef(failBlock); |
5493 | } |
5494 | } |
5495 | else |
5496 | { |
5497 | noway_assert(tree->OperIs(GT_ARR_ELEM, GT_ARR_INDEX)); |
5498 | fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay); |
5499 | } |
5500 | } |
5501 | |
5502 | BasicBlock* Compiler::fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay) |
5503 | { |
5504 | if (opts.MinOpts()) |
5505 | { |
5506 | delay = false; |
5507 | } |
5508 | |
5509 | if (!opts.compDbgCode) |
5510 | { |
5511 | if (!delay && !compIsForInlining()) |
5512 | { |
5513 | // Create/find the appropriate "range-fail" label |
5514 | return fgRngChkTarget(compCurBB, kind); |
5515 | } |
5516 | } |
5517 | |
5518 | return nullptr; |
5519 | } |
5520 | |
5521 | /***************************************************************************** |
5522 | * |
5523 | * Expand a GT_INDEX node and fully morph the child operands |
5524 | * |
* The original GT_INDEX node is bashed into the GT_IND node that accesses
5526 | * the array element. We expand the GT_INDEX node into a larger tree that |
5527 | * evaluates the array base and index. The simplest expansion is a GT_COMMA |
5528 | * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag. |
5529 | * For complex array or index expressions one or more GT_COMMA assignments |
5530 | * are inserted so that we only evaluate the array or index expressions once. |
5531 | * |
5532 | * The fully expanded tree is then morphed. This causes gtFoldExpr to |
5533 | * perform local constant prop and reorder the constants in the tree and |
5534 | * fold them. |
5535 | * |
5536 | * We then parse the resulting array element expression in order to locate |
5537 | * and label the constants and variables that occur in the tree. |
5538 | */ |
5539 | |
5540 | const int MAX_ARR_COMPLEXITY = 4; |
5541 | const int MAX_INDEX_COMPLEXITY = 4; |
5542 | |
5543 | GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) |
5544 | { |
5545 | noway_assert(tree->gtOper == GT_INDEX); |
5546 | GenTreeIndex* asIndex = tree->AsIndex(); |
5547 | |
5548 | var_types elemTyp = tree->TypeGet(); |
5549 | unsigned elemSize = tree->gtIndex.gtIndElemSize; |
5550 | CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass; |
5551 | |
5552 | noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr); |
5553 | |
5554 | #ifdef FEATURE_SIMD |
5555 | if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= maxSIMDStructBytes()) |
5556 | { |
5557 | // If this is a SIMD type, this is the point at which we lose the type information, |
5558 | // so we need to set the correct type on the GT_IND. |
5559 | // (We don't care about the base type here, so we only check, but don't retain, the return value). |
5560 | unsigned simdElemSize = 0; |
5561 | if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN) |
5562 | { |
5563 | assert(simdElemSize == elemSize); |
5564 | elemTyp = getSIMDTypeForSize(elemSize); |
5565 | // This is the new type of the node. |
5566 | tree->gtType = elemTyp; |
5567 | // Now set elemStructType to null so that we don't confuse value numbering. |
5568 | elemStructType = nullptr; |
5569 | } |
5570 | } |
5571 | #endif // FEATURE_SIMD |
5572 | |
// Set up the array length's offset into lenOffs
// and the first element's offset into elemOffs.
5575 | ssize_t lenOffs; |
5576 | ssize_t elemOffs; |
5577 | if (tree->gtFlags & GTF_INX_STRING_LAYOUT) |
5578 | { |
5579 | lenOffs = OFFSETOF__CORINFO_String__stringLen; |
5580 | elemOffs = OFFSETOF__CORINFO_String__chars; |
5581 | tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE |
5582 | } |
5583 | else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT) |
5584 | { |
5585 | lenOffs = OFFSETOF__CORINFO_Array__length; |
5586 | elemOffs = eeGetEEInfo()->offsetOfObjArrayData; |
5587 | } |
5588 | else // We have a standard array |
5589 | { |
5590 | lenOffs = OFFSETOF__CORINFO_Array__length; |
5591 | elemOffs = OFFSETOF__CORINFO_Array__data; |
5592 | } |
5593 | |
5594 | // In minopts, we expand GT_INDEX to GT_IND(GT_INDEX_ADDR) in order to minimize the size of the IR. As minopts |
5595 | // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down. |
5596 | // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion |
5597 | // performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in |
5598 | // minopts). |
5599 | // |
5600 | // When we *are* optimizing, we fully expand GT_INDEX to: |
5601 | // 1. Evaluate the array address expression and store the result in a temp if the expression is complex or |
5602 | // side-effecting. |
5603 | // 2. Evaluate the array index expression and store the result in a temp if the expression is complex or |
5604 | // side-effecting. |
5605 | // 3. Perform an explicit bounds check: GT_ARR_BOUNDS_CHK(index, GT_ARR_LENGTH(array)) |
5606 | // 4. Compute the address of the element that will be accessed: |
5607 | // GT_ADD(GT_ADD(array, firstElementOffset), GT_MUL(index, elementSize)) |
5608 | // 5. Dereference the address with a GT_IND. |
5609 | // |
5610 | // This expansion explicitly exposes the bounds check and the address calculation to the optimizer, which allows |
5611 | // for more straightforward bounds-check removal, CSE, etc. |
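// For example (illustrative), with optimizations enabled "arr[i]" for an int[] expands to roughly:
//   COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(arr)),
//         IND(int)(ADD(arr, ADD(MUL(i, 4), elemOffs))))
// possibly wrapped in further COMMAs that assign 'arr' and 'i' to temps.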
5612 | if (opts.MinOpts()) |
5613 | { |
5614 | GenTree* const array = fgMorphTree(asIndex->Arr()); |
5615 | GenTree* const index = fgMorphTree(asIndex->Index()); |
5616 | |
5617 | GenTreeIndexAddr* const indexAddr = |
5618 | new (this, GT_INDEX_ADDR) GenTreeIndexAddr(array, index, elemTyp, elemStructType, elemSize, |
5619 | static_cast<unsigned>(lenOffs), static_cast<unsigned>(elemOffs)); |
5620 | indexAddr->gtFlags |= (array->gtFlags | index->gtFlags) & GTF_ALL_EFFECT; |
5621 | |
5622 | // Mark the indirection node as needing a range check if necessary. |
5623 | // Note this will always be true unless JitSkipArrayBoundCheck() is used |
5624 | if ((indexAddr->gtFlags & GTF_INX_RNGCHK) != 0) |
5625 | { |
5626 | fgSetRngChkTarget(indexAddr); |
5627 | } |
5628 | |
5629 | // Change `tree` into an indirection and return. |
5630 | tree->ChangeOper(GT_IND); |
5631 | GenTreeIndir* const indir = tree->AsIndir(); |
5632 | indir->Addr() = indexAddr; |
5633 | indir->gtFlags = GTF_IND_ARR_INDEX | (indexAddr->gtFlags & GTF_ALL_EFFECT); |
5634 | |
5635 | #ifdef DEBUG |
5636 | indexAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
5637 | #endif // DEBUG |
5638 | |
5639 | return indir; |
5640 | } |
5641 | |
5642 | GenTree* arrRef = asIndex->Arr(); |
5643 | GenTree* index = asIndex->Index(); |
5644 | |
5645 | bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled |
5646 | bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0); |
5647 | |
5648 | GenTree* arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression |
5649 | GenTree* indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression |
5650 | GenTree* bndsChk = nullptr; |
5651 | |
5652 | // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address. |
5653 | if (chkd) |
5654 | { |
5655 | GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression |
5656 | GenTree* index2 = nullptr; |
5657 | |
5658 | // If the arrRef expression involves an assignment, a call or reads from global memory, |
5659 | // then we *must* allocate a temporary in which to "localize" those values, |
5660 | // to ensure that the same values are used in the bounds check and the actual |
5661 | // dereference. |
5662 | // Also we allocate the temporary when the arrRef is sufficiently complex/expensive. |
5663 | // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true |
5664 | // complexity is not exposed. (Without that condition there are cases of local struct |
5665 | // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was |
5666 | // fixed, there were some regressions that were mostly ameliorated by adding this condition.) |
5667 | // |
5668 | if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || |
5669 | gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD)) |
5670 | { |
unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5672 | arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef); |
5673 | arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet()); |
5674 | arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet()); |
5675 | } |
5676 | else |
5677 | { |
5678 | arrRef2 = gtCloneExpr(arrRef); |
5679 | noway_assert(arrRef2 != nullptr); |
5680 | } |
5681 | |
5682 | // If the index expression involves an assignment, a call or reads from global memory, |
5683 | // we *must* allocate a temporary in which to "localize" those values, |
5684 | // to ensure that the same values are used in the bounds check and the actual |
5685 | // dereference. |
5686 | // Also we allocate the temporary when the index is sufficiently complex/expensive. |
5687 | // |
5688 | if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) || |
5689 | (arrRef->OperGet() == GT_FIELD)) |
5690 | { |
unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
5692 | indexDefn = gtNewTempAssign(indexTmpNum, index); |
5693 | index = gtNewLclvNode(indexTmpNum, index->TypeGet()); |
5694 | index2 = gtNewLclvNode(indexTmpNum, index->TypeGet()); |
5695 | } |
5696 | else |
5697 | { |
5698 | index2 = gtCloneExpr(index); |
5699 | noway_assert(index2 != nullptr); |
5700 | } |
5701 | |
5702 | // Next introduce a GT_ARR_BOUNDS_CHECK node |
5703 | var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. |
5704 | |
5705 | #ifdef _TARGET_64BIT_ |
5706 | // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case |
5707 | // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case, |
// the comparison will have to be widened to 64 bits.
5709 | if (index->TypeGet() == TYP_I_IMPL) |
5710 | { |
5711 | bndsChkType = TYP_I_IMPL; |
5712 | } |
5713 | #endif // _TARGET_64BIT_ |
5714 | |
5715 | GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs); |
5716 | |
5717 | if (bndsChkType != TYP_INT) |
5718 | { |
5719 | arrLen = gtNewCastNode(bndsChkType, arrLen, false, bndsChkType); |
5720 | } |
5721 | |
5722 | GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK) |
5723 | GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL); |
5724 | |
5725 | bndsChk = arrBndsChk; |
5726 | |
5727 | // Now we'll switch to using the second copies for arrRef and index |
5728 | // to compute the address expression |
5729 | |
5730 | arrRef = arrRef2; |
5731 | index = index2; |
5732 | } |
5733 | |
5734 | // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))" |
5735 | |
5736 | GenTree* addr; |
5737 | |
5738 | #ifdef _TARGET_64BIT_ |
5739 | // Widen 'index' on 64-bit targets |
5740 | if (index->TypeGet() != TYP_I_IMPL) |
5741 | { |
5742 | if (index->OperGet() == GT_CNS_INT) |
5743 | { |
5744 | index->gtType = TYP_I_IMPL; |
5745 | } |
5746 | else |
5747 | { |
5748 | index = gtNewCastNode(TYP_I_IMPL, index, false, TYP_I_IMPL); |
5749 | } |
5750 | } |
5751 | #endif // _TARGET_64BIT_ |
5752 | |
5753 | /* Scale the index value if necessary */ |
5754 | if (elemSize > 1) |
5755 | { |
5756 | GenTree* size = gtNewIconNode(elemSize, TYP_I_IMPL); |
5757 | |
5758 | // Fix 392756 WP7 Crossgen |
5759 | // |
5760 | // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node |
5761 | // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar. |
5762 | // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE. |
5763 | // |
5764 | size->gtFlags |= GTF_DONT_CSE; |
5765 | |
5766 | /* Multiply by the array element size */ |
5767 | addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size); |
5768 | } |
5769 | else |
5770 | { |
5771 | addr = index; |
5772 | } |
5773 | |
5774 | // Be careful to only create the byref pointer when the full index expression is added to the array reference. |
5775 | // We don't want to create a partial byref address expression that doesn't include the full index offset: |
5776 | // a byref must point within the containing object. It is dangerous (especially when optimizations come into |
5777 | // play) to create a "partial" byref that doesn't point exactly to the correct object; there is risk that |
5778 | // the partial byref will not point within the object, and thus not get updated correctly during a GC. |
5779 | // This is mostly a risk in fully-interruptible code regions. |
5780 | // |
5781 | // NOTE: the tree form created here is pattern matched by optExtractArrIndex(), so changes here must |
5782 | // be reflected there. |
5783 | |
5784 | /* Add the first element's offset */ |
5785 | |
5786 | GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL); |
5787 | |
5788 | addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns); |
5789 | |
5790 | /* Add the object ref to the element's offset */ |
5791 | |
5792 | addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr); |
5793 | |
5794 | #if SMALL_TREE_NODES |
5795 | assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL); |
5796 | #endif |
5797 | |
// Change the original GT_INDEX node into a GT_IND node
5799 | tree->SetOper(GT_IND); |
5800 | |
5801 | // If the index node is a floating-point type, notify the compiler |
5802 | // we'll potentially use floating point registers at the time of codegen. |
5803 | if (varTypeIsFloating(tree->gtType)) |
5804 | { |
5805 | this->compFloatingPointUsed = true; |
5806 | } |
5807 | |
5808 | // We've now consumed the GTF_INX_RNGCHK, and the node |
5809 | // is no longer a GT_INDEX node. |
5810 | tree->gtFlags &= ~GTF_INX_RNGCHK; |
5811 | |
5812 | tree->gtOp.gtOp1 = addr; |
5813 | |
5814 | // This is an array index expression. |
5815 | tree->gtFlags |= GTF_IND_ARR_INDEX; |
5816 | |
5817 | /* An indirection will cause a GPF if the address is null */ |
5818 | tree->gtFlags |= GTF_EXCEPT; |
5819 | |
5820 | if (nCSE) |
5821 | { |
5822 | tree->gtFlags |= GTF_DONT_CSE; |
5823 | } |
5824 | |
5825 | // Store information about it. |
5826 | GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType)); |
5827 | |
5828 | // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it. |
5829 | |
5830 | GenTree* indTree = tree; |
5831 | |
5832 | // Did we create a bndsChk tree? |
5833 | if (bndsChk) |
5834 | { |
5835 | // Use a GT_COMMA node to prepend the array bound check |
5836 | // |
5837 | tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree); |
5838 | |
5839 | /* Mark the indirection node as needing a range check */ |
5840 | fgSetRngChkTarget(bndsChk); |
5841 | } |
5842 | |
5843 | if (indexDefn != nullptr) |
5844 | { |
5845 | // Use a GT_COMMA node to prepend the index assignment |
5846 | // |
5847 | tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree); |
5848 | } |
5849 | if (arrRefDefn != nullptr) |
5850 | { |
// Use a GT_COMMA node to prepend the arrRef assignment
5852 | // |
5853 | tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree); |
5854 | } |
5855 | |
5856 | // Currently we morph the tree to perform some folding operations prior |
5857 | // to attaching fieldSeq info and labeling constant array index contributions |
5858 | // |
5859 | fgMorphTree(tree); |
5860 | |
5861 | // Ideally we just want to proceed to attaching fieldSeq info and labeling the |
5862 | // constant array index contributions, but the morphing operation may have changed |
5863 | // the 'tree' into something that now unconditionally throws an exception. |
5864 | // |
// In such a case the gtEffectiveVal could be a new tree or its gtOper could be modified
5866 | // or it could be left unchanged. If it is unchanged then we should not return, |
5867 | // instead we should proceed to attaching fieldSeq info, etc... |
5868 | // |
5869 | GenTree* arrElem = tree->gtEffectiveVal(); |
5870 | |
5871 | if (fgIsCommaThrow(tree)) |
5872 | { |
5873 | if ((arrElem != indTree) || // A new tree node may have been created |
5874 | (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT |
5875 | { |
5876 | return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc.. |
5877 | } |
5878 | } |
5879 | |
5880 | assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED)); |
5881 | |
5882 | addr = arrElem->gtOp.gtOp1; |
5883 | |
5884 | assert(addr->TypeGet() == TYP_BYREF); |
5885 | |
5886 | GenTree* cnsOff = nullptr; |
5887 | if (addr->OperGet() == GT_ADD) |
5888 | { |
5889 | assert(addr->TypeGet() == TYP_BYREF); |
5890 | assert(addr->gtOp.gtOp1->TypeGet() == TYP_REF); |
5891 | |
5892 | addr = addr->gtOp.gtOp2; |
5893 | |
5894 | // Look for the constant [#FirstElem] node here, or as the RHS of an ADD. |
5895 | |
5896 | if (addr->gtOper == GT_CNS_INT) |
5897 | { |
5898 | cnsOff = addr; |
5899 | addr = nullptr; |
5900 | } |
5901 | else |
5902 | { |
5903 | if ((addr->OperGet() == GT_ADD) && (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)) |
5904 | { |
5905 | cnsOff = addr->gtOp.gtOp2; |
5906 | addr = addr->gtOp.gtOp1; |
5907 | } |
5908 | |
5909 | // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX |
5910 | addr->LabelIndex(this); |
5911 | } |
5912 | } |
5913 | else if (addr->OperGet() == GT_CNS_INT) |
5914 | { |
5915 | cnsOff = addr; |
5916 | } |
5917 | |
5918 | FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField); |
5919 | |
5920 | if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs)) |
5921 | { |
5922 | // Assign it the [#FirstElem] field sequence |
5923 | // |
5924 | cnsOff->gtIntCon.gtFieldSeq = firstElemFseq; |
5925 | } |
5926 | else // We have folded the first element's offset with the index expression |
5927 | { |
5928 | // Build the [#ConstantIndex, #FirstElem] field sequence |
5929 | // |
5930 | FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField); |
5931 | FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq); |
5932 | |
5933 | if (cnsOff == nullptr) // It must have folded into a zero offset |
5934 | { |
5935 | // Record in the general zero-offset map. |
5936 | GetZeroOffsetFieldMap()->Set(addr, fieldSeq); |
5937 | } |
5938 | else |
5939 | { |
5940 | cnsOff->gtIntCon.gtFieldSeq = fieldSeq; |
5941 | } |
5942 | } |
5943 | |
5944 | return tree; |
5945 | } |
5946 | |
5947 | #ifdef _TARGET_X86_ |
5948 | /***************************************************************************** |
5949 | * |
5950 | * Wrap fixed stack arguments for varargs functions to go through varargs |
5951 | * cookie to access them, except for the cookie itself. |
5952 | * |
5953 | * Non-x86 platforms are allowed to access all arguments directly |
5954 | * so we don't need this code. |
5955 | * |
5956 | */ |
5957 | GenTree* Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs) |
5958 | { |
5959 | /* For the fixed stack arguments of a varargs function, we need to go |
5960 | through the varargs cookies to access them, except for the |
5961 | cookie itself */ |
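// For example (illustrative): a fixed int argument is rewritten below into roughly
// IND(int, SUB(LCL_VAR lvaVarargsBaseOfStkArgs, offset)), where 'offset' folds in the
// register argument area adjustment and lclOffs.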
5962 | |
5963 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
5964 | |
5965 | if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg) |
5966 | { |
5967 | // Create a node representing the local pointing to the base of the args |
5968 | GenTree* ptrArg = |
5969 | gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL), |
5970 | gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES + |
5971 | lclOffs)); |
5972 | |
5973 | // Access the argument through the local |
5974 | GenTree* tree; |
5975 | if (varTypeIsStruct(varType)) |
5976 | { |
5977 | tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize); |
5978 | } |
5979 | else |
5980 | { |
5981 | tree = gtNewOperNode(GT_IND, varType, ptrArg); |
5982 | } |
5983 | tree->gtFlags |= GTF_IND_TGTANYWHERE; |
5984 | |
5985 | if (varDsc->lvAddrExposed) |
5986 | { |
5987 | tree->gtFlags |= GTF_GLOB_REF; |
5988 | } |
5989 | |
5990 | return fgMorphTree(tree); |
5991 | } |
5992 | |
return nullptr;
5994 | } |
5995 | #endif |
5996 | |
5997 | /***************************************************************************** |
5998 | * |
5999 | * Transform the given GT_LCL_VAR tree for code generation. |
6000 | */ |
6001 | |
6002 | GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph) |
6003 | { |
6004 | assert(tree->gtOper == GT_LCL_VAR); |
6005 | |
6006 | unsigned lclNum = tree->gtLclVarCommon.gtLclNum; |
6007 | var_types varType = lvaGetRealType(lclNum); |
6008 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
6009 | |
6010 | if (varDsc->lvAddrExposed) |
6011 | { |
6012 | tree->gtFlags |= GTF_GLOB_REF; |
6013 | } |
6014 | |
6015 | #ifdef _TARGET_X86_ |
6016 | if (info.compIsVarArgs) |
6017 | { |
6018 | GenTree* newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0); |
6019 | if (newTree != nullptr) |
6020 | { |
6021 | if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) |
6022 | { |
6023 | fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType); |
6024 | } |
6025 | return newTree; |
6026 | } |
6027 | } |
6028 | #endif // _TARGET_X86_ |
6029 | |
6030 | /* If not during the global morphing phase bail */ |
6031 | |
6032 | if (!fgGlobalMorph && !forceRemorph) |
6033 | { |
6034 | return tree; |
6035 | } |
6036 | |
6037 | bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0; |
6038 | |
6039 | noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr |
6040 | |
6041 | if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad()) |
6042 | { |
6043 | #if LOCAL_ASSERTION_PROP |
6044 | /* Assertion prop can tell us to omit adding a cast here */ |
6045 | if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX) |
6046 | { |
6047 | return tree; |
6048 | } |
6049 | #endif |
/* Small-typed arguments and aliased locals are normalized on load.
Other small-typed locals are normalized on store.
They are also normalized on load under the debugger, since the debugger could write to the variable.
If this is one of the former, insert a narrowing cast on the load.
ie. Convert: var-short --> cast-short(var-int) */
6055 | |
6056 | tree->gtType = TYP_INT; |
6057 | fgMorphTreeDone(tree); |
6058 | tree = gtNewCastNode(TYP_INT, tree, false, varType); |
6059 | fgMorphTreeDone(tree); |
6060 | return tree; |
6061 | } |
6062 | |
6063 | return tree; |
6064 | } |
6065 | |
6066 | /***************************************************************************** |
6067 | Grab a temp for big offset morphing. |
This method will grab a new temp if no temp of this "type" has been created yet,
otherwise it returns the same cached one.
6070 | */ |
6071 | unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type) |
6072 | { |
6073 | unsigned lclNum = fgBigOffsetMorphingTemps[type]; |
6074 | |
6075 | if (lclNum == BAD_VAR_NUM) |
6076 | { |
6077 | // We haven't created a temp for this kind of type. Create one now. |
lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
6079 | fgBigOffsetMorphingTemps[type] = lclNum; |
6080 | } |
6081 | else |
6082 | { |
6083 | // We better get the right type. |
6084 | noway_assert(lvaTable[lclNum].TypeGet() == type); |
6085 | } |
6086 | |
6087 | noway_assert(lclNum != BAD_VAR_NUM); |
6088 | return lclNum; |
6089 | } |
6090 | |
6091 | /***************************************************************************** |
6092 | * |
6093 | * Transform the given GT_FIELD tree for code generation. |
6094 | */ |
6095 | |
6096 | GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac) |
6097 | { |
6098 | assert(tree->gtOper == GT_FIELD); |
6099 | |
6100 | CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd; |
6101 | unsigned fldOffset = tree->gtField.gtFldOffset; |
6102 | GenTree* objRef = tree->gtField.gtFldObj; |
6103 | bool fieldMayOverlap = false; |
6104 | bool objIsLocal = false; |
6105 | |
6106 | if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR)) |
6107 | { |
6108 | // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter. |
// If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the
6110 | // simd field rewrites are sensitive to. |
6111 | fgMorphImplicitByRefArgs(objRef); |
6112 | } |
6113 | |
6114 | noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) || |
6115 | ((tree->gtFlags & GTF_GLOB_REF) != 0)); |
6116 | |
6117 | if (tree->gtField.gtFldMayOverlap) |
6118 | { |
6119 | fieldMayOverlap = true; |
6120 | // Reset the flag because we may reuse the node. |
6121 | tree->gtField.gtFldMayOverlap = false; |
6122 | } |
6123 | |
6124 | #ifdef FEATURE_SIMD |
// If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
6126 | if (mac == nullptr) |
6127 | { |
6128 | GenTree* newTree = fgMorphFieldToSIMDIntrinsicGet(tree); |
6129 | if (newTree != tree) |
6130 | { |
6131 | newTree = fgMorphSmpOp(newTree); |
6132 | return newTree; |
6133 | } |
6134 | } |
6135 | else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1())) |
6136 | { |
6137 | GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr(); |
6138 | if (lcl != nullptr) |
6139 | { |
6140 | lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField)); |
6141 | } |
6142 | } |
6143 | #endif |
6144 | |
6145 | /* Is this an instance data member? */ |
6146 | |
6147 | if (objRef) |
6148 | { |
6149 | GenTree* addr; |
6150 | objIsLocal = objRef->IsLocal(); |
6151 | |
6152 | if (tree->gtFlags & GTF_IND_TLS_REF) |
6153 | { |
6154 | NO_WAY("instance field can not be a TLS ref." ); |
6155 | } |
6156 | |
6157 | /* We'll create the expression "*(objRef + mem_offs)" */ |
6158 | |
6159 | noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL); |
6160 | |
6161 | // An optimization for Contextful classes: |
6162 | // we unwrap the proxy when we have a 'this reference' |
6163 | if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef)) |
6164 | { |
6165 | objRef = fgUnwrapProxy(objRef); |
6166 | } |
6167 | |
6168 | /* |
6169 | Now we have a tree like this: |
6170 | |
6171 | +--------------------+ |
6172 | | GT_FIELD | tree |
6173 | +----------+---------+ |
6174 | | |
6175 | +--------------+-------------+ |
6176 | | tree->gtField.gtFldObj | |
6177 | +--------------+-------------+ |
6178 | |
6179 | |
6180 | We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): |
6181 | |
6182 | +--------------------+ |
6183 | | GT_IND/GT_OBJ | tree |
6184 | +---------+----------+ |
6185 | | |
6186 | | |
6187 | +---------+----------+ |
6188 | | GT_ADD | addr |
6189 | +---------+----------+ |
6190 | | |
6191 | / \ |
6192 | / \ |
6193 | / \ |
6194 | +-------------------+ +----------------------+ |
6195 | | objRef | | fldOffset | |
6196 | | | | (when fldOffset !=0) | |
6197 | +-------------------+ +----------------------+ |
6198 | |
6199 | |
6200 | or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): |
6201 | |
6202 | |
6203 | +--------------------+ |
6204 | | GT_IND/GT_OBJ | tree |
6205 | +----------+---------+ |
6206 | | |
6207 | +----------+---------+ |
6208 | | GT_COMMA | comma2 |
6209 | +----------+---------+ |
6210 | | |
6211 | / \ |
6212 | / \ |
6213 | / \ |
6214 | / \ |
6215 | +---------+----------+ +---------+----------+ |
6216 | comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr |
6217 | +---------+----------+ +---------+----------+ |
6218 | | | |
6219 | / \ / \ |
6220 | / \ / \ |
6221 | / \ / \ |
6222 | +-----+-----+ +-----+-----+ +---------+ +-----------+ |
6223 | asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset | |
6224 | +-----+-----+ +-----+-----+ +---------+ +-----------+ |
6225 | | | |
6226 | / \ | |
6227 | / \ | |
6228 | / \ | |
6229 | +-----+-----+ +-----+-----+ +-----------+ |
6230 | | tmpLcl | | objRef | | tmpLcl | |
6231 | +-----------+ +-----------+ +-----------+ |
6232 | |
6233 | |
6234 | */ |
6235 | |
6236 | var_types objRefType = objRef->TypeGet(); |
6237 | |
6238 | GenTree* comma = nullptr; |
6239 | |
6240 | // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field, |
6241 | // and thus is equivalent to a MACK_Ind with zero offset. |
6242 | MorphAddrContext defMAC(MACK_Ind); |
6243 | if (mac == nullptr) |
6244 | { |
6245 | mac = &defMAC; |
6246 | } |
6247 | |
6248 | // This flag is set to enable the "conservative" style of explicit null-check insertion. |
6249 | // This means that we insert an explicit null check whenever we create byref by adding a |
6250 | // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately |
6251 | // dereferenced). The alternative is "aggressive", which would not insert such checks (for |
6252 | // small offsets); in this plan, we would transfer some null-checking responsibility to |
// callees of methods taking byref parameters. They would have to add explicit null checks
6254 | // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in |
6255 | // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too |
6256 | // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null |
6257 | // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs. |
6258 | // This is left here to point out how to implement it. |
6259 | CLANG_FORMAT_COMMENT_ANCHOR; |
6260 | |
6261 | #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1 |
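
// For example (illustrative): when "&obj.fld" is morphed for a field at a small nonzero offset, the
// GT_FIELD is reached in a MACK_Addr context and the resulting byref is not immediately dereferenced,
// so under the conservative scheme below an explicit null check of 'obj' is inserted.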
6262 | |
6263 | bool addExplicitNullCheck = false; |
6264 | |
6265 | // Implicit byref locals are never null. |
6266 | if (!((objRef->gtOper == GT_LCL_VAR) && lvaIsImplicitByRefLocal(objRef->gtLclVarCommon.gtLclNum))) |
6267 | { |
6268 | // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression |
6269 | // whose address is being taken is either a local or static variable, whose address is necessarily |
6270 | // non-null, or else it is a field dereference, which will do its own bounds checking if necessary. |
6271 | if (objRef->gtOper != GT_ADDR && (mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind)) |
6272 | { |
6273 | if (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)) |
6274 | { |
6275 | addExplicitNullCheck = true; |
6276 | } |
6277 | else |
6278 | { |
6279 | // In R2R mode the field offset for some fields may change when the code |
6280 | // is loaded. So we can't rely on a zero offset here to suppress the null check. |
6281 | // |
6282 | // See GitHub issue #16454. |
6283 | bool fieldHasChangeableOffset = false; |
6284 | |
6285 | #ifdef FEATURE_READYTORUN_COMPILER |
6286 | fieldHasChangeableOffset = (tree->gtField.gtFieldLookup.addr != nullptr); |
6287 | #endif |
6288 | |
6289 | #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION |
6290 | addExplicitNullCheck = (mac->m_kind == MACK_Addr) && |
6291 | ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset); |
6292 | #else |
6293 | addExplicitNullCheck = (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr && |
6294 | ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset)); |
6295 | #endif |
6296 | } |
6297 | } |
6298 | } |
6299 | |
6300 | if (addExplicitNullCheck) |
6301 | { |
6302 | #ifdef DEBUG |
6303 | if (verbose) |
6304 | { |
6305 | printf("Before explicit null check morphing:\n" ); |
6306 | gtDispTree(tree); |
6307 | } |
6308 | #endif |
6309 | |
6310 | // |
6311 | // Create the "comma" subtree |
6312 | // |
6313 | GenTree* asg = nullptr; |
6314 | GenTree* nullchk; |
6315 | |
6316 | unsigned lclNum; |
6317 | |
6318 | if (objRef->gtOper != GT_LCL_VAR) |
6319 | { |
6320 | lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet())); |
6321 | |
6322 | // Create the "asg" node |
6323 | asg = gtNewTempAssign(lclNum, objRef); |
6324 | } |
6325 | else |
6326 | { |
6327 | lclNum = objRef->gtLclVarCommon.gtLclNum; |
6328 | } |
6329 | |
6330 | // Create the "nullchk" node. |
// Make it TYP_BYTE so we only dereference it for 1 byte.
6332 | GenTree* lclVar = gtNewLclvNode(lclNum, objRefType); |
6333 | nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr); |
6334 | |
6335 | nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections |
6336 | |
6337 | // An indirection will cause a GPF if the address is null. |
6338 | nullchk->gtFlags |= GTF_EXCEPT; |
6339 | |
6340 | compCurBB->bbFlags |= BBF_HAS_NULLCHECK; |
6341 | optMethodFlags |= OMF_HAS_NULLCHECK; |
6342 | |
6343 | if (asg) |
6344 | { |
6345 | // Create the "comma" node. |
6346 | comma = gtNewOperNode(GT_COMMA, |
6347 | TYP_VOID, // We don't want to return anything from this "comma" node. |
6348 | // Set the type to TYP_VOID, so we can select "cmp" instruction |
6349 | // instead of "mov" instruction later on. |
6350 | asg, nullchk); |
6351 | } |
6352 | else |
6353 | { |
6354 | comma = nullchk; |
6355 | } |
6356 | |
6357 | addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node. |
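
// At this point (illustrative): "comma" is either COMMA(ASG(tmpLcl, objRef), GT_NULLCHECK(tmpLcl)) when a
// temp was needed, or just GT_NULLCHECK(objRefLcl) when objRef was already a local, and "addr" is a fresh
// use of that same local, matching the "comma" and "addr" nodes in the diagram above.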
6358 | } |
6359 | else if (fldOffset == 0) |
6360 | { |
6361 | // Generate the "addr" node. |
6362 | addr = objRef; |
6363 | FieldSeqNode* fieldSeq = |
6364 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
6365 | GetZeroOffsetFieldMap()->Set(addr, fieldSeq); |
6366 | } |
6367 | else |
6368 | { |
6369 | addr = objRef; |
6370 | } |
6371 | |
6372 | #ifdef FEATURE_READYTORUN_COMPILER |
6373 | if (tree->gtField.gtFieldLookup.addr != nullptr) |
6374 | { |
6375 | GenTree* offsetNode = nullptr; |
6376 | if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE) |
6377 | { |
6378 | offsetNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)tree->gtField.gtFieldLookup.addr, |
6379 | GTF_ICON_FIELD_HDL, false); |
6380 | } |
6381 | else |
6382 | { |
6383 | noway_assert(!"unexpected accessType for R2R field access" ); |
6384 | } |
6385 | |
6386 | var_types addType = (objRefType == TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF; |
6387 | addr = gtNewOperNode(GT_ADD, addType, addr, offsetNode); |
6388 | } |
6389 | #endif |
6390 | if (fldOffset != 0) |
6391 | { |
6392 | // Generate the "addr" node. |
6393 | /* Add the member offset to the object's address */ |
6394 | FieldSeqNode* fieldSeq = |
6395 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
6396 | addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, |
6397 | gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq)); |
6398 | } |
6399 | |
6400 | // Now let's set the "tree" as a GT_IND tree. |
6401 | |
6402 | tree->SetOper(GT_IND); |
6403 | tree->gtOp.gtOp1 = addr; |
6404 | |
6405 | tree->gtFlags &= (~GTF_EXCEPT | addr->gtFlags); |
6406 | tree->SetIndirExceptionFlags(this); |
6407 | |
6408 | if (addExplicitNullCheck) |
6409 | { |
6410 | // |
6411 | // Create "comma2" node and link it to "tree". |
6412 | // |
6413 | GenTree* comma2; |
6414 | comma2 = gtNewOperNode(GT_COMMA, |
6415 | addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node. |
6416 | comma, addr); |
6417 | tree->gtOp.gtOp1 = comma2; |
6418 | } |
6419 | |
6420 | #ifdef DEBUG |
6421 | if (verbose) |
6422 | { |
6423 | if (addExplicitNullCheck) |
6424 | { |
6425 | printf("After adding explicit null check:\n" ); |
6426 | gtDispTree(tree); |
6427 | } |
6428 | } |
6429 | #endif |
6430 | } |
6431 | else /* This is a static data member */ |
6432 | { |
6433 | if (tree->gtFlags & GTF_IND_TLS_REF) |
6434 | { |
6435 | // Thread Local Storage static field reference |
6436 | // |
6437 | // Field ref is a TLS 'Thread-Local-Storage' reference |
6438 | // |
6439 | // Build this tree: IND(*) # |
6440 | // | |
6441 | // ADD(I_IMPL) |
6442 | // / \ |
6443 | // / CNS(fldOffset) |
6444 | // / |
6445 | // / |
6446 | // / |
6447 | // IND(I_IMPL) == [Base of this DLL's TLS] |
6448 | // | |
6449 | // ADD(I_IMPL) |
6450 | // / \ |
6451 | // / CNS(IdValue*4) or MUL |
6452 | // / / \ |
6453 | // IND(I_IMPL) / CNS(4) |
6454 | // | / |
6455 | // CNS(TLS_HDL,0x2C) IND |
6456 | // | |
6457 | // CNS(pIdAddr) |
6458 | // |
// # Denotes the original node
6460 | // |
6461 | void** pIdAddr = nullptr; |
6462 | unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr); |
6463 | |
6464 | // |
// If we can access the TLS DLL index ID value directly,
6466 | // then pIdAddr will be NULL and |
6467 | // IdValue will be the actual TLS DLL index ID |
6468 | // |
6469 | GenTree* dllRef = nullptr; |
6470 | if (pIdAddr == nullptr) |
6471 | { |
6472 | if (IdValue != 0) |
6473 | { |
6474 | dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL); |
6475 | } |
6476 | } |
6477 | else |
6478 | { |
6479 | dllRef = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pIdAddr, GTF_ICON_STATIC_HDL, true); |
6480 | |
6481 | // Next we multiply by 4 |
6482 | dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL)); |
6483 | } |
6484 | |
6485 | #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides |
6486 | |
6487 | // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] |
6488 | |
6489 | GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL); |
6490 | |
6491 | // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS |
6492 | if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) |
6493 | { |
6494 | tree->gtFlags &= ~GTF_FLD_INITCLASS; |
6495 | tlsRef->gtFlags |= GTF_ICON_INITCLASS; |
6496 | } |
6497 | |
6498 | tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); |
6499 | |
6500 | if (dllRef != nullptr) |
6501 | { |
6502 | /* Add the dllRef */ |
6503 | tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef); |
6504 | } |
6505 | |
/* Indirect to have tlsRef point at the base of the DLL's Thread Local Storage */
6507 | tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); |
6508 | |
6509 | if (fldOffset != 0) |
6510 | { |
6511 | FieldSeqNode* fieldSeq = |
6512 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
6513 | GenTree* fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq); |
6514 | |
6515 | /* Add the TLS static field offset to the address */ |
6516 | |
6517 | tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode); |
6518 | } |
6519 | |
6520 | // Final indirect to get to actual value of TLS static field |
6521 | |
6522 | tree->SetOper(GT_IND); |
6523 | tree->gtOp.gtOp1 = tlsRef; |
6524 | |
6525 | noway_assert(tree->gtFlags & GTF_IND_TLS_REF); |
6526 | } |
6527 | else |
6528 | { |
6529 | // Normal static field reference |
6530 | |
6531 | // |
// If we can access the static's address directly,
6533 | // then pFldAddr will be NULL and |
6534 | // fldAddr will be the actual address of the static field |
6535 | // |
6536 | void** pFldAddr = nullptr; |
6537 | void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr); |
6538 | |
6539 | if (pFldAddr == nullptr) |
6540 | { |
6541 | #ifdef _TARGET_64BIT_ |
6542 | if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr)) |
6543 | { |
// The address is not directly addressable, so force it into a
// constant so that we handle it properly.
6546 | |
6547 | GenTree* addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL); |
6548 | addr->gtType = TYP_I_IMPL; |
6549 | FieldSeqNode* fieldSeq = |
6550 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
6551 | addr->gtIntCon.gtFieldSeq = fieldSeq; |
6552 | // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS |
6553 | if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) |
6554 | { |
6555 | tree->gtFlags &= ~GTF_FLD_INITCLASS; |
6556 | addr->gtFlags |= GTF_ICON_INITCLASS; |
6557 | } |
6558 | |
6559 | tree->SetOper(GT_IND); |
6560 | tree->gtOp.gtOp1 = addr; |
6561 | |
6562 | return fgMorphSmpOp(tree); |
6563 | } |
6564 | else |
6565 | #endif // _TARGET_64BIT_ |
6566 | { |
6567 | // Only volatile or classinit could be set, and they map over |
6568 | noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0); |
6569 | static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE); |
6570 | static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS); |
6571 | tree->SetOper(GT_CLS_VAR); |
6572 | tree->gtClsVar.gtClsVarHnd = symHnd; |
6573 | FieldSeqNode* fieldSeq = |
6574 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
6575 | tree->gtClsVar.gtFieldSeq = fieldSeq; |
6576 | } |
6577 | |
6578 | return tree; |
6579 | } |
6580 | else |
6581 | { |
6582 | GenTree* addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL); |
6583 | |
6584 | // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS |
6585 | if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) |
6586 | { |
6587 | tree->gtFlags &= ~GTF_FLD_INITCLASS; |
6588 | addr->gtFlags |= GTF_ICON_INITCLASS; |
6589 | } |
6590 | |
// There are two cases here: either the static is RVA-based,
// in which case the type of the FIELD node is not a GC type
// and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
// a GC type and the handle to it is a TYP_BYREF in the GC heap,
// because handles to statics now go into the large object heap.
6596 | |
6597 | var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL); |
6598 | GenTree* op1 = gtNewOperNode(GT_IND, handleTyp, addr); |
6599 | op1->gtFlags |= GTF_IND_INVARIANT; |
6600 | |
6601 | tree->SetOper(GT_IND); |
6602 | tree->gtOp.gtOp1 = op1; |
6603 | } |
6604 | } |
6605 | } |
6606 | noway_assert(tree->gtOper == GT_IND); |
6607 | |
6608 | GenTree* res = fgMorphSmpOp(tree); |
6609 | |
6610 | // If we have a struct type, this node would previously have been under a GT_ADDR, |
6611 | // and therefore would have been marked GTF_DONT_CSE. |
6612 | // TODO-1stClassStructs: revisit this. |
6613 | if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal) |
6614 | { |
6615 | res->gtFlags |= GTF_DONT_CSE; |
6616 | } |
6617 | |
6618 | if (fldOffset == 0 && res->OperGet() == GT_IND) |
6619 | { |
6620 | GenTree* addr = res->gtOp.gtOp1; |
6621 | // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node. |
6622 | FieldSeqNode* fieldSeq = |
6623 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
6624 | fgAddFieldSeqForZeroOffset(addr, fieldSeq); |
6625 | } |
6626 | |
6627 | return res; |
6628 | } |
6629 | |
6630 | //------------------------------------------------------------------------------ |
6631 | // fgMorphCallInline: attempt to inline a call |
6632 | // |
6633 | // Arguments: |
6634 | // call - call expression to inline, inline candidate |
6635 | // inlineResult - result tracking and reporting |
6636 | // |
6637 | // Notes: |
6638 | // Attempts to inline the call. |
6639 | // |
6640 | // If successful, callee's IR is inserted in place of the call, and |
6641 | // is marked with an InlineContext. |
6642 | // |
6643 | // If unsuccessful, the transformations done in anticipation of a |
6644 | // possible inline are undone, and the candidate flag on the call |
6645 | // is cleared. |
6646 | |
6647 | void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult) |
6648 | { |
6649 | bool inliningFailed = false; |
6650 | |
6651 | // Is this call an inline candidate? |
6652 | if (call->IsInlineCandidate()) |
6653 | { |
6654 | // Attempt the inline |
6655 | fgMorphCallInlineHelper(call, inlineResult); |
6656 | |
6657 | // We should have made up our minds one way or another.... |
6658 | assert(inlineResult->IsDecided()); |
6659 | |
6660 | // If we failed to inline, we have a bit of work to do to cleanup |
6661 | if (inlineResult->IsFailure()) |
6662 | { |
6663 | |
6664 | #ifdef DEBUG |
6665 | |
6666 | // Before we do any cleanup, create a failing InlineContext to |
6667 | // capture details of the inlining attempt. |
6668 | m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult); |
6669 | |
6670 | #endif |
6671 | |
6672 | inliningFailed = true; |
6673 | |
6674 | // Clear the Inline Candidate flag so we can ensure later we tried |
6675 | // inlining all candidates. |
6676 | // |
6677 | call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE; |
6678 | } |
6679 | } |
6680 | else |
6681 | { |
6682 | // This wasn't an inline candidate. So it must be a GDV candidate. |
6683 | assert(call->IsGuardedDevirtualizationCandidate()); |
6684 | |
6685 | // We already know we can't inline this call, so don't even bother to try. |
6686 | inliningFailed = true; |
6687 | } |
6688 | |
6689 | // If we failed to inline (or didn't even try), do some cleanup. |
6690 | if (inliningFailed) |
6691 | { |
6692 | if (call->gtReturnType != TYP_VOID) |
6693 | { |
6694 | JITDUMP("Inlining [%06u] failed, so bashing [%06u] to NOP\n" , dspTreeID(call), dspTreeID(fgMorphStmt)); |
6695 | |
6696 | // Detach the GT_CALL tree from the original statement by |
6697 | // hanging a "nothing" node to it. Later the "nothing" node will be removed |
6698 | // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node. |
6699 | |
6700 | noway_assert(fgMorphStmt->gtStmtExpr == call); |
6701 | fgMorphStmt->gtStmtExpr = gtNewNothingNode(); |
6702 | } |
6703 | } |
6704 | } |
6705 | |
6706 | /***************************************************************************** |
6707 | * Helper to attempt to inline a call |
6708 | * Sets success/failure in inline result |
6709 | * If success, modifies current method's IR with inlinee's IR |
6710 | * If failed, undoes any speculative modifications to current method |
6711 | */ |
6712 | |
6713 | void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) |
6714 | { |
6715 | // Don't expect any surprises here. |
6716 | assert(result->IsCandidate()); |
6717 | |
6718 | if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING) |
6719 | { |
6720 | // For now, attributing this to call site, though it's really |
6721 | // more of a budget issue (lvaCount currently includes all |
6722 | // caller and prospective callee locals). We still might be |
6723 | // able to inline other callees into this caller, or inline |
6724 | // this callee in other callers. |
6725 | result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS); |
6726 | return; |
6727 | } |
6728 | |
6729 | if (call->IsVirtual()) |
6730 | { |
6731 | result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL); |
6732 | return; |
6733 | } |
6734 | |
6735 | // Re-check this because guarded devirtualization may allow these through. |
6736 | if (gtIsRecursiveCall(call) && call->IsImplicitTailCall()) |
6737 | { |
6738 | result->NoteFatal(InlineObservation::CALLSITE_IMPLICIT_REC_TAIL_CALL); |
6739 | return; |
6740 | } |
6741 | |
6742 | // impMarkInlineCandidate() is expected not to mark tail prefixed calls |
6743 | // and recursive tail calls as inline candidates. |
6744 | noway_assert(!call->IsTailPrefixedCall()); |
6745 | noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call)); |
6746 | |
6747 | /* If the caller's stack frame is marked, then we can't do any inlining. Period. |
6748 | Although we have checked this in impCanInline, it is possible that later IL instructions |
6749 | might cause compNeedSecurityCheck to be set. Therefore we need to check it here again. |
6750 | */ |
6751 | |
6752 | if (opts.compNeedSecurityCheck) |
6753 | { |
6754 | result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK); |
6755 | return; |
6756 | } |
6757 | |
6758 | // |
6759 | // Calling inlinee's compiler to inline the method. |
6760 | // |
6761 | |
6762 | unsigned startVars = lvaCount; |
6763 | |
6764 | #ifdef DEBUG |
6765 | if (verbose) |
6766 | { |
6767 | printf("Expanding INLINE_CANDIDATE in statement " ); |
6768 | printTreeID(fgMorphStmt); |
6769 | printf(" in " FMT_BB ":\n" , compCurBB->bbNum); |
6770 | gtDispTree(fgMorphStmt); |
6771 | if (call->IsImplicitTailCall()) |
6772 | { |
6773 | printf("Note: candidate is implicit tail call\n" ); |
6774 | } |
6775 | } |
6776 | #endif |
6777 | |
6778 | impInlineRoot()->m_inlineStrategy->NoteAttempt(result); |
6779 | |
6780 | // |
6781 | // Invoke the compiler to inline the call. |
6782 | // |
6783 | |
6784 | fgInvokeInlineeCompiler(call, result); |
6785 | |
6786 | if (result->IsFailure()) |
6787 | { |
6788 | // Undo some changes made in anticipation of inlining... |
6789 | |
6790 | // Zero out the used locals |
6791 | memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable)); |
6792 | for (unsigned i = startVars; i < lvaCount; i++) |
6793 | { |
6794 | new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(); // call the constructor. |
6795 | } |
6796 | |
6797 | lvaCount = startVars; |
6798 | |
6799 | #ifdef DEBUG |
6800 | if (verbose) |
6801 | { |
6802 | // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount); |
6803 | } |
6804 | #endif |
6805 | |
6806 | return; |
6807 | } |
6808 | |
6809 | #ifdef DEBUG |
6810 | if (verbose) |
6811 | { |
6812 | // printf("After inlining lvaCount=%d.\n", lvaCount); |
6813 | } |
6814 | #endif |
6815 | } |
6816 | |
6817 | //------------------------------------------------------------------------ |
6818 | // fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp. |
6819 | // |
6820 | // Arguments: |
6821 | // callee - The callee to check |
6822 | // |
6823 | // Return Value: |
6824 | // Returns true or false based on whether the callee can be fastTailCalled |
6825 | // |
6826 | // Notes: |
6827 | // This function is target specific and each target will make the fastTailCall |
6828 | // decision differently. See the notes below. |
6829 | // |
6830 | // |
6831 | // Windows Amd64: |
6832 | // A fast tail call can be made whenever the number of callee arguments |
// is less than or equal to the number of caller arguments, or we have four
6834 | // or fewer callee arguments. This is because, on Windows AMD64, each |
6835 | // argument uses exactly one register or one 8-byte stack slot. Thus, we only |
6836 | // need to count arguments, and not be concerned with the size of each |
6837 | // incoming or outgoing argument. |
6838 | // |
6839 | // Can fast tail call examples (amd64 Windows): |
6840 | // |
6841 | // -- Callee will have all register arguments -- |
6842 | // caller(int, int, int, int) |
6843 | // callee(int, int, float, int) |
6844 | // |
6845 | // -- Callee requires stack space that is equal to the caller -- |
6846 | // caller(struct, struct, struct, struct, struct, struct) |
6847 | // callee(int, int, int, int, int, int) |
6848 | // |
6849 | // -- Callee requires stack space that is less than the caller -- |
6850 | // caller(struct, double, struct, float, struct, struct) |
6851 | // callee(int, int, int, int, int) |
6852 | // |
6853 | // -- Callee will have all register arguments -- |
6854 | // caller(int) |
6855 | // callee(int, int, int, int) |
6856 | // |
6857 | // Cannot fast tail call examples (amd64 Windows): |
6858 | // |
6859 | // -- Callee requires stack space that is larger than the caller -- |
6860 | // caller(struct, double, struct, float, struct, struct) |
6861 | // callee(int, int, int, int, int, double, double, double) |
6862 | // |
6863 | // Unix Amd64 && Arm64: |
6864 | // A fastTailCall decision can be made whenever the callee's stack space is |
6865 | // less than or equal to the caller's stack space. There are many permutations |
6866 | // of when the caller and callee have different stack sizes if there are |
6867 | // structs being passed to either the caller or callee. |
6868 | // |
6869 | // Exceptions: |
// 1) If the callee has structs which cannot be enregistered, it will be
// reported as cannot fast tail call. This is an implementation limitation
// where only the callee is checked for non-enregisterable structs. This is
6873 | // tracked with https://github.com/dotnet/coreclr/issues/12644. |
6874 | // |
// 2) If the caller or callee has stack arguments and the callee has more
// arguments than the caller, it will be reported as cannot fast tail call.
6877 | // This is due to a bug in LowerFastTailCall which assumes that |
6878 | // nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This |
6879 | // is tracked with https://github.com/dotnet/coreclr/issues/12468. |
6880 | // |
// 3) If the callee has a 9 to 16 byte struct argument and the callee has
// stack arguments, the decision will be to not fast tail call. This is
// because, before fgMorphArgs is done, it is unknown whether the struct
// will be placed on the stack or enregistered. Therefore, the conservative
// decision of not fast tail calling is taken. This limitation should be
// removed if/when fgMorphArgs no longer depends on fgCanFastTailCall.
6887 | // |
// 4) Arm64 only: if there are HFA arguments and the callee has stack
// arguments, the decision will be reported as cannot fast tail call.
// This is because, before fgMorphArgs is done, it is unknown whether the struct
// will be placed on the stack or enregistered. Therefore, the conservative
// decision of not fast tail calling is taken.
6893 | // |
6894 | // Can fast tail call examples (amd64 Unix): |
6895 | // |
6896 | // -- Callee will have all register arguments -- |
6897 | // caller(int, int, int, int) |
6898 | // callee(int, int, float, int) |
6899 | // |
6900 | // -- Callee requires stack space that is equal to the caller -- |
6901 | // caller({ int, int }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack |
6902 | // space |
6903 | // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space |
6904 | // |
6905 | // -- Callee requires stack space that is less than the caller -- |
6906 | // caller({ int, int }, int, { int, int }, int, { int, int }, { int, int }) 6 int register arguments, 32 byte stack |
6907 | // space |
6908 | // callee(int, int, int, int, int, int, { int, int } ) // 6 int register arguments, 16 byte stack space |
6909 | // |
6910 | // -- Callee will have all register arguments -- |
6911 | // caller(int) |
6912 | // callee(int, int, int, int) |
6913 | // |
6914 | // Cannot fast tail call examples (amd64 Unix): |
6915 | // |
6916 | // -- Callee requires stack space that is larger than the caller -- |
6917 | // caller(float, float, float, float, float, float, float, float) -- 8 float register arguments |
6918 | // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space |
6919 | // |
6920 | // -- Callee has structs which cannot be enregistered (Implementation Limitation) -- |
6921 | // caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register |
6922 | // arguments, 24 byte stack space |
6923 | // callee({ double, double, double }) -- 24 bytes stack space |
6924 | // |
6925 | // -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) -- |
6926 | // caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space |
6927 | // callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space |
6928 | // |
6929 | // -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) -- |
6930 | // caller({ double, double, double, double, double, double }) // 48 byte stack |
6931 | // callee(int, int) -- 2 int registers |
6932 | |
6933 | bool Compiler::fgCanFastTailCall(GenTreeCall* callee) |
6934 | { |
6935 | #if FEATURE_FASTTAILCALL |
6936 | // To reach here means that the return types of the caller and callee are tail call compatible. |
6937 | // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type. |
6938 | // |
// In an implicit tail call case, callSig may not be available, but it is guaranteed to be available
// for explicit tail call cases. The reason callSig may not be available in the implicit tail call case is that
// a call node might be marked as an inline candidate and could fail to be inlined, in which case
// fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which
// currently does not copy/set callSig.
6944 | CLANG_FORMAT_COMMENT_ANCHOR; |
6945 | |
6946 | #ifdef DEBUG |
6947 | if (callee->IsTailPrefixedCall()) |
6948 | { |
6949 | assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass, |
6950 | (var_types)callee->gtReturnType, callee->callSig->retTypeClass)); |
6951 | } |
6952 | #endif |
6953 | |
6954 | auto reportFastTailCallDecision = [this, callee](const char* msg, size_t callerStackSize, size_t calleeStackSize) { |
6955 | #if DEBUG |
6956 | if ((JitConfig.JitReportFastTailCallDecisions()) == 1) |
6957 | { |
6958 | if (callee->gtCallType != CT_INDIRECT) |
6959 | { |
6960 | const char* methodName; |
6961 | |
6962 | methodName = eeGetMethodFullName(callee->gtCallMethHnd); |
6963 | |
6964 | printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: " , |
6965 | info.compFullName, methodName); |
6966 | } |
6967 | else |
6968 | { |
6969 | printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- " |
6970 | "Decision: " , |
6971 | info.compFullName); |
6972 | } |
6973 | |
6974 | if (callerStackSize != -1) |
6975 | { |
6976 | printf("%s (CallerStackSize: %d, CalleeStackSize: %d)\n\n" , msg, callerStackSize, calleeStackSize); |
6977 | } |
6978 | else |
6979 | { |
6980 | printf("%s\n\n" , msg); |
6981 | } |
6982 | } |
6983 | else |
6984 | { |
6985 | JITDUMP("[Fast tailcall decision]: %s\n" , msg); |
6986 | } |
6987 | #else |
6988 | (void)this; |
6989 | (void)callee; |
6990 | #endif // DEBUG |
6991 | }; |
6992 | |
6993 | // Note on vararg methods: |
// If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
// But we can be sure that the incoming arg area of the vararg caller is sufficient to hold its
// fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
// outgoing arg area required for the callee is bounded by the caller's fixed argument space.
//
// Note that the callee being a vararg method is not a problem since we can account for the params being passed.
7000 | unsigned nCallerArgs = info.compArgsCount; |
7001 | |
7002 | size_t callerArgRegCount = codeGen->intRegState.rsCalleeRegArgCount; |
7003 | size_t callerFloatArgRegCount = codeGen->floatRegState.rsCalleeRegArgCount; |
7004 | |
7005 | // Count the callee args including implicit and hidden. |
7006 | // Note that GenericContext and VarargCookie are added by importer while |
7007 | // importing the call to gtCallArgs list along with explicit user args. |
7008 | size_t calleeArgRegCount = 0; |
7009 | size_t calleeFloatArgRegCount = 0; |
7010 | |
7011 | if (callee->gtCallObjp) // thisPtr |
7012 | { |
7013 | ++calleeArgRegCount; |
7014 | } |
7015 | |
7016 | if (callee->HasRetBufArg()) // RetBuf |
7017 | { |
7018 | // We don't increment calleeArgRegCount here, since it is already in callee->gtCallArgs. |
7019 | |
7020 | // If callee has RetBuf param, caller too must have it. |
7021 | // Otherwise go the slow route. |
7022 | if (info.compRetBuffArg == BAD_VAR_NUM) |
7023 | { |
7024 | reportFastTailCallDecision("Callee has RetBuf but caller does not." , 0, 0); |
7025 | return false; |
7026 | } |
7027 | } |
7028 | |
// Count user args while tracking whether any of them is a multi-byte param
// that cannot be passed in a register. Note that we don't need to count
// non-standard and secret params passed in registers (e.g. R10, R11) since
// these won't contribute to the outgoing arg size.
// For each struct arg, hasMultiByteStackArgs tracks whether it can be passed in registers.
// If it cannot, we break out of the loop and do not fastTailCall. This is an implementation limitation
// where only the callee is checked for non-enregisterable structs.
7036 | // It is tracked with https://github.com/dotnet/coreclr/issues/12644. |
7037 | bool hasMultiByteStackArgs = false; |
7038 | bool hasTwoSlotSizedStruct = false; |
7039 | bool hasHfaArg = false; |
7040 | size_t nCalleeArgs = calleeArgRegCount; // Keep track of how many args we have. |
7041 | size_t calleeStackSize = 0; |
7042 | for (GenTree* args = callee->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2) |
7043 | { |
7044 | ++nCalleeArgs; |
7045 | assert(args->OperIsList()); |
7046 | GenTree* argx = args->gtOp.gtOp1; |
7047 | |
7048 | if (varTypeIsStruct(argx)) |
7049 | { |
7050 | // Actual arg may be a child of a GT_COMMA. Skip over comma opers. |
7051 | argx = argx->gtEffectiveVal(true /*commaOnly*/); |
7052 | |
7053 | // Get the size of the struct and see if it is register passable. |
7054 | CORINFO_CLASS_HANDLE objClass = nullptr; |
7055 | |
7056 | if (argx->OperGet() == GT_OBJ) |
7057 | { |
7058 | objClass = argx->AsObj()->gtClass; |
7059 | } |
7060 | else if (argx->IsLocal()) |
7061 | { |
7062 | objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle(); |
7063 | } |
7064 | if (objClass != nullptr) |
7065 | { |
7066 | #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) |
7067 | |
7068 | unsigned typeSize = 0; |
7069 | // We should have already broken out of the loop if we've set hasMultiByteStackArgs to true. |
7070 | assert(!hasMultiByteStackArgs); |
7071 | hasMultiByteStackArgs = |
7072 | !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false, false); |
7073 | |
7074 | #if defined(UNIX_AMD64_ABI) |
7075 | SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; |
7076 | |
7077 | assert(objClass != nullptr); |
7078 | eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); |
7079 | |
7080 | if (structDesc.passedInRegisters) |
7081 | { |
7082 | if (structDesc.eightByteCount == 2) |
7083 | { |
7084 | hasTwoSlotSizedStruct = true; |
7085 | } |
7086 | |
7087 | for (unsigned int i = 0; i < structDesc.eightByteCount; i++) |
7088 | { |
7089 | if (structDesc.IsIntegralSlot(i)) |
7090 | { |
7091 | ++calleeArgRegCount; |
7092 | } |
7093 | else if (structDesc.IsSseSlot(i)) |
7094 | { |
7095 | ++calleeFloatArgRegCount; |
7096 | } |
7097 | else |
7098 | { |
7099 | assert(false && "Invalid eightbyte classification type." ); |
7100 | break; |
7101 | } |
7102 | } |
7103 | } |
7104 | else |
7105 | { |
7106 | calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE); |
7107 | hasMultiByteStackArgs = true; |
7108 | } |
7109 | |
7110 | #elif defined(_TARGET_ARM64_) // ARM64 |
7111 | var_types hfaType = GetHfaType(argx); |
7112 | bool isHfaArg = varTypeIsFloating(hfaType); |
7113 | size_t size = 1; |
7114 | |
7115 | if (isHfaArg) |
7116 | { |
7117 | hasHfaArg = true; |
7118 | |
7119 | calleeFloatArgRegCount += GetHfaCount(argx); |
7120 | } |
7121 | else |
7122 | { |
7123 | // Structs are either passed in 1 or 2 (64-bit) slots |
7124 | size_t roundupSize = roundUp(typeSize, TARGET_POINTER_SIZE); |
7125 | size = roundupSize / TARGET_POINTER_SIZE; |
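
// Note (illustrative): a non-HFA struct larger than 16 bytes is passed by reference on ARM64, so it
// consumes a single pointer-sized slot here, while a 9-16 byte struct consumes two slots.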
7126 | |
7127 | if (size > 2) |
7128 | { |
7129 | size = 1; |
7130 | } |
7131 | |
7132 | else if (size == 2) |
7133 | { |
7134 | hasTwoSlotSizedStruct = true; |
7135 | } |
7136 | |
7137 | calleeArgRegCount += size; |
7138 | } |
7139 | |
7140 | #elif defined(WINDOWS_AMD64_ABI) |
7141 | |
7142 | ++calleeArgRegCount; |
7143 | |
7144 | #endif // UNIX_AMD64_ABI |
7145 | |
7146 | #else |
7147 | assert(!"Target platform ABI rules regarding passing struct type args in registers" ); |
7148 | unreached(); |
7149 | #endif //_TARGET_AMD64_ || _TARGET_ARM64_ |
7150 | } |
7151 | else |
7152 | { |
7153 | hasMultiByteStackArgs = true; |
7154 | } |
7155 | } |
7156 | else |
7157 | { |
7158 | varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount; |
7159 | } |
7160 | |
7161 | // We can break early on multiByte cases. |
7162 | if (hasMultiByteStackArgs) |
7163 | { |
7164 | break; |
7165 | } |
7166 | } |
7167 | |
7168 | const unsigned maxRegArgs = MAX_REG_ARG; |
7169 | |
// If we reached here, it means that the callee has only argument types which can be passed in
// a register and which, if passed on the stack, occupy exactly one stack slot in the outgoing arg area.
// If we are passing args on the stack for the callee and it has more args passed on the stack than
// the caller, then a fast tail call cannot be performed.
7174 | // |
7175 | // Note that the GC'ness of on stack args need not match since the arg setup area is marked |
7176 | // as non-interruptible for fast tail calls. |
7177 | |
7178 | #ifdef WINDOWS_AMD64_ABI |
7179 | assert(calleeStackSize == 0); |
7180 | size_t calleeStackSlots = ((calleeArgRegCount + calleeFloatArgRegCount) > maxRegArgs) |
7181 | ? (calleeArgRegCount + calleeFloatArgRegCount) - maxRegArgs |
7182 | : 0; |
7183 | calleeStackSize = calleeStackSlots * TARGET_POINTER_SIZE; |
7184 | size_t callerStackSize = info.compArgStackSize; |
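
// For example (illustrative): a callee taking 6 integer-sized args on Windows x64 uses 4 argument
// registers plus 2 stack slots, so calleeStackSize here would be 2 * TARGET_POINTER_SIZE = 16 bytes.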
7185 | |
7186 | bool hasStackArgs = false; |
7187 | |
7188 | if (callerStackSize > 0 || calleeStackSize > 0) |
7189 | { |
7190 | hasStackArgs = true; |
7191 | } |
7192 | |
// Go the slow route if it has multi-byte params. This is an implementation
// limitation; see https://github.com/dotnet/coreclr/issues/12644.
if (hasMultiByteStackArgs)
{
reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
7198 | return false; |
7199 | } |
7200 | |
// x64 Windows: if we have more callee argument registers used than MAX_REG_ARG, then
// make sure the callee's incoming argument count does not exceed the caller's.
7203 | if (hasStackArgs && (nCalleeArgs > nCallerArgs)) |
7204 | { |
7205 | reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)" , callerStackSize, |
7206 | calleeStackSize); |
7207 | return false; |
7208 | } |
7209 | |
7210 | #elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) |
7211 | |
7212 | // For *nix Amd64 and Arm64 check to see if all arguments for the callee |
// and caller are passed in registers. If not, ensure that the outgoing argument stack size
7214 | // requirement for the callee is less than or equal to the caller's entire stack frame usage. |
7215 | // |
// Also, in the case that we have to pass arguments on the stack, make sure
// that we are not dealing with structs that are >8 bytes.
7218 | |
7219 | bool hasStackArgs = false; |
7220 | size_t maxFloatRegArgs = MAX_FLOAT_REG_ARG; |
7221 | |
7222 | size_t calleeIntStackArgCount = calleeArgRegCount > maxRegArgs ? calleeArgRegCount - maxRegArgs : 0; |
7223 | size_t calleeFloatStackArgCount = |
7224 | calleeFloatArgRegCount > maxFloatRegArgs ? calleeFloatArgRegCount - maxFloatRegArgs : 0; |
7225 | |
7226 | size_t calleeStackArgCount = calleeIntStackArgCount + calleeFloatStackArgCount; |
7227 | size_t callerStackSize = info.compArgStackSize; |
7228 | calleeStackSize += calleeStackArgCount * TARGET_POINTER_SIZE; |
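
// For example (illustrative): on Unix x64 a callee taking 8 integer-sized args uses the 6 integer
// argument registers plus 2 stack slots, so calleeStackSize grows by 2 * TARGET_POINTER_SIZE = 16 bytes.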
7229 | |
7230 | if (callerStackSize > 0 || calleeStackSize > 0) |
7231 | { |
7232 | hasStackArgs = true; |
7233 | } |
7234 | |
// Go the slow route if it has multi-byte params. This is an implementation
// limitation; see https://github.com/dotnet/coreclr/issues/12644.
7237 | if (hasMultiByteStackArgs) |
7238 | { |
7239 | reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs" , callerStackSize, calleeStackSize); |
7240 | return false; |
7241 | } |
7242 | |
7243 | // Callee has a >8 and <=16 byte struct and arguments that has to go on the stack. Do not fastTailCall. |
7244 | if (calleeStackSize > 0 && hasTwoSlotSizedStruct) |
7245 | { |
7246 | reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasTwoSlotSizedStruct" , |
7247 | callerStackSize, calleeStackSize); |
7248 | return false; |
7249 | } |
7250 | |
// Callee has an HFA struct and arguments that have to go on the stack. Do not fastTailCall.
7252 | if (calleeStackSize > 0 && hasHfaArg) |
7253 | { |
7254 | reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasHfaArg" , callerStackSize, |
7255 | calleeStackSize); |
7256 | return false; |
7257 | } |
7258 | |
7259 | // TODO-AMD64-Unix |
7260 | // TODO-ARM64 |
7261 | // |
7262 | // LowerFastTailCall currently assumes nCalleeArgs <= nCallerArgs. This is |
7263 | // not true in many cases on x64 linux, remove this pessimization when |
7264 | // LowerFastTailCall is fixed. See https://github.com/dotnet/coreclr/issues/12468 |
7265 | // for more information. |
7266 | if (hasStackArgs && (nCalleeArgs > nCallerArgs)) |
7267 | { |
7268 | reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)" , callerStackSize, |
7269 | calleeStackSize); |
7270 | return false; |
7271 | } |
7272 | |
7273 | if (calleeStackSize > callerStackSize) |
7274 | { |
7275 | reportFastTailCallDecision("Will not fastTailCall calleeStackSize > callerStackSize" , callerStackSize, |
7276 | calleeStackSize); |
7277 | return false; |
7278 | } |
7279 | |
7280 | #else |
7281 | |
7282 | NYI("fastTailCall not supported on this Architecture." ); |
7283 | |
7284 | #endif // WINDOWS_AMD64_ABI |
7285 | |
7286 | reportFastTailCallDecision("Will fastTailCall" , callerStackSize, calleeStackSize); |
7287 | return true; |
7288 | #else // FEATURE_FASTTAILCALL |
7289 | return false; |
7290 | #endif |
7291 | } |
7292 | |
7293 | /***************************************************************************** |
7294 | * |
7295 | * Transform the given GT_CALL tree for tail call code generation. |
7296 | */ |
7297 | void Compiler::fgMorphTailCall(GenTreeCall* call, void* pfnCopyArgs) |
7298 | { |
7299 | JITDUMP("fgMorphTailCall (before):\n" ); |
7300 | DISPTREE(call); |
7301 | |
7302 | // The runtime requires that we perform a null check on the `this` argument before |
7303 | // tail calling to a virtual dispatch stub. This requirement is a consequence of limitations |
7304 | // in the runtime's ability to map an AV to a NullReferenceException if |
// the AV occurs in a dispatch stub that has an unmanaged caller.
7306 | if (call->IsVirtualStub()) |
7307 | { |
7308 | call->gtFlags |= GTF_CALL_NULLCHECK; |
7309 | } |
7310 | |
7311 | #if defined(_TARGET_ARM_) |
7312 | // For the helper-assisted tail calls, we need to push all the arguments |
7313 | // into a single list, and then add a few extra at the beginning |
7314 | |
7315 | // Check for PInvoke call types that we don't handle in codegen yet. |
7316 | assert(!call->IsUnmanaged()); |
7317 | assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL)); |
7318 | |
7319 | // First move the this pointer (if any) onto the regular arg list |
7320 | GenTree* thisPtr = NULL; |
7321 | if (call->gtCallObjp) |
7322 | { |
7323 | GenTree* objp = call->gtCallObjp; |
7324 | call->gtCallObjp = NULL; |
7325 | |
7326 | if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable()) |
7327 | { |
7328 | thisPtr = gtClone(objp, true); |
7329 | var_types vt = objp->TypeGet(); |
7330 | if (thisPtr == NULL) |
7331 | { |
7332 | // Too complex, so use a temp |
7333 | unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr" )); |
7334 | GenTree* asg = gtNewTempAssign(lclNum, objp); |
7335 | if (!call->IsVirtualVtable()) |
7336 | { |
7337 | // Add an indirection to get the nullcheck |
7338 | GenTree* tmp = gtNewLclvNode(lclNum, vt); |
7339 | GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp); |
7340 | asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind); |
7341 | } |
7342 | objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt)); |
7343 | thisPtr = gtNewLclvNode(lclNum, vt); |
7344 | } |
7345 | else if (!call->IsVirtualVtable()) |
7346 | { |
7347 | GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr); |
7348 | objp = gtNewOperNode(GT_COMMA, vt, ind, objp); |
7349 | thisPtr = gtClone(thisPtr, true); |
7350 | } |
7351 | |
7352 | call->gtFlags &= ~GTF_CALL_NULLCHECK; |
7353 | } |
7354 | |
7355 | call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs); |
7356 | } |
7357 | |
7358 | // Add the extra VSD parameter if needed |
7359 | if (call->IsVirtualStub()) |
7360 | { |
7361 | GenTree* stubAddrArg = fgGetStubAddrArg(call); |
7362 | // And push the stub address onto the list of arguments |
7363 | call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs); |
7364 | } |
7365 | else if (call->IsVirtualVtable()) |
7366 | { |
7367 | noway_assert(thisPtr != NULL); |
7368 | |
7369 | GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL)); |
7370 | GenTree* vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add); |
7371 | vtbl->gtFlags |= GTF_EXCEPT; |
7372 | |
7373 | unsigned vtabOffsOfIndirection; |
7374 | unsigned vtabOffsAfterIndirection; |
7375 | bool isRelative; |
7376 | info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection, |
7377 | &isRelative); |
7378 | |
7379 | /* Get the appropriate vtable chunk */ |
7380 | |
7381 | if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK) |
7382 | { |
7383 | add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL)); |
7384 | |
7385 | GenTree* indOffTree = nullptr; |
7386 | |
7387 | if (isRelative) |
7388 | { |
7389 | indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, |
7390 | nullptr DEBUGARG("virtual table call" )); |
7391 | } |
7392 | |
7393 | vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add); |
7394 | |
7395 | if (isRelative) |
7396 | { |
7397 | vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree); |
7398 | } |
7399 | } |
7400 | |
7401 | /* Now the appropriate vtable slot */ |
7402 | |
7403 | add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL)); |
7404 | |
7405 | GenTree* indOffTree = nullptr; |
7406 | |
7407 | if (isRelative) |
7408 | { |
7409 | indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, |
7410 | nullptr DEBUGARG("virtual table call 2" )); |
7411 | } |
7412 | |
7413 | vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add); |
7414 | |
7415 | if (isRelative) |
7416 | { |
7417 | vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree); |
7418 | } |
7419 | |
7420 | // Switch this to a plain indirect call |
7421 | call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; |
7422 | assert(!call->IsVirtual()); |
7423 | call->gtCallType = CT_INDIRECT; |
7424 | |
7425 | call->gtCallAddr = vtbl; |
7426 | call->gtCallCookie = NULL; |
7427 | call->gtFlags |= GTF_EXCEPT; |
7428 | } |
7429 | |
7430 | // Now inject a placeholder for the real call target that codegen will generate |
7431 | GenTree* arg = gtNewIconNode(0, TYP_I_IMPL); |
7432 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
7433 | |
7434 | // Lastly inject the pointer for the copy routine |
7435 | noway_assert(pfnCopyArgs != nullptr); |
7436 | arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR); |
7437 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
7438 | |
7439 | // It is now a varargs tail call |
7440 | call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; |
7441 | call->gtFlags &= ~GTF_CALL_POP_ARGS; |
7442 | |
7443 | #elif defined(_TARGET_XARCH_) |
7444 | |
7445 | // For the helper-assisted tail calls, we need to push all the arguments |
7446 | // into a single list, and then add a few extra at the beginning or end. |
7447 | // |
7448 | // For AMD64, the tailcall helper (JIT_TailCall) is defined as: |
7449 | // |
7450 | // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>) |
7451 | // |
7452 | // We need to add "copyRoutine" and "callTarget" extra params at the beginning. |
7453 | // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg |
7454 | // for callTarget here which will be replaced later with callTarget in tail call lowering. |
7455 | // |
7456 | // For x86, the tailcall helper is defined as: |
7457 | // |
7458 | // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* |
7459 | // callTarget) |
7460 | // |
7461 | // Note that the special arguments are on the stack, whereas the function arguments follow |
7462 | // the normal convention: there might be register arguments in ECX and EDX. The stack will |
7463 | // look like (highest address at the top): |
7464 | // first normal stack argument |
7465 | // ... |
7466 | // last normal stack argument |
7467 | // numberOfOldStackArgs |
7468 | // numberOfNewStackArgs |
7469 | // flags |
7470 | // callTarget |
7471 | // |
7472 | // Each special arg is 4 bytes. |
7473 | // |
7474 | // 'flags' is a bitmask where: |
7475 | // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all |
7476 | // callee-saved registers for tailcall functions. Note that the helper assumes |
7477 | // that the callee-saved registers live immediately below EBP, and must have been |
7478 | // pushed in this order: EDI, ESI, EBX. |
7479 | // 2 == call target is a virtual stub dispatch. |
7480 | // |
7481 | // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details |
7482 | // on the custom calling convention. |
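
// For example (illustrative): a virtual stub dispatch tail call that also needs the callee-saved
// registers restored would pass flags == 3 (1 | 2) to the x86 JIT_TailCall helper.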
7483 | |
7484 | // Check for PInvoke call types that we don't handle in codegen yet. |
7485 | assert(!call->IsUnmanaged()); |
7486 | assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr)); |
7487 | |
7488 | // Don't support tail calling helper methods |
7489 | assert(call->gtCallType != CT_HELPER); |
7490 | |
7491 | // We come this route only for tail prefixed calls that cannot be dispatched as |
7492 | // fast tail calls |
7493 | assert(!call->IsImplicitTailCall()); |
7494 | assert(!fgCanFastTailCall(call)); |
7495 | |
7496 | // First move the 'this' pointer (if any) onto the regular arg list. We do this because |
7497 | // we are going to prepend special arguments onto the argument list (for non-x86 platforms), |
7498 | // and thus shift where the 'this' pointer will be passed to a later argument slot. In |
7499 | // addition, for all platforms, we are going to change the call into a helper call. Our code |
7500 | // generation code for handling calls to helpers does not handle 'this' pointers. So, when we |
7501 | // do this transformation, we must explicitly create a null 'this' pointer check, if required, |
7502 | // since special 'this' pointer handling will no longer kick in. |
7503 | // |
7504 | // Some call types, such as virtual vtable calls, require creating a call address expression |
7505 | // that involves the "this" pointer. Lowering will sometimes create an embedded statement |
7506 | // to create a temporary that is assigned to the "this" pointer expression, and then use |
7507 | // that temp to create the call address expression. This temp creation embedded statement |
7508 | // will occur immediately before the "this" pointer argument, and then will be used for both |
7509 | // the "this" pointer argument as well as the call address expression. In the normal ordering, |
7510 | // the embedded statement establishing the "this" pointer temp will execute before both uses |
7511 | // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the |
7512 | // normal call argument list, and insert a placeholder which will hold the call address |
7513 | // expression. For non-x86, things are ok, because the order of execution of these is not |
7514 | // altered. However, for x86, the call address expression is inserted as the *last* argument |
7515 | // in the argument list, *after* the "this" pointer. It will be put on the stack, and be |
7516 | // evaluated first. To ensure we don't end up with out-of-order temp definition and use, |
7517 | // for those cases where call lowering creates an embedded form temp of "this", we will |
7518 | // create a temp here, early, that will later get morphed correctly. |
7519 | |
7520 | if (call->gtCallObjp) |
7521 | { |
7522 | GenTree* thisPtr = nullptr; |
7523 | GenTree* objp = call->gtCallObjp; |
7524 | call->gtCallObjp = nullptr; |
7525 | |
7526 | #ifdef _TARGET_X86_ |
7527 | if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal()) |
7528 | { |
7529 | // tmp = "this" |
7530 | unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr" )); |
7531 | GenTree* asg = gtNewTempAssign(lclNum, objp); |
7532 | |
7533 | // COMMA(tmp = "this", tmp) |
7534 | var_types vt = objp->TypeGet(); |
7535 | GenTree* tmp = gtNewLclvNode(lclNum, vt); |
7536 | thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp); |
7537 | |
7538 | objp = thisPtr; |
7539 | } |
7540 | #endif // _TARGET_X86_ |
7541 | |
7542 | if (call->NeedsNullCheck()) |
7543 | { |
7544 | // clone "this" if "this" has no side effects. |
7545 | if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT)) |
7546 | { |
7547 | thisPtr = gtClone(objp, true); |
7548 | } |
7549 | |
7550 | var_types vt = objp->TypeGet(); |
7551 | if (thisPtr == nullptr) |
7552 | { |
7553 | // create a temp if either "this" has side effects or "this" is too complex to clone. |
7554 | |
7555 | // tmp = "this" |
7556 | unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr" )); |
7557 | GenTree* asg = gtNewTempAssign(lclNum, objp); |
7558 | |
7559 | // COMMA(tmp = "this", deref(tmp)) |
7560 | GenTree* tmp = gtNewLclvNode(lclNum, vt); |
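// The indirection exists only to force an up-front null check of 'this'; its loaded value is not used.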
7561 | GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp); |
7562 | asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind); |
7563 | |
7564 | // COMMA(COMMA(tmp = "this", deref(tmp)), tmp) |
7565 | thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt)); |
7566 | } |
7567 | else |
7568 | { |
7569 | // thisPtr = COMMA(deref("this"), "this") |
7570 | GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr); |
7571 | thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true)); |
7572 | } |
7573 | |
7574 | call->gtFlags &= ~GTF_CALL_NULLCHECK; |
7575 | } |
7576 | else |
7577 | { |
7578 | thisPtr = objp; |
7579 | } |
7580 | |
// During rationalization, tmp="this" and the null check will
// materialize as embedded stmts in the right execution order.
7583 | assert(thisPtr != nullptr); |
7584 | call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs); |
7585 | } |
7586 | |
7587 | #if defined(_TARGET_AMD64_) |
7588 | |
7589 | // Add the extra VSD parameter to arg list in case of VSD calls. |
7590 | // Tail call arg copying thunk will move this extra VSD parameter |
7591 | // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk() |
7592 | // in Stublinkerx86.cpp for more details. |
7593 | if (call->IsVirtualStub()) |
7594 | { |
7595 | GenTree* stubAddrArg = fgGetStubAddrArg(call); |
7596 | // And push the stub address onto the list of arguments |
7597 | call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs); |
7598 | } |
7599 | |
7600 | // Now inject a placeholder for the real call target that Lower phase will generate. |
7601 | GenTree* arg = gtNewIconNode(0, TYP_I_IMPL); |
7602 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
7603 | |
7604 | // Inject the pointer for the copy routine to be used for struct copying |
7605 | noway_assert(pfnCopyArgs != nullptr); |
7606 | arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR); |
7607 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
7608 | |
7609 | #else // !_TARGET_AMD64_ |
7610 | |
7611 | // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will |
7612 | // append to the list. |
7613 | GenTreeArgList** ppArg = &call->gtCallArgs; |
7614 | for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest()) |
7615 | { |
7616 | ppArg = (GenTreeArgList**)&args->gtOp2; |
7617 | } |
7618 | assert(ppArg != nullptr); |
7619 | assert(*ppArg == nullptr); |
7620 | |
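// Compute how many REGSIZE_BYTES-sized slots the incoming stack arguments occupy:
// the total incoming argument area (compArgSize) minus the portion passed in registers.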
7621 | unsigned nOldStkArgsWords = |
7622 | (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES; |
7623 | GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL); |
7624 | *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs |
7625 | ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2); |
7626 | |
7627 | // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate. |
7628 | // The constant will be replaced. |
7629 | GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL); |
7630 | *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs |
7631 | ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2); |
7632 | |
7633 | // Inject a placeholder for the flags. |
7634 | // The constant will be replaced. |
7635 | GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL); |
7636 | *ppArg = gtNewListNode(arg1, nullptr); |
7637 | ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2); |
7638 | |
7639 | // Inject a placeholder for the real call target that the Lowering phase will generate. |
7640 | // The constant will be replaced. |
7641 | GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL); |
7642 | *ppArg = gtNewListNode(arg0, nullptr); |
7643 | |
7644 | #endif // !_TARGET_AMD64_ |
7645 | |
7646 | // It is now a varargs tail call dispatched via helper. |
7647 | call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; |
7648 | call->gtFlags &= ~GTF_CALL_POP_ARGS; |
7649 | |
7650 | #elif defined(_TARGET_ARM64_) |
7651 | NYI_ARM64("Tail calls via stub are unsupported on this platform." ); |
7652 | #endif // _TARGET_ARM64_ |
7653 | |
// The function is responsible for doing an explicit null check when it is necessary.
7655 | assert(!call->NeedsNullCheck()); |
7656 | |
7657 | JITDUMP("fgMorphTailCall (after):\n" ); |
7658 | DISPTREE(call); |
7659 | } |
7660 | |
7661 | //------------------------------------------------------------------------ |
7662 | // fgGetStubAddrArg: Return the virtual stub address for the given call. |
7663 | // |
7664 | // Notes: |
//    The JIT must place the address of the stub used to load the call target,
//    the "stub indirection cell", in a special call argument passed in a special register.
7667 | // |
7668 | // Arguments: |
7669 | // call - a call that needs virtual stub dispatching. |
7670 | // |
7671 | // Return Value: |
//    addr tree with its register requirement set.
7673 | // |
7674 | GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call) |
7675 | { |
7676 | assert(call->IsVirtualStub()); |
7677 | GenTree* stubAddrArg; |
7678 | if (call->gtCallType == CT_INDIRECT) |
7679 | { |
7680 | stubAddrArg = gtClone(call->gtCallAddr, true); |
7681 | } |
7682 | else |
7683 | { |
7684 | assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT); |
7685 | ssize_t addr = ssize_t(call->gtStubCallStubAddr); |
7686 | stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR); |
7687 | } |
7688 | assert(stubAddrArg != nullptr); |
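// Record the register the stub address must be passed in: the virtual stub parameter register.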
7689 | stubAddrArg->gtRegNum = virtualStubParamInfo->GetReg(); |
7690 | return stubAddrArg; |
7691 | } |
7692 | |
7693 | //------------------------------------------------------------------------------ |
7694 | // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop. |
7695 | // |
7696 | // |
7697 | // Arguments: |
7698 | // block - basic block ending with a recursive fast tail call |
7699 | // recursiveTailCall - recursive tail call to transform |
7700 | // |
7701 | // Notes: |
7702 | // The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop. |
7703 | |
7704 | void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall) |
7705 | { |
7706 | assert(recursiveTailCall->IsTailCallConvertibleToLoop()); |
7707 | GenTree* last = block->lastStmt(); |
7708 | assert(recursiveTailCall == last->gtStmt.gtStmtExpr); |
7709 | |
7710 | // Transform recursive tail call into a loop. |
7711 | |
7712 | GenTree* earlyArgInsertionPoint = last; |
7713 | IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx; |
7714 | |
7715 | // Hoist arg setup statement for the 'this' argument. |
7716 | GenTree* thisArg = recursiveTailCall->gtCallObjp; |
7717 | if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode()) |
7718 | { |
7719 | GenTree* thisArgStmt = gtNewStmt(thisArg, callILOffset); |
7720 | fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt); |
7721 | } |
7722 | |
7723 | // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first; |
7724 | // then the temps need to be assigned to the method parameters. This is done so that the caller |
7725 | // parameters are not re-assigned before call arguments depending on them are evaluated. |
7726 | // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of |
7727 | // where the next temp or parameter assignment should be inserted. |
7728 | |
7729 | // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first |
7730 | // while the second call argument (const 1) doesn't. |
7731 | // Basic block before tail recursion elimination: |
7732 | // ***** BB04, stmt 1 (top level) |
7733 | // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013) |
7734 | // [000033] --C - G------ - \--* call void RecursiveMethod |
7735 | // [000030] ------------ | / --* const int - 1 |
7736 | // [000031] ------------arg0 in rcx + --* +int |
7737 | // [000029] ------------ | \--* lclVar int V00 arg1 |
7738 | // [000032] ------------arg1 in rdx \--* const int 1 |
7739 | // |
7740 | // |
7741 | // Basic block after tail recursion elimination : |
7742 | // ***** BB04, stmt 1 (top level) |
7743 | // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) |
7744 | // [000030] ------------ | / --* const int - 1 |
7745 | // [000031] ------------ | / --* +int |
7746 | // [000029] ------------ | | \--* lclVar int V00 arg1 |
7747 | // [000050] - A---------- \--* = int |
7748 | // [000049] D------N---- \--* lclVar int V02 tmp0 |
7749 | // |
7750 | // ***** BB04, stmt 2 (top level) |
7751 | // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) |
7752 | // [000052] ------------ | / --* lclVar int V02 tmp0 |
7753 | // [000054] - A---------- \--* = int |
7754 | // [000053] D------N---- \--* lclVar int V00 arg0 |
7755 | |
7756 | // ***** BB04, stmt 3 (top level) |
7757 | // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) |
7758 | // [000032] ------------ | / --* const int 1 |
7759 | // [000057] - A---------- \--* = int |
7760 | // [000056] D------N---- \--* lclVar int V01 arg1 |
7761 | |
7762 | GenTree* tmpAssignmentInsertionPoint = last; |
7763 | GenTree* paramAssignmentInsertionPoint = last; |
7764 | |
7765 | // Process early args. They may contain both setup statements for late args and actual args. |
7766 | // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum |
7767 | // below has the correct second argument. |
7768 | int earlyArgIndex = (thisArg == nullptr) ? 0 : 1; |
7769 | for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr; |
7770 | (earlyArgIndex++, earlyArgs = earlyArgs->Rest())) |
7771 | { |
7772 | GenTree* earlyArg = earlyArgs->Current(); |
7773 | if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode()) |
7774 | { |
7775 | if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0) |
7776 | { |
7777 | // This is a setup node so we need to hoist it. |
7778 | GenTree* earlyArgStmt = gtNewStmt(earlyArg, callILOffset); |
7779 | fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt); |
7780 | } |
7781 | else |
7782 | { |
7783 | // This is an actual argument that needs to be assigned to the corresponding caller parameter. |
7784 | fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex); |
7785 | GenTree* paramAssignStmt = |
7786 | fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset, |
7787 | tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); |
7788 | if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr)) |
7789 | { |
7790 | // All temp assignments will happen before the first param assignment. |
7791 | tmpAssignmentInsertionPoint = paramAssignStmt; |
7792 | } |
7793 | } |
7794 | } |
7795 | } |
7796 | |
7797 | // Process late args. |
7798 | int lateArgIndex = 0; |
7799 | for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr; |
7800 | (lateArgIndex++, lateArgs = lateArgs->Rest())) |
7801 | { |
7802 | // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter. |
7803 | GenTree* lateArg = lateArgs->Current(); |
7804 | fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex); |
7805 | GenTree* paramAssignStmt = |
7806 | fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset, |
7807 | tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); |
7808 | |
7809 | if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr)) |
7810 | { |
7811 | // All temp assignments will happen before the first param assignment. |
7812 | tmpAssignmentInsertionPoint = paramAssignStmt; |
7813 | } |
7814 | } |
7815 | |
7816 | // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that |
7817 | // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that |
7818 | // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here. |
7819 | if (!info.compIsStatic && (lvaArg0Var != info.compThisArg)) |
7820 | { |
7821 | var_types thisType = lvaTable[info.compThisArg].TypeGet(); |
7822 | GenTree* arg0 = gtNewLclvNode(lvaArg0Var, thisType); |
7823 | GenTree* arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType)); |
7824 | GenTree* arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset); |
7825 | fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt); |
7826 | } |
7827 | |
7828 | // If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog |
7829 | // but this loop can't include the prolog. Since we don't have liveness information, we insert zero-initialization |
7830 | // for all non-parameter IL locals as well as temp structs with GC fields. |
7831 | // Liveness phase will remove unnecessary initializations. |
7832 | if (info.compInitMem) |
7833 | { |
7834 | unsigned varNum; |
7835 | LclVarDsc* varDsc; |
7836 | for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++) |
7837 | { |
7838 | #if FEATURE_FIXED_OUT_ARGS |
7839 | if (varNum == lvaOutgoingArgSpaceVar) |
7840 | { |
7841 | continue; |
7842 | } |
7843 | #endif // FEATURE_FIXED_OUT_ARGS |
7844 | if (!varDsc->lvIsParam) |
7845 | { |
7846 | var_types lclType = varDsc->TypeGet(); |
7847 | bool isUserLocal = (varNum < info.compLocalsCount); |
7848 | bool structWithGCFields = ((lclType == TYP_STRUCT) && (varDsc->lvStructGcCount > 0)); |
7849 | if (isUserLocal || structWithGCFields) |
7850 | { |
7851 | GenTree* lcl = gtNewLclvNode(varNum, lclType); |
7852 | GenTree* init = nullptr; |
7853 | if (varTypeIsStruct(lclType)) |
7854 | { |
7855 | const bool isVolatile = false; |
7856 | const bool isCopyBlock = false; |
7857 | init = gtNewBlkOpNode(lcl, gtNewIconNode(0), varDsc->lvSize(), isVolatile, isCopyBlock); |
7858 | init = fgMorphInitBlock(init); |
7859 | } |
7860 | else |
7861 | { |
7862 | GenTree* zero = gtNewZeroConNode(genActualType(lclType)); |
7863 | init = gtNewAssignNode(lcl, zero); |
7864 | } |
7865 | GenTree* initStmt = gtNewStmt(init, callILOffset); |
7866 | fgInsertStmtBefore(block, last, initStmt); |
7867 | } |
7868 | } |
7869 | } |
7870 | } |
7871 | |
7872 | // Remove the call |
7873 | fgRemoveStmt(block, last); |
7874 | |
7875 | // Set the loop edge. Ensure we have a scratch block and then target the |
7876 | // next block. Loop detection needs to see a pred out of the loop, so |
7877 | // mark the scratch block BBF_DONT_REMOVE to prevent empty block removal |
7878 | // on it. |
7879 | fgEnsureFirstBBisScratch(); |
7880 | fgFirstBB->bbFlags |= BBF_DONT_REMOVE; |
7881 | block->bbJumpKind = BBJ_ALWAYS; |
7882 | block->bbJumpDest = fgFirstBB->bbNext; |
7883 | fgAddRefPred(block->bbJumpDest, block); |
7884 | block->bbFlags &= ~BBF_HAS_JMP; |
7885 | } |
7886 | |
7887 | //------------------------------------------------------------------------------ |
7888 | // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter. |
7889 | // |
7890 | // |
7891 | // Arguments: |
7892 | // arg - argument to assign |
7893 | // argTabEntry - argument table entry corresponding to arg |
//    block - basic block the call is in
7895 | // callILOffset - IL offset of the call |
7896 | // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary) |
7897 | // paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted |
7898 | // |
7899 | // Return Value: |
7900 | // parameter assignment statement if one was inserted; nullptr otherwise. |
7901 | |
7902 | GenTree* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, |
7903 | fgArgTabEntry* argTabEntry, |
7904 | BasicBlock* block, |
7905 | IL_OFFSETX callILOffset, |
7906 | GenTree* tmpAssignmentInsertionPoint, |
7907 | GenTree* paramAssignmentInsertionPoint) |
7908 | { |
7909 | // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because |
7910 | // some argument trees may reference parameters directly. |
7911 | |
7912 | GenTree* argInTemp = nullptr; |
7913 | unsigned originalArgNum = argTabEntry->argNum; |
7914 | bool needToAssignParameter = true; |
7915 | |
7916 | // TODO-CQ: enable calls with struct arguments passed in registers. |
7917 | noway_assert(!varTypeIsStruct(arg->TypeGet())); |
7918 | |
7919 | if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl()) |
7920 | { |
7921 | // The argument is already assigned to a temp or is a const. |
7922 | argInTemp = arg; |
7923 | } |
7924 | else if (arg->OperGet() == GT_LCL_VAR) |
7925 | { |
7926 | unsigned lclNum = arg->AsLclVar()->gtLclNum; |
7927 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
7928 | if (!varDsc->lvIsParam) |
7929 | { |
7930 | // The argument is a non-parameter local so it doesn't need to be assigned to a temp. |
7931 | argInTemp = arg; |
7932 | } |
7933 | else if (lclNum == originalArgNum) |
7934 | { |
7935 | // The argument is the same parameter local that we were about to assign so |
7936 | // we can skip the assignment. |
7937 | needToAssignParameter = false; |
7938 | } |
7939 | } |
7940 | |
7941 | // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve |
7942 | // any caller parameters. Some common cases are handled above but we may be able to eliminate |
7943 | // more temp assignments. |
7944 | |
7945 | GenTree* paramAssignStmt = nullptr; |
7946 | if (needToAssignParameter) |
7947 | { |
7948 | if (argInTemp == nullptr) |
7949 | { |
7950 | // The argument is not assigned to a temp. We need to create a new temp and insert an assignment. |
7951 | // TODO: we can avoid a temp assignment if we can prove that the argument tree |
7952 | // doesn't involve any caller parameters. |
7953 | unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp" )); |
7954 | lvaTable[tmpNum].lvType = arg->gtType; |
7955 | GenTree* tempSrc = arg; |
7956 | GenTree* tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType); |
7957 | GenTree* tmpAssignNode = gtNewAssignNode(tempDest, tempSrc); |
7958 | GenTree* tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset); |
7959 | fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt); |
7960 | argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType); |
7961 | } |
7962 | |
7963 | // Now assign the temp to the parameter. |
7964 | LclVarDsc* paramDsc = lvaTable + originalArgNum; |
7965 | assert(paramDsc->lvIsParam); |
7966 | GenTree* paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType); |
7967 | GenTree* paramAssignNode = gtNewAssignNode(paramDest, argInTemp); |
7968 | paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset); |
7969 | |
7970 | fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt); |
7971 | } |
7972 | return paramAssignStmt; |
7973 | } |
7974 | |
7975 | /***************************************************************************** |
7976 | * |
7977 | * Transform the given GT_CALL tree for code generation. |
7978 | */ |
7979 | |
7980 | GenTree* Compiler::fgMorphCall(GenTreeCall* call) |
7981 | { |
7982 | if (varTypeIsStruct(call)) |
7983 | { |
7984 | fgFixupStructReturn(call); |
7985 | } |
7986 | if (call->CanTailCall()) |
7987 | { |
7988 | // It should either be an explicit (i.e. tail prefixed) or an implicit tail call |
7989 | assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall()); |
7990 | |
7991 | // It cannot be an inline candidate |
7992 | assert(!call->IsInlineCandidate()); |
7993 | |
7994 | const char* szFailReason = nullptr; |
7995 | bool hasStructParam = false; |
7996 | if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) |
7997 | { |
7998 | szFailReason = "Might turn into an intrinsic" ; |
7999 | } |
8000 | |
8001 | if (opts.compNeedSecurityCheck) |
8002 | { |
8003 | szFailReason = "Needs security check" ; |
8004 | } |
8005 | else if (compLocallocUsed || compLocallocOptimized) |
8006 | { |
8007 | szFailReason = "Localloc used" ; |
8008 | } |
8009 | #ifdef _TARGET_AMD64_ |
8010 | // Needed for Jit64 compat. |
8011 | // In future, enabling tail calls from methods that need GS cookie check |
8012 | // would require codegen side work to emit GS cookie check before a tail |
8013 | // call. |
8014 | else if (getNeedsGSSecurityCookie()) |
8015 | { |
8016 | szFailReason = "GS Security cookie check" ; |
8017 | } |
8018 | #endif |
8019 | #ifdef DEBUG |
8020 | // DDB 99324: Just disable tailcall under compGcChecks stress mode. |
8021 | else if (opts.compGcChecks) |
8022 | { |
8023 | szFailReason = "GcChecks" ; |
8024 | } |
8025 | #endif |
8026 | #if FEATURE_TAILCALL_OPT |
8027 | else |
8028 | { |
// We are still not sure whether it can be a tail call: when converting
// a call to an implicit tail call, we must check that there are no locals with
// their address taken. If there are, we have to assume that an address
// has been leaked and the current stack frame must live until after the final
// call.
8034 | |
// Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
// that lvHasLdAddrOp is much more conservative. We cannot just base it on
// lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
// during the morph stage. The reason for also checking lvAddrExposed is that in the case
// of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
// The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, ensures
// we are never incorrect.
8042 | // |
8043 | // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose |
8044 | // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed |
8045 | // is set. This avoids the need for iterating through all lcl vars of the current |
8046 | // method. Right now throughout the code base we are not consistently using 'set' |
8047 | // method to set lvHasLdAddrOp and lvAddrExposed flags. |
8048 | unsigned varNum; |
8049 | LclVarDsc* varDsc; |
8050 | bool hasAddrExposedVars = false; |
8051 | bool hasStructPromotedParam = false; |
8052 | bool hasPinnedVars = false; |
8053 | |
8054 | for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++) |
8055 | { |
8056 | // If the method is marked as an explicit tail call we will skip the |
8057 | // following three hazard checks. |
8058 | // We still must check for any struct parameters and set 'hasStructParam' |
8059 | // so that we won't transform the recursive tail call into a loop. |
8060 | // |
8061 | if (call->IsImplicitTailCall()) |
8062 | { |
8063 | if (varDsc->lvHasLdAddrOp) |
8064 | { |
8065 | hasAddrExposedVars = true; |
8066 | break; |
8067 | } |
8068 | if (varDsc->lvAddrExposed) |
8069 | { |
8070 | if (lvaIsImplicitByRefLocal(varNum)) |
8071 | { |
8072 | // The address of the implicit-byref is a non-address use of the pointer parameter. |
8073 | } |
8074 | else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl)) |
8075 | { |
8076 | // The address of the implicit-byref's field is likewise a non-address use of the pointer |
8077 | // parameter. |
8078 | } |
8079 | else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum)) |
8080 | { |
8081 | // This temp was used for struct promotion bookkeeping. It will not be used, and will have |
8082 | // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs. |
8083 | assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl)); |
8084 | assert(fgGlobalMorph); |
8085 | } |
8086 | else |
8087 | { |
8088 | hasAddrExposedVars = true; |
8089 | break; |
8090 | } |
8091 | } |
8092 | if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum)) |
8093 | { |
8094 | hasStructPromotedParam = true; |
8095 | break; |
8096 | } |
8097 | if (varDsc->lvPinned) |
8098 | { |
8099 | // A tail call removes the method from the stack, which means the pinning |
8100 | // goes away for the callee. We can't allow that. |
8101 | hasPinnedVars = true; |
8102 | break; |
8103 | } |
8104 | } |
8105 | if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam) |
8106 | { |
8107 | hasStructParam = true; |
8108 | // This prevents transforming a recursive tail call into a loop |
8109 | // but doesn't prevent tail call optimization so we need to |
// look at the rest of the parameters.
8111 | continue; |
8112 | } |
8113 | } |
8114 | |
8115 | if (hasAddrExposedVars) |
8116 | { |
8117 | szFailReason = "Local address taken" ; |
8118 | } |
8119 | if (hasStructPromotedParam) |
8120 | { |
8121 | szFailReason = "Has Struct Promoted Param" ; |
8122 | } |
8123 | if (hasPinnedVars) |
8124 | { |
8125 | szFailReason = "Has Pinned Vars" ; |
8126 | } |
8127 | } |
8128 | #endif // FEATURE_TAILCALL_OPT |
8129 | |
8130 | var_types callType = call->TypeGet(); |
8131 | |
// We have to ensure that we pass the incoming retValBuf as the
// outgoing one. Using a temp will not do, as this function will
// not regain control to do the copy.
8135 | |
8136 | if (info.compRetBuffArg != BAD_VAR_NUM) |
8137 | { |
8138 | noway_assert(callType == TYP_VOID); |
8139 | GenTree* retValBuf = call->gtCallArgs->gtOp.gtOp1; |
8140 | if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg) |
8141 | { |
8142 | szFailReason = "Need to copy return buffer" ; |
8143 | } |
8144 | } |
8145 | |
// If this is an opportunistic tail call and cannot be dispatched as a
// fast tail call, go the non-tail-call route. This is done for perf
// reasons.
//
// Avoid the cost of determining whether the call can be dispatched as a fast tail
// call if we already know that the tail call cannot be honored for other
// reasons.
8153 | bool canFastTailCall = false; |
8154 | if (szFailReason == nullptr) |
8155 | { |
8156 | canFastTailCall = fgCanFastTailCall(call); |
8157 | if (!canFastTailCall) |
8158 | { |
// Implicit or opportunistic tail calls are always dispatched via the fast tail call
// mechanism and never via the tail call helper, for perf reasons.
8161 | if (call->IsImplicitTailCall()) |
8162 | { |
8163 | szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp" ; |
8164 | } |
8165 | else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this)) |
8166 | { |
// If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
// dispatched as a fast tail call.
8169 | |
// Methods with non-standard args will have an indirection cell or cookie param passed
// in a callee-trash register (e.g. R11). The tail call helper doesn't preserve it before
// tail calling the target method, and hence the ".tail" prefix on such calls needs to be
// ignored.
//
// Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require an
// extra stub param (e.g. in R11 on Amd64), they can still be called via the tail call helper.
// This is done by adding stubAddr as an additional arg before the original list of
// args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
8179 | // in Stublinkerx86.cpp. |
8180 | szFailReason = "Method with non-standard args passed in callee trash register cannot be tail " |
8181 | "called via helper" ; |
8182 | } |
8183 | #ifdef _TARGET_ARM64_ |
8184 | else |
8185 | { |
8186 | // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER. |
8187 | // So, bail out if we can't make fast tail call. |
8188 | szFailReason = "Non-qualified fast tail call" ; |
8189 | } |
8190 | #endif |
8191 | } |
8192 | } |
8193 | |
8194 | // Clear these flags before calling fgMorphCall() to avoid recursion. |
8195 | bool isTailPrefixed = call->IsTailPrefixedCall(); |
8196 | call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL; |
8197 | |
8198 | #if FEATURE_TAILCALL_OPT |
8199 | call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL; |
8200 | #endif |
8201 | |
8202 | if (szFailReason == nullptr) |
8203 | { |
8204 | if (!fgCheckStmtAfterTailCall()) |
8205 | { |
8206 | szFailReason = "Unexpected statements after the tail call" ; |
8207 | } |
8208 | } |
8209 | |
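// On non-x86 targets the helper-based tail call needs a VM-supplied thunk that copies the
// outgoing arguments; request it now so the tail call can be rejected if no thunk is available.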
8210 | void* pfnCopyArgs = nullptr; |
8211 | #if !defined(_TARGET_X86_) |
8212 | if (!canFastTailCall && szFailReason == nullptr) |
8213 | { |
8214 | pfnCopyArgs = |
8215 | info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, call->IsVirtualStub() |
8216 | ? CORINFO_TAILCALL_STUB_DISPATCH_ARG |
8217 | : CORINFO_TAILCALL_NORMAL); |
8218 | if (pfnCopyArgs == nullptr) |
8219 | { |
8220 | if (!info.compMatchedVM) |
8221 | { |
8222 | // If we don't have a matched VM, we won't get valid results when asking for a thunk. |
8223 | pfnCopyArgs = UlongToPtr(0xCA11CA11); // "callcall" |
8224 | } |
8225 | else |
8226 | { |
8227 | szFailReason = "TailCallCopyArgsThunk not available." ; |
8228 | } |
8229 | } |
8230 | } |
8231 | #endif // !_TARGET_X86_ |
8232 | |
8233 | if (szFailReason != nullptr) |
8234 | { |
8235 | #ifdef DEBUG |
8236 | if (verbose) |
8237 | { |
8238 | printf("\nRejecting tail call late for call " ); |
8239 | printTreeID(call); |
8240 | printf(": %s\n" , szFailReason); |
8241 | } |
8242 | #endif |
8243 | |
8244 | // for non user funcs, we have no handles to report |
8245 | info.compCompHnd->reportTailCallDecision(nullptr, |
8246 | (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, |
8247 | isTailPrefixed, TAILCALL_FAIL, szFailReason); |
8248 | |
8249 | goto NO_TAIL_CALL; |
8250 | } |
8251 | |
8252 | #if !FEATURE_TAILCALL_OPT_SHARED_RETURN |
8253 | // We enable shared-ret tail call optimization for recursive calls even if |
8254 | // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined. |
8255 | if (gtIsRecursiveCall(call)) |
8256 | #endif |
8257 | { |
8258 | // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN, |
8259 | // but if the call falls through to a ret, and we are doing a tailcall, change it here. |
8260 | if (compCurBB->bbJumpKind != BBJ_RETURN) |
8261 | { |
8262 | compCurBB->bbJumpKind = BBJ_RETURN; |
8263 | } |
8264 | } |
8265 | |
8266 | // Set this flag before calling fgMorphCall() to prevent inlining this call. |
8267 | call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL; |
8268 | |
8269 | bool fastTailCallToLoop = false; |
8270 | #if FEATURE_TAILCALL_OPT |
8271 | // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register |
// or the return type is a struct that can be passed in a register.
8273 | // |
8274 | // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through |
// a hidden generic context param or through a keep-alive thisptr), then transforming a recursive
// call to such a method requires that the generic context stored on the stack slot be updated. Right now,
8277 | // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming |
8278 | // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the |
8279 | // generic type parameters of both caller and callee generic method are the same. |
8280 | if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() && |
8281 | !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && |
8282 | !varTypeIsStruct(call->TypeGet()) && ((info.compClassAttr & CORINFO_FLG_MARSHAL_BYREF) == 0)) |
8283 | { |
8284 | call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP; |
8285 | fastTailCallToLoop = true; |
8286 | } |
8287 | #endif |
8288 | |
8289 | // Do some target-specific transformations (before we process the args, etc.) |
8290 | // This is needed only for tail prefixed calls that cannot be dispatched as |
8291 | // fast calls. |
8292 | if (!canFastTailCall) |
8293 | { |
8294 | fgMorphTailCall(call, pfnCopyArgs); |
8295 | } |
8296 | |
// Implementation note: If we optimize the tailcall to do a direct jump
8298 | // to the target function (after stomping on the return address, etc), |
8299 | // without using CORINFO_HELP_TAILCALL, we have to make certain that |
8300 | // we don't starve the hijacking logic (by stomping on the hijacked |
8301 | // return address etc). |
8302 | |
8303 | // At this point, we are committed to do the tailcall. |
8304 | compTailCallUsed = true; |
8305 | |
8306 | CorInfoTailCall tailCallResult; |
8307 | |
8308 | if (fastTailCallToLoop) |
8309 | { |
8310 | tailCallResult = TAILCALL_RECURSIVE; |
8311 | } |
8312 | else if (canFastTailCall) |
8313 | { |
8314 | tailCallResult = TAILCALL_OPTIMIZED; |
8315 | } |
8316 | else |
8317 | { |
8318 | tailCallResult = TAILCALL_HELPER; |
8319 | } |
8320 | |
8321 | // for non user funcs, we have no handles to report |
8322 | info.compCompHnd->reportTailCallDecision(nullptr, |
8323 | (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, |
8324 | isTailPrefixed, tailCallResult, nullptr); |
8325 | |
// As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
// to avoid doing any extra work for the return value.
8328 | call->gtType = TYP_VOID; |
8329 | |
8330 | #ifdef DEBUG |
8331 | if (verbose) |
8332 | { |
8333 | printf("\nGTF_CALL_M_TAILCALL bit set for call " ); |
8334 | printTreeID(call); |
8335 | printf("\n" ); |
8336 | if (fastTailCallToLoop) |
8337 | { |
8338 | printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call " ); |
8339 | printTreeID(call); |
8340 | printf("\n" ); |
8341 | } |
8342 | } |
8343 | #endif |
8344 | |
8345 | GenTree* stmtExpr = fgMorphStmt->gtStmtExpr; |
8346 | |
8347 | #ifdef DEBUG |
8348 | // Tail call needs to be in one of the following IR forms |
8349 | // Either a call stmt or |
8350 | // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..))) |
8351 | // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..))) |
8352 | // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP) |
8353 | // In the above, |
8354 | // GT_CASTS may be nested. |
8355 | genTreeOps stmtOper = stmtExpr->gtOper; |
8356 | if (stmtOper == GT_CALL) |
8357 | { |
8358 | assert(stmtExpr == call); |
8359 | } |
8360 | else |
8361 | { |
8362 | assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA); |
8363 | GenTree* treeWithCall; |
8364 | if (stmtOper == GT_RETURN) |
8365 | { |
8366 | treeWithCall = stmtExpr->gtGetOp1(); |
8367 | } |
8368 | else if (stmtOper == GT_COMMA) |
8369 | { |
8370 | // Second operation must be nop. |
8371 | assert(stmtExpr->gtGetOp2()->IsNothingNode()); |
8372 | treeWithCall = stmtExpr->gtGetOp1(); |
8373 | } |
8374 | else |
8375 | { |
8376 | treeWithCall = stmtExpr->gtGetOp2(); |
8377 | } |
8378 | |
8379 | // Peel off casts |
8380 | while (treeWithCall->gtOper == GT_CAST) |
8381 | { |
8382 | assert(!treeWithCall->gtOverflow()); |
8383 | treeWithCall = treeWithCall->gtGetOp1(); |
8384 | } |
8385 | |
8386 | assert(treeWithCall == call); |
8387 | } |
8388 | #endif |
8389 | GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt; |
8390 | // Remove all stmts after the call. |
8391 | while (nextMorphStmt != nullptr) |
8392 | { |
8393 | GenTreeStmt* stmtToRemove = nextMorphStmt; |
8394 | nextMorphStmt = stmtToRemove->gtNextStmt; |
8395 | fgRemoveStmt(compCurBB, stmtToRemove); |
8396 | } |
8397 | |
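// Make the call itself the statement's expression; the GT_RETURN/GT_ASG/GT_COMMA wrapper
// validated above is discarded.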
8398 | fgMorphStmt->gtStmtExpr = call; |
8399 | |
8400 | // Tail call via helper: The VM can't use return address hijacking if we're |
8401 | // not going to return and the helper doesn't have enough info to safely poll, |
8402 | // so we poll before the tail call, if the block isn't already safe. Since |
// tail call via helper is a slow mechanism, it doesn't matter whether we emit
// a GC poll. This is done to be in parity with Jit64. Also this avoids a GC info
// size increase if almost all methods are expected to be tail calls (e.g. F#).
8406 | // |
8407 | // Note that we can avoid emitting GC-poll if we know that the current BB is |
8408 | // dominated by a Gc-SafePoint block. But we don't have dominator info at this |
// point. One option is to just add a placeholder node for a GC poll (e.g. GT_GCPOLL)
// here and remove it in lowering if the block is dominated by a GC-SafePoint. For
// now it is not clear whether optimizing slow tail calls is worth the effort. As a
8412 | // low cost check, we check whether the first and current basic blocks are |
8413 | // GC-SafePoints. |
8414 | // |
8415 | // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder() |
8416 | // is going to mark the method as fully interruptible if the block containing this tail |
8417 | // call is reachable without executing any call. |
8418 | if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) || |
8419 | !fgCreateGCPoll(GCPOLL_INLINE, compCurBB)) |
8420 | { |
8421 | // We didn't insert a poll block, so we need to morph the call now |
8422 | // (Normally it will get morphed when we get to the split poll block) |
8423 | GenTree* temp = fgMorphCall(call); |
8424 | noway_assert(temp == call); |
8425 | } |
8426 | |
8427 | // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to |
8428 | // the target. So we don't need an epilog - just like CORINFO_HELP_THROW. |
8429 | // |
8430 | // Fast tail call: in case of fast tail calls, we need a jmp epilog and |
8431 | // hence mark it as BBJ_RETURN with BBF_JMP flag set. |
8432 | noway_assert(compCurBB->bbJumpKind == BBJ_RETURN); |
8433 | |
8434 | if (canFastTailCall) |
8435 | { |
8436 | compCurBB->bbFlags |= BBF_HAS_JMP; |
8437 | } |
8438 | else |
8439 | { |
8440 | compCurBB->bbJumpKind = BBJ_THROW; |
8441 | } |
8442 | |
// For non-void calls, we return a placeholder which will be
8444 | // used by the parent GT_RETURN node of this call. |
8445 | |
8446 | GenTree* result = call; |
8447 | if (callType != TYP_VOID && info.compRetType != TYP_VOID) |
8448 | { |
8449 | #ifdef FEATURE_HFA |
8450 | // Return a dummy node, as the return is already removed. |
8451 | if (callType == TYP_STRUCT) |
8452 | { |
8453 | // This is a HFA, use float 0. |
8454 | callType = TYP_FLOAT; |
8455 | } |
8456 | #elif defined(UNIX_AMD64_ABI) |
8457 | // Return a dummy node, as the return is already removed. |
8458 | if (varTypeIsStruct(callType)) |
8459 | { |
8460 | // This is a register-returned struct. Return a 0. |
8461 | // The actual return registers are hacked in lower and the register allocator. |
8462 | callType = TYP_INT; |
8463 | } |
8464 | #endif |
8465 | #ifdef FEATURE_SIMD |
8466 | // Return a dummy node, as the return is already removed. |
8467 | if (varTypeIsSIMD(callType)) |
8468 | { |
8469 | callType = TYP_DOUBLE; |
8470 | } |
8471 | #endif |
8472 | result = gtNewZeroConNode(genActualType(callType)); |
8473 | result = fgMorphTree(result); |
8474 | } |
8475 | |
8476 | return result; |
8477 | } |
8478 | |
8479 | NO_TAIL_CALL: |
8480 | |
8481 | if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 && |
8482 | (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR) |
8483 | #ifdef FEATURE_READYTORUN_COMPILER |
8484 | || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR) |
8485 | #endif |
8486 | ) && |
8487 | (call == fgMorphStmt->gtStmtExpr)) |
8488 | { |
// This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
8490 | // Transform it into a null check. |
8491 | |
8492 | GenTree* thisPtr = call->gtCallArgs->gtOp.gtOp1; |
8493 | |
8494 | GenTree* nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr); |
8495 | nullCheck->gtFlags |= GTF_EXCEPT; |
8496 | |
8497 | return fgMorphTree(nullCheck); |
8498 | } |
8499 | |
8500 | noway_assert(call->gtOper == GT_CALL); |
8501 | |
8502 | // |
8503 | // Only count calls once (only in the global morph phase) |
8504 | // |
8505 | if (fgGlobalMorph) |
8506 | { |
8507 | if (call->gtCallType == CT_INDIRECT) |
8508 | { |
8509 | optCallCount++; |
8510 | optIndirectCallCount++; |
8511 | } |
8512 | else if (call->gtCallType == CT_USER_FUNC) |
8513 | { |
8514 | optCallCount++; |
8515 | if (call->IsVirtual()) |
8516 | { |
8517 | optIndirectCallCount++; |
8518 | } |
8519 | } |
8520 | } |
8521 | |
8522 | // Couldn't inline - remember that this BB contains method calls |
8523 | |
8524 | // If this is a 'regular' call, mark the basic block as |
8525 | // having a call (for computing full interruptibility). |
8526 | CLANG_FORMAT_COMMENT_ANCHOR; |
8527 | |
8528 | if (IsGcSafePoint(call)) |
8529 | { |
8530 | compCurBB->bbFlags |= BBF_GC_SAFE_POINT; |
8531 | } |
8532 | |
8533 | // Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag |
8534 | // |
8535 | // We need to do these before the arguments are morphed |
8536 | if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)) |
8537 | { |
8538 | // See if this is foldable |
8539 | GenTree* optTree = gtFoldExprCall(call); |
8540 | |
8541 | // If we optimized, morph the result |
8542 | if (optTree != call) |
8543 | { |
8544 | return fgMorphTree(optTree); |
8545 | } |
8546 | } |
8547 | |
8548 | // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack. |
8549 | GenTree* origDest = nullptr; // Will only become non-null if we do the transformation (and thus require |
8550 | // copy-back). |
8551 | unsigned retValTmpNum = BAD_VAR_NUM; |
8552 | CORINFO_CLASS_HANDLE structHnd = nullptr; |
8553 | if (call->HasRetBufArg() && |
8554 | call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null). |
8555 | { |
// We're enforcing the invariant that return buffer pointers (at least for
// struct return types containing GC pointers) are never pointers into the heap.
// The large majority of cases are addresses of local variables, which are OK.
8559 | // Otherwise, allocate a local of the given struct type, pass its address, |
8560 | // then assign from that into the proper destination. (We don't need to do this |
8561 | // if we're passing the caller's ret buff arg to the callee, since the caller's caller |
8562 | // will maintain the same invariant.) |
8563 | |
8564 | GenTree* dest = call->gtCallArgs->gtOp.gtOp1; |
8565 | assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above. |
8566 | if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)) |
8567 | { |
8568 | // We'll exempt helper calls from this, assuming that the helper implementation |
8569 | // follows the old convention, and does whatever barrier is required. |
8570 | if (call->gtCallType != CT_HELPER) |
8571 | { |
8572 | structHnd = call->gtRetClsHnd; |
8573 | if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) && |
8574 | !(dest->OperGet() == GT_LCL_VAR && dest->gtLclVar.gtLclNum == info.compRetBuffArg)) |
8575 | { |
8576 | origDest = dest; |
8577 | |
8578 | retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg" )); |
8579 | lvaSetStruct(retValTmpNum, structHnd, true); |
8580 | dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); |
8581 | } |
8582 | } |
8583 | } |
8584 | |
8585 | call->gtCallArgs->gtOp.gtOp1 = dest; |
8586 | } |
8587 | |
8588 | /* Process the "normal" argument list */ |
8589 | call = fgMorphArgs(call); |
8590 | noway_assert(call->gtOper == GT_CALL); |
8591 | |
// Morph a stelem.ref helper call that stores a null value into a plain array store that doesn't use the helper.
8593 | // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place. |
8594 | if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST))) |
8595 | { |
8596 | GenTree* value = gtArgEntryByArgNum(call, 2)->node; |
8597 | if (value->IsIntegralConst(0)) |
8598 | { |
8599 | assert(value->OperGet() == GT_CNS_INT); |
8600 | |
8601 | GenTree* arr = gtArgEntryByArgNum(call, 0)->node; |
8602 | GenTree* index = gtArgEntryByArgNum(call, 1)->node; |
8603 | |
8604 | // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy |
8605 | // the spill trees as well if necessary. |
8606 | GenTreeOp* argSetup = nullptr; |
8607 | for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest()) |
8608 | { |
8609 | GenTree* const arg = earlyArgs->Current(); |
8610 | if (arg->OperGet() != GT_ASG) |
8611 | { |
8612 | continue; |
8613 | } |
8614 | |
8615 | assert(arg != arr); |
8616 | assert(arg != index); |
8617 | |
8618 | arg->gtFlags &= ~GTF_LATE_ARG; |
8619 | |
8620 | GenTree* op1 = argSetup; |
8621 | if (op1 == nullptr) |
8622 | { |
8623 | op1 = gtNewNothingNode(); |
8624 | #if DEBUG |
8625 | op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
8626 | #endif // DEBUG |
8627 | } |
8628 | |
8629 | argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg); |
8630 | |
8631 | #if DEBUG |
8632 | argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
8633 | #endif // DEBUG |
8634 | } |
8635 | |
8636 | #ifdef DEBUG |
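// The arr, index and value trees were already morphed as call arguments; clear their
// "already morphed" debug flags so that they can be morphed again as part of the new store below.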
8637 | auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult { |
8638 | (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
8639 | return WALK_CONTINUE; |
8640 | }; |
8641 | |
8642 | fgWalkTreePost(&arr, resetMorphedFlag); |
8643 | fgWalkTreePost(&index, resetMorphedFlag); |
8644 | fgWalkTreePost(&value, resetMorphedFlag); |
8645 | #endif // DEBUG |
8646 | |
8647 | GenTree* const nullCheckedArr = impCheckForNullPointer(arr); |
8648 | GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index); |
8649 | GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value); |
8650 | arrStore->gtFlags |= GTF_ASG; |
8651 | |
8652 | GenTree* result = fgMorphTree(arrStore); |
8653 | if (argSetup != nullptr) |
8654 | { |
8655 | result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result); |
8656 | #if DEBUG |
8657 | result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
8658 | #endif // DEBUG |
8659 | } |
8660 | |
8661 | return result; |
8662 | } |
8663 | } |
8664 | |
8665 | // Optimize get_ManagedThreadId(get_CurrentThread) |
8666 | if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) && |
8667 | info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId) |
8668 | { |
8669 | noway_assert(origDest == nullptr); |
8670 | noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr); |
8671 | |
8672 | GenTree* innerCall = call->gtCallLateArgs->gtOp.gtOp1; |
8673 | |
8674 | if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) && |
8675 | info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) == |
8676 | CORINFO_INTRINSIC_GetCurrentManagedThread) |
8677 | { |
8678 | // substitute expression with call to helper |
8679 | GenTree* newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT); |
8680 | JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n" ); |
8681 | return fgMorphTree(newCall); |
8682 | } |
8683 | } |
8684 | |
8685 | if (origDest != nullptr) |
8686 | { |
8687 | GenTree* retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); |
8688 | // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused |
8689 | // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the |
8690 | // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to |
8691 | // be correct. |
8692 | if (origDest->OperGet() == GT_ASG) |
8693 | { |
8694 | if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR) |
8695 | { |
8696 | GenTree* var = origDest->gtOp.gtOp1; |
8697 | origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest, |
8698 | gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet())); |
8699 | } |
8700 | } |
8701 | GenTree* copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false); |
8702 | copyBlk = fgMorphTree(copyBlk); |
8703 | GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk); |
8704 | #ifdef DEBUG |
8705 | result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
8706 | #endif |
8707 | return result; |
8708 | } |
8709 | |
8710 | if (call->IsNoReturn()) |
8711 | { |
8712 | // |
8713 | // If we know that the call does not return then we can set fgRemoveRestOfBlock |
8714 | // to remove all subsequent statements and change the call's basic block to BBJ_THROW. |
8715 | // As a result the compiler won't need to preserve live registers across the call. |
8716 | // |
// This isn't needed for tail calls as there shouldn't be any code after the call anyway.
8718 | // Besides, the tail call code is part of the epilog and converting the block to |
8719 | // BBJ_THROW would result in the tail call being dropped as the epilog is generated |
8720 | // only for BBJ_RETURN blocks. |
8721 | // |
8722 | // Currently this doesn't work for non-void callees. Some of the code that handles |
8723 | // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes |
8724 | // do not have this flag by default. We could add the flag here but the proper solution |
8725 | // would be to replace the return expression with a local var node during inlining |
8726 | // so the rest of the call tree stays in a separate statement. That statement can then |
8727 | // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere. |
8728 | // |
8729 | |
8730 | if (!call->IsTailCall() && call->TypeGet() == TYP_VOID) |
8731 | { |
8732 | fgRemoveRestOfBlock = true; |
8733 | } |
8734 | } |
8735 | |
8736 | return call; |
8737 | } |
8738 | |
8739 | /***************************************************************************** |
8740 | * |
8741 | * Transform the given GTK_CONST tree for code generation. |
8742 | */ |
8743 | |
8744 | GenTree* Compiler::fgMorphConst(GenTree* tree) |
8745 | { |
8746 | assert(tree->OperKind() & GTK_CONST); |
8747 | |
8748 | /* Clear any exception flags or other unnecessary flags |
8749 | * that may have been set before folding this node to a constant */ |
8750 | |
8751 | tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS); |
8752 | |
8753 | if (tree->OperGet() != GT_CNS_STR) |
8754 | { |
8755 | return tree; |
8756 | } |
8757 | |
8758 | // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will |
// guarantee slow performance for that block. Instead, cache the return value
// of CORINFO_HELP_STRCNS and go to the cache first, giving reasonable perf.
8761 | |
8762 | if (compCurBB->bbJumpKind == BBJ_THROW) |
8763 | { |
8764 | CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd); |
8765 | if (helper != CORINFO_HELP_UNDEF) |
8766 | { |
// For unimportant blocks, we want to construct the string lazily
8768 | |
8769 | GenTreeArgList* args; |
8770 | if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE) |
8771 | { |
8772 | args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT)); |
8773 | } |
8774 | else |
8775 | { |
8776 | args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT), |
8777 | gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd)); |
8778 | } |
8779 | |
8780 | tree = gtNewHelperCallNode(helper, TYP_REF, args); |
8781 | return fgMorphTree(tree); |
8782 | } |
8783 | } |
8784 | |
8785 | assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd)); |
8786 | |
8787 | LPVOID pValue; |
8788 | InfoAccessType iat = |
8789 | info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue); |
8790 | |
8791 | tree = gtNewStringLiteralNode(iat, pValue); |
8792 | |
8793 | return fgMorphTree(tree); |
8794 | } |
8795 | |
8796 | /***************************************************************************** |
8797 | * |
8798 | * Transform the given GTK_LEAF tree for code generation. |
8799 | */ |
8800 | |
8801 | GenTree* Compiler::fgMorphLeaf(GenTree* tree) |
8802 | { |
8803 | assert(tree->OperKind() & GTK_LEAF); |
8804 | |
8805 | if (tree->gtOper == GT_LCL_VAR) |
8806 | { |
8807 | const bool forceRemorph = false; |
8808 | return fgMorphLocalVar(tree, forceRemorph); |
8809 | } |
8810 | #ifdef _TARGET_X86_ |
8811 | else if (tree->gtOper == GT_LCL_FLD) |
8812 | { |
8813 | if (info.compIsVarArgs) |
8814 | { |
8815 | GenTree* newTree = |
8816 | fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs); |
8817 | if (newTree != nullptr) |
8818 | { |
8819 | if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) |
8820 | { |
8821 | fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType); |
8822 | } |
8823 | return newTree; |
8824 | } |
8825 | } |
8826 | } |
8827 | #endif // _TARGET_X86_ |
8828 | else if (tree->gtOper == GT_FTN_ADDR) |
8829 | { |
8830 | CORINFO_CONST_LOOKUP addrInfo; |
8831 | |
8832 | #ifdef FEATURE_READYTORUN_COMPILER |
8833 | if (tree->gtFptrVal.gtEntryPoint.addr != nullptr) |
8834 | { |
8835 | addrInfo = tree->gtFptrVal.gtEntryPoint; |
8836 | } |
8837 | else |
8838 | #endif |
8839 | { |
8840 | info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo); |
8841 | } |
8842 | |
8843 | // Refer to gtNewIconHandleNode() as the template for constructing a constant handle |
8844 | // |
8845 | tree->SetOper(GT_CNS_INT); |
8846 | tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle)); |
8847 | tree->gtFlags |= GTF_ICON_FTN_ADDR; |
8848 | |
8849 | switch (addrInfo.accessType) |
8850 | { |
8851 | case IAT_PPVALUE: |
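// The constant is the address of a slot holding a pointer to the function address:
// load that slot (an invariant load), then fall through to add the second load.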
8852 | tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree); |
8853 | tree->gtFlags |= GTF_IND_INVARIANT; |
8854 | |
8855 | __fallthrough; |
8856 | |
8857 | case IAT_PVALUE: |
8858 | tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree); |
8859 | break; |
8860 | |
8861 | case IAT_VALUE: |
8862 | tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding |
8863 | break; |
8864 | |
8865 | default: |
noway_assert(!"Unknown addrInfo.accessType");
8867 | } |
8868 | |
8869 | return fgMorphTree(tree); |
8870 | } |
8871 | |
8872 | return tree; |
8873 | } |
8874 | |
8875 | void Compiler::fgAssignSetVarDef(GenTree* tree) |
8876 | { |
8877 | GenTreeLclVarCommon* lclVarCmnTree; |
8878 | bool isEntire = false; |
8879 | if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire)) |
8880 | { |
8881 | if (isEntire) |
8882 | { |
8883 | lclVarCmnTree->gtFlags |= GTF_VAR_DEF; |
8884 | } |
8885 | else |
8886 | { |
8887 | // We consider partial definitions to be modeled as uses followed by definitions. |
// This captures the idea that preceding defs are not necessarily made redundant
8889 | // by this definition. |
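// For example (a sketch): storing through a 4-byte GT_LCL_FLD into an 8-byte local
// writes only part of the variable, so it gets GTF_VAR_DEF | GTF_VAR_USEASG, whereas
// an assignment that covers the whole local gets just GTF_VAR_DEF.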
8890 | lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG); |
8891 | } |
8892 | } |
8893 | } |
8894 | |
8895 | //------------------------------------------------------------------------ |
8896 | // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment |
8897 | // |
8898 | // Arguments: |
8899 | // tree - The block assignment to be possibly morphed |
8900 | // |
8901 | // Return Value: |
8902 | // The modified tree if successful, nullptr otherwise. |
8903 | // |
8904 | // Assumptions: |
8905 | // 'tree' must be a block assignment. |
8906 | // |
8907 | // Notes: |
8908 | // If successful, this method always returns the incoming tree, modifying only |
8909 | // its arguments. |
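//    For example (a sketch): a copyblk of an 8-byte struct between two unpromoted struct
//    locals of the same class can be retyped as a single TYP_LONG assignment on a 64-bit
//    target (or TYP_REF, if the struct's single slot holds a GC reference).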
8910 | |
8911 | GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) |
8912 | { |
8913 | // This must be a block assignment. |
8914 | noway_assert(tree->OperIsBlkOp()); |
8915 | var_types asgType = tree->TypeGet(); |
8916 | |
8917 | GenTree* asg = tree; |
8918 | GenTree* dest = asg->gtGetOp1(); |
8919 | GenTree* src = asg->gtGetOp2(); |
8920 | unsigned destVarNum = BAD_VAR_NUM; |
8921 | LclVarDsc* destVarDsc = nullptr; |
8922 | GenTree* destLclVarTree = nullptr; |
8923 | bool isCopyBlock = asg->OperIsCopyBlkOp(); |
8924 | bool isInitBlock = !isCopyBlock; |
8925 | |
8926 | unsigned size; |
8927 | CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; |
8928 | #ifdef FEATURE_SIMD |
8929 | // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD/GT_HWIntrinsic) |
8930 | // The SIMD type in question could be Vector2f which is 8-bytes in size. |
8931 | // The below check is to make sure that we don't turn that copyblk |
// into an assignment, since the rationalizer logic will transform the
// copyblk appropriately. Otherwise, the transformation made in this
// routine would prevent that rationalizer logic from applying, and we might end up with a
// GT_ADDR(GT_SIMD/GT_HWIntrinsic) node post-rationalization, leading to a noway assert
8936 | // in codegen. |
8937 | // TODO-1stClassStructs: This is here to preserve old behavior. |
8938 | // It should be eliminated. |
8939 | if (src->OperIsSIMDorSimdHWintrinsic()) |
8940 | { |
8941 | return nullptr; |
8942 | } |
8943 | #endif |
8944 | |
8945 | if (dest->gtEffectiveVal()->OperIsBlk()) |
8946 | { |
8947 | GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk(); |
8948 | size = lhsBlk->Size(); |
8949 | if (impIsAddressInLocal(lhsBlk->Addr(), &destLclVarTree)) |
8950 | { |
8951 | destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum; |
8952 | destVarDsc = &(lvaTable[destVarNum]); |
8953 | } |
8954 | if (lhsBlk->OperGet() == GT_OBJ) |
8955 | { |
8956 | clsHnd = lhsBlk->AsObj()->gtClass; |
8957 | } |
8958 | } |
8959 | else |
8960 | { |
8961 | // Is this an enregisterable struct that is already a simple assignment? |
8962 | // This can happen if we are re-morphing. |
8963 | if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock) |
8964 | { |
8965 | return tree; |
8966 | } |
8967 | noway_assert(dest->OperIsLocal()); |
8968 | destLclVarTree = dest; |
8969 | destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum; |
8970 | destVarDsc = &(lvaTable[destVarNum]); |
8971 | if (isCopyBlock) |
8972 | { |
8973 | clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle(); |
8974 | size = info.compCompHnd->getClassSize(clsHnd); |
8975 | } |
8976 | else |
8977 | { |
8978 | size = destVarDsc->lvExactSize; |
8979 | } |
8980 | } |
8981 | |
8982 | // |
8983 | // See if we can do a simple transformation: |
8984 | // |
8985 | // GT_ASG <TYP_size> |
8986 | // / \ |
8987 | // GT_IND GT_IND or CNS_INT |
8988 | // | | |
8989 | // [dest] [src] |
8990 | // |
8991 | |
8992 | if (asgType == TYP_STRUCT) |
8993 | { |
8994 | if (size == REGSIZE_BYTES) |
8995 | { |
8996 | if (clsHnd == NO_CLASS_HANDLE) |
8997 | { |
// A register-sized cpblk can be treated as an integer assignment.
8999 | asgType = TYP_I_IMPL; |
9000 | } |
9001 | else |
9002 | { |
9003 | BYTE gcPtr; |
9004 | info.compCompHnd->getClassGClayout(clsHnd, &gcPtr); |
9005 | asgType = getJitGCType(gcPtr); |
9006 | } |
9007 | } |
9008 | else |
9009 | { |
9010 | switch (size) |
9011 | { |
9012 | case 1: |
9013 | asgType = TYP_BYTE; |
9014 | break; |
9015 | case 2: |
9016 | asgType = TYP_SHORT; |
9017 | break; |
9018 | |
9019 | #ifdef _TARGET_64BIT_ |
9020 | case 4: |
9021 | asgType = TYP_INT; |
9022 | break; |
9023 | #endif // _TARGET_64BIT_ |
9024 | } |
9025 | } |
9026 | } |
9027 | |
9028 | if ((destVarDsc != nullptr) && varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted) |
9029 | { |
9030 | // Let fgMorphCopyBlock handle it. |
9031 | return nullptr; |
9032 | } |
9033 | |
9034 | GenTree* srcLclVarTree = nullptr; |
9035 | LclVarDsc* srcVarDsc = nullptr; |
9036 | if (isCopyBlock) |
9037 | { |
9038 | if (src->OperGet() == GT_LCL_VAR) |
9039 | { |
9040 | srcLclVarTree = src; |
9041 | srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]); |
9042 | } |
9043 | else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &srcLclVarTree)) |
9044 | { |
9045 | srcVarDsc = &(lvaTable[srcLclVarTree->AsLclVarCommon()->gtLclNum]); |
9046 | } |
9047 | if ((srcVarDsc != nullptr) && varTypeIsStruct(srcLclVarTree) && srcVarDsc->lvPromoted) |
9048 | { |
9049 | // Let fgMorphCopyBlock handle it. |
9050 | return nullptr; |
9051 | } |
9052 | } |
9053 | |
9054 | if (asgType != TYP_STRUCT) |
9055 | { |
9056 | noway_assert((size <= REGSIZE_BYTES) || varTypeIsSIMD(asgType)); |
9057 | |
// For initBlk, a non-constant source is not going to allow us to fiddle
// with the bits to create a single assignment.
9060 | // Nor do we (for now) support transforming an InitBlock of SIMD type. |
9061 | if (isInitBlock && (!src->IsConstInitVal() || varTypeIsSIMD(asgType))) |
9062 | { |
9063 | return nullptr; |
9064 | } |
9065 | |
9066 | if (destVarDsc != nullptr) |
9067 | { |
9068 | #if LOCAL_ASSERTION_PROP |
9069 | // Kill everything about dest |
9070 | if (optLocalAssertionProp) |
9071 | { |
9072 | if (optAssertionCount > 0) |
9073 | { |
9074 | fgKillDependentAssertions(destVarNum DEBUGARG(tree)); |
9075 | } |
9076 | } |
9077 | #endif // LOCAL_ASSERTION_PROP |
9078 | |
9079 | // A previous incarnation of this code also required the local not to be |
9080 | // address-exposed(=taken). That seems orthogonal to the decision of whether |
9081 | // to do field-wise assignments: being address-exposed will cause it to be |
9082 | // "dependently" promoted, so it will be in the right memory location. One possible |
9083 | // further reason for avoiding field-wise stores is that the struct might have alignment-induced |
9084 | // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid |
// concern, then we could compromise, and say that being address-exposed, plus having fields that
// do not completely cover the memory of the struct, prevents field-wise assignments. The same
// situation exists for the "src" decision.
9087 | if (varTypeIsStruct(destLclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType())) |
9088 | { |
9089 | // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.) |
9090 | return nullptr; |
9091 | } |
9092 | else if (!varTypeIsFloating(destLclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc))) |
9093 | { |
9094 | // Use the dest local var directly, as well as its type. |
9095 | dest = destLclVarTree; |
9096 | asgType = destVarDsc->lvType; |
9097 | |
9098 | // If the block operation had been a write to a local var of a small int type, |
9099 | // of the exact size of the small int type, and the var is NormalizeOnStore, |
9100 | // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't |
9101 | // have done that normalization. If we're now making it into an assignment, |
9102 | // the NormalizeOnStore will work, and it can be a full def. |
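// For example (a sketch): a 2-byte copyblk into a TYP_SHORT normalize-on-store local
// becomes a plain TYP_SHORT assignment, which performs the normalization on store,
// so the GTF_VAR_USEASG bit can be cleared below.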
9103 | if (destVarDsc->lvNormalizeOnStore()) |
9104 | { |
9105 | dest->gtFlags &= (~GTF_VAR_USEASG); |
9106 | } |
9107 | } |
9108 | else |
9109 | { |
9110 | // Could be a non-promoted struct, or a floating point type local, or |
9111 | // an int subject to a partial write. Don't enregister. |
9112 | lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField)); |
9113 | |
9114 | // Mark the local var tree as a definition point of the local. |
9115 | destLclVarTree->gtFlags |= GTF_VAR_DEF; |
9116 | if (size < destVarDsc->lvExactSize) |
9117 | { // If it's not a full-width assignment.... |
9118 | destLclVarTree->gtFlags |= GTF_VAR_USEASG; |
9119 | } |
9120 | |
9121 | if (dest == destLclVarTree) |
9122 | { |
9123 | dest = gtNewIndir(asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest)); |
9124 | } |
9125 | } |
9126 | } |
9127 | |
9128 | // Check to ensure we don't have a reducible *(& ... ) |
9129 | if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR) |
9130 | { |
// If dest is an Indir or Block, and it has a child that is an Addr node
9132 | // |
9133 | GenTree* addrNode = dest->AsIndir()->Addr(); // known to be a GT_ADDR |
9134 | |
9135 | // Can we just remove the Ind(Addr(destOp)) and operate directly on 'destOp'? |
9136 | // |
9137 | GenTree* destOp = addrNode->gtGetOp1(); |
9138 | var_types destOpType = destOp->TypeGet(); |
9139 | |
9140 | // We can if we have a primitive integer type and the sizes are exactly the same. |
9141 | // |
9142 | if ((varTypeIsIntegralOrI(destOp) && (size == genTypeSize(destOpType)))) |
9143 | { |
9144 | dest = destOp; |
9145 | asgType = destOpType; |
9146 | } |
9147 | } |
9148 | |
9149 | if (dest->gtEffectiveVal()->OperIsIndir()) |
9150 | { |
9151 | // If we have no information about the destination, we have to assume it could |
9152 | // live anywhere (not just in the GC heap). |
9153 | // Mark the GT_IND node so that we use the correct write barrier helper in case |
9154 | // the field is a GC ref. |
9155 | |
9156 | if (!fgIsIndirOfAddrOfLocal(dest)) |
9157 | { |
9158 | dest->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE); |
9159 | tree->gtFlags |= GTF_GLOB_REF; |
9160 | } |
9161 | |
9162 | dest->gtFlags &= (~GTF_EXCEPT | dest->AsIndir()->Addr()->gtFlags); |
9163 | dest->SetIndirExceptionFlags(this); |
9164 | tree->gtFlags |= (dest->gtFlags & GTF_EXCEPT); |
9165 | } |
9166 | |
9167 | if (isCopyBlock) |
9168 | { |
9169 | if (srcVarDsc != nullptr) |
9170 | { |
9171 | // Handled above. |
9172 | assert(!varTypeIsStruct(srcLclVarTree) || !srcVarDsc->lvPromoted); |
9173 | if (!varTypeIsFloating(srcLclVarTree->TypeGet()) && |
9174 | size == genTypeSize(genActualType(srcLclVarTree->TypeGet()))) |
9175 | { |
9176 | // Use the src local var directly. |
9177 | src = srcLclVarTree; |
9178 | } |
9179 | else |
9180 | { |
9181 | // The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar)) |
// or indir(lclVarAddr) in rational form and liveness won't account for these uses. Therefore,
// we have to mark this local as address exposed so we don't delete it as a dead store later on.
9184 | unsigned lclVarNum = srcLclVarTree->gtLclVarCommon.gtLclNum; |
9185 | lvaTable[lclVarNum].lvAddrExposed = true; |
9186 | lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed)); |
9187 | GenTree* srcAddr; |
9188 | if (src == srcLclVarTree) |
9189 | { |
9190 | srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src); |
9191 | src = gtNewOperNode(GT_IND, asgType, srcAddr); |
9192 | } |
9193 | else |
9194 | { |
9195 | assert(src->OperIsIndir()); |
9196 | } |
9197 | } |
9198 | } |
9199 | |
9200 | if (src->OperIsIndir()) |
9201 | { |
9202 | if (!fgIsIndirOfAddrOfLocal(src)) |
9203 | { |
9204 | // If we have no information about the src, we have to assume it could |
9205 | // live anywhere (not just in the GC heap). |
9206 | // Mark the GT_IND node so that we use the correct write barrier helper in case |
9207 | // the field is a GC ref. |
9208 | src->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE); |
9209 | } |
9210 | |
9211 | src->gtFlags &= (~GTF_EXCEPT | src->AsIndir()->Addr()->gtFlags); |
9212 | src->SetIndirExceptionFlags(this); |
9213 | } |
9214 | } |
9215 | else |
9216 | { |
9217 | // InitBlk |
9218 | #if FEATURE_SIMD |
9219 | if (varTypeIsSIMD(asgType)) |
9220 | { |
9221 | assert(!isCopyBlock); // Else we would have returned the tree above. |
9222 | noway_assert(src->IsIntegralConst(0)); |
9223 | noway_assert(destVarDsc != nullptr); |
9224 | |
9225 | src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size); |
9226 | tree->gtOp.gtOp2 = src; |
9227 | return tree; |
9228 | } |
9229 | else |
9230 | #endif |
9231 | { |
9232 | if (src->OperIsInitVal()) |
9233 | { |
9234 | src = src->gtGetOp1(); |
9235 | } |
9236 | assert(src->IsCnsIntOrI()); |
9237 | // This will mutate the integer constant, in place, to be the correct |
9238 | // value for the type we are using in the assignment. |
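// For example (a sketch): a non-zero init value such as 0x3B being stored as TYP_INT
// is replicated across the width of the type, giving 0x3B3B3B3B.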
9239 | src->AsIntCon()->FixupInitBlkValue(asgType); |
9240 | } |
9241 | } |
9242 | |
9243 | // Ensure that the dest is setup appropriately. |
9244 | if (dest->gtEffectiveVal()->OperIsIndir()) |
9245 | { |
9246 | dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/); |
9247 | } |
9248 | |
9249 | // Ensure that the rhs is setup appropriately. |
9250 | if (isCopyBlock) |
9251 | { |
9252 | src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/); |
9253 | } |
9254 | |
9255 | // Set the lhs and rhs on the assignment. |
9256 | if (dest != tree->gtOp.gtOp1) |
9257 | { |
9258 | asg->gtOp.gtOp1 = dest; |
9259 | } |
9260 | if (src != asg->gtOp.gtOp2) |
9261 | { |
9262 | asg->gtOp.gtOp2 = src; |
9263 | } |
9264 | |
9265 | asg->ChangeType(asgType); |
9266 | dest->gtFlags |= GTF_DONT_CSE; |
9267 | asg->gtFlags &= ~GTF_EXCEPT; |
9268 | asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT); |
9269 | // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate. |
9270 | asg->gtFlags &= ~GTF_REVERSE_OPS; |
9271 | |
9272 | #ifdef DEBUG |
9273 | if (verbose) |
9274 | { |
printf("fgMorphOneAsgBlock (after):\n");
9276 | gtDispTree(tree); |
9277 | } |
9278 | #endif |
9279 | return tree; |
9280 | } |
9281 | |
9282 | return nullptr; |
9283 | } |
9284 | |
9285 | //------------------------------------------------------------------------ |
9286 | // fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node |
9287 | // |
9288 | // Arguments: |
9289 | // tree - a tree node with a gtOper of GT_INITBLK |
9290 | // the child nodes for tree have already been Morphed |
9291 | // |
9292 | // Return Value: |
// We can return the original GT_INITBLK unmodified (least desirable, but always correct)
9294 | // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable) |
9295 | // If we have performed struct promotion of the Dest() then we will try to |
9296 | // perform a field by field assignment for each of the promoted struct fields |
9297 | // |
9298 | // Notes: |
9299 | // If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp |
// if the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
// cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
9302 | |
9303 | GenTree* Compiler::fgMorphInitBlock(GenTree* tree) |
9304 | { |
9305 | // We must have the GT_ASG form of InitBlkOp. |
9306 | noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp()); |
9307 | #ifdef DEBUG |
9308 | bool morphed = false; |
9309 | #endif // DEBUG |
9310 | |
9311 | GenTree* asg = tree; |
9312 | GenTree* src = tree->gtGetOp2(); |
9313 | GenTree* origDest = tree->gtGetOp1(); |
9314 | |
9315 | GenTree* dest = fgMorphBlkNode(origDest, true); |
9316 | if (dest != origDest) |
9317 | { |
9318 | tree->gtOp.gtOp1 = dest; |
9319 | } |
9320 | tree->gtType = dest->TypeGet(); |
9321 | // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its |
9322 | // type will be the type of the original lclVar, in which case we will change it to TYP_INT). |
9323 | if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src)) |
9324 | { |
9325 | src->gtType = TYP_INT; |
9326 | } |
JITDUMP("\nfgMorphInitBlock:");
9328 | |
9329 | GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree); |
9330 | if (oneAsgTree) |
9331 | { |
JITDUMP(" using oneAsgTree.\n");
9333 | tree = oneAsgTree; |
9334 | } |
9335 | else |
9336 | { |
9337 | GenTree* destAddr = nullptr; |
9338 | GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src; |
9339 | GenTree* blockSize = nullptr; |
9340 | unsigned blockWidth = 0; |
9341 | FieldSeqNode* destFldSeq = nullptr; |
9342 | LclVarDsc* destLclVar = nullptr; |
9343 | bool destDoFldAsg = false; |
9344 | unsigned destLclNum = BAD_VAR_NUM; |
9345 | bool blockWidthIsConst = false; |
9346 | GenTreeLclVarCommon* lclVarTree = nullptr; |
9347 | if (dest->IsLocal()) |
9348 | { |
9349 | lclVarTree = dest->AsLclVarCommon(); |
9350 | } |
9351 | else |
9352 | { |
9353 | if (dest->OperIsBlk()) |
9354 | { |
9355 | destAddr = dest->AsBlk()->Addr(); |
9356 | blockWidth = dest->AsBlk()->gtBlkSize; |
9357 | } |
9358 | else |
9359 | { |
9360 | assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT)); |
9361 | destAddr = dest->gtGetOp1(); |
9362 | blockWidth = genTypeSize(dest->TypeGet()); |
9363 | } |
9364 | } |
9365 | if (lclVarTree != nullptr) |
9366 | { |
9367 | destLclNum = lclVarTree->gtLclNum; |
9368 | destLclVar = &lvaTable[destLclNum]; |
9369 | blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar); |
9370 | blockWidthIsConst = true; |
9371 | } |
9372 | else |
9373 | { |
9374 | if (dest->gtOper == GT_DYN_BLK) |
9375 | { |
9376 | // The size must be an integer type |
9377 | blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize; |
9378 | assert(varTypeIsIntegral(blockSize->gtType)); |
9379 | } |
9380 | else |
9381 | { |
9382 | assert(blockWidth != 0); |
9383 | blockWidthIsConst = true; |
9384 | } |
9385 | |
9386 | if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq)) |
9387 | { |
9388 | destLclNum = lclVarTree->gtLclNum; |
9389 | destLclVar = &lvaTable[destLclNum]; |
9390 | } |
9391 | } |
9392 | if (destLclNum != BAD_VAR_NUM) |
9393 | { |
9394 | #if LOCAL_ASSERTION_PROP |
9395 | // Kill everything about destLclNum (and its field locals) |
9396 | if (optLocalAssertionProp) |
9397 | { |
9398 | if (optAssertionCount > 0) |
9399 | { |
9400 | fgKillDependentAssertions(destLclNum DEBUGARG(tree)); |
9401 | } |
9402 | } |
9403 | #endif // LOCAL_ASSERTION_PROP |
9404 | |
9405 | if (destLclVar->lvPromoted && blockWidthIsConst) |
9406 | { |
9407 | assert(initVal->OperGet() == GT_CNS_INT); |
9408 | noway_assert(varTypeIsStruct(destLclVar)); |
9409 | noway_assert(!opts.MinOpts()); |
if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
{
    JITDUMP(" dest is address exposed and contains holes");
9413 | } |
9414 | else |
9415 | { |
9416 | if (blockWidth == destLclVar->lvExactSize) |
9417 | { |
JITDUMP(" (destDoFldAsg=true)");
9419 | // We may decide later that a copyblk is required when this struct has holes |
9420 | destDoFldAsg = true; |
9421 | } |
9422 | else |
9423 | { |
JITDUMP(" with mismatched size");
9425 | } |
9426 | } |
9427 | } |
9428 | } |
9429 | |
9430 | // Can we use field by field assignment for the dest? |
9431 | if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles) |
9432 | { |
JITDUMP(" dest contains holes");
9434 | destDoFldAsg = false; |
9435 | } |
9436 | |
JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
9438 | |
9439 | // If we're doing an InitBlock and we've transformed the dest to a non-Blk |
9440 | // we need to change it back. |
9441 | if (!destDoFldAsg && !dest->OperIsBlk()) |
9442 | { |
9443 | noway_assert(blockWidth != 0); |
9444 | tree->gtOp.gtOp1 = origDest; |
9445 | tree->gtType = origDest->gtType; |
9446 | } |
9447 | |
9448 | if (!destDoFldAsg && (destLclVar != nullptr)) |
9449 | { |
9450 | // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister. |
9451 | if (!destLclVar->lvRegStruct) |
9452 | { |
9453 | // Mark it as DoNotEnregister. |
9454 | lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp)); |
9455 | } |
9456 | } |
9457 | |
// If we are not doing field by field assignments, canonicalize the dest as a
// block operand and propagate its side effects to the assignment.
//
9462 | if (!destDoFldAsg) |
9463 | { |
9464 | dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true); |
9465 | tree->gtOp.gtOp1 = dest; |
9466 | tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); |
9467 | } |
9468 | else |
9469 | { |
9470 | // The initVal must be a constant of TYP_INT |
9471 | noway_assert(initVal->OperGet() == GT_CNS_INT); |
9472 | noway_assert(genActualType(initVal->gtType) == TYP_INT); |
9473 | |
9474 | // The dest must be of a struct type. |
9475 | noway_assert(varTypeIsStruct(destLclVar)); |
9476 | |
9477 | // |
9478 | // Now, convert InitBlock to individual assignments |
9479 | // |
9480 | |
9481 | tree = nullptr; |
9482 | INDEBUG(morphed = true); |
9483 | |
9484 | GenTree* dest; |
9485 | GenTree* srcCopy; |
9486 | unsigned fieldLclNum; |
9487 | unsigned fieldCnt = destLclVar->lvFieldCnt; |
9488 | |
9489 | for (unsigned i = 0; i < fieldCnt; ++i) |
9490 | { |
9491 | fieldLclNum = destLclVar->lvFieldLclStart + i; |
9492 | dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet()); |
9493 | |
9494 | noway_assert(lclVarTree->gtOper == GT_LCL_VAR); |
// If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
9496 | dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG)); |
9497 | |
9498 | srcCopy = gtCloneExpr(initVal); |
9499 | noway_assert(srcCopy != nullptr); |
9500 | |
9501 | // need type of oper to be same as tree |
9502 | if (dest->gtType == TYP_LONG) |
9503 | { |
9504 | srcCopy->ChangeOperConst(GT_CNS_NATIVELONG); |
9505 | // copy and extend the value |
9506 | srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue()); |
/* Change the type of srcCopy to TYP_LONG */
9508 | srcCopy->gtType = TYP_LONG; |
9509 | } |
9510 | else if (varTypeIsFloating(dest->gtType)) |
9511 | { |
9512 | srcCopy->ChangeOperConst(GT_CNS_DBL); |
9513 | // setup the bit pattern |
9514 | memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal, |
9515 | sizeof(srcCopy->gtDblCon.gtDconVal)); |
/* Change the type of srcCopy to TYP_DOUBLE */
9517 | srcCopy->gtType = TYP_DOUBLE; |
9518 | } |
9519 | else |
9520 | { |
9521 | noway_assert(srcCopy->gtOper == GT_CNS_INT); |
9522 | noway_assert(srcCopy->TypeGet() == TYP_INT); |
9523 | // setup the bit pattern |
9524 | memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal, |
9525 | sizeof(srcCopy->gtIntCon.gtIconVal)); |
9526 | } |
9527 | |
9528 | srcCopy->gtType = dest->TypeGet(); |
9529 | |
9530 | asg = gtNewAssignNode(dest, srcCopy); |
9531 | |
9532 | #if LOCAL_ASSERTION_PROP |
9533 | if (optLocalAssertionProp) |
9534 | { |
9535 | optAssertionGen(asg); |
9536 | } |
9537 | #endif // LOCAL_ASSERTION_PROP |
9538 | |
9539 | if (tree) |
9540 | { |
9541 | tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg); |
9542 | } |
9543 | else |
9544 | { |
9545 | tree = asg; |
9546 | } |
9547 | } |
9548 | } |
9549 | } |
9550 | |
9551 | #ifdef DEBUG |
9552 | if (morphed) |
9553 | { |
9554 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
9555 | |
9556 | if (verbose) |
9557 | { |
printf("fgMorphInitBlock (after):\n");
9559 | gtDispTree(tree); |
9560 | } |
9561 | } |
9562 | #endif |
9563 | |
9564 | return tree; |
9565 | } |
9566 | |
9567 | //------------------------------------------------------------------------ |
9568 | // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type |
9569 | // |
9570 | // Arguments: |
9571 | // tree - the node to be modified. |
9572 | // type - the type of indirection to change it to. |
9573 | // |
9574 | // Return Value: |
9575 | // Returns the node, modified in place. |
9576 | // |
9577 | // Notes: |
9578 | // This doesn't really warrant a separate method, but is here to abstract |
9579 | // the fact that these nodes can be modified in-place. |
9580 | |
9581 | GenTree* Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type) |
9582 | { |
9583 | tree->SetOper(GT_IND); |
9584 | tree->gtType = type; |
9585 | return tree; |
9586 | } |
9587 | |
9588 | //------------------------------------------------------------------------ |
9589 | // fgMorphGetStructAddr: Gets the address of a struct object |
9590 | // |
9591 | // Arguments: |
9592 | // pTree - the parent's pointer to the struct object node |
9593 | // clsHnd - the class handle for the struct type |
9594 | // isRValue - true if this is a source (not dest) |
9595 | // |
9596 | // Return Value: |
9597 | // Returns the address of the struct value, possibly modifying the existing tree to |
9598 | // sink the address below any comma nodes (this is to canonicalize for value numbering). |
// If this is a source, it will morph it to a GT_IND before taking its address,
9600 | // since it may not be remorphed (and we don't want blk nodes as rvalues). |
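//
// For example (a sketch): given COMMA(sideEffect, LCL_VAR struct V07), the address is
// sunk below the comma, yielding COMMA(sideEffect, ADDR(LCL_VAR V07)) retyped as TYP_BYREF.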
9601 | |
9602 | GenTree* Compiler::fgMorphGetStructAddr(GenTree** pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue) |
9603 | { |
9604 | GenTree* addr; |
9605 | GenTree* tree = *pTree; |
9606 | // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we |
9607 | // need to hang onto that for the purposes of value numbering. |
9608 | if (tree->OperIsIndir()) |
9609 | { |
9610 | if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0) |
9611 | { |
9612 | addr = tree->gtOp.gtOp1; |
9613 | } |
9614 | else |
9615 | { |
9616 | if (isRValue && tree->OperIsBlk()) |
9617 | { |
9618 | tree->ChangeOper(GT_IND); |
9619 | } |
9620 | addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree); |
9621 | } |
9622 | } |
9623 | else if (tree->gtOper == GT_COMMA) |
9624 | { |
9625 | // If this is a comma, we're going to "sink" the GT_ADDR below it. |
9626 | (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue); |
9627 | tree->gtType = TYP_BYREF; |
9628 | addr = tree; |
9629 | } |
9630 | else |
9631 | { |
9632 | switch (tree->gtOper) |
9633 | { |
9634 | case GT_LCL_FLD: |
9635 | case GT_LCL_VAR: |
9636 | case GT_INDEX: |
9637 | case GT_FIELD: |
9638 | case GT_ARR_ELEM: |
9639 | addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree); |
9640 | break; |
9641 | case GT_INDEX_ADDR: |
9642 | addr = tree; |
9643 | break; |
9644 | default: |
9645 | { |
9646 | // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're |
9647 | // not going to use "temp" |
9648 | GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd); |
9649 | addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue); |
9650 | break; |
9651 | } |
9652 | } |
9653 | } |
9654 | *pTree = addr; |
9655 | return addr; |
9656 | } |
9657 | |
9658 | //------------------------------------------------------------------------ |
9659 | // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment |
9660 | // |
9661 | // Arguments: |
9662 | // tree - The struct type node |
9663 | // isDest - True if this is the destination of the assignment |
9664 | // |
9665 | // Return Value: |
9666 | // Returns the possibly-morphed node. The caller is responsible for updating |
// the parent of this node.
9668 | |
9669 | GenTree* Compiler::fgMorphBlkNode(GenTree* tree, bool isDest) |
9670 | { |
9671 | GenTree* handleTree = nullptr; |
9672 | GenTree* addr = nullptr; |
9673 | if (tree->OperIs(GT_COMMA)) |
9674 | { |
9675 | // In order to CSE and value number array index expressions and bounds checks, |
9676 | // the commas in which they are contained need to match. |
9677 | // The pattern is that the COMMA should be the address expression. |
9678 | // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind. |
9679 | // TODO-1stClassStructs: Consider whether this can be improved. |
9680 | // Also consider whether some of this can be included in gtNewBlockVal (though note |
9681 | // that doing so may cause us to query the type system before we otherwise would). |
9682 | // Example: |
9683 | // before: [3] comma struct <- [2] comma struct <- [1] LCL_VAR struct |
9684 | // after: [3] comma byref <- [2] comma byref <- [4] addr byref <- [1] LCL_VAR struct |
9685 | |
9686 | addr = tree; |
9687 | GenTree* effectiveVal = tree->gtEffectiveVal(); |
9688 | |
9689 | GenTreePtrStack commas(getAllocator(CMK_ArrayStack)); |
9690 | for (GenTree* comma = tree; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2()) |
9691 | { |
9692 | commas.Push(comma); |
9693 | } |
9694 | |
9695 | GenTree* lastComma = commas.Top(); |
9696 | noway_assert(lastComma->gtGetOp2() == effectiveVal); |
9697 | GenTree* effectiveValAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); |
9698 | #ifdef DEBUG |
9699 | effectiveValAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
9700 | #endif |
9701 | lastComma->gtOp.gtOp2 = effectiveValAddr; |
9702 | |
9703 | while (!commas.Empty()) |
9704 | { |
9705 | GenTree* comma = commas.Pop(); |
9706 | comma->gtType = TYP_BYREF; |
9707 | gtUpdateNodeSideEffects(comma); |
9708 | } |
9709 | |
9710 | handleTree = effectiveVal; |
9711 | } |
9712 | else if (tree->OperIs(GT_IND) && tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR)) |
9713 | { |
9714 | handleTree = tree; |
9715 | addr = tree->AsIndir()->Addr(); |
9716 | } |
9717 | |
9718 | if (addr != nullptr) |
9719 | { |
9720 | var_types structType = handleTree->TypeGet(); |
9721 | if (structType == TYP_STRUCT) |
9722 | { |
9723 | CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(handleTree); |
9724 | if (structHnd == NO_CLASS_HANDLE) |
9725 | { |
9726 | tree = gtNewOperNode(GT_IND, structType, addr); |
9727 | } |
9728 | else |
9729 | { |
9730 | tree = gtNewObjNode(structHnd, addr); |
9731 | if (tree->OperGet() == GT_OBJ) |
9732 | { |
9733 | gtSetObjGcInfo(tree->AsObj()); |
9734 | } |
9735 | } |
9736 | } |
9737 | else |
9738 | { |
9739 | tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType)); |
9740 | } |
9741 | |
9742 | gtUpdateNodeSideEffects(tree); |
9743 | #ifdef DEBUG |
9744 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
9745 | #endif |
9746 | } |
9747 | |
9748 | if (!tree->OperIsBlk()) |
9749 | { |
9750 | return tree; |
9751 | } |
9752 | GenTreeBlk* blkNode = tree->AsBlk(); |
9753 | if (blkNode->OperGet() == GT_DYN_BLK) |
9754 | { |
9755 | if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI()) |
9756 | { |
9757 | unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue(); |
9758 | // A GT_BLK with size of zero is not supported, |
9759 | // so if we encounter such a thing we just leave it as a GT_DYN_BLK |
9760 | if (size != 0) |
9761 | { |
9762 | blkNode->AsDynBlk()->gtDynamicSize = nullptr; |
9763 | blkNode->ChangeOper(GT_BLK); |
9764 | blkNode->gtBlkSize = size; |
9765 | } |
9766 | else |
9767 | { |
9768 | return tree; |
9769 | } |
9770 | } |
9771 | else |
9772 | { |
9773 | return tree; |
9774 | } |
9775 | } |
9776 | if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) && |
9777 | (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR)) |
9778 | { |
9779 | GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon(); |
9780 | if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode))) |
9781 | { |
9782 | lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr)); |
9783 | } |
9784 | } |
9785 | |
9786 | return tree; |
9787 | } |
9788 | |
9789 | //------------------------------------------------------------------------ |
9790 | // fgMorphBlockOperand: Canonicalize an operand of a block assignment |
9791 | // |
9792 | // Arguments: |
9793 | // tree - The block operand |
9794 | // asgType - The type of the assignment |
9795 | // blockWidth - The size of the block |
9796 | // isDest - true iff this is the destination of the assignment |
9797 | // |
9798 | // Return Value: |
9799 | // Returns the morphed block operand |
9800 | // |
9801 | // Notes: |
9802 | // This does the following: |
9803 | // - Ensures that a struct operand is a block node or lclVar. |
9804 | // - Ensures that any COMMAs are above ADDR nodes. |
9805 | // Although 'tree' WAS an operand of a block assignment, the assignment |
9806 | // may have been retyped to be a scalar assignment. |
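//    For example (a sketch): when asgType is a scalar such as TYP_INT, an operand of the
//    form IND(ADDR(lclVar int)) collapses to the lclVar itself, while an operand of a
//    mismatched type is rewrapped as IND(ADDR(operand)) of the scalar type.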
9807 | |
9808 | GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest) |
9809 | { |
9810 | GenTree* effectiveVal = tree->gtEffectiveVal(); |
9811 | |
9812 | if (asgType != TYP_STRUCT) |
9813 | { |
9814 | if (effectiveVal->OperIsIndir()) |
9815 | { |
9816 | GenTree* addr = effectiveVal->AsIndir()->Addr(); |
9817 | if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType)) |
9818 | { |
9819 | effectiveVal = addr->gtGetOp1(); |
9820 | } |
9821 | else if (effectiveVal->OperIsBlk()) |
9822 | { |
9823 | effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType); |
9824 | } |
9825 | else |
9826 | { |
9827 | effectiveVal->gtType = asgType; |
9828 | } |
9829 | } |
9830 | else if (effectiveVal->TypeGet() != asgType) |
9831 | { |
9832 | GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); |
9833 | effectiveVal = gtNewIndir(asgType, addr); |
9834 | } |
9835 | } |
9836 | else |
9837 | { |
9838 | GenTreeIndir* indirTree = nullptr; |
9839 | GenTreeLclVarCommon* lclNode = nullptr; |
9840 | bool needsIndirection = true; |
9841 | |
9842 | if (effectiveVal->OperIsIndir()) |
9843 | { |
9844 | indirTree = effectiveVal->AsIndir(); |
9845 | GenTree* addr = effectiveVal->AsIndir()->Addr(); |
9846 | if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR)) |
9847 | { |
9848 | lclNode = addr->gtGetOp1()->AsLclVarCommon(); |
9849 | } |
9850 | } |
9851 | else if (effectiveVal->OperGet() == GT_LCL_VAR) |
9852 | { |
9853 | lclNode = effectiveVal->AsLclVarCommon(); |
9854 | } |
9855 | #ifdef FEATURE_SIMD |
9856 | if (varTypeIsSIMD(asgType)) |
9857 | { |
9858 | if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) && |
9859 | (indirTree->Addr()->gtGetOp1()->OperIsSIMDorSimdHWintrinsic())) |
9860 | { |
9861 | assert(!isDest); |
9862 | needsIndirection = false; |
9863 | effectiveVal = indirTree->Addr()->gtGetOp1(); |
9864 | } |
9865 | if (effectiveVal->OperIsSIMDorSimdHWintrinsic()) |
9866 | { |
9867 | needsIndirection = false; |
9868 | } |
9869 | } |
9870 | #endif // FEATURE_SIMD |
9871 | if (lclNode != nullptr) |
9872 | { |
9873 | LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]); |
9874 | if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth) && (varDsc->lvType == asgType)) |
9875 | { |
9876 | if (effectiveVal != lclNode) |
9877 | { |
JITDUMP("Replacing block node [%06d] with lclVar V%02u\n", dspTreeID(tree), lclNode->gtLclNum);
9879 | effectiveVal = lclNode; |
9880 | } |
9881 | needsIndirection = false; |
9882 | } |
9883 | else |
9884 | { |
9885 | // This may be a lclVar that was determined to be address-exposed. |
9886 | effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT); |
9887 | } |
9888 | } |
9889 | if (needsIndirection) |
9890 | { |
9891 | if (indirTree != nullptr) |
9892 | { |
9893 | // We should never find a struct indirection on the lhs of an assignment. |
9894 | assert(!isDest || indirTree->OperIsBlk()); |
9895 | if (!isDest && indirTree->OperIsBlk()) |
9896 | { |
9897 | (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType); |
9898 | } |
9899 | } |
9900 | else |
9901 | { |
9902 | GenTree* newTree; |
9903 | GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); |
9904 | if (isDest) |
9905 | { |
9906 | CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal); |
9907 | if (clsHnd == NO_CLASS_HANDLE) |
9908 | { |
9909 | newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth); |
9910 | } |
9911 | else |
9912 | { |
9913 | newTree = gtNewObjNode(clsHnd, addr); |
9914 | if (isDest && (newTree->OperGet() == GT_OBJ)) |
9915 | { |
9916 | gtSetObjGcInfo(newTree->AsObj()); |
9917 | } |
9918 | if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0)) |
9919 | { |
9920 | // This is not necessarily a global reference, though gtNewObjNode always assumes it is. |
9921 | // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor, |
9922 | // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled |
9923 | // separately now to avoid excess diffs. |
9924 | newTree->gtFlags &= ~(GTF_GLOB_EFFECT); |
9925 | } |
9926 | } |
9927 | } |
9928 | else |
9929 | { |
9930 | newTree = gtNewIndir(asgType, addr); |
9931 | } |
9932 | effectiveVal = newTree; |
9933 | } |
9934 | } |
9935 | } |
9936 | tree = effectiveVal; |
9937 | return tree; |
9938 | } |
9939 | |
9940 | //------------------------------------------------------------------------ |
9941 | // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk |
9942 | // |
9943 | // Arguments: |
9944 | // dest - the GT_OBJ or GT_STORE_OBJ |
9945 | // |
9946 | // Assumptions: |
9947 | // The destination must be known (by the caller) to be on the stack. |
9948 | // |
9949 | // Notes: |
9950 | // If we have a CopyObj with a dest on the stack, and its size is small enough |
9951 | // to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a |
9952 | // GC Unsafe CopyBlk that is non-interruptible. |
9953 | // This is not supported for the JIT32_GCENCODER, in which case this method is a no-op. |
9954 | // |
9955 | void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest) |
9956 | { |
9957 | #if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER) |
9958 | assert(dest->gtGcPtrCount != 0); |
9959 | unsigned blockWidth = dest->AsBlk()->gtBlkSize; |
9960 | #ifdef DEBUG |
9961 | bool destOnStack = false; |
9962 | GenTree* destAddr = dest->Addr(); |
9963 | assert(destAddr->IsLocalAddrExpr() != nullptr); |
9964 | #endif |
9965 | if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT)) |
9966 | { |
9967 | genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK; |
9968 | dest->SetOper(newOper); |
9969 | dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block |
9970 | } |
9971 | #endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER) |
9972 | } |
9973 | |
9974 | //------------------------------------------------------------------------ |
9975 | // fgMorphCopyBlock: Perform the Morphing of block copy |
9976 | // |
9977 | // Arguments: |
9978 | // tree - a block copy (i.e. an assignment with a block op on the lhs). |
9979 | // |
9980 | // Return Value: |
// We can return the original block copy unmodified (least desirable, but always correct)
9982 | // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable). |
9983 | // If we have performed struct promotion of the Source() or the Dest() then we will try to |
9984 | // perform a field by field assignment for each of the promoted struct fields. |
9985 | // |
9986 | // Assumptions: |
9987 | // The child nodes for tree have already been Morphed. |
9988 | // |
9989 | // Notes: |
9990 | // If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest(). |
9991 | // When performing a field by field assignment we can have one of Source() or Dest treated as a blob of bytes |
9992 | // and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes. |
// if the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
// cannot use a field by field assignment and must leave the original block copy unmodified.
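//
// For example (a sketch): copying between two locals promoted from the same two-field
// struct type becomes ASG(dst.f0, src.f0) and ASG(dst.f1, src.f1) over the promoted
// field locals, instead of a block copy.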
9995 | |
9996 | GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) |
9997 | { |
9998 | noway_assert(tree->OperIsCopyBlkOp()); |
9999 | |
JITDUMP("\nfgMorphCopyBlock:");
10001 | |
10002 | bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0; |
10003 | |
10004 | GenTree* asg = tree; |
10005 | GenTree* rhs = asg->gtGetOp2(); |
10006 | GenTree* dest = asg->gtGetOp1(); |
10007 | |
10008 | #if FEATURE_MULTIREG_RET |
10009 | // If this is a multi-reg return, we will not do any morphing of this node. |
10010 | if (rhs->IsMultiRegCall()) |
10011 | { |
10012 | assert(dest->OperGet() == GT_LCL_VAR); |
JITDUMP(" not morphing a multireg call return\n");
10014 | return tree; |
10015 | } |
10016 | #endif // FEATURE_MULTIREG_RET |
10017 | |
10018 | // If we have an array index on the lhs, we need to create an obj node. |
10019 | |
10020 | dest = fgMorphBlkNode(dest, true); |
10021 | if (dest != asg->gtGetOp1()) |
10022 | { |
10023 | asg->gtOp.gtOp1 = dest; |
10024 | if (dest->IsLocal()) |
10025 | { |
10026 | dest->gtFlags |= GTF_VAR_DEF; |
10027 | } |
10028 | } |
10029 | asg->gtType = dest->TypeGet(); |
10030 | rhs = fgMorphBlkNode(rhs, false); |
10031 | |
10032 | asg->gtOp.gtOp2 = rhs; |
10033 | |
10034 | GenTree* oldTree = tree; |
10035 | GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree); |
10036 | |
10037 | if (oneAsgTree) |
10038 | { |
JITDUMP(" using oneAsgTree.\n");
10040 | tree = oneAsgTree; |
10041 | } |
10042 | else |
10043 | { |
10044 | unsigned blockWidth; |
10045 | bool blockWidthIsConst = false; |
10046 | GenTreeLclVarCommon* lclVarTree = nullptr; |
10047 | GenTreeLclVarCommon* srcLclVarTree = nullptr; |
10048 | unsigned destLclNum = BAD_VAR_NUM; |
10049 | LclVarDsc* destLclVar = nullptr; |
10050 | FieldSeqNode* destFldSeq = nullptr; |
10051 | bool destDoFldAsg = false; |
10052 | GenTree* destAddr = nullptr; |
10053 | GenTree* srcAddr = nullptr; |
10054 | bool destOnStack = false; |
10055 | bool hasGCPtrs = false; |
10056 | |
JITDUMP("block assignment to morph:\n");
10058 | DISPTREE(asg); |
10059 | |
10060 | if (dest->IsLocal()) |
10061 | { |
10062 | blockWidthIsConst = true; |
10063 | destOnStack = true; |
10064 | if (dest->gtOper == GT_LCL_VAR) |
10065 | { |
10066 | lclVarTree = dest->AsLclVarCommon(); |
10067 | destLclNum = lclVarTree->gtLclNum; |
10068 | destLclVar = &lvaTable[destLclNum]; |
10069 | if (destLclVar->lvType == TYP_STRUCT) |
10070 | { |
10071 | // It would be nice if lvExactSize always corresponded to the size of the struct, |
10072 | // but it doesn't always for the temps that the importer creates when it spills side |
10073 | // effects. |
10074 | // TODO-Cleanup: Determine when this happens, and whether it can be changed. |
10075 | blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle()); |
10076 | } |
10077 | else |
10078 | { |
10079 | blockWidth = genTypeSize(destLclVar->lvType); |
10080 | } |
10081 | hasGCPtrs = destLclVar->lvStructGcCount != 0; |
10082 | } |
10083 | else |
10084 | { |
10085 | assert(dest->TypeGet() != TYP_STRUCT); |
10086 | assert(dest->gtOper == GT_LCL_FLD); |
10087 | blockWidth = genTypeSize(dest->TypeGet()); |
10088 | destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); |
10089 | destFldSeq = dest->AsLclFld()->gtFieldSeq; |
10090 | } |
10091 | } |
10092 | else |
10093 | { |
10094 | GenTree* effectiveDest = dest->gtEffectiveVal(); |
10095 | if (effectiveDest->OperGet() == GT_IND) |
10096 | { |
10097 | assert(dest->TypeGet() != TYP_STRUCT); |
10098 | blockWidth = genTypeSize(effectiveDest->TypeGet()); |
10099 | blockWidthIsConst = true; |
10100 | if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0)) |
10101 | { |
10102 | destAddr = dest->gtGetOp1(); |
10103 | } |
10104 | } |
10105 | else |
10106 | { |
10107 | assert(effectiveDest->OperIsBlk()); |
10108 | GenTreeBlk* blk = effectiveDest->AsBlk(); |
10109 | |
10110 | blockWidth = blk->gtBlkSize; |
10111 | blockWidthIsConst = (blk->gtOper != GT_DYN_BLK); |
10112 | if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0)) |
10113 | { |
10114 | destAddr = blk->Addr(); |
10115 | } |
10116 | } |
10117 | if (destAddr != nullptr) |
10118 | { |
10119 | noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL); |
10120 | if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq)) |
10121 | { |
10122 | destOnStack = true; |
10123 | destLclNum = lclVarTree->gtLclNum; |
10124 | destLclVar = &lvaTable[destLclNum]; |
10125 | } |
10126 | } |
10127 | } |
10128 | |
10129 | if (destLclVar != nullptr) |
10130 | { |
10131 | #if LOCAL_ASSERTION_PROP |
10132 | // Kill everything about destLclNum (and its field locals) |
10133 | if (optLocalAssertionProp) |
10134 | { |
10135 | if (optAssertionCount > 0) |
10136 | { |
10137 | fgKillDependentAssertions(destLclNum DEBUGARG(tree)); |
10138 | } |
10139 | } |
10140 | #endif // LOCAL_ASSERTION_PROP |
10141 | |
10142 | if (destLclVar->lvPromoted && blockWidthIsConst) |
10143 | { |
10144 | noway_assert(varTypeIsStruct(destLclVar)); |
10145 | noway_assert(!opts.MinOpts()); |
10146 | |
10147 | if (blockWidth == destLclVar->lvExactSize) |
10148 | { |
JITDUMP(" (destDoFldAsg=true)");
10150 | // We may decide later that a copyblk is required when this struct has holes |
10151 | destDoFldAsg = true; |
10152 | } |
10153 | else |
10154 | { |
JITDUMP(" with mismatched dest size");
10156 | } |
10157 | } |
10158 | } |
10159 | |
10160 | FieldSeqNode* srcFldSeq = nullptr; |
10161 | unsigned srcLclNum = BAD_VAR_NUM; |
10162 | LclVarDsc* srcLclVar = nullptr; |
10163 | bool srcDoFldAsg = false; |
10164 | |
10165 | if (rhs->IsLocal()) |
10166 | { |
10167 | srcLclVarTree = rhs->AsLclVarCommon(); |
10168 | srcLclNum = srcLclVarTree->gtLclNum; |
10169 | if (rhs->OperGet() == GT_LCL_FLD) |
10170 | { |
10171 | srcFldSeq = rhs->AsLclFld()->gtFieldSeq; |
10172 | } |
10173 | } |
10174 | else if (rhs->OperIsIndir()) |
10175 | { |
10176 | if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq)) |
10177 | { |
10178 | srcLclNum = srcLclVarTree->gtLclNum; |
10179 | } |
10180 | else |
10181 | { |
10182 | srcAddr = rhs->gtOp.gtOp1; |
10183 | } |
10184 | } |
10185 | |
10186 | if (srcLclNum != BAD_VAR_NUM) |
10187 | { |
10188 | srcLclVar = &lvaTable[srcLclNum]; |
10189 | |
10190 | if (srcLclVar->lvPromoted && blockWidthIsConst) |
10191 | { |
10192 | noway_assert(varTypeIsStruct(srcLclVar)); |
10193 | noway_assert(!opts.MinOpts()); |
10194 | |
10195 | if (blockWidth == srcLclVar->lvExactSize) |
10196 | { |
JITDUMP(" (srcDoFldAsg=true)");
10198 | // We may decide later that a copyblk is required when this struct has holes |
10199 | srcDoFldAsg = true; |
10200 | } |
10201 | else |
10202 | { |
JITDUMP(" with mismatched src size");
10204 | } |
10205 | } |
10206 | } |
10207 | |
10208 | // Check to see if we are doing a copy to/from the same local block. |
10209 | // If so, morph it to a nop. |
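// For example (a sketch): a copy of struct local V02 onto itself (same local, same
// field sequence) is replaced with a GT_NOP here.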
10210 | if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq) && |
10211 | destFldSeq != FieldSeqStore::NotAField()) |
10212 | { |
JITDUMP("Self-copy; replaced with a NOP.\n");
10214 | GenTree* nop = gtNewNothingNode(); |
10215 | INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); |
10216 | return nop; |
10217 | } |
10218 | |
10219 | // Check to see if we are required to do a copy block because the struct contains holes |
10220 | // and either the src or dest is externally visible |
10221 | // |
10222 | bool requiresCopyBlock = false; |
10223 | bool srcSingleLclVarAsg = false; |
10224 | bool destSingleLclVarAsg = false; |
10225 | |
10226 | // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock. |
10227 | if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct)) |
10228 | { |
10229 | requiresCopyBlock = true; |
10230 | } |
10231 | |
10232 | // Can we use field by field assignment for the dest? |
10233 | if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles) |
10234 | { |
JITDUMP(" dest contains custom layout and contains holes");
10236 | // C++ style CopyBlock with holes |
10237 | requiresCopyBlock = true; |
10238 | } |
10239 | |
10240 | // Can we use field by field assignment for the src? |
10241 | if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles) |
10242 | { |
JITDUMP(" src contains custom layout and contains holes");
10244 | // C++ style CopyBlock with holes |
10245 | requiresCopyBlock = true; |
10246 | } |
10247 | |
10248 | #if defined(_TARGET_ARM_) |
10249 | if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED)) |
10250 | { |
JITDUMP(" rhs is unaligned");
10252 | requiresCopyBlock = true; |
10253 | } |
10254 | |
10255 | if (asg->gtFlags & GTF_BLK_UNALIGNED) |
10256 | { |
JITDUMP(" asg is unaligned");
10258 | requiresCopyBlock = true; |
10259 | } |
10260 | #endif // _TARGET_ARM_ |
10261 | |
10262 | if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe) |
10263 | { |
10264 | requiresCopyBlock = true; |
10265 | } |
10266 | |
10267 | // Can't use field by field assignment if the src is a call. |
10268 | if (rhs->OperGet() == GT_CALL) |
10269 | { |
JITDUMP(" src is a call");
10271 | // C++ style CopyBlock with holes |
10272 | requiresCopyBlock = true; |
10273 | } |
10274 | |
10275 | // If we passed the above checks, then we will check these two |
10276 | if (!requiresCopyBlock) |
10277 | { |
10278 | // Are both dest and src promoted structs? |
10279 | if (destDoFldAsg && srcDoFldAsg) |
10280 | { |
10281 | // Both structs should be of the same type, or each have a single field of the same type. |
10282 | // If not we will use a copy block. |
10283 | if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() != |
10284 | lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle()) |
10285 | { |
10286 | unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart; |
10287 | unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart; |
10288 | if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) || |
10289 | (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType)) |
10290 | { |
10291 | requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock |
JITDUMP(" with mismatched types");
10293 | } |
10294 | } |
10295 | } |
10296 | // Are neither dest or src promoted structs? |
10297 | else if (!destDoFldAsg && !srcDoFldAsg) |
10298 | { |
10299 | requiresCopyBlock = true; // Leave as a CopyBlock |
JITDUMP(" with no promoted structs");
10301 | } |
10302 | else if (destDoFldAsg) |
10303 | { |
10304 | // Match the following kinds of trees: |
10305 | // fgMorphTree BB01, stmt 9 (before) |
10306 | // [000052] ------------ const int 8 |
10307 | // [000053] -A--G------- copyBlk void |
10308 | // [000051] ------------ addr byref |
10309 | // [000050] ------------ lclVar long V07 loc5 |
10310 | // [000054] --------R--- <list> void |
10311 | // [000049] ------------ addr byref |
10312 | // [000048] ------------ lclVar struct(P) V06 loc4 |
10313 | // long V06.h (offs=0x00) -> V17 tmp9 |
10314 | // Yields this transformation |
10315 | // fgMorphCopyBlock (after): |
10316 | // [000050] ------------ lclVar long V07 loc5 |
10317 | // [000085] -A---------- = long |
10318 | // [000083] D------N---- lclVar long V17 tmp9 |
10319 | // |
10320 | if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) && |
10321 | (blockWidth == genTypeSize(srcLclVar->TypeGet()))) |
10322 | { |
10323 | // Reject the following tree: |
10324 | // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe |
10325 | // |
10326 | // fgMorphTree BB01, stmt 6 (before) |
10327 | // [000038] ------------- const int 4 |
10328 | // [000039] -A--G-------- copyBlk void |
10329 | // [000037] ------------- addr byref |
10330 | // [000036] ------------- lclVar int V05 loc3 |
10331 | // [000040] --------R---- <list> void |
10332 | // [000035] ------------- addr byref |
10333 | // [000034] ------------- lclVar struct(P) V04 loc2 |
10334 | // float V04.f1 (offs=0x00) -> V13 tmp6 |
// As this would transform into
10336 | // float V13 = int V05 |
10337 | // |
10338 | unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart; |
10339 | var_types destType = lvaTable[fieldLclNum].TypeGet(); |
10340 | if (srcLclVar->TypeGet() == destType) |
10341 | { |
10342 | srcSingleLclVarAsg = true; |
10343 | } |
10344 | } |
10345 | } |
10346 | else |
10347 | { |
10348 | assert(srcDoFldAsg); |
10349 | // Check for the symmetric case (which happens for the _pointer field of promoted spans): |
10350 | // |
10351 | // [000240] -----+------ /--* lclVar struct(P) V18 tmp9 |
10352 | // /--* byref V18._value (offs=0x00) -> V30 tmp21 |
10353 | // [000245] -A------R--- * = struct (copy) |
10354 | // [000244] -----+------ \--* obj(8) struct |
10355 | // [000243] -----+------ \--* addr byref |
10356 | // [000242] D----+-N---- \--* lclVar byref V28 tmp19 |
10357 | // |
10358 | if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) && |
10359 | (blockWidth == genTypeSize(destLclVar->TypeGet()))) |
10360 | { |
10361 | // Check for type agreement |
10362 | unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart; |
10363 | var_types srcType = lvaTable[fieldLclNum].TypeGet(); |
10364 | if (destLclVar->TypeGet() == srcType) |
10365 | { |
10366 | destSingleLclVarAsg = true; |
10367 | } |
10368 | } |
10369 | } |
10370 | } |
10371 | |
// If we require a copy block, then set both of the field assign bools to false
10373 | if (requiresCopyBlock) |
10374 | { |
10375 | // If a copy block is required then we won't do field by field assignments |
10376 | destDoFldAsg = false; |
10377 | srcDoFldAsg = false; |
10378 | } |
10379 | |
JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
10381 | |
10382 | // Mark the dest/src structs as DoNotEnreg when they are not being fully referenced as the same type. |
10383 | // |
10384 | if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg) |
10385 | { |
10386 | if (!destLclVar->lvRegStruct || (destLclVar->lvType != dest->TypeGet())) |
10387 | { |
10388 | // Mark it as DoNotEnregister. |
10389 | lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp)); |
10390 | } |
10391 | } |
10392 | |
10393 | if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg) |
10394 | { |
10395 | if (!srcLclVar->lvRegStruct || (srcLclVar->lvType != dest->TypeGet())) |
10396 | { |
10397 | lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp)); |
10398 | } |
10399 | } |
10400 | |
10401 | if (requiresCopyBlock) |
10402 | { |
10403 | var_types asgType = dest->TypeGet(); |
10404 | dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/); |
10405 | asg->gtOp.gtOp1 = dest; |
10406 | asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); |
10407 | |
10408 | // Note that the unrolling of CopyBlk is only implemented on some platforms. |
10409 | // Currently that includes x64 and ARM but not x86: the code generation for this |
10410 | // construct requires the ability to mark certain regions of the generated code |
10411 | // as non-interruptible, and the GC encoding for the latter platform does not |
10412 | // have this capability. |
10413 | |
10414 | // If we have a CopyObj with a dest on the stack |
10415 | // we will convert it into an GC Unsafe CopyBlk that is non-interruptible |
10416 | // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes). |
10417 | // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.) |
10418 | // |
10419 | if (destOnStack && (dest->OperGet() == GT_OBJ)) |
10420 | { |
10421 | fgMorphUnsafeBlk(dest->AsObj()); |
10422 | } |
10423 | |
10424 | // Eliminate the "OBJ or BLK" node on the rhs. |
10425 | rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/); |
10426 | asg->gtOp.gtOp2 = rhs; |
10427 | |
10428 | // Formerly, liveness did not consider copyblk arguments of simple types as being |
10429 | // a use or def, so these variables were marked as address-exposed. |
10430 | // TODO-1stClassStructs: This should no longer be needed. |
10431 | if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar)) |
10432 | { |
JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
10434 | lvaTable[srcLclNum].lvAddrExposed = true; |
10435 | } |
10436 | |
10437 | if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar)) |
10438 | { |
JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
10440 | lvaTable[destLclNum].lvAddrExposed = true; |
10441 | } |
10442 | |
10443 | goto _Done; |
10444 | } |
10445 | |
10446 | // |
10447 | // Otherwise we convert this CopyBlock into individual field by field assignments |
10448 | // |
10449 | tree = nullptr; |
10450 | |
10451 | GenTree* src; |
10452 | GenTree* addrSpill = nullptr; |
10453 | unsigned addrSpillTemp = BAD_VAR_NUM; |
10454 | bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame |
10455 | |
10456 | unsigned fieldCnt = DUMMY_INIT(0); |
10457 | |
10458 | if (destDoFldAsg && srcDoFldAsg) |
10459 | { |
10460 | // To do fieldwise assignments for both sides, they'd better be the same struct type! |
10461 | // All of these conditions were checked above... |
10462 | assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM); |
10463 | assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt); |
10464 | |
10465 | fieldCnt = destLclVar->lvFieldCnt; |
10466 | goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field |
10467 | // assignments. |
10468 | } |
10469 | else if (destDoFldAsg) |
10470 | { |
10471 | fieldCnt = destLclVar->lvFieldCnt; |
10472 | rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/); |
10473 | if (srcAddr == nullptr) |
10474 | { |
10475 | srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */); |
10476 | } |
10477 | } |
10478 | else |
10479 | { |
10480 | assert(srcDoFldAsg); |
10481 | fieldCnt = srcLclVar->lvFieldCnt; |
10482 | dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/); |
10483 | if (dest->OperIsBlk()) |
10484 | { |
10485 | (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT); |
10486 | } |
10487 | destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); |
10488 | } |
10489 | |
10490 | if (destDoFldAsg) |
10491 | { |
10492 | noway_assert(!srcDoFldAsg); |
10493 | if (gtClone(srcAddr)) |
10494 | { |
10495 | // srcAddr is simple expression. No need to spill. |
10496 | noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); |
10497 | } |
10498 | else |
10499 | { |
10500 | // srcAddr is complex expression. Clone and spill it (unless the destination is |
10501 | // a struct local that only has one field, in which case we'd only use the |
10502 | // address value once...) |
10503 | if (destLclVar->lvFieldCnt > 1) |
10504 | { |
10505 | addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr' |
10506 | noway_assert(addrSpill != nullptr); |
10507 | } |
10508 | } |
10509 | } |
10510 | |
10511 | if (srcDoFldAsg) |
10512 | { |
10513 | noway_assert(!destDoFldAsg); |
10514 | |
10515 | // If we're doing field-wise stores, to an address within a local, and we copy |
10516 | // the address into "addrSpill", do *not* declare the original local var node in the |
10517 | // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the |
10518 | // field-wise assignments as an "indirect" assignment to the local. |
10519 | // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before |
10520 | // we clone it.) |
10521 | if (lclVarTree != nullptr) |
10522 | { |
10523 | lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG); |
10524 | } |
10525 | |
10526 | if (gtClone(destAddr)) |
10527 | { |
10528 | // destAddr is simple expression. No need to spill |
10529 | noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); |
10530 | } |
10531 | else |
10532 | { |
10533 | // destAddr is complex expression. Clone and spill it (unless |
10534 | // the source is a struct local that only has one field, in which case we'd only |
10535 | // use the address value once...) |
10536 | if (srcLclVar->lvFieldCnt > 1) |
10537 | { |
10538 | addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr' |
10539 | noway_assert(addrSpill != nullptr); |
10540 | } |
10541 | |
10542 | // TODO-CQ: this should be based on a more general |
10543 | // "BaseAddress" method, that handles fields of structs, before or after |
10544 | // morphing. |
10545 | if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR) |
10546 | { |
10547 | if (addrSpill->gtOp.gtOp1->IsLocal()) |
10548 | { |
10549 | // We will *not* consider this to define the local, but rather have each individual field assign |
10550 | // be a definition. |
10551 | addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK); |
10552 | assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) != |
10553 | PROMOTION_TYPE_INDEPENDENT); |
10554 | addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our |
10555 | // local stack frame |
10556 | } |
10557 | } |
10558 | } |
10559 | } |
10560 | |
10561 | if (addrSpill != nullptr) |
10562 | { |
10563 | // Spill the (complex) address to a BYREF temp. |
10564 | // Note, at most one address may need to be spilled. |
addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
10566 | |
10567 | lvaTable[addrSpillTemp].lvType = TYP_BYREF; |
10568 | |
10569 | if (addrSpillIsStackDest) |
10570 | { |
10571 | lvaTable[addrSpillTemp].lvStackByref = true; |
10572 | } |
10573 | |
10574 | tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill); |
10575 | |
10576 | // If we are assigning the address of a LclVar here |
10577 | // liveness does not account for this kind of address taken use. |
10578 | // |
10579 | // We have to mark this local as address exposed so |
10580 | // that we don't delete the definition for this LclVar |
10581 | // as a dead store later on. |
10582 | // |
10583 | if (addrSpill->OperGet() == GT_ADDR) |
10584 | { |
10585 | GenTree* addrOp = addrSpill->gtOp.gtOp1; |
10586 | if (addrOp->IsLocal()) |
10587 | { |
10588 | unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum; |
10589 | lvaTable[lclVarNum].lvAddrExposed = true; |
10590 | lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed)); |
10591 | } |
10592 | } |
10593 | } |
10594 | |
10595 | _AssignFields: |
10596 | |
10597 | // We may have allocated a temp above, and that may have caused the lvaTable to be expanded. |
10598 | // So, beyond this point we cannot rely on the old values of 'srcLclVar' and 'destLclVar'. |
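// Each iteration below builds one field-level assignment. When only one side is promoted, the other
// side is accessed through an indirection off the (possibly spilled) address plus the field's offset,
// unless the whole block maps onto a single matching local.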
10599 | for (unsigned i = 0; i < fieldCnt; ++i) |
10600 | { |
10601 | FieldSeqNode* curFieldSeq = nullptr; |
10602 | if (destDoFldAsg) |
10603 | { |
10604 | noway_assert(destLclNum != BAD_VAR_NUM); |
10605 | unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i; |
10606 | dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet()); |
// If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
10608 | if (destAddr != nullptr) |
10609 | { |
10610 | noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR); |
10611 | dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG); |
10612 | } |
10613 | else |
10614 | { |
10615 | noway_assert(lclVarTree != nullptr); |
10616 | dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG); |
10617 | } |
10618 | // Don't CSE the lhs of an assignment. |
10619 | dest->gtFlags |= GTF_DONT_CSE; |
10620 | } |
10621 | else |
10622 | { |
10623 | noway_assert(srcDoFldAsg); |
10624 | noway_assert(srcLclNum != BAD_VAR_NUM); |
10625 | unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i; |
10626 | |
10627 | if (destSingleLclVarAsg) |
10628 | { |
10629 | noway_assert(fieldCnt == 1); |
10630 | noway_assert(destLclVar != nullptr); |
10631 | noway_assert(addrSpill == nullptr); |
10632 | |
10633 | dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet()); |
10634 | } |
10635 | else |
10636 | { |
10637 | if (addrSpill) |
10638 | { |
10639 | assert(addrSpillTemp != BAD_VAR_NUM); |
10640 | dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF); |
10641 | } |
10642 | else |
10643 | { |
10644 | dest = gtCloneExpr(destAddr); |
10645 | noway_assert(dest != nullptr); |
10646 | |
10647 | // Is the address of a local? |
10648 | GenTreeLclVarCommon* lclVarTree = nullptr; |
10649 | bool isEntire = false; |
10650 | bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr); |
10651 | if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire)) |
10652 | { |
10653 | lclVarTree->gtFlags |= GTF_VAR_DEF; |
10654 | if (!isEntire) |
10655 | { |
10656 | lclVarTree->gtFlags |= GTF_VAR_USEASG; |
10657 | } |
10658 | } |
10659 | } |
10660 | |
10661 | GenTree* fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL); |
10662 | // Have to set the field sequence -- which means we need the field handle. |
10663 | CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle(); |
10664 | CORINFO_FIELD_HANDLE fieldHnd = |
10665 | info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal); |
10666 | curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd); |
10667 | fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq; |
10668 | |
10669 | dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode); |
10670 | |
10671 | dest = gtNewIndir(lvaTable[fieldLclNum].TypeGet(), dest); |
10672 | |
10673 | // !!! The destination could be on stack. !!! |
10674 | // This flag will let us choose the correct write barrier. |
10675 | dest->gtFlags |= GTF_IND_TGTANYWHERE; |
10676 | } |
10677 | } |
10678 | |
10679 | if (srcDoFldAsg) |
10680 | { |
10681 | noway_assert(srcLclNum != BAD_VAR_NUM); |
10682 | unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i; |
10683 | src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet()); |
10684 | |
10685 | noway_assert(srcLclVarTree != nullptr); |
10686 | src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK; |
10687 | // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE, |
10688 | // but they are when they are under a GT_ADDR. |
10689 | src->gtFlags |= GTF_DONT_CSE; |
10690 | } |
10691 | else |
10692 | { |
10693 | noway_assert(destDoFldAsg); |
10694 | noway_assert(destLclNum != BAD_VAR_NUM); |
10695 | unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i; |
10696 | |
10697 | if (srcSingleLclVarAsg) |
10698 | { |
10699 | noway_assert(fieldCnt == 1); |
10700 | noway_assert(srcLclNum != BAD_VAR_NUM); |
10701 | noway_assert(addrSpill == nullptr); |
10702 | |
10703 | src = gtNewLclvNode(srcLclNum, lvaGetDesc(srcLclNum)->TypeGet()); |
10704 | } |
10705 | else |
10706 | { |
10707 | if (addrSpill) |
10708 | { |
10709 | assert(addrSpillTemp != BAD_VAR_NUM); |
10710 | src = gtNewLclvNode(addrSpillTemp, TYP_BYREF); |
10711 | } |
10712 | else |
10713 | { |
10714 | src = gtCloneExpr(srcAddr); |
10715 | noway_assert(src != nullptr); |
10716 | } |
10717 | |
10718 | CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle(); |
10719 | CORINFO_FIELD_HANDLE fieldHnd = |
10720 | info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal); |
10721 | curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd); |
10722 | var_types destType = lvaGetDesc(fieldLclNum)->lvType; |
10723 | |
10724 | bool done = false; |
10725 | if (lvaGetDesc(fieldLclNum)->lvFldOffset == 0) |
10726 | { |
10727 | // If this is a full-width use of the src via a different type, we need to create a GT_LCL_FLD. |
10728 | // (Note that if it was the same type, 'srcSingleLclVarAsg' would be true.) |
10729 | if (srcLclNum != BAD_VAR_NUM) |
10730 | { |
10731 | noway_assert(srcLclVarTree != nullptr); |
10732 | assert(destType != TYP_STRUCT); |
10733 | unsigned destSize = genTypeSize(destType); |
10734 | srcLclVar = lvaGetDesc(srcLclNum); |
10735 | unsigned srcSize = |
10736 | (srcLclVar->lvType == TYP_STRUCT) ? srcLclVar->lvExactSize : genTypeSize(srcLclVar); |
10737 | if (destSize == srcSize) |
10738 | { |
10739 | srcLclVarTree->gtFlags |= GTF_VAR_CAST; |
10740 | srcLclVarTree->ChangeOper(GT_LCL_FLD); |
10741 | srcLclVarTree->gtType = destType; |
10742 | srcLclVarTree->AsLclFld()->gtFieldSeq = curFieldSeq; |
10743 | src = srcLclVarTree; |
10744 | done = true; |
10745 | } |
10746 | } |
10747 | } |
10748 | else // if (lvaGetDesc(fieldLclNum)->lvFldOffset != 0) |
10749 | { |
10750 | src = gtNewOperNode(GT_ADD, TYP_BYREF, src, |
10751 | new (this, GT_CNS_INT) |
10752 | GenTreeIntCon(TYP_I_IMPL, lvaGetDesc(fieldLclNum)->lvFldOffset, |
10753 | curFieldSeq)); |
10754 | } |
10755 | if (!done) |
10756 | { |
10757 | src = gtNewIndir(destType, src); |
10758 | } |
10759 | } |
10760 | } |
10761 | |
10762 | noway_assert(dest->TypeGet() == src->TypeGet()); |
10763 | |
10764 | asg = gtNewAssignNode(dest, src); |
10765 | |
10766 | // If we spilled the address, and we didn't do individual field assignments to promoted fields, |
10767 | // and it was of a local, ensure that the destination local variable has been marked as address |
10768 | // exposed. Neither liveness nor SSA are able to track this kind of indirect assignments. |
10769 | if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM) |
10770 | { |
10771 | noway_assert(lvaGetDesc(destLclNum)->lvAddrExposed); |
10772 | } |
10773 | |
10774 | #if LOCAL_ASSERTION_PROP |
10775 | if (optLocalAssertionProp) |
10776 | { |
10777 | optAssertionGen(asg); |
10778 | } |
10779 | #endif // LOCAL_ASSERTION_PROP |
10780 | |
10781 | if (tree) |
10782 | { |
10783 | tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg); |
10784 | } |
10785 | else |
10786 | { |
10787 | tree = asg; |
10788 | } |
10789 | } |
10790 | } |
10791 | |
10792 | if (isLateArg) |
10793 | { |
10794 | tree->gtFlags |= GTF_LATE_ARG; |
10795 | } |
10796 | |
10797 | #ifdef DEBUG |
10798 | if (tree != oldTree) |
10799 | { |
10800 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
10801 | } |
10802 | |
10803 | if (verbose) |
10804 | { |
printf("\nfgMorphCopyBlock (after):\n");
10806 | gtDispTree(tree); |
10807 | } |
10808 | #endif |
10809 | |
10810 | _Done: |
10811 | return tree; |
10812 | } |
10813 | |
// Insert conversions and normalize the tree to make it amenable to code generation
// on register-based FP architectures.
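// For example, a TYP_DOUBLE arithmetic node with a TYP_FLOAT operand gets a cast to TYP_DOUBLE
// inserted on that operand so both operands match the result type; mixed float/double compares are
// likewise widened to double.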
10816 | GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree) |
10817 | { |
10818 | if (tree->OperIsArithmetic()) |
10819 | { |
10820 | if (varTypeIsFloating(tree)) |
10821 | { |
10822 | GenTree* op1 = tree->gtOp.gtOp1; |
10823 | GenTree* op2 = tree->gtGetOp2(); |
10824 | |
10825 | assert(varTypeIsFloating(op1->TypeGet()) && varTypeIsFloating(op2->TypeGet())); |
10826 | |
10827 | if (op1->TypeGet() != tree->TypeGet()) |
10828 | { |
10829 | tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, false, tree->TypeGet()); |
10830 | } |
10831 | if (op2->TypeGet() != tree->TypeGet()) |
10832 | { |
10833 | tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, false, tree->TypeGet()); |
10834 | } |
10835 | } |
10836 | } |
10837 | else if (tree->OperIsCompare()) |
10838 | { |
10839 | GenTree* op1 = tree->gtOp.gtOp1; |
10840 | |
10841 | if (varTypeIsFloating(op1)) |
10842 | { |
10843 | GenTree* op2 = tree->gtGetOp2(); |
10844 | assert(varTypeIsFloating(op2)); |
10845 | |
10846 | if (op1->TypeGet() != op2->TypeGet()) |
10847 | { |
// both had better be floating; just one is wider than the other
10849 | if (op1->TypeGet() == TYP_FLOAT) |
10850 | { |
10851 | assert(op2->TypeGet() == TYP_DOUBLE); |
10852 | tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE); |
10853 | } |
10854 | else if (op2->TypeGet() == TYP_FLOAT) |
10855 | { |
10856 | assert(op1->TypeGet() == TYP_DOUBLE); |
10857 | tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE); |
10858 | } |
10859 | } |
10860 | } |
10861 | } |
10862 | |
10863 | return tree; |
10864 | } |
10865 | |
10866 | //-------------------------------------------------------------------------------------------------------------- |
10867 | // fgMorphRecognizeBoxNullable: |
10868 | // Recognize this pattern: |
10869 | // |
10870 | // stmtExpr void (IL 0x000... ???) |
10871 | // return int |
10872 | // CNS_INT ref null |
10873 | // EQ/NE/GT int |
10874 | // CALL help ref HELPER.CORINFO_HELP_BOX_NULLABLE |
10875 | // CNS_INT(h) long 0x7fed96836c8 class |
10876 | // ADDR byref |
10877 | // FIELD struct value |
10878 | // LCL_VAR ref V00 this |
10879 | // |
10880 | // which comes from this code: |
10881 | // |
10882 | // return this.value==null; |
10883 | // |
10884 | // and transform it into |
10885 | // |
10886 | // stmtExpr void (IL 0x000... ???) |
10887 | // return int |
10888 | // CNS_INT ref null |
10889 | // EQ/NE/GT int |
10890 | // IND bool |
10891 | // ADDR byref |
10892 | // FIELD struct value |
10893 | // LCL_VAR ref V00 this |
10894 | // |
10895 | // Arguments: |
10896 | // compare - Compare tree to optimize. |
10897 | // |
10898 | // return value: |
10899 | // A tree that has a call to CORINFO_HELP_BOX_NULLABLE optimized away if the pattern is found; |
10900 | // the original tree otherwise. |
10901 | // |
10902 | |
10903 | GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare) |
10904 | { |
10905 | GenTree* op1 = compare->gtOp.gtOp1; |
10906 | GenTree* op2 = compare->gtOp.gtOp2; |
10907 | GenTree* opCns; |
10908 | GenTreeCall* opCall; |
10909 | |
10910 | if (op1->IsCnsIntOrI() && op2->IsHelperCall()) |
10911 | { |
10912 | opCns = op1; |
10913 | opCall = op2->AsCall(); |
10914 | } |
10915 | else if (op1->IsHelperCall() && op2->IsCnsIntOrI()) |
10916 | { |
10917 | opCns = op2; |
10918 | opCall = op1->AsCall(); |
10919 | } |
10920 | else |
10921 | { |
10922 | return compare; |
10923 | } |
10924 | |
10925 | if (!opCns->IsIntegralConst(0)) |
10926 | { |
10927 | return compare; |
10928 | } |
10929 | |
10930 | if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE) |
10931 | { |
10932 | return compare; |
10933 | } |
10934 | |
10935 | // Get the nullable struct argument |
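// (Per the tree shape shown in the header comment above, the first call argument is the class handle
// and the second is the address of the Nullable<T> value, so we take the second list entry's op1.)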
10936 | GenTree* arg = opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1; |
10937 | |
10938 | // Check for cases that are unsafe to optimize and return the unchanged tree |
10939 | if (arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || ((arg->gtFlags & GTF_LATE_ARG) != 0)) |
10940 | { |
10941 | return compare; |
10942 | } |
10943 | |
10944 | // Replace the box with an access of the nullable 'hasValue' field which is at the zero offset |
10945 | GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, arg); |
10946 | |
10947 | if (opCall == op1) |
10948 | { |
10949 | compare->gtOp.gtOp1 = newOp; |
10950 | } |
10951 | else |
10952 | { |
10953 | compare->gtOp.gtOp2 = newOp; |
10954 | } |
10955 | |
10956 | opCns->gtType = TYP_INT; |
10957 | |
10958 | return compare; |
10959 | } |
10960 | |
10961 | #ifdef FEATURE_SIMD |
10962 | |
10963 | //-------------------------------------------------------------------------------------------------------------- |
10964 | // getSIMDStructFromField: |
//   Check whether the field belongs to a SIMD struct. If it does, return the GenTree* for the
//   struct node and also report the base type, field index and SIMD size; otherwise return nullptr.
//   Normally, if the tree node comes from a SIMD lclvar that is not used in any SIMD intrinsic, we
//   return nullptr, since in that case the SIMD struct should be treated as a regular struct.
//   However, if you want the SIMD struct node regardless, set ignoreUsedInSIMDIntrinsic to true;
//   the lvIsUsedInSIMDIntrinsic check is then skipped and the SIMD struct node is returned whenever
//   the struct is a SIMD struct.
//
// Arguments:
//   tree - GenTree*. The node to check for being a field of a SIMD struct that is used in a
//          SIMD intrinsic.
//   pBaseTypeOut - var_types pointer; if the tree is the one we want, *pBaseTypeOut is set to the
//                  SIMD lclvar's base type.
//   indexOut - unsigned pointer; if the tree is used in a SIMD intrinsic, *indexOut is set to the
//              element index of this field.
//   simdSizeOut - unsigned pointer; if the tree is used in a SIMD intrinsic, *simdSizeOut is set to
//                 the size of the SIMD struct this tree belongs to.
//   ignoreUsedInSIMDIntrinsic - bool. If true, skip the lvIsUsedInSIMDIntrinsic check.
//
// return value:
//   A GenTree* pointing to the SIMD struct the field belongs to, or nullptr if the tree is not a
//   SIMD-intrinsic-related field.
10988 | // |
10989 | |
10990 | GenTree* Compiler::getSIMDStructFromField(GenTree* tree, |
10991 | var_types* pBaseTypeOut, |
10992 | unsigned* indexOut, |
10993 | unsigned* simdSizeOut, |
10994 | bool ignoreUsedInSIMDIntrinsic /*false*/) |
10995 | { |
10996 | GenTree* ret = nullptr; |
10997 | if (tree->OperGet() == GT_FIELD) |
10998 | { |
10999 | GenTree* objRef = tree->gtField.gtFldObj; |
11000 | if (objRef != nullptr) |
11001 | { |
11002 | GenTree* obj = nullptr; |
11003 | if (objRef->gtOper == GT_ADDR) |
11004 | { |
11005 | obj = objRef->gtOp.gtOp1; |
11006 | } |
11007 | else if (ignoreUsedInSIMDIntrinsic) |
11008 | { |
11009 | obj = objRef; |
11010 | } |
11011 | else |
11012 | { |
11013 | return nullptr; |
11014 | } |
11015 | |
11016 | if (isSIMDTypeLocal(obj)) |
11017 | { |
11018 | unsigned lclNum = obj->gtLclVarCommon.gtLclNum; |
11019 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
11020 | if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic) |
11021 | { |
11022 | *simdSizeOut = varDsc->lvExactSize; |
11023 | *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj); |
11024 | ret = obj; |
11025 | } |
11026 | } |
11027 | else if (obj->OperGet() == GT_SIMD) |
11028 | { |
11029 | ret = obj; |
11030 | GenTreeSIMD* simdNode = obj->AsSIMD(); |
11031 | *simdSizeOut = simdNode->gtSIMDSize; |
11032 | *pBaseTypeOut = simdNode->gtSIMDBaseType; |
11033 | } |
11034 | #ifdef FEATURE_HW_INTRINSICS |
11035 | else if (obj->OperIsSimdHWIntrinsic()) |
11036 | { |
11037 | ret = obj; |
11038 | GenTreeHWIntrinsic* simdNode = obj->AsHWIntrinsic(); |
11039 | *simdSizeOut = simdNode->gtSIMDSize; |
11040 | *pBaseTypeOut = simdNode->gtSIMDBaseType; |
11041 | } |
11042 | #endif // FEATURE_HW_INTRINSICS |
11043 | } |
11044 | } |
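// The element index is derived from the field's byte offset; e.g. a field at offset 8 in a
// struct of 4-byte (float) elements is element 2.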
11045 | if (ret != nullptr) |
11046 | { |
11047 | unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut); |
11048 | *indexOut = tree->gtField.gtFldOffset / BaseTypeSize; |
11049 | } |
11050 | return ret; |
11051 | } |
11052 | |
11053 | /***************************************************************************** |
11054 | * If a read operation tries to access simd struct field, then transform the |
11055 | * operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree. |
11056 | * Otherwise, return the old tree. |
11057 | * Argument: |
11058 | * tree - GenTree*. If this pointer points to simd struct which is used for simd |
11059 | * intrinsic, we will morph it as simd intrinsic SIMDIntrinsicGetItem. |
11060 | * Return: |
*   A GenTree* which points to the new tree. If the tree is not for a simd intrinsic,
*   the old tree is returned unchanged.
11063 | */ |
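// For example, reading field Y of a SIMD Vector4 local becomes GT_SIMD(SIMDIntrinsicGetItem, <the local>, 1),
// where 1 is the element index computed from the field offset.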
11064 | |
11065 | GenTree* Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTree* tree) |
11066 | { |
11067 | unsigned index = 0; |
11068 | var_types baseType = TYP_UNKNOWN; |
11069 | unsigned simdSize = 0; |
11070 | GenTree* simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize); |
11071 | if (simdStructNode != nullptr) |
11072 | { |
11073 | assert(simdSize >= ((index + 1) * genTypeSize(baseType))); |
11074 | GenTree* op2 = gtNewIconNode(index); |
11075 | tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize); |
11076 | #ifdef DEBUG |
11077 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
11078 | #endif |
11079 | } |
11080 | return tree; |
11081 | } |
11082 | |
11083 | /***************************************************************************** |
11084 | * Transform an assignment of a SIMD struct field to SIMD intrinsic |
11085 | * SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment, |
11086 | * then return the old tree. |
11087 | * Argument: |
11088 | * tree - GenTree*. If this pointer points to simd struct which is used for simd |
11089 | * intrinsic, we will morph it as simd intrinsic set. |
11090 | * Return: |
*   A GenTree* which points to the new tree. If the tree is not for a simd intrinsic,
*   the old tree is returned unchanged.
11093 | */ |
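// For example, 'v.Z = x' on a SIMD Vector4 local 'v' becomes 'v = SIMDIntrinsicSetZ(v, x)'.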
11094 | |
11095 | GenTree* Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTree* tree) |
11096 | { |
11097 | assert(tree->OperGet() == GT_ASG); |
11098 | GenTree* op1 = tree->gtGetOp1(); |
11099 | GenTree* op2 = tree->gtGetOp2(); |
11100 | |
11101 | unsigned index = 0; |
11102 | var_types baseType = TYP_UNKNOWN; |
11103 | unsigned simdSize = 0; |
11104 | GenTree* simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize); |
11105 | if (simdOp1Struct != nullptr) |
11106 | { |
11107 | // Generate the simd set intrinsic |
11108 | assert(simdSize >= ((index + 1) * genTypeSize(baseType))); |
11109 | |
11110 | SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid; |
11111 | switch (index) |
11112 | { |
11113 | case 0: |
11114 | simdIntrinsicID = SIMDIntrinsicSetX; |
11115 | break; |
11116 | case 1: |
11117 | simdIntrinsicID = SIMDIntrinsicSetY; |
11118 | break; |
11119 | case 2: |
11120 | simdIntrinsicID = SIMDIntrinsicSetZ; |
11121 | break; |
11122 | case 3: |
11123 | simdIntrinsicID = SIMDIntrinsicSetW; |
11124 | break; |
11125 | default: |
noway_assert(!"There is no set intrinsic for index bigger than 3");
11127 | } |
11128 | |
11129 | GenTree* target = gtClone(simdOp1Struct); |
11130 | assert(target != nullptr); |
11131 | var_types simdType = target->gtType; |
11132 | GenTree* simdTree = gtNewSIMDNode(simdType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize); |
11133 | |
11134 | tree->gtOp.gtOp1 = target; |
11135 | tree->gtOp.gtOp2 = simdTree; |
11136 | |
11137 | // fgMorphTree has already called fgMorphImplicitByRefArgs() on this assignment, but the source |
11138 | // and target have not yet been morphed. |
11139 | // Therefore, in case the source and/or target are now implicit byrefs, we need to call it again. |
11140 | if (fgMorphImplicitByRefArgs(tree)) |
11141 | { |
11142 | if (tree->gtGetOp1()->OperIsBlk()) |
11143 | { |
11144 | assert(tree->gtGetOp1()->TypeGet() == simdType); |
11145 | fgMorphBlkToInd(tree->gtGetOp1()->AsBlk(), simdType); |
11146 | } |
11147 | } |
11148 | #ifdef DEBUG |
11149 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
11150 | #endif |
11151 | } |
11152 | |
11153 | return tree; |
11154 | } |
11155 | |
11156 | #endif // FEATURE_SIMD |
11157 | |
11158 | /***************************************************************************** |
11159 | * |
11160 | * Transform the given GTK_SMPOP tree for code generation. |
11161 | */ |
11162 | |
11163 | #ifdef _PREFAST_ |
11164 | #pragma warning(push) |
11165 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
11166 | #endif |
11167 | GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) |
11168 | { |
11169 | ALLOCA_CHECK(); |
11170 | assert(tree->OperKind() & GTK_SMPOP); |
11171 | |
11172 | /* The steps in this function are : |
11173 | o Perform required preorder processing |
11174 | o Process the first, then second operand, if any |
11175 | o Perform required postorder morphing |
11176 | o Perform optional postorder morphing if optimizing |
11177 | */ |
11178 | |
11179 | bool isQmarkColon = false; |
11180 | |
11181 | #if LOCAL_ASSERTION_PROP |
11182 | AssertionIndex origAssertionCount = DUMMY_INIT(0); |
11183 | AssertionDsc* origAssertionTab = DUMMY_INIT(NULL); |
11184 | |
11185 | AssertionIndex thenAssertionCount = DUMMY_INIT(0); |
11186 | AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL); |
11187 | #endif |
11188 | |
11189 | if (fgGlobalMorph) |
11190 | { |
11191 | tree = fgMorphForRegisterFP(tree); |
11192 | } |
11193 | |
11194 | genTreeOps oper = tree->OperGet(); |
11195 | var_types typ = tree->TypeGet(); |
11196 | GenTree* op1 = tree->gtOp.gtOp1; |
11197 | GenTree* op2 = tree->gtGetOp2IfPresent(); |
11198 | |
11199 | /*------------------------------------------------------------------------- |
11200 | * First do any PRE-ORDER processing |
11201 | */ |
11202 | |
11203 | switch (oper) |
11204 | { |
11205 | // Some arithmetic operators need to use a helper call to the EE |
11206 | int helper; |
11207 | |
11208 | case GT_ASG: |
11209 | tree = fgDoNormalizeOnStore(tree); |
11210 | /* fgDoNormalizeOnStore can change op2 */ |
11211 | noway_assert(op1 == tree->gtOp.gtOp1); |
11212 | op2 = tree->gtOp.gtOp2; |
11213 | |
11214 | #ifdef FEATURE_SIMD |
11215 | { |
// We should check whether op2 should be assigned to a SIMD field or not.
// If it is, we should translate the tree to a SIMD intrinsic.
11218 | assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0)); |
11219 | GenTree* newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree); |
11220 | typ = tree->TypeGet(); |
11221 | op1 = tree->gtGetOp1(); |
11222 | op2 = tree->gtGetOp2(); |
11223 | #ifdef DEBUG |
11224 | assert((tree == newTree) && (tree->OperGet() == oper)); |
11225 | if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0) |
11226 | { |
11227 | tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
11228 | } |
11229 | #endif // DEBUG |
11230 | } |
11231 | #endif |
11232 | |
11233 | // We can't CSE the LHS of an assignment. Only r-values can be CSEed. |
11234 | // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former |
11235 | // behavior, allow CSE'ing if is a struct type (or a TYP_REF transformed from a struct type) |
11236 | // TODO-1stClassStructs: improve this. |
11237 | if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT)) |
11238 | { |
11239 | op1->gtFlags |= GTF_DONT_CSE; |
11240 | } |
11241 | break; |
11242 | |
11243 | case GT_ADDR: |
11244 | |
11245 | /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */ |
11246 | op1->gtFlags |= GTF_DONT_CSE; |
11247 | break; |
11248 | |
11249 | case GT_QMARK: |
11250 | case GT_JTRUE: |
11251 | |
11252 | noway_assert(op1); |
11253 | |
11254 | if (op1->OperKind() & GTK_RELOP) |
11255 | { |
11256 | noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK)); |
11257 | /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does |
11258 | not need to materialize the result as a 0 or 1. */ |
11259 | |
11260 | /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */ |
11261 | op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); |
11262 | |
11263 | // Request that the codegen for op1 sets the condition flags |
11264 | // when it generates the code for op1. |
11265 | // |
11266 | // Codegen for op1 must set the condition flags if |
11267 | // this method returns true. |
11268 | // |
11269 | op1->gtRequestSetFlags(); |
11270 | } |
11271 | else |
11272 | { |
11273 | GenTree* effOp1 = op1->gtEffectiveVal(); |
11274 | noway_assert((effOp1->gtOper == GT_CNS_INT) && |
11275 | (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); |
11276 | } |
11277 | break; |
11278 | |
11279 | case GT_COLON: |
11280 | #if LOCAL_ASSERTION_PROP |
11281 | if (optLocalAssertionProp) |
11282 | #endif |
11283 | { |
11284 | isQmarkColon = true; |
11285 | } |
11286 | break; |
11287 | |
11288 | case GT_INDEX: |
11289 | return fgMorphArrayIndex(tree); |
11290 | |
11291 | case GT_CAST: |
11292 | return fgMorphCast(tree); |
11293 | |
11294 | case GT_MUL: |
11295 | |
11296 | #ifndef _TARGET_64BIT_ |
11297 | if (typ == TYP_LONG) |
11298 | { |
/* For (long)int1 * (long)int2, we don't actually do the
11300 | casts, and just multiply the 32 bit values, which will |
11301 | give us the 64 bit result in edx:eax */ |
11302 | |
11303 | noway_assert(op2); |
11304 | if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST && |
11305 | genActualType(op1->CastFromType()) == TYP_INT && genActualType(op2->CastFromType()) == TYP_INT) && |
11306 | !op1->gtOverflow() && !op2->gtOverflow()) |
11307 | { |
11308 | // The casts have to be of the same signedness. |
11309 | if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED)) |
11310 | { |
11311 | // We see if we can force an int constant to change its signedness |
11312 | GenTree* constOp; |
11313 | if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT) |
11314 | constOp = op1; |
11315 | else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT) |
11316 | constOp = op2; |
11317 | else |
11318 | goto NO_MUL_64RSLT; |
11319 | |
11320 | if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000))) |
11321 | constOp->gtFlags ^= GTF_UNSIGNED; |
11322 | else |
11323 | goto NO_MUL_64RSLT; |
11324 | } |
11325 | |
11326 | // The only combination that can overflow |
11327 | if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED)) |
11328 | goto NO_MUL_64RSLT; |
11329 | |
11330 | /* Remaining combinations can never overflow during long mul. */ |
11331 | |
11332 | tree->gtFlags &= ~GTF_OVERFLOW; |
11333 | |
11334 | /* Do unsigned mul only if the casts were unsigned */ |
11335 | |
11336 | tree->gtFlags &= ~GTF_UNSIGNED; |
11337 | tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED; |
11338 | |
11339 | /* Since we are committing to GTF_MUL_64RSLT, we don't want |
11340 | the casts to be folded away. So morph the castees directly */ |
11341 | |
11342 | op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1); |
11343 | op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1); |
11344 | |
11345 | // Propagate side effect flags up the tree |
11346 | op1->gtFlags &= ~GTF_ALL_EFFECT; |
11347 | op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
11348 | op2->gtFlags &= ~GTF_ALL_EFFECT; |
11349 | op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
11350 | |
11351 | // If the GT_MUL can be altogether folded away, we should do that. |
11352 | |
11353 | if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) && |
11354 | opts.OptEnabled(CLFLG_CONSTANTFOLD)) |
11355 | { |
11356 | tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1); |
11357 | tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2); |
11358 | noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST); |
11359 | tree = gtFoldExprConst(tree); |
11360 | noway_assert(tree->OperIsConst()); |
11361 | return tree; |
11362 | } |
11363 | |
11364 | tree->gtFlags |= GTF_MUL_64RSLT; |
11365 | |
11366 | // If op1 and op2 are unsigned casts, we need to do an unsigned mult |
11367 | tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED); |
11368 | |
11369 | // Insert GT_NOP nodes for the cast operands so that they do not get folded |
11370 | // And propagate the new flags. We don't want to CSE the casts because |
11371 | // codegen expects GTF_MUL_64RSLT muls to have a certain layout. |
11372 | |
11373 | if (op1->gtCast.CastOp()->OperGet() != GT_NOP) |
11374 | { |
11375 | op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp()); |
11376 | op1->gtFlags &= ~GTF_ALL_EFFECT; |
11377 | op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT); |
11378 | } |
11379 | |
11380 | if (op2->gtCast.CastOp()->OperGet() != GT_NOP) |
11381 | { |
11382 | op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp()); |
11383 | op2->gtFlags &= ~GTF_ALL_EFFECT; |
11384 | op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT); |
11385 | } |
11386 | |
11387 | op1->gtFlags |= GTF_DONT_CSE; |
11388 | op2->gtFlags |= GTF_DONT_CSE; |
11389 | |
11390 | tree->gtFlags &= ~GTF_ALL_EFFECT; |
11391 | tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT); |
11392 | |
11393 | goto DONE_MORPHING_CHILDREN; |
11394 | } |
11395 | else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0) |
11396 | { |
11397 | NO_MUL_64RSLT: |
11398 | if (tree->gtOverflow()) |
11399 | helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF; |
11400 | else |
11401 | helper = CORINFO_HELP_LMUL; |
11402 | |
11403 | goto USE_HELPER_FOR_ARITH; |
11404 | } |
11405 | else |
11406 | { |
11407 | /* We are seeing this node again. We have decided to use |
11408 | GTF_MUL_64RSLT, so leave it alone. */ |
11409 | |
11410 | assert(tree->gtIsValid64RsltMul()); |
11411 | } |
11412 | } |
11413 | #endif // !_TARGET_64BIT_ |
11414 | break; |
11415 | |
11416 | case GT_DIV: |
11417 | |
11418 | #ifndef _TARGET_64BIT_ |
11419 | if (typ == TYP_LONG) |
11420 | { |
11421 | helper = CORINFO_HELP_LDIV; |
11422 | goto USE_HELPER_FOR_ARITH; |
11423 | } |
11424 | |
11425 | #if USE_HELPERS_FOR_INT_DIV |
11426 | if (typ == TYP_INT) |
11427 | { |
11428 | helper = CORINFO_HELP_DIV; |
11429 | goto USE_HELPER_FOR_ARITH; |
11430 | } |
11431 | #endif |
11432 | #endif // !_TARGET_64BIT_ |
11433 | |
11434 | if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI()) |
11435 | { |
11436 | op2 = gtFoldExprConst(op2); |
11437 | } |
11438 | break; |
11439 | |
11440 | case GT_UDIV: |
11441 | |
11442 | #ifndef _TARGET_64BIT_ |
11443 | if (typ == TYP_LONG) |
11444 | { |
11445 | helper = CORINFO_HELP_ULDIV; |
11446 | goto USE_HELPER_FOR_ARITH; |
11447 | } |
11448 | #if USE_HELPERS_FOR_INT_DIV |
11449 | if (typ == TYP_INT) |
11450 | { |
11451 | helper = CORINFO_HELP_UDIV; |
11452 | goto USE_HELPER_FOR_ARITH; |
11453 | } |
11454 | #endif |
#endif // !_TARGET_64BIT_
11456 | break; |
11457 | |
11458 | case GT_MOD: |
11459 | |
11460 | if (varTypeIsFloating(typ)) |
11461 | { |
11462 | helper = CORINFO_HELP_DBLREM; |
11463 | noway_assert(op2); |
11464 | if (op1->TypeGet() == TYP_FLOAT) |
11465 | { |
11466 | if (op2->TypeGet() == TYP_FLOAT) |
11467 | { |
11468 | helper = CORINFO_HELP_FLTREM; |
11469 | } |
11470 | else |
11471 | { |
11472 | tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE); |
11473 | } |
11474 | } |
11475 | else if (op2->TypeGet() == TYP_FLOAT) |
11476 | { |
11477 | tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE); |
11478 | } |
11479 | goto USE_HELPER_FOR_ARITH; |
11480 | } |
11481 | |
11482 | // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod. |
11483 | // A similar optimization for signed mod will not work for a negative perfectly divisible |
11484 | // HI-word. To make it correct, we would need to divide without the sign and then flip the |
// result sign after mod. This requires 18 opcodes + flow, making it not worth inlining.
11486 | goto ASSIGN_HELPER_FOR_MOD; |
11487 | |
11488 | case GT_UMOD: |
11489 | |
11490 | #ifdef _TARGET_ARMARCH_ |
11491 | // |
11492 | // Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization |
11493 | // |
11494 | #else // _TARGET_XARCH |
11495 | /* If this is an unsigned long mod with op2 which is a cast to long from a |
11496 | constant int, then don't morph to a call to the helper. This can be done |
11497 | faster inline using idiv. |
11498 | */ |
11499 | |
11500 | noway_assert(op2); |
11501 | if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) && |
11502 | ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) && |
11503 | ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED))) |
11504 | { |
11505 | if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT && |
11506 | op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 && |
11507 | op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff && |
11508 | (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED)) |
11509 | { |
11510 | tree->gtOp.gtOp2 = op2 = fgMorphCast(op2); |
11511 | noway_assert(op2->gtOper == GT_CNS_NATIVELONG); |
11512 | } |
11513 | |
11514 | if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 && |
11515 | op2->gtIntConCommon.LngValue() <= 0x3fffffff) |
11516 | { |
11517 | tree->gtOp.gtOp1 = op1 = fgMorphTree(op1); |
11518 | noway_assert(op1->TypeGet() == TYP_LONG); |
11519 | |
11520 | // Update flags for op1 morph |
11521 | tree->gtFlags &= ~GTF_ALL_EFFECT; |
11522 | |
11523 | tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant |
11524 | |
11525 | // If op1 is a constant, then do constant folding of the division operator |
11526 | if (op1->gtOper == GT_CNS_NATIVELONG) |
11527 | { |
11528 | tree = gtFoldExpr(tree); |
11529 | } |
11530 | return tree; |
11531 | } |
11532 | } |
11533 | #endif // _TARGET_XARCH |
11534 | |
11535 | ASSIGN_HELPER_FOR_MOD: |
11536 | |
// For "val % 1", return 0 if op1 doesn't have any side effects
// and we are not in the CSE phase; in the CSE phase we cannot discard 'tree'
// because it may contain CSE expressions that we haven't yet examined.
11540 | // |
11541 | if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase) |
11542 | { |
11543 | if (op2->IsIntegralConst(1)) |
11544 | { |
11545 | GenTree* zeroNode = gtNewZeroConNode(typ); |
11546 | #ifdef DEBUG |
11547 | zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
11548 | #endif |
11549 | DEBUG_DESTROY_NODE(tree); |
11550 | return zeroNode; |
11551 | } |
11552 | } |
11553 | |
11554 | #ifndef _TARGET_64BIT_ |
11555 | if (typ == TYP_LONG) |
11556 | { |
11557 | helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD; |
11558 | goto USE_HELPER_FOR_ARITH; |
11559 | } |
11560 | |
11561 | #if USE_HELPERS_FOR_INT_DIV |
11562 | if (typ == TYP_INT) |
11563 | { |
11564 | if (oper == GT_UMOD) |
11565 | { |
11566 | helper = CORINFO_HELP_UMOD; |
11567 | goto USE_HELPER_FOR_ARITH; |
11568 | } |
11569 | else if (oper == GT_MOD) |
11570 | { |
11571 | helper = CORINFO_HELP_MOD; |
11572 | goto USE_HELPER_FOR_ARITH; |
11573 | } |
11574 | } |
11575 | #endif |
11576 | #endif // !_TARGET_64BIT_ |
11577 | |
11578 | if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI()) |
11579 | { |
11580 | op2 = gtFoldExprConst(op2); |
11581 | } |
11582 | |
11583 | #ifdef _TARGET_ARM64_ |
11584 | // For ARM64 we don't have a remainder instruction, |
11585 | // The architecture manual suggests the following transformation to |
11586 | // generate code for such operator: |
11587 | // |
11588 | // a % b = a - (a / b) * b; |
11589 | // |
11590 | // TODO: there are special cases where it can be done better, for example |
// when the modulo operation is unsigned and the divisor is an
11592 | // integer constant power of two. In this case, we can make the transform: |
11593 | // |
11594 | // a % b = a & (b - 1); |
11595 | // |
11596 | // Lower supports it for all cases except when `a` is constant, but |
11597 | // in Morph we can't guarantee that `a` won't be transformed into a constant, |
// so we can't guarantee that lowering will be able to do this optimization.
11599 | { |
11600 | // Do "a % b = a - (a / b) * b" morph always, see TODO before this block. |
11601 | bool doMorphModToSubMulDiv = true; |
11602 | |
11603 | if (doMorphModToSubMulDiv) |
11604 | { |
11605 | assert(!optValnumCSE_phase); |
11606 | |
11607 | tree = fgMorphModToSubMulDiv(tree->AsOp()); |
11608 | op1 = tree->gtOp.gtOp1; |
11609 | op2 = tree->gtOp.gtOp2; |
11610 | } |
11611 | } |
11612 | #else // !_TARGET_ARM64_ |
11613 | // If b is not a power of 2 constant then lowering replaces a % b |
11614 | // with a - (a / b) * b and applies magic division optimization to |
11615 | // a / b. The code may already contain an a / b expression (e.g. |
11616 | // x = a / 10; y = a % 10;) and then we end up with redundant code. |
11617 | // If we convert % to / here we give CSE the opportunity to eliminate |
11618 | // the redundant division. If there's no redundant division then |
11619 | // nothing is lost, lowering would have done this transform anyway. |
11620 | |
11621 | if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst())) |
11622 | { |
11623 | ssize_t divisorValue = op2->AsIntCon()->IconValue(); |
11624 | size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) |
11625 | : static_cast<size_t>(abs(divisorValue)); |
11626 | |
11627 | if (!isPow2(absDivisorValue)) |
11628 | { |
11629 | tree = fgMorphModToSubMulDiv(tree->AsOp()); |
11630 | op1 = tree->gtOp.gtOp1; |
11631 | op2 = tree->gtOp.gtOp2; |
11632 | } |
11633 | } |
11634 | #endif // !_TARGET_ARM64_ |
11635 | break; |
11636 | |
11637 | USE_HELPER_FOR_ARITH: |
11638 | { |
11639 | // TODO: this comment is wrong now, do an appropriate fix. |
11640 | /* We have to morph these arithmetic operations into helper calls |
11641 | before morphing the arguments (preorder), else the arguments |
11642 | won't get correct values of fgPtrArgCntCur. |
11643 | However, try to fold the tree first in case we end up with a |
11644 | simple node which won't need a helper call at all */ |
11645 | |
11646 | noway_assert(tree->OperIsBinary()); |
11647 | |
11648 | GenTree* oldTree = tree; |
11649 | |
11650 | tree = gtFoldExpr(tree); |
11651 | |
11652 | // Were we able to fold it ? |
11653 | // Note that gtFoldExpr may return a non-leaf even if successful |
11654 | // e.g. for something like "expr / 1" - see also bug #290853 |
11655 | if (tree->OperIsLeaf() || (oldTree != tree)) |
11656 | { |
11657 | return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree); |
11658 | } |
11659 | |
11660 | // Did we fold it into a comma node with throw? |
11661 | if (tree->gtOper == GT_COMMA) |
11662 | { |
11663 | noway_assert(fgIsCommaThrow(tree)); |
11664 | return fgMorphTree(tree); |
11665 | } |
11666 | } |
11667 | return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2)); |
11668 | |
11669 | case GT_RETURN: |
11670 | // normalize small integer return values |
11671 | if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) && (op1->TypeGet() != TYP_VOID) && |
11672 | fgCastNeeded(op1, info.compRetType)) |
11673 | { |
11674 | // Small-typed return values are normalized by the callee |
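// e.g. for a method returning 'short', a cast to the small return type is inserted here so
// the returned value is properly truncated/sign-extended before the return.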
11675 | op1 = gtNewCastNode(TYP_INT, op1, false, info.compRetType); |
11676 | |
11677 | // Propagate GTF_COLON_COND |
11678 | op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND); |
11679 | |
11680 | tree->gtOp.gtOp1 = fgMorphCast(op1); |
11681 | |
11682 | // Propagate side effect flags |
11683 | tree->gtFlags &= ~GTF_ALL_EFFECT; |
11684 | tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
11685 | |
11686 | return tree; |
11687 | } |
11688 | break; |
11689 | |
11690 | case GT_EQ: |
11691 | case GT_NE: |
11692 | { |
11693 | GenTree* optimizedTree = gtFoldTypeCompare(tree); |
11694 | |
11695 | if (optimizedTree != tree) |
11696 | { |
11697 | return fgMorphTree(optimizedTree); |
11698 | } |
11699 | } |
11700 | |
11701 | __fallthrough; |
11702 | |
11703 | case GT_GT: |
11704 | |
11705 | // Try to optimize away calls to CORINFO_HELP_BOX_NULLABLE for GT_EQ, GT_NE, and unsigned GT_GT. |
11706 | if ((oper != GT_GT) || tree->IsUnsigned()) |
11707 | { |
11708 | fgMorphRecognizeBoxNullable(tree); |
11709 | } |
11710 | |
11711 | op1 = tree->gtOp.gtOp1; |
11712 | op2 = tree->gtGetOp2IfPresent(); |
11713 | |
11714 | break; |
11715 | |
11716 | case GT_RUNTIMELOOKUP: |
11717 | return fgMorphTree(op1); |
11718 | |
11719 | #ifdef _TARGET_ARM_ |
11720 | case GT_INTRINSIC: |
11721 | if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) |
11722 | { |
11723 | switch (tree->TypeGet()) |
11724 | { |
11725 | case TYP_DOUBLE: |
11726 | return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1)); |
11727 | case TYP_FLOAT: |
11728 | return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1)); |
11729 | default: |
11730 | unreached(); |
11731 | } |
11732 | } |
11733 | break; |
11734 | #endif |
11735 | case GT_LIST: |
11736 | // Special handling for the arg list. |
11737 | return fgMorphArgList(tree->AsArgList(), mac); |
11738 | |
11739 | default: |
11740 | break; |
11741 | } |
11742 | |
11743 | #if !CPU_HAS_FP_SUPPORT |
11744 | tree = fgMorphToEmulatedFP(tree); |
11745 | #endif |
11746 | |
11747 | /*------------------------------------------------------------------------- |
11748 | * Process the first operand, if any |
11749 | */ |
11750 | |
11751 | if (op1) |
11752 | { |
11753 | |
11754 | #if LOCAL_ASSERTION_PROP |
11755 | // If we are entering the "then" part of a Qmark-Colon we must |
11756 | // save the state of the current copy assignment table |
11757 | // so that we can restore this state when entering the "else" part |
11758 | if (isQmarkColon) |
11759 | { |
11760 | noway_assert(optLocalAssertionProp); |
11761 | if (optAssertionCount) |
11762 | { |
11763 | noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea |
11764 | unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); |
11765 | origAssertionTab = (AssertionDsc*)ALLOCA(tabSize); |
11766 | origAssertionCount = optAssertionCount; |
11767 | memcpy(origAssertionTab, optAssertionTabPrivate, tabSize); |
11768 | } |
11769 | else |
11770 | { |
11771 | origAssertionCount = 0; |
11772 | origAssertionTab = nullptr; |
11773 | } |
11774 | } |
11775 | #endif // LOCAL_ASSERTION_PROP |
11776 | |
11777 | // We might need a new MorphAddressContext context. (These are used to convey |
11778 | // parent context about how addresses being calculated will be used; see the |
11779 | // specification comment for MorphAddrContext for full details.) |
11780 | // Assume it's an Ind context to start. |
11781 | MorphAddrContext subIndMac1(MACK_Ind); |
11782 | MorphAddrContext* subMac1 = mac; |
11783 | if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind) |
11784 | { |
11785 | switch (tree->gtOper) |
11786 | { |
11787 | case GT_ADDR: |
11788 | if (subMac1 == nullptr) |
11789 | { |
11790 | subMac1 = &subIndMac1; |
11791 | subMac1->m_kind = MACK_Addr; |
11792 | } |
11793 | break; |
11794 | case GT_COMMA: |
11795 | // In a comma, the incoming context only applies to the rightmost arg of the |
11796 | // comma list. The left arg (op1) gets a fresh context. |
11797 | subMac1 = nullptr; |
11798 | break; |
11799 | case GT_OBJ: |
11800 | case GT_BLK: |
11801 | case GT_DYN_BLK: |
11802 | case GT_IND: |
11803 | subMac1 = &subIndMac1; |
11804 | break; |
11805 | default: |
11806 | break; |
11807 | } |
11808 | } |
11809 | |
11810 | // For additions, if we're in an IND context keep track of whether |
11811 | // all offsets added to the address are constant, and their sum. |
11812 | if (tree->gtOper == GT_ADD && subMac1 != nullptr) |
11813 | { |
11814 | assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock. |
11815 | GenTree* otherOp = tree->gtOp.gtOp2; |
11816 | // Is the other operator a constant? |
11817 | if (otherOp->IsCnsIntOrI()) |
11818 | { |
11819 | ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset); |
11820 | totalOffset += otherOp->gtIntConCommon.IconValue(); |
11821 | if (totalOffset.IsOverflow()) |
11822 | { |
11823 | // We will consider an offset so large as to overflow as "not a constant" -- |
11824 | // we will do a null check. |
11825 | subMac1->m_allConstantOffsets = false; |
11826 | } |
11827 | else |
11828 | { |
11829 | subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue(); |
11830 | } |
11831 | } |
11832 | else |
11833 | { |
11834 | subMac1->m_allConstantOffsets = false; |
11835 | } |
11836 | } |
11837 | |
11838 | // If gtOp1 is a GT_FIELD, we need to pass down the mac if |
11839 | // its parent is GT_ADDR, since the address of the field |
11840 | // is part of an ongoing address computation. Otherwise |
11841 | // op1 represents the value of the field and so any address |
11842 | // calculations it does are in a new context. |
11843 | if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR)) |
11844 | { |
11845 | subMac1 = nullptr; |
11846 | |
11847 | // The impact of this field's value to any ongoing |
11848 | // address computation is handled below when looking |
11849 | // at op2. |
11850 | } |
11851 | |
11852 | tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1); |
11853 | |
11854 | #if LOCAL_ASSERTION_PROP |
11855 | // If we are exiting the "then" part of a Qmark-Colon we must |
11856 | // save the state of the current copy assignment table |
11857 | // so that we can merge this state with the "else" part exit |
11858 | if (isQmarkColon) |
11859 | { |
11860 | noway_assert(optLocalAssertionProp); |
11861 | if (optAssertionCount) |
11862 | { |
11863 | noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea |
11864 | unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); |
11865 | thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize); |
11866 | thenAssertionCount = optAssertionCount; |
11867 | memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize); |
11868 | } |
11869 | else |
11870 | { |
11871 | thenAssertionCount = 0; |
11872 | thenAssertionTab = nullptr; |
11873 | } |
11874 | } |
11875 | #endif // LOCAL_ASSERTION_PROP |
11876 | |
11877 | /* Morphing along with folding and inlining may have changed the |
11878 | * side effect flags, so we have to reset them |
11879 | * |
11880 | * NOTE: Don't reset the exception flags on nodes that may throw */ |
11881 | |
11882 | assert(tree->gtOper != GT_CALL); |
11883 | |
11884 | if (!tree->OperRequiresCallFlag(this)) |
11885 | { |
11886 | tree->gtFlags &= ~GTF_CALL; |
11887 | } |
11888 | |
11889 | /* Propagate the new flags */ |
11890 | tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); |
11891 | |
// &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
11893 | // Similarly for clsVar |
11894 | if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR)) |
11895 | { |
11896 | tree->gtFlags &= ~GTF_GLOB_REF; |
11897 | } |
11898 | } // if (op1) |
11899 | |
11900 | /*------------------------------------------------------------------------- |
11901 | * Process the second operand, if any |
11902 | */ |
11903 | |
11904 | if (op2) |
11905 | { |
11906 | |
11907 | #if LOCAL_ASSERTION_PROP |
11908 | // If we are entering the "else" part of a Qmark-Colon we must |
11909 | // reset the state of the current copy assignment table |
11910 | if (isQmarkColon) |
11911 | { |
11912 | noway_assert(optLocalAssertionProp); |
11913 | optAssertionReset(0); |
11914 | if (origAssertionCount) |
11915 | { |
11916 | size_t tabSize = origAssertionCount * sizeof(AssertionDsc); |
11917 | memcpy(optAssertionTabPrivate, origAssertionTab, tabSize); |
11918 | optAssertionReset(origAssertionCount); |
11919 | } |
11920 | } |
11921 | #endif // LOCAL_ASSERTION_PROP |
11922 | |
11923 | // We might need a new MorphAddressContext context to use in evaluating op2. |
11924 | // (These are used to convey parent context about how addresses being calculated |
11925 | // will be used; see the specification comment for MorphAddrContext for full details.) |
11926 | // Assume it's an Ind context to start. |
11927 | switch (tree->gtOper) |
11928 | { |
11929 | case GT_ADD: |
11930 | if (mac != nullptr && mac->m_kind == MACK_Ind) |
11931 | { |
11932 | GenTree* otherOp = tree->gtOp.gtOp1; |
11933 | // Is the other operator a constant? |
11934 | if (otherOp->IsCnsIntOrI()) |
11935 | { |
11936 | mac->m_totalOffset += otherOp->gtIntConCommon.IconValue(); |
11937 | } |
11938 | else |
11939 | { |
11940 | mac->m_allConstantOffsets = false; |
11941 | } |
11942 | } |
11943 | break; |
11944 | default: |
11945 | break; |
11946 | } |
11947 | |
11948 | // If gtOp2 is a GT_FIELD, we must be taking its value, |
11949 | // so it should evaluate its address in a new context. |
11950 | if (op2->gtOper == GT_FIELD) |
11951 | { |
11952 | // The impact of this field's value to any ongoing |
11953 | // address computation is handled above when looking |
11954 | // at op1. |
11955 | mac = nullptr; |
11956 | } |
11957 | |
11958 | tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac); |
11959 | |
11960 | /* Propagate the side effect flags from op2 */ |
11961 | |
11962 | tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT); |
11963 | |
11964 | #if LOCAL_ASSERTION_PROP |
11965 | // If we are exiting the "else" part of a Qmark-Colon we must |
11966 | // merge the state of the current copy assignment table with |
11967 | // that of the exit of the "then" part. |
11968 | if (isQmarkColon) |
11969 | { |
11970 | noway_assert(optLocalAssertionProp); |
11971 | // If either exit table has zero entries then |
11972 | // the merged table also has zero entries |
11973 | if (optAssertionCount == 0 || thenAssertionCount == 0) |
11974 | { |
11975 | optAssertionReset(0); |
11976 | } |
11977 | else |
11978 | { |
11979 | size_t tabSize = optAssertionCount * sizeof(AssertionDsc); |
11980 | if ((optAssertionCount != thenAssertionCount) || |
11981 | (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0)) |
11982 | { |
11983 | // Yes they are different so we have to find the merged set |
11984 | // Iterate over the copy asgn table removing any entries |
11985 | // that do not have an exact match in the thenAssertionTab |
11986 | AssertionIndex index = 1; |
11987 | while (index <= optAssertionCount) |
11988 | { |
11989 | AssertionDsc* curAssertion = optGetAssertion(index); |
11990 | |
11991 | for (unsigned j = 0; j < thenAssertionCount; j++) |
11992 | { |
11993 | AssertionDsc* thenAssertion = &thenAssertionTab[j]; |
11994 | |
11995 | // Do the left sides match? |
11996 | if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) && |
11997 | (curAssertion->assertionKind == thenAssertion->assertionKind)) |
11998 | { |
11999 | // Do the right sides match? |
12000 | if ((curAssertion->op2.kind == thenAssertion->op2.kind) && |
12001 | (curAssertion->op2.lconVal == thenAssertion->op2.lconVal)) |
12002 | { |
12003 | goto KEEP; |
12004 | } |
12005 | else |
12006 | { |
12007 | goto REMOVE; |
12008 | } |
12009 | } |
12010 | } |
12011 | // |
// If we fall out of the loop above, we didn't find a matching
// entry in the thenAssertionTab; the assertion must have been
// killed on the "then" path, so we remove it here.
12015 | // |
12016 | REMOVE: |
// The data at optAssertionTabPrivate[index] is to be removed
12018 | CLANG_FORMAT_COMMENT_ANCHOR; |
12019 | #ifdef DEBUG |
12020 | if (verbose) |
12021 | { |
printf("The QMARK-COLON ");
printTreeID(tree);
printf(" removes assertion candidate #%d\n", index);
12025 | } |
12026 | #endif |
12027 | optAssertionRemove(index); |
12028 | continue; |
12029 | KEEP: |
// The data at optAssertionTabPrivate[index] is to be kept
12031 | index++; |
12032 | } |
12033 | } |
12034 | } |
12035 | } |
12036 | #endif // LOCAL_ASSERTION_PROP |
12037 | } // if (op2) |
12038 | |
12039 | DONE_MORPHING_CHILDREN: |
12040 | |
12041 | if (tree->OperMayThrow(this)) |
12042 | { |
12043 | // Mark the tree node as potentially throwing an exception |
12044 | tree->gtFlags |= GTF_EXCEPT; |
12045 | } |
12046 | else |
12047 | { |
12048 | if (tree->OperIsIndirOrArrLength()) |
12049 | { |
12050 | tree->gtFlags |= GTF_IND_NONFAULTING; |
12051 | } |
12052 | if (((op1 == nullptr) || ((op1->gtFlags & GTF_EXCEPT) == 0)) && |
12053 | ((op2 == nullptr) || ((op2->gtFlags & GTF_EXCEPT) == 0))) |
12054 | { |
12055 | tree->gtFlags &= ~GTF_EXCEPT; |
12056 | } |
12057 | } |
12058 | |
12059 | if (tree->OperRequiresAsgFlag()) |
12060 | { |
12061 | tree->gtFlags |= GTF_ASG; |
12062 | } |
12063 | else |
12064 | { |
12065 | if (((op1 == nullptr) || ((op1->gtFlags & GTF_ASG) == 0)) && |
12066 | ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0))) |
12067 | { |
12068 | tree->gtFlags &= ~GTF_ASG; |
12069 | } |
12070 | } |
12071 | |
12072 | if (tree->OperRequiresCallFlag(this)) |
12073 | { |
12074 | tree->gtFlags |= GTF_CALL; |
12075 | } |
12076 | else |
12077 | { |
12078 | if (((op1 == nullptr) || ((op1->gtFlags & GTF_CALL) == 0)) && |
12079 | ((op2 == nullptr) || ((op2->gtFlags & GTF_CALL) == 0))) |
12080 | { |
12081 | tree->gtFlags &= ~GTF_CALL; |
12082 | } |
12083 | } |
12084 | /*------------------------------------------------------------------------- |
12085 | * Now do POST-ORDER processing |
12086 | */ |
12087 | |
12088 | if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) && (op2 && !varTypeIsGC(op2->TypeGet()))) |
12089 | { |
12090 | // The tree is really not GC but was marked as such. Now that the |
12091 | // children have been unmarked, unmark the tree too. |
12092 | |
// Remember that GT_COMMA inherits its type only from op2
12094 | if (tree->gtOper == GT_COMMA) |
12095 | { |
12096 | tree->gtType = genActualType(op2->TypeGet()); |
12097 | } |
12098 | else |
12099 | { |
12100 | tree->gtType = genActualType(op1->TypeGet()); |
12101 | } |
12102 | } |
12103 | |
12104 | GenTree* oldTree = tree; |
12105 | |
12106 | GenTree* qmarkOp1 = nullptr; |
12107 | GenTree* qmarkOp2 = nullptr; |
12108 | |
12109 | if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON)) |
12110 | { |
12111 | qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1; |
12112 | qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2; |
12113 | } |
12114 | |
12115 | // Try to fold it, maybe we get lucky, |
12116 | tree = gtFoldExpr(tree); |
12117 | |
12118 | if (oldTree != tree) |
12119 | { |
12120 | /* if gtFoldExpr returned op1 or op2 then we are done */ |
12121 | if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2)) |
12122 | { |
12123 | return tree; |
12124 | } |
12125 | |
12126 | /* If we created a comma-throw tree then we need to morph op1 */ |
12127 | if (fgIsCommaThrow(tree)) |
12128 | { |
12129 | tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1); |
12130 | fgMorphTreeDone(tree); |
12131 | return tree; |
12132 | } |
12133 | |
12134 | return tree; |
12135 | } |
12136 | else if (tree->OperKind() & GTK_CONST) |
12137 | { |
12138 | return tree; |
12139 | } |
12140 | |
12141 | /* gtFoldExpr could have used setOper to change the oper */ |
12142 | oper = tree->OperGet(); |
12143 | typ = tree->TypeGet(); |
12144 | |
12145 | /* gtFoldExpr could have changed op1 and op2 */ |
12146 | op1 = tree->gtOp.gtOp1; |
12147 | op2 = tree->gtGetOp2IfPresent(); |
12148 | |
12149 | // Do we have an integer compare operation? |
12150 | // |
12151 | if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet())) |
12152 | { |
12153 | // Are we comparing against zero? |
12154 | // |
12155 | if (op2->IsIntegralConst(0)) |
12156 | { |
12157 | // Request that the codegen for op1 sets the condition flags |
12158 | // when it generates the code for op1. |
12159 | // |
// If the request is granted, codegen for op1 must set
// the condition flags.
12162 | // |
12163 | op1->gtRequestSetFlags(); |
12164 | } |
12165 | } |
12166 | /*------------------------------------------------------------------------- |
12167 | * Perform the required oper-specific postorder morphing |
12168 | */ |
12169 | |
12170 | GenTree* temp; |
12171 | GenTree* cns1; |
12172 | GenTree* cns2; |
12173 | size_t ival1, ival2; |
12174 | GenTree* lclVarTree; |
12175 | GenTree* effectiveOp1; |
12176 | FieldSeqNode* fieldSeq = nullptr; |
12177 | |
12178 | switch (oper) |
12179 | { |
12180 | case GT_ASG: |
12181 | |
12182 | lclVarTree = fgIsIndirOfAddrOfLocal(op1); |
12183 | if (lclVarTree != nullptr) |
12184 | { |
12185 | lclVarTree->gtFlags |= GTF_VAR_DEF; |
12186 | } |
12187 | |
12188 | effectiveOp1 = op1->gtEffectiveVal(); |
12189 | |
12190 | if (effectiveOp1->OperIsConst()) |
12191 | { |
12192 | op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1); |
12193 | tree->gtOp.gtOp1 = op1; |
12194 | } |
12195 | |
12196 | /* If we are storing a small type, we might be able to omit a cast */ |
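/* For example, when storing "CAST<byte>(x)" through a byte-sized GT_IND the cast can be
   dropped, because the byte-sized store performs the truncation itself */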
12197 | if ((effectiveOp1->gtOper == GT_IND) && varTypeIsSmall(effectiveOp1->TypeGet())) |
12198 | { |
12199 | if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow()) |
12200 | { |
12201 | var_types castType = op2->CastToType(); |
12202 | |
12203 | // If we are performing a narrowing cast and |
12204 | // castType is larger or the same as op1's type |
12205 | // then we can discard the cast. |
12206 | |
12207 | if (varTypeIsSmall(castType) && (genTypeSize(castType) >= genTypeSize(effectiveOp1->TypeGet()))) |
12208 | { |
12209 | tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp(); |
12210 | } |
12211 | } |
12212 | else if (op2->OperIsCompare() && varTypeIsByte(effectiveOp1->TypeGet())) |
12213 | { |
12214 | /* We don't need to zero extend the setcc instruction */ |
12215 | op2->gtType = TYP_BYTE; |
12216 | } |
12217 | } |
12218 | // If we introduced a CSE we may need to undo the optimization above |
// (i.e. "op2->gtType = TYP_BYTE;", which depends upon op1 being a GT_IND of a byte type)
// When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
12221 | else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR)) |
12222 | { |
12223 | unsigned varNum = op1->gtLclVarCommon.gtLclNum; |
12224 | LclVarDsc* varDsc = &lvaTable[varNum]; |
12225 | |
12226 | /* We again need to zero extend the setcc instruction */ |
12227 | op2->gtType = varDsc->TypeGet(); |
12228 | } |
12229 | fgAssignSetVarDef(tree); |
12230 | |
12231 | /* We can't CSE the LHS of an assignment */ |
/* We also must set this flag in the pre-morphing phase, otherwise assertionProp doesn't see it */
12233 | if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT)) |
12234 | { |
12235 | op1->gtFlags |= GTF_DONT_CSE; |
12236 | } |
12237 | break; |
12238 | |
12239 | case GT_EQ: |
12240 | case GT_NE: |
12241 | |
12242 | /* Make sure we're allowed to do this */ |
12243 | |
12244 | if (optValnumCSE_phase) |
12245 | { |
12246 | // It is not safe to reorder/delete CSE's |
12247 | break; |
12248 | } |
12249 | |
12250 | cns2 = op2; |
12251 | |
12252 | /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */ |
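/* For example, "(x + 5) == 8" becomes "x == 3" and "(x - 2) == 8" becomes "x == 10" */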
12253 | |
12254 | if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0) |
12255 | { |
12256 | op1 = tree->gtOp.gtOp1; |
12257 | |
12258 | /* Since this can occur repeatedly we use a while loop */ |
12259 | |
12260 | while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && |
12261 | (op1->gtType == TYP_INT) && (op1->gtOverflow() == false)) |
12262 | { |
12263 | /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */ |
12264 | |
12265 | ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal; |
12266 | ival2 = cns2->gtIntCon.gtIconVal; |
12267 | |
12268 | if (op1->gtOper == GT_ADD) |
12269 | { |
12270 | ival2 -= ival1; |
12271 | } |
12272 | else |
12273 | { |
12274 | ival2 += ival1; |
12275 | } |
12276 | cns2->gtIntCon.gtIconVal = ival2; |
12277 | |
12278 | #ifdef _TARGET_64BIT_ |
12279 | // we need to properly re-sign-extend or truncate as needed. |
12280 | cns2->AsIntCon()->TruncateOrSignExtend32(); |
12281 | #endif // _TARGET_64BIT_ |
12282 | |
12283 | op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1; |
12284 | } |
12285 | } |
12286 | |
12287 | // |
12288 | // Here we look for the following tree |
12289 | // |
12290 | // EQ/NE |
12291 | // / \ |
12292 | // op1 CNS 0/1 |
12293 | // |
12294 | ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1 |
12295 | |
12296 | // cast to unsigned allows test for both 0 and 1 |
12297 | if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U)) |
12298 | { |
12299 | ival2 = (size_t)cns2->gtIntConCommon.IconValue(); |
12300 | } |
// cast to UINT64 allows test for both 0 and 1
else if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
{
12304 | ival2 = (size_t)cns2->gtIntConCommon.LngValue(); |
12305 | } |
12306 | |
12307 | if (ival2 != INT_MAX) |
12308 | { |
// We can only do this optimization when op1 is a GT_COMMA whose second operand is a relop
12310 | // |
12311 | if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare())) |
12312 | { |
12313 | // Here we look for the following transformation |
12314 | // |
12315 | // EQ/NE Possible REVERSE(RELOP) |
12316 | // / \ / \ |
12317 | // COMMA CNS 0/1 -> COMMA relop_op2 |
12318 | // / \ / \ |
12319 | // x RELOP x relop_op1 |
12320 | // / \ |
12321 | // relop_op1 relop_op2 |
12322 | // |
12323 | // |
12324 | // |
12325 | GenTree* comma = op1; |
12326 | GenTree* relop = comma->gtOp.gtOp2; |
12327 | |
12328 | GenTree* relop_op1 = relop->gtOp.gtOp1; |
12329 | |
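// Reverse the relop when the original tree tests for it being false:
// "(relop == 0)" and "(relop != 1)" are both equivalent to "!relop".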
12330 | bool reverse = ((ival2 == 0) == (oper == GT_EQ)); |
12331 | |
12332 | if (reverse) |
12333 | { |
12334 | gtReverseCond(relop); |
12335 | } |
12336 | |
12337 | relop->gtOp.gtOp1 = comma; |
12338 | comma->gtOp.gtOp2 = relop_op1; |
12339 | |
12340 | // Comma now has fewer nodes underneath it, so we need to regenerate its flags |
12341 | comma->gtFlags &= ~GTF_ALL_EFFECT; |
12342 | comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT; |
12343 | comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT; |
12344 | |
12345 | noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0); |
12346 | noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0); |
12347 | relop->gtFlags |= |
12348 | tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT); |
12349 | |
12350 | return relop; |
12351 | } |
12352 | |
12353 | if (op1->gtOper == GT_COMMA) |
12354 | { |
12355 | // Here we look for the following tree |
12356 | // and when the LCL_VAR is a temp we can fold the tree: |
12357 | // |
12358 | // EQ/NE EQ/NE |
12359 | // / \ / \ |
12360 | // COMMA CNS 0/1 -> RELOP CNS 0/1 |
12361 | // / \ / \ |
12362 | // ASG LCL_VAR |
12363 | // / \ |
12364 | // LCL_VAR RELOP |
12365 | // / \ |
12366 | // |
12367 | |
12368 | GenTree* asg = op1->gtOp.gtOp1; |
12369 | GenTree* lcl = op1->gtOp.gtOp2; |
12370 | |
12371 | /* Make sure that the left side of the comma is the assignment of the LCL_VAR */ |
12372 | if (asg->gtOper != GT_ASG) |
12373 | { |
12374 | goto SKIP; |
12375 | } |
12376 | |
12377 | /* The right side of the comma must be a LCL_VAR temp */ |
12378 | if (lcl->gtOper != GT_LCL_VAR) |
12379 | { |
12380 | goto SKIP; |
12381 | } |
12382 | |
12383 | unsigned lclNum = lcl->gtLclVarCommon.gtLclNum; |
12384 | noway_assert(lclNum < lvaCount); |
12385 | |
12386 | /* If the LCL_VAR is not a temp then bail, a temp has a single def */ |
12387 | if (!lvaTable[lclNum].lvIsTemp) |
12388 | { |
12389 | goto SKIP; |
12390 | } |
12391 | |
12392 | #if FEATURE_ANYCSE |
12393 | /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */ |
12394 | // Fix 383856 X86/ARM ILGEN |
12395 | if (lclNumIsCSE(lclNum)) |
12396 | { |
12397 | goto SKIP; |
12398 | } |
12399 | #endif |
12400 | |
12401 | /* We also must be assigning the result of a RELOP */ |
12402 | if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR) |
12403 | { |
12404 | goto SKIP; |
12405 | } |
12406 | |
12407 | /* Both of the LCL_VAR must match */ |
12408 | if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum) |
12409 | { |
12410 | goto SKIP; |
12411 | } |
12412 | |
12413 | /* If right side of asg is not a RELOP then skip */ |
12414 | if (!asg->gtOp.gtOp2->OperIsCompare()) |
12415 | { |
12416 | goto SKIP; |
12417 | } |
12418 | |
12419 | LclVarDsc* varDsc = lvaTable + lclNum; |
12420 | |
12421 | /* Set op1 to the right side of asg, (i.e. the RELOP) */ |
12422 | op1 = asg->gtOp.gtOp2; |
12423 | |
12424 | DEBUG_DESTROY_NODE(asg->gtOp.gtOp1); |
12425 | DEBUG_DESTROY_NODE(lcl); |
12426 | } |
12427 | |
12428 | if (op1->OperIsCompare()) |
12429 | { |
12430 | // Here we look for the following tree |
12431 | // |
12432 | // EQ/NE -> RELOP/!RELOP |
12433 | // / \ / \ |
12434 | // RELOP CNS 0/1 |
12435 | // / \ |
12436 | // |
12437 | // Note that we will remove/destroy the EQ/NE node and move |
// the RELOP up into its location.
12439 | |
12440 | /* Here we reverse the RELOP if necessary */ |
12441 | |
12442 | bool reverse = ((ival2 == 0) == (oper == GT_EQ)); |
12443 | |
12444 | if (reverse) |
12445 | { |
12446 | gtReverseCond(op1); |
12447 | } |
12448 | |
12449 | /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */ |
12450 | op1->gtType = tree->gtType; |
12451 | |
12452 | noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0); |
12453 | op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE); |
12454 | |
12455 | DEBUG_DESTROY_NODE(tree); |
12456 | return op1; |
12457 | } |
12458 | |
12459 | // |
12460 | // Now we check for a compare with the result of an '&' operator |
12461 | // |
12462 | // Here we look for the following transformation: |
12463 | // |
12464 | // EQ/NE EQ/NE |
12465 | // / \ / \ |
12466 | // AND CNS 0/1 -> AND CNS 0 |
12467 | // / \ / \ |
12468 | // RSZ/RSH CNS 1 x CNS (1 << y) |
12469 | // / \ |
12470 | // x CNS_INT +y |
12471 | |
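// For example, "((x >> 3) & 1) == 1" becomes "(x & 8) != 0"; when the original constant
// is 1 we reverse the condition so that it can compare against 0.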
12472 | if (op1->gtOper == GT_AND) |
12473 | { |
12474 | GenTree* andOp = op1; |
12475 | GenTree* rshiftOp = andOp->gtOp.gtOp1; |
12476 | |
12477 | if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH)) |
12478 | { |
12479 | goto SKIP; |
12480 | } |
12481 | |
12482 | if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI()) |
12483 | { |
12484 | goto SKIP; |
12485 | } |
12486 | |
12487 | ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal; |
12488 | |
12489 | if (shiftAmount < 0) |
12490 | { |
12491 | goto SKIP; |
12492 | } |
12493 | |
12494 | if (!andOp->gtOp.gtOp2->IsIntegralConst(1)) |
12495 | { |
12496 | goto SKIP; |
12497 | } |
12498 | |
12499 | if (andOp->gtType == TYP_INT) |
12500 | { |
12501 | if (shiftAmount > 31) |
12502 | { |
12503 | goto SKIP; |
12504 | } |
12505 | |
12506 | UINT32 newAndOperand = ((UINT32)1) << shiftAmount; |
12507 | |
12508 | andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand; |
12509 | |
12510 | // Reverse the cond if necessary |
12511 | if (ival2 == 1) |
12512 | { |
12513 | gtReverseCond(tree); |
12514 | cns2->gtIntCon.gtIconVal = 0; |
12515 | oper = tree->gtOper; |
12516 | } |
12517 | } |
12518 | else if (andOp->gtType == TYP_LONG) |
12519 | { |
12520 | if (shiftAmount > 63) |
12521 | { |
12522 | goto SKIP; |
12523 | } |
12524 | |
12525 | UINT64 newAndOperand = ((UINT64)1) << shiftAmount; |
12526 | |
12527 | andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand); |
12528 | |
12529 | // Reverse the cond if necessary |
12530 | if (ival2 == 1) |
12531 | { |
12532 | gtReverseCond(tree); |
12533 | cns2->gtIntConCommon.SetLngValue(0); |
12534 | oper = tree->gtOper; |
12535 | } |
12536 | } |
12537 | |
12538 | andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1; |
12539 | |
12540 | DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2); |
12541 | DEBUG_DESTROY_NODE(rshiftOp); |
12542 | } |
12543 | } // END if (ival2 != INT_MAX) |
12544 | |
12545 | SKIP: |
12546 | /* Now check for compares with small constant longs that can be cast to int */ |
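/* For example, "CAST<long>(i) == 10L" can become "i == 10", and "(x & 0xFFL) == 3L" can
   become "(CAST<int>(x) & 0xFF) == 3" */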
12547 | |
12548 | if (!cns2->OperIsConst()) |
12549 | { |
12550 | goto COMPARE; |
12551 | } |
12552 | |
12553 | if (cns2->TypeGet() != TYP_LONG) |
12554 | { |
12555 | goto COMPARE; |
12556 | } |
12557 | |
12558 | /* Is the constant 31 bits or smaller? */ |
12559 | |
12560 | if ((cns2->gtIntConCommon.LngValue() >> 31) != 0) |
12561 | { |
12562 | goto COMPARE; |
12563 | } |
12564 | |
/* Is the first comparand a mask (GT_AND) operation of type long? */
12566 | |
12567 | if (op1->gtOper != GT_AND) |
12568 | { |
12569 | /* Another interesting case: cast from int */ |
12570 | |
12571 | if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT && |
12572 | !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate |
12573 | !op1->gtOverflow()) // cannot be an overflow checking cast |
12574 | { |
12575 | /* Simply make this into an integer comparison */ |
12576 | |
12577 | tree->gtOp.gtOp1 = op1->gtCast.CastOp(); |
12578 | tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT); |
12579 | } |
12580 | |
12581 | goto COMPARE; |
12582 | } |
12583 | |
12584 | noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND); |
12585 | |
12586 | /* Is the result of the mask effectively an INT ? */ |
12587 | |
12588 | GenTree* andMask; |
12589 | andMask = op1->gtOp.gtOp2; |
12590 | if (andMask->gtOper != GT_CNS_NATIVELONG) |
12591 | { |
12592 | goto COMPARE; |
12593 | } |
12594 | if ((andMask->gtIntConCommon.LngValue() >> 32) != 0) |
12595 | { |
12596 | goto COMPARE; |
12597 | } |
12598 | |
12599 | /* Now we know that we can cast gtOp.gtOp1 of AND to int */ |
12600 | |
12601 | op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, false, TYP_INT); |
12602 | |
12603 | /* now replace the mask node (gtOp.gtOp2 of AND node) */ |
12604 | |
12605 | noway_assert(andMask == op1->gtOp.gtOp2); |
12606 | |
12607 | ival1 = (int)andMask->gtIntConCommon.LngValue(); |
12608 | andMask->SetOper(GT_CNS_INT); |
12609 | andMask->gtType = TYP_INT; |
12610 | andMask->gtIntCon.gtIconVal = ival1; |
12611 | |
12612 | /* now change the type of the AND node */ |
12613 | |
12614 | op1->gtType = TYP_INT; |
12615 | |
12616 | /* finally we replace the comparand */ |
12617 | |
12618 | ival2 = (int)cns2->gtIntConCommon.LngValue(); |
12619 | cns2->SetOper(GT_CNS_INT); |
12620 | cns2->gtType = TYP_INT; |
12621 | |
12622 | noway_assert(cns2 == op2); |
12623 | cns2->gtIntCon.gtIconVal = ival2; |
12624 | |
12625 | goto COMPARE; |
12626 | |
12627 | case GT_LT: |
12628 | case GT_LE: |
12629 | case GT_GE: |
12630 | case GT_GT: |
12631 | |
12632 | if ((tree->gtFlags & GTF_UNSIGNED) == 0) |
12633 | { |
12634 | if (op2->gtOper == GT_CNS_INT) |
12635 | { |
12636 | cns2 = op2; |
12637 | /* Check for "expr relop 1" */ |
12638 | if (cns2->IsIntegralConst(1)) |
12639 | { |
12640 | /* Check for "expr >= 1" */ |
12641 | if (oper == GT_GE) |
12642 | { |
12643 | /* Change to "expr > 0" */ |
12644 | oper = GT_GT; |
12645 | goto SET_OPER; |
12646 | } |
12647 | /* Check for "expr < 1" */ |
12648 | else if (oper == GT_LT) |
12649 | { |
12650 | /* Change to "expr <= 0" */ |
12651 | oper = GT_LE; |
12652 | goto SET_OPER; |
12653 | } |
12654 | } |
12655 | /* Check for "expr relop -1" */ |
12656 | else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT))) |
12657 | { |
12658 | /* Check for "expr <= -1" */ |
12659 | if (oper == GT_LE) |
12660 | { |
12661 | /* Change to "expr < 0" */ |
12662 | oper = GT_LT; |
12663 | goto SET_OPER; |
12664 | } |
12665 | /* Check for "expr > -1" */ |
12666 | else if (oper == GT_GT) |
12667 | { |
12668 | /* Change to "expr >= 0" */ |
12669 | oper = GT_GE; |
12670 | |
12671 | SET_OPER: |
12672 | // IF we get here we should be changing 'oper' |
12673 | assert(tree->OperGet() != oper); |
12674 | |
12675 | // Keep the old ValueNumber for 'tree' as the new expr |
12676 | // will still compute the same value as before |
12677 | tree->SetOper(oper, GenTree::PRESERVE_VN); |
12678 | cns2->gtIntCon.gtIconVal = 0; |
12679 | |
12680 | // vnStore is null before the ValueNumber phase has run |
12681 | if (vnStore != nullptr) |
12682 | { |
12683 | // Update the ValueNumber for 'cns2', as we just changed it to 0 |
12684 | fgValueNumberTreeConst(cns2); |
12685 | } |
12686 | |
12687 | op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2); |
12688 | } |
12689 | } |
12690 | } |
12691 | } |
12692 | else // we have an unsigned comparison |
12693 | { |
12694 | if (op2->IsIntegralConst(0)) |
12695 | { |
12696 | if ((oper == GT_GT) || (oper == GT_LE)) |
12697 | { |
12698 | // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT |
// recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
// if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
// and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
12702 | // occurs as a result of branch inversion. |
12703 | oper = (oper == GT_LE) ? GT_EQ : GT_NE; |
12704 | tree->SetOper(oper, GenTree::PRESERVE_VN); |
12705 | tree->gtFlags &= ~GTF_UNSIGNED; |
12706 | } |
12707 | } |
12708 | } |
12709 | |
12710 | COMPARE: |
12711 | |
12712 | noway_assert(tree->OperKind() & GTK_RELOP); |
12713 | break; |
12714 | |
12715 | case GT_MUL: |
12716 | |
12717 | #ifndef _TARGET_64BIT_ |
12718 | if (typ == TYP_LONG) |
12719 | { |
12720 | // This must be GTF_MUL_64RSLT |
12721 | assert(tree->gtIsValid64RsltMul()); |
12722 | return tree; |
12723 | } |
12724 | #endif // _TARGET_64BIT_ |
12725 | goto CM_OVF_OP; |
12726 | |
12727 | case GT_SUB: |
12728 | |
12729 | if (tree->gtOverflow()) |
12730 | { |
12731 | goto CM_OVF_OP; |
12732 | } |
12733 | |
12734 | // TODO #4104: there are a lot of other places where |
12735 | // this condition is not checked before transformations. |
12736 | if (fgGlobalMorph) |
12737 | { |
/* Check for "op1 - cns2"; we change it to "op1 + (-cns2)" */
12739 | |
12740 | noway_assert(op2); |
12741 | if (op2->IsCnsIntOrI()) |
12742 | { |
12743 | /* Negate the constant and change the node to be "+" */ |
12744 | |
12745 | op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue()); |
12746 | oper = GT_ADD; |
12747 | tree->ChangeOper(oper); |
12748 | goto CM_ADD_OP; |
12749 | } |
12750 | |
/* Check for "cns1 - op2"; we change it to "(cns1 + (-op2))" */
12752 | |
12753 | noway_assert(op1); |
12754 | if (op1->IsCnsIntOrI()) |
12755 | { |
12756 | noway_assert(varTypeIsIntOrI(tree)); |
12757 | |
12758 | // The type of the new GT_NEG node cannot just be op2->TypeGet(). |
12759 | // Otherwise we may sign-extend incorrectly in cases where the GT_NEG |
12760 | // node ends up feeding directly into a cast, for example in |
12761 | // GT_CAST<ubyte>(GT_SUB(0, s_1.ubyte)) |
12762 | tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2); |
12763 | fgMorphTreeDone(op2); |
12764 | |
12765 | oper = GT_ADD; |
12766 | tree->ChangeOper(oper); |
12767 | goto CM_ADD_OP; |
12768 | } |
12769 | |
12770 | /* No match - exit */ |
12771 | } |
12772 | break; |
12773 | |
12774 | #ifdef _TARGET_ARM64_ |
12775 | case GT_DIV: |
12776 | if (!varTypeIsFloating(tree->gtType)) |
12777 | { |
12778 | // Codegen for this instruction needs to be able to throw two exceptions: |
12779 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); |
12780 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); |
12781 | } |
12782 | break; |
12783 | case GT_UDIV: |
12784 | // Codegen for this instruction needs to be able to throw one exception: |
12785 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); |
12786 | break; |
12787 | #endif |
12788 | |
12789 | case GT_ADD: |
12790 | |
12791 | CM_OVF_OP: |
12792 | if (tree->gtOverflow()) |
12793 | { |
12794 | tree->gtRequestSetFlags(); |
12795 | |
// Add the exception-throwing basic block to jump to on overflow
12797 | |
12798 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); |
12799 | |
12800 | // We can't do any commutative morphing for overflow instructions |
12801 | |
12802 | break; |
12803 | } |
12804 | |
12805 | CM_ADD_OP: |
12806 | |
12807 | case GT_OR: |
12808 | case GT_XOR: |
12809 | case GT_AND: |
12810 | |
12811 | /* Commute any non-REF constants to the right */ |
12812 | |
12813 | noway_assert(op1); |
12814 | if (op1->OperIsConst() && (op1->gtType != TYP_REF)) |
12815 | { |
12816 | // TODO-Review: We used to assert here that |
12817 | // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD)); |
12818 | // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation, |
12819 | // and would sometimes hit this assertion. This may indicate a missed "remorph". |
12820 | // Task is to re-enable this assertion and investigate. |
12821 | |
12822 | /* Swap the operands */ |
12823 | tree->gtOp.gtOp1 = op2; |
12824 | tree->gtOp.gtOp2 = op1; |
12825 | |
12826 | op1 = op2; |
12827 | op2 = tree->gtOp.gtOp2; |
12828 | } |
12829 | |
12830 | /* See if we can fold GT_ADD nodes. */ |
12831 | |
12832 | if (oper == GT_ADD) |
12833 | { |
12834 | /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */ |
12835 | |
12836 | if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) && |
12837 | op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT && |
12838 | !op1->gtOverflow() && !op2->gtOverflow()) |
12839 | { |
12840 | cns1 = op1->gtOp.gtOp2; |
12841 | cns2 = op2->gtOp.gtOp2; |
12842 | cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal; |
12843 | #ifdef _TARGET_64BIT_ |
12844 | if (cns1->TypeGet() == TYP_INT) |
12845 | { |
12846 | // we need to properly re-sign-extend or truncate after adding two int constants above |
12847 | cns1->AsIntCon()->TruncateOrSignExtend32(); |
12848 | } |
12849 | #endif //_TARGET_64BIT_ |
12850 | |
12851 | tree->gtOp.gtOp2 = cns1; |
12852 | DEBUG_DESTROY_NODE(cns2); |
12853 | |
12854 | op1->gtOp.gtOp2 = op2->gtOp.gtOp1; |
12855 | op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT); |
12856 | DEBUG_DESTROY_NODE(op2); |
12857 | op2 = tree->gtOp.gtOp2; |
12858 | } |
12859 | |
12860 | if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ)) |
12861 | { |
12862 | /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */ |
12863 | CLANG_FORMAT_COMMENT_ANCHOR; |
12864 | |
12865 | if (op1->gtOper == GT_ADD && // |
12866 | !gtIsActiveCSE_Candidate(op1) && // |
12867 | !op1->gtOverflow() && // |
12868 | op1->gtOp.gtOp2->IsCnsIntOrI() && // |
12869 | (op1->gtOp.gtOp2->OperGet() == op2->OperGet()) && // |
12870 | (op1->gtOp.gtOp2->TypeGet() != TYP_REF) && // Don't fold REFs |
12871 | (op2->TypeGet() != TYP_REF)) // Don't fold REFs |
12872 | { |
12873 | cns1 = op1->gtOp.gtOp2; |
12874 | op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() + |
12875 | op2->gtIntConCommon.IconValue()); |
12876 | #ifdef _TARGET_64BIT_ |
12877 | if (op2->TypeGet() == TYP_INT) |
12878 | { |
12879 | // we need to properly re-sign-extend or truncate after adding two int constants above |
12880 | op2->AsIntCon()->TruncateOrSignExtend32(); |
12881 | } |
12882 | #endif //_TARGET_64BIT_ |
12883 | |
12884 | if (cns1->OperGet() == GT_CNS_INT) |
12885 | { |
12886 | op2->gtIntCon.gtFieldSeq = |
12887 | GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq); |
12888 | } |
12889 | DEBUG_DESTROY_NODE(cns1); |
12890 | |
12891 | tree->gtOp.gtOp1 = op1->gtOp.gtOp1; |
12892 | DEBUG_DESTROY_NODE(op1); |
12893 | op1 = tree->gtOp.gtOp1; |
12894 | } |
12895 | |
12896 | // Fold (x + 0). |
12897 | |
12898 | if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree)) |
12899 | { |
12900 | |
12901 | // If this addition is adding an offset to a null pointer, |
12902 | // avoid the work and yield the null pointer immediately. |
12903 | // Dereferencing the pointer in either case will have the |
12904 | // same effect. |
12905 | |
12906 | if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) && |
12907 | ((op1->gtFlags & GTF_ALL_EFFECT) == 0)) |
12908 | { |
12909 | op2->gtType = tree->gtType; |
12910 | DEBUG_DESTROY_NODE(op1); |
12911 | DEBUG_DESTROY_NODE(tree); |
12912 | return op2; |
12913 | } |
12914 | |
12915 | // Remove the addition iff it won't change the tree type |
12916 | // to TYP_REF. |
12917 | |
12918 | if (!gtIsActiveCSE_Candidate(op2) && |
12919 | ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF))) |
12920 | { |
12921 | if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) && |
12922 | (op2->gtIntCon.gtFieldSeq != nullptr) && |
12923 | (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField())) |
12924 | { |
12925 | fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq); |
12926 | } |
12927 | |
12928 | DEBUG_DESTROY_NODE(op2); |
12929 | DEBUG_DESTROY_NODE(tree); |
12930 | |
12931 | return op1; |
12932 | } |
12933 | } |
12934 | } |
12935 | } |
12936 | /* See if we can fold GT_MUL by const nodes */ |
12937 | else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase) |
12938 | { |
12939 | #ifndef _TARGET_64BIT_ |
12940 | noway_assert(typ <= TYP_UINT); |
12941 | #endif // _TARGET_64BIT_ |
12942 | noway_assert(!tree->gtOverflow()); |
12943 | |
12944 | ssize_t mult = op2->gtIntConCommon.IconValue(); |
12945 | bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr && |
12946 | op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq(); |
12947 | |
12948 | assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr); |
12949 | |
12950 | if (mult == 0) |
12951 | { |
12952 | // We may be able to throw away op1 (unless it has side-effects) |
12953 | |
12954 | if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0) |
12955 | { |
12956 | DEBUG_DESTROY_NODE(op1); |
12957 | DEBUG_DESTROY_NODE(tree); |
12958 | return op2; // Just return the "0" node |
12959 | } |
12960 | |
12961 | // We need to keep op1 for the side-effects. Hang it off |
12962 | // a GT_COMMA node |
12963 | |
12964 | tree->ChangeOper(GT_COMMA); |
12965 | return tree; |
12966 | } |
12967 | |
12968 | size_t abs_mult = (mult >= 0) ? mult : -mult; |
12969 | size_t lowestBit = genFindLowestBit(abs_mult); |
12970 | bool changeToShift = false; |
12971 | |
12972 | // is it a power of two? (positive or negative) |
12973 | if (abs_mult == lowestBit) |
12974 | { |
// If the multiplier is negative, negate op1 (min-int does not need negation)
12976 | if (mult < 0 && mult != SSIZE_T_MIN) |
12977 | { |
12978 | // The type of the new GT_NEG node cannot just be op1->TypeGet(). |
12979 | // Otherwise we may sign-extend incorrectly in cases where the GT_NEG |
12980 | // node ends up feeding directly a cast, for example in |
12981 | // GT_CAST<ubyte>(GT_MUL(-1, s_1.ubyte)) |
12982 | tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1); |
12983 | fgMorphTreeDone(op1); |
12984 | } |
12985 | |
12986 | // If "op2" is a constant array index, the other multiplicand must be a constant. |
12987 | // Transfer the annotation to the other one. |
12988 | if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr && |
12989 | op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq()) |
12990 | { |
12991 | assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr); |
12992 | GenTree* otherOp = op1; |
12993 | if (otherOp->OperGet() == GT_NEG) |
12994 | { |
12995 | otherOp = otherOp->gtOp.gtOp1; |
12996 | } |
12997 | assert(otherOp->OperGet() == GT_CNS_INT); |
12998 | assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField()); |
12999 | otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq; |
13000 | } |
13001 | |
13002 | if (abs_mult == 1) |
13003 | { |
13004 | DEBUG_DESTROY_NODE(op2); |
13005 | DEBUG_DESTROY_NODE(tree); |
13006 | return op1; |
13007 | } |
13008 | |
13009 | /* Change the multiplication into a shift by log2(val) bits */ |
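/* For example, "x * 8" becomes "x << 3", and "x * -8" becomes "(-x) << 3" */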
13010 | op2->gtIntConCommon.SetIconValue(genLog2(abs_mult)); |
13011 | changeToShift = true; |
13012 | } |
13013 | #if LEA_AVAILABLE |
13014 | else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult()) |
13015 | { |
13016 | int shift = genLog2(lowestBit); |
13017 | ssize_t factor = abs_mult >> shift; |
13018 | |
13019 | if (factor == 3 || factor == 5 || factor == 9) |
13020 | { |
// If the multiplier is negative, negate op1 (min-int does not need negation)
13022 | if (mult < 0 && mult != SSIZE_T_MIN) |
13023 | { |
13024 | tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1); |
13025 | fgMorphTreeDone(op1); |
13026 | } |
13027 | |
13028 | GenTree* factorIcon = gtNewIconNode(factor, TYP_I_IMPL); |
13029 | if (op2IsConstIndex) |
13030 | { |
13031 | factorIcon->AsIntCon()->gtFieldSeq = |
13032 | GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField); |
13033 | } |
13034 | |
13035 | // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift |
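// For example, "x * 24" becomes "(x * 3) << 3"; the remaining multiply by 3, 5 or 9 is
// cheap on targets where it maps onto a scaled address computation.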
13036 | tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon); |
13037 | fgMorphTreeDone(op1); |
13038 | |
13039 | op2->gtIntConCommon.SetIconValue(shift); |
13040 | changeToShift = true; |
13041 | } |
13042 | } |
13043 | #endif // LEA_AVAILABLE |
13044 | if (changeToShift) |
13045 | { |
13046 | // vnStore is null before the ValueNumber phase has run |
13047 | if (vnStore != nullptr) |
13048 | { |
13049 | // Update the ValueNumber for 'op2', as we just changed the constant |
13050 | fgValueNumberTreeConst(op2); |
13051 | } |
13052 | oper = GT_LSH; |
13053 | // Keep the old ValueNumber for 'tree' as the new expr |
13054 | // will still compute the same value as before |
13055 | tree->ChangeOper(oper, GenTree::PRESERVE_VN); |
13056 | |
13057 | goto DONE_MORPHING_CHILDREN; |
13058 | } |
13059 | } |
13060 | else if (fgOperIsBitwiseRotationRoot(oper)) |
13061 | { |
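// Try to recognize a rotation pattern such as "(x << 4) | (x >> 28)" on a 32-bit operand
// and, if found, morph it into a single rotate node.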
13062 | tree = fgRecognizeAndMorphBitwiseRotation(tree); |
13063 | |
13064 | // fgRecognizeAndMorphBitwiseRotation may return a new tree |
13065 | oper = tree->OperGet(); |
13066 | typ = tree->TypeGet(); |
13067 | op1 = tree->gtOp.gtOp1; |
13068 | op2 = tree->gtOp.gtOp2; |
13069 | } |
13070 | |
13071 | break; |
13072 | |
13073 | case GT_NOT: |
13074 | case GT_NEG: |
13075 | |
13076 | /* Any constant cases should have been folded earlier */ |
13077 | noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase); |
13078 | break; |
13079 | |
13080 | case GT_CKFINITE: |
13081 | |
13082 | noway_assert(varTypeIsFloating(op1->TypeGet())); |
13083 | |
13084 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN); |
13085 | break; |
13086 | |
13087 | case GT_OBJ: |
13088 | // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on |
13089 | // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X |
13090 | // is a local or clsVar, even if it has been address-exposed. |
13091 | if (op1->OperGet() == GT_ADDR) |
13092 | { |
13093 | tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF); |
13094 | } |
13095 | break; |
13096 | |
13097 | case GT_IND: |
13098 | |
13099 | // Can not remove a GT_IND if it is currently a CSE candidate. |
13100 | if (gtIsActiveCSE_Candidate(tree)) |
13101 | { |
13102 | break; |
13103 | } |
13104 | |
13105 | bool foldAndReturnTemp; |
13106 | foldAndReturnTemp = false; |
13107 | temp = nullptr; |
13108 | ival1 = 0; |
13109 | |
13110 | // Don't remove a volatile GT_IND, even if the address points to a local variable. |
13111 | if ((tree->gtFlags & GTF_IND_VOLATILE) == 0) |
13112 | { |
13113 | /* Try to Fold *(&X) into X */ |
13114 | if (op1->gtOper == GT_ADDR) |
13115 | { |
13116 | // Can not remove a GT_ADDR if it is currently a CSE candidate. |
13117 | if (gtIsActiveCSE_Candidate(op1)) |
13118 | { |
13119 | break; |
13120 | } |
13121 | |
13122 | temp = op1->gtOp.gtOp1; // X |
13123 | |
13124 | // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that |
13125 | // they are the *same* struct type. In fact, they almost certainly aren't. If the |
13126 | // address has an associated field sequence, that identifies this case; go through |
13127 | // the "lcl_fld" path rather than this one. |
13128 | FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below. |
13129 | if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq)) |
13130 | { |
13131 | foldAndReturnTemp = true; |
13132 | } |
13133 | else if (temp->OperIsLocal()) |
13134 | { |
13135 | unsigned lclNum = temp->gtLclVarCommon.gtLclNum; |
13136 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
13137 | |
// We will try to optimize when we have a promoted struct with a zero lvFldOffset
13139 | if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0)) |
13140 | { |
13141 | noway_assert(varTypeIsStruct(varDsc)); |
13142 | |
13143 | // We will try to optimize when we have a single field struct that is being struct promoted |
13144 | if (varDsc->lvFieldCnt == 1) |
13145 | { |
13146 | unsigned lclNumFld = varDsc->lvFieldLclStart; |
13147 | // just grab the promoted field |
13148 | LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld]; |
13149 | |
// Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
13151 | // is zero |
13152 | if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0)) |
13153 | { |
13154 | // We can just use the existing promoted field LclNum |
13155 | temp->gtLclVarCommon.SetLclNum(lclNumFld); |
13156 | temp->gtType = fieldVarDsc->TypeGet(); |
13157 | |
13158 | foldAndReturnTemp = true; |
13159 | } |
13160 | } |
13161 | } |
13162 | // If the type of the IND (typ) is a "small int", and the type of the local has the |
13163 | // same width, then we can reduce to just the local variable -- it will be |
13164 | // correctly normalized, and signed/unsigned differences won't matter. |
13165 | // |
13166 | // The below transformation cannot be applied if the local var needs to be normalized on load. |
13167 | else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) && |
13168 | !lvaTable[lclNum].lvNormalizeOnLoad()) |
13169 | { |
13170 | tree->gtType = typ = temp->TypeGet(); |
13171 | foldAndReturnTemp = true; |
13172 | } |
13173 | else if (!varTypeIsStruct(typ) && (lvaTable[lclNum].lvType == typ) && |
13174 | !lvaTable[lclNum].lvNormalizeOnLoad()) |
13175 | { |
13176 | tree->gtType = typ = temp->TypeGet(); |
13177 | foldAndReturnTemp = true; |
13178 | } |
13179 | else |
13180 | { |
13181 | // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. |
13182 | // nullptr) |
13183 | assert(fieldSeq == nullptr); |
13184 | bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq); |
13185 | assert(b || fieldSeq == nullptr); |
13186 | |
13187 | if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD)) |
13188 | { |
13189 | // Append the field sequence, change the type. |
13190 | temp->AsLclFld()->gtFieldSeq = |
13191 | GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq); |
13192 | temp->gtType = typ; |
13193 | |
13194 | foldAndReturnTemp = true; |
13195 | } |
13196 | } |
// Otherwise we will fold this into a GT_LCL_FLD below
13198 | // where we check (temp != nullptr) |
13199 | } |
13200 | else // !temp->OperIsLocal() |
13201 | { |
13202 | // We don't try to fold away the GT_IND/GT_ADDR for this case |
13203 | temp = nullptr; |
13204 | } |
13205 | } |
13206 | else if (op1->OperGet() == GT_ADD) |
13207 | { |
13208 | /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */ |
13209 | |
13210 | if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT && |
13211 | opts.OptimizationEnabled()) |
13212 | { |
13213 | // No overflow arithmetic with pointers |
13214 | noway_assert(!op1->gtOverflow()); |
13215 | |
13216 | temp = op1->gtOp.gtOp1->gtOp.gtOp1; |
13217 | if (!temp->OperIsLocal()) |
13218 | { |
13219 | temp = nullptr; |
13220 | break; |
13221 | } |
13222 | |
13223 | // Can not remove the GT_ADDR if it is currently a CSE candidate. |
13224 | if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1)) |
13225 | { |
13226 | break; |
13227 | } |
13228 | |
13229 | ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal; |
13230 | fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq; |
13231 | |
13232 | // Does the address have an associated zero-offset field sequence? |
13233 | FieldSeqNode* addrFieldSeq = nullptr; |
13234 | if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq)) |
13235 | { |
13236 | fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq); |
13237 | } |
13238 | |
13239 | if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT) |
13240 | { |
13241 | noway_assert(!varTypeIsGC(temp->TypeGet())); |
13242 | foldAndReturnTemp = true; |
13243 | } |
13244 | else |
13245 | { |
13246 | // The emitter can't handle large offsets |
13247 | if (ival1 != (unsigned short)ival1) |
13248 | { |
13249 | break; |
13250 | } |
13251 | |
13252 | // The emitter can get confused by invalid offsets |
13253 | if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum)) |
13254 | { |
13255 | break; |
13256 | } |
13257 | |
13258 | #ifdef _TARGET_ARM_ |
13259 | // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field |
13260 | // |
13261 | if (varTypeIsFloating(typ)) |
13262 | { |
13263 | if ((ival1 % emitTypeSize(typ)) != 0) |
13264 | { |
13265 | tree->gtFlags |= GTF_IND_UNALIGNED; |
13266 | break; |
13267 | } |
13268 | } |
13269 | #endif |
13270 | } |
13271 | // Now we can fold this into a GT_LCL_FLD below |
13272 | // where we check (temp != nullptr) |
13273 | } |
13274 | } |
13275 | } |
13276 | |
13277 | // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging: |
13278 | // - We may have a load of a local where the load has a different type than the local |
13279 | // - We may have a load of a local plus an offset |
13280 | // |
13281 | // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and |
13282 | // offset if doing so is legal. The only cases in which this transformation is illegal are if the load |
13283 | // begins before the local or if the load extends beyond the end of the local (i.e. if the load is |
13284 | // out-of-bounds w.r.t. the local). |
13285 | if ((temp != nullptr) && !foldAndReturnTemp) |
13286 | { |
13287 | assert(temp->OperIsLocal()); |
13288 | |
13289 | const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum; |
13290 | LclVarDsc* const varDsc = &lvaTable[lclNum]; |
13291 | |
13292 | const var_types tempTyp = temp->TypeGet(); |
13293 | const bool useExactSize = varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK); |
13294 | const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp); |
13295 | |
13296 | // Make sure we do not enregister this lclVar. |
13297 | lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); |
13298 | |
13299 | // If the size of the load is greater than the size of the lclVar, we cannot fold this access into |
13300 | // a lclFld: the access represented by an lclFld node must begin at or after the start of the |
13301 | // lclVar and must not extend beyond the end of the lclVar. |
13302 | if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize)) |
13303 | { |
// We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival1',
// or if we already have a GT_LCL_FLD we will adjust its gtLclOffs by adding 'ival1'.
// Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
13307 | // |
13308 | if (temp->OperGet() == GT_LCL_FLD) |
13309 | { |
13310 | temp->AsLclFld()->gtLclOffs += (unsigned short)ival1; |
13311 | temp->AsLclFld()->gtFieldSeq = |
13312 | GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq); |
13313 | } |
13314 | else |
13315 | { |
13316 | temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"... |
13317 | temp->AsLclFld()->gtLclOffs = (unsigned short)ival1; |
13318 | if (fieldSeq != nullptr) |
13319 | { // If it does represent a field, note that. |
13320 | temp->AsLclFld()->gtFieldSeq = fieldSeq; |
13321 | } |
13322 | } |
13323 | temp->gtType = tree->gtType; |
13324 | foldAndReturnTemp = true; |
13325 | } |
13326 | } |
13327 | |
13328 | if (foldAndReturnTemp) |
13329 | { |
13330 | assert(temp != nullptr); |
13331 | assert(temp->TypeGet() == typ); |
13332 | assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR)); |
13333 | |
13334 | // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for |
13335 | // 'temp' because a GT_ADDR always marks it for its operand. |
13336 | temp->gtFlags &= ~GTF_DONT_CSE; |
13337 | temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE); |
13338 | |
13339 | if (op1->OperGet() == GT_ADD) |
13340 | { |
13341 | DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR |
13342 | DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT |
13343 | } |
13344 | DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR |
13345 | DEBUG_DESTROY_NODE(tree); // GT_IND |
13346 | |
13347 | // If the result of the fold is a local var, we may need to perform further adjustments e.g. for |
13348 | // normalization. |
13349 | if (temp->OperIs(GT_LCL_VAR)) |
13350 | { |
13351 | #ifdef DEBUG |
13352 | // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear |
13353 | // and the node in question must have this bit set (as it has already been morphed). |
13354 | temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
13355 | #endif // DEBUG |
13356 | const bool forceRemorph = true; |
13357 | temp = fgMorphLocalVar(temp, forceRemorph); |
13358 | #ifdef DEBUG |
// We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
13360 | // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function |
13361 | // returns. |
13362 | temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
13363 | #endif // DEBUG |
13364 | } |
13365 | |
13366 | return temp; |
13367 | } |
13368 | |
13369 | // Only do this optimization when we are in the global optimizer. Doing this after value numbering |
13370 | // could result in an invalid value number for the newly generated GT_IND node. |
13371 | if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph) |
13372 | { |
13373 | // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)). |
13374 | // TBD: this transformation is currently necessary for correctness -- it might |
13375 | // be good to analyze the failures that result if we don't do this, and fix them |
13376 | // in other ways. Ideally, this should be optional. |
13377 | GenTree* commaNode = op1; |
13378 | unsigned treeFlags = tree->gtFlags; |
13379 | commaNode->gtType = typ; |
13380 | commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is |
13381 | // dangerous, clear the GTF_REVERSE_OPS at |
13382 | // least. |
13383 | #ifdef DEBUG |
13384 | commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
13385 | #endif |
13386 | while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA) |
13387 | { |
13388 | commaNode = commaNode->gtOp.gtOp2; |
13389 | commaNode->gtType = typ; |
13390 | commaNode->gtFlags = |
13391 | (treeFlags & ~GTF_REVERSE_OPS & ~GTF_ASG & ~GTF_CALL); // Bashing the GT_COMMA flags here is |
// dangerous, clear the GTF_REVERSE_OPS, GTF_ASG, and GTF_CALL at
13393 | // least. |
13394 | commaNode->gtFlags |= |
13395 | ((commaNode->gtOp.gtOp1->gtFlags | commaNode->gtOp.gtOp2->gtFlags) & (GTF_ASG | GTF_CALL)); |
13396 | #ifdef DEBUG |
13397 | commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
13398 | #endif |
13399 | } |
13400 | bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0; |
13401 | ArrayInfo arrInfo; |
13402 | if (wasArrIndex) |
13403 | { |
13404 | bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo); |
13405 | assert(b); |
13406 | GetArrayInfoMap()->Remove(tree); |
13407 | } |
13408 | tree = op1; |
13409 | GenTree* addr = commaNode->gtOp.gtOp2; |
13410 | op1 = gtNewIndir(typ, addr); |
13411 | // This is very conservative |
13412 | op1->gtFlags |= treeFlags & ~GTF_ALL_EFFECT & ~GTF_IND_NONFAULTING; |
13413 | op1->gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT); |
13414 | |
13415 | if (wasArrIndex) |
13416 | { |
13417 | GetArrayInfoMap()->Set(op1, arrInfo); |
13418 | } |
13419 | #ifdef DEBUG |
13420 | op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
13421 | #endif |
13422 | commaNode->gtOp.gtOp2 = op1; |
13423 | commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); |
13424 | return tree; |
13425 | } |
13426 | |
13427 | break; |
13428 | |
13429 | case GT_ADDR: |
13430 | |
13431 | // Can not remove op1 if it is currently a CSE candidate. |
13432 | if (gtIsActiveCSE_Candidate(op1)) |
13433 | { |
13434 | break; |
13435 | } |
13436 | |
13437 | if (op1->OperGet() == GT_IND) |
13438 | { |
13439 | if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0) |
13440 | { |
13441 | // Can not remove a GT_ADDR if it is currently a CSE candidate. |
13442 | if (gtIsActiveCSE_Candidate(tree)) |
13443 | { |
13444 | break; |
13445 | } |
13446 | |
13447 | // Perform the transform ADDR(IND(...)) == (...). |
13448 | GenTree* addr = op1->gtOp.gtOp1; |
13449 | |
13450 | noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL); |
13451 | |
13452 | DEBUG_DESTROY_NODE(op1); |
13453 | DEBUG_DESTROY_NODE(tree); |
13454 | |
13455 | return addr; |
13456 | } |
13457 | } |
13458 | else if (op1->OperGet() == GT_OBJ) |
13459 | { |
13460 | // Can not remove a GT_ADDR if it is currently a CSE candidate. |
13461 | if (gtIsActiveCSE_Candidate(tree)) |
13462 | { |
13463 | break; |
13464 | } |
13465 | |
13466 | // Perform the transform ADDR(OBJ(...)) == (...). |
13467 | GenTree* addr = op1->AsObj()->Addr(); |
13468 | |
13469 | noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL); |
13470 | |
13471 | DEBUG_DESTROY_NODE(op1); |
13472 | DEBUG_DESTROY_NODE(tree); |
13473 | |
13474 | return addr; |
13475 | } |
13476 | else if (op1->gtOper == GT_CAST) |
13477 | { |
13478 | GenTree* casting = op1->gtCast.CastOp(); |
13479 | if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR) |
13480 | { |
13481 | DEBUG_DESTROY_NODE(op1); |
13482 | tree->gtOp.gtOp1 = op1 = casting; |
13483 | } |
13484 | } |
13485 | else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase) |
13486 | { |
13487 | // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)). |
13488 | // (Be sure to mark "z" as an l-value...) |
13489 | |
13490 | GenTreePtrStack commas(getAllocator(CMK_ArrayStack)); |
13491 | for (GenTree* comma = op1; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2()) |
13492 | { |
13493 | commas.Push(comma); |
13494 | } |
13495 | GenTree* commaNode = commas.Top(); |
13496 | |
13497 | // The top-level addr might be annotated with a zeroOffset field. |
13498 | FieldSeqNode* zeroFieldSeq = nullptr; |
13499 | bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq); |
13500 | tree = op1; |
13501 | commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE; |
13502 | |
13503 | // If the node we're about to put under a GT_ADDR is an indirection, it |
13504 | // doesn't need to be materialized, since we only want the addressing mode. Because |
13505 | // of this, this GT_IND is not a faulting indirection and we don't have to extract it |
13506 | // as a side effect. |
13507 | GenTree* commaOp2 = commaNode->gtOp.gtOp2; |
13508 | if (commaOp2->OperIsBlk()) |
13509 | { |
13510 | commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet()); |
13511 | } |
13512 | if (commaOp2->gtOper == GT_IND) |
13513 | { |
13514 | commaOp2->gtFlags |= GTF_IND_NONFAULTING; |
13515 | commaOp2->gtFlags &= ~GTF_EXCEPT; |
13516 | commaOp2->gtFlags |= (commaOp2->gtOp.gtOp1->gtFlags & GTF_EXCEPT); |
13517 | } |
13518 | |
13519 | op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2); |
13520 | |
13521 | if (isZeroOffset) |
13522 | { |
13523 | // Transfer the annotation to the new GT_ADDR node. |
13524 | GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq); |
13525 | } |
13526 | commaNode->gtOp.gtOp2 = op1; |
13527 | // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform |
13528 | // might give op1 a type different from byref (like, say, native int). So now go back and give |
13529 | // all the comma nodes the type of op1. |
13530 | // TODO: the comma flag update below is conservative and can be improved. |
13531 | // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to |
// get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
13533 | |
13534 | while (!commas.Empty()) |
13535 | { |
13536 | GenTree* comma = commas.Pop(); |
13537 | comma->gtType = op1->gtType; |
13538 | comma->gtFlags |= op1->gtFlags; |
13539 | #ifdef DEBUG |
13540 | comma->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
13541 | #endif |
13542 | gtUpdateNodeSideEffects(comma); |
13543 | } |
13544 | |
13545 | return tree; |
13546 | } |
13547 | |
13548 | /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */ |
13549 | op1->gtFlags |= GTF_DONT_CSE; |
13550 | break; |
13551 | |
13552 | case GT_COLON: |
13553 | if (fgGlobalMorph) |
13554 | { |
13555 | /* Mark the nodes that are conditionally executed */ |
13556 | fgWalkTreePre(&tree, gtMarkColonCond); |
13557 | } |
13558 | /* Since we're doing this postorder we clear this if it got set by a child */ |
13559 | fgRemoveRestOfBlock = false; |
13560 | break; |
13561 | |
13562 | case GT_COMMA: |
13563 | |
13564 | /* Special case: trees that don't produce a value */ |
13565 | if (op2->OperIs(GT_ASG) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2)) |
13566 | { |
13567 | typ = tree->gtType = TYP_VOID; |
13568 | } |
13569 | |
13570 | // If we are in the Valuenum CSE phase then don't morph away anything as these |
13571 | // nodes may have CSE defs/uses in them. |
13572 | // |
13573 | if (!optValnumCSE_phase) |
13574 | { |
13575 | // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this |
13576 | // is all we need. |
13577 | |
13578 | GenTree* op1SideEffects = nullptr; |
13579 | // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example) |
13580 | // hoisted expressions in loops. |
13581 | gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE)); |
13582 | if (op1SideEffects) |
13583 | { |
13584 | // Replace the left hand side with the side effect list. |
13585 | tree->gtOp.gtOp1 = op1SideEffects; |
13586 | gtUpdateNodeSideEffects(tree); |
13587 | } |
13588 | else |
13589 | { |
13590 | op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); |
13591 | DEBUG_DESTROY_NODE(tree); |
13592 | DEBUG_DESTROY_NODE(op1); |
13593 | return op2; |
13594 | } |
13595 | |
13596 | /* If the right operand is just a void nop node, throw it away */ |
13597 | if (op2->IsNothingNode() && op1->gtType == TYP_VOID) |
13598 | { |
13599 | op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); |
13600 | DEBUG_DESTROY_NODE(tree); |
13601 | DEBUG_DESTROY_NODE(op2); |
13602 | return op1; |
13603 | } |
13604 | } |
13605 | |
13606 | break; |
13607 | |
13608 | case GT_JTRUE: |
13609 | |
13610 | /* Special case if fgRemoveRestOfBlock is set to true */ |
13611 | if (fgRemoveRestOfBlock) |
13612 | { |
13613 | if (fgIsCommaThrow(op1, true)) |
13614 | { |
13615 | GenTree* throwNode = op1->gtOp.gtOp1; |
13616 | noway_assert(throwNode->gtType == TYP_VOID); |
13617 | |
13618 | JITDUMP("Removing [%06d] GT_JTRUE as the block now unconditionally throws an exception.\n", |
13619 | dspTreeID(tree)); |
13620 | DEBUG_DESTROY_NODE(tree); |
13621 | |
13622 | return throwNode; |
13623 | } |
13624 | |
13625 | noway_assert(op1->OperKind() & GTK_RELOP); |
13626 | noway_assert(op1->gtFlags & GTF_EXCEPT); |
13627 | |
13628 | // We need to keep op1 for the side-effects. Hang it off |
13629 | // a GT_COMMA node |
13630 | |
13631 | JITDUMP("Keeping side-effects by bashing [%06d] GT_JTRUE into a GT_COMMA.\n", dspTreeID(tree)); |
13632 | |
13633 | tree->ChangeOper(GT_COMMA); |
13634 | tree->gtOp.gtOp2 = op2 = gtNewNothingNode(); |
13635 | |
13636 | // Additionally, since we're eliminating the JTRUE, |
13637 | // codegen won't like it if op1 is a RELOP of longs, floats, or doubles. |
13638 | // So we change it into a GT_COMMA as well. |
13639 | JITDUMP("Also bashing [%06d] (a relop) into a GT_COMMA.\n", dspTreeID(op1)); |
13640 | op1->ChangeOper(GT_COMMA); |
13641 | op1->gtFlags &= ~GTF_UNSIGNED; // Clear the unsigned flag if it was set on the relop |
13642 | op1->gtType = op1->gtOp.gtOp1->gtType; |
13643 | |
13644 | return tree; |
13645 | } |
13646 | |
13647 | default: |
13648 | break; |
13649 | } |
13650 | |
13651 | assert(oper == tree->gtOper); |
13652 | |
13653 | // If we are in the Valuenum CSE phase then don't morph away anything as these |
13654 | // nodes may have CSE defs/uses in them. |
13655 | // |
13656 | if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList()) |
13657 | { |
13658 | /* Check for op1 as a GT_COMMA with an unconditional throw node */ |
13659 | if (op1 && fgIsCommaThrow(op1, true)) |
13660 | { |
13661 | if ((op1->gtFlags & GTF_COLON_COND) == 0) |
13662 | { |
13663 | /* We can safely throw out the rest of the statements */ |
13664 | fgRemoveRestOfBlock = true; |
13665 | } |
13666 | |
13667 | GenTree* throwNode = op1->gtOp.gtOp1; |
13668 | noway_assert(throwNode->gtType == TYP_VOID); |
13669 | |
13670 | if (oper == GT_COMMA) |
13671 | { |
13672 | /* Both tree and op1 are GT_COMMA nodes */ |
13673 | /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */ |
13674 | tree->gtOp.gtOp1 = throwNode; |
13675 | |
13676 | // Possibly reset the assignment flag |
13677 | if (((throwNode->gtFlags & GTF_ASG) == 0) && ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0))) |
13678 | { |
13679 | tree->gtFlags &= ~GTF_ASG; |
13680 | } |
13681 | |
13682 | return tree; |
13683 | } |
13684 | else if (oper != GT_NOP) |
13685 | { |
13686 | if (genActualType(typ) == genActualType(op1->gtType)) |
13687 | { |
13688 | /* The types match, so return the comma throw node as the new tree */ |
13689 | return op1; |
13690 | } |
13691 | else |
13692 | { |
13693 | if (typ == TYP_VOID) |
13694 | { |
13695 | // Return the throw node |
13696 | return throwNode; |
13697 | } |
13698 | else |
13699 | { |
13700 | GenTree* commaOp2 = op1->gtOp.gtOp2; |
13701 | |
13702 | // The types of op1 (the comma) and its second operand need to match the tree's type |
13703 | if (typ == TYP_LONG) |
13704 | { |
13705 | commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); |
13706 | commaOp2->gtIntConCommon.SetLngValue(0); |
13707 | /* Change the types of oper and commaOp2 to TYP_LONG */ |
13708 | op1->gtType = commaOp2->gtType = TYP_LONG; |
13709 | } |
13710 | else if (varTypeIsFloating(typ)) |
13711 | { |
13712 | commaOp2->ChangeOperConst(GT_CNS_DBL); |
13713 | commaOp2->gtDblCon.gtDconVal = 0.0; |
13714 | /* Change the types of oper and commaOp2 to TYP_DOUBLE */ |
13715 | op1->gtType = commaOp2->gtType = TYP_DOUBLE; |
13716 | } |
13717 | else |
13718 | { |
13719 | commaOp2->ChangeOperConst(GT_CNS_INT); |
13720 | commaOp2->gtIntConCommon.SetIconValue(0); |
13721 | /* Change the types of oper and commaOp2 to TYP_INT */ |
13722 | op1->gtType = commaOp2->gtType = TYP_INT; |
13723 | } |
13724 | |
13725 | /* Return the GT_COMMA node as the new tree */ |
13726 | return op1; |
13727 | } |
13728 | } |
13729 | } |
13730 | } |
13731 | |
13732 | /* Check for op2 as a GT_COMMA with an unconditional throw */ |
13733 | |
13734 | if (op2 && fgIsCommaThrow(op2, true)) |
13735 | { |
13736 | if ((op2->gtFlags & GTF_COLON_COND) == 0) |
13737 | { |
13738 | /* We can safely throw out the rest of the statements */ |
13739 | fgRemoveRestOfBlock = true; |
13740 | } |
13741 | |
13742 | // If op1 has no side-effects |
13743 | if ((op1->gtFlags & GTF_ALL_EFFECT) == 0) |
13744 | { |
13745 | // If tree is an asg node |
13746 | if (tree->OperIs(GT_ASG)) |
13747 | { |
13748 | /* Return the throw node as the new tree */ |
13749 | return op2->gtOp.gtOp1; |
13750 | } |
13751 | |
13752 | if (tree->OperGet() == GT_ARR_BOUNDS_CHECK) |
13753 | { |
13754 | /* Return the throw node as the new tree */ |
13755 | return op2->gtOp.gtOp1; |
13756 | } |
13757 | |
13758 | // If tree is a comma node |
13759 | if (tree->OperGet() == GT_COMMA) |
13760 | { |
13761 | /* Return the throw node as the new tree */ |
13762 | return op2->gtOp.gtOp1; |
13763 | } |
13764 | |
13765 | /* for the shift nodes the type of op2 can differ from the tree type */ |
13766 | if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT)) |
13767 | { |
13768 | noway_assert(GenTree::OperIsShiftOrRotate(oper)); |
13769 | |
13770 | GenTree* commaOp2 = op2->gtOp.gtOp2; |
13771 | |
13772 | commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); |
13773 | commaOp2->gtIntConCommon.SetLngValue(0); |
13774 | |
13775 | /* Change the types of oper and commaOp2 to TYP_LONG */ |
13776 | op2->gtType = commaOp2->gtType = TYP_LONG; |
13777 | } |
13778 | |
13779 | if ((genActualType(typ) == TYP_INT) && |
13780 | (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet()))) |
13781 | { |
13782 | // An example case is comparison (say GT_GT) of two longs or floating point values. |
13783 | |
13784 | GenTree* commaOp2 = op2->gtOp.gtOp2; |
13785 | |
13786 | commaOp2->ChangeOperConst(GT_CNS_INT); |
13787 | commaOp2->gtIntCon.gtIconVal = 0; |
13788 | /* Change the types of oper and commaOp2 to TYP_INT */ |
13789 | op2->gtType = commaOp2->gtType = TYP_INT; |
13790 | } |
13791 | |
13792 | if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL)) |
13793 | { |
13794 | noway_assert(tree->OperGet() == GT_ADD); |
13795 | |
13796 | GenTree* commaOp2 = op2->gtOp.gtOp2; |
13797 | |
13798 | commaOp2->ChangeOperConst(GT_CNS_INT); |
13799 | commaOp2->gtIntCon.gtIconVal = 0; |
13800 | /* Change the types of oper and commaOp2 to TYP_BYREF */ |
13801 | op2->gtType = commaOp2->gtType = TYP_BYREF; |
13802 | } |
13803 | |
13804 | /* types should now match */ |
13805 | noway_assert((genActualType(typ) == genActualType(op2->gtType))); |
13806 | |
13807 | /* Return the GT_COMMA node as the new tree */ |
13808 | return op2; |
13809 | } |
13810 | } |
13811 | } |
13812 | |
13813 | /*------------------------------------------------------------------------- |
13814 | * Optional morphing is done if tree transformations are permitted |
13815 | */ |
13816 | |
13817 | if ((opts.compFlags & CLFLG_TREETRANS) == 0) |
13818 | { |
13819 | return tree; |
13820 | } |
13821 | |
13822 | tree = fgMorphSmpOpOptional(tree->AsOp()); |
13823 | |
13824 | return tree; |
13825 | } |
13826 | #ifdef _PREFAST_ |
13827 | #pragma warning(pop) |
13828 | #endif |
13829 | |
13830 | GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) |
13831 | { |
13832 | genTreeOps oper = tree->gtOper; |
13833 | GenTree* op1 = tree->gtOp1; |
13834 | GenTree* op2 = tree->gtOp2; |
13835 | var_types typ = tree->TypeGet(); |
13836 | |
13837 | if (fgGlobalMorph && GenTree::OperIsCommutative(oper)) |
13838 | { |
13839 | /* Swap the operands so that the more expensive one is 'op1' */ |
13840 | |
13841 | if (tree->gtFlags & GTF_REVERSE_OPS) |
13842 | { |
13843 | tree->gtOp1 = op2; |
13844 | tree->gtOp2 = op1; |
13845 | |
13846 | op2 = op1; |
13847 | op1 = tree->gtOp1; |
13848 | |
13849 | tree->gtFlags &= ~GTF_REVERSE_OPS; |
13850 | } |
13851 | |
13852 | if (oper == op2->gtOper) |
13853 | { |
13854 | /* Reorder nested operators at the same precedence level to be |
13855 | left-recursive. For example, change "(a+(b+c))" to the |
13856 | equivalent expression "((a+b)+c)". |
13857 | */ |
13858 | |
13859 | /* Don't reorder floating-point operators; FP arithmetic is not associative */ |
13860 | |
13861 | if (!varTypeIsFloating(tree->TypeGet())) |
13862 | { |
13863 | fgMoveOpsLeft(tree); |
13864 | op1 = tree->gtOp1; |
13865 | op2 = tree->gtOp2; |
13866 | } |
13867 | } |
13868 | } |
13869 | |
13870 | #if REARRANGE_ADDS |
13871 | |
13872 | /* Change "((x+icon)+y)" to "((x+y)+icon)" |
13873 | Don't reorder floating-point operations */ |
13874 | |
13875 | if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() && |
13876 | varTypeIsIntegralOrI(typ)) |
13877 | { |
13878 | GenTree* ad2 = op1->gtOp.gtOp2; |
13879 | |
13880 | if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0) |
13881 | { |
13882 | // This takes |
13883 | //        + (tree) |
13884 | //       / \ |
13885 | //      /   \ |
13886 | //     /     \ |
13887 | //    + (op1)  op2 |
13888 | //   / \ |
13889 | //      \ |
13890 | //      ad2 |
13891 | // |
13892 | // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is |
13893 | // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), we would have a TYP_INT node |
13894 | // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same |
13895 | // type as (tree). |
13896 | // |
13897 | // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is |
13898 | // necessary |
13899 | |
13900 | if (varTypeIsGC(op2->TypeGet())) |
13901 | { |
13902 | noway_assert(varTypeIsGC(typ)); |
13903 | op1->gtType = typ; |
13904 | } |
13905 | tree->gtOp2 = ad2; |
13906 | |
13907 | op1->gtOp.gtOp2 = op2; |
13908 | op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT; |
13909 | |
13910 | op2 = tree->gtOp2; |
13911 | } |
13912 | } |
13913 | |
13914 | #endif |
13915 | |
13916 | /*------------------------------------------------------------------------- |
13917 | * Perform optional oper-specific postorder morphing |
13918 | */ |
13919 | |
13920 | switch (oper) |
13921 | { |
13922 | case GT_ASG: |
13923 | if (varTypeIsStruct(typ) && !tree->IsPhiDefn()) |
13924 | { |
13925 | if (tree->OperIsCopyBlkOp()) |
13926 | { |
13927 | return fgMorphCopyBlock(tree); |
13928 | } |
13929 | else |
13930 | { |
13931 | return fgMorphInitBlock(tree); |
13932 | } |
13933 | } |
13934 | |
13935 | if (typ == TYP_LONG) |
13936 | { |
13937 | break; |
13938 | } |
13939 | |
13940 | /* Make sure we're allowed to do this */ |
13941 | |
13942 | if (optValnumCSE_phase) |
13943 | { |
13944 | // It is not safe to reorder/delete CSE's |
13945 | break; |
13946 | } |
13947 | |
13948 | if (op2->gtFlags & GTF_ASG) |
13949 | { |
13950 | break; |
13951 | } |
13952 | |
13953 | if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT)) |
13954 | { |
13955 | break; |
13956 | } |
13957 | |
13958 | /* Special case: a cast that can be thrown away */ |
13959 | |
13960 | // TODO-Cleanup: fgMorphSmp does a similar optimization. However, it removes only |
13961 | // one cast and sometimes there is another one after it that gets removed by this |
13962 | // code. fgMorphSmp should be improved to remove all redundant casts so this code |
13963 | // can be removed. |
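// For example, for IND(byte) = CAST(short <- int), the cast only narrows the value to 16 bits
// while the byte-sized store keeps just the low 8 bits anyway, so the cast can be dropped.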
13964 | |
13965 | if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow()) |
13966 | { |
13967 | var_types srct; |
13968 | var_types cast; |
13969 | var_types dstt; |
13970 | |
13971 | srct = op2->gtCast.CastOp()->TypeGet(); |
13972 | cast = (var_types)op2->CastToType(); |
13973 | dstt = op1->TypeGet(); |
13974 | |
13975 | /* Make sure these are all ints and precision is not lost */ |
13976 | |
13977 | if (genTypeSize(cast) >= genTypeSize(dstt) && dstt <= TYP_INT && srct <= TYP_INT) |
13978 | { |
13979 | op2 = tree->gtOp2 = op2->gtCast.CastOp(); |
13980 | } |
13981 | } |
13982 | |
13983 | break; |
13984 | |
13985 | case GT_MUL: |
13986 | |
13987 | /* Check for the case "(val + icon) * icon" */ |
13988 | |
13989 | if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD) |
13990 | { |
13991 | GenTree* add = op1->gtOp.gtOp2; |
13992 | |
13993 | if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0)) |
13994 | { |
13995 | if (tree->gtOverflow() || op1->gtOverflow()) |
13996 | { |
13997 | break; |
13998 | } |
13999 | |
14000 | ssize_t imul = op2->gtIntCon.gtIconVal; |
14001 | ssize_t iadd = add->gtIntCon.gtIconVal; |
14002 | |
14003 | /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */ |
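// For example, (x + 2) * 4 becomes (x * 4) + 8.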
14004 | |
14005 | oper = GT_ADD; |
14006 | tree->ChangeOper(oper); |
14007 | |
14008 | op2->gtIntCon.gtIconVal = iadd * imul; |
14009 | |
14010 | op1->ChangeOper(GT_MUL); |
14011 | |
14012 | add->gtIntCon.gtIconVal = imul; |
14013 | #ifdef _TARGET_64BIT_ |
14014 | if (add->gtType == TYP_INT) |
14015 | { |
14016 | // we need to properly re-sign-extend or truncate after multiplying two int constants above |
14017 | add->AsIntCon()->TruncateOrSignExtend32(); |
14018 | } |
14019 | #endif //_TARGET_64BIT_ |
14020 | } |
14021 | } |
14022 | |
14023 | break; |
14024 | |
14025 | case GT_DIV: |
14026 | |
14027 | /* For "val / 1", just return "val" */ |
14028 | |
14029 | if (op2->IsIntegralConst(1)) |
14030 | { |
14031 | DEBUG_DESTROY_NODE(tree); |
14032 | return op1; |
14033 | } |
14034 | |
14035 | break; |
14036 | |
14037 | case GT_LSH: |
14038 | |
14039 | /* Check for the case "(val + icon) << icon" */ |
14040 | |
14041 | if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow()) |
14042 | { |
14043 | GenTree* cns = op1->gtOp.gtOp2; |
14044 | |
14045 | if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0)) |
14046 | { |
14047 | ssize_t ishf = op2->gtIntConCommon.IconValue(); |
14048 | ssize_t iadd = cns->gtIntConCommon.IconValue(); |
14049 | |
14050 | // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n"); |
14051 | |
14052 | /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */ |
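// For example, (x + 2) << 3 becomes (x << 3) + 16.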
14053 | |
14054 | tree->ChangeOper(GT_ADD); |
14055 | ssize_t result = iadd << ishf; |
14056 | op2->gtIntConCommon.SetIconValue(result); |
14057 | #ifdef _TARGET_64BIT_ |
14058 | if (op1->gtType == TYP_INT) |
14059 | { |
14060 | op2->AsIntCon()->TruncateOrSignExtend32(); |
14061 | } |
14062 | #endif // _TARGET_64BIT_ |
14063 | |
14064 | // we are reusing the shift amount node here, but the type we want is that of the shift result |
14065 | op2->gtType = op1->gtType; |
14066 | |
14067 | if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr && |
14068 | cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq()) |
14069 | { |
14070 | assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr); |
14071 | op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq; |
14072 | } |
14073 | |
14074 | op1->ChangeOper(GT_LSH); |
14075 | |
14076 | cns->gtIntConCommon.SetIconValue(ishf); |
14077 | } |
14078 | } |
14079 | |
14080 | break; |
14081 | |
14082 | case GT_XOR: |
14083 | |
14084 | if (!optValnumCSE_phase) |
14085 | { |
14086 | /* "x ^ -1" is "~x" */ |
14087 | |
14088 | if (op2->IsIntegralConst(-1)) |
14089 | { |
14090 | tree->ChangeOper(GT_NOT); |
14091 | tree->gtOp2 = nullptr; |
14092 | DEBUG_DESTROY_NODE(op2); |
14093 | } |
14094 | else if (op2->IsIntegralConst(1) && op1->OperIsCompare()) |
14095 | { |
14096 | /* "binaryVal ^ 1" is "!binaryVal" */ |
14097 | gtReverseCond(op1); |
14098 | DEBUG_DESTROY_NODE(op2); |
14099 | DEBUG_DESTROY_NODE(tree); |
14100 | return op1; |
14101 | } |
14102 | } |
14103 | |
14104 | break; |
14105 | |
14106 | case GT_INIT_VAL: |
14107 | // Initialization values for initBlk have special semantics - their lower |
14108 | // byte is used to fill the struct. However, we allow 0 as a "bare" value, |
14109 | // which enables them to get a VNForZero, and be propagated. |
14110 | if (op1->IsIntegralConst(0)) |
14111 | { |
14112 | return op1; |
14113 | } |
14114 | break; |
14115 | |
14116 | default: |
14117 | break; |
14118 | } |
14119 | return tree; |
14120 | } |
14121 | |
14122 | //------------------------------------------------------------------------ |
14123 | // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b |
14124 | // (see ECMA III 3.55 and III.3.56). |
14125 | // |
14126 | // Arguments: |
14127 | // tree - The GT_MOD/GT_UMOD tree to morph |
14128 | // |
14129 | // Returns: |
14130 | // The morphed tree |
14131 | // |
14132 | // Notes: |
14133 | // For ARM64 we don't have a remainder instruction so this transform is |
14134 | // always done. For XARCH this transform is done if we know that magic |
14135 | // division will be used; in that case, this transform allows CSE to |
14136 | // eliminate the redundant div from code like "x = a / 3; y = a % 3;". |
14137 | // |
14138 | // This method will produce the above expression if 'a' and 'b' are |
14139 | // leaf nodes; otherwise, if either of them is not a leaf, it will spill |
14140 | // its value into a temporary variable. For example: |
14141 | // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) ) |
14142 | // |
14143 | GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) |
14144 | { |
14145 | if (tree->OperGet() == GT_MOD) |
14146 | { |
14147 | tree->SetOper(GT_DIV); |
14148 | } |
14149 | else if (tree->OperGet() == GT_UMOD) |
14150 | { |
14151 | tree->SetOper(GT_UDIV); |
14152 | } |
14153 | else |
14154 | { |
14155 | noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv"); |
14156 | } |
14157 | |
14158 | var_types type = tree->gtType; |
14159 | GenTree* denominator = tree->gtOp2; |
14160 | GenTree* numerator = tree->gtOp1; |
14161 | |
14162 | if (!numerator->OperIsLeaf()) |
14163 | { |
14164 | numerator = fgMakeMultiUse(&tree->gtOp1); |
14165 | } |
14166 | |
14167 | if (!denominator->OperIsLeaf()) |
14168 | { |
14169 | denominator = fgMakeMultiUse(&tree->gtOp2); |
14170 | } |
14171 | |
14172 | // The numerator and denominator may have been assigned to temps, in which case |
14173 | // their defining assignments are in the current tree. Therefore, we need to |
14174 | // set the execution order accordingly on the nodes we create. |
14175 | // That is, the "mul" will be evaluated in "normal" order, and the "sub" must |
14176 | // be set to be evaluated in reverse order. |
14177 | // |
14178 | GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator)); |
14179 | assert(!mul->IsReverseOp()); |
14180 | GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul); |
14181 | sub->gtFlags |= GTF_REVERSE_OPS; |
14182 | |
14183 | #ifdef DEBUG |
14184 | sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
14185 | #endif |
14186 | |
14187 | return sub; |
14188 | } |
14189 | |
14190 | //------------------------------------------------------------------------------ |
14191 | // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree. |
14192 | // |
14193 | // |
14194 | // Arguments: |
14195 | // oper - Operation to check |
14196 | // |
14197 | // Return Value: |
14198 | // True if the operation can be a root of a bitwise rotation tree; false otherwise. |
14199 | |
14200 | bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper) |
14201 | { |
14202 | return (oper == GT_OR) || (oper == GT_XOR); |
14203 | } |
14204 | |
14205 | //------------------------------------------------------------------------------ |
14206 | // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return |
14207 | // an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree. |
14208 | // |
14209 | // Arguments: |
14210 | // tree - tree to check for a rotation pattern |
14211 | // |
14212 | // Return Value: |
14213 | // An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise. |
14214 | // |
14215 | // Assumption: |
14216 | // The input is a GT_OR or a GT_XOR tree. |
14217 | |
14218 | GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree) |
14219 | { |
14220 | // |
14221 | // Check for a rotation pattern, e.g., |
14222 | // |
14223 | //                      OR                      ROL |
14224 | //                   /      \                 /     \ |
14225 | //                 LSH      RSZ      ->     x         y |
14226 | //                 / \      / \ |
14227 | //               x   AND  x   AND |
14228 | //                   / \      / \ |
14229 | //                  y   31  ADD   31 |
14230 | //                          /   \ |
14231 | //                        NEG    32 |
14232 | //                         | |
14233 | //                         y |
14234 | // The patterns recognized: |
14235 | // (x << (y & M)) op (x >>> ((-y + N) & M)) |
14236 | // (x >>> ((-y + N) & M)) op (x << (y & M)) |
14237 | // |
14238 | // (x << y) op (x >>> (-y + N)) |
14239 | // (x >>> (-y + N)) op (x << y) |
14240 | // |
14241 | // (x >>> (y & M)) op (x << ((-y + N) & M)) |
14242 | // (x << ((-y + N) & M)) op (x >>> (y & M)) |
14243 | // |
14244 | // (x >>> y) op (x << (-y + N)) |
14245 | // (x << (-y + N)) op (x >>> y) |
14246 | // |
14247 | // (x << c1) op (x >>> c2) |
14248 | // (x >>> c1) op (x << c2) |
14249 | // |
14250 | // where |
14251 | // c1 and c2 are const |
14252 | // c1 + c2 == bitsize(x) |
14253 | // N == bitsize(x) |
14254 | // M is const |
14255 | // M & (N - 1) == N - 1 |
14256 | // op is either | or ^ |
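//
// For example, with a 32-bit x, (x << (y & 31)) | (x >>> ((-y + 32) & 31)) is recognized
// as a left rotate of x by y: both masks cover the minimal mask of 31, and the ADD of -y
// and 32 sits on the right-shift side, so rotateOp is GT_ROL.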
14257 | |
14258 | if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0)) |
14259 | { |
14260 | // We can't do anything if the tree has assignments, calls, or volatile |
14261 | // reads. Note that we allow GTF_EXCEPT side effect since any exceptions |
14262 | // thrown by the original tree will be thrown by the transformed tree as well. |
14263 | return tree; |
14264 | } |
14265 | |
14266 | genTreeOps oper = tree->OperGet(); |
14267 | assert(fgOperIsBitwiseRotationRoot(oper)); |
14268 | |
14269 | // Check if we have an LSH on one side of the OR and an RSZ on the other side. |
14270 | GenTree* op1 = tree->gtGetOp1(); |
14271 | GenTree* op2 = tree->gtGetOp2(); |
14272 | GenTree* leftShiftTree = nullptr; |
14273 | GenTree* rightShiftTree = nullptr; |
14274 | if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ)) |
14275 | { |
14276 | leftShiftTree = op1; |
14277 | rightShiftTree = op2; |
14278 | } |
14279 | else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH)) |
14280 | { |
14281 | leftShiftTree = op2; |
14282 | rightShiftTree = op1; |
14283 | } |
14284 | else |
14285 | { |
14286 | return tree; |
14287 | } |
14288 | |
14289 | // Check if the trees representing the value to shift are identical. |
14290 | // We already checked that there are no side effects above. |
14291 | if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1())) |
14292 | { |
14293 | GenTree* rotatedValue = leftShiftTree->gtGetOp1(); |
14294 | var_types rotatedValueActualType = genActualType(rotatedValue->gtType); |
14295 | ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8; |
14296 | noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64)); |
14297 | GenTree* leftShiftIndex = leftShiftTree->gtGetOp2(); |
14298 | GenTree* rightShiftIndex = rightShiftTree->gtGetOp2(); |
14299 | |
14300 | // The shift index may be masked. For the transformation to be valid, the mask must |
14301 | // preserve at least the low bits covered by (rotatedValueBitSize - 1). If the mask |
14302 | // also preserves additional higher bits, the transformation is still valid since the |
14303 | // result of MSIL shift instructions is unspecified if the shift amount is greater |
14304 | // than or equal to the width of the value being shifted. |
14305 | ssize_t minimalMask = rotatedValueBitSize - 1; |
14306 | ssize_t leftShiftMask = -1; |
14307 | ssize_t rightShiftMask = -1; |
14308 | |
14309 | if ((leftShiftIndex->OperGet() == GT_AND)) |
14310 | { |
14311 | if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI()) |
14312 | { |
14313 | leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal; |
14314 | leftShiftIndex = leftShiftIndex->gtGetOp1(); |
14315 | } |
14316 | else |
14317 | { |
14318 | return tree; |
14319 | } |
14320 | } |
14321 | |
14322 | if ((rightShiftIndex->OperGet() == GT_AND)) |
14323 | { |
14324 | if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI()) |
14325 | { |
14326 | rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal; |
14327 | rightShiftIndex = rightShiftIndex->gtGetOp1(); |
14328 | } |
14329 | else |
14330 | { |
14331 | return tree; |
14332 | } |
14333 | } |
14334 | |
14335 | if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask)) |
14336 | { |
14337 | // The shift index is overmasked, e.g., we have |
14338 | // something like (x << (y & 15)) or |
14339 | // (x >> ((32 - y) & 15)) with a 32-bit x. |
14340 | // The transformation is not valid. |
14341 | return tree; |
14342 | } |
14343 | |
14344 | GenTree* shiftIndexWithAdd = nullptr; |
14345 | GenTree* shiftIndexWithoutAdd = nullptr; |
14346 | genTreeOps rotateOp = GT_NONE; |
14347 | GenTree* rotateIndex = nullptr; |
14348 | |
14349 | if (leftShiftIndex->OperGet() == GT_ADD) |
14350 | { |
14351 | shiftIndexWithAdd = leftShiftIndex; |
14352 | shiftIndexWithoutAdd = rightShiftIndex; |
14353 | rotateOp = GT_ROR; |
14354 | } |
14355 | else if (rightShiftIndex->OperGet() == GT_ADD) |
14356 | { |
14357 | shiftIndexWithAdd = rightShiftIndex; |
14358 | shiftIndexWithoutAdd = leftShiftIndex; |
14359 | rotateOp = GT_ROL; |
14360 | } |
14361 | |
14362 | if (shiftIndexWithAdd != nullptr) |
14363 | { |
14364 | if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI()) |
14365 | { |
14366 | if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize) |
14367 | { |
14368 | if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG) |
14369 | { |
14370 | if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd)) |
14371 | { |
14372 | // We found one of these patterns: |
14373 | // (x << (y & M)) | (x >>> ((-y + N) & M)) |
14374 | // (x << y) | (x >>> (-y + N)) |
14375 | // (x >>> (y & M)) | (x << ((-y + N) & M)) |
14376 | // (x >>> y) | (x << (-y + N)) |
14377 | // where N == bitsize(x), M is const, and |
14378 | // M & (N - 1) == N - 1 |
14379 | CLANG_FORMAT_COMMENT_ANCHOR; |
14380 | |
14381 | #ifndef _TARGET_64BIT_ |
14382 | if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64)) |
14383 | { |
14384 | // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86. |
14385 | // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need |
14386 | // to add helpers for GT_ROL and GT_ROR. |
14387 | return tree; |
14388 | } |
14389 | #endif |
14390 | |
14391 | rotateIndex = shiftIndexWithoutAdd; |
14392 | } |
14393 | } |
14394 | } |
14395 | } |
14396 | } |
14397 | else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI())) |
14398 | { |
14399 | if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize) |
14400 | { |
14401 | // We found this pattern: |
14402 | // (x << c1) | (x >>> c2) |
14403 | // where c1 and c2 are const and c1 + c2 == bitsize(x) |
14404 | rotateOp = GT_ROL; |
14405 | rotateIndex = leftShiftIndex; |
14406 | } |
14407 | } |
14408 | |
14409 | if (rotateIndex != nullptr) |
14410 | { |
14411 | noway_assert(GenTree::OperIsRotate(rotateOp)); |
14412 | |
14413 | unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT; |
14414 | |
14415 | // We can use the same tree only during global morph; reusing the tree in a later morph |
14416 | // may invalidate value numbers. |
14417 | if (fgGlobalMorph) |
14418 | { |
14419 | tree->gtOp.gtOp1 = rotatedValue; |
14420 | tree->gtOp.gtOp2 = rotateIndex; |
14421 | tree->ChangeOper(rotateOp); |
14422 | |
14423 | unsigned childFlags = 0; |
14424 | for (GenTree* op : tree->Operands()) |
14425 | { |
14426 | childFlags |= (op->gtFlags & GTF_ALL_EFFECT); |
14427 | } |
14428 | |
14429 | // The parent's flags should be a superset of its operands' flags |
14430 | noway_assert((inputTreeEffects & childFlags) == childFlags); |
14431 | } |
14432 | else |
14433 | { |
14434 | tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex); |
14435 | noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT)); |
14436 | } |
14437 | |
14438 | return tree; |
14439 | } |
14440 | } |
14441 | return tree; |
14442 | } |
14443 | |
14444 | #if !CPU_HAS_FP_SUPPORT |
14445 | GenTree* Compiler::fgMorphToEmulatedFP(GenTree* tree) |
14446 | { |
14447 | |
14448 | genTreeOps oper = tree->OperGet(); |
14449 | var_types typ = tree->TypeGet(); |
14450 | GenTree* op1 = tree->gtOp.gtOp1; |
14451 | GenTree* op2 = tree->gtGetOp2IfPresent(); |
14452 | |
14453 | /* |
14454 | We have to use helper calls for all FP operations: |
14455 | |
14456 | FP operators that operate on FP values |
14457 | casts to and from FP |
14458 | comparisons of FP values |
14459 | */ |
14460 | |
14461 | if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet()))) |
14462 | { |
14463 | int helper; |
14464 | GenTree* args; |
14465 | |
14466 | /* Not all FP operations need helper calls */ |
14467 | |
14468 | switch (oper) |
14469 | { |
14470 | case GT_ASG: |
14471 | case GT_IND: |
14472 | case GT_LIST: |
14473 | case GT_ADDR: |
14474 | case GT_COMMA: |
14475 | return tree; |
14476 | } |
14477 | |
14478 | #ifdef DEBUG |
14479 | |
14480 | /* If the result isn't FP, it better be a compare or cast */ |
14481 | |
14482 | if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST)) |
14483 | gtDispTree(tree); |
14484 | |
14485 | noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST); |
14486 | #endif |
14487 | |
14488 | /* Keep track of how many arguments we're passing */ |
14489 | |
14490 | /* Is this a binary operator? */ |
14491 | |
14492 | if (op2) |
14493 | { |
14494 | /* What kind of an operator do we have? */ |
14495 | |
14496 | switch (oper) |
14497 | { |
14498 | case GT_ADD: |
14499 | helper = CPX_R4_ADD; |
14500 | break; |
14501 | case GT_SUB: |
14502 | helper = CPX_R4_SUB; |
14503 | break; |
14504 | case GT_MUL: |
14505 | helper = CPX_R4_MUL; |
14506 | break; |
14507 | case GT_DIV: |
14508 | helper = CPX_R4_DIV; |
14509 | break; |
14510 | // case GT_MOD: helper = CPX_R4_REM; break; |
14511 | |
14512 | case GT_EQ: |
14513 | helper = CPX_R4_EQ; |
14514 | break; |
14515 | case GT_NE: |
14516 | helper = CPX_R4_NE; |
14517 | break; |
14518 | case GT_LT: |
14519 | helper = CPX_R4_LT; |
14520 | break; |
14521 | case GT_LE: |
14522 | helper = CPX_R4_LE; |
14523 | break; |
14524 | case GT_GE: |
14525 | helper = CPX_R4_GE; |
14526 | break; |
14527 | case GT_GT: |
14528 | helper = CPX_R4_GT; |
14529 | break; |
14530 | |
14531 | default: |
14532 | #ifdef DEBUG |
14533 | gtDispTree(tree); |
14534 | #endif |
14535 | noway_assert(!"unexpected FP binary op"); |
14536 | break; |
14537 | } |
14538 | |
14539 | args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1); |
14540 | } |
14541 | else |
14542 | { |
14543 | switch (oper) |
14544 | { |
14545 | case GT_RETURN: |
14546 | return tree; |
14547 | |
14548 | case GT_CAST: |
14549 | noway_assert(!"FP cast"); |
14550 | |
14551 | case GT_NEG: |
14552 | helper = CPX_R4_NEG; |
14553 | break; |
14554 | |
14555 | default: |
14556 | #ifdef DEBUG |
14557 | gtDispTree(tree); |
14558 | #endif |
14559 | noway_assert(!"unexpected FP unary op"); |
14560 | break; |
14561 | } |
14562 | |
14563 | args = gtNewArgList(tree->gtOp.gtOp1); |
14564 | } |
14565 | |
14566 | /* If we have double result/operands, modify the helper */ |
14567 | |
14568 | if (typ == TYP_DOUBLE) |
14569 | { |
14570 | static_assert_no_msg(CPX_R4_NEG + 1 == CPX_R8_NEG); |
14571 | static_assert_no_msg(CPX_R4_ADD + 1 == CPX_R8_ADD); |
14572 | static_assert_no_msg(CPX_R4_SUB + 1 == CPX_R8_SUB); |
14573 | static_assert_no_msg(CPX_R4_MUL + 1 == CPX_R8_MUL); |
14574 | static_assert_no_msg(CPX_R4_DIV + 1 == CPX_R8_DIV); |
14575 | |
14576 | helper++; |
14577 | } |
14578 | else |
14579 | { |
14580 | noway_assert(tree->OperIsCompare()); |
14581 | |
14582 | static_assert_no_msg(CPX_R4_EQ + 1 == CPX_R8_EQ); |
14583 | static_assert_no_msg(CPX_R4_NE + 1 == CPX_R8_NE); |
14584 | static_assert_no_msg(CPX_R4_LT + 1 == CPX_R8_LT); |
14585 | static_assert_no_msg(CPX_R4_LE + 1 == CPX_R8_LE); |
14586 | static_assert_no_msg(CPX_R4_GE + 1 == CPX_R8_GE); |
14587 | static_assert_no_msg(CPX_R4_GT + 1 == CPX_R8_GT); |
14588 | } |
14589 | |
14590 | tree = fgMorphIntoHelperCall(tree, helper, args); |
14591 | |
14592 | return tree; |
14593 | |
14594 | case GT_RETURN: |
14595 | |
14596 | if (op1) |
14597 | { |
14598 | |
14599 | if (compCurBB == genReturnBB) |
14600 | { |
14601 | /* This is the 'exitCrit' call at the exit label */ |
14602 | |
14603 | noway_assert(op1->gtType == TYP_VOID); |
14604 | noway_assert(op2 == 0); |
14605 | |
14606 | tree->gtOp.gtOp1 = op1 = fgMorphTree(op1); |
14607 | |
14608 | return tree; |
14609 | } |
14610 | |
14611 | /* This is a (real) return value -- check its type */ |
14612 | CLANG_FORMAT_COMMENT_ANCHOR; |
14613 | |
14614 | #ifdef DEBUG |
14615 | if (genActualType(op1->TypeGet()) != genActualType(info.compRetType)) |
14616 | { |
14617 | bool allowMismatch = false; |
14618 | |
14619 | // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa |
14620 | if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) || |
14621 | (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL)) |
14622 | allowMismatch = true; |
14623 | |
14624 | if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet())) |
14625 | allowMismatch = true; |
14626 | |
14627 | if (!allowMismatch) |
14628 | NO_WAY("Return type mismatch"); |
14629 | } |
14630 | #endif |
14631 | } |
14632 | break; |
14633 | } |
14634 | return tree; |
14635 | } |
14636 | #endif |
14637 | |
14638 | /***************************************************************************** |
14639 | * |
14640 | * Transform the given tree for code generation and return an equivalent tree. |
14641 | */ |
14642 | |
14643 | GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac) |
14644 | { |
14645 | assert(tree); |
14646 | assert(tree->gtOper != GT_STMT); |
14647 | |
14648 | #ifdef DEBUG |
14649 | if (verbose) |
14650 | { |
14651 | if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID) |
14652 | { |
14653 | noway_assert(!"JitBreakMorphTree hit"); |
14654 | } |
14655 | } |
14656 | #endif |
14657 | |
14658 | #ifdef DEBUG |
14659 | int thisMorphNum = 0; |
14660 | if (verbose && treesBeforeAfterMorph) |
14661 | { |
14662 | thisMorphNum = morphNum++; |
14663 | printf("\nfgMorphTree (before %d):\n", thisMorphNum); |
14664 | gtDispTree(tree); |
14665 | } |
14666 | #endif |
14667 | |
14668 | if (fgGlobalMorph) |
14669 | { |
14670 | // Apply any rewrites for implicit byref arguments before morphing the |
14671 | // tree. |
14672 | |
14673 | if (fgMorphImplicitByRefArgs(tree)) |
14674 | { |
14675 | #ifdef DEBUG |
14676 | if (verbose && treesBeforeAfterMorph) |
14677 | { |
14678 | printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum); |
14679 | gtDispTree(tree); |
14680 | } |
14681 | #endif |
14682 | } |
14683 | } |
14684 | |
14685 | /*------------------------------------------------------------------------- |
14686 | * fgMorphTree() can potentially replace a tree with another, and the |
14687 | * caller has to store the return value correctly. |
14688 | * Turn this on to always make a copy of "tree" here to shake out |
14689 | * hidden/unupdated references. |
14690 | */ |
14691 | |
14692 | #ifdef DEBUG |
14693 | |
14694 | if (compStressCompile(STRESS_GENERIC_CHECK, 0)) |
14695 | { |
14696 | GenTree* copy; |
14697 | |
14698 | #ifdef SMALL_TREE_NODES |
14699 | if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL) |
14700 | { |
14701 | copy = gtNewLargeOperNode(GT_ADD, TYP_INT); |
14702 | } |
14703 | else |
14704 | #endif |
14705 | { |
14706 | copy = new (this, GT_CALL) GenTreeCall(TYP_INT); |
14707 | } |
14708 | |
14709 | copy->ReplaceWith(tree, this); |
14710 | |
14711 | #if defined(LATE_DISASM) |
14712 | // GT_CNS_INT is considered small, so ReplaceWith() won't copy all fields |
14713 | if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle()) |
14714 | { |
14715 | copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle; |
14716 | } |
14717 | #endif |
14718 | |
14719 | DEBUG_DESTROY_NODE(tree); |
14720 | tree = copy; |
14721 | } |
14722 | #endif // DEBUG |
14723 | |
14724 | if (fgGlobalMorph) |
14725 | { |
14726 | /* Ensure that we haven't morphed this node already */ |
14727 | assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!"); |
14728 | |
14729 | #if LOCAL_ASSERTION_PROP |
14730 | /* Before morphing the tree, we try to propagate any active assertions */ |
14731 | if (optLocalAssertionProp) |
14732 | { |
14733 | /* Do we have any active assertions? */ |
14734 | |
14735 | if (optAssertionCount > 0) |
14736 | { |
14737 | GenTree* newTree = tree; |
14738 | while (newTree != nullptr) |
14739 | { |
14740 | tree = newTree; |
14741 | /* newTree is non-Null if we propagated an assertion */ |
14742 | newTree = optAssertionProp(apFull, tree, nullptr); |
14743 | } |
14744 | assert(tree != nullptr); |
14745 | } |
14746 | } |
14747 | PREFAST_ASSUME(tree != nullptr); |
14748 | #endif |
14749 | } |
14750 | |
14751 | /* Save the original un-morphed tree for fgMorphTreeDone */ |
14752 | |
14753 | GenTree* oldTree = tree; |
14754 | |
14755 | /* Figure out what kind of a node we have */ |
14756 | |
14757 | unsigned kind = tree->OperKind(); |
14758 | |
14759 | /* Is this a constant node? */ |
14760 | |
14761 | if (kind & GTK_CONST) |
14762 | { |
14763 | tree = fgMorphConst(tree); |
14764 | goto DONE; |
14765 | } |
14766 | |
14767 | /* Is this a leaf node? */ |
14768 | |
14769 | if (kind & GTK_LEAF) |
14770 | { |
14771 | tree = fgMorphLeaf(tree); |
14772 | goto DONE; |
14773 | } |
14774 | |
14775 | /* Is it a 'simple' unary/binary operator? */ |
14776 | |
14777 | if (kind & GTK_SMPOP) |
14778 | { |
14779 | tree = fgMorphSmpOp(tree, mac); |
14780 | goto DONE; |
14781 | } |
14782 | |
14783 | /* See what kind of a special operator we have here */ |
14784 | |
14785 | switch (tree->OperGet()) |
14786 | { |
14787 | case GT_FIELD: |
14788 | tree = fgMorphField(tree, mac); |
14789 | break; |
14790 | |
14791 | case GT_CALL: |
14792 | if (tree->OperMayThrow(this)) |
14793 | { |
14794 | tree->gtFlags |= GTF_EXCEPT; |
14795 | } |
14796 | else |
14797 | { |
14798 | tree->gtFlags &= ~GTF_EXCEPT; |
14799 | } |
14800 | tree = fgMorphCall(tree->AsCall()); |
14801 | break; |
14802 | |
14803 | case GT_ARR_BOUNDS_CHECK: |
14804 | #ifdef FEATURE_SIMD |
14805 | case GT_SIMD_CHK: |
14806 | #endif // FEATURE_SIMD |
14807 | #ifdef FEATURE_HW_INTRINSICS |
14808 | case GT_HW_INTRINSIC_CHK: |
14809 | #endif // FEATURE_HW_INTRINSICS |
14810 | { |
14811 | fgSetRngChkTarget(tree); |
14812 | |
14813 | GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); |
14814 | bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex); |
14815 | bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen); |
14816 | // If the index is a comma(throw, x), just return that. |
14817 | if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex)) |
14818 | { |
14819 | tree = bndsChk->gtIndex; |
14820 | } |
14821 | |
14822 | bndsChk->gtFlags &= ~GTF_CALL; |
14823 | |
14824 | // Propagate effects flags upwards |
14825 | bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT); |
14826 | bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT); |
14827 | |
14828 | // Otherwise, we don't change the tree. |
14829 | } |
14830 | break; |
14831 | |
14832 | case GT_ARR_ELEM: |
14833 | tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj); |
14834 | |
14835 | unsigned dim; |
14836 | for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) |
14837 | { |
14838 | tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]); |
14839 | } |
14840 | |
14841 | tree->gtFlags &= ~GTF_CALL; |
14842 | |
14843 | tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT; |
14844 | |
14845 | for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) |
14846 | { |
14847 | tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT; |
14848 | } |
14849 | |
14850 | if (fgGlobalMorph) |
14851 | { |
14852 | fgSetRngChkTarget(tree, false); |
14853 | } |
14854 | break; |
14855 | |
14856 | case GT_ARR_OFFSET: |
14857 | tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset); |
14858 | tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex); |
14859 | tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj); |
14860 | |
14861 | tree->gtFlags &= ~GTF_CALL; |
14862 | tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT; |
14863 | tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT; |
14864 | tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT; |
14865 | if (fgGlobalMorph) |
14866 | { |
14867 | fgSetRngChkTarget(tree, false); |
14868 | } |
14869 | break; |
14870 | |
14871 | case GT_CMPXCHG: |
14872 | tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation); |
14873 | tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue); |
14874 | tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand); |
14875 | |
14876 | tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL); |
14877 | |
14878 | tree->gtFlags |= tree->gtCmpXchg.gtOpLocation->gtFlags & GTF_ALL_EFFECT; |
14879 | tree->gtFlags |= tree->gtCmpXchg.gtOpValue->gtFlags & GTF_ALL_EFFECT; |
14880 | tree->gtFlags |= tree->gtCmpXchg.gtOpComparand->gtFlags & GTF_ALL_EFFECT; |
14881 | break; |
14882 | |
14883 | case GT_STORE_DYN_BLK: |
14884 | case GT_DYN_BLK: |
14885 | if (tree->OperGet() == GT_STORE_DYN_BLK) |
14886 | { |
14887 | tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data()); |
14888 | } |
14889 | tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr()); |
14890 | tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize); |
14891 | |
14892 | tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL); |
14893 | tree->SetIndirExceptionFlags(this); |
14894 | |
14895 | if (tree->OperGet() == GT_STORE_DYN_BLK) |
14896 | { |
14897 | tree->gtFlags |= tree->gtDynBlk.Data()->gtFlags & GTF_ALL_EFFECT; |
14898 | } |
14899 | tree->gtFlags |= tree->gtDynBlk.Addr()->gtFlags & GTF_ALL_EFFECT; |
14900 | tree->gtFlags |= tree->gtDynBlk.gtDynamicSize->gtFlags & GTF_ALL_EFFECT; |
14901 | break; |
14902 | |
14903 | case GT_INDEX_ADDR: |
14904 | GenTreeIndexAddr* indexAddr; |
14905 | indexAddr = tree->AsIndexAddr(); |
14906 | indexAddr->Index() = fgMorphTree(indexAddr->Index()); |
14907 | indexAddr->Arr() = fgMorphTree(indexAddr->Arr()); |
14908 | |
14909 | tree->gtFlags &= ~GTF_CALL; |
14910 | |
14911 | tree->gtFlags |= indexAddr->Index()->gtFlags & GTF_ALL_EFFECT; |
14912 | tree->gtFlags |= indexAddr->Arr()->gtFlags & GTF_ALL_EFFECT; |
14913 | break; |
14914 | |
14915 | default: |
14916 | #ifdef DEBUG |
14917 | gtDispTree(tree); |
14918 | #endif |
14919 | noway_assert(!"unexpected operator"); |
14920 | } |
14921 | DONE: |
14922 | |
14923 | fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum)); |
14924 | |
14925 | return tree; |
14926 | } |
14927 | |
14928 | #if LOCAL_ASSERTION_PROP |
14929 | //------------------------------------------------------------------------ |
14930 | // fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum |
14931 | // |
14932 | // Arguments: |
14933 | // lclNum - The varNum of the lclVar for which we're killing assertions. |
14934 | // tree - (DEBUG only) the tree responsible for killing its assertions. |
14935 | // |
14936 | void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* tree)) |
14937 | { |
14938 | /* All dependent assertions are killed here */ |
14939 | |
14940 | ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum)); |
14941 | |
14942 | if (killed) |
14943 | { |
14944 | AssertionIndex index = optAssertionCount; |
14945 | while (killed && (index > 0)) |
14946 | { |
14947 | if (BitVecOps::IsMember(apTraits, killed, index - 1)) |
14948 | { |
14949 | #ifdef DEBUG |
14950 | AssertionDsc* curAssertion = optGetAssertion(index); |
14951 | noway_assert((curAssertion->op1.lcl.lclNum == lclNum) || |
14952 | ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum))); |
14953 | if (verbose) |
14954 | { |
14955 | printf("\nThe assignment "); |
14956 | printTreeID(tree); |
14957 | printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum); |
14958 | optPrintAssertion(curAssertion); |
14959 | } |
14960 | #endif |
14961 | // Remove this bit from the killed mask |
14962 | BitVecOps::RemoveElemD(apTraits, killed, index - 1); |
14963 | |
14964 | optAssertionRemove(index); |
14965 | } |
14966 | |
14967 | index--; |
14968 | } |
14969 | |
14970 | // killed mask should now be zero |
14971 | noway_assert(BitVecOps::IsEmpty(apTraits, killed)); |
14972 | } |
14973 | } |
14974 | //------------------------------------------------------------------------ |
14975 | // fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum. |
14976 | // |
14977 | // Arguments: |
14978 | // lclNum - The varNum of the lclVar for which we're killing assertions. |
14979 | // tree - (DEBUG only) the tree responsible for killing its assertions. |
14980 | // |
14981 | // Notes: |
14982 | // For structs and struct fields, it will invalidate the children and parent |
14983 | // respectively. |
14984 | // Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar. |
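// For example, if V02 is a promoted struct with field locals V03 and V04, an assignment
// to V02 kills the assertions that depend on V03 and V04 as well as those on V02 itself.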
14985 | // |
14986 | void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree)) |
14987 | { |
14988 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
14989 | |
14990 | if (varDsc->lvPromoted) |
14991 | { |
14992 | noway_assert(varTypeIsStruct(varDsc)); |
14993 | |
14994 | // Kill the field locals. |
14995 | for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) |
14996 | { |
14997 | fgKillDependentAssertionsSingle(i DEBUGARG(tree)); |
14998 | } |
14999 | |
15000 | // Kill the struct local itself. |
15001 | fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); |
15002 | } |
15003 | else if (varDsc->lvIsStructField) |
15004 | { |
15005 | // Kill the field local. |
15006 | fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); |
15007 | |
15008 | // Kill the parent struct. |
15009 | fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree)); |
15010 | } |
15011 | else |
15012 | { |
15013 | fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); |
15014 | } |
15015 | } |
15016 | #endif // LOCAL_ASSERTION_PROP |
15017 | |
15018 | /***************************************************************************** |
15019 | * |
15020 | * This function is called to complete the morphing of a tree node |
15021 | * It should only be called once for each node. |
15022 | * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated, |
15023 | * to enforce the invariant that each node is only morphed once. |
15024 | * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced |
15025 | * by an equivalent tree. |
15026 | * |
15027 | */ |
15028 | |
15029 | void Compiler::fgMorphTreeDone(GenTree* tree, |
15030 | GenTree* oldTree /* == NULL */ |
15031 | DEBUGARG(int morphNum)) |
15032 | { |
15033 | #ifdef DEBUG |
15034 | if (verbose && treesBeforeAfterMorph) |
15035 | { |
15036 | printf("\nfgMorphTree (after %d):\n", morphNum); |
15037 | gtDispTree(tree); |
15038 | printf(""); // in our logic this causes a flush |
15039 | } |
15040 | #endif |
15041 | |
15042 | if (!fgGlobalMorph) |
15043 | { |
15044 | return; |
15045 | } |
15046 | |
15047 | if ((oldTree != nullptr) && (oldTree != tree)) |
15048 | { |
15049 | /* Ensure that we have morphed this node */ |
15050 | assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!"); |
15051 | |
15052 | #ifdef DEBUG |
15053 | TransferTestDataToNode(oldTree, tree); |
15054 | #endif |
15055 | } |
15056 | else |
15057 | { |
15058 | // Ensure that we haven't morphed this node already |
15059 | assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!"); |
15060 | } |
15061 | |
15062 | if (tree->OperKind() & GTK_CONST) |
15063 | { |
15064 | goto DONE; |
15065 | } |
15066 | |
15067 | #if LOCAL_ASSERTION_PROP |
15068 | |
15069 | if (!optLocalAssertionProp) |
15070 | { |
15071 | goto DONE; |
15072 | } |
15073 | |
15074 | /* Do we have any active assertions? */ |
15075 | |
15076 | if (optAssertionCount > 0) |
15077 | { |
15078 | /* Is this an assignment to a local variable */ |
15079 | GenTreeLclVarCommon* lclVarTree = nullptr; |
15080 | if (tree->DefinesLocal(this, &lclVarTree)) |
15081 | { |
15082 | unsigned lclNum = lclVarTree->gtLclNum; |
15083 | noway_assert(lclNum < lvaCount); |
15084 | fgKillDependentAssertions(lclNum DEBUGARG(tree)); |
15085 | } |
15086 | } |
15087 | |
15088 | /* If this tree makes a new assertion - make it available */ |
15089 | optAssertionGen(tree); |
15090 | |
15091 | #endif // LOCAL_ASSERTION_PROP |
15092 | |
15093 | DONE:; |
15094 | |
15095 | #ifdef DEBUG |
15096 | /* Mark this node as being morphed */ |
15097 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
15098 | #endif |
15099 | } |
15100 | |
15101 | /***************************************************************************** |
15102 | * |
15103 | * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants |
15104 | * Returns true if we modified the flow graph |
15105 | */ |
15106 | |
15107 | bool Compiler::fgFoldConditional(BasicBlock* block) |
15108 | { |
15109 | bool result = false; |
15110 | |
15111 | // We don't want to make any code unreachable |
15112 | if (opts.OptimizationDisabled()) |
15113 | { |
15114 | return false; |
15115 | } |
15116 | |
15117 | if (block->bbJumpKind == BBJ_COND) |
15118 | { |
15119 | noway_assert(block->bbTreeList && block->bbTreeList->gtPrev); |
15120 | |
15121 | GenTree* stmt = block->bbTreeList->gtPrev; |
15122 | |
15123 | noway_assert(stmt->gtNext == nullptr); |
15124 | |
15125 | if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL) |
15126 | { |
15127 | noway_assert(fgRemoveRestOfBlock); |
15128 | |
15129 | /* Unconditional throw - transform the basic block into a BBJ_THROW */ |
15130 | fgConvertBBToThrowBB(block); |
15131 | |
15132 | /* Remove 'block' from the predecessor list of 'block->bbNext' */ |
15133 | fgRemoveRefPred(block->bbNext, block); |
15134 | |
15135 | /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */ |
15136 | fgRemoveRefPred(block->bbJumpDest, block); |
15137 | |
15138 | #ifdef DEBUG |
15139 | if (verbose) |
15140 | { |
15141 | printf("\nConditional folded at " FMT_BB "\n", block->bbNum); |
15142 | printf(FMT_BB " becomes a BBJ_THROW\n", block->bbNum); |
15143 | } |
15144 | #endif |
15145 | goto DONE_COND; |
15146 | } |
15147 | |
15148 | noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); |
15149 | |
15150 | /* Did we fold the conditional */ |
15151 | |
15152 | noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1); |
15153 | GenTree* cond; |
15154 | cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1; |
15155 | |
15156 | if (cond->OperKind() & GTK_CONST) |
15157 | { |
15158 | /* Yupee - we folded the conditional! |
15159 | * Remove the conditional statement */ |
15160 | |
15161 | noway_assert(cond->gtOper == GT_CNS_INT); |
15162 | noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0)); |
15163 | |
15164 | /* remove the statement from bbTreelist - No need to update |
15165 | * the reference counts since there are no lcl vars */ |
15166 | fgRemoveStmt(block, stmt); |
15167 | |
15168 | // block is a BBJ_COND that we are folding the conditional for |
15169 | // bTaken is the path that will always be taken from block |
15170 | // bNotTaken is the path that will never be taken from block |
15171 | // |
15172 | BasicBlock* bTaken; |
15173 | BasicBlock* bNotTaken; |
15174 | |
15175 | if (cond->gtIntCon.gtIconVal != 0) |
15176 | { |
15177 | /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */ |
15178 | block->bbJumpKind = BBJ_ALWAYS; |
15179 | bTaken = block->bbJumpDest; |
15180 | bNotTaken = block->bbNext; |
15181 | } |
15182 | else |
15183 | { |
15184 | /* Unmark the loop if we are removing a backwards branch: */ |
15185 | /* the dest block must also be marked as a loop head and */ |
15186 | /* we must be able to reach the backedge block */ |
15187 | if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && |
15188 | fgReachable(block->bbJumpDest, block)) |
15189 | { |
15190 | optUnmarkLoopBlocks(block->bbJumpDest, block); |
15191 | } |
15192 | |
15193 | /* JTRUE 0 - transform the basic block into a BBJ_NONE */ |
15194 | block->bbJumpKind = BBJ_NONE; |
15195 | noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL)); |
15196 | bTaken = block->bbNext; |
15197 | bNotTaken = block->bbJumpDest; |
15198 | } |
15199 | |
15200 | if (fgHaveValidEdgeWeights) |
15201 | { |
15202 | // We are removing an edge from block to bNotTaken |
15203 | // and we have already computed the edge weights, so |
15204 | // we will try to adjust some of the weights |
15205 | // |
15206 | flowList* edgeTaken = fgGetPredForBlock(bTaken, block); |
15207 | BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block |
15208 | |
15209 | // We examine the taken edge (block -> bTaken) |
15210 | // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight |
15211 | // else if bTaken has valid profile weight and block does not we try to adjust block's weight |
15212 | // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken |
15213 | // |
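// For example, if block has a profile weight of 100 and bTaken has no profile weight and
// only this one incoming edge, bTaken inherits the weight of 100 and the taken edge gets
// flEdgeWeightMin == flEdgeWeightMax == 100.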
15214 | if (block->hasProfileWeight()) |
15215 | { |
15216 | // The edge weights for (block -> bTaken) are 100% of block's weight |
15217 | edgeTaken->flEdgeWeightMin = block->bbWeight; |
15218 | edgeTaken->flEdgeWeightMax = block->bbWeight; |
15219 | |
15220 | if (!bTaken->hasProfileWeight()) |
15221 | { |
15222 | if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight)) |
15223 | { |
15224 | // Update the weight of bTaken |
15225 | bTaken->inheritWeight(block); |
15226 | bUpdated = bTaken; |
15227 | } |
15228 | } |
15229 | } |
15230 | else if (bTaken->hasProfileWeight()) |
15231 | { |
15232 | if (bTaken->countOfInEdges() == 1) |
15233 | { |
15234 | // There is only one in edge to bTaken |
15235 | edgeTaken->flEdgeWeightMin = bTaken->bbWeight; |
15236 | edgeTaken->flEdgeWeightMax = bTaken->bbWeight; |
15237 | |
15238 | // Update the weight of block |
15239 | block->inheritWeight(bTaken); |
15240 | bUpdated = block; |
15241 | } |
15242 | } |
15243 | |
15244 | if (bUpdated != nullptr) |
15245 | { |
15246 | flowList* edge; |
15247 | // Now fix the weights of the edges out of 'bUpdated' |
15248 | switch (bUpdated->bbJumpKind) |
15249 | { |
15250 | case BBJ_NONE: |
15251 | edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); |
15252 | edge->flEdgeWeightMax = bUpdated->bbWeight; |
15253 | break; |
15254 | case BBJ_COND: |
15255 | edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); |
15256 | edge->flEdgeWeightMax = bUpdated->bbWeight; |
15257 | __fallthrough; |
15258 | case BBJ_ALWAYS: |
15259 | edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated); |
15260 | edge->flEdgeWeightMax = bUpdated->bbWeight; |
15261 | break; |
15262 | default: |
15263 | // We don't handle BBJ_SWITCH |
15264 | break; |
15265 | } |
15266 | } |
15267 | } |
15268 | |
15269 | /* modify the flow graph */ |
15270 | |
15271 | /* Remove 'block' from the predecessor list of 'bNotTaken' */ |
15272 | fgRemoveRefPred(bNotTaken, block); |
15273 | |
15274 | #ifdef DEBUG |
15275 | if (verbose) |
15276 | { |
15277 | printf("\nConditional folded at " FMT_BB "\n" , block->bbNum); |
15278 | printf(FMT_BB " becomes a %s" , block->bbNum, |
15279 | block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE" ); |
15280 | if (block->bbJumpKind == BBJ_ALWAYS) |
15281 | { |
15282 | printf(" to " FMT_BB, block->bbJumpDest->bbNum); |
15283 | } |
15284 | printf("\n" ); |
15285 | } |
15286 | #endif |
15287 | |
15288 | /* if the block was a loop condition we may have to modify |
15289 | * the loop table */ |
15290 | |
15291 | for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) |
15292 | { |
15293 | /* Some loops may have been already removed by |
15294 | * loop unrolling or conditional folding */ |
15295 | |
15296 | if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED) |
15297 | { |
15298 | continue; |
15299 | } |
15300 | |
15301 | /* We are only interested in the loop bottom */ |
15302 | |
15303 | if (optLoopTable[loopNum].lpBottom == block) |
15304 | { |
15305 | if (cond->gtIntCon.gtIconVal == 0) |
15306 | { |
15307 | /* This was a bogus loop (condition always false) |
15308 | * Remove the loop from the table */ |
15309 | |
15310 | optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; |
15311 | #ifdef DEBUG |
15312 | if (verbose) |
15313 | { |
15314 | printf("Removing loop L%02u (from " FMT_BB " to " FMT_BB ")\n\n" , loopNum, |
15315 | optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum); |
15316 | } |
15317 | #endif |
15318 | } |
15319 | } |
15320 | } |
15321 | DONE_COND: |
15322 | result = true; |
15323 | } |
15324 | } |
15325 | else if (block->bbJumpKind == BBJ_SWITCH) |
15326 | { |
15327 | noway_assert(block->bbTreeList && block->bbTreeList->gtPrev); |
15328 | |
15329 | GenTree* stmt = block->bbTreeList->gtPrev; |
15330 | |
15331 | noway_assert(stmt->gtNext == nullptr); |
15332 | |
15333 | if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL) |
15334 | { |
15335 | noway_assert(fgRemoveRestOfBlock); |
15336 | |
15337 | /* Unconditional throw - transform the basic block into a BBJ_THROW */ |
15338 | fgConvertBBToThrowBB(block); |
15339 | |
15340 | /* update the flow graph */ |
15341 | |
15342 | unsigned jumpCnt = block->bbJumpSwt->bbsCount; |
15343 | BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab; |
15344 | |
15345 | for (unsigned val = 0; val < jumpCnt; val++, jumpTab++) |
15346 | { |
15347 | BasicBlock* curJump = *jumpTab; |
15348 | |
15349 | /* Remove 'block' from the predecessor list of 'curJump' */ |
15350 | fgRemoveRefPred(curJump, block); |
15351 | } |
15352 | |
15353 | #ifdef DEBUG |
15354 | if (verbose) |
15355 | { |
15356 | printf("\nConditional folded at " FMT_BB "\n" , block->bbNum); |
15357 | printf(FMT_BB " becomes a BBJ_THROW\n" , block->bbNum); |
15358 | } |
15359 | #endif |
15360 | goto DONE_SWITCH; |
15361 | } |
15362 | |
15363 | noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH); |
15364 | |
/* Did we fold the conditional? */
15366 | |
15367 | noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1); |
15368 | GenTree* cond; |
15369 | cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1; |
15370 | |
15371 | if (cond->OperKind() & GTK_CONST) |
15372 | { |
15373 | /* Yupee - we folded the conditional! |
15374 | * Remove the conditional statement */ |
15375 | |
15376 | noway_assert(cond->gtOper == GT_CNS_INT); |
15377 | |
/* Remove the statement from bbTreeList - no need to update
* the reference counts since there are no lcl vars */
15380 | fgRemoveStmt(block, stmt); |
15381 | |
15382 | /* modify the flow graph */ |
15383 | |
15384 | /* Find the actual jump target */ |
15385 | unsigned switchVal; |
15386 | switchVal = (unsigned)cond->gtIntCon.gtIconVal; |
15387 | unsigned jumpCnt; |
15388 | jumpCnt = block->bbJumpSwt->bbsCount; |
15389 | BasicBlock** jumpTab; |
15390 | jumpTab = block->bbJumpSwt->bbsDstTab; |
15391 | bool foundVal; |
15392 | foundVal = false; |
15393 | |
15394 | for (unsigned val = 0; val < jumpCnt; val++, jumpTab++) |
15395 | { |
15396 | BasicBlock* curJump = *jumpTab; |
15397 | |
15398 | assert(curJump->countOfInEdges() > 0); |
15399 | |
// If val matches switchVal, or we are at the last entry and
// never found the switch value, then set the new jump dest
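// (For example, purely as a sketch: folding "switch(2)" over a 4-entry jump table keeps
// entry 2 as the sole target, while a constant past the end of the table falls back to
// the last entry, i.e. the default case.)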
15402 | |
15403 | if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1))) |
15404 | { |
15405 | if (curJump != block->bbNext) |
15406 | { |
15407 | /* transform the basic block into a BBJ_ALWAYS */ |
15408 | block->bbJumpKind = BBJ_ALWAYS; |
15409 | block->bbJumpDest = curJump; |
15410 | |
// We only need a GC poll when jumping backwards; for a forward jump, clear the flag.
15412 | if (curJump->bbNum > block->bbNum) |
15413 | { |
15414 | block->bbFlags &= ~BBF_NEEDS_GCPOLL; |
15415 | } |
15416 | } |
15417 | else |
15418 | { |
15419 | /* transform the basic block into a BBJ_NONE */ |
15420 | block->bbJumpKind = BBJ_NONE; |
15421 | block->bbFlags &= ~BBF_NEEDS_GCPOLL; |
15422 | } |
15423 | foundVal = true; |
15424 | } |
15425 | else |
15426 | { |
15427 | /* Remove 'block' from the predecessor list of 'curJump' */ |
15428 | fgRemoveRefPred(curJump, block); |
15429 | } |
15430 | } |
15431 | #ifdef DEBUG |
15432 | if (verbose) |
15433 | { |
15434 | printf("\nConditional folded at " FMT_BB "\n" , block->bbNum); |
15435 | printf(FMT_BB " becomes a %s" , block->bbNum, |
15436 | block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE" ); |
15437 | if (block->bbJumpKind == BBJ_ALWAYS) |
15438 | { |
15439 | printf(" to " FMT_BB, block->bbJumpDest->bbNum); |
15440 | } |
15441 | printf("\n" ); |
15442 | } |
15443 | #endif |
15444 | DONE_SWITCH: |
15445 | result = true; |
15446 | } |
15447 | } |
15448 | return result; |
15449 | } |
15450 | |
15451 | //***************************************************************************** |
15452 | // |
15453 | // Morphs a single statement in a block. |
15454 | // Can be called anytime, unlike fgMorphStmts() which should only be called once. |
15455 | // |
15456 | // Returns true if 'stmt' was removed from the block. |
15457 | // Returns false if 'stmt' is still in the block (even if other statements were removed). |
15458 | // |
15459 | |
15460 | bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg)) |
15461 | { |
15462 | assert(block != nullptr); |
15463 | assert(stmt != nullptr); |
15464 | |
15465 | compCurBB = block; |
15466 | compCurStmt = stmt; |
15467 | |
15468 | GenTree* morph = fgMorphTree(stmt->gtStmtExpr); |
15469 | |
15470 | // Bug 1106830 - During the CSE phase we can't just remove |
15471 | // morph->gtOp.gtOp2 as it could contain CSE expressions. |
15472 | // This leads to a noway_assert in OptCSE.cpp when |
15473 | // searching for the removed CSE ref. (using gtFindLink) |
15474 | // |
15475 | if (!optValnumCSE_phase) |
15476 | { |
15477 | // Check for morph as a GT_COMMA with an unconditional throw |
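// (Shape sketch: the morphed tree is GT_COMMA(GT_CALL <throw helper>, <dead value>);
// only the call on op1 is kept below.)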
15478 | if (fgIsCommaThrow(morph, true)) |
15479 | { |
15480 | #ifdef DEBUG |
15481 | if (verbose) |
15482 | { |
15483 | printf("Folding a top-level fgIsCommaThrow stmt\n" ); |
15484 | printf("Removing op2 as unreachable:\n" ); |
15485 | gtDispTree(morph->gtOp.gtOp2); |
15486 | printf("\n" ); |
15487 | } |
15488 | #endif |
15489 | // Use the call as the new stmt |
15490 | morph = morph->gtOp.gtOp1; |
15491 | noway_assert(morph->gtOper == GT_CALL); |
15492 | } |
15493 | |
15494 | // we can get a throw as a statement root |
15495 | if (fgIsThrow(morph)) |
15496 | { |
15497 | #ifdef DEBUG |
15498 | if (verbose) |
15499 | { |
15500 | printf("We have a top-level fgIsThrow stmt\n" ); |
15501 | printf("Removing the rest of block as unreachable:\n" ); |
15502 | } |
15503 | #endif |
15504 | noway_assert((morph->gtFlags & GTF_COLON_COND) == 0); |
15505 | fgRemoveRestOfBlock = true; |
15506 | } |
15507 | } |
15508 | |
15509 | stmt->gtStmtExpr = morph; |
15510 | |
15511 | // Can the entire tree be removed? |
15512 | bool removedStmt = false; |
15513 | |
15514 | // Defer removing statements during CSE so we don't inadvertently remove any CSE defs. |
15515 | if (!optValnumCSE_phase) |
15516 | { |
15517 | removedStmt = fgCheckRemoveStmt(block, stmt); |
15518 | } |
15519 | |
// Or, now that this is the last statement of the block, can the conditional branch at the end be folded?
15521 | if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock) |
15522 | { |
15523 | if (fgFoldConditional(block)) |
15524 | { |
15525 | if (block->bbJumpKind != BBJ_THROW) |
15526 | { |
15527 | removedStmt = true; |
15528 | } |
15529 | } |
15530 | } |
15531 | |
15532 | if (!removedStmt) |
15533 | { |
// We have to re-do the evaluation order since, for example, some later code does not expect constants as op1
15535 | gtSetStmtInfo(stmt); |
15536 | |
15537 | // Have to re-link the nodes for this statement |
15538 | fgSetStmtSeq(stmt); |
15539 | } |
15540 | |
15541 | #ifdef DEBUG |
15542 | if (verbose) |
15543 | { |
15544 | printf("%s %s tree:\n" , msg, (removedStmt ? "removed" : "morphed" )); |
15545 | gtDispTree(morph); |
15546 | printf("\n" ); |
15547 | } |
15548 | #endif |
15549 | |
15550 | if (fgRemoveRestOfBlock) |
15551 | { |
15552 | // Remove the rest of the stmts in the block |
15553 | for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt()) |
15554 | { |
15555 | fgRemoveStmt(block, stmt); |
15556 | } |
15557 | |
15558 | // The rest of block has been removed and we will always throw an exception. |
15559 | |
// Update successors of block
15561 | fgRemoveBlockAsPred(block); |
15562 | |
// For compDbgCode, we prepend an empty BB as the firstBB; it is BBJ_NONE.
// We should not convert it to a ThrowBB.
15565 | if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0)) |
15566 | { |
15567 | // Convert block to a throw bb |
15568 | fgConvertBBToThrowBB(block); |
15569 | } |
15570 | |
15571 | #ifdef DEBUG |
15572 | if (verbose) |
15573 | { |
15574 | printf("\n%s Block " FMT_BB " becomes a throw block.\n" , msg, block->bbNum); |
15575 | } |
15576 | #endif |
15577 | fgRemoveRestOfBlock = false; |
15578 | } |
15579 | |
15580 | return removedStmt; |
15581 | } |
15582 | |
15583 | /***************************************************************************** |
15584 | * |
15585 | * Morph the statements of the given block. |
15586 | * This function should be called just once for a block. Use fgMorphBlockStmt() |
15587 | * for reentrant calls. |
15588 | */ |
15589 | |
15590 | void Compiler::fgMorphStmts(BasicBlock* block, bool* lnot, bool* loadw) |
15591 | { |
15592 | fgRemoveRestOfBlock = false; |
15593 | |
15594 | /* Make the current basic block address available globally */ |
15595 | |
15596 | compCurBB = block; |
15597 | |
15598 | *lnot = *loadw = false; |
15599 | |
15600 | fgCurrentlyInUseArgTemps = hashBv::Create(this); |
15601 | |
15602 | GenTreeStmt* stmt = block->firstStmt(); |
15603 | GenTree* prev = nullptr; |
15604 | for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt) |
15605 | { |
15606 | assert(stmt->gtOper == GT_STMT); |
15607 | |
15608 | if (fgRemoveRestOfBlock) |
15609 | { |
15610 | fgRemoveStmt(block, stmt); |
15611 | continue; |
15612 | } |
15613 | #ifdef FEATURE_SIMD |
15614 | if (opts.OptimizationEnabled() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && |
15615 | stmt->gtStmtExpr->OperGet() == GT_ASG) |
15616 | { |
15617 | fgMorphCombineSIMDFieldAssignments(block, stmt); |
15618 | } |
15619 | #endif |
15620 | |
15621 | fgMorphStmt = stmt; |
15622 | compCurStmt = stmt; |
15623 | GenTree* tree = stmt->gtStmtExpr; |
15624 | |
15625 | #ifdef DEBUG |
15626 | compCurStmtNum++; |
15627 | if (stmt == block->bbTreeList) |
15628 | { |
15629 | block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum |
15630 | } |
15631 | |
15632 | unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0); |
15633 | |
15634 | if (verbose) |
15635 | { |
15636 | printf("\nfgMorphTree " FMT_BB ", stmt %d (before)\n" , block->bbNum, compCurStmtNum); |
15637 | gtDispTree(tree); |
15638 | } |
15639 | #endif |
15640 | |
15641 | /* Morph this statement tree */ |
15642 | |
15643 | GenTree* morph = fgMorphTree(tree); |
15644 | |
15645 | // mark any outgoing arg temps as free so we can reuse them in the next statement. |
15646 | |
15647 | fgCurrentlyInUseArgTemps->ZeroAll(); |
15648 | |
// Has fgMorphStmt been sneakily changed?
15650 | |
15651 | if (stmt->gtStmtExpr != tree) |
15652 | { |
/* This must be a tail call. Ignore 'morph' and carry on with
the tail-call node */
15655 | |
15656 | morph = stmt->gtStmtExpr; |
15657 | noway_assert(compTailCallUsed); |
15658 | noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall()); |
15659 | noway_assert(stmt->gtNextStmt == nullptr); |
15660 | |
15661 | GenTreeCall* call = morph->AsCall(); |
15662 | // Could either be |
15663 | // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or |
15664 | // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing |
15665 | // a jmp. |
15666 | noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || |
15667 | (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && |
15668 | (compCurBB->bbFlags & BBF_HAS_JMP))); |
15669 | } |
15670 | else if (block != compCurBB) |
15671 | { |
15672 | /* This must be a tail call that caused a GCPoll to get |
15673 | injected. We haven't actually morphed the call yet |
15674 | but the flag still got set, clear it here... */ |
15675 | CLANG_FORMAT_COMMENT_ANCHOR; |
15676 | |
15677 | #ifdef DEBUG |
15678 | tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
15679 | #endif |
15680 | |
15681 | noway_assert(compTailCallUsed); |
15682 | noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall()); |
15683 | noway_assert(stmt->gtNextStmt == nullptr); |
15684 | |
15685 | GenTreeCall* call = morph->AsCall(); |
15686 | |
15687 | // Could either be |
15688 | // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or |
15689 | // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing |
15690 | // a jmp. |
15691 | noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || |
15692 | (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && |
15693 | (compCurBB->bbFlags & BBF_HAS_JMP))); |
15694 | } |
15695 | |
15696 | #ifdef DEBUG |
15697 | if (compStressCompile(STRESS_CLONE_EXPR, 30)) |
15698 | { |
15699 | // Clone all the trees to stress gtCloneExpr() |
15700 | |
15701 | if (verbose) |
15702 | { |
15703 | printf("\nfgMorphTree (stressClone from):\n" ); |
15704 | gtDispTree(morph); |
15705 | } |
15706 | |
15707 | morph = gtCloneExpr(morph); |
15708 | noway_assert(morph); |
15709 | |
15710 | if (verbose) |
15711 | { |
15712 | printf("\nfgMorphTree (stressClone to):\n" ); |
15713 | gtDispTree(morph); |
15714 | } |
15715 | } |
15716 | |
/* If the hash value changed, we modified the tree during morphing */
15718 | if (verbose) |
15719 | { |
15720 | unsigned newHash = gtHashValue(morph); |
15721 | if (newHash != oldHash) |
15722 | { |
15723 | printf("\nfgMorphTree " FMT_BB ", stmt %d (after)\n" , block->bbNum, compCurStmtNum); |
15724 | gtDispTree(morph); |
15725 | } |
15726 | } |
15727 | #endif |
15728 | |
15729 | /* Check for morph as a GT_COMMA with an unconditional throw */ |
15730 | if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true)) |
15731 | { |
15732 | /* Use the call as the new stmt */ |
15733 | morph = morph->gtOp.gtOp1; |
15734 | noway_assert(morph->gtOper == GT_CALL); |
15735 | noway_assert((morph->gtFlags & GTF_COLON_COND) == 0); |
15736 | |
15737 | fgRemoveRestOfBlock = true; |
15738 | } |
15739 | |
15740 | stmt->gtStmtExpr = tree = morph; |
15741 | |
15742 | if (fgRemoveRestOfBlock) |
15743 | { |
15744 | continue; |
15745 | } |
15746 | |
/* Has the statement been optimized away? */
15748 | |
15749 | if (fgCheckRemoveStmt(block, stmt)) |
15750 | { |
15751 | continue; |
15752 | } |
15753 | |
15754 | /* Check if this block ends with a conditional branch that can be folded */ |
15755 | |
15756 | if (fgFoldConditional(block)) |
15757 | { |
15758 | continue; |
15759 | } |
15760 | |
15761 | if (ehBlockHasExnFlowDsc(block)) |
15762 | { |
15763 | continue; |
15764 | } |
15765 | } |
15766 | |
15767 | if (fgRemoveRestOfBlock) |
15768 | { |
15769 | if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH)) |
15770 | { |
15771 | GenTree* first = block->bbTreeList; |
15772 | noway_assert(first); |
15773 | GenTree* last = first->gtPrev; |
15774 | noway_assert(last && last->gtNext == nullptr); |
15775 | GenTree* lastStmt = last->gtStmt.gtStmtExpr; |
15776 | |
15777 | if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) || |
15778 | ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH))) |
15779 | { |
15780 | GenTree* op1 = lastStmt->gtOp.gtOp1; |
15781 | |
15782 | if (op1->OperKind() & GTK_RELOP) |
15783 | { |
15784 | /* Unmark the comparison node with GTF_RELOP_JMP_USED */ |
15785 | op1->gtFlags &= ~GTF_RELOP_JMP_USED; |
15786 | } |
15787 | |
15788 | last->gtStmt.gtStmtExpr = fgMorphTree(op1); |
15789 | } |
15790 | } |
15791 | |
15792 | /* Mark block as a BBJ_THROW block */ |
15793 | fgConvertBBToThrowBB(block); |
15794 | } |
15795 | |
15796 | #if FEATURE_FASTTAILCALL |
15797 | GenTree* recursiveTailCall = nullptr; |
15798 | if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall)) |
15799 | { |
15800 | fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall()); |
15801 | } |
15802 | #endif |
15803 | |
15804 | #ifdef DEBUG |
15805 | compCurBB = (BasicBlock*)INVALID_POINTER_VALUE; |
15806 | #endif |
15807 | |
15808 | // Reset this back so that it doesn't leak out impacting other blocks |
15809 | fgRemoveRestOfBlock = false; |
15810 | } |
15811 | |
15812 | /***************************************************************************** |
15813 | * |
15814 | * Morph the blocks of the method. |
* The basic block list may be modified as part of this morphing.
15816 | * This function should be called just once. |
15817 | */ |
15818 | |
15819 | void Compiler::fgMorphBlocks() |
15820 | { |
15821 | #ifdef DEBUG |
15822 | if (verbose) |
15823 | { |
15824 | printf("\n*************** In fgMorphBlocks()\n" ); |
15825 | } |
15826 | #endif |
15827 | |
/* Since fgMorphTree can be called after various optimizations to re-arrange
* the nodes, we need a global flag to signal whether we are in the one-pass
* global morphing phase */
15831 | |
15832 | fgGlobalMorph = true; |
15833 | |
15834 | #if LOCAL_ASSERTION_PROP |
15835 | // |
// Local assertion prop is enabled when we are optimizing
15837 | // |
15838 | optLocalAssertionProp = opts.OptimizationEnabled(); |
15839 | |
15840 | if (optLocalAssertionProp) |
15841 | { |
15842 | // |
15843 | // Initialize for local assertion prop |
15844 | // |
15845 | optAssertionInit(true); |
15846 | } |
15847 | #elif ASSERTION_PROP |
15848 | // |
15849 | // If LOCAL_ASSERTION_PROP is not set |
15850 | // and we have global assertion prop |
15851 | // then local assertion prop is always off |
15852 | // |
15853 | optLocalAssertionProp = false; |
15854 | |
15855 | #endif |
15856 | |
15857 | /*------------------------------------------------------------------------- |
15858 | * Process all basic blocks in the function |
15859 | */ |
15860 | |
15861 | BasicBlock* block = fgFirstBB; |
15862 | noway_assert(block); |
15863 | |
15864 | #ifdef DEBUG |
15865 | compCurStmtNum = 0; |
15866 | #endif |
15867 | |
15868 | do |
15869 | { |
15870 | #if OPT_BOOL_OPS |
15871 | bool lnot = false; |
15872 | #endif |
15873 | |
15874 | bool loadw = false; |
15875 | |
15876 | #ifdef DEBUG |
15877 | if (verbose) |
15878 | { |
15879 | printf("\nMorphing " FMT_BB " of '%s'\n" , block->bbNum, info.compFullName); |
15880 | } |
15881 | #endif |
15882 | |
15883 | #if LOCAL_ASSERTION_PROP |
15884 | if (optLocalAssertionProp) |
15885 | { |
15886 | // |
// Clear out any currently recorded assertion candidates
// before processing each basic block;
// note that we must handle QMARK-COLON specially
15890 | // |
15891 | optAssertionReset(0); |
15892 | } |
15893 | #endif |
15894 | |
15895 | /* Process all statement trees in the basic block */ |
15896 | |
15897 | fgMorphStmts(block, &lnot, &loadw); |
15898 | |
15899 | /* Are we using a single return block? */ |
15900 | |
15901 | if (block->bbJumpKind == BBJ_RETURN) |
15902 | { |
15903 | if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0)) |
15904 | { |
15905 | |
15906 | // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN. |
// For example, a method returning void could have an empty block with jump kind BBJ_RETURN.
// Such blocks do materialize as part of inlining.
15909 | // |
15910 | // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN. |
15911 | // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC. |
15912 | // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal |
15913 | // is BAD_VAR_NUM. |
15914 | // |
15915 | // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN. |
15916 | |
15917 | GenTree* last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr; |
15918 | GenTree* ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr; |
15919 | |
15920 | if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0)) |
15921 | { |
15922 | // This return was generated during epilog merging, so leave it alone |
15923 | } |
15924 | else |
15925 | { |
15926 | /* We'll jump to the genReturnBB */ |
15927 | CLANG_FORMAT_COMMENT_ANCHOR; |
15928 | |
15929 | #if !defined(_TARGET_X86_) |
15930 | if (info.compFlags & CORINFO_FLG_SYNCH) |
15931 | { |
15932 | fgConvertSyncReturnToLeave(block); |
15933 | } |
15934 | else |
15935 | #endif // !_TARGET_X86_ |
15936 | { |
15937 | block->bbJumpKind = BBJ_ALWAYS; |
15938 | block->bbJumpDest = genReturnBB; |
15939 | fgReturnCount--; |
15940 | } |
15941 | if (genReturnLocal != BAD_VAR_NUM) |
15942 | { |
// Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
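// For example (a sketch): a block that ended in "RETURN(expr)" now stores "expr" into
// genReturnLocal via a GT_ASG and flows to the common genReturnBB instead of returning directly.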
15944 | |
15945 | // Method must be returning a value other than TYP_VOID. |
15946 | noway_assert(compMethodHasRetVal()); |
15947 | |
15948 | // This block must be ending with a GT_RETURN |
15949 | noway_assert(last != nullptr); |
15950 | noway_assert(last->gtOper == GT_STMT); |
15951 | noway_assert(last->gtNext == nullptr); |
15952 | noway_assert(ret != nullptr); |
15953 | |
15954 | // GT_RETURN must have non-null operand as the method is returning the value assigned to |
15955 | // genReturnLocal |
15956 | noway_assert(ret->OperGet() == GT_RETURN); |
15957 | noway_assert(ret->gtGetOp1() != nullptr); |
15958 | |
15959 | GenTree* pAfterStatement = last; |
15960 | IL_OFFSETX offset = last->AsStmt()->gtStmtILoffsx; |
15961 | GenTree* tree = |
15962 | gtNewTempAssign(genReturnLocal, ret->gtGetOp1(), &pAfterStatement, offset, block); |
15963 | if (tree->OperIsCopyBlkOp()) |
15964 | { |
15965 | tree = fgMorphCopyBlock(tree); |
15966 | } |
15967 | |
15968 | if (pAfterStatement == last) |
15969 | { |
15970 | last->gtStmt.gtStmtExpr = tree; |
15971 | } |
15972 | else |
15973 | { |
15974 | // gtNewTempAssign inserted additional statements after last |
15975 | fgRemoveStmt(block, last); |
15976 | last = fgInsertStmtAfter(block, pAfterStatement, gtNewStmt(tree, offset)); |
15977 | } |
15978 | |
15979 | // make sure that copy-prop ignores this assignment. |
15980 | last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE; |
15981 | } |
15982 | else if (ret != nullptr && ret->OperGet() == GT_RETURN) |
15983 | { |
15984 | // This block ends with a GT_RETURN |
15985 | noway_assert(last != nullptr); |
15986 | noway_assert(last->gtOper == GT_STMT); |
15987 | noway_assert(last->gtNext == nullptr); |
15988 | |
15989 | // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn |
15990 | // block |
15991 | noway_assert(ret->TypeGet() == TYP_VOID); |
15992 | noway_assert(ret->gtGetOp1() == nullptr); |
15993 | |
15994 | fgRemoveStmt(block, last); |
15995 | } |
15996 | #ifdef DEBUG |
15997 | if (verbose) |
15998 | { |
15999 | printf("morph " FMT_BB " to point at onereturn. New block is\n" , block->bbNum); |
16000 | fgTableDispBasicBlock(block); |
16001 | } |
16002 | #endif |
16003 | } |
16004 | } |
16005 | } |
16006 | block = block->bbNext; |
16007 | } while (block); |
16008 | |
16009 | /* We are done with the global morphing phase */ |
16010 | |
16011 | fgGlobalMorph = false; |
16012 | |
16013 | #ifdef DEBUG |
16014 | if (verboseTrees) |
16015 | { |
16016 | fgDispBasicBlocks(true); |
16017 | } |
16018 | #endif |
16019 | } |
16020 | |
16021 | /***************************************************************************** |
16022 | * |
16023 | * Make some decisions about the kind of code to generate. |
16024 | */ |
16025 | |
16026 | void Compiler::fgSetOptions() |
16027 | { |
16028 | #ifdef DEBUG |
16029 | /* Should we force fully interruptible code ? */ |
16030 | if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30)) |
16031 | { |
16032 | noway_assert(!codeGen->isGCTypeFixed()); |
16033 | genInterruptible = true; |
16034 | } |
16035 | #endif |
16036 | |
16037 | if (opts.compDbgCode) |
16038 | { |
16039 | assert(!codeGen->isGCTypeFixed()); |
16040 | genInterruptible = true; // debugging is easier this way ... |
16041 | } |
16042 | |
16043 | /* Assume we won't need an explicit stack frame if this is allowed */ |
16044 | |
16045 | // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of |
16046 | // the callee-saved registers. |
16047 | noway_assert(!compTailCallUsed || !compLocallocUsed); |
16048 | |
16049 | if (compLocallocUsed) |
16050 | { |
16051 | codeGen->setFramePointerRequired(true); |
16052 | } |
16053 | |
16054 | #ifdef _TARGET_X86_ |
16055 | |
16056 | if (compTailCallUsed) |
16057 | codeGen->setFramePointerRequired(true); |
16058 | |
16059 | #endif // _TARGET_X86_ |
16060 | |
16061 | if (!opts.genFPopt) |
16062 | { |
16063 | codeGen->setFramePointerRequired(true); |
16064 | } |
16065 | |
16066 | // Assert that the EH table has been initialized by now. Note that |
16067 | // compHndBBtabAllocCount never decreases; it is a high-water mark |
16068 | // of table allocation. In contrast, compHndBBtabCount does shrink |
16069 | // if we delete a dead EH region, and if it shrinks to zero, the |
16070 | // table pointer compHndBBtab is unreliable. |
16071 | assert(compHndBBtabAllocCount >= info.compXcptnsCount); |
16072 | |
16073 | #ifdef _TARGET_X86_ |
16074 | |
16075 | // Note: this case, and the !X86 case below, should both use the |
16076 | // !X86 path. This would require a few more changes for X86 to use |
16077 | // compHndBBtabCount (the current number of EH clauses) instead of |
16078 | // info.compXcptnsCount (the number of EH clauses in IL), such as |
16079 | // in ehNeedsShadowSPslots(). This is because sometimes the IL has |
16080 | // an EH clause that we delete as statically dead code before we |
16081 | // get here, leaving no EH clauses left, and thus no requirement |
16082 | // to use a frame pointer because of EH. But until all the code uses |
16083 | // the same test, leave info.compXcptnsCount here. |
16084 | if (info.compXcptnsCount > 0) |
16085 | { |
16086 | codeGen->setFramePointerRequiredEH(true); |
16087 | } |
16088 | |
16089 | #else // !_TARGET_X86_ |
16090 | |
16091 | if (compHndBBtabCount > 0) |
16092 | { |
16093 | codeGen->setFramePointerRequiredEH(true); |
16094 | } |
16095 | |
16096 | #endif // _TARGET_X86_ |
16097 | |
16098 | #ifdef UNIX_X86_ABI |
16099 | if (info.compXcptnsCount > 0) |
16100 | { |
16101 | assert(!codeGen->isGCTypeFixed()); |
16102 | // Enforce fully interruptible codegen for funclet unwinding |
16103 | genInterruptible = true; |
16104 | } |
16105 | #endif // UNIX_X86_ABI |
16106 | |
16107 | if (info.compCallUnmanaged) |
16108 | { |
16109 | codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame |
16110 | } |
16111 | |
16112 | if (info.compPublishStubParam) |
16113 | { |
16114 | codeGen->setFramePointerRequiredGCInfo(true); |
16115 | } |
16116 | |
16117 | if (opts.compNeedSecurityCheck) |
16118 | { |
16119 | codeGen->setFramePointerRequiredGCInfo(true); |
16120 | |
16121 | #ifndef JIT32_GCENCODER |
16122 | |
16123 | // The decoder only reports objects in frames with exceptions if the frame |
16124 | // is fully interruptible. |
16125 | // Even if there is no catch or other way to resume execution in this frame |
16126 | // the VM requires the security object to remain alive until later, so |
// frames with security objects must be fully interruptible.
16128 | genInterruptible = true; |
16129 | |
16130 | #endif // JIT32_GCENCODER |
16131 | } |
16132 | |
16133 | if (compIsProfilerHookNeeded()) |
16134 | { |
16135 | codeGen->setFramePointerRequired(true); |
16136 | } |
16137 | |
16138 | if (info.compIsVarArgs) |
16139 | { |
16140 | // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative. |
16141 | codeGen->setFramePointerRequiredGCInfo(true); |
16142 | } |
16143 | |
16144 | if (lvaReportParamTypeArg()) |
16145 | { |
16146 | codeGen->setFramePointerRequiredGCInfo(true); |
16147 | } |
16148 | |
16149 | // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not"); |
16150 | } |
16151 | |
16152 | /*****************************************************************************/ |
16153 | |
16154 | GenTree* Compiler::fgInitThisClass() |
16155 | { |
16156 | noway_assert(!compIsForInlining()); |
16157 | |
16158 | CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd); |
16159 | |
16160 | if (!kind.needsRuntimeLookup) |
16161 | { |
16162 | return fgGetSharedCCtor(info.compClassHnd); |
16163 | } |
16164 | else |
16165 | { |
16166 | #ifdef FEATURE_READYTORUN_COMPILER |
16167 | // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR. |
16168 | if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI)) |
16169 | { |
16170 | CORINFO_RESOLVED_TOKEN resolvedToken; |
16171 | memset(&resolvedToken, 0, sizeof(resolvedToken)); |
16172 | |
16173 | // We are in a shared method body, but maybe we don't need a runtime lookup after all. |
16174 | // This covers the case of a generic method on a non-generic type. |
16175 | if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST)) |
16176 | { |
16177 | resolvedToken.hClass = info.compClassHnd; |
16178 | return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF); |
16179 | } |
16180 | |
16181 | // We need a runtime lookup. |
16182 | GenTree* ctxTree = getRuntimeContextTree(kind.runtimeLookupKind); |
16183 | |
16184 | // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static |
16185 | // base of the class that owns the method being compiled". If we're in this method, it means we're not |
16186 | // inlining and there's no ambiguity. |
16187 | return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF, |
16188 | gtNewArgList(ctxTree), &kind); |
16189 | } |
16190 | #endif |
16191 | |
// Collectible types require that, for shared generic code, if we use the generic context parameter,
// we report it. (This is a conservative approach; we could detect some cases, particularly when the
// context parameter is 'this', where we don't need the eager reporting logic.)
16195 | lvaGenericsContextUseCount++; |
16196 | |
16197 | switch (kind.runtimeLookupKind) |
16198 | { |
16199 | case CORINFO_LOOKUP_THISOBJ: |
// This code takes a 'this' pointer, but we need to pass the static method desc to get the right point
// in the hierarchy
16202 | { |
16203 | GenTree* vtTree = gtNewLclvNode(info.compThisArg, TYP_REF); |
16204 | // Vtable pointer of this object |
16205 | vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree); |
16206 | vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception |
16207 | GenTree* methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd); |
16208 | |
16209 | return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, gtNewArgList(vtTree, methodHnd)); |
16210 | } |
16211 | |
16212 | case CORINFO_LOOKUP_CLASSPARAM: |
16213 | { |
16214 | GenTree* vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); |
16215 | return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, gtNewArgList(vtTree)); |
16216 | } |
16217 | |
16218 | case CORINFO_LOOKUP_METHODPARAM: |
16219 | { |
16220 | GenTree* methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); |
16221 | return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, |
16222 | gtNewArgList(gtNewIconNode(0), methHndTree)); |
16223 | } |
16224 | } |
16225 | } |
16226 | |
16227 | noway_assert(!"Unknown LOOKUP_KIND" ); |
16228 | UNREACHABLE(); |
16229 | } |
16230 | |
16231 | #ifdef DEBUG |
16232 | /***************************************************************************** |
16233 | * |
16234 | * Tree walk callback to make sure no GT_QMARK nodes are present in the tree, |
16235 | * except for the allowed ? 1 : 0; pattern. |
16236 | */ |
16237 | Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTree** tree, fgWalkData* data) |
16238 | { |
16239 | if ((*tree)->OperGet() == GT_QMARK) |
16240 | { |
16241 | fgCheckQmarkAllowedForm(*tree); |
16242 | } |
16243 | return WALK_CONTINUE; |
16244 | } |
16245 | |
16246 | void Compiler::fgCheckQmarkAllowedForm(GenTree* tree) |
16247 | { |
16248 | assert(tree->OperGet() == GT_QMARK); |
16249 | assert(!"Qmarks beyond morph disallowed." ); |
16250 | } |
16251 | |
16252 | /***************************************************************************** |
16253 | * |
16254 | * Verify that the importer has created GT_QMARK nodes in a way we can |
16255 | * process them. The following is allowed: |
16256 | * |
16257 | * 1. A top level qmark. Top level qmark is of the form: |
16258 | * a) (bool) ? (void) : (void) OR |
16259 | * b) V0N = (bool) ? (type) : (type) |
16260 | * |
16261 | * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child |
16262 | * of either op1 of colon or op2 of colon but not a child of any other |
16263 | * operator. |
16264 | */ |
16265 | void Compiler::fgPreExpandQmarkChecks(GenTree* expr) |
16266 | { |
16267 | GenTree* topQmark = fgGetTopLevelQmark(expr); |
16268 | |
16269 | // If the top level Qmark is null, then scan the tree to make sure |
16270 | // there are no qmarks within it. |
16271 | if (topQmark == nullptr) |
16272 | { |
16273 | fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr); |
16274 | } |
16275 | else |
16276 | { |
// We could probably expand the cond node as well, but we don't think the extra effort is necessary,
// so let's just assert that the cond node of a top-level qmark doesn't contain further top-level qmarks.
16279 | fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr); |
16280 | |
16281 | fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1); |
16282 | fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2); |
16283 | } |
16284 | } |
16285 | #endif // DEBUG |
16286 | |
16287 | /***************************************************************************** |
16288 | * |
16289 | * Get the top level GT_QMARK node in a given "expr", return NULL if such a |
16290 | * node is not present. If the top level GT_QMARK node is assigned to a |
16291 | * GT_LCL_VAR, then return the lcl node in ppDst. |
16292 | * |
16293 | */ |
16294 | GenTree* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */) |
16295 | { |
16296 | if (ppDst != nullptr) |
16297 | { |
16298 | *ppDst = nullptr; |
16299 | } |
16300 | |
16301 | GenTree* topQmark = nullptr; |
16302 | if (expr->gtOper == GT_QMARK) |
16303 | { |
16304 | topQmark = expr; |
16305 | } |
16306 | else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR) |
16307 | { |
16308 | topQmark = expr->gtOp.gtOp2; |
16309 | if (ppDst != nullptr) |
16310 | { |
16311 | *ppDst = expr->gtOp.gtOp1; |
16312 | } |
16313 | } |
16314 | return topQmark; |
16315 | } |
16316 | |
16317 | /********************************************************************************* |
16318 | * |
16319 | * For a castclass helper call, |
16320 | * Importer creates the following tree: |
16321 | * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper()); |
16322 | * |
16323 | * This method splits the qmark expression created by the importer into the |
16324 | * following blocks: (block, asg, cond1, cond2, helper, remainder) |
* Notice that op1 is the result for both conditions, so we coalesce these
* assignments into a single block instead of two blocks, which would result in a nested diamond.
16327 | * |
16328 | * +---------->-----------+ |
16329 | * | | | |
16330 | * ^ ^ v |
16331 | * | | | |
16332 | * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder |
16333 | * |
16334 | * We expect to achieve the following codegen: |
16335 | * mov rsi, rdx tmp = op1 // asgBlock |
16336 | * test rsi, rsi goto skip if tmp == null ? // cond1Block |
16337 | * je SKIP |
16338 | * mov rcx, 0x76543210 cns = op2 // cond2Block |
16339 | * cmp qword ptr [rsi], rcx goto skip if *tmp == op2 |
16340 | * je SKIP |
16341 | * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock |
16342 | * mov rsi, rax |
16343 | * SKIP: // remainderBlock |
16344 | * tmp has the result. |
16345 | * |
16346 | */ |
16347 | void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTree* stmt) |
16348 | { |
16349 | #ifdef DEBUG |
16350 | if (verbose) |
16351 | { |
16352 | printf("\nExpanding CastInstOf qmark in " FMT_BB " (before)\n" , block->bbNum); |
16353 | fgDispBasicBlocks(block, block, true); |
16354 | } |
16355 | #endif // DEBUG |
16356 | |
16357 | GenTree* expr = stmt->gtStmt.gtStmtExpr; |
16358 | |
16359 | GenTree* dst = nullptr; |
16360 | GenTree* qmark = fgGetTopLevelQmark(expr, &dst); |
16361 | noway_assert(dst != nullptr); |
16362 | |
16363 | assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF); |
16364 | |
16365 | // Get cond, true, false exprs for the qmark. |
16366 | GenTree* condExpr = qmark->gtGetOp1(); |
16367 | GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode(); |
16368 | GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode(); |
16369 | |
16370 | // Get cond, true, false exprs for the nested qmark. |
16371 | GenTree* nestedQmark = falseExpr; |
16372 | GenTree* cond2Expr; |
16373 | GenTree* true2Expr; |
16374 | GenTree* false2Expr; |
16375 | |
16376 | if (nestedQmark->gtOper == GT_QMARK) |
16377 | { |
16378 | cond2Expr = nestedQmark->gtGetOp1(); |
16379 | true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode(); |
16380 | false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode(); |
16381 | |
16382 | assert(cond2Expr->gtFlags & GTF_RELOP_QMARK); |
16383 | cond2Expr->gtFlags &= ~GTF_RELOP_QMARK; |
16384 | } |
16385 | else |
16386 | { |
// This is a rare case that arises when we are doing minopts and encounter isinst of null.
// gtFoldExpr was still able to optimize away part of the tree (but not all),
// which means it does not match our pattern.
16390 | |
16391 | // Rather than write code to handle this case, just fake up some nodes to make it match the common |
16392 | // case. Synthesize a comparison that is always true, and for the result-on-true, use the |
16393 | // entire subtree we expected to be the nested question op. |
16394 | |
16395 | cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL)); |
16396 | true2Expr = nestedQmark; |
16397 | false2Expr = gtNewIconNode(0, TYP_I_IMPL); |
16398 | } |
16399 | assert(false2Expr->OperGet() == trueExpr->OperGet()); |
16400 | |
16401 | // Clear flags as they are now going to be part of JTRUE. |
16402 | assert(condExpr->gtFlags & GTF_RELOP_QMARK); |
16403 | condExpr->gtFlags &= ~GTF_RELOP_QMARK; |
16404 | |
16405 | // Create the chain of blocks. See method header comment. |
16406 | // The order of blocks after this is the following: |
16407 | // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock |
16408 | // |
16409 | // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock', |
// if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this
16411 | // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely |
16412 | // remainderBlock will still be GC safe. |
16413 | unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT; |
16414 | BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt); |
16415 | fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock. |
16416 | |
16417 | BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true); |
16418 | BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true); |
16419 | BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true); |
16420 | BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true); |
16421 | |
16422 | remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags; |
16423 | |
16424 | // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). |
16425 | // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. |
16426 | if ((block->bbFlags & BBF_INTERNAL) == 0) |
16427 | { |
16428 | helperBlock->bbFlags &= ~BBF_INTERNAL; |
16429 | cond2Block->bbFlags &= ~BBF_INTERNAL; |
16430 | cond1Block->bbFlags &= ~BBF_INTERNAL; |
16431 | asgBlock->bbFlags &= ~BBF_INTERNAL; |
16432 | helperBlock->bbFlags |= BBF_IMPORTED; |
16433 | cond2Block->bbFlags |= BBF_IMPORTED; |
16434 | cond1Block->bbFlags |= BBF_IMPORTED; |
16435 | asgBlock->bbFlags |= BBF_IMPORTED; |
16436 | } |
16437 | |
16438 | // Chain the flow correctly. |
16439 | fgAddRefPred(asgBlock, block); |
16440 | fgAddRefPred(cond1Block, asgBlock); |
16441 | fgAddRefPred(cond2Block, cond1Block); |
16442 | fgAddRefPred(helperBlock, cond2Block); |
16443 | fgAddRefPred(remainderBlock, helperBlock); |
16444 | fgAddRefPred(remainderBlock, cond1Block); |
16445 | fgAddRefPred(remainderBlock, cond2Block); |
16446 | |
16447 | cond1Block->bbJumpDest = remainderBlock; |
16448 | cond2Block->bbJumpDest = remainderBlock; |
16449 | |
16450 | // Set the weights; some are guesses. |
16451 | asgBlock->inheritWeight(block); |
16452 | cond1Block->inheritWeight(block); |
16453 | cond2Block->inheritWeightPercentage(cond1Block, 50); |
16454 | helperBlock->inheritWeightPercentage(cond2Block, 50); |
16455 | |
16456 | // Append cond1 as JTRUE to cond1Block |
16457 | GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr); |
16458 | GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx); |
16459 | fgInsertStmtAtEnd(cond1Block, jmpStmt); |
16460 | |
16461 | // Append cond2 as JTRUE to cond2Block |
16462 | jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr); |
16463 | jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx); |
16464 | fgInsertStmtAtEnd(cond2Block, jmpStmt); |
16465 | |
16466 | // AsgBlock should get tmp = op1 assignment. |
16467 | trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr); |
16468 | GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx); |
16469 | fgInsertStmtAtEnd(asgBlock, trueStmt); |
16470 | |
16471 | // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper. |
16472 | gtReverseCond(cond2Expr); |
16473 | GenTree* helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr); |
16474 | GenTree* helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx); |
16475 | fgInsertStmtAtEnd(helperBlock, helperStmt); |
16476 | |
16477 | // Finally remove the nested qmark stmt. |
16478 | fgRemoveStmt(block, stmt); |
16479 | |
16480 | #ifdef DEBUG |
16481 | if (verbose) |
16482 | { |
16483 | printf("\nExpanding CastInstOf qmark in " FMT_BB " (after)\n" , block->bbNum); |
16484 | fgDispBasicBlocks(block, remainderBlock, true); |
16485 | } |
16486 | #endif // DEBUG |
16487 | } |
16488 | |
16489 | /***************************************************************************** |
16490 | * |
16491 | * Expand a statement with a top level qmark node. There are three cases, based |
16492 | * on whether the qmark has both "true" and "false" arms, or just one of them. |
16493 | * |
16494 | * S0; |
16495 | * C ? T : F; |
16496 | * S1; |
16497 | * |
16498 | * Generates ===> |
16499 | * |
16500 | * bbj_always |
16501 | * +---->------+ |
16502 | * false | | |
16503 | * S0 -->-- ~C -->-- T F -->-- S1 |
16504 | * | | |
16505 | * +--->--------+ |
16506 | * bbj_cond(true) |
16507 | * |
16508 | * ----------------------------------------- |
16509 | * |
16510 | * S0; |
16511 | * C ? T : NOP; |
16512 | * S1; |
16513 | * |
16514 | * Generates ===> |
16515 | * |
16516 | * false |
16517 | * S0 -->-- ~C -->-- T -->-- S1 |
16518 | * | | |
16519 | * +-->-------------+ |
16520 | * bbj_cond(true) |
16521 | * |
16522 | * ----------------------------------------- |
16523 | * |
16524 | * S0; |
16525 | * C ? NOP : F; |
16526 | * S1; |
16527 | * |
16528 | * Generates ===> |
16529 | * |
16530 | * false |
16531 | * S0 -->-- C -->-- F -->-- S1 |
16532 | * | | |
16533 | * +-->------------+ |
16534 | * bbj_cond(true) |
16535 | * |
16536 | * If the qmark assigns to a variable, then create tmps for "then" |
16537 | * and "else" results and assign the temp to the variable as a writeback step. |
16538 | */ |
16539 | void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTree* stmt) |
16540 | { |
16541 | GenTree* expr = stmt->gtStmt.gtStmtExpr; |
16542 | |
16543 | // Retrieve the Qmark node to be expanded. |
16544 | GenTree* dst = nullptr; |
16545 | GenTree* qmark = fgGetTopLevelQmark(expr, &dst); |
16546 | if (qmark == nullptr) |
16547 | { |
16548 | return; |
16549 | } |
16550 | |
16551 | if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF) |
16552 | { |
16553 | fgExpandQmarkForCastInstOf(block, stmt); |
16554 | return; |
16555 | } |
16556 | |
16557 | #ifdef DEBUG |
16558 | if (verbose) |
16559 | { |
16560 | printf("\nExpanding top-level qmark in " FMT_BB " (before)\n" , block->bbNum); |
16561 | fgDispBasicBlocks(block, block, true); |
16562 | } |
16563 | #endif // DEBUG |
16564 | |
16565 | // Retrieve the operands. |
16566 | GenTree* condExpr = qmark->gtGetOp1(); |
16567 | GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode(); |
16568 | GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode(); |
16569 | |
16570 | assert(condExpr->gtFlags & GTF_RELOP_QMARK); |
16571 | condExpr->gtFlags &= ~GTF_RELOP_QMARK; |
16572 | |
16573 | assert(!varTypeIsFloating(condExpr->TypeGet())); |
16574 | |
16575 | bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP); |
16576 | bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP); |
16577 | assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark! |
16578 | |
16579 | // Create remainder, cond and "else" blocks. After this, the blocks are in this order: |
16580 | // block ... condBlock ... elseBlock ... remainderBlock |
16581 | // |
16582 | // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock', |
// if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this
16584 | // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely |
16585 | // remainderBlock will still be GC safe. |
16586 | unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT; |
16587 | BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt); |
16588 | fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock. |
16589 | |
16590 | BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true); |
16591 | BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true); |
16592 | |
16593 | // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). |
16594 | // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. |
16595 | if ((block->bbFlags & BBF_INTERNAL) == 0) |
16596 | { |
16597 | condBlock->bbFlags &= ~BBF_INTERNAL; |
16598 | elseBlock->bbFlags &= ~BBF_INTERNAL; |
16599 | condBlock->bbFlags |= BBF_IMPORTED; |
16600 | elseBlock->bbFlags |= BBF_IMPORTED; |
16601 | } |
16602 | |
16603 | remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags; |
16604 | |
16605 | condBlock->inheritWeight(block); |
16606 | |
16607 | fgAddRefPred(condBlock, block); |
16608 | fgAddRefPred(elseBlock, condBlock); |
16609 | fgAddRefPred(remainderBlock, elseBlock); |
16610 | |
16611 | BasicBlock* thenBlock = nullptr; |
16612 | if (hasTrueExpr && hasFalseExpr) |
16613 | { |
16614 | // bbj_always |
16615 | // +---->------+ |
16616 | // false | | |
16617 | // S0 -->-- ~C -->-- T F -->-- S1 |
16618 | // | | |
16619 | // +--->--------+ |
16620 | // bbj_cond(true) |
16621 | // |
16622 | gtReverseCond(condExpr); |
16623 | condBlock->bbJumpDest = elseBlock; |
16624 | |
16625 | thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true); |
16626 | thenBlock->bbJumpDest = remainderBlock; |
16627 | if ((block->bbFlags & BBF_INTERNAL) == 0) |
16628 | { |
16629 | thenBlock->bbFlags &= ~BBF_INTERNAL; |
16630 | thenBlock->bbFlags |= BBF_IMPORTED; |
16631 | } |
16632 | |
16633 | elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL); |
16634 | |
16635 | fgAddRefPred(thenBlock, condBlock); |
16636 | fgAddRefPred(remainderBlock, thenBlock); |
16637 | |
16638 | thenBlock->inheritWeightPercentage(condBlock, 50); |
16639 | elseBlock->inheritWeightPercentage(condBlock, 50); |
16640 | } |
16641 | else if (hasTrueExpr) |
16642 | { |
16643 | // false |
16644 | // S0 -->-- ~C -->-- T -->-- S1 |
16645 | // | | |
16646 | // +-->-------------+ |
16647 | // bbj_cond(true) |
16648 | // |
16649 | gtReverseCond(condExpr); |
16650 | condBlock->bbJumpDest = remainderBlock; |
16651 | fgAddRefPred(remainderBlock, condBlock); |
16652 | // Since we have no false expr, use the one we'd already created. |
16653 | thenBlock = elseBlock; |
16654 | elseBlock = nullptr; |
16655 | |
16656 | thenBlock->inheritWeightPercentage(condBlock, 50); |
16657 | } |
16658 | else if (hasFalseExpr) |
16659 | { |
16660 | // false |
16661 | // S0 -->-- C -->-- F -->-- S1 |
16662 | // | | |
16663 | // +-->------------+ |
16664 | // bbj_cond(true) |
16665 | // |
16666 | condBlock->bbJumpDest = remainderBlock; |
16667 | fgAddRefPred(remainderBlock, condBlock); |
16668 | |
16669 | elseBlock->inheritWeightPercentage(condBlock, 50); |
16670 | } |
16671 | |
16672 | GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1()); |
16673 | GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx); |
16674 | fgInsertStmtAtEnd(condBlock, jmpStmt); |
16675 | |
16676 | // Remove the original qmark statement. |
16677 | fgRemoveStmt(block, stmt); |
16678 | |
// Since this is a top-level qmark, either it has a dst, in which case we
// assign the true and false expressions to that local, or it is void and
// there is nothing to assign.
16682 | unsigned lclNum = BAD_VAR_NUM; |
16683 | if (dst != nullptr) |
16684 | { |
16685 | assert(dst->gtOper == GT_LCL_VAR); |
16686 | lclNum = dst->gtLclVar.gtLclNum; |
16687 | } |
16688 | else |
16689 | { |
16690 | assert(qmark->TypeGet() == TYP_VOID); |
16691 | } |
16692 | |
16693 | if (hasTrueExpr) |
16694 | { |
16695 | if (dst != nullptr) |
16696 | { |
16697 | trueExpr = gtNewTempAssign(lclNum, trueExpr); |
16698 | } |
16699 | GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx); |
16700 | fgInsertStmtAtEnd(thenBlock, trueStmt); |
16701 | } |
16702 | |
16703 | // Assign the falseExpr into the dst or tmp, insert in elseBlock |
16704 | if (hasFalseExpr) |
16705 | { |
16706 | if (dst != nullptr) |
16707 | { |
16708 | falseExpr = gtNewTempAssign(lclNum, falseExpr); |
16709 | } |
16710 | GenTree* falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx); |
16711 | fgInsertStmtAtEnd(elseBlock, falseStmt); |
16712 | } |
16713 | |
16714 | #ifdef DEBUG |
16715 | if (verbose) |
16716 | { |
16717 | printf("\nExpanding top-level qmark in " FMT_BB " (after)\n" , block->bbNum); |
16718 | fgDispBasicBlocks(block, remainderBlock, true); |
16719 | } |
16720 | #endif // DEBUG |
16721 | } |
16722 | |
16723 | /***************************************************************************** |
16724 | * |
16725 | * Expand GT_QMARK nodes from the flow graph into basic blocks. |
16726 | * |
16727 | */ |
16728 | |
16729 | void Compiler::fgExpandQmarkNodes() |
16730 | { |
16731 | if (compQmarkUsed) |
16732 | { |
16733 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
16734 | { |
16735 | for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) |
16736 | { |
16737 | GenTree* expr = stmt->gtStmt.gtStmtExpr; |
16738 | #ifdef DEBUG |
16739 | fgPreExpandQmarkChecks(expr); |
16740 | #endif |
16741 | fgExpandQmarkStmt(block, stmt); |
16742 | } |
16743 | } |
16744 | #ifdef DEBUG |
16745 | fgPostExpandQmarkChecks(); |
16746 | #endif |
16747 | } |
16748 | compQmarkRationalized = true; |
16749 | } |
16750 | |
16751 | #ifdef DEBUG |
16752 | /***************************************************************************** |
16753 | * |
16754 | * Make sure we don't have any more GT_QMARK nodes. |
16755 | * |
16756 | */ |
16757 | void Compiler::fgPostExpandQmarkChecks() |
16758 | { |
16759 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
16760 | { |
16761 | for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) |
16762 | { |
16763 | GenTree* expr = stmt->gtStmt.gtStmtExpr; |
16764 | fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr); |
16765 | } |
16766 | } |
16767 | } |
16768 | #endif |
16769 | |
16770 | /***************************************************************************** |
16771 | * |
16772 | * Transform all basic blocks for codegen. |
16773 | */ |
16774 | |
16775 | void Compiler::fgMorph() |
16776 | { |
16777 | noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here. |
16778 | |
16779 | fgOutgoingArgTemps = nullptr; |
16780 | |
16781 | #ifdef DEBUG |
16782 | if (verbose) |
16783 | { |
16784 | printf("*************** In fgMorph()\n" ); |
16785 | } |
16786 | if (verboseTrees) |
16787 | { |
16788 | fgDispBasicBlocks(true); |
16789 | } |
16790 | #endif // DEBUG |
16791 | |
16792 | // Insert call to class constructor as the first basic block if |
16793 | // we were asked to do so. |
16794 | if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */, |
16795 | impTokenLookupContextHandle /* context */) & |
16796 | CORINFO_INITCLASS_USE_HELPER) |
16797 | { |
16798 | fgEnsureFirstBBisScratch(); |
16799 | fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass()); |
16800 | } |
16801 | |
16802 | #ifdef DEBUG |
16803 | if (opts.compGcChecks) |
16804 | { |
16805 | for (unsigned i = 0; i < info.compArgsCount; i++) |
16806 | { |
16807 | if (lvaTable[i].TypeGet() == TYP_REF) |
16808 | { |
16809 | // confirm that the argument is a GC pointer (for debugging (GC stress)) |
16810 | GenTree* op = gtNewLclvNode(i, TYP_REF); |
16811 | GenTreeArgList* args = gtNewArgList(op); |
16812 | op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, args); |
16813 | |
16814 | fgEnsureFirstBBisScratch(); |
16815 | fgInsertStmtAtEnd(fgFirstBB, op); |
16816 | } |
16817 | } |
16818 | } |
16819 | #endif // DEBUG |
16820 | |
16821 | #if defined(DEBUG) && defined(_TARGET_XARCH_) |
16822 | if (opts.compStackCheckOnRet) |
16823 | { |
16824 | lvaReturnSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnSpCheck" )); |
16825 | lvaTable[lvaReturnSpCheck].lvType = TYP_I_IMPL; |
16826 | } |
16827 | #endif // defined(DEBUG) && defined(_TARGET_XARCH_) |
16828 | |
16829 | #if defined(DEBUG) && defined(_TARGET_X86_) |
16830 | if (opts.compStackCheckOnCall) |
16831 | { |
16832 | lvaCallSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallSpCheck" )); |
16833 | lvaTable[lvaCallSpCheck].lvType = TYP_I_IMPL; |
16834 | } |
16835 | #endif // defined(DEBUG) && defined(_TARGET_X86_) |
16836 | |
16837 | /* Filter out unimported BBs */ |
16838 | |
16839 | fgRemoveEmptyBlocks(); |
16840 | |
16841 | #ifdef DEBUG |
16842 | /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */ |
16843 | fgDebugCheckBBlist(false, false); |
16844 | #endif // DEBUG |
16845 | |
16846 | EndPhase(PHASE_MORPH_INIT); |
16847 | |
16848 | /* Inline */ |
16849 | fgInline(); |
16850 | #if 0 |
16851 | JITDUMP("trees after inlining\n" ); |
16852 | DBEXEC(VERBOSE, fgDispBasicBlocks(true)); |
16853 | #endif |
16854 | |
16855 | RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time. |
16856 | |
16857 | EndPhase(PHASE_MORPH_INLINE); |
16858 | |
16859 | // Transform each GT_ALLOCOBJ node into either an allocation helper call or |
16860 | // local variable allocation on the stack. |
16861 | ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS |
16862 | |
16863 | // TODO-ObjectStackAllocation: Enable the optimization for architectures using |
16864 | // JIT32_GCENCODER (i.e., x86). |
16865 | #ifndef JIT32_GCENCODER |
16866 | if (JitConfig.JitObjectStackAllocation() && opts.OptimizationEnabled()) |
16867 | { |
16868 | objectAllocator.EnableObjectStackAllocation(); |
16869 | } |
16870 | #endif // JIT32_GCENCODER |
16871 | |
16872 | objectAllocator.Run(); |
16873 | |
16874 | /* Add any internal blocks/trees we may need */ |
16875 | |
16876 | fgAddInternal(); |
16877 | |
16878 | #ifdef DEBUG |
16879 | /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */ |
16880 | fgDebugCheckBBlist(false, false); |
16881 | /* Inliner could clone some trees. */ |
16882 | fgDebugCheckNodesUniqueness(); |
16883 | #endif // DEBUG |
16884 | |
16885 | fgRemoveEmptyTry(); |
16886 | |
16887 | EndPhase(PHASE_EMPTY_TRY); |
16888 | |
16889 | fgRemoveEmptyFinally(); |
16890 | |
16891 | EndPhase(PHASE_EMPTY_FINALLY); |
16892 | |
16893 | fgMergeFinallyChains(); |
16894 | |
16895 | EndPhase(PHASE_MERGE_FINALLY_CHAINS); |
16896 | |
16897 | fgCloneFinally(); |
16898 | |
16899 | EndPhase(PHASE_CLONE_FINALLY); |
16900 | |
16901 | fgUpdateFinallyTargetFlags(); |
16902 | |
16903 | /* For x64 and ARM64 we need to mark irregular parameters */ |
16904 | |
16905 | lvaRefCountState = RCS_EARLY; |
16906 | fgMarkImplicitByRefArgs(); |
16907 | |
16908 | /* Promote struct locals if necessary */ |
16909 | fgPromoteStructs(); |
16910 | |
    /* Now it is time to figure out which locals are address-taken. */
16912 | fgMarkAddressExposedLocals(); |
16913 | |
16914 | EndPhase(PHASE_STR_ADRLCL); |
16915 | |
16916 | /* Apply the type update to implicit byref parameters; also choose (based on address-exposed |
16917 | analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */ |
16918 | fgRetypeImplicitByRefArgs(); |
16919 | |
16920 | #ifdef DEBUG |
    /* Now that the address-taken locals and implicit byrefs are marked, we can safely apply stress. */
16922 | lvaStressLclFld(); |
16923 | fgStress64RsltMul(); |
16924 | #endif // DEBUG |
16925 | |
16926 | EndPhase(PHASE_MORPH_IMPBYREF); |
16927 | |
16928 | /* Morph the trees in all the blocks of the method */ |
16929 | |
16930 | fgMorphBlocks(); |
16931 | |
16932 | /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */ |
16933 | fgMarkDemotedImplicitByRefArgs(); |
16934 | lvaRefCountState = RCS_INVALID; |
16935 | |
16936 | EndPhase(PHASE_MORPH_GLOBAL); |
16937 | |
16938 | #if 0 |
16939 | JITDUMP("trees after fgMorphBlocks\n" ); |
16940 | DBEXEC(VERBOSE, fgDispBasicBlocks(true)); |
16941 | #endif |
16942 | |
16943 | #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) |
16944 | if (fgNeedToAddFinallyTargetBits) |
16945 | { |
16946 | // We previously wiped out the BBF_FINALLY_TARGET bits due to some morphing; add them back. |
16947 | fgAddFinallyTargetFlags(); |
16948 | fgNeedToAddFinallyTargetBits = false; |
16949 | } |
16950 | #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) |
16951 | |
16952 | /* Decide the kind of code we want to generate */ |
16953 | |
16954 | fgSetOptions(); |
16955 | |
16956 | fgExpandQmarkNodes(); |
16957 | |
16958 | #ifdef DEBUG |
16959 | compCurBB = nullptr; |
16960 | #endif // DEBUG |
16961 | } |
16962 | |
16963 | /***************************************************************************** |
16964 | * |
16965 | * Promoting struct locals |
16966 | */ |
16967 | void Compiler::fgPromoteStructs() |
16968 | { |
16969 | #ifdef DEBUG |
16970 | if (verbose) |
16971 | { |
16972 | printf("*************** In fgPromoteStructs()\n" ); |
16973 | } |
16974 | #endif // DEBUG |
16975 | |
16976 | if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE)) |
16977 | { |
16978 | JITDUMP(" promotion opt flag not enabled\n" ); |
16979 | return; |
16980 | } |
16981 | |
16982 | if (fgNoStructPromotion) |
16983 | { |
16984 | JITDUMP(" promotion disabled by JitNoStructPromotion\n" ); |
16985 | return; |
16986 | } |
16987 | |
16988 | #if 0 |
16989 | // The code in this #if has been useful in debugging struct promotion issues, by |
16990 | // enabling selective enablement of the struct promotion optimization according to |
16991 | // method hash. |
16992 | #ifdef DEBUG |
16993 | unsigned methHash = info.compMethodHash(); |
    char* lostr = getenv("structpromohashlo");
16995 | unsigned methHashLo = 0; |
16996 | if (lostr != NULL) |
16997 | { |
        sscanf_s(lostr, "%x", &methHashLo);
16999 | } |
    char* histr = getenv("structpromohashhi");
17001 | unsigned methHashHi = UINT32_MAX; |
17002 | if (histr != NULL) |
17003 | { |
        sscanf_s(histr, "%x", &methHashHi);
17005 | } |
17006 | if (methHash < methHashLo || methHash > methHashHi) |
17007 | { |
17008 | return; |
17009 | } |
17010 | else |
17011 | { |
17012 | printf("Promoting structs for method %s, hash = 0x%x.\n" , |
17013 | info.compFullName, info.compMethodHash()); |
17014 | printf("" ); // in our logic this causes a flush |
17015 | } |
17016 | #endif // DEBUG |
17017 | #endif // 0 |
17018 | |
17019 | if (info.compIsVarArgs) |
17020 | { |
17021 | JITDUMP(" promotion disabled because of varargs\n" ); |
17022 | return; |
17023 | } |
17024 | |
17025 | #ifdef DEBUG |
17026 | if (verbose) |
17027 | { |
17028 | printf("\nlvaTable before fgPromoteStructs\n" ); |
17029 | lvaTableDump(); |
17030 | } |
17031 | #endif // DEBUG |
17032 | |
17033 | // The lvaTable might grow as we grab temps. Make a local copy here. |
17034 | unsigned startLvaCount = lvaCount; |
17035 | |
17036 | // |
17037 | // Loop through the original lvaTable. Looking for struct locals to be promoted. |
17038 | // |
17039 | lvaStructPromotionInfo structPromotionInfo; |
17040 | bool tooManyLocalsReported = false; |
17041 | |
17042 | for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++) |
17043 | { |
17044 | // Whether this var got promoted |
17045 | bool promotedVar = false; |
17046 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
17047 | |
17048 | // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote |
17049 | // its fields. Instead, we will attempt to enregister the entire struct. |
17050 | if (varDsc->lvIsSIMDType() && (varDsc->lvIsUsedInSIMDIntrinsic() || isOpaqueSIMDLclVar(varDsc))) |
17051 | { |
17052 | varDsc->lvRegStruct = true; |
17053 | } |
17054 | // Don't promote if we have reached the tracking limit. |
17055 | else if (lvaHaveManyLocals()) |
17056 | { |
            // Print the message the first time we detect this condition.
17058 | if (!tooManyLocalsReported) |
17059 | { |
17060 | JITDUMP("Stopped promoting struct fields, due to too many locals.\n" ); |
17061 | } |
17062 | tooManyLocalsReported = true; |
17063 | } |
17064 | else if (varTypeIsStruct(varDsc)) |
17065 | { |
17066 | assert(structPromotionHelper != nullptr); |
17067 | promotedVar = structPromotionHelper->TryPromoteStructVar(lclNum); |
17068 | } |
17069 | |
17070 | if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed) |
17071 | { |
17072 | // Even if we have not used this in a SIMD intrinsic, if it is not being promoted, |
17073 | // we will treat it as a reg struct. |
17074 | varDsc->lvRegStruct = true; |
17075 | } |
17076 | } |
17077 | |
17078 | #ifdef _TARGET_ARM_ |
17079 | if (structPromotionHelper->GetRequiresScratchVar()) |
17080 | { |
17081 | // Ensure that the scratch variable is allocated, in case we |
17082 | // pass a promoted struct as an argument. |
17083 | if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM) |
17084 | { |
17085 | lvaPromotedStructAssemblyScratchVar = |
17086 | lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var." )); |
17087 | lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL; |
17088 | } |
17089 | } |
17090 | #endif // _TARGET_ARM_ |
17091 | |
17092 | #ifdef DEBUG |
17093 | if (verbose) |
17094 | { |
17095 | printf("\nlvaTable after fgPromoteStructs\n" ); |
17096 | lvaTableDump(); |
17097 | } |
17098 | #endif // DEBUG |
17099 | } |
17100 | |
17101 | void Compiler::fgMorphStructField(GenTree* tree, GenTree* parent) |
17102 | { |
17103 | noway_assert(tree->OperGet() == GT_FIELD); |
17104 | |
17105 | GenTreeField* field = tree->AsField(); |
17106 | GenTree* objRef = field->gtFldObj; |
17107 | GenTree* obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr; |
17108 | noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))); |
17109 | |
17110 | /* Is this an instance data member? */ |
17111 | |
17112 | if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)) |
17113 | { |
17114 | unsigned lclNum = obj->gtLclVarCommon.gtLclNum; |
17115 | const LclVarDsc* varDsc = &lvaTable[lclNum]; |
17116 | |
17117 | if (varTypeIsStruct(obj)) |
17118 | { |
17119 | if (varDsc->lvPromoted) |
17120 | { |
17121 | // Promoted struct |
17122 | unsigned fldOffset = field->gtFldOffset; |
17123 | unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); |
17124 | |
17125 | if (fieldLclIndex == BAD_VAR_NUM) |
17126 | { |
                    // This is an access of a promoted struct's field with an offset that doesn't
                    // correspond to any promoted field; it can happen if the struct was cast to
                    // another struct with different field offsets.
17129 | return; |
17130 | } |
17131 | |
17132 | const LclVarDsc* fieldDsc = &lvaTable[fieldLclIndex]; |
17133 | var_types fieldType = fieldDsc->TypeGet(); |
17134 | |
17135 | assert(fieldType != TYP_STRUCT); // promoted LCL_VAR can't have a struct type. |
17136 | if (tree->TypeGet() != fieldType) |
17137 | { |
17138 | if (tree->TypeGet() != TYP_STRUCT) |
17139 | { |
                        // Replacing the field access with the promoted local would retype it
                        // incorrectly, for example reading an int field as a long.
17142 | return; |
17143 | } |
17144 | |
17145 | if (field->gtFldHnd != fieldDsc->lvFieldHnd) |
17146 | { |
17147 | CORINFO_CLASS_HANDLE fieldTreeClass = nullptr, fieldDscClass = nullptr; |
17148 | |
17149 | CorInfoType fieldTreeType = info.compCompHnd->getFieldType(field->gtFldHnd, &fieldTreeClass); |
17150 | CorInfoType fieldDscType = info.compCompHnd->getFieldType(fieldDsc->lvFieldHnd, &fieldDscClass); |
17151 | if (fieldTreeType != fieldDscType || fieldTreeClass != fieldDscClass) |
17152 | { |
                            // The promoted field is accessed through a different class handle, so we
                            // can't check that the types match.
17154 | return; |
17155 | } |
17156 | // Access the promoted field as a field of a non-promoted struct with the same class handle. |
17157 | } |
17158 | #ifdef DEBUG |
17159 | else if (tree->TypeGet() == TYP_STRUCT) |
17160 | { |
                        // The field tree accesses it as a struct, but the promoted lcl var for the field
                        // says that it has another type. This can happen only if struct promotion faked
                        // the field type for a struct whose single scalar-type field is aligned at its
                        // natural boundary.
17164 | assert(structPromotionHelper != nullptr); |
17165 | structPromotionHelper->CheckRetypedAsScalar(field->gtFldHnd, fieldType); |
17166 | } |
17167 | #endif // DEBUG |
17168 | } |
17169 | |
17170 | tree->SetOper(GT_LCL_VAR); |
17171 | tree->gtLclVarCommon.SetLclNum(fieldLclIndex); |
17172 | tree->gtType = fieldType; |
17173 | tree->gtFlags &= GTF_NODE_MASK; |
17174 | tree->gtFlags &= ~GTF_GLOB_REF; |
17175 | |
17176 | if (parent->gtOper == GT_ASG) |
17177 | { |
17178 | if (parent->gtOp.gtOp1 == tree) |
17179 | { |
17180 | tree->gtFlags |= GTF_VAR_DEF; |
17181 | tree->gtFlags |= GTF_DONT_CSE; |
17182 | } |
17183 | |
17184 | // Promotion of struct containing struct fields where the field |
17185 | // is a struct with a single pointer sized scalar type field: in |
17186 | // this case struct promotion uses the type of the underlying |
17187 | // scalar field as the type of struct field instead of recursively |
17188 | // promoting. This can lead to a case where we have a block-asgn |
17189 | // with its RHS replaced with a scalar type. Mark RHS value as |
17190 | // DONT_CSE so that assertion prop will not do const propagation. |
17191 | // The reason this is required is that if RHS of a block-asg is a |
17192 | // constant, then it is interpreted as init-block incorrectly. |
17193 | // |
17194 | // TODO - This can also be avoided if we implement recursive struct |
17195 | // promotion, tracked by #10019. |
17196 | if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree)) |
17197 | { |
17198 | tree->gtFlags |= GTF_DONT_CSE; |
17199 | } |
17200 | } |
17201 | #ifdef DEBUG |
17202 | if (verbose) |
17203 | { |
17204 | printf("Replacing the field in promoted struct with local var V%02u\n" , fieldLclIndex); |
17205 | } |
17206 | #endif // DEBUG |
17207 | } |
17208 | } |
17209 | else |
17210 | { |
17211 | // Normed struct |
17212 | // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if |
17213 | // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 |
17214 | // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However, |
17215 | // there is one extremely rare case where that won't be true. An enum type is a special value type |
            // that contains exactly one element of a primitive integer type (that, for CLS programs, is named
17217 | // "value__"). The VM tells us that a local var of that enum type is the primitive type of the |
17218 | // enum's single field. It turns out that it is legal for IL to access this field using ldflda or |
17219 | // ldfld. For example: |
17220 | // |
17221 | // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum |
17222 | // { |
17223 | // .field public specialname rtspecialname int16 value__ |
17224 | // .field public static literal valuetype mynamespace.e_t one = int16(0x0000) |
17225 | // } |
17226 | // .method public hidebysig static void Main() cil managed |
17227 | // { |
17228 | // .locals init (valuetype mynamespace.e_t V_0) |
17229 | // ... |
17230 | // ldloca.s V_0 |
17231 | // ldflda int16 mynamespace.e_t::value__ |
17232 | // ... |
17233 | // } |
17234 | // |
17235 | // Normally, compilers will not generate the ldflda, since it is superfluous. |
17236 | // |
17237 | // In the example, the lclVar is short, but the JIT promotes all trees using this local to the |
17238 | // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type |
17239 | // mismatch like this, don't do this morphing. The local var may end up getting marked as |
17240 | // address taken, and the appropriate SHORT load will be done from memory in that case. |
17241 | |
17242 | if (tree->TypeGet() == obj->TypeGet()) |
17243 | { |
17244 | tree->ChangeOper(GT_LCL_VAR); |
17245 | tree->gtLclVarCommon.SetLclNum(lclNum); |
17246 | tree->gtFlags &= GTF_NODE_MASK; |
17247 | |
17248 | if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree)) |
17249 | { |
17250 | tree->gtFlags |= GTF_VAR_DEF; |
17251 | tree->gtFlags |= GTF_DONT_CSE; |
17252 | } |
17253 | #ifdef DEBUG |
17254 | if (verbose) |
17255 | { |
17256 | printf("Replacing the field in normed struct with local var V%02u\n" , lclNum); |
17257 | } |
17258 | #endif // DEBUG |
17259 | } |
17260 | } |
17261 | } |
17262 | } |
17263 | |
17264 | void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) |
17265 | { |
17266 | noway_assert(tree->OperGet() == GT_LCL_FLD); |
17267 | |
17268 | unsigned lclNum = tree->gtLclFld.gtLclNum; |
17269 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
17270 | |
17271 | if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted)) |
17272 | { |
17273 | // Promoted struct |
17274 | unsigned fldOffset = tree->gtLclFld.gtLclOffs; |
17275 | unsigned fieldLclIndex = 0; |
17276 | LclVarDsc* fldVarDsc = nullptr; |
17277 | |
17278 | if (fldOffset != BAD_VAR_NUM) |
17279 | { |
17280 | fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); |
17281 | noway_assert(fieldLclIndex != BAD_VAR_NUM); |
17282 | fldVarDsc = &lvaTable[fieldLclIndex]; |
17283 | } |
17284 | |
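        // Replace the GT_LCL_FLD with the promoted field local only when an existing field matches
        // the offset and size of the access (and, on x86, also its float-ness).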
17285 | if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType) |
17286 | #ifdef _TARGET_X86_ |
17287 | && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType) |
17288 | #endif |
17289 | ) |
17290 | { |
17291 | // There is an existing sub-field we can use. |
17292 | tree->gtLclFld.SetLclNum(fieldLclIndex); |
17293 | |
17294 | // The field must be an enregisterable type; otherwise it would not be a promoted field. |
17295 | // The tree type may not match, e.g. for return types that have been morphed, but both |
17296 | // must be enregisterable types. |
17297 | // TODO-Cleanup: varTypeCanReg should presumably return true for SIMD types, but |
17298 | // there may be places where that would violate existing assumptions. |
17299 | var_types treeType = tree->TypeGet(); |
17300 | var_types fieldType = fldVarDsc->TypeGet(); |
17301 | assert((varTypeCanReg(treeType) || varTypeIsSIMD(treeType)) && |
17302 | (varTypeCanReg(fieldType) || varTypeIsSIMD(fieldType))); |
17303 | |
17304 | tree->ChangeOper(GT_LCL_VAR); |
17305 | assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex); |
17306 | tree->gtType = fldVarDsc->TypeGet(); |
17307 | #ifdef DEBUG |
17308 | if (verbose) |
17309 | { |
17310 | printf("Replacing the GT_LCL_FLD in promoted struct with local var V%02u\n" , fieldLclIndex); |
17311 | } |
17312 | #endif // DEBUG |
17313 | |
17314 | if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree)) |
17315 | { |
17316 | tree->gtFlags |= GTF_VAR_DEF; |
17317 | tree->gtFlags |= GTF_DONT_CSE; |
17318 | } |
17319 | } |
17320 | else |
17321 | { |
            // There is no existing field that has all the parts that we need,
            // so we must ensure that the struct lives in memory.
17324 | lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); |
17325 | |
17326 | #ifdef DEBUG |
            // We can't convert this local to a float because it really does have its
            // address taken.
17329 | varDsc->lvKeepType = 1; |
17330 | #endif // DEBUG |
17331 | } |
17332 | } |
17333 | } |
17334 | |
17335 | //------------------------------------------------------------------------ |
17336 | // fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference"; |
17337 | // i.e. which the ABI requires to be passed by making a copy in the caller and |
17338 | // passing its address to the callee. Mark their `LclVarDsc`s such that |
17339 | // `lvaIsImplicitByRefLocal` will return true for them. |
17340 | |
17341 | void Compiler::fgMarkImplicitByRefArgs() |
17342 | { |
17343 | #if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) |
17344 | #ifdef DEBUG |
17345 | if (verbose) |
17346 | { |
17347 | printf("\n*************** In fgMarkImplicitByRefs()\n" ); |
17348 | } |
17349 | #endif // DEBUG |
17350 | |
17351 | for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) |
17352 | { |
17353 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
17354 | |
17355 | if (varDsc->lvIsParam && varTypeIsStruct(varDsc)) |
17356 | { |
17357 | size_t size; |
17358 | |
17359 | if (varDsc->lvSize() > REGSIZE_BYTES) |
17360 | { |
17361 | size = varDsc->lvSize(); |
17362 | } |
17363 | else |
17364 | { |
17365 | CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); |
17366 | size = info.compCompHnd->getClassSize(typeHnd); |
17367 | } |
17368 | |
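            // On Windows x64 a struct is passed by reference unless its size is a power of two no
            // larger than a register (1, 2, 4 or 8 bytes); the check below encodes exactly that.
            // On ARM64 a struct is implicit-by-ref when it is larger than a pointer and is not
            // one of the multi-reg cases handled by lvaIsMultiregStruct (e.g. an HFA).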
17369 | #if defined(_TARGET_AMD64_) |
17370 | if (size > REGSIZE_BYTES || (size & (size - 1)) != 0) |
17371 | #elif defined(_TARGET_ARM64_) |
17372 | if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc, this->info.compIsVarArgs)) |
17373 | #endif |
17374 | { |
                // Previously, lvIsParam and lvIsTemp were never both set on the same local, so
                // we now use that combination to indicate that this is one of the weird implicit
                // by-ref locals. The address-taken cleanup will look for references to locals
                // marked like this, and transform them appropriately.
17380 | varDsc->lvIsTemp = 1; |
17381 | |
17382 | // Clear the ref count field; fgMarkAddressTakenLocals will increment it per |
17383 | // appearance of implicit-by-ref param so that call arg morphing can do an |
17384 | // optimization for single-use implicit-by-ref params whose single use is as |
17385 | // an outgoing call argument. |
17386 | varDsc->setLvRefCnt(0, RCS_EARLY); |
17387 | } |
17388 | } |
17389 | } |
17390 | |
17391 | #endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
17392 | } |
17393 | |
17394 | //------------------------------------------------------------------------ |
17395 | // fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from |
17396 | // struct to pointer). Also choose (based on address-exposed analysis) |
17397 | // which struct promotions of implicit byrefs to keep or discard. |
17398 | // For those which are kept, insert the appropriate initialization code. |
17399 | // For those which are to be discarded, annotate the promoted field locals |
17400 | // so that fgMorphImplicitByRefArgs will know to rewrite their appearances |
17401 | // using indirections off the pointer parameters. |
17402 | |
17403 | void Compiler::fgRetypeImplicitByRefArgs() |
17404 | { |
17405 | #if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) |
17406 | #ifdef DEBUG |
17407 | if (verbose) |
17408 | { |
17409 | printf("\n*************** In fgRetypeImplicitByRefArgs()\n" ); |
17410 | } |
17411 | #endif // DEBUG |
17412 | |
17413 | for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) |
17414 | { |
17415 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
17416 | |
17417 | if (lvaIsImplicitByRefLocal(lclNum)) |
17418 | { |
17419 | size_t size; |
17420 | |
17421 | if (varDsc->lvSize() > REGSIZE_BYTES) |
17422 | { |
17423 | size = varDsc->lvSize(); |
17424 | } |
17425 | else |
17426 | { |
17427 | CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); |
17428 | size = info.compCompHnd->getClassSize(typeHnd); |
17429 | } |
17430 | |
17431 | if (varDsc->lvPromoted) |
17432 | { |
17433 | // This implicit-by-ref was promoted; create a new temp to represent the |
17434 | // promoted struct before rewriting this parameter as a pointer. |
                unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
17436 | lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true); |
17437 | if (info.compIsVarArgs) |
17438 | { |
17439 | lvaSetStructUsedAsVarArg(newLclNum); |
17440 | } |
17441 | |
17442 | // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array. |
17443 | varDsc = &lvaTable[lclNum]; |
17444 | |
17445 | // Copy the struct promotion annotations to the new temp. |
17446 | LclVarDsc* newVarDsc = &lvaTable[newLclNum]; |
17447 | newVarDsc->lvPromoted = true; |
17448 | newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart; |
17449 | newVarDsc->lvFieldCnt = varDsc->lvFieldCnt; |
17450 | newVarDsc->lvContainsHoles = varDsc->lvContainsHoles; |
17451 | newVarDsc->lvCustomLayout = varDsc->lvCustomLayout; |
17452 | #ifdef DEBUG |
17453 | newVarDsc->lvKeepType = true; |
17454 | #endif // DEBUG |
17455 | |
17456 | // Propagate address-taken-ness and do-not-enregister-ness. |
17457 | newVarDsc->lvAddrExposed = varDsc->lvAddrExposed; |
17458 | newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister; |
17459 | #ifdef DEBUG |
17460 | newVarDsc->lvLclBlockOpAddr = varDsc->lvLclBlockOpAddr; |
17461 | newVarDsc->lvLclFieldExpr = varDsc->lvLclFieldExpr; |
17462 | newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr; |
17463 | newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr; |
17464 | newVarDsc->lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall; |
17465 | #endif // DEBUG |
17466 | |
17467 | // If the promotion is dependent, the promoted temp would just be committed |
17468 | // to memory anyway, so we'll rewrite its appearances to be indirections |
17469 | // through the pointer parameter, the same as we'd do for this |
17470 | // parameter if it weren't promoted at all (otherwise the initialization |
17471 | // of the new temp would just be a needless memcpy at method entry). |
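                // As a heuristic, we also undo the promotion when the parameter's early ref count
                // does not exceed its promoted field count, since in that case the field-by-field
                // copy at method entry would likely cost more than it saves.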
17472 | bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) || |
17473 | (varDsc->lvRefCnt(RCS_EARLY) <= varDsc->lvFieldCnt); |
17474 | |
17475 | if (!undoPromotion) |
17476 | { |
17477 | // Insert IR that initializes the temp from the parameter. |
17478 | // LHS is a simple reference to the temp. |
17479 | fgEnsureFirstBBisScratch(); |
17480 | GenTree* lhs = gtNewLclvNode(newLclNum, varDsc->lvType); |
17481 | // RHS is an indirection (using GT_OBJ) off the parameter. |
17482 | GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF); |
17483 | GenTree* rhs = gtNewBlockVal(addr, (unsigned)size); |
17484 | GenTree* assign = gtNewAssignNode(lhs, rhs); |
17485 | fgInsertStmtAtBeg(fgFirstBB, assign); |
17486 | } |
17487 | |
17488 | // Update the locals corresponding to the promoted fields. |
17489 | unsigned fieldLclStart = varDsc->lvFieldLclStart; |
17490 | unsigned fieldCount = varDsc->lvFieldCnt; |
17491 | unsigned fieldLclStop = fieldLclStart + fieldCount; |
17492 | |
17493 | for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum) |
17494 | { |
17495 | LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; |
17496 | |
17497 | if (undoPromotion) |
17498 | { |
17499 | // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs |
17500 | // will know to rewrite appearances of this local. |
17501 | assert(fieldVarDsc->lvParentLcl == lclNum); |
17502 | } |
17503 | else |
17504 | { |
17505 | // Set the new parent. |
17506 | fieldVarDsc->lvParentLcl = newLclNum; |
                        // Clear the ref count field; it is used to communicate the number of references
17508 | // to the implicit byref parameter when morphing calls that pass the implicit byref |
17509 | // out as an outgoing argument value, but that doesn't pertain to this field local |
17510 | // which is now a field of a non-arg local. |
17511 | fieldVarDsc->setLvRefCnt(0, RCS_EARLY); |
17512 | } |
17513 | |
17514 | fieldVarDsc->lvIsParam = false; |
17515 | // The fields shouldn't inherit any register preferences from |
17516 | // the parameter which is really a pointer to the struct. |
17517 | fieldVarDsc->lvIsRegArg = false; |
17518 | fieldVarDsc->lvIsMultiRegArg = false; |
17519 | fieldVarDsc->lvSetIsHfaRegArg(false); |
17520 | fieldVarDsc->lvArgReg = REG_NA; |
17521 | #if FEATURE_MULTIREG_ARGS |
17522 | fieldVarDsc->lvOtherArgReg = REG_NA; |
17523 | #endif |
17524 | } |
17525 | |
17526 | // Hijack lvFieldLclStart to record the new temp number. |
17527 | // It will get fixed up in fgMarkDemotedImplicitByRefArgs. |
17528 | varDsc->lvFieldLclStart = newLclNum; |
17529 | // Go ahead and clear lvFieldCnt -- either we're promoting |
17530 | // a replacement temp or we're not promoting this arg, and |
17531 | // in either case the parameter is now a pointer that doesn't |
17532 | // have these fields. |
17533 | varDsc->lvFieldCnt = 0; |
17534 | |
17535 | // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs |
17536 | // whether references to the struct should be rewritten as |
17537 | // indirections off the pointer (not promoted) or references |
17538 | // to the new struct local (promoted). |
17539 | varDsc->lvPromoted = !undoPromotion; |
17540 | } |
17541 | else |
17542 | { |
17543 | // The "undo promotion" path above clears lvPromoted for args that struct |
17544 | // promotion wanted to promote but that aren't considered profitable to |
17545 | // rewrite. It hijacks lvFieldLclStart to communicate to |
17546 | // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left |
17547 | // on such args for fgMorphImplicitByRefArgs to consult in the interim. |
17548 | // Here we have an arg that was simply never promoted, so make sure it doesn't |
17549 | // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs |
17550 | // and fgMarkDemotedImplicitByRefArgs. |
17551 | assert(varDsc->lvFieldLclStart == 0); |
17552 | } |
17553 | |
17554 | // Since the parameter in this position is really a pointer, its type is TYP_BYREF. |
17555 | varDsc->lvType = TYP_BYREF; |
17556 | |
            // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF,
            // make sure that the following flag is not set, as it would force SSA to
            // exclude this LclVar from tracking/enregistering. (see SsaBuilder::IncludeInSsa)
17560 | // |
17561 | varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it. |
17562 | |
17563 | // The struct parameter may have had its address taken, but the pointer parameter |
17564 | // cannot -- any uses of the struct parameter's address are uses of the pointer |
17565 | // parameter's value, and there's no way for the MSIL to reference the pointer |
17566 | // parameter's address. So clear the address-taken bit for the parameter. |
17567 | varDsc->lvAddrExposed = 0; |
17568 | varDsc->lvDoNotEnregister = 0; |
17569 | |
17570 | #ifdef DEBUG |
17571 | // This should not be converted to a double in stress mode, |
17572 | // because it is really a pointer |
17573 | varDsc->lvKeepType = 1; |
17574 | |
17575 | if (verbose) |
17576 | { |
17577 | printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n" , lclNum); |
17578 | } |
17579 | #endif // DEBUG |
17580 | } |
17581 | } |
17582 | |
17583 | #endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
17584 | } |
17585 | |
17586 | //------------------------------------------------------------------------ |
17587 | // fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion |
17588 | // asked to promote. Appearances of these have now been rewritten |
17589 | // (by fgMorphImplicitByRefArgs) using indirections from the pointer |
17590 | // parameter or references to the promotion temp, as appropriate. |
17591 | |
17592 | void Compiler::fgMarkDemotedImplicitByRefArgs() |
17593 | { |
17594 | #if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) |
17595 | |
17596 | for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) |
17597 | { |
17598 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
17599 | |
17600 | if (lvaIsImplicitByRefLocal(lclNum)) |
17601 | { |
17602 | if (varDsc->lvPromoted) |
17603 | { |
17604 | // The parameter is simply a pointer now, so clear lvPromoted. It was left set |
17605 | // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that |
17606 | // appearances of this arg needed to be rewritten to a new promoted struct local. |
17607 | varDsc->lvPromoted = false; |
17608 | |
17609 | // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs |
17610 | // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one. |
17611 | varDsc->lvFieldLclStart = 0; |
17612 | } |
17613 | else if (varDsc->lvFieldLclStart != 0) |
17614 | { |
17615 | // We created new temps to represent a promoted struct corresponding to this |
17616 | // parameter, but decided not to go through with the promotion and have |
17617 | // rewritten all uses as indirections off the pointer parameter. |
17618 | // We stashed the pointer to the new struct temp in lvFieldLclStart; make |
17619 | // note of that and clear the annotation. |
17620 | unsigned structLclNum = varDsc->lvFieldLclStart; |
17621 | varDsc->lvFieldLclStart = 0; |
17622 | |
17623 | // Clear the arg's ref count; this was set during address-taken analysis so that |
17624 | // call morphing could identify single-use implicit byrefs; we're done with |
17625 | // that, and want it to be in its default state of zero when we go to set |
17626 | // real ref counts for all variables. |
17627 | varDsc->setLvRefCnt(0, RCS_EARLY); |
17628 | |
17629 | // The temp struct is now unused; set flags appropriately so that we |
17630 | // won't allocate space for it on the stack. |
17631 | LclVarDsc* structVarDsc = &lvaTable[structLclNum]; |
17632 | structVarDsc->setLvRefCnt(0, RCS_EARLY); |
17633 | structVarDsc->lvAddrExposed = false; |
17634 | #ifdef DEBUG |
17635 | structVarDsc->lvUnusedStruct = true; |
17636 | #endif // DEBUG |
17637 | |
17638 | unsigned fieldLclStart = structVarDsc->lvFieldLclStart; |
17639 | unsigned fieldCount = structVarDsc->lvFieldCnt; |
17640 | unsigned fieldLclStop = fieldLclStart + fieldCount; |
17641 | |
17642 | for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum) |
17643 | { |
17644 | // Fix the pointer to the parent local. |
17645 | LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; |
17646 | assert(fieldVarDsc->lvParentLcl == lclNum); |
17647 | fieldVarDsc->lvParentLcl = structLclNum; |
17648 | |
17649 | // The field local is now unused; set flags appropriately so that |
17650 | // we won't allocate stack space for it. |
17651 | fieldVarDsc->setLvRefCnt(0, RCS_EARLY); |
17652 | fieldVarDsc->lvAddrExposed = false; |
17653 | } |
17654 | } |
17655 | } |
17656 | } |
17657 | |
17658 | #endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
17659 | } |
17660 | |
17661 | /***************************************************************************** |
17662 | * |
17663 | * Morph irregular parameters |
17664 | * for x64 and ARM64 this means turning them into byrefs, adding extra indirs. |
17665 | */ |
17666 | bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) |
17667 | { |
17668 | #if (!defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)) && !defined(_TARGET_ARM64_) |
17669 | |
17670 | return false; |
17671 | |
17672 | #else // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
17673 | |
17674 | bool changed = false; |
17675 | |
17676 | // Implicit byref morphing needs to know if the reference to the parameter is a |
17677 | // child of GT_ADDR or not, so this method looks one level down and does the |
17678 | // rewrite whenever a child is a reference to an implicit byref parameter. |
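    // For example (illustrative): ADDR(LCL_VAR V01) becomes just V01 retyped to TYP_BYREF, while
    // a non-address use of V01 becomes OBJ(V01) or FIELD(V01, f); see the overload below.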
17679 | if (tree->gtOper == GT_ADDR) |
17680 | { |
17681 | if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) |
17682 | { |
17683 | GenTree* morphedTree = fgMorphImplicitByRefArgs(tree, true); |
17684 | changed = (morphedTree != nullptr); |
17685 | assert(!changed || (morphedTree == tree)); |
17686 | } |
17687 | } |
17688 | else |
17689 | { |
17690 | for (GenTree** pTree : tree->UseEdges()) |
17691 | { |
17692 | GenTree* childTree = *pTree; |
17693 | if (childTree->gtOper == GT_LCL_VAR) |
17694 | { |
17695 | GenTree* newChildTree = fgMorphImplicitByRefArgs(childTree, false); |
17696 | if (newChildTree != nullptr) |
17697 | { |
17698 | changed = true; |
17699 | *pTree = newChildTree; |
17700 | } |
17701 | } |
17702 | } |
17703 | } |
17704 | |
17705 | return changed; |
17706 | #endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
17707 | } |
17708 | |
17709 | GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) |
17710 | { |
17711 | assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR))); |
17712 | assert(isAddr == (tree->gtOper == GT_ADDR)); |
17713 | |
17714 | GenTree* lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree; |
17715 | unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum; |
17716 | LclVarDsc* lclVarDsc = &lvaTable[lclNum]; |
17717 | |
17718 | CORINFO_FIELD_HANDLE fieldHnd; |
17719 | unsigned fieldOffset = 0; |
17720 | var_types fieldRefType = TYP_UNKNOWN; |
17721 | |
17722 | if (lvaIsImplicitByRefLocal(lclNum)) |
17723 | { |
17724 | // The SIMD transformation to coalesce contiguous references to SIMD vector fields will |
17725 | // re-invoke the traversal to mark address-taken locals. |
17726 | // So, we may encounter a tree that has already been transformed to TYP_BYREF. |
17727 | // If we do, leave it as-is. |
17728 | if (!varTypeIsStruct(lclVarTree)) |
17729 | { |
17730 | assert(lclVarTree->TypeGet() == TYP_BYREF); |
17731 | |
17732 | return nullptr; |
17733 | } |
17734 | else if (lclVarDsc->lvPromoted) |
17735 | { |
17736 | // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this |
17737 | // arg. Rewrite this to refer to the new local. |
17738 | assert(lclVarDsc->lvFieldLclStart != 0); |
17739 | lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart); |
17740 | return tree; |
17741 | } |
17742 | |
17743 | fieldHnd = nullptr; |
17744 | } |
17745 | else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl)) |
17746 | { |
17747 | // This was a field reference to an implicit-by-reference struct parameter that was |
17748 | // dependently promoted; update it to a field reference off the pointer. |
17749 | // Grab the field handle from the struct field lclVar. |
17750 | fieldHnd = lclVarDsc->lvFieldHnd; |
17751 | fieldOffset = lclVarDsc->lvFldOffset; |
17752 | assert(fieldHnd != nullptr); |
17753 | // Update lclNum/lclVarDsc to refer to the parameter |
17754 | lclNum = lclVarDsc->lvParentLcl; |
17755 | lclVarDsc = &lvaTable[lclNum]; |
17756 | fieldRefType = lclVarTree->TypeGet(); |
17757 | } |
17758 | else |
17759 | { |
        // We only need to transform the 'marked' implicit by-ref parameters.
17761 | return nullptr; |
17762 | } |
17763 | |
17764 | // This is no longer a def of the lclVar, even if it WAS a def of the struct. |
17765 | lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK); |
17766 | |
17767 | if (isAddr) |
17768 | { |
17769 | if (fieldHnd == nullptr) |
17770 | { |
17771 | // change &X into just plain X |
17772 | tree->ReplaceWith(lclVarTree, this); |
17773 | tree->gtType = TYP_BYREF; |
17774 | } |
17775 | else |
17776 | { |
17777 | // change &(X.f) [i.e. GT_ADDR of local for promoted arg field] |
17778 | // into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param] |
17779 | lclVarTree->gtLclVarCommon.SetLclNum(lclNum); |
17780 | lclVarTree->gtType = TYP_BYREF; |
17781 | tree->gtOp.gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset); |
17782 | } |
17783 | |
17784 | #ifdef DEBUG |
17785 | if (verbose) |
17786 | { |
17787 | printf("Replacing address of implicit by ref struct parameter with byref:\n" ); |
17788 | } |
17789 | #endif // DEBUG |
17790 | } |
17791 | else |
17792 | { |
17793 | // Change X into OBJ(X) or FIELD(X, f) |
17794 | var_types structType = tree->gtType; |
17795 | tree->gtType = TYP_BYREF; |
17796 | |
17797 | if (fieldHnd) |
17798 | { |
17799 | tree->gtLclVarCommon.SetLclNum(lclNum); |
17800 | tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset); |
17801 | } |
17802 | else |
17803 | { |
17804 | tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree); |
17805 | } |
17806 | |
17807 | if (structType == TYP_STRUCT) |
17808 | { |
17809 | gtSetObjGcInfo(tree->AsObj()); |
17810 | } |
17811 | |
17812 | // TODO-CQ: If the VM ever stops violating the ABI and passing heap references |
17813 | // we could remove TGTANYWHERE |
17814 | tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE); |
17815 | |
17816 | #ifdef DEBUG |
17817 | if (verbose) |
17818 | { |
17819 | printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n" ); |
17820 | } |
17821 | #endif // DEBUG |
17822 | } |
17823 | |
17824 | #ifdef DEBUG |
17825 | if (verbose) |
17826 | { |
17827 | gtDispTree(tree); |
17828 | } |
17829 | #endif // DEBUG |
17830 | |
17831 | return tree; |
17832 | } |
17833 | |
17834 | class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor> |
17835 | { |
17836 | // During tree traversal every GenTree node produces a "value" that represents: |
17837 | // - the memory location associated with a local variable, including an offset |
17838 | // accumulated from GT_LCL_FLD and GT_FIELD nodes. |
17839 | // - the address of local variable memory location, including an offset as well. |
17840 | // - an unknown value - the result of a node we don't know how to process. This |
17841 | // also includes the result of TYP_VOID nodes (or any other nodes that don't |
17842 | // actually produce values in IR) in order to support the invariant that every |
17843 | // node produces a value. |
17844 | // |
17845 | // The existence of GT_ADDR nodes and their use together with GT_FIELD to form |
17846 | // FIELD/ADDR/FIELD/ADDR/LCL_VAR sequences complicate things a bit. A typical |
17847 | // GT_FIELD node acts like an indirection and should produce an unknown value, |
17848 | // local address analysis doesn't know or care what value the field stores. |
17849 | // But a GT_FIELD can also be used as an operand for a GT_ADDR node and then |
17850 | // the GT_FIELD node does not perform an indirection, it's just represents a |
17851 | // location, similar to GT_LCL_VAR and GT_LCL_FLD. |
17852 | // |
17853 | // To avoid this issue, the semantics of GT_FIELD (and for simplicity's sake any other |
    // indirection) nodes slightly deviate from the IR semantics - an indirection does not
17855 | // actually produce an unknown value but a location value, if the indirection address |
17856 | // operand is an address value. |
17857 | // |
17858 | // The actual indirection is performed when the indirection's user node is processed: |
17859 | // - A GT_ADDR user turns the location value produced by the indirection back |
17860 | // into an address value. |
17861 | // - Any other user node performs the indirection and produces an unknown value. |
17862 | // |
17863 | class Value |
17864 | { |
17865 | GenTree* m_node; |
17866 | unsigned m_lclNum; |
17867 | unsigned m_offset; |
17868 | bool m_address; |
17869 | INDEBUG(bool m_consumed;) |
17870 | |
17871 | public: |
17872 | // Produce an unknown value associated with the specified node. |
17873 | Value(GenTree* node) |
17874 | : m_node(node) |
17875 | , m_lclNum(BAD_VAR_NUM) |
17876 | , m_offset(0) |
17877 | , m_address(false) |
17878 | #ifdef DEBUG |
17879 | , m_consumed(false) |
17880 | #endif // DEBUG |
17881 | { |
17882 | } |
17883 | |
17884 | // Get the node that produced this value. |
17885 | GenTree* Node() const |
17886 | { |
17887 | return m_node; |
17888 | } |
17889 | |
17890 | // Does this value represent a location? |
17891 | bool IsLocation() const |
17892 | { |
17893 | return (m_lclNum != BAD_VAR_NUM) && !m_address; |
17894 | } |
17895 | |
17896 | // Does this value represent the address of a location? |
17897 | bool IsAddress() const |
17898 | { |
17899 | assert((m_lclNum != BAD_VAR_NUM) || !m_address); |
17900 | |
17901 | return m_address; |
17902 | } |
17903 | |
17904 | // Get the location's variable number. |
17905 | unsigned LclNum() const |
17906 | { |
17907 | assert(IsLocation() || IsAddress()); |
17908 | |
17909 | return m_lclNum; |
17910 | } |
17911 | |
17912 | // Get the location's byte offset. |
17913 | unsigned Offset() const |
17914 | { |
17915 | assert(IsLocation() || IsAddress()); |
17916 | |
17917 | return m_offset; |
17918 | } |
17919 | |
17920 | //------------------------------------------------------------------------ |
17921 | // Location: Produce a location value. |
17922 | // |
17923 | // Arguments: |
17924 | // lclNum - the local variable number |
17925 | // offset - the byte offset of the location (used for GT_LCL_FLD nodes) |
17926 | // |
17927 | // Notes: |
17928 | // - (lclnum, offset) => LOCATION(lclNum, offset) |
17929 | // |
17930 | void Location(unsigned lclNum, unsigned offset = 0) |
17931 | { |
17932 | assert(!IsLocation() && !IsAddress()); |
17933 | |
17934 | m_lclNum = lclNum; |
17935 | m_offset = offset; |
17936 | } |
17937 | |
17938 | //------------------------------------------------------------------------ |
17939 | // Address: Produce an address value from a location value. |
17940 | // |
17941 | // Arguments: |
17942 | // val - the input value |
17943 | // |
17944 | // Notes: |
17945 | // - LOCATION(lclNum, offset) => ADDRESS(lclNum, offset) |
17946 | // - ADDRESS(lclNum, offset) => invalid, we should never encounter something like ADDR(ADDR(...)) |
17947 | // - UNKNOWN => UNKNOWN |
17948 | // |
17949 | void Address(Value& val) |
17950 | { |
17951 | assert(!IsLocation() && !IsAddress()); |
17952 | assert(!val.IsAddress()); |
17953 | |
17954 | if (val.IsLocation()) |
17955 | { |
17956 | m_address = true; |
17957 | m_lclNum = val.m_lclNum; |
17958 | m_offset = val.m_offset; |
17959 | } |
17960 | |
17961 | INDEBUG(val.Consume();) |
17962 | } |
17963 | |
17964 | //------------------------------------------------------------------------ |
17965 | // Field: Produce a location value from an address value. |
17966 | // |
17967 | // Arguments: |
17968 | // val - the input value |
17969 | // offset - the offset to add to the existing location offset |
17970 | // |
17971 | // Return Value: |
17972 | // `true` if the value was consumed. `false` if the input value |
17973 | // cannot be consumed because it is itsef a location or because |
17974 | // the offset overflowed. In this case the caller is expected |
17975 | // to escape the input value. |
17976 | // |
17977 | // Notes: |
17978 | // - LOCATION(lclNum, offset) => not representable, must escape |
17979 | // - ADDRESS(lclNum, offset) => LOCATION(lclNum, offset + field.Offset) |
17980 | // if the offset overflows then location is not representable, must escape |
17981 | // - UNKNOWN => UNKNOWN |
17982 | // |
17983 | bool Field(Value& val, unsigned offset) |
17984 | { |
17985 | assert(!IsLocation() && !IsAddress()); |
17986 | |
17987 | if (val.IsLocation()) |
17988 | { |
17989 | return false; |
17990 | } |
17991 | |
17992 | if (val.IsAddress()) |
17993 | { |
17994 | ClrSafeInt<unsigned> newOffset = ClrSafeInt<unsigned>(val.m_offset) + ClrSafeInt<unsigned>(offset); |
17995 | |
17996 | if (newOffset.IsOverflow()) |
17997 | { |
17998 | return false; |
17999 | } |
18000 | |
18001 | m_lclNum = val.m_lclNum; |
18002 | m_offset = newOffset.Value(); |
18003 | } |
18004 | |
18005 | INDEBUG(val.Consume();) |
18006 | return true; |
18007 | } |
18008 | |
18009 | //------------------------------------------------------------------------ |
18010 | // Indir: Produce a location value from an address value. |
18011 | // |
18012 | // Arguments: |
18013 | // val - the input value |
18014 | // |
18015 | // Return Value: |
18016 | // `true` if the value was consumed. `false` if the input value |
18017 | // cannot be consumed because it is itsef a location. In this |
18018 | // case the caller is expected to escape the input value. |
18019 | // |
18020 | // Notes: |
18021 | // - LOCATION(lclNum, offset) => not representable, must escape |
18022 | // - ADDRESS(lclNum, offset) => LOCATION(lclNum, offset) |
18023 | // - UNKNOWN => UNKNOWN |
18024 | // |
18025 | bool Indir(Value& val) |
18026 | { |
18027 | assert(!IsLocation() && !IsAddress()); |
18028 | |
18029 | if (val.IsLocation()) |
18030 | { |
18031 | return false; |
18032 | } |
18033 | |
18034 | if (val.IsAddress()) |
18035 | { |
18036 | m_lclNum = val.m_lclNum; |
18037 | m_offset = val.m_offset; |
18038 | } |
18039 | |
18040 | INDEBUG(val.Consume();) |
18041 | return true; |
18042 | } |
18043 | |
18044 | #ifdef DEBUG |
18045 | void Consume() |
18046 | { |
18047 | assert(!m_consumed); |
18048 | // Mark the value as consumed so that PopValue can ensure that values |
18049 | // aren't popped from the stack without being processed appropriately. |
18050 | m_consumed = true; |
18051 | } |
18052 | |
18053 | bool IsConsumed() |
18054 | { |
18055 | return m_consumed; |
18056 | } |
18057 | #endif // DEBUG |
18058 | }; |
18059 | |
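    // Stack of values produced by visited nodes whose user nodes have not been processed yet.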
18060 | ArrayStack<Value> m_valueStack; |
18061 | INDEBUG(bool m_stmtModified;) |
18062 | |
18063 | public: |
18064 | enum |
18065 | { |
18066 | DoPreOrder = true, |
18067 | DoPostOrder = true, |
18068 | ComputeStack = true, |
18069 | DoLclVarsOnly = false, |
18070 | UseExecutionOrder = false, |
18071 | }; |
18072 | |
18073 | LocalAddressVisitor(Compiler* comp) |
18074 | : GenTreeVisitor<LocalAddressVisitor>(comp), m_valueStack(comp->getAllocator(CMK_LocalAddressVisitor)) |
18075 | { |
18076 | } |
18077 | |
18078 | void VisitStmt(GenTreeStmt* stmt) |
18079 | { |
18080 | #ifdef DEBUG |
18081 | if (m_compiler->verbose) |
18082 | { |
18083 | printf("LocalAddressVisitor visiting statement:\n" ); |
18084 | m_compiler->gtDispTree(stmt); |
18085 | m_stmtModified = false; |
18086 | } |
18087 | #endif // DEBUG |
18088 | |
18089 | WalkTree(&stmt->gtStmtExpr, nullptr); |
18090 | |
        // We could have something like STMT(IND(ADDR(LCL_VAR))) so we need to escape
18092 | // the location here. This doesn't seem to happen often, if ever. The importer |
18093 | // tends to wrap such a tree in a COMMA. |
18094 | if (TopValue(0).IsLocation()) |
18095 | { |
18096 | EscapeLocation(TopValue(0), stmt); |
18097 | } |
18098 | else |
18099 | { |
18100 | // If we have an address on the stack then we don't need to do anything. |
18101 | // The address tree isn't actually used and it will be discarded during |
18102 | // morphing. So just mark any value as consumed to keep PopValue happy. |
18103 | INDEBUG(TopValue(0).Consume();) |
18104 | } |
18105 | |
18106 | PopValue(); |
18107 | assert(m_valueStack.Empty()); |
18108 | |
18109 | #ifdef DEBUG |
18110 | if (m_compiler->verbose) |
18111 | { |
18112 | if (m_stmtModified) |
18113 | { |
18114 | printf("LocalAddressVisitor modified statement:\n" ); |
18115 | m_compiler->gtDispTree(stmt); |
18116 | } |
18117 | |
18118 | printf("\n" ); |
18119 | } |
18120 | #endif // DEBUG |
18121 | } |
18122 | |
    // Morph promoted struct fields and count implicit byref argument occurrences.
18124 | // Also create and push the value produced by the visited node. This is done here |
18125 | // rather than in PostOrderVisit because it makes it easy to handle nodes with an |
18126 | // arbitrary number of operands - just pop values until the value corresponding |
18127 | // to the visited node is encountered. |
18128 | fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) |
18129 | { |
18130 | GenTree* node = *use; |
18131 | |
18132 | if (node->OperIs(GT_FIELD)) |
18133 | { |
18134 | MorphStructField(node, user); |
18135 | } |
18136 | else if (node->OperIs(GT_LCL_FLD)) |
18137 | { |
18138 | MorphLocalField(node, user); |
18139 | } |
18140 | |
18141 | if (node->OperIsLocal()) |
18142 | { |
18143 | unsigned lclNum = node->AsLclVarCommon()->GetLclNum(); |
18144 | |
18145 | LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); |
18146 | if (varDsc->lvIsStructField) |
18147 | { |
18148 | // Promoted field, increase counter for the parent lclVar. |
18149 | assert(!m_compiler->lvaIsImplicitByRefLocal(lclNum)); |
18150 | unsigned parentLclNum = varDsc->lvParentLcl; |
18151 | UpdateEarlyRefCountForImplicitByRef(parentLclNum); |
18152 | } |
18153 | else |
18154 | { |
18155 | UpdateEarlyRefCountForImplicitByRef(lclNum); |
18156 | } |
18157 | } |
18158 | |
18159 | PushValue(node); |
18160 | |
18161 | return Compiler::WALK_CONTINUE; |
18162 | } |
18163 | |
18164 | // Evaluate a node. Since this is done in postorder, the node's operands have already been |
18165 | // evaluated and are available on the value stack. The value produced by the visited node |
18166 | // is left on the top of the evaluation stack. |
18167 | fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) |
18168 | { |
18169 | GenTree* node = *use; |
18170 | |
18171 | switch (node->OperGet()) |
18172 | { |
18173 | case GT_LCL_VAR: |
18174 | assert(TopValue(0).Node() == node); |
18175 | |
18176 | TopValue(0).Location(node->AsLclVar()->GetLclNum()); |
18177 | break; |
18178 | |
18179 | case GT_LCL_FLD: |
18180 | assert(TopValue(0).Node() == node); |
18181 | |
18182 | TopValue(0).Location(node->AsLclFld()->GetLclNum(), node->AsLclFld()->gtLclOffs); |
18183 | break; |
18184 | |
18185 | case GT_ADDR: |
18186 | assert(TopValue(1).Node() == node); |
18187 | assert(TopValue(0).Node() == node->gtGetOp1()); |
18188 | |
18189 | TopValue(1).Address(TopValue(0)); |
18190 | PopValue(); |
18191 | break; |
18192 | |
18193 | case GT_FIELD: |
18194 | if (node->AsField()->gtFldObj != nullptr) |
18195 | { |
18196 | assert(TopValue(1).Node() == node); |
18197 | assert(TopValue(0).Node() == node->AsField()->gtFldObj); |
18198 | |
18199 | if (!TopValue(1).Field(TopValue(0), node->AsField()->gtFldOffset)) |
18200 | { |
18201 | // Either the address comes from a location value (e.g. FIELD(IND(...))) |
18202 | // or the field offset has overflowed. |
18203 | EscapeValue(TopValue(0), node); |
18204 | } |
18205 | |
18206 | PopValue(); |
18207 | } |
18208 | else |
18209 | { |
18210 | assert(TopValue(0).Node() == node); |
18211 | } |
18212 | break; |
18213 | |
18214 | case GT_OBJ: |
18215 | case GT_BLK: |
18216 | case GT_IND: |
18217 | assert(TopValue(1).Node() == node); |
18218 | assert(TopValue(0).Node() == node->gtGetOp1()); |
18219 | |
18220 | if ((node->gtFlags & GTF_IND_VOLATILE) != 0) |
18221 | { |
18222 | // Volatile indirections must not be removed so the address, |
18223 | // if any, must be escaped. |
18224 | EscapeValue(TopValue(0), node); |
18225 | } |
18226 | else if (!TopValue(1).Indir(TopValue(0))) |
18227 | { |
18228 | // If the address comes from another indirection (e.g. IND(IND(...)) |
18229 | // then we need to escape the location. |
18230 | EscapeLocation(TopValue(0), node); |
18231 | } |
18232 | |
18233 | PopValue(); |
18234 | break; |
18235 | |
18236 | case GT_DYN_BLK: |
18237 | assert(TopValue(2).Node() == node); |
18238 | assert(TopValue(1).Node() == node->AsDynBlk()->Addr()); |
18239 | assert(TopValue(0).Node() == node->AsDynBlk()->gtDynamicSize); |
18240 | |
18241 | // The block size may be the result of an indirection so we need |
18242 | // to escape the location that may be associated with it. |
18243 | EscapeValue(TopValue(0), node); |
18244 | |
18245 | if (!TopValue(2).Indir(TopValue(1))) |
18246 | { |
18247 | // If the address comes from another indirection (e.g. DYN_BLK(IND(...)) |
18248 | // then we need to escape the location. |
18249 | EscapeLocation(TopValue(1), node); |
18250 | } |
18251 | |
18252 | PopValue(); |
18253 | PopValue(); |
18254 | break; |
18255 | |
18256 | default: |
18257 | while (TopValue(0).Node() != node) |
18258 | { |
18259 | EscapeValue(TopValue(0), node); |
18260 | PopValue(); |
18261 | } |
18262 | break; |
18263 | } |
18264 | |
18265 | assert(TopValue(0).Node() == node); |
18266 | return Compiler::WALK_CONTINUE; |
18267 | } |
18268 | |
18269 | private: |
18270 | void PushValue(GenTree* node) |
18271 | { |
18272 | m_valueStack.Push(node); |
18273 | } |
18274 | |
18275 | Value& TopValue(unsigned index) |
18276 | { |
18277 | return m_valueStack.IndexRef(index); |
18278 | } |
18279 | |
18280 | void PopValue() |
18281 | { |
18282 | assert(TopValue(0).IsConsumed()); |
18283 | m_valueStack.Pop(); |
18284 | } |
18285 | |
18286 | //------------------------------------------------------------------------ |
18287 | // EscapeValue: Process an escaped value |
18288 | // |
18289 | // Arguments: |
18290 | // val - the escaped address value |
18291 | // user - the node that uses the escaped value |
18292 | // |
18293 | void EscapeValue(Value& val, GenTree* user) |
18294 | { |
18295 | if (val.IsLocation()) |
18296 | { |
18297 | EscapeLocation(val, user); |
18298 | } |
18299 | else if (val.IsAddress()) |
18300 | { |
18301 | EscapeAddress(val, user); |
18302 | } |
18303 | else |
18304 | { |
18305 | INDEBUG(val.Consume();) |
18306 | } |
18307 | } |
18308 | |
18309 | //------------------------------------------------------------------------ |
18310 | // EscapeAddress: Process an escaped address value |
18311 | // |
18312 | // Arguments: |
18313 | // val - the escaped address value |
18314 | // user - the node that uses the address value |
18315 | // |
18316 | void EscapeAddress(Value& val, GenTree* user) |
18317 | { |
18318 | assert(val.IsAddress()); |
18319 | |
18320 | LclVarDsc* varDsc = m_compiler->lvaGetDesc(val.LclNum()); |
18321 | |
        // In general we don't know how an exposed struct field address will be used - it may be used to
        // access only that specific field or it may be used to access other fields in the same struct
        // by using pointer/ref arithmetic. It seems reasonable to make an exception for the "this" arg
        // of calls - it would be highly unusual for a struct member method to attempt to access memory
        // beyond the "this" instance. And calling struct member methods is common enough that attempting
        // to mark the entire struct as address exposed results in CQ regressions.
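        //
        // For example (an illustrative sketch, not taken from any particular test): given
        //     struct Pair { int a; int b; }
        // and a promoted Pair local "p", a call such as p.a.ToString() passes the address of the
        // promoted field local for "a" as the byref "this" argument. Exposing only "a" (rather
        // than the whole Pair local) keeps "b" eligible for enregistration.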
18328 | bool isThisArg = user->IsCall() && (val.Node() == user->AsCall()->gtCallObjp); |
18329 | bool exposeParentLcl = varDsc->lvIsStructField && !isThisArg; |
18330 | |
18331 | m_compiler->lvaSetVarAddrExposed(exposeParentLcl ? varDsc->lvParentLcl : val.LclNum()); |
18332 | |
18333 | #ifdef _TARGET_64BIT_ |
        // If the address of a variable is passed in a call and the allocation size of the variable
        // is 32 bits we will quirk the size to 64 bits. Some PInvoke signatures incorrectly specify
        // a ByRef to an INT32 when they actually write a SIZE_T or INT64. There are cases where
        // overwriting these extra 4 bytes corrupts some data (such as a saved register), leading
        // to an A/V, whereas the previous JIT64 codegen did not.
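        //
        // A sketch of the problematic pattern (hypothetical signature, for illustration only):
        // managed code declares
        //     [DllImport("native.dll")] static extern void GetValue(ref int value);
        // while the native implementation writes a 64-bit SIZE_T through the pointer. Widening
        // the local's stack allocation to 8 bytes lets the extra 4-byte write land in padding
        // instead of corrupting an adjacent stack slot.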
18339 | if (!varDsc->lvIsParam && !varDsc->lvIsStructField && (genActualType(varDsc->TypeGet()) == TYP_INT)) |
18340 | { |
18341 | // TODO-Cleanup: This should simply check if the user is a call node, not if a call ancestor exists. |
18342 | if (Compiler::gtHasCallOnStack(&m_ancestors)) |
18343 | { |
18344 | varDsc->lvQuirkToLong = true; |
                JITDUMP("Adding a quirk for the storage size of V%02u of type %s\n", val.LclNum(),
                        varTypeName(varDsc->TypeGet()));
18347 | } |
18348 | } |
18349 | #endif // _TARGET_64BIT_ |
18350 | |
18351 | INDEBUG(val.Consume();) |
18352 | } |
18353 | |
18354 | //------------------------------------------------------------------------ |
18355 | // EscapeLocation: Process an escaped location value |
18356 | // |
18357 | // Arguments: |
18358 | // val - the escaped location value |
18359 | // user - the node that uses the location value |
18360 | // |
18361 | // Notes: |
18362 | // Unlike EscapeAddress, this does not necessarily mark the lclvar associated |
18363 | // with the value as address exposed. This is needed only if the indirection |
18364 | // is wider than the lclvar. |
18365 | // |
18366 | void EscapeLocation(Value& val, GenTree* user) |
18367 | { |
18368 | assert(val.IsLocation()); |
18369 | |
18370 | GenTree* node = val.Node(); |
18371 | |
18372 | if (node->OperIs(GT_LCL_VAR, GT_LCL_FLD)) |
18373 | { |
18374 | // If the location is accessed directly then we don't need to do anything. |
18375 | |
18376 | assert(node->AsLclVarCommon()->GetLclNum() == val.LclNum()); |
18377 | } |
18378 | else |
18379 | { |
18380 | // Otherwise it must be accessed through some kind of indirection. Usually this is |
18381 | // something like IND(ADDR(LCL_VAR)), global morph will change it to GT_LCL_VAR or |
18382 | // GT_LCL_FLD so the lclvar does not need to be address exposed. |
18383 | // |
18384 | // However, it is possible for the indirection to be wider than the lclvar |
18385 | // (e.g. *(long*)&int32Var) or to have a field offset that pushes the indirection |
18386 | // past the end of the lclvar memory location. In such cases morph doesn't do |
18387 | // anything so the lclvar needs to be address exposed. |
18388 | // |
18389 | // More importantly, if the lclvar is a promoted struct field then the parent lclvar |
18390 | // also needs to be address exposed so we get dependent struct promotion. Code like |
18391 | // *(long*)&int32Var has undefined behavior and it's practically useless but reading, |
18392 | // say, 2 consecutive Int32 struct fields as Int64 has more practical value. |
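            //
            // For example (illustrative): with struct S { int f0; int f1; } promoted into two
            // TYP_INT locals, an access like *(long*)&s.f0 reads 8 bytes starting at f0. Marking
            // the parent local address exposed forces dependent promotion, so both fields stay
            // backed by the struct's stack location and the wide read sees consistent memory.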
18393 | |
18394 | LclVarDsc* varDsc = m_compiler->lvaGetDesc(val.LclNum()); |
18395 | unsigned indirSize = GetIndirSize(node, user); |
18396 | bool isWide; |
18397 | |
18398 | if (indirSize == 0) |
18399 | { |
18400 | // If we can't figure out the indirection size then treat it as a wide indirection. |
18401 | isWide = true; |
18402 | } |
18403 | else |
18404 | { |
18405 | ClrSafeInt<unsigned> endOffset = ClrSafeInt<unsigned>(val.Offset()) + ClrSafeInt<unsigned>(indirSize); |
18406 | |
18407 | if (endOffset.IsOverflow()) |
18408 | { |
18409 | isWide = true; |
18410 | } |
18411 | else if (varDsc->TypeGet() == TYP_STRUCT) |
18412 | { |
18413 | isWide = (endOffset.Value() > varDsc->lvExactSize); |
18414 | } |
18415 | else |
18416 | { |
18417 | // For small int types use the real type size, not the stack slot size. |
18418 | // Morph does manage to transform `*(int*)&byteVar` into just byteVar where |
18419 | // the LCL_VAR node has type TYP_INT. But such code is simply bogus and |
18420 | // there's no reason to attempt to optimize it. It makes more sense to |
18421 | // mark the variable address exposed in such circumstances. |
18422 | // |
18423 | // Same for "small" SIMD types - SIMD8/12 have 8/12 bytes, even if the |
18424 | // stack location may have 16 bytes. |
18425 | // |
18426 | // For TYP_BLK variables the type size is 0 so they're always address |
18427 | // exposed. |
18428 | isWide = (endOffset.Value() > genTypeSize(varDsc->TypeGet())); |
18429 | } |
18430 | } |
18431 | |
18432 | if (isWide) |
18433 | { |
18434 | m_compiler->lvaSetVarAddrExposed(varDsc->lvIsStructField ? varDsc->lvParentLcl : val.LclNum()); |
18435 | } |
18436 | } |
18437 | |
18438 | INDEBUG(val.Consume();) |
18439 | } |
18440 | |
18441 | //------------------------------------------------------------------------ |
18442 | // GetIndirSize: Return the size (in bytes) of an indirection node. |
18443 | // |
18444 | // Arguments: |
18445 | // indir - the indirection node |
18446 | // user - the node that uses the indirection |
18447 | // |
18448 | // Notes: |
    //    This returns 0 for indirections of unknown size, typically GT_DYN_BLK.
    //    GT_IND nodes of type TYP_STRUCT are expected to appear only on the RHS of an
    //    assignment, in which case the size of the LHS is used instead.
    //    Otherwise 0 is returned as well.
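    //    For example (illustrative), a GT_DYN_BLK whose size is known only at run time yields 0,
    //    which callers such as EscapeLocation treat as a wide access.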
18453 | // |
18454 | unsigned GetIndirSize(GenTree* indir, GenTree* user) |
18455 | { |
18456 | assert(indir->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_DYN_BLK, GT_FIELD)); |
18457 | |
18458 | if (indir->TypeGet() != TYP_STRUCT) |
18459 | { |
18460 | return genTypeSize(indir->TypeGet()); |
18461 | } |
18462 | |
18463 | // A struct indir that is the RHS of an assignment needs special casing: |
18464 | // - It can be a GT_IND of type TYP_STRUCT, in which case the size is given by the LHS. |
18465 | // - It can be a GT_OBJ that has a correct size, but different than the size of the LHS. |
18466 | // The LHS size takes precedence. |
18467 | // Just take the LHS size in all cases. |
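        //
        // For example (illustrative): in ASG(OBJ<16>(dstAddr), IND<struct>(srcAddr)) the RHS IND
        // carries no size of its own, so the 16-byte size of the LHS OBJ is used for the RHS too.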
18468 | if (user->OperIs(GT_ASG) && (indir == user->gtGetOp2())) |
18469 | { |
18470 | indir = user->gtGetOp1(); |
18471 | |
18472 | if (indir->TypeGet() != TYP_STRUCT) |
18473 | { |
18474 | return genTypeSize(indir->TypeGet()); |
18475 | } |
18476 | |
18477 | // The LHS may be a LCL_VAR/LCL_FLD, these are not indirections so we need to handle them here. |
18478 | // It can also be a GT_INDEX, this is an indirection but it never applies to lclvar addresses |
18479 | // so it needs to be handled here as well. |
18480 | |
18481 | switch (indir->OperGet()) |
18482 | { |
18483 | case GT_LCL_VAR: |
18484 | return m_compiler->lvaGetDesc(indir->AsLclVar())->lvExactSize; |
18485 | case GT_LCL_FLD: |
18486 | return genTypeSize(indir->TypeGet()); |
18487 | case GT_INDEX: |
18488 | return indir->AsIndex()->gtIndElemSize; |
18489 | default: |
18490 | break; |
18491 | } |
18492 | } |
18493 | |
18494 | switch (indir->OperGet()) |
18495 | { |
18496 | case GT_FIELD: |
18497 | return m_compiler->info.compCompHnd->getClassSize( |
18498 | m_compiler->info.compCompHnd->getFieldClass(indir->AsField()->gtFldHnd)); |
18499 | case GT_BLK: |
18500 | case GT_OBJ: |
18501 | return indir->AsBlk()->gtBlkSize; |
18502 | default: |
18503 | assert(indir->OperIs(GT_IND, GT_DYN_BLK)); |
18504 | return 0; |
18505 | } |
18506 | } |
18507 | |
18508 | //------------------------------------------------------------------------ |
18509 | // MorphStructField: Replaces a GT_FIELD based promoted/normed struct field access |
18510 | // (e.g. FIELD(ADDR(LCL_VAR))) with a GT_LCL_VAR that references the struct field. |
18511 | // |
18512 | // Arguments: |
18513 | // node - the GT_FIELD node |
18514 | // user - the node that uses the field |
18515 | // |
18516 | // Notes: |
18517 | // This does not do anything if the field access does not denote |
18518 | // a promoted/normed struct field. |
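    //    For example (illustrative): FIELD(ADDR(LCL_VAR V01), F), where V01 is a promoted struct
    //    and field F corresponds to promoted field local V02, is typically rewritten to
    //    LCL_VAR V02 so the access no longer goes through an address.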
18519 | // |
18520 | void MorphStructField(GenTree* node, GenTree* user) |
18521 | { |
18522 | assert(node->OperIs(GT_FIELD)); |
18523 | // TODO-Cleanup: Move fgMorphStructField implementation here, it's not used anywhere else. |
18524 | m_compiler->fgMorphStructField(node, user); |
18525 | INDEBUG(m_stmtModified |= node->OperIs(GT_LCL_VAR);) |
18526 | } |
18527 | |
18528 | //------------------------------------------------------------------------ |
18529 | // MorphLocalField: Replaces a GT_LCL_FLD based promoted struct field access |
18530 | // with a GT_LCL_VAR that references the struct field. |
18531 | // |
18532 | // Arguments: |
18533 | // node - the GT_LCL_FLD node |
18534 | // user - the node that uses the field |
18535 | // |
18536 | // Notes: |
    //    This does not do anything if the field access does not involve
    //    a promoted struct local.
    //    If the GT_LCL_FLD offset does not have a corresponding promoted struct
    //    field then no transformation is done and the struct local's enregistration
    //    is disabled.
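    //    For example (illustrative): LCL_FLD V01 [+8], where the promoted struct V01 has a field
    //    local V03 at offset 8 of a matching type, becomes LCL_VAR V03; if no promoted field
    //    starts at that offset, the access is left as is and V01 is not enregistered.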
18542 | // |
18543 | void MorphLocalField(GenTree* node, GenTree* user) |
18544 | { |
18545 | assert(node->OperIs(GT_LCL_FLD)); |
18546 | // TODO-Cleanup: Move fgMorphLocalField implementation here, it's not used anywhere else. |
18547 | m_compiler->fgMorphLocalField(node, user); |
18548 | INDEBUG(m_stmtModified |= node->OperIs(GT_LCL_VAR);) |
18549 | } |
18550 | |
18551 | //------------------------------------------------------------------------ |
18552 | // UpdateEarlyRefCountForImplicitByRef: updates the ref count for implicit byref params. |
18553 | // |
18554 | // Arguments: |
18555 | // lclNum - the local number to update the count for. |
18556 | // |
18557 | // Notes: |
18558 | // fgMakeOutgoingStructArgCopy checks the ref counts for implicit byref params when it decides |
18559 | // if it's legal to elide certain copies of them; |
18560 | // fgRetypeImplicitByRefArgs checks the ref counts when it decides to undo promotions. |
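    //    For example (an illustrative sketch): on Windows x64 a 32-byte struct argument is passed
    //    by reference; if the count recorded here shows the caller's implicit byref parameter is
    //    used only as the outgoing argument, fgMakeOutgoingStructArgCopy may be able to forward
    //    the existing reference instead of making a defensive copy.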
18561 | // |
18562 | void UpdateEarlyRefCountForImplicitByRef(unsigned lclNum) |
18563 | { |
18564 | if (!m_compiler->lvaIsImplicitByRefLocal(lclNum)) |
18565 | { |
18566 | return; |
18567 | } |
18568 | LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); |
        JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for V%02d\n", varDsc->lvRefCnt(RCS_EARLY),
                varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum);
18571 | varDsc->incLvRefCnt(1, RCS_EARLY); |
18572 | } |
18573 | }; |
18574 | |
18575 | void Compiler::fgAddFieldSeqForZeroOffset(GenTree* op1, FieldSeqNode* fieldSeq) |
18576 | { |
18577 | assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF); |
18578 | |
18579 | switch (op1->OperGet()) |
18580 | { |
18581 | case GT_ADDR: |
18582 | if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD) |
18583 | { |
18584 | GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld(); |
18585 | lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq); |
18586 | } |
18587 | break; |
18588 | |
18589 | case GT_ADD: |
18590 | if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT) |
18591 | { |
18592 | FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq; |
18593 | if (op1Fs != nullptr) |
18594 | { |
18595 | op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq); |
18596 | op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs; |
18597 | } |
18598 | } |
18599 | else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT) |
18600 | { |
18601 | FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq; |
18602 | if (op2Fs != nullptr) |
18603 | { |
18604 | op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq); |
18605 | op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs; |
18606 | } |
18607 | } |
18608 | break; |
18609 | |
18610 | case GT_CNS_INT: |
18611 | { |
18612 | FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq; |
18613 | if (op1Fs != nullptr) |
18614 | { |
18615 | op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq); |
18616 | op1->gtIntCon.gtFieldSeq = op1Fs; |
18617 | } |
18618 | } |
18619 | break; |
18620 | |
18621 | default: |
18622 | // Record in the general zero-offset map. |
18623 | GetZeroOffsetFieldMap()->Set(op1, fieldSeq); |
18624 | break; |
18625 | } |
18626 | } |
18627 | |
18628 | //------------------------------------------------------------------------ |
18629 | // fgMarkAddressExposedLocals: Traverses the entire method and marks address |
18630 | // exposed locals. |
18631 | // |
18632 | // Notes: |
18633 | // Trees such as IND(ADDR(LCL_VAR)), that morph is expected to fold |
18634 | // to just LCL_VAR, do not result in the involved local being marked |
18635 | // address exposed. |
18636 | // |
18637 | void Compiler::fgMarkAddressExposedLocals() |
18638 | { |
18639 | #ifdef DEBUG |
18640 | if (verbose) |
18641 | { |
        printf("\n*************** In fgMarkAddressExposedLocals()\n");
18643 | } |
18644 | #endif // DEBUG |
18645 | |
18646 | LocalAddressVisitor visitor(this); |
18647 | |
18648 | for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) |
18649 | { |
18650 | // Make the current basic block address available globally |
18651 | compCurBB = block; |
18652 | |
18653 | for (GenTree* stmt = block->bbTreeList; stmt != nullptr; stmt = stmt->gtNext) |
18654 | { |
18655 | visitor.VisitStmt(stmt->AsStmt()); |
18656 | } |
18657 | } |
18658 | } |
18659 | |
18660 | #ifdef FEATURE_SIMD |
18661 | |
18662 | //----------------------------------------------------------------------------------- |
// fgMorphCombineSIMDFieldAssignments:
//    If the RHS of the input stmt is a read of the SIMD vector X field, then this
//    function keeps reading the following stmts, based on the vector size (2, 3, 4).
//    If the LHSs of those stmts access contiguous memory and the RHSs do as well,
//    then we replace those statements with a single copyblk.
//
// Arguments:
//    block - BasicBlock*. The block that stmt belongs to.
//    stmt  - GenTreeStmt*. The stmt node we want to check.
//
// Return Value:
//    Returns true if this function successfully optimized the stmts, otherwise false.
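//
// For example (an illustrative sketch, assuming a Vector4 local "v" and a float* destination "p"):
//     p[0] = v.X; p[1] = v.Y; p[2] = v.Z; p[3] = v.W;
// imports as four contiguous float assignments, which this function collapses into a single
// 16-byte copyblk from "v" to the destination.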
18676 | |
18677 | bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTree* stmt) |
18678 | { |
18679 | |
18680 | noway_assert(stmt->gtOper == GT_STMT); |
18681 | GenTree* tree = stmt->gtStmt.gtStmtExpr; |
18682 | assert(tree->OperGet() == GT_ASG); |
18683 | |
18684 | GenTree* originalLHS = tree->gtOp.gtOp1; |
18685 | GenTree* prevLHS = tree->gtOp.gtOp1; |
18686 | GenTree* prevRHS = tree->gtOp.gtOp2; |
18687 | unsigned index = 0; |
18688 | var_types baseType = TYP_UNKNOWN; |
18689 | unsigned simdSize = 0; |
18690 | GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true); |
18691 | |
18692 | if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT) |
18693 | { |
18694 | // if the RHS is not from a SIMD vector field X, then there is no need to check further. |
18695 | return false; |
18696 | } |
18697 | |
18698 | var_types simdType = getSIMDTypeForSize(simdSize); |
18699 | int assignmentsCount = simdSize / genTypeSize(baseType) - 1; |
18700 | int remainingAssignments = assignmentsCount; |
18701 | GenTree* curStmt = stmt->gtNext; |
18702 | GenTree* lastStmt = stmt; |
18703 | |
18704 | while (curStmt != nullptr && remainingAssignments > 0) |
18705 | { |
18706 | GenTree* exp = curStmt->gtStmt.gtStmtExpr; |
18707 | if (exp->OperGet() != GT_ASG) |
18708 | { |
18709 | break; |
18710 | } |
18711 | GenTree* curLHS = exp->gtGetOp1(); |
18712 | GenTree* curRHS = exp->gtGetOp2(); |
18713 | |
18714 | if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS)) |
18715 | { |
18716 | break; |
18717 | } |
18718 | |
18719 | remainingAssignments--; |
18720 | prevLHS = curLHS; |
18721 | prevRHS = curRHS; |
18722 | |
18723 | lastStmt = curStmt; |
18724 | curStmt = curStmt->gtNext; |
18725 | } |
18726 | |
18727 | if (remainingAssignments > 0) |
18728 | { |
        // If there are assignments remaining, the statements do not store to contiguous
        // memory locations from the same vector, so we cannot combine them.
18732 | return false; |
18733 | } |
18734 | #ifdef DEBUG |
18735 | if (verbose) |
18736 | { |
        printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
        printf("From " FMT_BB ", stmt", block->bbNum);
        printTreeID(stmt);
        printf(" to stmt");
        printTreeID(lastStmt);
        printf("\n");
18743 | } |
18744 | #endif |
18745 | |
18746 | for (int i = 0; i < assignmentsCount; i++) |
18747 | { |
18748 | fgRemoveStmt(block, stmt->gtNext); |
18749 | } |
18750 | |
18751 | GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize); |
18752 | if (simdStructNode->OperIsLocal()) |
18753 | { |
18754 | setLclRelatedToSIMDIntrinsic(simdStructNode); |
18755 | } |
18756 | GenTree* copyBlkAddr = copyBlkDst; |
18757 | if (copyBlkAddr->gtOper == GT_LEA) |
18758 | { |
18759 | copyBlkAddr = copyBlkAddr->AsAddrMode()->Base(); |
18760 | } |
18761 | GenTreeLclVarCommon* localDst = nullptr; |
18762 | if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr)) |
18763 | { |
18764 | setLclRelatedToSIMDIntrinsic(localDst); |
18765 | } |
18766 | |
18767 | if (simdStructNode->TypeGet() == TYP_BYREF) |
18768 | { |
18769 | assert(simdStructNode->OperIsLocal()); |
18770 | assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum)); |
18771 | simdStructNode = gtNewIndir(simdType, simdStructNode); |
18772 | } |
18773 | else |
18774 | { |
18775 | assert(varTypeIsSIMD(simdStructNode)); |
18776 | } |
18777 | |
18778 | #ifdef DEBUG |
18779 | if (verbose) |
18780 | { |
        printf("\n" FMT_BB " stmt", block->bbNum);
        printTreeID(stmt);
        printf("(before)\n");
18784 | gtDispTree(stmt); |
18785 | } |
18786 | #endif |
18787 | |
18788 | // TODO-1stClassStructs: we should be able to simply use a GT_IND here. |
18789 | GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize); |
18790 | blkNode->gtType = simdType; |
18791 | tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize, |
18792 | false, // not volatile |
18793 | true); // copyBlock |
18794 | |
18795 | stmt->gtStmt.gtStmtExpr = tree; |
18796 | |
18797 | // Since we generated a new address node which didn't exist before, |
18798 | // we should expose this address manually here. |
18799 | LocalAddressVisitor visitor(this); |
18800 | visitor.VisitStmt(stmt->AsStmt()); |
18801 | |
18802 | #ifdef DEBUG |
18803 | if (verbose) |
18804 | { |
        printf("\nReplaced " FMT_BB " stmt", block->bbNum);
        printTreeID(stmt);
        printf("(after)\n");
18808 | gtDispTree(stmt); |
18809 | } |
18810 | #endif |
18811 | return true; |
18812 | } |
18813 | |
18814 | #endif // FEATURE_SIMD |
18815 | |
18816 | #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_) |
GenTreeStmt* SkipNopStmts(GenTreeStmt* stmt)
{
    // Skip statements whose expression is a "nothing" node (a void GT_NOP).
    while ((stmt != nullptr) && stmt->gtStmtExpr->IsNothingNode())
    {
        stmt = stmt->gtNextStmt;
    }
    return stmt;
}
18825 | |
18826 | #endif // !FEATURE_CORECLR && _TARGET_AMD64_ |
18827 | |
18828 | //------------------------------------------------------------------------ |
// fgCheckStmtAfterTailCall: check that the statements after the tail call stmt
// candidate are in one of the expected forms, which are described below.
18831 | // |
18832 | // Return Value: |
18833 | // 'true' if stmts are in the expected form, else 'false'. |
18834 | // |
18835 | bool Compiler::fgCheckStmtAfterTailCall() |
18836 | { |
18837 | |
    // For void calls, we would have created a GT_CALL in the stmt list.
    // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
    // For calls returning structs, we would have a void call, followed by a void return.
    // For debuggable code, it would be an assignment of the call result to a temp.
    // We want to get rid of any of these extra trees and just leave the call.
18844 | GenTreeStmt* callStmt = fgMorphStmt; |
18845 | |
18846 | GenTreeStmt* nextMorphStmt = callStmt->gtNextStmt; |
18847 | |
18848 | #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_) |
18849 | // Legacy Jit64 Compat: |
18850 | // There could be any number of GT_NOPs between tail call and GT_RETURN. |
18851 | // That is tail call pattern could be one of the following: |
18852 | // 1) tail.call, nop*, ret |
18853 | // 2) tail.call, nop*, pop, nop*, ret |
18854 | // 3) var=tail.call, nop*, ret(var) |
18855 | // 4) var=tail.call, nop*, pop, ret |
18856 | // 5) comma(tail.call, nop), nop*, ret |
18857 | // |
18858 | // See impIsTailCallILPattern() for details on tail call IL patterns |
18859 | // that are supported. |
18860 | GenTree* callExpr = callStmt->gtStmtExpr; |
18861 | |
18862 | if (callExpr->gtOper != GT_RETURN) |
18863 | { |
18864 | // First skip all GT_NOPs after the call |
18865 | nextMorphStmt = SkipNopStmts(nextMorphStmt); |
18866 | |
18867 | // Check to see if there is a pop. |
18868 | // Since tail call is honored, we can get rid of the stmt corresponding to pop. |
18869 | if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN) |
18870 | { |
18871 | // Note that pop opcode may or may not result in a new stmt (for details see |
18872 | // impImportBlockCode()). Hence, it is not possible to assert about the IR |
18873 | // form generated by pop but pop tree must be side-effect free so that we can |
18874 | // delete it safely. |
18875 | GenTreeStmt* popStmt = nextMorphStmt; |
18876 | |
18877 | // Side effect flags on a GT_COMMA may be overly pessimistic, so examine |
18878 | // the constituent nodes. |
18879 | GenTree* popExpr = popStmt->gtStmtExpr; |
18880 | bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0; |
18881 | if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA)) |
18882 | { |
18883 | isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) && |
18884 | ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0); |
18885 | } |
18886 | noway_assert(isSideEffectFree); |
18887 | |
18888 | nextMorphStmt = popStmt->gtNextStmt; |
18889 | } |
18890 | |
18891 | // Next skip any GT_NOP nodes after the pop |
18892 | nextMorphStmt = SkipNopStmts(nextMorphStmt); |
18893 | } |
18894 | #endif // !FEATURE_CORECLR && _TARGET_AMD64_ |
18895 | |
    // Check that the remaining stmts in the block match one of the following patterns:
18897 | // 1) ret(void) |
18898 | // 2) ret(cast*(callResultLclVar)) |
18899 | // 3) lclVar = callResultLclVar, the actual ret(lclVar) in another block |
18900 | if (nextMorphStmt != nullptr) |
18901 | { |
18902 | GenTree* callExpr = callStmt->gtStmtExpr; |
18903 | if (callExpr->gtOper != GT_ASG) |
18904 | { |
18905 | // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar), |
18906 | // where lclVar was return buffer in the call for structs or simd. |
18907 | GenTreeStmt* retStmt = nextMorphStmt; |
18908 | GenTree* retExpr = retStmt->gtStmtExpr; |
18909 | noway_assert(retExpr->gtOper == GT_RETURN); |
18910 | |
18911 | nextMorphStmt = retStmt->gtNextStmt; |
18912 | } |
18913 | else |
18914 | { |
18915 | noway_assert(callExpr->gtGetOp1()->OperIsLocal()); |
18916 | unsigned callResultLclNumber = callExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum; |
18917 | |
18918 | #if FEATURE_TAILCALL_OPT_SHARED_RETURN |
18919 | |
18920 | // We can have a move from the call result to an lvaInlineeReturnSpillTemp. |
18921 | // However, we can't check that this assignment was created there. |
18922 | if (nextMorphStmt->gtStmtExpr->gtOper == GT_ASG) |
18923 | { |
18924 | GenTreeStmt* moveStmt = nextMorphStmt; |
18925 | GenTree* moveExpr = nextMorphStmt->gtStmtExpr; |
18926 | noway_assert(moveExpr->gtGetOp1()->OperIsLocal() && moveExpr->gtGetOp2()->OperIsLocal()); |
18927 | |
18928 | unsigned srcLclNum = moveExpr->gtGetOp2()->AsLclVarCommon()->gtLclNum; |
18929 | noway_assert(srcLclNum == callResultLclNumber); |
18930 | unsigned dstLclNum = moveExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum; |
18931 | callResultLclNumber = dstLclNum; |
18932 | |
18933 | nextMorphStmt = moveStmt->gtNextStmt; |
18934 | } |
18935 | if (nextMorphStmt != nullptr) |
18936 | #endif |
18937 | { |
18938 | GenTreeStmt* retStmt = nextMorphStmt; |
18939 | GenTree* retExpr = nextMorphStmt->gtStmtExpr; |
18940 | noway_assert(retExpr->gtOper == GT_RETURN); |
18941 | |
18942 | GenTree* treeWithLcl = retExpr->gtGetOp1(); |
18943 | while (treeWithLcl->gtOper == GT_CAST) |
18944 | { |
18945 | noway_assert(!treeWithLcl->gtOverflow()); |
18946 | treeWithLcl = treeWithLcl->gtGetOp1(); |
18947 | } |
18948 | |
18949 | noway_assert(callResultLclNumber == treeWithLcl->AsLclVarCommon()->gtLclNum); |
18950 | |
18951 | nextMorphStmt = retStmt->gtNextStmt; |
18952 | } |
18953 | } |
18954 | } |
18955 | return nextMorphStmt == nullptr; |
18956 | } |
18957 | |
18958 | static const int numberOfTrackedFlags = 5; |
18959 | static const unsigned trackedFlags[numberOfTrackedFlags] = {GTF_ASG, GTF_CALL, GTF_EXCEPT, GTF_GLOB_REF, |
18960 | GTF_ORDER_SIDEEFF}; |
18961 | |
18962 | //------------------------------------------------------------------------ |
18963 | // fgMorphArgList: morph argument list tree without recursion. |
18964 | // |
18965 | // Arguments: |
18966 | // args - argument list tree to morph; |
18967 | // mac - morph address context, used to morph children. |
18968 | // |
18969 | // Return Value: |
18970 | // morphed argument list. |
18971 | // |
18972 | GenTreeArgList* Compiler::fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac) |
18973 | { |
    // Use a non-recursive algorithm: morph all actual list values, memorize the last
    // list node that carries each tracked effect flag, and then, in a second pass,
    // clear the list nodes' effect flags and re-apply each flag to every list node up
    // to (and including) its memorized last node.
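    //
    // For example (illustrative): for an argument list (a, b, c) where only b's tree has GTF_CALL
    // set, the last list node carrying GTF_CALL is the one holding b; the second pass then sets
    // GTF_CALL on the list nodes for a and b, but not on the node for c, which matches what a
    // recursive bottom-up flag propagation would produce.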
18977 | assert((trackedFlags[0] | trackedFlags[1] | trackedFlags[2] | trackedFlags[3] | trackedFlags[4]) == GTF_ALL_EFFECT); |
18978 | |
18979 | GenTree* memorizedLastNodes[numberOfTrackedFlags] = {nullptr}; |
18980 | |
18981 | for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) |
18982 | { |
18983 | // Morph actual list values. |
18984 | GenTree*& arg = listNode->Current(); |
18985 | arg = fgMorphTree(arg, mac); |
18986 | |
18987 | // Remember the last list node with each flag. |
18988 | for (int i = 0; i < numberOfTrackedFlags; ++i) |
18989 | { |
18990 | if ((arg->gtFlags & trackedFlags[i]) != 0) |
18991 | { |
18992 | memorizedLastNodes[i] = listNode; |
18993 | } |
18994 | } |
18995 | } |
18996 | |
18997 | for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) |
18998 | { |
18999 | // Clear all old effects from the list node. |
19000 | listNode->gtFlags &= ~GTF_ALL_EFFECT; |
19001 | |
19002 | // Spread each flag to all list nodes (to the prefix) before the memorized last node. |
19003 | for (int i = 0; i < numberOfTrackedFlags; ++i) |
19004 | { |
19005 | if (memorizedLastNodes[i] != nullptr) |
19006 | { |
19007 | listNode->gtFlags |= trackedFlags[i]; |
19008 | } |
19009 | if (listNode == memorizedLastNodes[i]) |
19010 | { |
19011 | memorizedLastNodes[i] = nullptr; |
19012 | } |
19013 | } |
19014 | } |
19015 | |
19016 | return args; |
19017 | } |
19018 | |