| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 7 | XX XX |
| 8 | XX Morph XX |
| 9 | XX XX |
| 10 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 11 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 12 | */ |
| 13 | |
| 14 | #include "jitpch.h" |
| 15 | #ifdef _MSC_VER |
| 16 | #pragma hdrstop |
| 17 | #endif |
| 18 | |
| 19 | #include "allocacheck.h" // for alloca |
| 20 | |
| 21 | // Convert the given node into a call to the specified helper passing |
| 22 | // the given argument list. |
| 23 | // |
| 24 | // Tries to fold constants and also adds an edge for the overflow exception. |
| 25 | // Returns the morphed tree. |
| 26 | GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper) |
| 27 | { |
| 28 | GenTree* result; |
| 29 | |
| 30 | /* If the operand is a constant, we'll try to fold it */ |
| 31 | if (oper->OperIsConst()) |
| 32 | { |
| 33 | GenTree* oldTree = tree; |
| 34 | |
| 35 | tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...) |
| 36 | |
| 37 | if (tree != oldTree) |
| 38 | { |
| 39 | return fgMorphTree(tree); |
| 40 | } |
| 41 | else if (tree->OperKind() & GTK_CONST) |
| 42 | { |
| 43 | return fgMorphConst(tree); |
| 44 | } |
| 45 | |
| 46 | // assert that oper is unchanged and that it is still a GT_CAST node |
| 47 | noway_assert(tree->gtCast.CastOp() == oper); |
| 48 | noway_assert(tree->gtOper == GT_CAST); |
| 49 | } |
| 50 | result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper)); |
| 51 | assert(result == tree); |
| 52 | return result; |
| 53 | } |
| 54 | |
| 55 | /***************************************************************************** |
| 56 | * |
| 57 | * Convert the given node into a call to the specified helper passing |
| 58 | * the given argument list. |
| 59 | */ |
| 60 | |
| 61 | GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, GenTreeArgList* args, bool morphArgs) |
| 62 | { |
| 63 | // The helper call ought to be semantically equivalent to the original node, so preserve its VN. |
| 64 | tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN); |
| 65 | |
| 66 | tree->gtCall.gtCallType = CT_HELPER; |
| 67 | tree->gtCall.gtCallMethHnd = eeFindHelper(helper); |
| 68 | tree->gtCall.gtCallArgs = args; |
| 69 | tree->gtCall.gtCallObjp = nullptr; |
| 70 | tree->gtCall.gtCallLateArgs = nullptr; |
| 71 | tree->gtCall.fgArgInfo = nullptr; |
| 72 | tree->gtCall.gtRetClsHnd = nullptr; |
| 73 | tree->gtCall.gtCallMoreFlags = 0; |
| 74 | tree->gtCall.gtInlineCandidateInfo = nullptr; |
| 75 | tree->gtCall.gtControlExpr = nullptr; |
| 76 | |
| 77 | #if DEBUG |
| 78 | // Helper calls are never candidates. |
| 79 | |
| 80 | tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER; |
| 81 | #endif // DEBUG |
| 82 | |
| 83 | #ifdef FEATURE_READYTORUN_COMPILER |
| 84 | tree->gtCall.gtEntryPoint.addr = nullptr; |
| 85 | tree->gtCall.gtEntryPoint.accessType = IAT_VALUE; |
| 86 | #endif |
| 87 | |
| 88 | #ifndef _TARGET_64BIT_ |
| 89 | if (varTypeIsLong(tree)) |
| 90 | { |
| 91 | GenTreeCall* callNode = tree->AsCall(); |
| 92 | ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc(); |
| 93 | retTypeDesc->Reset(); |
| 94 | retTypeDesc->InitializeLongReturnType(this); |
| 95 | callNode->ClearOtherRegs(); |
| 96 | } |
| 97 | #endif // !_TARGET_64BIT_ |
| 98 | |
| 99 | if (tree->OperMayThrow(this)) |
| 100 | { |
| 101 | tree->gtFlags |= GTF_EXCEPT; |
| 102 | } |
| 103 | else |
| 104 | { |
| 105 | tree->gtFlags &= ~GTF_EXCEPT; |
| 106 | } |
| 107 | tree->gtFlags |= GTF_CALL; |
| 108 | if (args) |
| 109 | { |
| 110 | tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT); |
| 111 | } |
| 112 | |
| 113 | /* Perform the morphing */ |
| 114 | |
| 115 | if (morphArgs) |
| 116 | { |
| 117 | tree = fgMorphArgs(tree->AsCall()); |
| 118 | } |
| 119 | |
| 120 | return tree; |
| 121 | } |
| 122 | |
| 123 | /***************************************************************************** |
| 124 | * |
| 125 | * Morph a cast node (we perform some very simple transformations here). |
| 126 | */ |
| 127 | |
| 128 | #ifdef _PREFAST_ |
| 129 | #pragma warning(push) |
| 130 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
| 131 | #endif |
| 132 | GenTree* Compiler::fgMorphCast(GenTree* tree) |
| 133 | { |
| 134 | noway_assert(tree->gtOper == GT_CAST); |
| 135 | noway_assert(genTypeSize(TYP_I_IMPL) == TARGET_POINTER_SIZE); |
| 136 | |
| 137 | /* The first sub-operand is the thing being cast */ |
| 138 | |
| 139 | GenTree* oper = tree->gtCast.CastOp(); |
| 140 | |
| 141 | if (fgGlobalMorph && (oper->gtOper == GT_ADDR)) |
| 142 | { |
| 143 | // Make sure we've checked if 'oper' is an address of an implicit-byref parameter. |
| 144 | // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast |
| 145 | // morphing code to see that type. |
| 146 | fgMorphImplicitByRefArgs(oper); |
| 147 | } |
| 148 | |
| 149 | var_types srcType = genActualType(oper->TypeGet()); |
| 150 | |
| 151 | var_types dstType = tree->CastToType(); |
| 152 | unsigned dstSize = genTypeSize(dstType); |
| 153 | |
| 154 | // See if the cast has to be done in two steps. R -> I |
| 155 | if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) |
| 156 | { |
| 157 | if (srcType == TYP_FLOAT |
| 158 | #if defined(_TARGET_ARM64_) |
| 159 | // Arm64: src = float, dst is overflow conversion. |
| 160 | // This goes through helper and hence src needs to be converted to double. |
| 161 | && tree->gtOverflow() |
| 162 | #elif defined(_TARGET_AMD64_) |
| 163 | // Amd64: src = float, dst = uint64 or overflow conversion. |
| 164 | // This goes through helper and hence src needs to be converted to double. |
| 165 | && (tree->gtOverflow() || (dstType == TYP_ULONG)) |
| 166 | #elif defined(_TARGET_ARM_) |
| 167 | // Arm: src = float, dst = int64/uint64 or overflow conversion. |
| 168 | && (tree->gtOverflow() || varTypeIsLong(dstType)) |
| 169 | #else |
| 170 | // x86: src = float, dst = uint32/int64/uint64 or overflow conversion. |
| 171 | && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT)) |
| 172 | #endif |
| 173 | ) |
| 174 | { |
| 175 | oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE); |
| 176 | } |
| 177 | |
| 178 | // Do we need to do it in two steps: R -> I, then I -> smallType? |
| 179 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 180 | |
| 181 | #if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_) |
| 182 | if (dstSize < genTypeSize(TYP_INT)) |
| 183 | { |
| 184 | oper = gtNewCastNodeL(TYP_INT, oper, tree->IsUnsigned(), TYP_INT); |
| 185 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
| 186 | tree->gtFlags &= ~GTF_UNSIGNED; |
| 187 | } |
| 188 | #else |
| 189 | if (dstSize < TARGET_POINTER_SIZE) |
| 190 | { |
| 191 | oper = gtNewCastNodeL(TYP_I_IMPL, oper, false, TYP_I_IMPL); |
| 192 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
| 193 | } |
| 194 | #endif |
| 195 | else |
| 196 | { |
| 197 | /* Note that if we need to use a helper call then we can not morph oper */ |
| 198 | if (!tree->gtOverflow()) |
| 199 | { |
| 200 | #ifdef _TARGET_ARM64_ // On ARM64, all non-overflow checking conversions can be optimized |
| 201 | goto OPTIMIZECAST; |
| 202 | #else |
| 203 | switch (dstType) |
| 204 | { |
| 205 | case TYP_INT: |
| 206 | goto OPTIMIZECAST; |
| 207 | |
| 208 | case TYP_UINT: |
| 209 | #if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_) |
| 210 | goto OPTIMIZECAST; |
| 211 | #else // _TARGET_X86_ |
| 212 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); |
| 213 | #endif // _TARGET_X86_ |
| 214 | |
| 215 | case TYP_LONG: |
| 216 | #ifdef _TARGET_AMD64_ |
| 217 | // SSE2 has instructions to convert a float/double directly to a long |
| 218 | goto OPTIMIZECAST; |
| 219 | #else // !_TARGET_AMD64_ |
| 220 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); |
| 221 | #endif // !_TARGET_AMD64_ |
| 222 | |
| 223 | case TYP_ULONG: |
| 224 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); |
| 225 | default: |
| 226 | break; |
| 227 | } |
| 228 | #endif // _TARGET_ARM64_ |
| 229 | } |
| 230 | else |
| 231 | { |
| 232 | switch (dstType) |
| 233 | { |
| 234 | case TYP_INT: |
| 235 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper); |
| 236 | case TYP_UINT: |
| 237 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper); |
| 238 | case TYP_LONG: |
| 239 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper); |
| 240 | case TYP_ULONG: |
| 241 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper); |
| 242 | default: |
| 243 | break; |
| 244 | } |
| 245 | } |
| 246 | noway_assert(!"Unexpected dstType" ); |
| 247 | } |
| 248 | } |
| 249 | #ifndef _TARGET_64BIT_ |
| 250 | // The code generation phase (for x86 & ARM32) does not handle casts |
| 251 | // directly from [u]long to anything other than [u]int. Insert an |
| 252 | // intermediate cast to native int. |
| 253 | else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType)) |
| 254 | { |
| 255 | oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL); |
| 256 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
| 257 | tree->gtFlags &= ~GTF_UNSIGNED; |
| 258 | } |
| 259 | #endif //!_TARGET_64BIT_ |
| 260 | |
| 261 | #ifdef _TARGET_ARM_ |
| 262 | else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) && |
| 263 | !varTypeIsLong(oper->gtCast.CastOp())) |
| 264 | { |
| 265 | // optimization: conv.r4(conv.r8(?)) -> conv.r4(d) |
| 266 | // except when the ultimate source is a long, because there is no long-to-float helper, so it must be done in two steps. |
| 267 | // This happens semi-frequently because there is no IL 'conv.r4.un' |
| 268 | oper->gtType = TYP_FLOAT; |
| 269 | oper->CastToType() = TYP_FLOAT; |
| 270 | return fgMorphTree(oper); |
| 271 | } |
| 272 | // converts long/ulong --> float/double casts into helper calls. |
| 273 | else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType)) |
| 274 | { |
| 275 | if (dstType == TYP_FLOAT) |
| 276 | { |
| 277 | // there is only a double helper, so we |
| 278 | // - change the dsttype to double |
| 279 | // - insert a cast from double to float |
| 280 | // - recurse into the resulting tree |
| 281 | tree->CastToType() = TYP_DOUBLE; |
| 282 | tree->gtType = TYP_DOUBLE; |
| 283 | |
| 284 | tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); |
| 285 | |
| 286 | return fgMorphTree(tree); |
| 287 | } |
| 288 | if (tree->gtFlags & GTF_UNSIGNED) |
| 289 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); |
| 290 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); |
| 291 | } |
| 292 | #endif //_TARGET_ARM_ |
| 293 | |
| 294 | #ifdef _TARGET_AMD64_ |
| 295 | // Do we have to do a two-step U4/8 -> R4/8 conversion? |
| 296 | // Codegen supports the following conversion as one-step operation |
| 297 | // a) Long -> R4/R8 |
| 298 | // b) U8 -> R8 |
| 299 | // |
| 300 | // The following conversions are performed as two-step operations using the above: |
| 301 | // U4 -> R4/8 = U4-> Long -> R4/8 |
| 302 | // U8 -> R4 = U8 -> R8 -> R4 |
| 303 | else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) |
| 304 | { |
| 305 | srcType = genUnsignedType(srcType); |
| 306 | |
| 307 | if (srcType == TYP_ULONG) |
| 308 | { |
| 309 | if (dstType == TYP_FLOAT) |
| 310 | { |
| 311 | // Codegen can handle U8 -> R8 conversion. |
| 312 | // U8 -> R4 = U8 -> R8 -> R4 |
| 313 | // - change the dsttype to double |
| 314 | // - insert a cast from double to float |
| 315 | // - recurse into the resulting tree |
| 316 | tree->CastToType() = TYP_DOUBLE; |
| 317 | tree->gtType = TYP_DOUBLE; |
| 318 | tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); |
| 319 | return fgMorphTree(tree); |
| 320 | } |
| 321 | } |
| 322 | else if (srcType == TYP_UINT) |
| 323 | { |
| 324 | oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); |
| 325 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
| 326 | tree->gtFlags &= ~GTF_UNSIGNED; |
| 327 | } |
| 328 | } |
| 329 | #endif // _TARGET_AMD64_ |
| 330 | |
| 331 | #ifdef _TARGET_X86_ |
| 332 | // Do we have to do a two-step U4/8 -> R4/8 conversion? |
| 333 | else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) |
| 334 | { |
| 335 | srcType = genUnsignedType(srcType); |
| 336 | |
| 337 | if (srcType == TYP_ULONG) |
| 338 | { |
| 339 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); |
| 340 | } |
| 341 | else if (srcType == TYP_UINT) |
| 342 | { |
| 343 | oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); |
| 344 | oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); |
| 345 | tree->gtFlags &= ~GTF_UNSIGNED; |
| 346 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); |
| 347 | } |
| 348 | } |
| 349 | else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType)) |
| 350 | { |
| 351 | return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); |
| 352 | } |
| 353 | #endif //_TARGET_X86_ |
| 354 | else if (varTypeIsGC(srcType) != varTypeIsGC(dstType)) |
| 355 | { |
| 356 | // We are casting away GC information. We would like to just |
| 357 | // change the type to int, but this gives the emitter fits because |
| 358 | // it believes the variable is a GC variable at the beginning of the |
| 359 | // instruction group and it is not turned non-GC by the code generator; |
| 360 | // we fix this by copying the GC pointer to a non-GC pointer temp. |
| 361 | noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?"); |
| 362 | |
| 363 | // We generate an assignment to an int and then do the cast from an int. With this we avoid |
| 364 | // the gc problem and we allow casts to bytes, longs, etc... |
| 365 | unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC")); |
| 366 | oper->gtType = TYP_I_IMPL; |
| 367 | GenTree* asg = gtNewTempAssign(lclNum, oper); |
| 368 | oper->gtType = srcType; |
| 369 | |
| 370 | // do the real cast |
| 371 | GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType); |
| 372 | |
| 373 | // Generate the comma tree |
| 374 | oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast); |
| 375 | |
| 376 | return fgMorphTree(oper); |
| 377 | } |
| 378 | |
| 379 | // Look for narrowing casts ([u]long -> [u]int) and try to push them |
| 380 | // down into the operand before morphing it. |
| 381 | // |
| 382 | // It doesn't matter if this cast is from ulong or long (i.e. if |
| 383 | // GTF_UNSIGNED is set) because the transformation is only applied to |
| 384 | // overflow-insensitive narrowing casts, which always silently truncate. |
| 385 | // |
| 386 | // Note that casts from [u]long to small integer types are handled above. |
| 387 | if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT))) |
| 388 | { |
| 389 | // As a special case, look for overflow-sensitive casts of an AND |
| 390 | // expression, and see if the second operand is a small constant. Since |
| 391 | // the result of an AND is bound by its smaller operand, it may be |
| 392 | // possible to prove that the cast won't overflow, which will in turn |
| 393 | // allow the cast's operand to be transformed. |
| 394 | if (tree->gtOverflow() && (oper->OperGet() == GT_AND)) |
| 395 | { |
| 396 | GenTree* andOp2 = oper->gtOp.gtOp2; |
| 397 | |
| 398 | // Special case to the special case: AND with a casted int. |
| 399 | if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT)) |
| 400 | { |
| 401 | // gtFoldExprConst will deal with whether the cast is signed or |
| 402 | // unsigned, or overflow-sensitive. |
| 403 | andOp2 = gtFoldExprConst(andOp2); |
| 404 | oper->gtOp.gtOp2 = andOp2; |
| 405 | } |
| 406 | |
| 407 | // Look for a constant less than 2^{32} for a cast to uint, or less |
| 408 | // than 2^{31} for a cast to int. |
| 409 | int maxWidth = (dstType == TYP_UINT) ? 32 : 31; |
| 410 | |
| 411 | if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0)) |
| 412 | { |
| 413 | // This cast can't overflow. |
| 414 | tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT); |
| 415 | } |
| 416 | } |
| 417 | |
| 418 | // Only apply this transformation during global morph, |
| 419 | // when neither the cast node nor the oper node may throw an exception |
| 420 | // based on the upper 32 bits. |
| 421 | // |
| 422 | if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx()) |
| 423 | { |
| 424 | // For these operations the lower 32 bits of the result only depends |
| 425 | // upon the lower 32 bits of the operands. |
| 426 | // |
| 427 | bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG); |
| 428 | |
| 429 | // For long LSH cast to int, there is a discontinuity in behavior |
| 430 | // when the shift amount is 32 or larger. |
| 431 | // |
| 432 | // CAST(INT, LSH(1LL, 31)) == LSH(1, 31) |
| 433 | // LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31) |
| 434 | // |
| 435 | // CAST(INT, LSH(1LL, 32)) == 0 |
| 436 | // LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1 |
| 437 | // |
| 438 | // So some extra validation is needed. |
| 439 | // |
| 440 | if (oper->OperIs(GT_LSH)) |
| 441 | { |
| 442 | GenTree* shiftAmount = oper->gtOp.gtOp2; |
| 443 | |
| 444 | // Expose constant value for shift, if possible, to maximize the number |
| 445 | // of cases we can handle. |
| 446 | shiftAmount = gtFoldExpr(shiftAmount); |
| 447 | oper->gtOp.gtOp2 = shiftAmount; |
| 448 | |
| 449 | #if DEBUG |
| 450 | // We may remorph the shift amount tree again later, so clear any morphed flag. |
| 451 | shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
| 452 | #endif // DEBUG |
| 453 | |
| 454 | if (shiftAmount->IsIntegralConst()) |
| 455 | { |
| 456 | const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue(); |
| 457 | |
| 458 | if ((shiftAmountValue >= 64) || (shiftAmountValue < 0)) |
| 459 | { |
| 460 | // Shift amount is large enough or negative so result is undefined. |
| 461 | // Don't try to optimize. |
| 462 | assert(!canPushCast); |
| 463 | } |
| 464 | else if ((shiftAmountValue >= 32) && ((tree->gtFlags & GTF_ALL_EFFECT) == 0)) |
| 465 | { |
| 466 | // Result of the shift is zero. |
| 467 | DEBUG_DESTROY_NODE(tree); |
| 468 | GenTree* zero = gtNewZeroConNode(TYP_INT); |
| 469 | return fgMorphTree(zero); |
| 470 | } |
| 471 | else |
| 472 | { |
| 473 | // Shift amount is positive and small enough that we can push the cast through. |
| 474 | canPushCast = true; |
| 475 | } |
| 476 | } |
| 477 | else |
| 478 | { |
| 479 | // Shift amount is unknown. We can't optimize this case. |
| 480 | assert(!canPushCast); |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | if (canPushCast) |
| 485 | { |
| 486 | DEBUG_DESTROY_NODE(tree); |
| 487 | |
| 488 | // Insert narrowing casts for op1 and op2. |
| 489 | oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, false, dstType); |
| 490 | if (oper->gtOp.gtOp2 != nullptr) |
| 491 | { |
| 492 | oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, false, dstType); |
| 493 | } |
| 494 | |
| 495 | // Clear the GT_MUL_64RSLT if it is set. |
| 496 | if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT)) |
| 497 | { |
| 498 | oper->gtFlags &= ~GTF_MUL_64RSLT; |
| 499 | } |
| 500 | |
| 501 | // The operation now produces a 32-bit result. |
| 502 | oper->gtType = TYP_INT; |
| 503 | |
| 504 | // Remorph the new tree as the casts that we added may be folded away. |
| 505 | return fgMorphTree(oper); |
| 506 | } |
| 507 | } |
| 508 | } |
| 509 | |
| 510 | OPTIMIZECAST: |
| 511 | noway_assert(tree->gtOper == GT_CAST); |
| 512 | |
| 513 | /* Morph the operand */ |
| 514 | tree->gtCast.CastOp() = oper = fgMorphTree(oper); |
| 515 | |
| 516 | /* Reset the call flag */ |
| 517 | tree->gtFlags &= ~GTF_CALL; |
| 518 | |
| 519 | /* Reset the assignment flag */ |
| 520 | tree->gtFlags &= ~GTF_ASG; |
| 521 | |
| 522 | /* unless we have an overflow cast, reset the except flag */ |
| 523 | if (!tree->gtOverflow()) |
| 524 | { |
| 525 | tree->gtFlags &= ~GTF_EXCEPT; |
| 526 | } |
| 527 | |
| 528 | /* Just in case new side effects were introduced */ |
| 529 | tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT); |
| 530 | |
| 531 | if (!gtIsActiveCSE_Candidate(tree) && !gtIsActiveCSE_Candidate(oper)) |
| 532 | { |
| 533 | srcType = oper->TypeGet(); |
| 534 | |
| 535 | /* See if we can discard the cast */ |
| 536 | if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType)) |
| 537 | { |
| 538 | if (tree->IsUnsigned() && !varTypeIsUnsigned(srcType)) |
| 539 | { |
| 540 | if (varTypeIsSmall(srcType)) |
| 541 | { |
| 542 | // Small signed values are automatically sign extended to TYP_INT. If the cast is interpreting the |
| 543 | // resulting TYP_INT value as unsigned then the "sign" bits end up being "value" bits and srcType |
| 544 | // must be TYP_UINT, not the original small signed type. Otherwise "conv.ovf.i2.un(i1(-1))" is |
| 545 | // wrongly treated as a widening conversion from i1 to i2 when in fact it is a narrowing conversion |
| 546 | // from u4 to i2. |
| 547 | srcType = genActualType(srcType); |
| 548 | } |
| 549 | |
| 550 | srcType = genUnsignedType(srcType); |
| 551 | } |
| 552 | |
| 553 | if (srcType == dstType) |
| 554 | { // Certainly if they are identical it is pointless |
| 555 | goto REMOVE_CAST; |
| 556 | } |
| 557 | |
| 558 | if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType)) |
| 559 | { |
| 560 | unsigned varNum = oper->gtLclVarCommon.gtLclNum; |
| 561 | LclVarDsc* varDsc = &lvaTable[varNum]; |
| 562 | if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore()) |
| 563 | { |
| 564 | goto REMOVE_CAST; |
| 565 | } |
| 566 | } |
| 567 | |
| 568 | bool unsignedSrc = varTypeIsUnsigned(srcType); |
| 569 | bool unsignedDst = varTypeIsUnsigned(dstType); |
| 570 | bool signsDiffer = (unsignedSrc != unsignedDst); |
| 571 | unsigned srcSize = genTypeSize(srcType); |
| 572 | |
| 573 | // For same sized casts with |
| 574 | // the same signs or non-overflow cast we discard them as well |
| 575 | if (srcSize == dstSize) |
| 576 | { |
| 577 | /* This should have been handled above */ |
| 578 | noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType)); |
| 579 | |
| 580 | if (!signsDiffer) |
| 581 | { |
| 582 | goto REMOVE_CAST; |
| 583 | } |
| 584 | |
| 585 | if (!tree->gtOverflow()) |
| 586 | { |
| 587 | /* For small type casts, when necessary we force |
| 588 | the src operand to the dstType and allow the |
| 589 | implied load from memory to perform the casting */ |
| 590 | if (varTypeIsSmall(srcType)) |
| 591 | { |
| 592 | switch (oper->gtOper) |
| 593 | { |
| 594 | case GT_IND: |
| 595 | case GT_CLS_VAR: |
| 596 | case GT_LCL_FLD: |
| 597 | case GT_ARR_ELEM: |
| 598 | oper->gtType = dstType; |
| 599 | // We're changing the type here so we need to update the VN; |
| 600 | // in other cases we discard the cast without modifying oper |
| 601 | // so the VN doesn't change. |
| 602 | oper->SetVNsFromNode(tree); |
| 603 | goto REMOVE_CAST; |
| 604 | default: |
| 605 | break; |
| 606 | } |
| 607 | } |
| 608 | else |
| 609 | { |
| 610 | goto REMOVE_CAST; |
| 611 | } |
| 612 | } |
| 613 | } |
| 614 | else if (srcSize < dstSize) // widening cast |
| 615 | { |
| 616 | // Keep any long casts |
| 617 | if (dstSize == sizeof(int)) |
| 618 | { |
| 619 | // Only keep signed to unsigned widening cast with overflow check |
| 620 | if (!tree->gtOverflow() || !unsignedDst || unsignedSrc) |
| 621 | { |
| 622 | goto REMOVE_CAST; |
| 623 | } |
| 624 | } |
| 625 | |
| 626 | // Widening casts from unsigned or to signed can never overflow |
| 627 | |
| 628 | if (unsignedSrc || !unsignedDst) |
| 629 | { |
| 630 | tree->gtFlags &= ~GTF_OVERFLOW; |
| 631 | if (!(oper->gtFlags & GTF_EXCEPT)) |
| 632 | { |
| 633 | tree->gtFlags &= ~GTF_EXCEPT; |
| 634 | } |
| 635 | } |
| 636 | } |
| 637 | else // if (srcSize > dstSize) |
| 638 | { |
| 639 | // Try to narrow the operand of the cast and discard the cast |
| 640 | // Note: Do not narrow a cast that is marked as a CSE |
| 641 | // And do not narrow if the oper is marked as a CSE either |
| 642 | // |
| 643 | if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) && |
| 644 | optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false)) |
| 645 | { |
| 646 | optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true); |
| 647 | |
| 648 | /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */ |
| 649 | if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType())) |
| 650 | { |
| 651 | oper = oper->gtCast.CastOp(); |
| 652 | } |
| 653 | goto REMOVE_CAST; |
| 654 | } |
| 655 | } |
| 656 | } |
| 657 | |
| 658 | switch (oper->gtOper) |
| 659 | { |
| 660 | /* If the operand is a constant, we'll fold it */ |
| 661 | case GT_CNS_INT: |
| 662 | case GT_CNS_LNG: |
| 663 | case GT_CNS_DBL: |
| 664 | case GT_CNS_STR: |
| 665 | { |
| 666 | GenTree* oldTree = tree; |
| 667 | |
| 668 | tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...) |
| 669 | |
| 670 | // Did we get a comma throw as a result of gtFoldExprConst? |
| 671 | if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA)) |
| 672 | { |
| 673 | noway_assert(fgIsCommaThrow(tree)); |
| 674 | tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1); |
| 675 | fgMorphTreeDone(tree); |
| 676 | return tree; |
| 677 | } |
| 678 | else if (tree->gtOper != GT_CAST) |
| 679 | { |
| 680 | return tree; |
| 681 | } |
| 682 | |
| 683 | noway_assert(tree->gtCast.CastOp() == oper); // unchanged |
| 684 | } |
| 685 | break; |
| 686 | |
| 687 | case GT_CAST: |
| 688 | /* Check for two consecutive casts into the same dstType */ |
| 689 | if (!tree->gtOverflow()) |
| 690 | { |
| 691 | var_types dstType2 = oper->CastToType(); |
| 692 | if (dstType == dstType2) |
| 693 | { |
| 694 | goto REMOVE_CAST; |
| 695 | } |
| 696 | } |
| 697 | break; |
| 698 | |
| 699 | case GT_COMMA: |
| 700 | // Check for cast of a GT_COMMA with a throw overflow |
| 701 | // Bug 110829: Since this optimization will bash the types, |
| 702 | // neither oper nor commaOp2 can be CSE candidates |
| 703 | if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate |
| 704 | { |
| 705 | GenTree* commaOp2 = oper->gtOp.gtOp2; |
| 706 | |
| 707 | if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate |
| 708 | { |
| 709 | // need type of oper to be same as tree |
| 710 | if (tree->gtType == TYP_LONG) |
| 711 | { |
| 712 | commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); |
| 713 | commaOp2->gtIntConCommon.SetLngValue(0); |
| 714 | /* Change the types of oper and commaOp2 to TYP_LONG */ |
| 715 | oper->gtType = commaOp2->gtType = TYP_LONG; |
| 716 | } |
| 717 | else if (varTypeIsFloating(tree->gtType)) |
| 718 | { |
| 719 | commaOp2->ChangeOperConst(GT_CNS_DBL); |
| 720 | commaOp2->gtDblCon.gtDconVal = 0.0; |
| 721 | // Change the types of oper and commaOp2 |
| 722 | oper->gtType = commaOp2->gtType = tree->gtType; |
| 723 | } |
| 724 | else |
| 725 | { |
| 726 | commaOp2->ChangeOperConst(GT_CNS_INT); |
| 727 | commaOp2->gtIntCon.gtIconVal = 0; |
| 728 | /* Change the types of oper and commaOp2 to TYP_INT */ |
| 729 | oper->gtType = commaOp2->gtType = TYP_INT; |
| 730 | } |
| 731 | } |
| 732 | |
| 733 | if (vnStore != nullptr) |
| 734 | { |
| 735 | fgValueNumberTreeConst(commaOp2); |
| 736 | } |
| 737 | |
| 738 | /* Return the GT_COMMA node as the new tree */ |
| 739 | return oper; |
| 740 | } |
| 741 | break; |
| 742 | |
| 743 | default: |
| 744 | break; |
| 745 | } /* end switch (oper->gtOper) */ |
| 746 | } |
| 747 | |
| 748 | if (tree->gtOverflow()) |
| 749 | { |
| 750 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); |
| 751 | } |
| 752 | |
| 753 | return tree; |
| 754 | |
| 755 | REMOVE_CAST: |
| 756 | /* Here we've eliminated the cast, so just return its operand */ |
| 757 | assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate |
| 758 | |
| 759 | DEBUG_DESTROY_NODE(tree); |
| 760 | return oper; |
| 761 | } |
| 762 | #ifdef _PREFAST_ |
| 763 | #pragma warning(pop) |
| 764 | #endif |
| 765 | |
| 766 | /***************************************************************************** |
| 767 | * |
| 768 | * Perform an unwrap operation on a Proxy object |
| 769 | */ |
| 770 | |
| 771 | GenTree* Compiler::fgUnwrapProxy(GenTree* objRef) |
| 772 | { |
| 773 | assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef)); |
| 774 | |
| 775 | CORINFO_EE_INFO* pInfo = eeGetEEInfo(); |
| 776 | GenTree* addTree; |
| 777 | |
| 778 | // Perform the unwrap: |
| 779 | // |
| 780 | // This requires two extra indirections. |
| 781 | // We mark these indirections as 'invariant' and |
| 782 | // the CSE logic will hoist them when appropriate. |
| 783 | // |
| 784 | // Note that each dereference is a GC pointer |
| 785 | |
| 786 | addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL)); |
| 787 | |
| 788 | objRef = gtNewOperNode(GT_IND, TYP_REF, addTree); |
| 789 | objRef->gtFlags |= GTF_IND_INVARIANT; |
| 790 | |
| 791 | addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL)); |
| 792 | |
| 793 | objRef = gtNewOperNode(GT_IND, TYP_REF, addTree); |
| 794 | objRef->gtFlags |= GTF_IND_INVARIANT; |
| 795 | |
| 796 | // objRef now holds the 'real this' reference (i.e. the unwrapped proxy) |
| 797 | return objRef; |
| 798 | } |
| 799 | |
| 800 | /***************************************************************************** |
| 801 | * |
| 802 | * Morph an argument list; compute the pointer argument count in the process. |
| 803 | * |
| 804 | * NOTE: This function can be called from any place in the JIT to perform re-morphing |
| 805 | * due to graph altering modifications such as copy / constant propagation |
| 806 | */ |
| 807 | |
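| | //------------------------------------------------------------------------ |
| | // UpdateGT_LISTFlags: Recompute the side-effect flags of a GT_LIST chain. |
| | // |
| | // Arguments: |
| | //    tree - the head of the GT_LIST chain |
| | // |
| | // Return Value: |
| | //    The updated gtFlags of 'tree', after OR-ing the GTF_ALL_EFFECT flags |
| | //    of every list element into each list node, bottom up. |
| | // |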
| 808 | unsigned UpdateGT_LISTFlags(GenTree* tree) |
| 809 | { |
| 810 | assert(tree->gtOper == GT_LIST); |
| 811 | |
| 812 | unsigned flags = 0; |
| 813 | if (tree->gtOp.gtOp2) |
| 814 | { |
| 815 | flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2); |
| 816 | } |
| 817 | |
| 818 | flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
| 819 | |
| 820 | tree->gtFlags &= ~GTF_ALL_EFFECT; |
| 821 | tree->gtFlags |= flags; |
| 822 | |
| 823 | return tree->gtFlags; |
| 824 | } |
| 825 | |
| 826 | #ifdef DEBUG |
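| | //------------------------------------------------------------------------ |
| | // Dump: Print the contents of this fgArgTabEntry (registers, stack slots |
| | // and state flags) for JIT dump output. DEBUG only. |
| | // |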
| 827 | void fgArgTabEntry::Dump() |
| 828 | { |
| 829 | printf("fgArgTabEntry[arg %u" , argNum); |
| 830 | printf(" %d.%s" , node->gtTreeID, GenTree::OpName(node->gtOper)); |
| 831 | if (regNum != REG_STK) |
| 832 | { |
| 833 | printf(", %u reg%s:" , numRegs, numRegs == 1 ? "" : "s" ); |
| 834 | for (unsigned i = 0; i < numRegs; i++) |
| 835 | { |
| 836 | printf(" %s" , getRegName(regNums[i])); |
| 837 | } |
| 838 | } |
| 839 | if (numSlots > 0) |
| 840 | { |
| 841 | printf(", numSlots=%u, slotNum=%u" , numSlots, slotNum); |
| 842 | } |
| 843 | printf(", align=%u" , alignment); |
| 844 | if (isLateArg()) |
| 845 | { |
| 846 | printf(", lateArgInx=%u" , lateArgInx); |
| 847 | } |
| 848 | if (isSplit) |
| 849 | { |
| 850 | printf(", isSplit" ); |
| 851 | } |
| 852 | if (needTmp) |
| 853 | { |
| 854 | printf(", tmpNum=V%02u" , tmpNum); |
| 855 | } |
| 856 | if (needPlace) |
| 857 | { |
| 858 | printf(", needPlace" ); |
| 859 | } |
| 860 | if (isTmp) |
| 861 | { |
| 862 | printf(", isTmp" ); |
| 863 | } |
| 864 | if (processed) |
| 865 | { |
| 866 | printf(", processed" ); |
| 867 | } |
| 868 | if (isHfaRegArg) |
| 869 | { |
| 870 | printf(", isHfa" ); |
| 871 | } |
| 872 | if (isBackFilled) |
| 873 | { |
| 874 | printf(", isBackFilled" ); |
| 875 | } |
| 876 | if (isNonStandard) |
| 877 | { |
| 878 | printf(", isNonStandard" ); |
| 879 | } |
| 880 | if (isStruct) |
| 881 | { |
| 882 | printf(", isStruct" ); |
| 883 | } |
| 884 | printf("]\n" ); |
| 885 | } |
| 886 | #endif |
| 887 | |
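| | //------------------------------------------------------------------------ |
| | // fgArgInfo: Construct an empty argument table for the given call. |
| | // |
| | // Arguments: |
| | //    comp    - the compiler instance |
| | //    call    - the call whose arguments this table will describe |
| | //    numArgs - the number of argument entries to allocate |
| | // |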
| 888 | fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs) |
| 889 | { |
| 890 | compiler = comp; |
| 891 | callTree = call; |
| 892 | argCount = 0; // filled in arg count, starts at zero |
| 893 | nextSlotNum = INIT_ARG_STACK_SLOT; |
| 894 | stkLevel = 0; |
| 895 | #if defined(UNIX_X86_ABI) |
| 896 | alignmentDone = false; |
| 897 | stkSizeBytes = 0; |
| 898 | padStkAlign = 0; |
| 899 | #endif |
| 900 | #if FEATURE_FIXED_OUT_ARGS |
| 901 | outArgSize = 0; |
| 902 | #endif |
| 903 | |
| 904 | argTableSize = numArgs; // the allocated table size |
| 905 | |
| 906 | hasRegArgs = false; |
| 907 | hasStackArgs = false; |
| 908 | argsComplete = false; |
| 909 | argsSorted = false; |
| 910 | |
| 911 | if (argTableSize == 0) |
| 912 | { |
| 913 | argTable = nullptr; |
| 914 | } |
| 915 | else |
| 916 | { |
| 917 | argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize]; |
| 918 | } |
| 919 | } |
| 920 | |
| 921 | /***************************************************************************** |
| 922 | * |
| 923 | * fgArgInfo Copy Constructor |
| 924 | * |
| 925 | * This method needs to act like a copy constructor for fgArgInfo. |
| 926 | * The newCall needs to have its fgArgInfo initialized such that |
| 927 | * we have newCall that is an exact copy of the oldCall. |
| 928 | * We have to take care since the argument information |
| 929 | * in the argTable contains pointers that must point to the |
| 930 | * new arguments and not the old arguments. |
| 931 | */ |
| 932 | fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall) |
| 933 | { |
| 934 | fgArgInfo* oldArgInfo = oldCall->gtCall.fgArgInfo; |
| 935 | |
| 936 | compiler = oldArgInfo->compiler; |
| 937 | callTree = newCall; |
| 938 | argCount = 0; // filled in arg count, starts at zero |
| 939 | nextSlotNum = INIT_ARG_STACK_SLOT; |
| 940 | stkLevel = oldArgInfo->stkLevel; |
| 941 | #if defined(UNIX_X86_ABI) |
| 942 | alignmentDone = oldArgInfo->alignmentDone; |
| 943 | stkSizeBytes = oldArgInfo->stkSizeBytes; |
| 944 | padStkAlign = oldArgInfo->padStkAlign; |
| 945 | #endif |
| 946 | #if FEATURE_FIXED_OUT_ARGS |
| 947 | outArgSize = oldArgInfo->outArgSize; |
| 948 | #endif |
| 949 | argTableSize = oldArgInfo->argTableSize; |
| 950 | argsComplete = false; |
| 951 | argTable = nullptr; |
| 952 | if (argTableSize > 0) |
| 953 | { |
| 954 | argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize]; |
| 955 | for (unsigned inx = 0; inx < argTableSize; inx++) |
| 956 | { |
| 957 | argTable[inx] = nullptr; |
| 958 | } |
| 959 | } |
| 960 | |
| 961 | assert(oldArgInfo->argsComplete); |
| 962 | |
| 963 | // We create local, artificial GenTreeArgLists that include the gtCallObjp, if it exists, as the first argument, |
| 964 | // so we can iterate over these argument lists more uniformly. |
| 965 | // We need to provide a temporary non-null first argument to these constructors; if we use them, we'll replace it. |
| 966 | GenTreeArgList* newArgs; |
| 967 | GenTreeArgList newArgObjp(newCall, newCall->gtCallArgs); |
| 968 | GenTreeArgList* oldArgs; |
| 969 | GenTreeArgList oldArgObjp(oldCall, oldCall->gtCallArgs); |
| 970 | |
| 971 | if (newCall->gtCallObjp == nullptr) |
| 972 | { |
| 973 | assert(oldCall->gtCallObjp == nullptr); |
| 974 | newArgs = newCall->gtCallArgs; |
| 975 | oldArgs = oldCall->gtCallArgs; |
| 976 | } |
| 977 | else |
| 978 | { |
| 979 | assert(oldCall->gtCallObjp != nullptr); |
| 980 | newArgObjp.Current() = newCall->gtCallObjp; |
| 981 | newArgs = &newArgObjp; |
| 982 | oldArgObjp.Current() = oldCall->gtCallObjp; |
| 983 | oldArgs = &oldArgObjp; |
| 984 | } |
| 985 | |
| 986 | GenTree* newCurr; |
| 987 | GenTree* oldCurr; |
| 988 | GenTreeArgList* newParent = nullptr; |
| 989 | GenTreeArgList* oldParent = nullptr; |
| 990 | fgArgTabEntry** oldArgTable = oldArgInfo->argTable; |
| 991 | bool scanRegArgs = false; |
| 992 | |
| 993 | while (newArgs) |
| 994 | { |
| 995 | /* Get hold of the next argument values for the oldCall and newCall */ |
| 996 | |
| 997 | newCurr = newArgs->Current(); |
| 998 | oldCurr = oldArgs->Current(); |
| 999 | if (newArgs != &newArgObjp) |
| 1000 | { |
| 1001 | newParent = newArgs; |
| 1002 | oldParent = oldArgs; |
| 1003 | } |
| 1004 | else |
| 1005 | { |
| 1006 | assert(newParent == nullptr && oldParent == nullptr); |
| 1007 | } |
| 1008 | newArgs = newArgs->Rest(); |
| 1009 | oldArgs = oldArgs->Rest(); |
| 1010 | |
| 1011 | fgArgTabEntry* oldArgTabEntry = nullptr; |
| 1012 | fgArgTabEntry* newArgTabEntry = nullptr; |
| 1013 | |
| 1014 | for (unsigned inx = 0; inx < argTableSize; inx++) |
| 1015 | { |
| 1016 | oldArgTabEntry = oldArgTable[inx]; |
| 1017 | |
| 1018 | if (oldArgTabEntry->parent == oldParent) |
| 1019 | { |
| 1020 | assert((oldParent == nullptr) == (newParent == nullptr)); |
| 1021 | |
| 1022 | // We have found the matching "parent" field in oldArgTabEntry |
| 1023 | |
| 1024 | newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; |
| 1025 | |
| 1026 | // First block copy all fields |
| 1027 | // |
| 1028 | *newArgTabEntry = *oldArgTabEntry; |
| 1029 | |
| 1030 | // Then update all GenTree* fields in the newArgTabEntry |
| 1031 | // |
| 1032 | newArgTabEntry->parent = newParent; |
| 1033 | |
| 1034 | // The node field is likely to have been updated |
| 1035 | // to point at a node in the gtCallLateArgs list |
| 1036 | // |
| 1037 | if (oldArgTabEntry->node == oldCurr) |
| 1038 | { |
| 1039 | // node is not pointing into the gtCallLateArgs list |
| 1040 | newArgTabEntry->node = newCurr; |
| 1041 | } |
| 1042 | else |
| 1043 | { |
| 1044 | // node must be pointing into the gtCallLateArgs list |
| 1045 | // |
| 1046 | // We will fix this pointer up in the next loop |
| 1047 | // |
| 1048 | newArgTabEntry->node = nullptr; // For now we assign a NULL to this field |
| 1049 | |
| 1050 | scanRegArgs = true; |
| 1051 | } |
| 1052 | |
| 1053 | // Now initialize the proper element in the argTable array |
| 1054 | // |
| 1055 | argTable[inx] = newArgTabEntry; |
| 1056 | break; |
| 1057 | } |
| 1058 | } |
| 1059 | // We should have found the matching oldArgTabEntry and created the newArgTabEntry |
| 1060 | // |
| 1061 | assert(newArgTabEntry != nullptr); |
| 1062 | } |
| 1063 | |
| 1064 | if (scanRegArgs) |
| 1065 | { |
| 1066 | newArgs = newCall->gtCallLateArgs; |
| 1067 | oldArgs = oldCall->gtCallLateArgs; |
| 1068 | |
| 1069 | while (newArgs) |
| 1070 | { |
| 1071 | /* Get hold of the next argument values for the oldCall and newCall */ |
| 1072 | |
| 1073 | assert(newArgs->OperIsList()); |
| 1074 | |
| 1075 | newCurr = newArgs->Current(); |
| 1076 | newArgs = newArgs->Rest(); |
| 1077 | |
| 1078 | assert(oldArgs->OperIsList()); |
| 1079 | |
| 1080 | oldCurr = oldArgs->Current(); |
| 1081 | oldArgs = oldArgs->Rest(); |
| 1082 | |
| 1083 | fgArgTabEntry* oldArgTabEntry = nullptr; |
| 1084 | fgArgTabEntry* newArgTabEntry = nullptr; |
| 1085 | |
| 1086 | for (unsigned inx = 0; inx < argTableSize; inx++) |
| 1087 | { |
| 1088 | oldArgTabEntry = oldArgTable[inx]; |
| 1089 | |
| 1090 | if (oldArgTabEntry->node == oldCurr) |
| 1091 | { |
| 1092 | // We have found the matching "node" field in oldArgTabEntry |
| 1093 | |
| 1094 | newArgTabEntry = argTable[inx]; |
| 1095 | assert(newArgTabEntry != nullptr); |
| 1096 | |
| 1097 | // update the "node" GenTree* fields in the newArgTabEntry |
| 1098 | // |
| 1099 | assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field |
| 1100 | |
| 1101 | newArgTabEntry->node = newCurr; |
| 1102 | break; |
| 1103 | } |
| 1104 | } |
| 1105 | } |
| 1106 | } |
| 1107 | |
| 1108 | argCount = oldArgInfo->argCount; |
| 1109 | nextSlotNum = oldArgInfo->nextSlotNum; |
| 1110 | hasRegArgs = oldArgInfo->hasRegArgs; |
| 1111 | hasStackArgs = oldArgInfo->hasStackArgs; |
| 1112 | argsComplete = true; |
| 1113 | argsSorted = true; |
| 1114 | } |
| 1115 | |
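| | //------------------------------------------------------------------------ |
| | // AddArg: Append the given fgArgTabEntry to the argument table and bump |
| | // the argument count. |
| | // |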
| 1116 | void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry) |
| 1117 | { |
| 1118 | assert(argCount < argTableSize); |
| 1119 | argTable[argCount] = curArgTabEntry; |
| 1120 | argCount++; |
| 1121 | } |
| 1122 | |
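| | //------------------------------------------------------------------------ |
| | // AddRegArg: Create and add an fgArgTabEntry for an argument that is passed |
| | // (at least partially) in registers, recording its first register, register |
| | // count, alignment and other state. Returns the new entry. |
| | // |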
| 1123 | fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, |
| 1124 | GenTree* node, |
| 1125 | GenTree* parent, |
| 1126 | regNumber regNum, |
| 1127 | unsigned numRegs, |
| 1128 | unsigned alignment, |
| 1129 | bool isStruct, |
| 1130 | bool isVararg /*=false*/) |
| 1131 | { |
| 1132 | fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; |
| 1133 | |
| 1134 | // Any additional register numbers are set by the caller. |
| 1135 | // This is primarily because on ARM we don't yet know if it |
| 1136 | // will be split or if it is a double HFA, so the number of registers |
| 1137 | // may actually be less. |
| 1138 | curArgTabEntry->setRegNum(0, regNum); |
| 1139 | |
| 1140 | curArgTabEntry->argNum = argNum; |
| 1141 | curArgTabEntry->node = node; |
| 1142 | curArgTabEntry->parent = parent; |
| 1143 | curArgTabEntry->slotNum = 0; |
| 1144 | curArgTabEntry->numRegs = numRegs; |
| 1145 | curArgTabEntry->numSlots = 0; |
| 1146 | curArgTabEntry->alignment = alignment; |
| 1147 | curArgTabEntry->lateArgInx = UINT_MAX; |
| 1148 | curArgTabEntry->tmpNum = BAD_VAR_NUM; |
| 1149 | curArgTabEntry->isSplit = false; |
| 1150 | curArgTabEntry->isTmp = false; |
| 1151 | curArgTabEntry->needTmp = false; |
| 1152 | curArgTabEntry->needPlace = false; |
| 1153 | curArgTabEntry->processed = false; |
| 1154 | #ifdef FEATURE_HFA |
| 1155 | curArgTabEntry->_isHfaArg = false; |
| 1156 | #endif |
| 1157 | curArgTabEntry->isBackFilled = false; |
| 1158 | curArgTabEntry->isNonStandard = false; |
| 1159 | curArgTabEntry->isStruct = isStruct; |
| 1160 | curArgTabEntry->isVararg = isVararg; |
| 1161 | |
| 1162 | hasRegArgs = true; |
| 1163 | AddArg(curArgTabEntry); |
| 1164 | return curArgTabEntry; |
| 1165 | } |
| 1166 | |
| 1167 | #if defined(UNIX_AMD64_ABI) |
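| | //------------------------------------------------------------------------ |
| | // AddRegArg: Unix AMD64 ABI overload that additionally records a second |
| | // register (when numRegs == 2) and copies the SysV struct-passing |
| | // descriptor, if one is supplied. |
| | // |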
| 1168 | fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, |
| 1169 | GenTree* node, |
| 1170 | GenTree* parent, |
| 1171 | regNumber regNum, |
| 1172 | unsigned numRegs, |
| 1173 | unsigned alignment, |
| 1174 | const bool isStruct, |
| 1175 | const bool isVararg, |
| 1176 | const regNumber otherRegNum, |
| 1177 | const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr) |
| 1178 | { |
| 1179 | fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment, isStruct, isVararg); |
| 1180 | assert(curArgTabEntry != nullptr); |
| 1181 | |
| 1182 | curArgTabEntry->isStruct = isStruct; // is this a struct arg |
| 1183 | |
| 1184 | curArgTabEntry->checkIsStruct(); |
| 1185 | assert(numRegs <= 2); |
| 1186 | if (numRegs == 2) |
| 1187 | { |
| 1188 | curArgTabEntry->setRegNum(1, otherRegNum); |
| 1189 | } |
| 1190 | |
| 1191 | if (isStruct && structDescPtr != nullptr) |
| 1192 | { |
| 1193 | curArgTabEntry->structDesc.CopyFrom(*structDescPtr); |
| 1194 | } |
| 1195 | |
| 1196 | return curArgTabEntry; |
| 1197 | } |
| 1198 | #endif // defined(UNIX_AMD64_ABI) |
| 1199 | |
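| | //------------------------------------------------------------------------ |
| | // AddStkArg: Create and add an fgArgTabEntry for an argument passed on the |
| | // stack: align nextSlotNum to the requested alignment, record the slot |
| | // number and slot count, and advance nextSlotNum past the argument. |
| | // |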
| 1200 | fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, |
| 1201 | GenTree* node, |
| 1202 | GenTree* parent, |
| 1203 | unsigned numSlots, |
| 1204 | unsigned alignment, |
| 1205 | bool isStruct, |
| 1206 | bool isVararg /*=false*/) |
| 1207 | { |
| 1208 | fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; |
| 1209 | |
| 1210 | nextSlotNum = roundUp(nextSlotNum, alignment); |
| 1211 | |
| 1212 | curArgTabEntry->setRegNum(0, REG_STK); |
| 1213 | curArgTabEntry->argNum = argNum; |
| 1214 | curArgTabEntry->node = node; |
| 1215 | curArgTabEntry->parent = parent; |
| 1216 | curArgTabEntry->slotNum = nextSlotNum; |
| 1217 | curArgTabEntry->numRegs = 0; |
| 1218 | curArgTabEntry->numSlots = numSlots; |
| 1219 | curArgTabEntry->alignment = alignment; |
| 1220 | curArgTabEntry->lateArgInx = UINT_MAX; |
| 1221 | curArgTabEntry->tmpNum = BAD_VAR_NUM; |
| 1222 | curArgTabEntry->isSplit = false; |
| 1223 | curArgTabEntry->isTmp = false; |
| 1224 | curArgTabEntry->needTmp = false; |
| 1225 | curArgTabEntry->needPlace = false; |
| 1226 | curArgTabEntry->processed = false; |
| 1227 | #ifdef FEATURE_HFA |
| 1228 | curArgTabEntry->_isHfaArg = false; |
| 1229 | #endif |
| 1230 | curArgTabEntry->isBackFilled = false; |
| 1231 | curArgTabEntry->isNonStandard = false; |
| 1232 | curArgTabEntry->isStruct = isStruct; |
| 1233 | curArgTabEntry->isVararg = isVararg; |
| 1234 | |
| 1235 | hasStackArgs = true; |
| 1236 | AddArg(curArgTabEntry); |
| 1237 | |
| 1238 | nextSlotNum += numSlots; |
| 1239 | return curArgTabEntry; |
| 1240 | } |
| 1241 | |
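| | //------------------------------------------------------------------------ |
| | // RemorphReset: Reset nextSlotNum so that stack slot numbers are assigned |
| | // afresh when the call's arguments are morphed again. |
| | // |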
| 1242 | void fgArgInfo::RemorphReset() |
| 1243 | { |
| 1244 | nextSlotNum = INIT_ARG_STACK_SLOT; |
| 1245 | } |
| 1246 | |
| 1247 | //------------------------------------------------------------------------ |
| 1248 | // UpdateRegArg: Update the given fgArgTabEntry while morphing. |
| 1249 | // |
| 1250 | // Arguments: |
| 1251 | // curArgTabEntry - the fgArgTabEntry to update. |
| 1252 | // node - the tree node that defines the argument |
| 1253 | // reMorphing - a boolean value indicating whether we are remorphing the call |
| 1254 | // |
| 1255 | // Assumptions: |
| 1256 | // This must have already been determined to be at least partially passed in registers. |
| 1257 | // |
| 1258 | void fgArgInfo::UpdateRegArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing) |
| 1259 | { |
| 1260 | bool isLateArg = curArgTabEntry->isLateArg(); |
| 1261 | // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa. |
| 1262 | assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) || |
| 1263 | (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0))); |
| 1264 | |
| 1265 | assert(curArgTabEntry->numRegs != 0); |
| 1266 | |
| 1267 | if (curArgTabEntry->parent != nullptr) |
| 1268 | { |
| 1269 | assert(curArgTabEntry->parent->OperIsList()); |
| 1270 | assert(curArgTabEntry->parent->Current() == node); |
| 1271 | } |
| 1272 | |
| 1273 | if (curArgTabEntry->node != node) |
| 1274 | { |
| 1275 | if (reMorphing) |
| 1276 | { |
| 1277 | // Find the arg in the late args list. |
| 1278 | GenTree* argx = Compiler::gtArgNodeByLateArgInx(callTree, curArgTabEntry->lateArgInx); |
| 1279 | if (curArgTabEntry->node != argx) |
| 1280 | { |
| 1281 | curArgTabEntry->node = argx; |
| 1282 | } |
| 1283 | } |
| 1284 | else |
| 1285 | { |
| 1286 | assert(!isLateArg); |
| 1287 | curArgTabEntry->node = node; |
| 1288 | } |
| 1289 | } |
| 1290 | } |
| 1291 | |
| 1292 | //------------------------------------------------------------------------ |
| 1293 | // UpdateStkArg: Update the given fgArgTabEntry while morphing. |
| 1294 | // |
| 1295 | // Arguments: |
| 1296 | // curArgTabEntry - the fgArgTabEntry to update. |
| 1297 | // node - the tree node that defines the argument |
| 1298 | // reMorphing - a boolean value indicating whether we are remorphing the call |
| 1299 | // |
| 1300 | // Assumptions: |
| 1301 | // This must have already been determined to be passed on the stack. |
| 1302 | // |
| 1303 | void fgArgInfo::UpdateStkArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing) |
| 1304 | { |
| 1305 | bool isLateArg = curArgTabEntry->isLateArg(); |
| 1306 | // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa. |
| 1307 | assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) || |
| 1308 | (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0))); |
| 1309 | |
| 1310 | noway_assert(curArgTabEntry->parent != nullptr); |
| 1311 | assert((curArgTabEntry->regNum == REG_STK) || curArgTabEntry->isSplit); |
| 1312 | assert(curArgTabEntry->parent->OperIsList()); |
| 1313 | assert(curArgTabEntry->parent->Current() == node); |
| 1314 | nextSlotNum = (unsigned)roundUp(nextSlotNum, curArgTabEntry->alignment); |
| 1315 | assert(curArgTabEntry->slotNum == nextSlotNum); |
| 1316 | |
| 1317 | if (curArgTabEntry->node != node) |
| 1318 | { |
| 1319 | #if FEATURE_FIXED_OUT_ARGS |
| 1320 | if (isLateArg) |
| 1321 | { |
| 1322 | GenTree* argx = nullptr; |
| 1323 | unsigned lateArgInx = curArgTabEntry->lateArgInx; |
| 1324 | |
| 1325 | // Traverse the late argument list to find this argument so that we can update it. |
| 1326 | unsigned listInx = 0; |
| 1327 | for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), listInx++) |
| 1328 | { |
| 1329 | argx = list->Current(); |
| 1330 | assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs; |
| 1331 | if (listInx == lateArgInx) |
| 1332 | { |
| 1333 | break; |
| 1334 | } |
| 1335 | } |
| 1336 | assert(listInx == lateArgInx); |
| 1337 | assert(lateArgInx == curArgTabEntry->lateArgInx); |
| 1338 | |
| 1339 | if (curArgTabEntry->node != argx) |
| 1340 | { |
| 1341 | curArgTabEntry->node = argx; |
| 1342 | } |
| 1343 | } |
| 1344 | else |
| 1345 | #endif // FEATURE_FIXED_OUT_ARGS |
| 1346 | { |
| 1347 | curArgTabEntry->node = node; |
| 1348 | } |
| 1349 | } |
| 1350 | nextSlotNum += curArgTabEntry->numSlots; |
| 1351 | } |
| 1352 | |
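| | //------------------------------------------------------------------------ |
| | // SplitArg: Record that the argument with the given argNum is split, with |
| | // 'numRegs' passed in registers and 'numSlots' passed on the stack (or, if |
| | // the table is already complete, verify the existing entry), and advance |
| | // nextSlotNum past the stack portion. |
| | // |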
| 1353 | void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots) |
| 1354 | { |
| 1355 | fgArgTabEntry* curArgTabEntry = nullptr; |
| 1356 | assert(argNum < argCount); |
| 1357 | for (unsigned inx = 0; inx < argCount; inx++) |
| 1358 | { |
| 1359 | curArgTabEntry = argTable[inx]; |
| 1360 | if (curArgTabEntry->argNum == argNum) |
| 1361 | { |
| 1362 | break; |
| 1363 | } |
| 1364 | } |
| 1365 | |
| 1366 | assert(numRegs > 0); |
| 1367 | assert(numSlots > 0); |
| 1368 | |
| 1369 | if (argsComplete) |
| 1370 | { |
| 1371 | assert(curArgTabEntry->isSplit == true); |
| 1372 | assert(curArgTabEntry->numRegs == numRegs); |
| 1373 | assert(curArgTabEntry->numSlots == numSlots); |
| 1374 | assert(hasStackArgs == true); |
| 1375 | } |
| 1376 | else |
| 1377 | { |
| 1378 | curArgTabEntry->isSplit = true; |
| 1379 | curArgTabEntry->numRegs = numRegs; |
| 1380 | curArgTabEntry->numSlots = numSlots; |
| 1381 | hasStackArgs = true; |
| 1382 | } |
| 1383 | nextSlotNum += numSlots; |
| 1384 | } |
| 1385 | |
| 1386 | //------------------------------------------------------------------------ |
| 1387 | // EvalToTmp: Replace the node in the given fgArgTabEntry with a temp |
| 1388 | // |
| 1389 | // Arguments: |
| 1390 | // curArgTabEntry - the fgArgTabEntry for the argument |
| 1391 | // tmpNum - the varNum for the temp |
| 1392 | // newNode - the assignment of the argument value to the temp |
| 1393 | // |
| 1394 | // Notes: |
| 1395 | // Although the name of this method is EvalToTmp, it doesn't actually create |
| 1396 | // the temp or the copy. |
| 1397 | // |
| 1398 | void fgArgInfo::EvalToTmp(fgArgTabEntry* curArgTabEntry, unsigned tmpNum, GenTree* newNode) |
| 1399 | { |
| 1400 | assert(curArgTabEntry->parent->Current() == newNode); |
| 1401 | |
| 1402 | curArgTabEntry->node = newNode; |
| 1403 | curArgTabEntry->tmpNum = tmpNum; |
| 1404 | curArgTabEntry->isTmp = true; |
| 1405 | } |
| 1406 | |
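| | //------------------------------------------------------------------------ |
| | // ArgsComplete: Called once all arguments have been added to the table. |
| | // Walks the entries and decides which arguments must be evaluated into |
| | // temps (needTmp) or need placeholder nodes (needPlace) so that assignments, |
| | // calls and other side effects are evaluated in the correct order. |
| | // |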
| 1407 | void fgArgInfo::ArgsComplete() |
| 1408 | { |
| 1409 | bool hasStackArgs = false; |
| 1410 | bool hasStructRegArg = false; |
| 1411 | |
| 1412 | for (unsigned curInx = 0; curInx < argCount; curInx++) |
| 1413 | { |
| 1414 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 1415 | assert(curArgTabEntry != nullptr); |
| 1416 | GenTree* argx = curArgTabEntry->node; |
| 1417 | |
| 1418 | if (curArgTabEntry->regNum == REG_STK) |
| 1419 | { |
| 1420 | hasStackArgs = true; |
| 1421 | #if !FEATURE_FIXED_OUT_ARGS |
| 1422 | // On x86 we use push instructions to pass arguments: |
| 1423 | // The non-register arguments are evaluated and pushed in order |
| 1424 | // and they are never evaluated into temps |
| 1425 | // |
| 1426 | continue; |
| 1427 | #endif |
| 1428 | } |
| 1429 | #if FEATURE_ARG_SPLIT |
| 1430 | else if (curArgTabEntry->isSplit) |
| 1431 | { |
| 1432 | hasStructRegArg = true; |
| 1433 | hasStackArgs = true; |
| 1434 | } |
| 1435 | #endif // FEATURE_ARG_SPLIT |
| 1436 | else // we have a register argument, next we look for a struct type. |
| 1437 | { |
| 1438 | if (varTypeIsStruct(argx) UNIX_AMD64_ABI_ONLY(|| curArgTabEntry->isStruct)) |
| 1439 | { |
| 1440 | hasStructRegArg = true; |
| 1441 | } |
| 1442 | } |
| 1443 | |
| 1444 | /* If the argument tree contains an assignment (GTF_ASG) then the argument |
| 1445 | and every earlier argument (except constants) must be evaluated into temps |
| 1446 | since there may be other arguments that follow and they may use the value being assigned. |
| 1447 | |
| 1448 | EXAMPLE: ArgTab is "a, a=5, a" |
| 1449 | -> when we see the second arg "a=5" |
| 1450 | we know the first two arguments "a, a=5" have to be evaluated into temps |
| 1451 | |
| 1452 | For the case of an assignment, we only know that there exists some assignment someplace |
| 1453 | in the tree. We don't know what is being assigned so we are very conservative here |
| 1454 | and assume that any local variable could have been assigned. |
| 1455 | */ |
| 1456 | |
| 1457 | if (argx->gtFlags & GTF_ASG) |
| 1458 | { |
| 1459 | // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to |
| 1460 | // a tmp, then we need a temp in the late arg list. |
| 1461 | if ((argCount > 1) || argx->OperIsCopyBlkOp() |
| 1462 | #ifdef FEATURE_FIXED_OUT_ARGS |
| 1463 | || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property |
| 1464 | // that we only have late non-register args when that feature is on. |
| 1465 | #endif // FEATURE_FIXED_OUT_ARGS |
| 1466 | ) |
| 1467 | { |
| 1468 | curArgTabEntry->needTmp = true; |
| 1469 | } |
| 1470 | |
| 1471 | // For all previous arguments, unless they are a simple constant |
| 1472 | // we require that they be evaluated into temps |
| 1473 | for (unsigned prevInx = 0; prevInx < curInx; prevInx++) |
| 1474 | { |
| 1475 | fgArgTabEntry* prevArgTabEntry = argTable[prevInx]; |
| 1476 | assert(prevArgTabEntry->argNum < curArgTabEntry->argNum); |
| 1477 | |
| 1478 | assert(prevArgTabEntry->node); |
| 1479 | if (prevArgTabEntry->node->gtOper != GT_CNS_INT) |
| 1480 | { |
| 1481 | prevArgTabEntry->needTmp = true; |
| 1482 | } |
| 1483 | } |
| 1484 | } |
| 1485 | |
| 1486 | bool treatLikeCall = ((argx->gtFlags & GTF_CALL) != 0); |
| 1487 | #if FEATURE_FIXED_OUT_ARGS |
| 1488 | // Like calls, if this argument has a tree that will do an inline throw, |
| 1489 | // a call to a jit helper, then we need to treat it like a call (but only |
| 1490 | // if there are/were any stack args). |
| 1491 | // This means unnesting, sorting, etc. Technically this is overly |
| 1492 | // conservative, but I want to avoid as much special-case debug-only code |
| 1493 | // as possible, so leveraging the GTF_CALL flag is the easiest. |
| 1494 | // |
| 1495 | if (!treatLikeCall && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) && compiler->opts.compDbgCode && |
| 1496 | (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT)) |
| 1497 | { |
| 1498 | for (unsigned otherInx = 0; otherInx < argCount; otherInx++) |
| 1499 | { |
| 1500 | if (otherInx == curInx) |
| 1501 | { |
| 1502 | continue; |
| 1503 | } |
| 1504 | |
| 1505 | if (argTable[otherInx]->regNum == REG_STK) |
| 1506 | { |
| 1507 | treatLikeCall = true; |
| 1508 | break; |
| 1509 | } |
| 1510 | } |
| 1511 | } |
| 1512 | #endif // FEATURE_FIXED_OUT_ARGS |
| 1513 | |
| 1514 | /* If it contains a call (GTF_CALL) then itself and everything before the call |
| 1515 | with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT |
| 1516 | has to be kept in the right order since we will move the call to the first position) |
| 1517 | |
| 1518 | For calls we don't have to be quite as conservative as we are with an assignment |
| 1519 | since the call won't be modifying any non-address taken LclVars. |
| 1520 | */ |
| 1521 | |
| 1522 | if (treatLikeCall) |
| 1523 | { |
| 1524 | if (argCount > 1) // If this is not the only argument |
| 1525 | { |
| 1526 | curArgTabEntry->needTmp = true; |
| 1527 | } |
| 1528 | else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL)) |
| 1529 | { |
| 1530 | // Spill all arguments that are floating point calls |
| 1531 | curArgTabEntry->needTmp = true; |
| 1532 | } |
| 1533 | |
| 1534 | // All previous arguments may need to be evaluated into temps |
| 1535 | for (unsigned prevInx = 0; prevInx < curInx; prevInx++) |
| 1536 | { |
| 1537 | fgArgTabEntry* prevArgTabEntry = argTable[prevInx]; |
| 1538 | assert(prevArgTabEntry->argNum < curArgTabEntry->argNum); |
| 1539 | assert(prevArgTabEntry->node); |
| 1540 | |
| 1541 | // For all previous arguments, if they have any GTF_ALL_EFFECT |
| 1542 | // we require that they be evaluated into a temp |
| 1543 | if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0) |
| 1544 | { |
| 1545 | prevArgTabEntry->needTmp = true; |
| 1546 | } |
| 1547 | #if FEATURE_FIXED_OUT_ARGS |
| 1548 | // Or, if they are stored into the FIXED_OUT_ARG area |
| 1549 | // we require that they be moved to the gtCallLateArgs |
| 1550 | // and replaced with a placeholder node |
| 1551 | else if (prevArgTabEntry->regNum == REG_STK) |
| 1552 | { |
| 1553 | prevArgTabEntry->needPlace = true; |
| 1554 | } |
| 1555 | #if FEATURE_ARG_SPLIT |
| 1556 | else if (prevArgTabEntry->isSplit) |
| 1557 | { |
| 1558 | prevArgTabEntry->needPlace = true; |
| 1559 | } |
#endif // FEATURE_ARG_SPLIT
#endif // FEATURE_FIXED_OUT_ARGS
| 1562 | } |
| 1563 | } |
| 1564 | |
| 1565 | #if FEATURE_MULTIREG_ARGS |
// For the RyuJIT backend we will expand a multireg arg into a GT_FIELD_LIST
// with multiple indirections, so here we consider spilling it into a tmp LclVar.
| 1568 | // |
| 1569 | CLANG_FORMAT_COMMENT_ANCHOR; |
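// Note: on ARM an outgoing argument can be split between registers and stack slots
// (FEATURE_ARG_SPLIT), so it is treated as multireg when it occupies more than one
// register/slot in total; on other targets it is multireg only when it uses more
// than one register.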
| 1570 | #ifdef _TARGET_ARM_ |
| 1571 | bool isMultiRegArg = (curArgTabEntry->numRegs > 0) && (curArgTabEntry->numRegs + curArgTabEntry->numSlots > 1); |
| 1572 | #else |
| 1573 | bool isMultiRegArg = (curArgTabEntry->numRegs > 1); |
| 1574 | #endif |
| 1575 | |
| 1576 | if ((varTypeIsStruct(argx->TypeGet())) && (curArgTabEntry->needTmp == false)) |
| 1577 | { |
| 1578 | if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)) |
| 1579 | { |
| 1580 | // Spill multireg struct arguments that have Assignments or Calls embedded in them |
| 1581 | curArgTabEntry->needTmp = true; |
| 1582 | } |
| 1583 | else |
| 1584 | { |
| 1585 | // We call gtPrepareCost to measure the cost of evaluating this tree |
| 1586 | compiler->gtPrepareCost(argx); |
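// Heuristic: spill the multireg struct arg if its evaluation cost exceeds roughly six
// indirections (6 * IND_COST_EX), since it is too expensive to risk evaluating twice.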
| 1587 | |
| 1588 | if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX))) |
| 1589 | { |
| 1590 | // Spill multireg struct arguments that are expensive to evaluate twice |
| 1591 | curArgTabEntry->needTmp = true; |
| 1592 | } |
| 1593 | #if defined(FEATURE_SIMD) && defined(_TARGET_ARM64_) |
| 1594 | else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet())) |
| 1595 | { |
| 1596 | // SIMD types do not need the optimization below due to their sizes |
| 1597 | if (argx->OperIsSIMDorSimdHWintrinsic() || |
| 1598 | (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) && |
| 1599 | argx->AsObj()->gtOp1->gtOp.gtOp1->OperIsSIMDorSimdHWintrinsic())) |
| 1600 | { |
| 1601 | curArgTabEntry->needTmp = true; |
| 1602 | } |
| 1603 | } |
| 1604 | #endif |
| 1605 | #ifndef _TARGET_ARM_ |
| 1606 | // TODO-Arm: This optimization is not implemented for ARM32 |
| 1607 | // so we skip this for ARM32 until it is ported to use RyuJIT backend |
| 1608 | // |
| 1609 | else if (argx->OperGet() == GT_OBJ) |
| 1610 | { |
| 1611 | GenTreeObj* argObj = argx->AsObj(); |
| 1612 | CORINFO_CLASS_HANDLE objClass = argObj->gtClass; |
| 1613 | unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass); |
| 1614 | switch (structSize) |
| 1615 | { |
| 1616 | case 3: |
| 1617 | case 5: |
| 1618 | case 6: |
| 1619 | case 7: |
| 1620 | // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes |
| 1621 | // |
| 1622 | if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar? |
| 1623 | { |
// If we don't have a LclVar we need to read exactly 3, 5, 6 or 7 bytes.
// For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
| 1626 | // |
| 1627 | curArgTabEntry->needTmp = true; |
| 1628 | } |
| 1629 | break; |
| 1630 | case 11: |
| 1631 | case 13: |
| 1632 | case 14: |
| 1633 | case 15: |
| 1634 | // Spill any GT_OBJ multireg structs that are difficult to extract |
| 1635 | // |
| 1636 | // When we have a GT_OBJ of a struct with the above sizes we would need |
| 1637 | // to use 3 or 4 load instructions to load the exact size of this struct. |
| 1638 | // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence |
| 1639 | // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp. |
| 1640 | // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing |
| 1641 | // the argument. |
| 1642 | // |
| 1643 | curArgTabEntry->needTmp = true; |
| 1644 | break; |
| 1645 | |
| 1646 | default: |
| 1647 | break; |
| 1648 | } |
| 1649 | } |
| 1650 | #endif // !_TARGET_ARM_ |
| 1651 | } |
| 1652 | } |
| 1653 | #endif // FEATURE_MULTIREG_ARGS |
| 1654 | } |
| 1655 | |
| 1656 | // We only care because we can't spill structs and qmarks involve a lot of spilling, but |
| 1657 | // if we don't have qmarks, then it doesn't matter. |
// So check for QMARKs globally once here, instead of inside the loop.
| 1659 | // |
| 1660 | const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed); |
| 1661 | |
| 1662 | #if FEATURE_FIXED_OUT_ARGS |
| 1663 | |
| 1664 | // For Arm/x64 we only care because we can't reorder a register |
| 1665 | // argument that uses GT_LCLHEAP. This is an optimization to |
| 1666 | // save a check inside the below loop. |
| 1667 | // |
| 1668 | const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed); |
| 1669 | |
| 1670 | #else |
| 1671 | |
| 1672 | const bool hasStackArgsWeCareAbout = hasStackArgs; |
| 1673 | |
| 1674 | #endif // FEATURE_FIXED_OUT_ARGS |
| 1675 | |
| 1676 | // If we have any stack args we have to force the evaluation |
| 1677 | // of any arguments passed in registers that might throw an exception |
| 1678 | // |
// Technically we are only required to handle the following two cases:
| 1680 | // a GT_IND with GTF_IND_RNGCHK (only on x86) or |
| 1681 | // a GT_LCLHEAP node that allocates stuff on the stack |
| 1682 | // |
| 1683 | if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout) |
| 1684 | { |
| 1685 | for (unsigned curInx = 0; curInx < argCount; curInx++) |
| 1686 | { |
| 1687 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 1688 | assert(curArgTabEntry != nullptr); |
| 1689 | GenTree* argx = curArgTabEntry->node; |
| 1690 | |
| 1691 | // Examine the register args that are currently not marked needTmp |
| 1692 | // |
| 1693 | if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK)) |
| 1694 | { |
| 1695 | if (hasStackArgsWeCareAbout) |
| 1696 | { |
| 1697 | #if !FEATURE_FIXED_OUT_ARGS |
// On x86 we previously recorded a stack depth of zero when
// morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag.
// Thus we cannot reorder the argument after any stack-based argument.
| 1701 | // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to |
| 1702 | // check for it explicitly.) |
| 1703 | // |
| 1704 | if (argx->gtFlags & GTF_EXCEPT) |
| 1705 | { |
| 1706 | curArgTabEntry->needTmp = true; |
| 1707 | continue; |
| 1708 | } |
| 1709 | #else |
| 1710 | // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP |
| 1711 | // |
| 1712 | if (argx->gtFlags & GTF_EXCEPT) |
| 1713 | { |
| 1714 | assert(compiler->compLocallocUsed); |
| 1715 | |
| 1716 | // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree |
| 1717 | // |
| 1718 | if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT) |
| 1719 | { |
| 1720 | curArgTabEntry->needTmp = true; |
| 1721 | continue; |
| 1722 | } |
| 1723 | } |
| 1724 | #endif |
| 1725 | } |
| 1726 | if (hasStructRegArgWeCareAbout) |
| 1727 | { |
| 1728 | // Returns true if a GT_QMARK node is encountered in the argx tree |
| 1729 | // |
| 1730 | if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT) |
| 1731 | { |
| 1732 | curArgTabEntry->needTmp = true; |
| 1733 | continue; |
| 1734 | } |
| 1735 | } |
| 1736 | } |
| 1737 | } |
| 1738 | } |
| 1739 | |
| 1740 | argsComplete = true; |
| 1741 | } |
| 1742 | |
| 1743 | void fgArgInfo::SortArgs() |
| 1744 | { |
| 1745 | assert(argsComplete == true); |
| 1746 | |
| 1747 | #ifdef DEBUG |
| 1748 | if (compiler->verbose) |
| 1749 | { |
| 1750 | printf("\nSorting the arguments:\n" ); |
| 1751 | } |
| 1752 | #endif |
| 1753 | |
| 1754 | /* Shuffle the arguments around before we build the gtCallLateArgs list. |
| 1755 | The idea is to move all "simple" arguments like constants and local vars |
| 1756 | to the end of the table, and move the complex arguments towards the beginning |
| 1757 | of the table. This will help prevent registers from being spilled by |
| 1758 | allowing us to evaluate the more complex arguments before the simpler arguments. |
| 1759 | The argTable ends up looking like: |
| 1760 | +------------------------------------+ <--- argTable[argCount - 1] |
| 1761 | | constants | |
| 1762 | +------------------------------------+ |
| 1763 | | local var / local field | |
| 1764 | +------------------------------------+ |
| 1765 | | remaining arguments sorted by cost | |
| 1766 | +------------------------------------+ |
| 1767 | | temps (argTable[].needTmp = true) | |
| 1768 | +------------------------------------+ |
| 1769 | | args with calls (GTF_CALL) | |
| 1770 | +------------------------------------+ <--- argTable[0] |
| 1771 | */ |
| 1772 | |
| 1773 | /* Set the beginning and end for the new argument table */ |
| 1774 | unsigned curInx; |
| 1775 | int regCount = 0; |
| 1776 | unsigned begTab = 0; |
| 1777 | unsigned endTab = argCount - 1; |
| 1778 | unsigned argsRemaining = argCount; |
| 1779 | |
| 1780 | // First take care of arguments that are constants. |
| 1781 | // [We use a backward iterator pattern] |
| 1782 | // |
| 1783 | curInx = argCount; |
| 1784 | do |
| 1785 | { |
| 1786 | curInx--; |
| 1787 | |
| 1788 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 1789 | |
| 1790 | if (curArgTabEntry->regNum != REG_STK) |
| 1791 | { |
| 1792 | regCount++; |
| 1793 | } |
| 1794 | |
| 1795 | // Skip any already processed args |
| 1796 | // |
| 1797 | if (!curArgTabEntry->processed) |
| 1798 | { |
| 1799 | GenTree* argx = curArgTabEntry->node; |
| 1800 | |
| 1801 | // put constants at the end of the table |
| 1802 | // |
| 1803 | if (argx->gtOper == GT_CNS_INT) |
| 1804 | { |
| 1805 | noway_assert(curInx <= endTab); |
| 1806 | |
| 1807 | curArgTabEntry->processed = true; |
| 1808 | |
| 1809 | // place curArgTabEntry at the endTab position by performing a swap |
| 1810 | // |
| 1811 | if (curInx != endTab) |
| 1812 | { |
| 1813 | argTable[curInx] = argTable[endTab]; |
| 1814 | argTable[endTab] = curArgTabEntry; |
| 1815 | } |
| 1816 | |
| 1817 | endTab--; |
| 1818 | argsRemaining--; |
| 1819 | } |
| 1820 | } |
| 1821 | } while (curInx > 0); |
| 1822 | |
| 1823 | if (argsRemaining > 0) |
| 1824 | { |
| 1825 | // Next take care of arguments that are calls. |
| 1826 | // [We use a forward iterator pattern] |
| 1827 | // |
| 1828 | for (curInx = begTab; curInx <= endTab; curInx++) |
| 1829 | { |
| 1830 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 1831 | |
| 1832 | // Skip any already processed args |
| 1833 | // |
| 1834 | if (!curArgTabEntry->processed) |
| 1835 | { |
| 1836 | GenTree* argx = curArgTabEntry->node; |
| 1837 | |
| 1838 | // put calls at the beginning of the table |
| 1839 | // |
| 1840 | if (argx->gtFlags & GTF_CALL) |
| 1841 | { |
| 1842 | curArgTabEntry->processed = true; |
| 1843 | |
| 1844 | // place curArgTabEntry at the begTab position by performing a swap |
| 1845 | // |
| 1846 | if (curInx != begTab) |
| 1847 | { |
| 1848 | argTable[curInx] = argTable[begTab]; |
| 1849 | argTable[begTab] = curArgTabEntry; |
| 1850 | } |
| 1851 | |
| 1852 | begTab++; |
| 1853 | argsRemaining--; |
| 1854 | } |
| 1855 | } |
| 1856 | } |
| 1857 | } |
| 1858 | |
| 1859 | if (argsRemaining > 0) |
| 1860 | { |
// Next take care of arguments that are temps.
| 1862 | // These temps come before the arguments that are |
| 1863 | // ordinary local vars or local fields |
| 1864 | // since this will give them a better chance to become |
| 1865 | // enregistered into their actual argument register. |
| 1866 | // [We use a forward iterator pattern] |
| 1867 | // |
| 1868 | for (curInx = begTab; curInx <= endTab; curInx++) |
| 1869 | { |
| 1870 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 1871 | |
| 1872 | // Skip any already processed args |
| 1873 | // |
| 1874 | if (!curArgTabEntry->processed) |
| 1875 | { |
| 1876 | if (curArgTabEntry->needTmp) |
| 1877 | { |
| 1878 | curArgTabEntry->processed = true; |
| 1879 | |
| 1880 | // place curArgTabEntry at the begTab position by performing a swap |
| 1881 | // |
| 1882 | if (curInx != begTab) |
| 1883 | { |
| 1884 | argTable[curInx] = argTable[begTab]; |
| 1885 | argTable[begTab] = curArgTabEntry; |
| 1886 | } |
| 1887 | |
| 1888 | begTab++; |
| 1889 | argsRemaining--; |
| 1890 | } |
| 1891 | } |
| 1892 | } |
| 1893 | } |
| 1894 | |
| 1895 | if (argsRemaining > 0) |
| 1896 | { |
| 1897 | // Next take care of local var and local field arguments. |
| 1898 | // These are moved towards the end of the argument evaluation. |
| 1899 | // [We use a backward iterator pattern] |
| 1900 | // |
| 1901 | curInx = endTab + 1; |
| 1902 | do |
| 1903 | { |
| 1904 | curInx--; |
| 1905 | |
| 1906 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 1907 | |
| 1908 | // Skip any already processed args |
| 1909 | // |
| 1910 | if (!curArgTabEntry->processed) |
| 1911 | { |
| 1912 | GenTree* argx = curArgTabEntry->node; |
| 1913 | |
| 1914 | if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD)) |
| 1915 | { |
| 1916 | noway_assert(curInx <= endTab); |
| 1917 | |
| 1918 | curArgTabEntry->processed = true; |
| 1919 | |
| 1920 | // place curArgTabEntry at the endTab position by performing a swap |
| 1921 | // |
| 1922 | if (curInx != endTab) |
| 1923 | { |
| 1924 | argTable[curInx] = argTable[endTab]; |
| 1925 | argTable[endTab] = curArgTabEntry; |
| 1926 | } |
| 1927 | |
| 1928 | endTab--; |
| 1929 | argsRemaining--; |
| 1930 | } |
| 1931 | } |
| 1932 | } while (curInx > begTab); |
| 1933 | } |
| 1934 | |
| 1935 | // Finally, take care of all the remaining arguments. |
| 1936 | // Note that we fill in one arg at a time using a while loop. |
| 1937 | bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop |
| 1938 | while (argsRemaining > 0) |
| 1939 | { |
| 1940 | /* Find the most expensive arg remaining and evaluate it next */ |
| 1941 | |
| 1942 | fgArgTabEntry* expensiveArgTabEntry = nullptr; |
| 1943 | unsigned expensiveArg = UINT_MAX; |
| 1944 | unsigned expensiveArgCost = 0; |
| 1945 | |
| 1946 | // [We use a forward iterator pattern] |
| 1947 | // |
| 1948 | for (curInx = begTab; curInx <= endTab; curInx++) |
| 1949 | { |
| 1950 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 1951 | |
| 1952 | // Skip any already processed args |
| 1953 | // |
| 1954 | if (!curArgTabEntry->processed) |
| 1955 | { |
| 1956 | GenTree* argx = curArgTabEntry->node; |
| 1957 | |
| 1958 | // We should have already handled these kinds of args |
| 1959 | assert(argx->gtOper != GT_LCL_VAR); |
| 1960 | assert(argx->gtOper != GT_LCL_FLD); |
| 1961 | assert(argx->gtOper != GT_CNS_INT); |
| 1962 | |
| 1963 | // This arg should either have no persistent side effects or be the last one in our table |
| 1964 | // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); |
| 1965 | |
| 1966 | if (argsRemaining == 1) |
| 1967 | { |
| 1968 | // This is the last arg to place |
| 1969 | expensiveArg = curInx; |
| 1970 | expensiveArgTabEntry = curArgTabEntry; |
| 1971 | assert(begTab == endTab); |
| 1972 | break; |
| 1973 | } |
| 1974 | else |
| 1975 | { |
| 1976 | if (!costsPrepared) |
| 1977 | { |
| 1978 | /* We call gtPrepareCost to measure the cost of evaluating this tree */ |
| 1979 | compiler->gtPrepareCost(argx); |
| 1980 | } |
| 1981 | |
| 1982 | if (argx->gtCostEx > expensiveArgCost) |
| 1983 | { |
| 1984 | // Remember this arg as the most expensive one that we have yet seen |
| 1985 | expensiveArgCost = argx->gtCostEx; |
| 1986 | expensiveArg = curInx; |
| 1987 | expensiveArgTabEntry = curArgTabEntry; |
| 1988 | } |
| 1989 | } |
| 1990 | } |
| 1991 | } |
| 1992 | |
| 1993 | noway_assert(expensiveArg != UINT_MAX); |
| 1994 | |
| 1995 | // put the most expensive arg towards the beginning of the table |
| 1996 | |
| 1997 | expensiveArgTabEntry->processed = true; |
| 1998 | |
| 1999 | // place expensiveArgTabEntry at the begTab position by performing a swap |
| 2000 | // |
| 2001 | if (expensiveArg != begTab) |
| 2002 | { |
| 2003 | argTable[expensiveArg] = argTable[begTab]; |
| 2004 | argTable[begTab] = expensiveArgTabEntry; |
| 2005 | } |
| 2006 | |
| 2007 | begTab++; |
| 2008 | argsRemaining--; |
| 2009 | |
| 2010 | costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop |
| 2011 | } |
| 2012 | |
// The table should now be completely filled and thus begTab should now be adjacent to endTab
// and argsRemaining should be zero
| 2015 | assert(begTab == (endTab + 1)); |
| 2016 | assert(argsRemaining == 0); |
| 2017 | |
| 2018 | #if !FEATURE_FIXED_OUT_ARGS |
| 2019 | // Finally build the regArgList |
| 2020 | // |
| 2021 | callTree->gtCall.regArgList = NULL; |
| 2022 | callTree->gtCall.regArgListCount = regCount; |
| 2023 | |
| 2024 | unsigned regInx = 0; |
| 2025 | for (curInx = 0; curInx < argCount; curInx++) |
| 2026 | { |
| 2027 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 2028 | |
| 2029 | if (curArgTabEntry->regNum != REG_STK) |
| 2030 | { |
// Record the argument register in the regArgList
| 2032 | // |
| 2033 | callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum; |
| 2034 | regInx++; |
| 2035 | } |
| 2036 | } |
| 2037 | #endif // !FEATURE_FIXED_OUT_ARGS |
| 2038 | |
| 2039 | argsSorted = true; |
| 2040 | } |
| 2041 | |
| 2042 | #ifdef DEBUG |
| 2043 | void fgArgInfo::Dump(Compiler* compiler) |
| 2044 | { |
| 2045 | for (unsigned curInx = 0; curInx < ArgCount(); curInx++) |
| 2046 | { |
| 2047 | fgArgTabEntry* curArgEntry = ArgTable()[curInx]; |
| 2048 | curArgEntry->Dump(); |
| 2049 | } |
| 2050 | } |
| 2051 | #endif |
| 2052 | |
| 2053 | //------------------------------------------------------------------------------ |
// fgMakeTmpArgNode : Create the node that will represent this argument in the late
//                    argument list, referencing the temp recorded in the arg table entry.
//                    We need this in order to enforce the ordering of the evaluation
//                    of arguments.
//
// Arguments:
//    curArgTabEntry - the argument table entry; its tmpNum identifies the temp to use
//
// Return Value:
//    the newly created tree referencing the temp var.
| 2063 | |
| 2064 | GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) |
| 2065 | { |
| 2066 | unsigned tmpVarNum = curArgTabEntry->tmpNum; |
| 2067 | LclVarDsc* varDsc = &lvaTable[tmpVarNum]; |
| 2068 | assert(varDsc->lvIsTemp); |
| 2069 | var_types type = varDsc->TypeGet(); |
| 2070 | |
| 2071 | // Create a copy of the temp to go into the late argument list |
| 2072 | GenTree* arg = gtNewLclvNode(tmpVarNum, type); |
| 2073 | GenTree* addrNode = nullptr; |
| 2074 | |
| 2075 | if (varTypeIsStruct(type)) |
| 2076 | { |
| 2077 | |
| 2078 | #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_ARM_) |
| 2079 | |
| 2080 | // Can this type be passed as a primitive type? |
| 2081 | // If so, the following call will return the corresponding primitive type. |
| 2082 | // Otherwise, it will return TYP_UNKNOWN and we will pass it as a struct type. |
| 2083 | |
| 2084 | bool passedAsPrimitive = false; |
| 2085 | if (curArgTabEntry->isSingleRegOrSlot()) |
| 2086 | { |
| 2087 | CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle(); |
| 2088 | var_types structBaseType = |
| 2089 | getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd, curArgTabEntry->isVararg); |
| 2090 | |
| 2091 | if (structBaseType != TYP_UNKNOWN) |
| 2092 | { |
| 2093 | passedAsPrimitive = true; |
| 2094 | #if defined(UNIX_AMD64_ABI) |
| 2095 | // TODO-Cleanup: This is inelegant, but eventually we'll track this in the fgArgTabEntry, |
| 2096 | // and otherwise we'd have to either modify getPrimitiveTypeForStruct() to take |
| 2097 | // a structDesc or call eeGetSystemVAmd64PassStructInRegisterDescriptor yet again. |
| 2098 | // |
| 2099 | if (genIsValidFloatReg(curArgTabEntry->regNum)) |
| 2100 | { |
| 2101 | if (structBaseType == TYP_INT) |
| 2102 | { |
| 2103 | structBaseType = TYP_FLOAT; |
| 2104 | } |
| 2105 | else |
| 2106 | { |
| 2107 | assert(structBaseType == TYP_LONG); |
| 2108 | structBaseType = TYP_DOUBLE; |
| 2109 | } |
| 2110 | } |
| 2111 | #endif |
| 2112 | type = structBaseType; |
| 2113 | } |
| 2114 | } |
| 2115 | |
// If it is passed in registers, don't get the address of the var. Make it a
// field instead. It will be loaded into registers with a putarg_reg tree in Lowering.
| 2118 | if (passedAsPrimitive) |
| 2119 | { |
| 2120 | arg->ChangeOper(GT_LCL_FLD); |
| 2121 | arg->gtType = type; |
| 2122 | } |
| 2123 | else |
| 2124 | { |
| 2125 | var_types addrType = TYP_BYREF; |
| 2126 | arg = gtNewOperNode(GT_ADDR, addrType, arg); |
| 2127 | addrNode = arg; |
| 2128 | |
| 2129 | #if FEATURE_MULTIREG_ARGS |
| 2130 | #ifdef _TARGET_ARM64_ |
| 2131 | assert(varTypeIsStruct(type)); |
| 2132 | if (lvaIsMultiregStruct(varDsc, curArgTabEntry->isVararg)) |
| 2133 | { |
| 2134 | // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD); |
| 2135 | // as that is how UNIX_AMD64_ABI works. |
| 2136 | // We will create a GT_OBJ for the argument below. |
| 2137 | // This will be passed by value in two registers. |
| 2138 | assert(addrNode != nullptr); |
| 2139 | |
| 2140 | // Create an Obj of the temp to use it as a call argument. |
| 2141 | arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); |
| 2142 | |
| 2143 | // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here; |
| 2144 | // this is only to preserve former behavior (though some CSE'ing of struct |
| 2145 | // values can be pessimizing, so enabling this may require some additional tuning). |
| 2146 | arg->gtFlags |= GTF_DONT_CSE; |
| 2147 | } |
| 2148 | #else |
| 2149 | // Always create an Obj of the temp to use it as a call argument. |
| 2150 | arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); |
| 2151 | arg->gtFlags |= GTF_DONT_CSE; |
| 2152 | #endif // !_TARGET_ARM64_ |
| 2153 | #endif // FEATURE_MULTIREG_ARGS |
| 2154 | } |
| 2155 | |
| 2156 | #else // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_) |
| 2157 | |
// On other targets, we pass the struct by value.
| 2159 | assert(varTypeIsStruct(type)); |
| 2160 | |
| 2161 | addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg); |
| 2162 | |
| 2163 | // Get a new Obj node temp to use it as a call argument. |
| 2164 | // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object. |
| 2165 | arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode); |
| 2166 | |
| 2167 | #endif // not (_TARGET_AMD64_ or _TARGET_ARM64_ or _TARGET_ARM_) |
| 2168 | |
| 2169 | } // (varTypeIsStruct(type)) |
| 2170 | |
| 2171 | if (addrNode != nullptr) |
| 2172 | { |
| 2173 | assert(addrNode->gtOper == GT_ADDR); |
| 2174 | |
| 2175 | // This will prevent this LclVar from being optimized away |
| 2176 | lvaSetVarAddrExposed(tmpVarNum); |
| 2177 | |
| 2178 | // the child of a GT_ADDR is required to have this flag set |
| 2179 | addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE; |
| 2180 | } |
| 2181 | |
| 2182 | return arg; |
| 2183 | } |
| 2184 | |
| 2185 | //------------------------------------------------------------------------------ |
| 2186 | // EvalArgsToTemps : Create temp assignments and populate the LateArgs list. |
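//
// Notes:
//    For each entry in the (sorted) argument table this either evaluates the argument into
//    a new temp (when needTmp is set) and appends a use of that temp to gtCallLateArgs, or
//    moves the argument node itself to gtCallLateArgs, leaving a placeholder node in the
//    original gtCallArgs list (when the arg is passed in a register or needPlace is set).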
| 2187 | |
| 2188 | void fgArgInfo::EvalArgsToTemps() |
| 2189 | { |
| 2190 | assert(argsSorted == true); |
| 2191 | |
| 2192 | unsigned regArgInx = 0; |
| 2193 | // Now go through the argument table and perform the necessary evaluation into temps |
| 2194 | GenTreeArgList* tmpRegArgNext = nullptr; |
| 2195 | for (unsigned curInx = 0; curInx < argCount; curInx++) |
| 2196 | { |
| 2197 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 2198 | |
| 2199 | GenTree* argx = curArgTabEntry->node; |
| 2200 | GenTree* setupArg = nullptr; |
| 2201 | GenTree* defArg; |
| 2202 | |
| 2203 | #if !FEATURE_FIXED_OUT_ARGS |
| 2204 | // Only ever set for FEATURE_FIXED_OUT_ARGS |
| 2205 | assert(curArgTabEntry->needPlace == false); |
| 2206 | |
| 2207 | // On x86 and other archs that use push instructions to pass arguments: |
| 2208 | // Only the register arguments need to be replaced with placeholder nodes. |
| 2209 | // Stacked arguments are evaluated and pushed (or stored into the stack) in order. |
| 2210 | // |
| 2211 | if (curArgTabEntry->regNum == REG_STK) |
| 2212 | continue; |
| 2213 | #endif |
| 2214 | |
| 2215 | if (curArgTabEntry->needTmp) |
| 2216 | { |
| 2217 | if (curArgTabEntry->isTmp == true) |
| 2218 | { |
| 2219 | // Create a copy of the temp to go into the late argument list |
| 2220 | defArg = compiler->fgMakeTmpArgNode(curArgTabEntry); |
| 2221 | |
| 2222 | // mark the original node as a late argument |
| 2223 | argx->gtFlags |= GTF_LATE_ARG; |
| 2224 | } |
| 2225 | else |
| 2226 | { |
| 2227 | // Create a temp assignment for the argument |
| 2228 | // Put the temp in the gtCallLateArgs list |
| 2229 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 2230 | |
| 2231 | #ifdef DEBUG |
| 2232 | if (compiler->verbose) |
| 2233 | { |
| 2234 | printf("Argument with 'side effect'...\n" ); |
| 2235 | compiler->gtDispTree(argx); |
| 2236 | } |
| 2237 | #endif |
| 2238 | |
| 2239 | #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) |
| 2240 | noway_assert(argx->gtType != TYP_STRUCT); |
| 2241 | #endif |
| 2242 | |
unsigned tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
| 2244 | if (argx->gtOper == GT_MKREFANY) |
| 2245 | { |
| 2246 | // For GT_MKREFANY, typically the actual struct copying does |
| 2247 | // not have any side-effects and can be delayed. So instead |
| 2248 | // of using a temp for the whole struct, we can just use a temp |
// for the operand that has a side-effect.
| 2250 | GenTree* operand; |
| 2251 | if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0) |
| 2252 | { |
| 2253 | operand = argx->gtOp.gtOp1; |
| 2254 | |
| 2255 | // In the early argument evaluation, place an assignment to the temp |
| 2256 | // from the source operand of the mkrefany |
| 2257 | setupArg = compiler->gtNewTempAssign(tmpVarNum, operand); |
| 2258 | |
| 2259 | // Replace the operand for the mkrefany with the new temp. |
| 2260 | argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet()); |
| 2261 | } |
| 2262 | else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0) |
| 2263 | { |
| 2264 | operand = argx->gtOp.gtOp2; |
| 2265 | |
| 2266 | // In the early argument evaluation, place an assignment to the temp |
| 2267 | // from the source operand of the mkrefany |
| 2268 | setupArg = compiler->gtNewTempAssign(tmpVarNum, operand); |
| 2269 | |
| 2270 | // Replace the operand for the mkrefany with the new temp. |
| 2271 | argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet()); |
| 2272 | } |
| 2273 | } |
| 2274 | |
| 2275 | if (setupArg != nullptr) |
| 2276 | { |
| 2277 | // Now keep the mkrefany for the late argument list |
| 2278 | defArg = argx; |
| 2279 | |
| 2280 | // Clear the side-effect flags because now both op1 and op2 have no side-effects |
| 2281 | defArg->gtFlags &= ~GTF_ALL_EFFECT; |
| 2282 | } |
| 2283 | else |
| 2284 | { |
| 2285 | setupArg = compiler->gtNewTempAssign(tmpVarNum, argx); |
| 2286 | |
| 2287 | LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum; |
| 2288 | var_types lclVarType = genActualType(argx->gtType); |
| 2289 | var_types scalarType = TYP_UNKNOWN; |
| 2290 | |
| 2291 | if (setupArg->OperIsCopyBlkOp()) |
| 2292 | { |
| 2293 | setupArg = compiler->fgMorphCopyBlock(setupArg); |
| 2294 | #if defined(_TARGET_ARMARCH_) |
| 2295 | // This scalar LclVar widening step is only performed for ARM architectures. |
| 2296 | // |
| 2297 | CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum); |
| 2298 | unsigned structSize = varDsc->lvExactSize; |
| 2299 | |
| 2300 | scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg); |
| 2301 | #endif // _TARGET_ARMARCH_ |
| 2302 | } |
| 2303 | |
| 2304 | // scalarType can be set to a wider type for ARM architectures: (3 => 4) or (5,6,7 => 8) |
| 2305 | if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) |
| 2306 | { |
| 2307 | // Create a GT_LCL_FLD using the wider type to go to the late argument list |
| 2308 | defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0); |
| 2309 | } |
| 2310 | else |
| 2311 | { |
| 2312 | // Create a copy of the temp to go to the late argument list |
| 2313 | defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType); |
| 2314 | } |
| 2315 | |
| 2316 | curArgTabEntry->isTmp = true; |
| 2317 | curArgTabEntry->tmpNum = tmpVarNum; |
| 2318 | |
| 2319 | #ifdef _TARGET_ARM_ |
| 2320 | // Previously we might have thought the local was promoted, and thus the 'COPYBLK' |
| 2321 | // might have left holes in the used registers (see |
| 2322 | // fgAddSkippedRegsInPromotedStructArg). |
| 2323 | // Too bad we're not that smart for these intermediate temps... |
| 2324 | if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1)) |
| 2325 | { |
| 2326 | regNumber argReg = curArgTabEntry->regNum; |
| 2327 | regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum); |
| 2328 | for (unsigned i = 1; i < curArgTabEntry->numRegs; i++) |
| 2329 | { |
| 2330 | argReg = genRegArgNext(argReg); |
| 2331 | allUsedRegs |= genRegMask(argReg); |
| 2332 | } |
| 2333 | } |
| 2334 | #endif // _TARGET_ARM_ |
| 2335 | } |
| 2336 | |
| 2337 | /* mark the assignment as a late argument */ |
| 2338 | setupArg->gtFlags |= GTF_LATE_ARG; |
| 2339 | |
| 2340 | #ifdef DEBUG |
| 2341 | if (compiler->verbose) |
| 2342 | { |
| 2343 | printf("\n Evaluate to a temp:\n" ); |
| 2344 | compiler->gtDispTree(setupArg); |
| 2345 | } |
| 2346 | #endif |
| 2347 | } |
| 2348 | } |
| 2349 | else // curArgTabEntry->needTmp == false |
| 2350 | { |
| 2351 | // On x86 - |
| 2352 | // Only register args are replaced with placeholder nodes |
| 2353 | // and the stack based arguments are evaluated and pushed in order. |
| 2354 | // |
| 2355 | // On Arm/x64 - When needTmp is false and needPlace is false, |
| 2356 | // the non-register arguments are evaluated and stored in order. |
| 2357 | // When needPlace is true we have a nested call that comes after |
| 2358 | // this argument so we have to replace it in the gtCallArgs list |
| 2359 | // (the initial argument evaluation list) with a placeholder. |
| 2360 | // |
| 2361 | if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false)) |
| 2362 | { |
| 2363 | continue; |
| 2364 | } |
| 2365 | |
| 2366 | /* No temp needed - move the whole node to the gtCallLateArgs list */ |
| 2367 | |
| 2368 | /* The argument is deferred and put in the late argument list */ |
| 2369 | |
| 2370 | defArg = argx; |
| 2371 | |
| 2372 | // Create a placeholder node to put in its place in gtCallLateArgs. |
| 2373 | |
| 2374 | // For a struct type we also need to record the class handle of the arg. |
| 2375 | CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; |
| 2376 | |
| 2377 | #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) |
| 2378 | |
| 2379 | // All structs are either passed (and retyped) as integral types, OR they |
| 2380 | // are passed by reference. |
| 2381 | noway_assert(argx->gtType != TYP_STRUCT); |
| 2382 | |
| 2383 | #else // !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI) |
| 2384 | |
| 2385 | if (varTypeIsStruct(defArg)) |
| 2386 | { |
| 2387 | clsHnd = compiler->gtGetStructHandleIfPresent(defArg); |
| 2388 | noway_assert(clsHnd != NO_CLASS_HANDLE); |
| 2389 | } |
| 2390 | |
| 2391 | #endif // !(defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) |
| 2392 | |
| 2393 | setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd); |
| 2394 | |
| 2395 | /* mark the placeholder node as a late argument */ |
| 2396 | setupArg->gtFlags |= GTF_LATE_ARG; |
| 2397 | |
| 2398 | #ifdef DEBUG |
| 2399 | if (compiler->verbose) |
| 2400 | { |
| 2401 | if (curArgTabEntry->regNum == REG_STK) |
| 2402 | { |
| 2403 | printf("Deferred stack argument :\n" ); |
| 2404 | } |
| 2405 | else |
| 2406 | { |
| 2407 | printf("Deferred argument ('%s'):\n" , getRegName(curArgTabEntry->regNum)); |
| 2408 | } |
| 2409 | |
| 2410 | compiler->gtDispTree(argx); |
| 2411 | printf("Replaced with placeholder node:\n" ); |
| 2412 | compiler->gtDispTree(setupArg); |
| 2413 | } |
| 2414 | #endif |
| 2415 | } |
| 2416 | |
| 2417 | if (setupArg != nullptr) |
| 2418 | { |
| 2419 | if (curArgTabEntry->parent) |
| 2420 | { |
| 2421 | GenTree* parent = curArgTabEntry->parent; |
| 2422 | /* a normal argument from the list */ |
| 2423 | noway_assert(parent->OperIsList()); |
| 2424 | noway_assert(parent->gtOp.gtOp1 == argx); |
| 2425 | |
| 2426 | parent->gtFlags |= (setupArg->gtFlags & GTF_ALL_EFFECT); |
| 2427 | |
| 2428 | parent->gtOp.gtOp1 = setupArg; |
| 2429 | } |
| 2430 | else |
| 2431 | { |
| 2432 | /* must be the gtCallObjp */ |
| 2433 | noway_assert(callTree->gtCall.gtCallObjp == argx); |
| 2434 | |
| 2435 | callTree->gtCall.gtCallObjp = setupArg; |
| 2436 | } |
| 2437 | } |
| 2438 | |
| 2439 | /* deferred arg goes into the late argument list */ |
| 2440 | |
| 2441 | if (tmpRegArgNext == nullptr) |
| 2442 | { |
| 2443 | tmpRegArgNext = compiler->gtNewArgList(defArg); |
| 2444 | callTree->gtCall.gtCallLateArgs = tmpRegArgNext; |
| 2445 | } |
| 2446 | else |
| 2447 | { |
| 2448 | noway_assert(tmpRegArgNext->OperIsList()); |
| 2449 | noway_assert(tmpRegArgNext->Current()); |
| 2450 | tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg); |
| 2451 | |
| 2452 | tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT); |
| 2453 | tmpRegArgNext = tmpRegArgNext->Rest(); |
| 2454 | } |
| 2455 | |
| 2456 | tmpRegArgNext->gtFlags |= (defArg->gtFlags & GTF_ALL_EFFECT); |
| 2457 | |
| 2458 | curArgTabEntry->node = defArg; |
| 2459 | curArgTabEntry->lateArgInx = regArgInx++; |
| 2460 | } |
| 2461 | |
| 2462 | #ifdef DEBUG |
| 2463 | if (compiler->verbose) |
| 2464 | { |
| 2465 | printf("\nShuffled argument table: " ); |
| 2466 | for (unsigned curInx = 0; curInx < argCount; curInx++) |
| 2467 | { |
| 2468 | fgArgTabEntry* curArgTabEntry = argTable[curInx]; |
| 2469 | |
| 2470 | if (curArgTabEntry->regNum != REG_STK) |
| 2471 | { |
| 2472 | printf("%s " , getRegName(curArgTabEntry->regNum)); |
| 2473 | } |
| 2474 | } |
| 2475 | printf("\n" ); |
| 2476 | } |
| 2477 | #endif |
| 2478 | } |
| 2479 | |
| 2480 | // Return a conservative estimate of the stack size in bytes. |
| 2481 | // It will be used only on the intercepted-for-host code path to copy the arguments. |
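// The estimate simply assumes one register-sized stack slot for each argument beyond MAX_REG_ARG.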
| 2482 | int Compiler::fgEstimateCallStackSize(GenTreeCall* call) |
| 2483 | { |
| 2484 | |
| 2485 | int numArgs = 0; |
| 2486 | for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest()) |
| 2487 | { |
| 2488 | numArgs++; |
| 2489 | } |
| 2490 | |
| 2491 | int numStkArgs; |
| 2492 | if (numArgs > MAX_REG_ARG) |
| 2493 | { |
| 2494 | numStkArgs = numArgs - MAX_REG_ARG; |
| 2495 | } |
| 2496 | else |
| 2497 | { |
| 2498 | numStkArgs = 0; |
| 2499 | } |
| 2500 | |
| 2501 | return numStkArgs * REGSIZE_BYTES; |
| 2502 | } |
| 2503 | |
| 2504 | //------------------------------------------------------------------------------ |
// fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
//                  otherwise insert a comma form temp.
//
// Arguments:
//    pOp     - a pointer to the child node we will be replacing with the comma expression that
//              evaluates *pOp to a temp and returns the result
| 2511 | // |
| 2512 | // Return Value: |
| 2513 | // A fresh GT_LCL_VAR node referencing the temp which has not been used |
| 2514 | // |
| 2515 | // Assumption: |
| 2516 | // The result tree MUST be added to the tree structure since the ref counts are |
| 2517 | // already incremented. |
| 2518 | |
| 2519 | GenTree* Compiler::fgMakeMultiUse(GenTree** pOp) |
| 2520 | { |
| 2521 | GenTree* tree = *pOp; |
| 2522 | if (tree->IsLocal()) |
| 2523 | { |
| 2524 | return gtClone(tree); |
| 2525 | } |
| 2526 | else |
| 2527 | { |
| 2528 | return fgInsertCommaFormTemp(pOp); |
| 2529 | } |
| 2530 | } |
| 2531 | |
| 2532 | //------------------------------------------------------------------------------ |
| 2533 | // fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree, |
| 2534 | // and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl) |
| 2535 | // |
| 2536 | // Arguments: |
| 2537 | // ppTree - a pointer to the child node we will be replacing with the comma expression that |
| 2538 | // evaluates ppTree to a temp and returns the result |
| 2539 | // |
| 2540 | // structType - value type handle if the temp created is of TYP_STRUCT. |
| 2541 | // |
| 2542 | // Return Value: |
| 2543 | // A fresh GT_LCL_VAR node referencing the temp which has not been used |
| 2544 | // |
| 2545 | |
| 2546 | GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/) |
| 2547 | { |
| 2548 | GenTree* subTree = *ppTree; |
| 2549 | |
unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
| 2551 | |
| 2552 | if (varTypeIsStruct(subTree)) |
| 2553 | { |
| 2554 | assert(structType != nullptr); |
| 2555 | lvaSetStruct(lclNum, structType, false); |
| 2556 | } |
| 2557 | |
// If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
// The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() when
// setting the type of the LclVar nodes we create.
| 2561 | GenTree* asg = gtNewTempAssign(lclNum, subTree); |
| 2562 | |
| 2563 | GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET); |
| 2564 | |
| 2565 | GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load); |
| 2566 | |
| 2567 | *ppTree = comma; |
| 2568 | |
| 2569 | return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET); |
| 2570 | } |
| 2571 | |
| 2572 | //------------------------------------------------------------------------ |
| 2573 | // fgInitArgInfo: Construct the fgArgInfo for the call with the fgArgEntry for each arg |
| 2574 | // |
| 2575 | // Arguments: |
| 2576 | // callNode - the call for which we are generating the fgArgInfo |
| 2577 | // |
| 2578 | // Return Value: |
| 2579 | // None |
| 2580 | // |
| 2581 | // Notes: |
//    This method is idempotent: it checks whether the fgArgInfo has already been
//    constructed, and if so, just returns.
| 2584 | // This method only computes the arg table and arg entries for the call (the fgArgInfo), |
| 2585 | // and makes no modification of the args themselves. |
| 2586 | // |
| 2587 | void Compiler::fgInitArgInfo(GenTreeCall* call) |
| 2588 | { |
| 2589 | GenTree* args; |
| 2590 | GenTree* argx; |
| 2591 | |
| 2592 | unsigned argIndex = 0; |
| 2593 | unsigned intArgRegNum = 0; |
| 2594 | unsigned fltArgRegNum = 0; |
| 2595 | unsigned argSlots = 0; |
| 2596 | |
| 2597 | bool callHasRetBuffArg = call->HasRetBufArg(); |
| 2598 | bool callIsVararg = call->IsVarargs(); |
| 2599 | |
| 2600 | #ifdef _TARGET_ARM_ |
| 2601 | regMaskTP argSkippedRegMask = RBM_NONE; |
| 2602 | regMaskTP fltArgSkippedRegMask = RBM_NONE; |
| 2603 | #endif // _TARGET_ARM_ |
| 2604 | |
| 2605 | #if defined(_TARGET_X86_) |
| 2606 | unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated |
| 2607 | #else |
| 2608 | const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number |
| 2609 | #endif |
| 2610 | |
| 2611 | if (call->fgArgInfo != nullptr) |
| 2612 | { |
| 2613 | // We've already initialized and set the fgArgInfo. |
| 2614 | return; |
| 2615 | } |
| 2616 | JITDUMP("Initializing arg info for %d.%s:\n" , call->gtTreeID, GenTree::OpName(call->gtOper)); |
| 2617 | |
| 2618 | // At this point, we should never have gtCallLateArgs, as this needs to be done before those are determined. |
| 2619 | assert(call->gtCallLateArgs == nullptr); |
| 2620 | |
| 2621 | #ifdef _TARGET_UNIX_ |
| 2622 | if (callIsVararg) |
| 2623 | { |
// Currently native varargs is not implemented on non-Windows targets.
//
// Note that some targets like Arm64 Unix should not need much work, as
// the ABI is the same, while other targets may only need small changes,
// such as amd64 Unix, which just expects RAX to pass numFPArguments.
| 2629 | NYI("Morphing Vararg call not yet implemented on non Windows targets." ); |
| 2630 | } |
| 2631 | #endif // _TARGET_UNIX_ |
| 2632 | |
| 2633 | // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed |
| 2634 | // following the normal calling convention or in the normal argument registers. We either mark existing |
| 2635 | // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the |
| 2636 | // non-standard arguments into the argument list, below. |
| 2637 | class NonStandardArgs |
| 2638 | { |
| 2639 | struct NonStandardArg |
| 2640 | { |
| 2641 | regNumber reg; // The register to be assigned to this non-standard argument. |
| 2642 | GenTree* node; // The tree node representing this non-standard argument. |
| 2643 | // Note that this must be updated if the tree node changes due to morphing! |
| 2644 | }; |
| 2645 | |
| 2646 | ArrayStack<NonStandardArg> args; |
| 2647 | |
| 2648 | public: |
| 2649 | NonStandardArgs(CompAllocator alloc) : args(alloc, 3) // We will have at most 3 non-standard arguments |
| 2650 | { |
| 2651 | } |
| 2652 | |
| 2653 | //----------------------------------------------------------------------------- |
| 2654 | // Add: add a non-standard argument to the table of non-standard arguments |
| 2655 | // |
| 2656 | // Arguments: |
| 2657 | // node - a GenTree node that has a non-standard argument. |
| 2658 | // reg - the register to assign to this node. |
| 2659 | // |
| 2660 | // Return Value: |
| 2661 | // None. |
| 2662 | // |
| 2663 | void Add(GenTree* node, regNumber reg) |
| 2664 | { |
| 2665 | NonStandardArg nsa = {reg, node}; |
| 2666 | args.Push(nsa); |
| 2667 | } |
| 2668 | |
| 2669 | //----------------------------------------------------------------------------- |
| 2670 | // Find: Look for a GenTree* in the set of non-standard args. |
| 2671 | // |
| 2672 | // Arguments: |
| 2673 | // node - a GenTree node to look for |
| 2674 | // |
| 2675 | // Return Value: |
| 2676 | // The index of the non-standard argument (a non-negative, unique, stable number). |
| 2677 | // If the node is not a non-standard argument, return -1. |
| 2678 | // |
| 2679 | int Find(GenTree* node) |
| 2680 | { |
| 2681 | for (int i = 0; i < args.Height(); i++) |
| 2682 | { |
| 2683 | if (node == args.Index(i).node) |
| 2684 | { |
| 2685 | return i; |
| 2686 | } |
| 2687 | } |
| 2688 | return -1; |
| 2689 | } |
| 2690 | |
| 2691 | //----------------------------------------------------------------------------- |
| 2692 | // FindReg: Look for a GenTree node in the non-standard arguments set. If found, |
| 2693 | // set the register to use for the node. |
| 2694 | // |
| 2695 | // Arguments: |
| 2696 | // node - a GenTree node to look for |
| 2697 | // pReg - an OUT argument. *pReg is set to the non-standard register to use if |
| 2698 | // 'node' is found in the non-standard argument set. |
| 2699 | // |
| 2700 | // Return Value: |
| 2701 | // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the |
| 2702 | // register to use. |
| 2703 | // 'false' otherwise (in this case, *pReg is unmodified). |
| 2704 | // |
| 2705 | bool FindReg(GenTree* node, regNumber* pReg) |
| 2706 | { |
| 2707 | for (int i = 0; i < args.Height(); i++) |
| 2708 | { |
| 2709 | NonStandardArg& nsa = args.IndexRef(i); |
| 2710 | if (node == nsa.node) |
| 2711 | { |
| 2712 | *pReg = nsa.reg; |
| 2713 | return true; |
| 2714 | } |
| 2715 | } |
| 2716 | return false; |
| 2717 | } |
| 2718 | |
| 2719 | //----------------------------------------------------------------------------- |
| 2720 | // Replace: Replace the non-standard argument node at a given index. This is done when |
| 2721 | // the original node was replaced via morphing, but we need to continue to assign a |
| 2722 | // particular non-standard arg to it. |
| 2723 | // |
| 2724 | // Arguments: |
| 2725 | // index - the index of the non-standard arg. It must exist. |
| 2726 | // node - the new GenTree node. |
| 2727 | // |
| 2728 | // Return Value: |
| 2729 | // None. |
| 2730 | // |
| 2731 | void Replace(int index, GenTree* node) |
| 2732 | { |
| 2733 | args.IndexRef(index).node = node; |
| 2734 | } |
| 2735 | |
| 2736 | } nonStandardArgs(getAllocator(CMK_ArrayStack)); |
| 2737 | |
| 2738 | // Count of args. On first morph, this is counted before we've filled in the arg table. |
| 2739 | // On remorph, we grab it from the arg table. |
| 2740 | unsigned numArgs = 0; |
| 2741 | |
| 2742 | // First we need to count the args |
| 2743 | if (call->gtCallObjp) |
| 2744 | { |
| 2745 | numArgs++; |
| 2746 | } |
| 2747 | for (GenTree* args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2) |
| 2748 | { |
| 2749 | numArgs++; |
| 2750 | } |
| 2751 | |
| 2752 | // Insert or mark non-standard args. These are either outside the normal calling convention, or |
| 2753 | // arguments registers that don't follow the normal progression of argument registers in the calling |
| 2754 | // convention (such as for the ARM64 fixed return buffer argument x8). |
| 2755 | // |
| 2756 | // *********** NOTE ************* |
| 2757 | // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments |
| 2758 | // in the implementation of fast tail call. |
| 2759 | // *********** END NOTE ********* |
| 2760 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 2761 | |
| 2762 | #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
// The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
| 2764 | // Set the argument registers correctly here. |
| 2765 | if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME)) |
| 2766 | { |
| 2767 | GenTreeArgList* args = call->gtCallArgs; |
| 2768 | GenTree* arg1 = args->Current(); |
| 2769 | assert(arg1 != nullptr); |
| 2770 | nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME); |
| 2771 | } |
| 2772 | #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
| 2773 | #if defined(_TARGET_ARM_) |
// A non-standard calling convention that reuses the secure delegate invoke mechanism is used on ARM only,
// though not for secure delegates themselves. It is used for VSD delegate calls where the VSD custom calling
// convention ABI requires passing
| 2776 | // R4, a callee-saved register, with a special value. Since R4 is a callee-saved register, its value needs |
| 2777 | // to be preserved. Thus, the VM uses a secure delegate IL stub, which preserves R4 and also sets up R4 |
| 2778 | // correctly for the VSD call. The VM is simply reusing an existing mechanism (secure delegate IL stub) |
| 2779 | // to achieve its goal for delegate VSD call. See COMDelegate::NeedsWrapperDelegate() in the VM for details. |
| 2780 | else if (call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV) |
| 2781 | { |
| 2782 | GenTree* arg = call->gtCallObjp; |
| 2783 | if (arg->OperIsLocal()) |
| 2784 | { |
| 2785 | arg = gtClone(arg, true); |
| 2786 | } |
| 2787 | else |
| 2788 | { |
| 2789 | GenTree* tmp = fgInsertCommaFormTemp(&arg); |
| 2790 | call->gtCallObjp = arg; |
| 2791 | call->gtFlags |= GTF_ASG; |
| 2792 | arg = tmp; |
| 2793 | } |
| 2794 | noway_assert(arg != nullptr); |
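// Pass the address of the indirection cell within the delegate object
// (this + offsetOfSecureDelegateIndirectCell) in the virtual stub parameter register.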
| 2795 | |
| 2796 | GenTree* newArg = new (this, GT_ADDR) |
| 2797 | GenTreeAddrMode(TYP_BYREF, arg, nullptr, 0, eeGetEEInfo()->offsetOfSecureDelegateIndirectCell); |
| 2798 | |
| 2799 | // Append newArg as the last arg |
| 2800 | GenTreeArgList** insertionPoint = &call->gtCallArgs; |
| 2801 | for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest()) |
| 2802 | { |
| 2803 | } |
| 2804 | *insertionPoint = gtNewListNode(newArg, nullptr); |
| 2805 | |
| 2806 | numArgs++; |
| 2807 | nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg()); |
| 2808 | } |
| 2809 | #endif // defined(_TARGET_ARM_) |
| 2810 | #if defined(_TARGET_X86_) |
| 2811 | // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the |
| 2812 | // hi part to be in EDX. This sets the argument registers up correctly. |
| 2813 | else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) || |
| 2814 | call->IsHelperCall(this, CORINFO_HELP_LRSZ)) |
| 2815 | { |
| 2816 | GenTreeArgList* args = call->gtCallArgs; |
| 2817 | GenTree* arg1 = args->Current(); |
| 2818 | assert(arg1 != nullptr); |
| 2819 | nonStandardArgs.Add(arg1, REG_LNGARG_LO); |
| 2820 | |
| 2821 | args = args->Rest(); |
| 2822 | GenTree* arg2 = args->Current(); |
| 2823 | assert(arg2 != nullptr); |
| 2824 | nonStandardArgs.Add(arg2, REG_LNGARG_HI); |
| 2825 | } |
| 2826 | #else // !_TARGET_X86_ |
| 2827 | // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed. |
| 2828 | // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling |
| 2829 | // convention for x86/SSE. |
| 2830 | |
| 2831 | // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it |
| 2832 | // |
| 2833 | if (hasFixedRetBuffReg() && call->HasRetBufArg()) |
| 2834 | { |
| 2835 | args = call->gtCallArgs; |
| 2836 | assert(args != nullptr); |
| 2837 | assert(args->OperIsList()); |
| 2838 | |
| 2839 | argx = call->gtCallArgs->Current(); |
| 2840 | |
| 2841 | // We don't increment numArgs here, since we already counted this argument above. |
| 2842 | |
| 2843 | nonStandardArgs.Add(argx, theFixedRetBuffReg()); |
| 2844 | } |
| 2845 | |
| 2846 | // We are allowed to have a Fixed Return Buffer argument combined |
| 2847 | // with any of the remaining non-standard arguments |
| 2848 | // |
| 2849 | if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers()) |
| 2850 | { |
| 2851 | assert(!call->gtCallCookie); |
| 2852 | // Add a conservative estimate of the stack size in a special parameter (r11) at the call site. |
| 2853 | // It will be used only on the intercepted-for-host code path to copy the arguments. |
| 2854 | |
| 2855 | GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call)); |
| 2856 | call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs); |
| 2857 | numArgs++; |
| 2858 | |
| 2859 | nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM); |
| 2860 | } |
| 2861 | else if (call->IsVirtualStub()) |
| 2862 | { |
| 2863 | if (!call->IsTailCallViaHelper()) |
| 2864 | { |
| 2865 | GenTree* stubAddrArg = fgGetStubAddrArg(call); |
| 2866 | // And push the stub address onto the list of arguments |
| 2867 | call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs); |
| 2868 | |
| 2869 | numArgs++; |
| 2870 | nonStandardArgs.Add(stubAddrArg, stubAddrArg->gtRegNum); |
| 2871 | } |
| 2872 | else |
| 2873 | { |
| 2874 | // If it is a VSD call getting dispatched via tail call helper, |
| 2875 | // fgMorphTailCall() would materialize stub addr as an additional |
| 2876 | // parameter added to the original arg list and hence no need to |
| 2877 | // add as a non-standard arg. |
| 2878 | } |
| 2879 | } |
| 2880 | else |
| 2881 | #endif // !_TARGET_X86_ |
| 2882 | if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr)) |
| 2883 | { |
| 2884 | assert(!call->IsUnmanaged()); |
| 2885 | |
| 2886 | GenTree* arg = call->gtCallCookie; |
| 2887 | noway_assert(arg != nullptr); |
| 2888 | call->gtCallCookie = nullptr; |
| 2889 | |
| 2890 | #if defined(_TARGET_X86_) |
| 2891 | // x86 passes the cookie on the stack as the final argument to the call. |
| 2892 | GenTreeArgList** insertionPoint = &call->gtCallArgs; |
| 2893 | for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest()) |
| 2894 | { |
| 2895 | } |
| 2896 | *insertionPoint = gtNewListNode(arg, nullptr); |
| 2897 | #else // !defined(_TARGET_X86_) |
| 2898 | // All other architectures pass the cookie in a register. |
| 2899 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
| 2900 | #endif // defined(_TARGET_X86_) |
| 2901 | |
| 2902 | nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM); |
| 2903 | numArgs++; |
| 2904 | |
| 2905 | // put destination into R10/EAX |
| 2906 | arg = gtClone(call->gtCallAddr, true); |
| 2907 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
| 2908 | numArgs++; |
| 2909 | |
| 2910 | nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM); |
| 2911 | |
| 2912 | // finally change this call to a helper call |
| 2913 | call->gtCallType = CT_HELPER; |
| 2914 | call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI); |
| 2915 | } |
| 2916 | #if defined(FEATURE_READYTORUN_COMPILER) && defined(_TARGET_ARMARCH_) |
// For arm, we dispatch the call the same way as VSD, using virtualStubParamInfo->GetReg()
// for the indirection cell address, which ZapIndirectHelperThunk expects.
| 2919 | if (call->IsR2RRelativeIndir()) |
| 2920 | { |
| 2921 | assert(call->gtEntryPoint.addr != nullptr); |
| 2922 | |
| 2923 | size_t addrValue = (size_t)call->gtEntryPoint.addr; |
| 2924 | GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR); |
| 2925 | indirectCellAddress->gtRegNum = REG_R2R_INDIRECT_PARAM; |
| 2926 | |
| 2927 | // Push the stub address onto the list of arguments. |
| 2928 | call->gtCallArgs = gtNewListNode(indirectCellAddress, call->gtCallArgs); |
| 2929 | |
| 2930 | numArgs++; |
| 2931 | nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->gtRegNum); |
| 2932 | } |
| 2933 | |
| 2934 | #endif // FEATURE_READYTORUN_COMPILER && _TARGET_ARMARCH_ |
| 2935 | |
| 2936 | // Allocate the fgArgInfo for the call node; |
| 2937 | // |
| 2938 | call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs); |
| 2939 | |
| 2940 | // Add the 'this' argument value, if present. |
| 2941 | argx = call->gtCallObjp; |
| 2942 | if (argx != nullptr) |
| 2943 | { |
| 2944 | assert(argIndex == 0); |
| 2945 | assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT); |
| 2946 | assert(varTypeIsGC(argx) || (argx->gtType == TYP_I_IMPL)); |
| 2947 | |
| 2948 | // This is a register argument - put it in the table. |
| 2949 | call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1, false, |
| 2950 | callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(nullptr)); |
| 2951 | |
| 2952 | intArgRegNum++; |
| 2953 | #ifdef WINDOWS_AMD64_ABI |
| 2954 | // Whenever we pass an integer register argument |
| 2955 | // we skip the corresponding floating point register argument |
| 2956 | fltArgRegNum++; |
| 2957 | #endif // WINDOWS_AMD64_ABI |
| 2958 | argIndex++; |
| 2959 | argSlots++; |
| 2960 | } |
| 2961 | |
| 2962 | #ifdef _TARGET_X86_ |
| 2963 | // Compute the maximum number of arguments that can be passed in registers. |
| 2964 | // For X86 we handle the varargs and unmanaged calling conventions |
| 2965 | |
| 2966 | if (call->gtFlags & GTF_CALL_POP_ARGS) |
| 2967 | { |
| 2968 | noway_assert(intArgRegNum < MAX_REG_ARG); |
| 2969 | // No more register arguments for varargs (CALL_POP_ARGS) |
| 2970 | maxRegArgs = intArgRegNum; |
| 2971 | |
| 2972 | // Add in the ret buff arg |
| 2973 | if (callHasRetBuffArg) |
| 2974 | maxRegArgs++; |
| 2975 | } |
| 2976 | |
| 2977 | if (call->IsUnmanaged()) |
| 2978 | { |
| 2979 | noway_assert(intArgRegNum == 0); |
| 2980 | |
| 2981 | if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL) |
| 2982 | { |
| 2983 | noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL || |
| 2984 | call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF || |
| 2985 | call->gtCallArgs->gtOp.gtOp1->gtOper == |
| 2986 | GT_NOP); // the arg was already morphed to a register (fgMorph called twice) |
| 2987 | maxRegArgs = 1; |
| 2988 | } |
| 2989 | else |
| 2990 | { |
| 2991 | maxRegArgs = 0; |
| 2992 | } |
| 2993 | |
| 2994 | // Add in the ret buff arg |
| 2995 | if (callHasRetBuffArg) |
| 2996 | maxRegArgs++; |
| 2997 | } |
| 2998 | #endif // _TARGET_X86_ |
| 2999 | |
| 3000 | /* Morph the user arguments */ |
| 3001 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 3002 | |
| 3003 | #if defined(_TARGET_ARM_) |
| 3004 | |
| 3005 | // The ARM ABI has a concept of back-filling of floating-point argument registers, according |
| 3006 | // to the "Procedure Call Standard for the ARM Architecture" document, especially |
| 3007 | // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can |
| 3008 | // appear in a lower-numbered register than floating point argument N. That is, argument |
| 3009 | // register allocation is not strictly increasing. To support this, we need to keep track of unused |
| 3010 | // floating-point argument registers that we can back-fill. We only support 4-byte float and |
| 3011 | // 8-byte double types, and one to four element HFAs composed of these types. With this, we will |
| 3012 | // only back-fill single registers, since there is no way with these types to create |
| 3013 | // an alignment hole greater than one register. However, there can be up to 3 back-fill slots |
| 3014 | // available (with 16 FP argument registers). Consider this code: |
| 3015 | // |
| 3016 | // struct HFA { float x, y, z; }; // a three element HFA |
| 3017 | // void bar(float a1, // passed in f0 |
| 3018 | // double a2, // passed in f2/f3; skip f1 for alignment |
| 3019 | // HFA a3, // passed in f4/f5/f6 |
| 3020 | // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot |
| 3021 | // HFA a5, // passed in f10/f11/f12 |
| 3022 | // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill |
| 3023 | // // slots |
| 3024 | // float a7, // passed in f1 (back-filled) |
| 3025 | // float a8, // passed in f7 (back-filled) |
| 3026 | // float a9, // passed in f13 (back-filled) |
| 3027 | // float a10) // passed on the stack in [OutArg+0] |
| 3028 | // |
| 3029 | // Note that if we ever support FP types with larger alignment requirements, then there could |
| 3030 | // be more than single register back-fills. |
| 3031 | // |
| 3032 | // Once we assign a floating-point argument to the stack, all subsequent floating-point arguments must also be on the stack. |
| 3033 | // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling |
| 3034 | // continues only so long as no VFP CPRC has been allocated to a slot on the stack." |
| 3035 | // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack |
| 3036 | // and prevent any additional floating-point arguments from going in registers. |
| 3037 | |
| 3038 | bool anyFloatStackArgs = false; |
| 3039 | |
| 3040 | #endif // _TARGET_ARM_ |
| 3041 | |
| 3042 | #ifdef UNIX_AMD64_ABI |
| 3043 | SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; |
| 3044 | #endif // UNIX_AMD64_ABI |
| 3045 | |
| 3046 | for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++) |
| 3047 | { |
| 3048 | assert(args->OperIsList()); |
| 3049 | argx = args->Current(); |
| 3050 | fgArgTabEntry* argEntry = nullptr; |
| 3051 | |
| 3052 | // Change the node to TYP_I_IMPL so we don't report GC info |
| 3053 | // NOTE: We deferred this from the importer because of the inliner. |
| 3054 | |
| 3055 | if (argx->IsVarAddr()) |
| 3056 | { |
| 3057 | argx->gtType = TYP_I_IMPL; |
| 3058 | } |
| 3059 | |
| 3060 | // We should never have any ArgPlaceHolder nodes at this point. |
| 3061 | assert(!argx->IsArgPlaceHolderNode()); |
| 3062 | |
| 3063 | // Setup any HFA information about 'argx' |
| 3064 | bool isHfaArg = false; |
| 3065 | var_types hfaType = TYP_UNDEF; |
| 3066 | unsigned hfaSlots = 0; |
| 3067 | |
| 3068 | bool passUsingFloatRegs; |
| 3069 | unsigned argAlign = 1; |
| 3070 | unsigned size = 0; |
| 3071 | CORINFO_CLASS_HANDLE copyBlkClass = nullptr; |
| 3072 | bool isRegArg = false; |
| 3073 | bool isNonStandard = false; |
| 3074 | regNumber nonStdRegNum = REG_NA; |
| 3075 | |
| 3076 | #ifdef FEATURE_HFA |
| 3077 | hfaType = GetHfaType(argx); |
| 3078 | isHfaArg = varTypeIsFloating(hfaType); |
| 3079 | |
| 3080 | #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) |
| 3081 | // Make sure for vararg methods isHfaArg is not true. |
| 3082 | isHfaArg = callIsVararg ? false : isHfaArg; |
| 3083 | #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) |
| 3084 | |
| 3085 | if (isHfaArg) |
| 3086 | { |
| 3088 | hfaSlots = GetHfaCount(argx); |
| 3089 | |
| 3090 | // If we have an HFA struct, it's possible that a method which originally used only |
| 3091 | // integer types now uses FP types. We must communicate this through this flag, since |
| 3092 | // LSRA will later use it to determine whether or not to track the FP register set. |
| 3094 | // |
| 3095 | compFloatingPointUsed = true; |
| 3096 | } |
| 3097 | #endif // FEATURE_HFA |
| 3098 | |
| 3099 | #ifdef _TARGET_ARM_ |
| 3100 | passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP; |
| 3101 | bool passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG); |
| 3102 | |
| 3103 | // We don't use the "size" return value from InferOpSizeAlign(). |
| 3104 | codeGen->InferOpSizeAlign(argx, &argAlign); |
| 3105 | |
| 3106 | argAlign = roundUp(argAlign, TARGET_POINTER_SIZE); |
| 3107 | argAlign /= TARGET_POINTER_SIZE; |
| 3108 | |
| 3109 | if (argAlign == 2) |
| 3110 | { |
| 3111 | if (passUsingFloatRegs) |
| 3112 | { |
| 3113 | if (fltArgRegNum % 2 == 1) |
| 3114 | { |
| 3115 | fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); |
| 3116 | fltArgRegNum++; |
| 3117 | } |
| 3118 | } |
| 3119 | else if (passUsingIntRegs) |
| 3120 | { |
| 3121 | if (intArgRegNum % 2 == 1) |
| 3122 | { |
| 3123 | argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); |
| 3124 | intArgRegNum++; |
| 3125 | } |
| 3126 | } |
| 3127 | |
| 3128 | if (argSlots % 2 == 1) |
| 3129 | { |
| 3130 | argSlots++; |
| 3131 | } |
| 3132 | } |
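| | // A minimal sketch of the alignment handling above (hypothetical signature, not taken from this code): |
| | // |
| | //     void f(int a, __int64 b);  // 'a' goes in r0; 'b' needs 8-byte alignment, so r1 is |
| | //                                // skipped (recorded in argSkippedRegMask) and 'b' goes in r2/r3. |
| | // |
| | // Note that only skipped floating-point registers (fltArgSkippedRegMask) are candidates for back-filling. |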
| 3133 | |
| 3134 | #elif defined(_TARGET_ARM64_) |
| 3135 | |
| 3136 | assert(!callIsVararg || !isHfaArg); |
| 3137 | passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)); |
| 3138 | |
| 3139 | #elif defined(_TARGET_AMD64_) |
| 3140 | |
| 3141 | passUsingFloatRegs = varTypeIsFloating(argx); |
| 3142 | |
| 3143 | #elif defined(_TARGET_X86_) |
| 3144 | |
| 3145 | passUsingFloatRegs = false; |
| 3146 | |
| 3147 | #else |
| 3148 | #error Unsupported or unset target architecture |
| 3149 | #endif // _TARGET_* |
| 3150 | |
| 3151 | bool isBackFilled = false; |
| 3152 | unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use |
| 3153 | var_types structBaseType = TYP_STRUCT; |
| 3154 | unsigned structSize = 0; |
| 3155 | bool passStructByRef = false; |
| 3156 | |
| 3157 | bool isStructArg; |
| 3158 | GenTree* actualArg = argx->gtEffectiveVal(true /* Commas only */); |
| 3159 | |
| 3160 | // |
| 3161 | // Figure out the size of the argument. This is either in number of registers, or number of |
| 3162 | // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and |
| 3163 | // the stack. |
| 3164 | // |
| 3165 | isStructArg = varTypeIsStruct(argx); |
| 3166 | CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE; |
| 3167 | if (isStructArg) |
| 3168 | { |
| 3169 | objClass = gtGetStructHandle(argx); |
| 3170 | if (argx->TypeGet() == TYP_STRUCT) |
| 3171 | { |
| 3172 | // For TYP_STRUCT arguments we must have an OBJ, LCL_VAR or MKREFANY |
| 3173 | switch (actualArg->OperGet()) |
| 3174 | { |
| 3175 | case GT_OBJ: |
| 3176 | // Get the size off the OBJ node. |
| 3177 | structSize = actualArg->AsObj()->gtBlkSize; |
| 3178 | assert(structSize == info.compCompHnd->getClassSize(objClass)); |
| 3179 | break; |
| 3180 | case GT_LCL_VAR: |
| 3181 | structSize = lvaGetDesc(actualArg->AsLclVarCommon())->lvExactSize; |
| 3182 | break; |
| 3183 | case GT_MKREFANY: |
| 3184 | structSize = info.compCompHnd->getClassSize(objClass); |
| 3185 | break; |
| 3186 | default: |
| 3187 | BADCODE("illegal argument tree in fgInitArgInfo"); |
| 3188 | break; |
| 3189 | } |
| 3190 | } |
| 3191 | else |
| 3192 | { |
| 3193 | structSize = genTypeSize(argx); |
| 3194 | assert(structSize == info.compCompHnd->getClassSize(objClass)); |
| 3195 | } |
| 3196 | } |
| 3197 | #if defined(_TARGET_AMD64_) |
| 3198 | #ifdef UNIX_AMD64_ABI |
| 3199 | if (!isStructArg) |
| 3200 | { |
| 3201 | size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' |
| 3202 | } |
| 3203 | else |
| 3204 | { |
| 3205 | size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; |
| 3206 | eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); |
| 3207 | } |
| 3208 | #else // !UNIX_AMD64_ABI |
| 3209 | size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' |
| 3210 | #endif // UNIX_AMD64_ABI |
| 3211 | #elif defined(_TARGET_ARM64_) |
| 3212 | if (isStructArg) |
| 3213 | { |
| 3214 | if (isHfaArg) |
| 3215 | { |
| 3216 | // HFA structs are passed by value in multiple registers. |
| 3217 | // The "size" in registers may differ from the size in pointer-sized units. |
| 3218 | size = GetHfaCount(argx); |
| 3219 | } |
| 3220 | else |
| 3221 | { |
| 3222 | // Structs are either passed in 1 or 2 (64-bit) slots. |
| 3223 | // Structs that are the size of 2 pointers are passed by value in multiple registers, |
| 3224 | // if sufficient registers are available. |
| 3225 | // Structs that are larger than 2 pointers (except for HFAs) are passed by |
| 3226 | // reference (to a copy) |
| 3227 | size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; |
| 3228 | |
| 3229 | if (size > 2) |
| 3230 | { |
| 3231 | size = 1; |
| 3232 | } |
| 3233 | } |
| 3234 | // Note that there are some additional rules for multireg structs |
| 3235 | // (i.e., they cannot be split between registers and the stack). |
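| | // For example (a sketch of the sizing rules above, using hypothetical struct types): |
| | //     struct S16 { __int64 a, b; };     // 16 bytes: size == 2, passed by value in two registers |
| | //     struct S24 { __int64 a, b, c; };  // 24 bytes: size > 2, passed by reference to a copy, |
| | //                                       // so 'size' is reset to 1 (one pointer-sized slot). |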
| 3236 | } |
| 3237 | else |
| 3238 | { |
| 3239 | size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot' |
| 3240 | } |
| 3241 | #elif defined(_TARGET_ARM_) || defined(_TARGET_X86_) |
| 3242 | if (isStructArg) |
| 3243 | { |
| 3244 | size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; |
| 3245 | } |
| 3246 | else |
| 3247 | { |
| 3248 | // The typical case. |
| 3249 | // Long/double type argument(s) will be modified as needed in Lowering. |
| 3250 | size = genTypeStSz(argx->gtType); |
| 3251 | } |
| 3252 | #else |
| 3253 | #error Unsupported or unset target architecture |
| 3254 | #endif // _TARGET_XXX_ |
| 3255 | if (isStructArg) |
| 3256 | { |
| 3257 | // We have an argument with a struct type, but it may be a child of a GT_COMMA |
| 3258 | GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/); |
| 3259 | |
| 3260 | assert(args->OperIsList()); |
| 3261 | assert(argx == args->Current()); |
| 3262 | |
| 3263 | unsigned originalSize = structSize; |
| 3264 | originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize); |
| 3265 | unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE); |
| 3266 | |
| 3267 | structSize = originalSize; |
| 3268 | |
| 3269 | structPassingKind howToPassStruct; |
| 3270 | |
| 3271 | structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, originalSize); |
| 3272 | |
| 3273 | bool passedInRegisters = false; |
| 3274 | passStructByRef = (howToPassStruct == SPK_ByReference); |
| 3275 | |
| 3276 | if (howToPassStruct == SPK_PrimitiveType) |
| 3277 | { |
| 3278 | // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register. |
| 3279 | // For ARM or AMD64/Windows only power-of-2 structs are passed in registers. |
| 3280 | #if !defined(_TARGET_ARM64_) && !defined(UNIX_AMD64_ABI) |
| 3281 | if (!isPow2(originalSize)) |
| 3282 | #endif // !_TARGET_ARM64_ && !UNIX_AMD64_ABI |
| 3283 | { |
| 3284 | passedInRegisters = true; |
| 3285 | } |
| 3286 | #ifdef _TARGET_ARM_ |
| 3287 | // TODO-CQ: getArgTypeForStruct should *not* return TYP_DOUBLE for a double struct, |
| 3288 | // or for a struct of two floats. This causes the struct to be address-taken. |
| 3289 | if (structBaseType == TYP_DOUBLE) |
| 3290 | { |
| 3291 | size = 2; |
| 3292 | } |
| 3293 | else |
| 3294 | #endif // _TARGET_ARM_ |
| 3295 | { |
| 3296 | size = 1; |
| 3297 | } |
| 3298 | } |
| 3299 | else if (passStructByRef) |
| 3300 | { |
| 3301 | size = 1; |
| 3302 | } |
| 3303 | } |
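| | // A sketch of the SPK_PrimitiveType case above (hypothetical struct, not from this code): |
| | // a 4-byte struct such as 'struct Pair { short lo, hi; };' has a power-of-2 size, so |
| | // getArgTypeForStruct returns a primitive base type (TYP_INT here) and the argument is |
| | // passed in a single register-sized slot ('size' == 1) rather than by reference. |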
| 3304 | |
| 3305 | // The 'size' value must now have been set (the original value of zero is an invalid value). |
| 3306 | assert(size != 0); |
| 3307 | |
| 3308 | // |
| 3309 | // Figure out if the argument will be passed in a register. |
| 3310 | // |
| 3311 | |
| 3312 | if (isRegParamType(genActualType(argx->TypeGet())) |
| 3313 | #ifdef UNIX_AMD64_ABI |
| 3314 | && (!isStructArg || structDesc.passedInRegisters) |
| 3315 | #endif |
| 3316 | ) |
| 3317 | { |
| 3318 | #ifdef _TARGET_ARM_ |
| 3319 | if (passUsingFloatRegs) |
| 3320 | { |
| 3321 | // First, see if it can be back-filled |
| 3322 | if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet) |
| 3323 | (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot? |
| 3324 | (size == 1)) // The size to back-fill is one float register |
| 3325 | { |
| 3326 | // Back-fill the register. |
| 3327 | isBackFilled = true; |
| 3328 | regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); |
| 3329 | fltArgSkippedRegMask &= |
| 3330 | ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask |
| 3331 | nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask)); |
| 3332 | assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG); |
| 3333 | } |
| 3334 | |
| 3335 | // Does the entire float, double, or HFA fit in the FP arg registers? |
| 3336 | // Check if the last register needed is still in the argument register range. |
| 3337 | isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG; |
| 3338 | |
| 3339 | if (!isRegArg) |
| 3340 | { |
| 3341 | anyFloatStackArgs = true; |
| 3342 | } |
| 3343 | } |
| 3344 | else |
| 3345 | { |
| 3346 | isRegArg = intArgRegNum < MAX_REG_ARG; |
| 3347 | } |
| 3348 | #elif defined(_TARGET_ARM64_) |
| 3349 | if (passUsingFloatRegs) |
| 3350 | { |
| 3351 | // Check if the last register needed is still in the fp argument register range. |
| 3352 | isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; |
| 3353 | |
| 3354 | // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers? |
| 3355 | if (isHfaArg && !isRegArg) |
| 3356 | { |
| 3357 | // Recompute the 'size' so that it represents the number of stack slots rather than the number of |
| 3358 | // registers. |
| 3359 | // |
| 3360 | unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE); |
| 3361 | size = roundupSize / TARGET_POINTER_SIZE; |
| 3362 | |
| 3363 | // We also must update fltArgRegNum so that we no longer try to |
| 3364 | // allocate any new floating point registers for args |
| 3365 | // This prevents us from backfilling a subsequent arg into d7 |
| 3366 | // |
| 3367 | fltArgRegNum = MAX_FLOAT_REG_ARG; |
| 3368 | } |
| 3369 | } |
| 3370 | else |
| 3371 | { |
| 3372 | // Check if the last register needed is still in the int argument register range. |
| 3373 | isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; |
| 3374 | |
| 3375 | // Did we run out of registers when we had a 16-byte struct (size == 2)? |
| 3376 | // (i.e we only have one register remaining but we needed two registers to pass this arg) |
| 3377 | // This prevents us from backfilling a subsequent arg into x7 |
| 3378 | // |
| 3379 | if (!isRegArg && (size > 1)) |
| 3380 | { |
| 3381 | #if defined(_TARGET_WINDOWS_) |
| 3382 | // Arm64 windows native varargs allows splitting a 16 byte struct between stack |
| 3383 | // and the last general purpose register. |
| 3384 | if (callIsVararg) |
| 3385 | { |
| 3386 | // Override the decision and force a split. |
| 3387 | isRegArg = (intArgRegNum + (size - 1)) <= maxRegArgs; |
| 3388 | } |
| 3389 | else |
| 3390 | #endif // defined(_TARGET_WINDOWS_) |
| 3391 | { |
| 3392 | // We also must update intArgRegNum so that we no longer try to |
| 3393 | // allocate any new general purpose registers for args |
| 3394 | // |
| 3395 | intArgRegNum = maxRegArgs; |
| 3396 | } |
| 3397 | } |
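| | // For example (a sketch, assuming seven integer argument registers are already in use): |
| | // a 16-byte struct would need x7 plus one stack slot. Outside of Windows varargs such a |
| | // split is not allowed, so the struct goes entirely on the stack and intArgRegNum is set |
| | // to maxRegArgs so that no later argument is placed in x7. |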
| 3398 | } |
| 3399 | #else // not _TARGET_ARM_ or _TARGET_ARM64_ |
| 3400 | |
| 3401 | #if defined(UNIX_AMD64_ABI) |
| 3402 | |
| 3403 | // Here a struct can be passed in registers, following the classification of its members and size. |
| 3404 | // Now make sure there are actually enough registers to do so. |
| 3405 | if (isStructArg) |
| 3406 | { |
| 3407 | unsigned int structFloatRegs = 0; |
| 3408 | unsigned int structIntRegs = 0; |
| 3409 | for (unsigned int i = 0; i < structDesc.eightByteCount; i++) |
| 3410 | { |
| 3411 | if (structDesc.IsIntegralSlot(i)) |
| 3412 | { |
| 3413 | structIntRegs++; |
| 3414 | } |
| 3415 | else if (structDesc.IsSseSlot(i)) |
| 3416 | { |
| 3417 | structFloatRegs++; |
| 3418 | } |
| 3419 | } |
| 3420 | |
| 3421 | isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) && |
| 3422 | ((intArgRegNum + structIntRegs) <= MAX_REG_ARG); |
| 3423 | } |
| 3424 | else |
| 3425 | { |
| 3426 | if (passUsingFloatRegs) |
| 3427 | { |
| 3428 | isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG; |
| 3429 | } |
| 3430 | else |
| 3431 | { |
| 3432 | isRegArg = intArgRegNum < MAX_REG_ARG; |
| 3433 | } |
| 3434 | } |
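| | // As a sketch of the SysV classification above (hypothetical struct, not from this code): |
| | //     struct S { double d; __int64 i; };  // two eightbytes: one SSE and one INTEGER |
| | // Such an argument needs one free float register and one free integer register; if either |
| | // is unavailable the whole struct is passed on the stack (structs are never split here). |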
| 3435 | #else // !defined(UNIX_AMD64_ABI) |
| 3436 | isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; |
| 3437 | #endif // !defined(UNIX_AMD64_ABI) |
| 3438 | #endif // _TARGET_ARM_ |
| 3439 | } |
| 3440 | else |
| 3441 | { |
| 3442 | isRegArg = false; |
| 3443 | } |
| 3444 | |
| 3445 | // If there are nonstandard args (outside the calling convention), they were inserted above |
| 3446 | // and noted in a table so we can recognize them here and build their argInfo. |
| 3447 | // |
| 3448 | // They should not affect the placement of any other args or stack space required. |
| 3449 | // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. |
| 3450 | isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum); |
| 3451 | if (isNonStandard) |
| 3452 | { |
| 3453 | isRegArg = (nonStdRegNum != REG_STK); |
| 3454 | } |
| 3455 | #if defined(_TARGET_X86_) |
| 3456 | else if (call->IsTailCallViaHelper()) |
| 3457 | { |
| 3458 | // We have already (before calling fgMorphArgs()) appended the 4 special args |
| 3459 | // required by the x86 tailcall helper. These args are required to go on the |
| 3460 | // stack. Force them to the stack here. |
| 3461 | assert(numArgs >= 4); |
| 3462 | if (argIndex >= numArgs - 4) |
| 3463 | { |
| 3464 | isRegArg = false; |
| 3465 | } |
| 3466 | } |
| 3467 | #endif // defined(_TARGET_X86_) |
| 3468 | |
| 3469 | // Now we know if the argument goes in registers or not and how big it is. |
| 3470 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 3471 | |
| 3472 | #ifdef _TARGET_ARM_ |
| 3473 | // If we ever allocate a floating point argument to the stack, then all |
| 3474 | // subsequent HFA/float/double arguments go on the stack. |
| 3475 | if (!isRegArg && passUsingFloatRegs) |
| 3476 | { |
| 3477 | for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum) |
| 3478 | { |
| 3479 | fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); |
| 3480 | } |
| 3481 | } |
| 3482 | |
| 3483 | // If we think we're going to split a struct between integer registers and the stack, check to |
| 3484 | // see if we've already assigned a floating-point arg to the stack. |
| 3485 | if (isRegArg && // We decided above to use a register for the argument |
| 3486 | !passUsingFloatRegs && // We're using integer registers |
| 3487 | (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack |
| 3488 | anyFloatStackArgs) // We've already used the stack for a floating-point argument |
| 3489 | { |
| 3490 | isRegArg = false; // Change our mind; don't pass this struct partially in registers |
| 3491 | |
| 3492 | // Skip the rest of the integer argument registers |
| 3493 | for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum) |
| 3494 | { |
| 3495 | argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); |
| 3496 | } |
| 3497 | } |
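| | // A sketch of the rule above (hypothetical case): if a float argument was already forced to |
| | // the stack, a struct that would otherwise be split between the last integer registers and |
| | // the stack is instead passed entirely on the stack, and the remaining integer argument |
| | // registers are recorded in argSkippedRegMask so they are not used for later arguments. |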
| 3498 | #endif // _TARGET_ARM_ |
| 3499 | |
| 3500 | // Now create the fgArgTabEntry. |
| 3501 | fgArgTabEntry* newArgEntry; |
| 3502 | if (isRegArg) |
| 3503 | { |
| 3504 | regNumber nextRegNum = REG_STK; |
| 3505 | |
| 3506 | #if defined(UNIX_AMD64_ABI) |
| 3507 | regNumber nextOtherRegNum = REG_STK; |
| 3508 | unsigned int structFloatRegs = 0; |
| 3509 | unsigned int structIntRegs = 0; |
| 3510 | #endif // defined(UNIX_AMD64_ABI) |
| 3511 | |
| 3512 | if (isNonStandard) |
| 3513 | { |
| 3514 | nextRegNum = nonStdRegNum; |
| 3515 | } |
| 3516 | #if defined(UNIX_AMD64_ABI) |
| 3517 | else if (isStructArg && structDesc.passedInRegisters) |
| 3518 | { |
| 3519 | // It is a struct passed in registers. Assign the next available register. |
| 3520 | assert((structDesc.eightByteCount <= 2) && "Too many eightbytes."); |
| 3521 | regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum}; |
| 3522 | for (unsigned int i = 0; i < structDesc.eightByteCount; i++) |
| 3523 | { |
| 3524 | if (structDesc.IsIntegralSlot(i)) |
| 3525 | { |
| 3526 | *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs); |
| 3527 | structIntRegs++; |
| 3528 | } |
| 3529 | else if (structDesc.IsSseSlot(i)) |
| 3530 | { |
| 3531 | *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs); |
| 3532 | structFloatRegs++; |
| 3533 | } |
| 3534 | } |
| 3535 | } |
| 3536 | #endif // defined(UNIX_AMD64_ABI) |
| 3537 | else |
| 3538 | { |
| 3539 | // fill in or update the argInfo table |
| 3540 | nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) |
| 3541 | : genMapIntRegArgNumToRegNum(intArgRegNum); |
| 3542 | } |
| 3543 | |
| 3544 | #ifdef _TARGET_AMD64_ |
| 3545 | #ifndef UNIX_AMD64_ABI |
| 3546 | assert(size == 1); |
| 3547 | #endif |
| 3548 | #endif |
| 3549 | |
| 3550 | // This is a register argument - put it in the table |
| 3551 | newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign, isStructArg, |
| 3552 | callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) |
| 3553 | UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); |
| 3554 | |
| 3555 | newArgEntry->SetIsBackFilled(isBackFilled); |
| 3556 | newArgEntry->isNonStandard = isNonStandard; |
| 3557 | |
| 3558 | // Set up the next intArgRegNum and fltArgRegNum values. |
| 3559 | if (!isBackFilled) |
| 3560 | { |
| 3561 | #if defined(UNIX_AMD64_ABI) |
| 3562 | if (isStructArg) |
| 3563 | { |
| 3564 | // For this case, we've already set the regNums in the argTabEntry |
| 3565 | intArgRegNum += structIntRegs; |
| 3566 | fltArgRegNum += structFloatRegs; |
| 3567 | } |
| 3568 | else |
| 3569 | #endif // defined(UNIX_AMD64_ABI) |
| 3570 | { |
| 3571 | if (!isNonStandard) |
| 3572 | { |
| 3573 | #if FEATURE_ARG_SPLIT |
| 3574 | // Check for a split (partially enregistered) struct |
| 3575 | if (!passUsingFloatRegs && ((intArgRegNum + size) > MAX_REG_ARG)) |
| 3576 | { |
| 3577 | // This indicates a partial enregistration of a struct type |
| 3578 | assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() || |
| 3579 | (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG))); |
| 3580 | unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum; |
| 3581 | assert((unsigned char)numRegsPartial == numRegsPartial); |
| 3582 | call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); |
| 3583 | } |
| 3584 | #endif // FEATURE_ARG_SPLIT |
| 3585 | |
| 3586 | if (passUsingFloatRegs) |
| 3587 | { |
| 3588 | fltArgRegNum += size; |
| 3589 | |
| 3590 | #ifdef WINDOWS_AMD64_ABI |
| 3591 | // Whenever we pass a floating-point register argument |
| 3592 | // we skip the corresponding integer register argument |
| 3593 | intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG); |
| 3594 | #endif // WINDOWS_AMD64_ABI |
| 3595 | // No supported architecture supports partial structs using float registers. |
| 3596 | assert(fltArgRegNum <= MAX_FLOAT_REG_ARG); |
| 3597 | } |
| 3598 | else |
| 3599 | { |
| 3600 | // Increment intArgRegNum by 'size' registers |
| 3601 | intArgRegNum += size; |
| 3602 | |
| 3603 | #ifdef WINDOWS_AMD64_ABI |
| 3604 | fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG); |
| 3605 | #endif // WINDOWS_AMD64_ABI |
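| | // On Windows x64 register assignment is positional: argument N can use either the N-th |
| | // integer register or the N-th float register, which is why both counters advance together. |
| | // A sketch (hypothetical signature): void f(int a, double b, int c) passes 'a' in RCX, |
| | // 'b' in XMM1 and 'c' in R8, leaving RDX, XMM0 and XMM2 unused. |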
| 3606 | } |
| 3607 | } |
| 3608 | } |
| 3609 | } |
| 3610 | } |
| 3611 | else // We have an argument that is not passed in a register |
| 3612 | { |
| 3613 | // This is a stack argument - put it in the table |
| 3614 | newArgEntry = call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign, isStructArg, callIsVararg); |
| 3615 | #ifdef UNIX_AMD64_ABI |
| 3616 | // TODO-Amd64-Unix-CQ: This is temporary (see also in fgMorphArgs). |
| 3617 | if (structDesc.passedInRegisters) |
| 3618 | { |
| 3619 | newArgEntry->structDesc.CopyFrom(structDesc); |
| 3620 | } |
| 3621 | #endif |
| 3622 | } |
| 3623 | |
| 3624 | #ifdef FEATURE_HFA |
| 3625 | if (isHfaArg) |
| 3626 | { |
| 3627 | newArgEntry->setHfaType(hfaType, hfaSlots); |
| 3628 | } |
| 3629 | #endif // FEATURE_HFA |
| 3630 | newArgEntry->SetMultiRegNums(); |
| 3631 | |
| 3632 | noway_assert(newArgEntry != nullptr); |
| 3633 | if (newArgEntry->isStruct) |
| 3634 | { |
| 3635 | newArgEntry->passedByRef = passStructByRef; |
| 3636 | newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; |
| 3637 | } |
| 3638 | else |
| 3639 | { |
| 3640 | newArgEntry->argType = argx->TypeGet(); |
| 3641 | } |
| 3642 | |
| 3643 | argSlots += size; |
| 3644 | } // end foreach argument loop |
| 3645 | |
| 3646 | #ifdef DEBUG |
| 3647 | if (verbose) |
| 3648 | { |
| 3649 | call->fgArgInfo->Dump(this); |
| 3650 | JITDUMP("\n"); |
| 3651 | } |
| 3652 | #endif |
| 3653 | } |
| 3654 | |
| 3655 | //------------------------------------------------------------------------ |
| 3656 | // fgMorphArgs: Walk and transform (morph) the arguments of a call |
| 3657 | // |
| 3658 | // Arguments: |
| 3659 | // callNode - the call for which we are doing the argument morphing |
| 3660 | // |
| 3661 | // Return Value: |
| 3662 | // Like most morph methods, this method returns the morphed node, |
| 3663 | // though in this case there are currently no scenarios where the |
| 3664 | // node itself is re-created. |
| 3665 | // |
| 3666 | // Notes: |
| 3667 | // This calls fgInitArgInfo to create the 'fgArgInfo' for the call. |
| 3668 | // If it has already been created, that method will simply return. |
| 3669 | // |
| 3670 | // This method changes the state of the call node. It uses the existence |
| 3671 | // of gtCallLateArgs (the late arguments list) to determine if it has |
| 3672 | // already done the first round of morphing. |
| 3673 | // |
| 3674 | // The first time it is called (i.e. during global morphing), this method |
| 3675 | // computes the "late arguments". This is when it determines which arguments |
| 3676 | // need to be evaluated to temps prior to the main argument setup, and which |
| 3677 | // can be directly evaluated into the argument location. It also creates a |
| 3678 | // second argument list (gtCallLateArgs) that does the final placement of the |
| 3679 | // arguments, e.g. into registers or onto the stack. |
| 3680 | // |
| 3681 | // The "non-late arguments", aka the gtCallArgs, perform the in-order |
| 3682 | // evaluation of any arguments that might have side effects, such as embedded |
| 3683 | // assignments, calls or possible throws. In these cases, the argument and any |
| 3684 | // earlier arguments must be evaluated to temps. |
| 3685 | // |
| 3686 | // On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS), |
| 3687 | // if we have any nested calls, we need to defer the copying of the argument |
| 3688 | // into the fixed argument area until after the call. If the argument did not |
| 3689 | // otherwise need to be computed into a temp, it is moved to gtCallLateArgs and |
| 3690 | // replaced in the "early" arg list (gtCallArgs) with a placeholder node. |
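| | // |
| | // As an illustration (a sketch, not a specific case from this code): for a call f(x, g(y)) on a |
| | // FEATURE_FIXED_OUT_ARGS target, the nested call g(y) could overwrite the fixed outgoing arg area, |
| | // so 'x' cannot be stored there before g(y) executes. 'x' is therefore either evaluated to a temp |
| | // or moved to gtCallLateArgs, and its slot in gtCallArgs is filled with a placeholder node. |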
| 3691 | |
| 3692 | #ifdef _PREFAST_ |
| 3693 | #pragma warning(push) |
| 3694 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
| 3695 | #endif |
| 3696 | GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) |
| 3697 | { |
| 3698 | GenTree* args; |
| 3699 | GenTree* argx; |
| 3700 | |
| 3701 | unsigned flagsSummary = 0; |
| 3702 | |
| 3703 | unsigned argIndex = 0; |
| 3704 | unsigned argSlots = 0; |
| 3705 | |
| 3706 | bool reMorphing = call->AreArgsComplete(); |
| 3707 | |
| 3708 | // Set up the fgArgInfo. |
| 3709 | fgInitArgInfo(call); |
| 3710 | unsigned numArgs = call->fgArgInfo->ArgCount(); |
| 3711 | JITDUMP("%sMorphing args for %d.%s:\n", (reMorphing) ? "Re" : "", call->gtTreeID, GenTree::OpName(call->gtOper)); |
| 3712 | |
| 3713 | // If we are remorphing, process the late arguments (which were determined by a previous call to this method). |
| 3714 | if (reMorphing) |
| 3715 | { |
| 3716 | // We need to reMorph the gtCallLateArgs early since that is what triggers |
| 3717 | // the expression folding and we need to have the final folded gtCallLateArgs |
| 3718 | // available when we call UpdateRegArg so that we correctly update the fgArgInfo |
| 3719 | // with the folded tree that represents the final optimized argument nodes. |
| 3720 | // |
| 3721 | if (call->gtCallLateArgs != nullptr) |
| 3722 | { |
| 3723 | |
| 3724 | call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList(); |
| 3725 | flagsSummary |= call->gtCallLateArgs->gtFlags; |
| 3726 | } |
| 3727 | assert(call->fgArgInfo != nullptr); |
| 3728 | } |
| 3729 | call->fgArgInfo->RemorphReset(); |
| 3730 | |
| 3731 | // First we morph the argument subtrees ('this' pointer, arguments, etc.). |
| 3732 | // During the first call to fgMorphArgs we also record the |
| 3733 | // information about late arguments we have in 'fgArgInfo'. |
| 3734 | // This information is used later to construct the gtCallLateArgs. |
| 3735 | |
| 3736 | // Process the 'this' argument value, if present. |
| 3737 | argx = call->gtCallObjp; |
| 3738 | if (argx) |
| 3739 | { |
| 3740 | fgArgTabEntry* thisArgEntry = call->fgArgInfo->GetArgEntry(0, reMorphing); |
| 3741 | argx = fgMorphTree(argx); |
| 3742 | call->gtCallObjp = argx; |
| 3743 | // This is a register argument - possibly update it in the table. |
| 3744 | call->fgArgInfo->UpdateRegArg(thisArgEntry, argx, reMorphing); |
| 3745 | flagsSummary |= argx->gtFlags; |
| 3746 | assert(argIndex == 0); |
| 3747 | argIndex++; |
| 3748 | argSlots++; |
| 3749 | } |
| 3750 | |
| 3751 | // Note that this name is a bit of a misnomer - it indicates that there are struct args |
| 3752 | // that occupy more than a single slot that are passed by value (not necessarily in regs). |
| 3753 | bool hasMultiregStructArgs = false; |
| 3754 | for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++) |
| 3755 | { |
| 3756 | GenTree** parentArgx = &args->gtOp.gtOp1; |
| 3757 | fgArgTabEntry* argEntry = call->fgArgInfo->GetArgEntry(argIndex, reMorphing); |
| 3758 | |
| 3759 | // Morph the arg node, and update the parent and argEntry pointers. |
| 3760 | argx = *parentArgx; |
| 3761 | argx = fgMorphTree(argx); |
| 3762 | *parentArgx = argx; |
| 3763 | assert(args->OperIsList()); |
| 3764 | assert(argx == args->Current()); |
| 3765 | |
| 3766 | unsigned argAlign = argEntry->alignment; |
| 3767 | unsigned size = argEntry->getSize(); |
| 3768 | CORINFO_CLASS_HANDLE copyBlkClass = NO_CLASS_HANDLE; |
| 3769 | |
| 3770 | if (argAlign == 2) |
| 3771 | { |
| 3772 | if (argSlots % 2 == 1) |
| 3773 | { |
| 3774 | argSlots++; |
| 3775 | } |
| 3776 | } |
| 3777 | if (argEntry->isNonStandard) |
| 3778 | { |
| 3779 | // We need to update the node field for this nonStandard arg here |
| 3780 | // as it may have been changed by the call to fgMorphTree. |
| 3781 | call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing); |
| 3782 | flagsSummary |= argx->gtFlags; |
| 3783 | continue; |
| 3784 | } |
| 3785 | |
| 3786 | assert(size != 0); |
| 3787 | argSlots += argEntry->getSlotCount(); |
| 3788 | |
| 3789 | // lclVar address should have been retyped to TYP_I_IMPL. |
| 3790 | assert(!argx->IsVarAddr() || (argx->gtType == TYP_I_IMPL)); |
| 3791 | |
| 3792 | // Get information about this argument. |
| 3793 | var_types hfaType = argEntry->hfaType; |
| 3794 | bool isHfaArg = (hfaType != TYP_UNDEF); |
| 3795 | bool isHfaRegArg = argEntry->isHfaRegArg; |
| 3796 | unsigned hfaSlots = argEntry->numRegs; |
| 3797 | bool passUsingFloatRegs = argEntry->isPassedInFloatRegisters(); |
| 3798 | bool isBackFilled = argEntry->IsBackFilled(); |
| 3799 | unsigned structSize = 0; |
| 3800 | |
| 3801 | // Struct arguments may be morphed into a node that is not a struct type. |
| 3802 | // In such case the fgArgTabEntry keeps track of whether the original node (before morphing) |
| 3803 | // was a struct and the struct classification. |
| 3804 | bool isStructArg = argEntry->isStruct; |
| 3805 | |
| 3806 | GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/); |
| 3807 | if (isStructArg && varTypeIsStruct(argObj) && !argObj->OperIs(GT_ASG, GT_MKREFANY, GT_FIELD_LIST, GT_ARGPLACE)) |
| 3808 | { |
| 3809 | CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(argObj); |
| 3810 | unsigned originalSize; |
| 3811 | if (argObj->TypeGet() == TYP_STRUCT) |
| 3812 | { |
| 3813 | if (argObj->OperIs(GT_OBJ)) |
| 3814 | { |
| 3815 | // Get the size off the OBJ node. |
| 3816 | originalSize = argObj->AsObj()->gtBlkSize; |
| 3817 | assert(originalSize == info.compCompHnd->getClassSize(objClass)); |
| 3818 | } |
| 3819 | else |
| 3820 | { |
| 3821 | // We have a BADCODE assert for this in fgInitArgInfo. |
| 3822 | assert(argObj->OperIs(GT_LCL_VAR)); |
| 3823 | originalSize = lvaGetDesc(argObj->AsLclVarCommon())->lvExactSize; |
| 3824 | } |
| 3825 | } |
| 3826 | else |
| 3827 | { |
| 3828 | originalSize = genTypeSize(argx); |
| 3829 | assert(originalSize == info.compCompHnd->getClassSize(objClass)); |
| 3830 | } |
| 3831 | unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE); |
| 3832 | var_types structBaseType = argEntry->argType; |
| 3833 | |
| 3834 | #ifndef _TARGET_X86_ |
| 3835 | // First, handle the case where the argument is passed by reference. |
| 3836 | if (argEntry->passedByRef) |
| 3837 | { |
| 3838 | assert(size == 1); |
| 3839 | copyBlkClass = objClass; |
| 3840 | #ifdef UNIX_AMD64_ABI |
| 3841 | assert(!"Structs are not passed by reference on x64/ux"); |
| 3842 | #endif // UNIX_AMD64_ABI |
| 3843 | } |
| 3844 | else |
| 3845 | { |
| 3846 | // This is passed by value. |
| 3847 | // Check to see if we can transform this into load of a primitive type. |
| 3848 | // 'size' must be the number of pointer sized items |
| 3849 | assert(size == roundupSize / TARGET_POINTER_SIZE); |
| 3850 | |
| 3851 | structSize = originalSize; |
| 3852 | unsigned passingSize = originalSize; |
| 3853 | |
| 3854 | // Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size. |
| 3855 | // Whether it can do this is platform-dependent: |
| 3856 | // - In general, it can be done for power of 2 structs that fit in a single register. |
| 3857 | // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field. |
| 3858 | // - This is irrelevant for X86, since structs are always passed by value on the stack. |
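| | // For example (a sketch assuming a hypothetical 8-byte struct on a 64-bit target): for |
| | // 'struct S { int a, b; };' passed in one register, the GT_OBJ(S, addr) can be retyped as a |
| | // GT_IND of the primitive base type, and if 'addr' is GT_ADDR(lclVar) the indirection is |
| | // folded away below so that the local is passed directly. |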
| 3859 | |
| 3860 | GenTree** parentOfArgObj = parentArgx; |
| 3861 | GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj); |
| 3862 | bool canTransform = false; |
| 3863 | |
| 3864 | if (structBaseType != TYP_STRUCT) |
| 3865 | { |
| 3866 | if (isPow2(passingSize)) |
| 3867 | { |
| 3868 | canTransform = true; |
| 3869 | } |
| 3870 | |
| 3871 | #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI) |
| 3872 | // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can |
| 3873 | // only transform in that case if the arg is a local. |
| 3874 | // TODO-CQ: This transformation should be applicable in general, not just for the ARM64 |
| 3875 | // or UNIX_AMD64_ABI cases where they will be passed in registers. |
| 3876 | else |
| 3877 | { |
| 3878 | canTransform = (lclVar != nullptr); |
| 3879 | passingSize = genTypeSize(structBaseType); |
| 3880 | } |
| 3881 | #endif // _TARGET_ARM64_ || UNIX_AMD64_ABI |
| 3882 | } |
| 3883 | |
| 3884 | if (!canTransform) |
| 3885 | { |
| 3886 | #if defined(_TARGET_AMD64_) |
| 3887 | #ifndef UNIX_AMD64_ABI |
| 3888 | // On Windows structs are always copied and passed by reference (handled above) unless they are |
| 3889 | // passed by value in a single register. |
| 3890 | assert(size == 1); |
| 3891 | copyBlkClass = objClass; |
| 3892 | #else // UNIX_AMD64_ABI |
| 3893 | // On Unix, structs are always passed by value. |
| 3894 | // We only need a copy if we have one of the following: |
| 3895 | // - We have a lclVar that has been promoted and is passed in registers. |
| 3896 | // - The sizes don't match for a non-lclVar argument. |
| 3897 | // - We have a known struct type (e.g. SIMD) that requires multiple registers. |
| 3898 | // TODO-Amd64-Unix-CQ: The first case could and should be handled without copies. |
| 3899 | // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not |
| 3900 | // actually passed in registers. |
| 3901 | if (argEntry->isPassedInRegisters()) |
| 3902 | { |
| 3903 | assert(argEntry->structDesc.passedInRegisters); |
| 3904 | if (lclVar != nullptr) |
| 3905 | { |
| 3906 | if (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT) |
| 3907 | { |
| 3908 | copyBlkClass = objClass; |
| 3909 | } |
| 3910 | } |
| 3911 | else if (argObj->OperIs(GT_OBJ)) |
| 3912 | { |
| 3913 | if (passingSize != structSize) |
| 3914 | { |
| 3915 | copyBlkClass = objClass; |
| 3916 | } |
| 3917 | } |
| 3918 | else |
| 3919 | { |
| 3920 | // This should only be the case of a value directly producing a known struct type. |
| 3921 | assert(argObj->TypeGet() != TYP_STRUCT); |
| 3922 | if (argEntry->numRegs > 1) |
| 3923 | { |
| 3924 | copyBlkClass = objClass; |
| 3925 | } |
| 3926 | } |
| 3927 | } |
| 3928 | #endif // UNIX_AMD64_ABI |
| 3929 | #elif defined(_TARGET_ARM64_) |
| 3930 | if ((passingSize != structSize) && (lclVar == nullptr)) |
| 3931 | { |
| 3932 | copyBlkClass = objClass; |
| 3933 | } |
| 3934 | #endif |
| 3935 | |
| 3936 | #ifdef _TARGET_ARM_ |
| 3937 | // TODO-1stClassStructs: Unify these conditions across targets. |
| 3938 | if (((lclVar != nullptr) && |
| 3939 | (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT)) || |
| 3940 | ((argObj->OperIs(GT_OBJ)) && (passingSize != structSize))) |
| 3941 | { |
| 3942 | copyBlkClass = objClass; |
| 3943 | } |
| 3944 | |
| 3945 | if (structSize < TARGET_POINTER_SIZE) |
| 3946 | { |
| 3947 | copyBlkClass = objClass; |
| 3948 | } |
| 3949 | #endif // _TARGET_ARM_ |
| 3950 | } |
| 3951 | else |
| 3952 | { |
| 3953 | // We have a struct argument that's less than pointer size, and it is either a power of 2, |
| 3954 | // or a local. |
| 3955 | // Change our GT_OBJ into a GT_IND of the correct type. |
| 3956 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 3957 | |
| 3958 | #ifdef _TARGET_ARM_ |
| 3959 | assert((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2))); |
| 3960 | #else |
| 3961 | assert(size == 1); |
| 3962 | #endif |
| 3963 | |
| 3964 | assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize)); |
| 3965 | |
| 3966 | if (argObj->OperIs(GT_OBJ)) |
| 3967 | { |
| 3968 | argObj->ChangeOper(GT_IND); |
| 3969 | |
| 3970 | // Now see if we can fold *(&X) into X |
| 3971 | if (argObj->gtOp.gtOp1->gtOper == GT_ADDR) |
| 3972 | { |
| 3973 | GenTree* temp = argObj->gtOp.gtOp1->gtOp.gtOp1; |
| 3974 | |
| 3975 | // Keep the DONT_CSE flag in sync |
| 3976 | // (as the addr always marks it for its op1) |
| 3977 | temp->gtFlags &= ~GTF_DONT_CSE; |
| 3978 | temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE); |
| 3979 | DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR |
| 3980 | DEBUG_DESTROY_NODE(argObj); // GT_IND |
| 3981 | |
| 3982 | argObj = temp; |
| 3983 | *parentOfArgObj = temp; |
| 3984 | |
| 3985 | // If the OBJ had been the top level node, we've now changed argx. |
| 3986 | if (parentOfArgObj == parentArgx) |
| 3987 | { |
| 3988 | argx = temp; |
| 3989 | } |
| 3990 | } |
| 3991 | } |
| 3992 | if (argObj->gtOper == GT_LCL_VAR) |
| 3993 | { |
| 3994 | unsigned lclNum = argObj->gtLclVarCommon.gtLclNum; |
| 3995 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 3996 | |
| 3997 | if (varDsc->lvPromoted) |
| 3998 | { |
| 3999 | if (varDsc->lvFieldCnt == 1) |
| 4000 | { |
| 4001 | // get the first and only promoted field |
| 4002 | LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart]; |
| 4003 | if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize) |
| 4004 | { |
| 4005 | // we will use the first and only promoted field |
| 4006 | argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart); |
| 4007 | |
| 4008 | if (varTypeCanReg(fieldVarDsc->TypeGet()) && |
| 4009 | (genTypeSize(fieldVarDsc->TypeGet()) == originalSize)) |
| 4010 | { |
| 4011 | // Just use the existing field's type |
| 4012 | argObj->gtType = fieldVarDsc->TypeGet(); |
| 4013 | } |
| 4014 | else |
| 4015 | { |
| 4016 | // Can't use the existing field's type, so use GT_LCL_FLD to swizzle |
| 4017 | // to a new type |
| 4018 | argObj->ChangeOper(GT_LCL_FLD); |
| 4019 | argObj->gtType = structBaseType; |
| 4020 | } |
| 4021 | assert(varTypeCanReg(argObj->TypeGet())); |
| 4022 | assert(copyBlkClass == NO_CLASS_HANDLE); |
| 4023 | } |
| 4024 | else |
| 4025 | { |
| 4026 | // use GT_LCL_FLD to swizzle the single field struct to a new type |
| 4027 | lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); |
| 4028 | argObj->ChangeOper(GT_LCL_FLD); |
| 4029 | argObj->gtType = structBaseType; |
| 4030 | } |
| 4031 | } |
| 4032 | else |
| 4033 | { |
| 4034 | // The struct fits into a single register, but it has been promoted into its |
| 4035 | // constituent fields, and so we have to re-assemble it |
| 4036 | copyBlkClass = objClass; |
| 4037 | } |
| 4038 | } |
| 4039 | else if (!varTypeIsIntegralOrI(varDsc->TypeGet())) |
| 4040 | { |
| 4041 | // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD |
| 4042 | argObj->ChangeOper(GT_LCL_FLD); |
| 4043 | argObj->gtType = structBaseType; |
| 4044 | } |
| 4045 | } |
| 4046 | else |
| 4047 | { |
| 4048 | // Not a GT_LCL_VAR, so we can just change the type on the node |
| 4049 | argObj->gtType = structBaseType; |
| 4050 | } |
| 4051 | assert(varTypeCanReg(argObj->TypeGet()) || |
| 4052 | ((copyBlkClass != NO_CLASS_HANDLE) && varTypeCanReg(structBaseType))); |
| 4053 | |
| 4054 | size = 1; |
| 4055 | } |
| 4056 | |
| 4057 | #ifndef UNIX_AMD64_ABI |
| 4058 | // We still have a struct unless we converted the GT_OBJ into a GT_IND above... |
| 4059 | if (varTypeIsStruct(structBaseType) && !argEntry->passedByRef) |
| 4060 | { |
| 4061 | if (isHfaArg && passUsingFloatRegs) |
| 4062 | { |
| 4063 | size = argEntry->numRegs; |
| 4064 | } |
| 4065 | else |
| 4066 | { |
| 4067 | // If the valuetype size is not a multiple of TARGET_POINTER_SIZE, |
| 4068 | // we must copyblk to a temp before doing the obj to avoid |
| 4069 | // the obj reading memory past the end of the valuetype |
| 4070 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 4071 | |
| 4072 | if (roundupSize > originalSize) |
| 4073 | { |
| 4074 | copyBlkClass = objClass; |
| 4075 | |
| 4076 | // There are a few special cases where we can omit using a CopyBlk |
| 4077 | // where we normally would need to use one. |
| 4078 | |
| 4079 | if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar? |
| 4080 | { |
| 4081 | copyBlkClass = NO_CLASS_HANDLE; |
| 4082 | } |
| 4083 | } |
| 4084 | |
| 4085 | size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items |
| 4086 | } |
| 4087 | } |
| 4088 | #endif // !UNIX_AMD64_ABI |
| 4089 | } |
| 4090 | #endif // !_TARGET_X86_ |
| 4091 | } |
| 4092 | |
| 4093 | if (argEntry->isPassedInRegisters()) |
| 4094 | { |
| 4095 | call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing); |
| 4096 | } |
| 4097 | else |
| 4098 | { |
| 4099 | call->fgArgInfo->UpdateStkArg(argEntry, argx, reMorphing); |
| 4100 | } |
| 4101 | |
| 4102 | if (copyBlkClass != NO_CLASS_HANDLE) |
| 4103 | { |
| 4104 | fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass); |
| 4105 | } |
| 4106 | |
| 4107 | if (argx->gtOper == GT_MKREFANY) |
| 4108 | { |
| 4109 | // 'Lower' the MKREFANY tree and insert it. |
| 4110 | noway_assert(!reMorphing); |
| 4111 | |
| 4112 | #ifdef _TARGET_X86_ |
| 4113 | |
| 4114 | // Build the mkrefany as a GT_FIELD_LIST |
| 4115 | GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) |
| 4116 | GenTreeFieldList(argx->gtOp.gtOp1, OFFSETOF__CORINFO_TypedReference__dataPtr, TYP_BYREF, nullptr); |
| 4117 | (void)new (this, GT_FIELD_LIST) |
| 4118 | GenTreeFieldList(argx->gtOp.gtOp2, OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL, fieldList); |
| 4119 | fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx); |
| 4120 | fp->node = fieldList; |
| 4121 | args->gtOp.gtOp1 = fieldList; |
| 4122 | |
| 4123 | #else // !_TARGET_X86_ |
| 4124 | |
| 4125 | // Get a new temp |
| 4126 | // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany |
| 4127 | unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument")); |
| 4128 | lvaSetStruct(tmp, impGetRefAnyClass(), false); |
| 4129 | |
| 4130 | // Build the mkrefany as a comma node: |
| 4131 | // (tmp.ptr=argx),(tmp.type=handle) |
| 4132 | GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__dataPtr); |
| 4133 | GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__type); |
| 4134 | destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField()); |
| 4135 | destPtrSlot->gtFlags |= GTF_VAR_DEF; |
| 4136 | destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField()); |
| 4137 | destTypeSlot->gtFlags |= GTF_VAR_DEF; |
| 4138 | |
| 4139 | GenTree* asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1); |
| 4140 | GenTree* asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2); |
| 4141 | GenTree* asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot); |
| 4142 | |
| 4143 | // Change the expression to "(tmp=val)" |
| 4144 | args->gtOp.gtOp1 = asg; |
| 4145 | |
| 4146 | // EvalArgsToTemps will cause tmp to actually get loaded as the argument |
| 4147 | call->fgArgInfo->EvalToTmp(argEntry, tmp, asg); |
| 4148 | lvaSetVarAddrExposed(tmp); |
| 4149 | #endif // !_TARGET_X86_ |
| 4150 | } |
| 4151 | |
| 4152 | #if FEATURE_MULTIREG_ARGS |
| 4153 | if (isStructArg) |
| 4154 | { |
| 4155 | if (size > 1 || isHfaArg) |
| 4156 | { |
| 4157 | hasMultiregStructArgs = true; |
| 4158 | } |
| 4159 | } |
| 4160 | #ifdef _TARGET_ARM_ |
| 4161 | else if ((argEntry->argType == TYP_LONG) || (argEntry->argType == TYP_DOUBLE)) |
| 4162 | { |
| 4163 | assert((argEntry->numRegs == 2) || (argEntry->numSlots == 2)); |
| 4164 | } |
| 4165 | #endif |
| 4166 | else |
| 4167 | { |
| 4168 | // We must have exactly one register or slot. |
| 4169 | assert(((argEntry->numRegs == 1) && (argEntry->numSlots == 0)) || |
| 4170 | ((argEntry->numRegs == 0) && (argEntry->numSlots == 1))); |
| 4171 | } |
| 4172 | #endif |
| 4173 | |
| 4174 | #if defined(_TARGET_X86_) |
| 4175 | if (isStructArg) |
| 4176 | { |
| 4177 | GenTree* lclNode = argx->OperIs(GT_LCL_VAR) ? argx : fgIsIndirOfAddrOfLocal(argx); |
| 4178 | if ((lclNode != nullptr) && |
| 4179 | (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT)) |
| 4180 | { |
| 4181 | // Make a GT_FIELD_LIST of the field lclVars. |
| 4182 | GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon(); |
| 4183 | LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]); |
| 4184 | GenTreeFieldList* fieldList = nullptr; |
| 4185 | for (unsigned fieldLclNum = varDsc->lvFieldLclStart; |
| 4186 | fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum) |
| 4187 | { |
| 4188 | LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; |
| 4189 | if (fieldList == nullptr) |
| 4190 | { |
| 4191 | lcl->SetLclNum(fieldLclNum); |
| 4192 | lcl->ChangeOper(GT_LCL_VAR); |
| 4193 | lcl->gtType = fieldVarDsc->lvType; |
| 4194 | fieldList = new (this, GT_FIELD_LIST) |
| 4195 | GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr); |
| 4196 | fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx); |
| 4197 | fp->node = fieldList; |
| 4198 | args->gtOp.gtOp1 = fieldList; |
| 4199 | } |
| 4200 | else |
| 4201 | { |
| 4202 | GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType); |
| 4203 | fieldList = new (this, GT_FIELD_LIST) |
| 4204 | GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList); |
| 4205 | } |
| 4206 | } |
| 4207 | } |
| 4208 | } |
| 4209 | #endif // _TARGET_X86_ |
| 4210 | |
| 4211 | flagsSummary |= args->Current()->gtFlags; |
| 4212 | |
| 4213 | } // end foreach argument loop |
| 4214 | |
| 4215 | if (!reMorphing) |
| 4216 | { |
| 4217 | call->fgArgInfo->ArgsComplete(); |
| 4218 | } |
| 4219 | |
| 4220 | if (call->gtCallArgs) |
| 4221 | { |
| 4222 | UpdateGT_LISTFlags(call->gtCallArgs); |
| 4223 | } |
| 4224 | |
| 4225 | /* Process the function address, if indirect call */ |
| 4226 | |
| 4227 | if (call->gtCallType == CT_INDIRECT) |
| 4228 | { |
| 4229 | call->gtCallAddr = fgMorphTree(call->gtCallAddr); |
| 4230 | } |
| 4231 | |
| 4232 | #if FEATURE_FIXED_OUT_ARGS |
| 4233 | |
| 4234 | // Record the outgoing argument size. If the call is a fast tail |
| 4235 | // call, it will setup its arguments in incoming arg area instead |
| 4236 | // of the out-going arg area, so we don't need to track the |
| 4237 | // outgoing arg size. |
| 4238 | if (!call->IsFastTailCall()) |
| 4239 | { |
| 4240 | unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum(); |
| 4241 | |
| 4242 | #if defined(UNIX_AMD64_ABI) |
| 4243 | // This is currently required for the UNIX ABI to work correctly. |
| 4244 | opts.compNeedToAlignFrame = true; |
| 4245 | #endif // UNIX_AMD64_ABI |
| 4246 | |
| 4247 | const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES; |
| 4248 | call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL)); |
| 4249 | |
| 4250 | #ifdef DEBUG |
| 4251 | if (verbose) |
| 4252 | { |
| 4253 | printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots, |
| 4254 | preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize); |
| 4255 | } |
| 4256 | #endif |
| 4257 | } |
| 4258 | #endif // FEATURE_FIXED_OUT_ARGS |
| 4259 | |
| 4260 | // Clear the ASG and EXCEPT (if possible) flags on the call node |
| 4261 | call->gtFlags &= ~GTF_ASG; |
| 4262 | if (!call->OperMayThrow(this)) |
| 4263 | { |
| 4264 | call->gtFlags &= ~GTF_EXCEPT; |
| 4265 | } |
| 4266 | |
| 4267 | // Union in the side effect flags from the call's operands |
| 4268 | call->gtFlags |= flagsSummary & GTF_ALL_EFFECT; |
| 4269 | |
| 4270 | // If the register arguments have already been determined |
| 4271 | // or we have no register arguments then we don't need to |
| 4272 | // call SortArgs() and EvalArgsToTemps() |
| 4273 | // |
| 4274 | // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch |
| 4275 | // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy |
| 4276 | // is added to make sure to call EvalArgsToTemp. |
| 4277 | if (!reMorphing && (call->fgArgInfo->HasRegArgs())) |
| 4278 | { |
| 4279 | // This is the first time that we morph this call AND it has register arguments. |
| 4280 | // Follow into the code below and do the 'defer or eval to temp' analysis. |
| 4281 | |
| 4282 | call->fgArgInfo->SortArgs(); |
| 4283 | |
| 4284 | call->fgArgInfo->EvalArgsToTemps(); |
| 4285 | |
| 4286 | // We may have updated the arguments |
| 4287 | if (call->gtCallArgs) |
| 4288 | { |
| 4289 | UpdateGT_LISTFlags(call->gtCallArgs); |
| 4290 | } |
| 4291 | } |
| 4292 | |
| 4293 | if (hasMultiregStructArgs) |
| 4294 | { |
| 4295 | fgMorphMultiregStructArgs(call); |
| 4296 | } |
| 4297 | |
| 4298 | #ifdef DEBUG |
| 4299 | if (verbose) |
| 4300 | { |
| 4301 | call->fgArgInfo->Dump(this); |
| 4302 | JITDUMP("\n"); |
| 4303 | } |
| 4304 | #endif |
| 4305 | return call; |
| 4306 | } |
| 4307 | #ifdef _PREFAST_ |
| 4308 | #pragma warning(pop) |
| 4309 | #endif |
| 4310 | |
| 4311 | //----------------------------------------------------------------------------- |
| 4312 | // fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and |
| 4313 | // call fgMorphMultiregStructArg on each of them. |
| 4314 | // |
| 4315 | // Arguments: |
| 4316 | // call : a GenTreeCall node that has one or more TYP_STRUCT arguments |
| 4317 | // |
| 4318 | // Notes: |
| 4319 | // We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types. |
| 4320 | // It will ensure that the struct arguments are in the correct form. |
| 4321 | // If this method fails to find any TYP_STRUCT arguments it will assert. |
| 4322 | // |
| 4323 | void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) |
| 4324 | { |
| 4325 | bool foundStructArg = false; |
| 4326 | unsigned initialFlags = call->gtFlags; |
| 4327 | unsigned flagsSummary = 0; |
| 4328 | fgArgInfo* allArgInfo = call->fgArgInfo; |
| 4329 | |
| 4330 | #ifdef _TARGET_X86_ |
| 4331 | assert(!"Logic error: no MultiregStructArgs for X86"); |
| 4332 | #endif |
| 4333 | #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) |
| 4334 | assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI"); |
| 4335 | #endif |
| 4336 | |
| 4337 | for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2) |
| 4338 | { |
| 4339 | // For late arguments the arg tree that is overridden is in the gtCallLateArgs list. |
| 4340 | // For such late args the gtCallArgs list contains the setup arg node (which evaluates the arg). |
| 4341 | // The tree from the gtCallLateArgs list is the one passed to the callee. The fgArgEntry node contains |
| 4342 | // the mapping between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to |
| 4343 | // the early arg node itself; otherwise it points to the corresponding node in the late args list. |
| 4344 | bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0; |
| 4345 | fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1); |
| 4346 | assert(fgEntryPtr != nullptr); |
| 4347 | GenTree* argx = fgEntryPtr->node; |
| 4348 | GenTree* lateList = nullptr; |
| 4349 | GenTree* lateNode = nullptr; |
| 4350 | |
| 4351 | if (isLateArg) |
| 4352 | { |
| 4353 | for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext()) |
| 4354 | { |
| 4355 | assert(list->OperIsList()); |
| 4356 | |
| 4357 | GenTree* argNode = list->Current(); |
| 4358 | if (argx == argNode) |
| 4359 | { |
| 4360 | lateList = list; |
| 4361 | lateNode = argNode; |
| 4362 | break; |
| 4363 | } |
| 4364 | } |
| 4365 | assert(lateList != nullptr && lateNode != nullptr); |
| 4366 | } |
| 4367 | |
| 4368 | GenTree* arg = argx; |
| 4369 | |
| 4370 | if (!fgEntryPtr->isStruct) |
| 4371 | { |
| 4372 | continue; |
| 4373 | } |
| 4374 | |
| 4375 | unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->numSlots); |
| 4376 | if ((size > 1) || fgEntryPtr->isHfaArg) |
| 4377 | { |
| 4378 | foundStructArg = true; |
| 4379 | if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) |
| 4380 | { |
| 4381 | arg = fgMorphMultiregStructArg(arg, fgEntryPtr); |
| 4382 | |
| 4383 | // Did we replace 'argx' with a new tree? |
| 4384 | if (arg != argx) |
| 4385 | { |
| 4386 | fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node |
| 4387 | |
| 4388 | // link the new arg node into either the late arg list or the gtCallArgs list |
| 4389 | if (isLateArg) |
| 4390 | { |
| 4391 | lateList->gtOp.gtOp1 = arg; |
| 4392 | } |
| 4393 | else |
| 4394 | { |
| 4395 | args->gtOp.gtOp1 = arg; |
| 4396 | } |
| 4397 | } |
| 4398 | } |
| 4399 | } |
| 4400 | } |
| 4401 | |
| 4402 | // We should only call this method when we actually have one or more multireg struct args |
| 4403 | assert(foundStructArg); |
| 4404 | |
| 4405 | // Update the flags |
| 4406 | call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); |
| 4407 | } |
| 4408 | |
| 4409 | //----------------------------------------------------------------------------- |
| 4410 | // fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list, |
| 4411 | // morph the argument as needed to be passed correctly. |
| 4412 | // |
| 4413 | // Arguments: |
| 4414 | // arg - A GenTree node containing a TYP_STRUCT arg |
| 4415 | // fgEntryPtr - the fgArgTabEntry information for the current 'arg' |
| 4416 | // |
| 4417 | // Notes: |
| 4418 | // The arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT. |
| 4419 | // If 'arg' is a lclVar passed on the stack, we will ensure that any lclVars that must be on the |
| 4420 | // stack are marked as doNotEnregister, and then we return. |
| 4421 | // |
| 4422 | // If it is passed by register, we mutate the argument into the GT_FIELD_LIST form |
| 4423 | // which is only used for struct arguments. |
| 4424 | // |
| 4425 | // If arg is a LclVar, we check whether it is struct promoted with the right number of fields, |
| 4426 | // and whether those fields are at the appropriate offsets; if so, we use the struct promoted |
| 4427 | // fields in the GT_FIELD_LIST nodes that we create. |
| 4428 | // If we have a GT_LCL_VAR that isn't struct promoted, or doesn't meet the requirements, |
| 4429 | // we will use a set of GT_LCL_FLD nodes to access the various portions of the struct; |
| 4430 | // this also forces the struct to be stack allocated into the local frame. |
| 4431 | // For the GT_OBJ case we will clone the address expression and generate two (or more) |
| 4432 | // indirections. |
| 4433 | // Currently the implementation handles ARM64/ARM and Unix AMD64, and will NYI for other targets. |
| 4434 | // |
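| | // For example (an illustrative sketch only; V02 is a hypothetical local), on ARM64 a 16-byte |
| | // non-promoted struct local passed in two registers would be rewritten from |
| | //     OBJ(ADDR(LCL_VAR V02)) |
| | // into |
| | //     FIELD_LIST(LCL_FLD V02 [+0] TYP_LONG, FIELD_LIST(LCL_FLD V02 [+8] TYP_LONG, nullptr)) |
| | // so that each register-sized piece becomes an explicit operand of the argument. |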
| 4435 | GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) |
| 4436 | { |
| 4437 | assert(varTypeIsStruct(arg->TypeGet())); |
| 4438 | |
| 4439 | #if !defined(_TARGET_ARMARCH_) && !defined(UNIX_AMD64_ABI) |
| 4440 | NYI("fgMorphMultiregStructArg requires implementation for this target"); |
| 4441 | #endif |
| 4442 | |
| 4443 | #ifdef _TARGET_ARM_ |
| 4444 | if ((fgEntryPtr->isSplit && fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4) || |
| 4445 | (!fgEntryPtr->isSplit && fgEntryPtr->regNum == REG_STK)) |
| 4446 | #else |
| 4447 | if (fgEntryPtr->regNum == REG_STK) |
| 4448 | #endif |
| 4449 | { |
| 4450 | GenTreeLclVarCommon* lcl = nullptr; |
| 4451 | GenTree* actualArg = arg->gtEffectiveVal(); |
| 4452 | |
| 4453 | if (actualArg->OperGet() == GT_OBJ) |
| 4454 | { |
| 4455 | if (actualArg->gtGetOp1()->OperIs(GT_ADDR) && actualArg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR)) |
| 4456 | { |
| 4457 | lcl = actualArg->gtGetOp1()->gtGetOp1()->AsLclVarCommon(); |
| 4458 | } |
| 4459 | } |
| 4460 | else if (actualArg->OperGet() == GT_LCL_VAR) |
| 4461 | { |
| 4462 | lcl = actualArg->AsLclVarCommon(); |
| 4463 | } |
| 4464 | if (lcl != nullptr) |
| 4465 | { |
| 4466 | if (lvaGetPromotionType(lcl->gtLclNum) == PROMOTION_TYPE_INDEPENDENT) |
| 4467 | { |
| 4468 | arg = fgMorphLclArgToFieldlist(lcl); |
| 4469 | } |
| 4470 | else if (arg->TypeGet() == TYP_STRUCT) |
| 4471 | { |
| 4472 | // If this is a non-register struct, it must be referenced from memory. |
| 4473 | if (!actualArg->OperIs(GT_OBJ)) |
| 4474 | { |
| 4475 | // Create an Obj of the temp to use it as a call argument. |
| 4476 | arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); |
| 4477 | arg = gtNewObjNode(lvaGetStruct(lcl->gtLclNum), arg); |
| 4478 | } |
| 4479 | // Its fields will need to be accessed by address. |
| 4480 | lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUG_ARG(DNER_IsStructArg)); |
| 4481 | } |
| 4482 | } |
| 4483 | |
| 4484 | return arg; |
| 4485 | } |
| 4486 | |
| 4487 | #if FEATURE_MULTIREG_ARGS |
| 4488 | // Examine 'arg' and set up argValue, objClass and structSize |
| 4489 | // |
| 4490 | CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); |
| 4491 | GenTree* argValue = arg; // normally argValue will be arg, but see right below |
| 4492 | unsigned structSize = 0; |
| 4493 | |
| 4494 | if (arg->OperGet() == GT_OBJ) |
| 4495 | { |
| 4496 | GenTreeObj* argObj = arg->AsObj(); |
| 4497 | objClass = argObj->gtClass; |
| 4498 | structSize = argObj->Size(); |
| 4499 | assert(structSize == info.compCompHnd->getClassSize(objClass)); |
| 4500 | |
| 4501 | // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR. |
| 4502 | GenTree* op1 = argObj->gtOp1; |
| 4503 | if (op1->OperGet() == GT_ADDR) |
| 4504 | { |
| 4505 | GenTree* underlyingTree = op1->gtOp.gtOp1; |
| 4506 | |
| 4507 | // Only update to the same type. |
| 4508 | if (underlyingTree->OperIs(GT_LCL_VAR) && (underlyingTree->TypeGet() == argValue->TypeGet()) && |
| 4509 | (objClass == gtGetStructHandleIfPresent(underlyingTree))) |
| 4510 | { |
| 4511 | argValue = underlyingTree; |
| 4512 | } |
| 4513 | } |
| 4514 | } |
| 4515 | else if (arg->OperGet() == GT_LCL_VAR) |
| 4516 | { |
| 4517 | GenTreeLclVarCommon* varNode = arg->AsLclVarCommon(); |
| 4518 | unsigned varNum = varNode->gtLclNum; |
| 4519 | assert(varNum < lvaCount); |
| 4520 | LclVarDsc* varDsc = &lvaTable[varNum]; |
| 4521 | |
| 4522 | structSize = varDsc->lvExactSize; |
| 4523 | assert(structSize == info.compCompHnd->getClassSize(objClass)); |
| 4524 | } |
| 4525 | else |
| 4526 | { |
| 4527 | objClass = gtGetStructHandleIfPresent(arg); |
| 4528 | structSize = info.compCompHnd->getClassSize(objClass); |
| 4529 | } |
| 4530 | noway_assert(objClass != NO_CLASS_HANDLE); |
| 4531 | |
| 4532 | var_types hfaType = TYP_UNDEF; |
| 4533 | var_types elemType = TYP_UNDEF; |
| 4534 | unsigned elemCount = 0; |
| 4535 | unsigned elemSize = 0; |
| 4536 | var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0 |
| 4537 | |
| 4538 | hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF |
| 4539 | if (varTypeIsFloating(hfaType) |
| 4540 | #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
| 4541 | && !fgEntryPtr->isVararg |
| 4542 | #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
| 4543 | ) |
| 4544 | { |
| 4545 | elemType = hfaType; |
| 4546 | elemSize = genTypeSize(elemType); |
| 4547 | elemCount = structSize / elemSize; |
| 4548 | assert(elemSize * elemCount == structSize); |
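| | // For example (illustrative): an HFA such as struct { float x, y, z; } gives |
| | // elemType == TYP_FLOAT, elemSize == 4 and elemCount == 3 here. |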
| 4549 | for (unsigned inx = 0; inx < elemCount; inx++) |
| 4550 | { |
| 4551 | type[inx] = elemType; |
| 4552 | } |
| 4553 | } |
| 4554 | else |
| 4555 | { |
| 4556 | assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE); |
| 4557 | BYTE gcPtrs[MAX_ARG_REG_COUNT]; |
| 4558 | elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; |
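| | // For example (illustrative): a 12-byte struct on a 64-bit target gives elemCount == 2 here, |
| | // with the type of the last element adjusted further below when the struct is loaded via a GT_OBJ. |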
| 4559 | info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); |
| 4560 | |
| 4561 | for (unsigned inx = 0; inx < elemCount; inx++) |
| 4562 | { |
| 4563 | #ifdef UNIX_AMD64_ABI |
| 4564 | if (gcPtrs[inx] == TYPE_GC_NONE) |
| 4565 | { |
| 4566 | type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx], |
| 4567 | fgEntryPtr->structDesc.eightByteSizes[inx]); |
| 4568 | } |
| 4569 | else |
| 4570 | #endif // UNIX_AMD64_ABI |
| 4571 | { |
| 4572 | type[inx] = getJitGCType(gcPtrs[inx]); |
| 4573 | } |
| 4574 | } |
| 4575 | |
| 4576 | #ifndef UNIX_AMD64_ABI |
| 4577 | if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) |
| 4578 | { |
| 4579 | elemSize = TARGET_POINTER_SIZE; |
| 4580 | // We can safely widen this to aligned bytes since we are loading from |
| 4581 | // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and |
| 4582 | // lives in the stack frame or will be a promoted field. |
| 4583 | // |
| 4584 | structSize = elemCount * TARGET_POINTER_SIZE; |
| 4585 | } |
| 4586 | else // we must have a GT_OBJ |
| 4587 | { |
| 4588 | assert(argValue->OperGet() == GT_OBJ); |
| 4589 | |
| 4590 | // We need to load the struct from an arbitrary address, |
| 4591 | // and we can't read past the end of the struct (structSize bytes), |
| 4592 | // so we adjust the type of the last load here. |
| 4593 | // |
| 4594 | unsigned remainingBytes = structSize % TARGET_POINTER_SIZE; |
| 4595 | unsigned lastElem = elemCount - 1; |
| 4596 | if (remainingBytes != 0) |
| 4597 | { |
| 4598 | switch (remainingBytes) |
| 4599 | { |
| 4600 | case 1: |
| 4601 | type[lastElem] = TYP_BYTE; |
| 4602 | break; |
| 4603 | case 2: |
| 4604 | type[lastElem] = TYP_SHORT; |
| 4605 | break; |
| 4606 | #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI) |
| 4607 | case 4: |
| 4608 | type[lastElem] = TYP_INT; |
| 4609 | break; |
| 4610 | #endif // (_TARGET_ARM64_) || (UNIX_AMD64_ABI) |
| 4611 | default: |
| 4612 | noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg"); |
| 4613 | break; |
| 4614 | } |
| 4615 | } |
| 4616 | } |
| 4617 | #endif // !UNIX_AMD64_ABI |
| 4618 | } |
| 4619 | |
| 4620 | // We should still have a TYP_STRUCT |
| 4621 | assert(varTypeIsStruct(argValue->TypeGet())); |
| 4622 | |
| 4623 | GenTreeFieldList* newArg = nullptr; |
| 4624 | |
| 4625 | // Are we passing a struct LclVar? |
| 4626 | // |
| 4627 | if (argValue->OperGet() == GT_LCL_VAR) |
| 4628 | { |
| 4629 | GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); |
| 4630 | unsigned varNum = varNode->gtLclNum; |
| 4631 | assert(varNum < lvaCount); |
| 4632 | LclVarDsc* varDsc = &lvaTable[varNum]; |
| 4633 | |
| 4634 | // At this point any TYP_STRUCT LclVar must be an aligned struct |
| 4635 | // or an HFA struct, both of which are passed by value. |
| 4636 | // |
| 4637 | assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa()); |
| 4638 | |
| 4639 | varDsc->lvIsMultiRegArg = true; |
| 4640 | |
| 4641 | #ifdef DEBUG |
| 4642 | if (verbose) |
| 4643 | { |
| 4644 | JITDUMP("Multireg struct argument V%02u : ", varNum); |
| 4645 | fgEntryPtr->Dump(); |
| 4646 | } |
| 4647 | #endif // DEBUG |
| 4648 | |
| 4649 | #ifndef UNIX_AMD64_ABI |
| 4650 | // This local variable must match the layout of the 'objClass' type exactly |
| 4651 | if (varDsc->lvIsHfa() |
| 4652 | #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
| 4653 | && !fgEntryPtr->isVararg |
| 4654 | #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
| 4655 | ) |
| 4656 | { |
| 4657 | // We have an HFA struct |
| 4658 | noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE)); |
| 4659 | noway_assert(elemSize == genTypeSize(elemType)); |
| 4660 | noway_assert(elemCount == (varDsc->lvExactSize / elemSize)); |
| 4661 | noway_assert(elemSize * elemCount == varDsc->lvExactSize); |
| 4662 | |
| 4663 | for (unsigned inx = 0; (inx < elemCount); inx++) |
| 4664 | { |
| 4665 | noway_assert(type[inx] == elemType); |
| 4666 | } |
| 4667 | } |
| 4668 | else |
| 4669 | { |
| 4670 | #if defined(_TARGET_ARM64_) |
| 4671 | // We must have a 16-byte struct (non-HFA) |
| 4672 | noway_assert(elemCount == 2); |
| 4673 | #elif defined(_TARGET_ARM_) |
| 4674 | noway_assert(elemCount <= 4); |
| 4675 | #endif |
| 4676 | |
| 4677 | for (unsigned inx = 0; inx < elemCount; inx++) |
| 4678 | { |
| 4679 | CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx]; |
| 4680 | |
| 4681 | // We set up the type[inx] values above using the GC info from 'objClass'. |
| 4682 | // This GT_LCL_VAR must have the same GC layout info. |
| 4683 | // |
| 4684 | if (currentGcLayoutType != TYPE_GC_NONE) |
| 4685 | { |
| 4686 | noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType)); |
| 4687 | } |
| 4688 | else |
| 4689 | { |
| 4690 | // We may have used a small type when we set up the type[inx] values above. |
| 4691 | // We can safely widen this to TYP_I_IMPL. |
| 4692 | type[inx] = TYP_I_IMPL; |
| 4693 | } |
| 4694 | } |
| 4695 | } |
| 4696 | #endif // !UNIX_AMD64_ABI |
| 4697 | |
| 4698 | #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI) |
| 4699 | // Is this LclVar a promoted struct with exactly 2 fields? |
| 4700 | // TODO-ARM64-CQ: Support struct promoted HFA types here |
| 4701 | if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && (!varDsc->lvIsHfa() |
| 4702 | #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
| 4703 | && !fgEntryPtr->isVararg |
| 4704 | #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) |
| 4705 | )) |
| 4706 | { |
| 4707 | // See if we have two promoted fields that start at offset 0 and at offset TARGET_POINTER_SIZE. |
| 4708 | unsigned loVarNum = lvaGetFieldLocal(varDsc, 0); |
| 4709 | unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE); |
| 4710 | |
| 4711 | // Did we find the promoted fields at the necessary offsets? |
| 4712 | if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM)) |
| 4713 | { |
| 4714 | LclVarDsc* loVarDsc = &lvaTable[loVarNum]; |
| 4715 | LclVarDsc* hiVarDsc = &lvaTable[hiVarNum]; |
| 4716 | |
| 4717 | var_types loType = loVarDsc->lvType; |
| 4718 | var_types hiType = hiVarDsc->lvType; |
| 4719 | |
| 4720 | if (varTypeIsFloating(loType) || varTypeIsFloating(hiType)) |
| 4721 | { |
| 4722 | // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer |
| 4723 | // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered) |
| 4724 | // |
| 4725 | JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", |
| 4726 | varNum); |
| 4727 | // |
| 4728 | // we call lvaSetVarDoNotEnregister and do the proper transformation below. |
| 4729 | // |
| 4730 | } |
| 4731 | else |
| 4732 | { |
| 4733 | // We can use the two struct promoted fields as the two arguments |
| 4734 | |
| 4735 | GenTree* loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum); |
| 4736 | GenTree* hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum); |
| 4737 | |
| 4738 | // Create a new tree for 'arg' |
| 4739 | // replace the existing LDOBJ(ADDR(LCLVAR)) |
| 4740 | // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr)) |
| 4741 | // |
| 4742 | newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr); |
| 4743 | (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg); |
| 4744 | } |
| 4745 | } |
| 4746 | } |
| 4747 | else |
| 4748 | { |
| 4749 | // |
| 4750 | // We will create a list of GT_LCL_FLD nodes to pass this struct |
| 4751 | // |
| 4752 | lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); |
| 4753 | } |
| 4754 | #elif defined(_TARGET_ARM_) |
| 4755 | // Is this LclVar a promoted struct whose field count matches the register slot count? |
| 4756 | if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa()) |
| 4757 | { |
| 4758 | // See if we have promoted fields at all the necessary offsets. |
| 4759 | unsigned varNums[4]; |
| 4760 | bool hasBadVarNum = false; |
| 4761 | for (unsigned inx = 0; inx < elemCount; inx++) |
| 4762 | { |
| 4763 | varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx); |
| 4764 | if (varNums[inx] == BAD_VAR_NUM) |
| 4765 | { |
| 4766 | hasBadVarNum = true; |
| 4767 | break; |
| 4768 | } |
| 4769 | } |
| 4770 | |
| 4771 | // Did we find the promoted fields at the necessary offsets? |
| 4772 | if (!hasBadVarNum) |
| 4773 | { |
| 4774 | LclVarDsc* varDscs[4]; |
| 4775 | var_types varType[4]; |
| 4776 | bool varIsFloat = false; |
| 4777 | |
| 4778 | for (unsigned inx = 0; inx < elemCount; inx++) |
| 4779 | { |
| 4780 | varDscs[inx] = &lvaTable[varNums[inx]]; |
| 4781 | varType[inx] = varDscs[inx]->lvType; |
| 4782 | if (varTypeIsFloating(varType[inx])) |
| 4783 | { |
| 4784 | // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the |
| 4785 | // integer |
| 4786 | // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered) |
| 4787 | // |
| 4788 | JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", |
| 4789 | varNum); |
| 4790 | // |
| 4791 | // we call lvaSetVarDoNotEnregister and do the proper transformation below. |
| 4792 | // |
| 4793 | varIsFloat = true; |
| 4794 | break; |
| 4795 | } |
| 4796 | } |
| 4797 | |
| 4798 | if (!varIsFloat) |
| 4799 | { |
| 4800 | newArg = fgMorphLclArgToFieldlist(varNode); |
| 4801 | } |
| 4802 | } |
| 4803 | } |
| 4804 | else |
| 4805 | { |
| 4806 | // |
| 4807 | // We will create a list of GT_LCL_FLD nodes to pass this struct |
| 4808 | // |
| 4809 | lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); |
| 4810 | } |
| 4811 | #endif // _TARGET_ARM_ |
| 4812 | } |
| 4813 | |
| 4814 | // If we didn't set newArg to a new GT_FIELD_LIST tree |
| 4815 | // |
| 4816 | if (newArg == nullptr) |
| 4817 | { |
| 4818 | if (fgEntryPtr->regNum == REG_STK) |
| 4819 | { |
| 4820 | // We leave this stack passed argument alone |
| 4821 | return arg; |
| 4822 | } |
| 4823 | |
| 4824 | // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)? |
| 4825 | // A GT_LCL_FLD could also have a 16-byte struct or an HFA struct inside it. |
| 4826 | // |
| 4827 | if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) |
| 4828 | { |
| 4829 | GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); |
| 4830 | unsigned varNum = varNode->gtLclNum; |
| 4831 | assert(varNum < lvaCount); |
| 4832 | LclVarDsc* varDsc = &lvaTable[varNum]; |
| 4833 | |
| 4834 | unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0; |
| 4835 | unsigned lastOffset = baseOffset + structSize; |
| 4836 | |
| 4837 | // The allocated size of our LocalVar must be at least as big as lastOffset |
| 4838 | assert(varDsc->lvSize() >= lastOffset); |
| 4839 | |
| 4840 | if (varDsc->lvStructGcCount > 0) |
| 4841 | { |
| 4842 | // the baseOffset must be pointer-size aligned |
| 4843 | noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0); |
| 4844 | #ifndef UNIX_AMD64_ABI |
| 4845 | noway_assert(elemSize == TARGET_POINTER_SIZE); |
| 4846 | #endif |
| 4847 | unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE; |
| 4848 | const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable |
| 4849 | for (unsigned inx = 0; (inx < elemCount); inx++) |
| 4850 | { |
| 4851 | // The GC information must match what we set up using 'objClass' |
| 4852 | if ((gcPtrs[baseIndex + inx] != TYPE_GC_NONE) || varTypeGCtype(type[inx])) |
| 4853 | { |
| 4854 | noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx])); |
| 4855 | } |
| 4856 | } |
| 4857 | } |
| 4858 | else // this varDsc contains no GC pointers |
| 4859 | { |
| 4860 | for (unsigned inx = 0; inx < elemCount; inx++) |
| 4861 | { |
| 4862 | // The GC information must match what we set up using 'objClass' |
| 4863 | noway_assert(!varTypeIsGC(type[inx])); |
| 4864 | } |
| 4865 | } |
| 4866 | |
| 4867 | // |
| 4868 | // We create a list of GT_LCL_FLD nodes to pass this struct |
| 4869 | // |
| 4870 | lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); |
| 4871 | |
| 4872 | // Create a new tree for 'arg' |
| 4873 | // replace the existing LDOBJ(ADDR(LCLVAR)) |
| 4874 | // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...) |
| 4875 | // |
| 4876 | unsigned offset = baseOffset; |
| 4877 | GenTreeFieldList* listEntry = nullptr; |
| 4878 | for (unsigned inx = 0; inx < elemCount; inx++) |
| 4879 | { |
| 4880 | elemSize = genTypeSize(type[inx]); |
| 4881 | GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); |
| 4882 | listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry); |
| 4883 | if (newArg == nullptr) |
| 4884 | { |
| 4885 | newArg = listEntry; |
| 4886 | } |
| 4887 | offset += elemSize; |
| 4888 | } |
| 4889 | } |
| 4890 | // Are we passing a GT_OBJ struct? |
| 4891 | // |
| 4892 | else if (argValue->OperGet() == GT_OBJ) |
| 4893 | { |
| 4894 | GenTreeObj* argObj = argValue->AsObj(); |
| 4895 | GenTree* baseAddr = argObj->gtOp1; |
| 4896 | var_types addrType = baseAddr->TypeGet(); |
| 4897 | |
| 4898 | if (baseAddr->OperGet() == GT_ADDR) |
| 4899 | { |
| 4900 | GenTree* addrTaken = baseAddr->gtOp.gtOp1; |
| 4901 | if (addrTaken->IsLocal()) |
| 4902 | { |
| 4903 | GenTreeLclVarCommon* varNode = addrTaken->AsLclVarCommon(); |
| 4904 | unsigned varNum = varNode->gtLclNum; |
| 4905 | // We may be accessing a non-struct type (for example, a long) as a struct type. |
| 4906 | // Make sure the lclVar lives on the stack so that its fields are accessible by address. |
| 4907 | lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField)); |
| 4908 | } |
| 4909 | } |
| 4910 | |
| 4911 | // Create a new tree for 'arg' |
| 4912 | // replace the existing LDOBJ(EXPR) |
| 4913 | // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...) |
| 4914 | // |
| 4915 | |
| 4916 | unsigned offset = 0; |
| 4917 | GenTreeFieldList* listEntry = nullptr; |
| 4918 | for (unsigned inx = 0; inx < elemCount; inx++) |
| 4919 | { |
| 4920 | elemSize = genTypeSize(type[inx]); |
| 4921 | GenTree* curAddr = baseAddr; |
| 4922 | if (offset != 0) |
| 4923 | { |
| 4924 | GenTree* baseAddrDup = gtCloneExpr(baseAddr); |
| 4925 | noway_assert(baseAddrDup != nullptr); |
| 4926 | curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); |
| 4927 | } |
| 4928 | else |
| 4929 | { |
| 4930 | curAddr = baseAddr; |
| 4931 | } |
| 4932 | GenTree* curItem = gtNewIndir(type[inx], curAddr); |
| 4933 | |
| 4934 | // For safety all GT_IND should have at least GT_GLOB_REF set. |
| 4935 | curItem->gtFlags |= GTF_GLOB_REF; |
| 4936 | |
| 4937 | listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry); |
| 4938 | if (newArg == nullptr) |
| 4939 | { |
| 4940 | newArg = listEntry; |
| 4941 | } |
| 4942 | offset += elemSize; |
| 4943 | } |
| 4944 | } |
| 4945 | } |
| 4946 | |
| 4947 | #ifdef DEBUG |
| 4948 | // If we reach here we should have set newArg to something |
| 4949 | if (newArg == nullptr) |
| 4950 | { |
| 4951 | gtDispTree(argValue); |
| 4952 | assert(!"Missing case in fgMorphMultiregStructArg"); |
| 4953 | } |
| 4954 | #endif |
| 4955 | |
| 4956 | noway_assert(newArg != nullptr); |
| 4957 | noway_assert(newArg->OperIsFieldList()); |
| 4958 | |
| 4959 | // We need to propagate any GTF_ALL_EFFECT flags from the end of the list back to the beginning. |
| 4960 | // This is verified in fgDebugCheckFlags(). |
| 4961 | |
| 4962 | ArrayStack<GenTree*> stack(getAllocator(CMK_ArrayStack)); |
| 4963 | GenTree* tree; |
| 4964 | for (tree = newArg; (tree->gtGetOp2() != nullptr) && tree->gtGetOp2()->OperIsFieldList(); tree = tree->gtGetOp2()) |
| 4965 | { |
| 4966 | stack.Push(tree); |
| 4967 | } |
| 4968 | |
| 4969 | unsigned propFlags = (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
| 4970 | tree->gtFlags |= propFlags; |
| 4971 | |
| 4972 | while (!stack.Empty()) |
| 4973 | { |
| 4974 | tree = stack.Pop(); |
| 4975 | propFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
| 4976 | propFlags |= (tree->gtGetOp2()->gtFlags & GTF_ALL_EFFECT); |
| 4977 | tree->gtFlags |= propFlags; |
| 4978 | } |
| 4979 | |
| 4980 | #ifdef DEBUG |
| 4981 | if (verbose) |
| 4982 | { |
| 4983 | printf("fgMorphMultiregStructArg created tree:\n"); |
| 4984 | gtDispTree(newArg); |
| 4985 | } |
| 4986 | #endif |
| 4987 | |
| 4988 | arg = newArg; // consider calling fgMorphTree(newArg); |
| 4989 | |
| 4990 | #endif // FEATURE_MULTIREG_ARGS |
| 4991 | |
| 4992 | return arg; |
| 4993 | } |
| 4994 | |
| 4995 | //------------------------------------------------------------------------ |
| 4996 | // fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields |
| 4997 | // |
| 4998 | // Arguments: |
| 4999 | // lcl - The GT_LCL_VAR node we will transform |
| 5000 | // |
| 5001 | // Return value: |
| 5002 | // The new GT_FIELD_LIST that we have created. |
| 5003 | // |
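| | // For example (an illustrative sketch; the local numbers are hypothetical): a promoted struct |
| | // local V03 whose two int fields were promoted to V04 and V05 becomes |
| | //     FIELD_LIST(LCL_VAR V04 [+0] TYP_INT, FIELD_LIST(LCL_VAR V05 [+4] TYP_INT, nullptr)) |
| | // where each entry records the field's offset and type. |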
| 5004 | GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) |
| 5005 | { |
| 5006 | LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]); |
| 5007 | assert(varDsc->lvPromoted == true); |
| 5008 | |
| 5009 | unsigned fieldCount = varDsc->lvFieldCnt; |
| 5010 | GenTreeFieldList* listEntry = nullptr; |
| 5011 | GenTreeFieldList* newArg = nullptr; |
| 5012 | unsigned fieldLclNum = varDsc->lvFieldLclStart; |
| 5013 | |
| 5014 | // We can use the struct promoted fields as arguments |
| 5015 | for (unsigned i = 0; i < fieldCount; i++) |
| 5016 | { |
| 5017 | LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; |
| 5018 | GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType); |
| 5019 | listEntry = new (this, GT_FIELD_LIST) |
| 5020 | GenTreeFieldList(lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, listEntry); |
| 5021 | if (newArg == nullptr) |
| 5022 | { |
| 5023 | newArg = listEntry; |
| 5024 | } |
| 5025 | fieldLclNum++; |
| 5026 | } |
| 5027 | return newArg; |
| 5028 | } |
| 5029 | |
| 5030 | //------------------------------------------------------------------------ |
| 5031 | // fgMakeOutgoingStructArgCopy: make a copy of a struct variable if necessary, |
| 5032 | // to pass to a callee. |
| 5033 | // |
| 5034 | // Arguments: |
| 5035 | // call - call being processed |
| 5036 | // args - args for the call |
| 5037 | //   argIndex - arg being processed |
| 5038 | // copyBlkClass - class handle for the struct |
| 5039 | // |
| 5040 | // Return value: |
| 5041 | //   none (the copy is installed into the arg list and the fgArgInfo entry is updated to evaluate to the temp) |
| 5042 | // |
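| | // Roughly (an illustrative sketch; 'tmpN' stands for the temp grabbed below), a by-value struct |
| | // argument 'argx' is handled by introducing a copy assignment of 'argx' into tmpN (morphed via |
| | // fgMorphCopyBlock); on FEATURE_FIXED_OUT_ARGS targets the copy is evaluated early and tmpN is |
| | // passed later (see EvalArgsToTemps), otherwise the argument becomes COMMA(copy, <tmpN arg node>). |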
| 5043 | void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, |
| 5044 | GenTree* args, |
| 5045 | unsigned argIndex, |
| 5046 | CORINFO_CLASS_HANDLE copyBlkClass) |
| 5047 | { |
| 5048 | GenTree* argx = args->Current(); |
| 5049 | noway_assert(argx->gtOper != GT_MKREFANY); |
| 5050 | fgArgTabEntry* argEntry = Compiler::gtArgEntryByNode(call, argx); |
| 5051 | |
| 5052 | // If we're optimizing, see if we can avoid making a copy. |
| 5053 | // |
| 5054 | // We don't need a copy if this is the last use of an implicit by-ref local. |
| 5055 | // |
| 5056 | // We can't determine that all of the time, but if there is only |
| 5057 | // one use and the method has no loops, then this use must be the last. |
| 5058 | if (opts.OptimizationEnabled()) |
| 5059 | { |
| 5060 | GenTreeLclVarCommon* lcl = nullptr; |
| 5061 | |
| 5062 | if (argx->OperIsLocal()) |
| 5063 | { |
| 5064 | lcl = argx->AsLclVarCommon(); |
| 5065 | } |
| 5066 | else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal()) |
| 5067 | { |
| 5068 | lcl = argx->AsObj()->Addr()->AsLclVarCommon(); |
| 5069 | } |
| 5070 | |
| 5071 | if (lcl != nullptr) |
| 5072 | { |
| 5073 | unsigned varNum = lcl->AsLclVarCommon()->GetLclNum(); |
| 5074 | if (lvaIsImplicitByRefLocal(varNum)) |
| 5075 | { |
| 5076 | LclVarDsc* varDsc = &lvaTable[varNum]; |
| 5077 | // JIT_TailCall helper has an implicit assumption that all tail call arguments live |
| 5078 | // on the caller's frame. If an argument lives on the caller's caller's frame, it may get |
| 5079 | // overwritten if that frame is reused for the tail call. Therefore, we should always copy |
| 5080 | // struct parameters if they are passed as arguments to a tail call. |
| 5081 | if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt(RCS_EARLY) == 1) && !fgMightHaveLoop()) |
| 5082 | { |
| 5083 | varDsc->setLvRefCnt(0, RCS_EARLY); |
| 5084 | args->gtOp.gtOp1 = lcl; |
| 5085 | argEntry->node = lcl; |
| 5086 | |
| 5087 | JITDUMP("did not have to make outgoing copy for V%2d", varNum); |
| 5088 | return; |
| 5089 | } |
| 5090 | } |
| 5091 | } |
| 5092 | } |
| 5093 | |
| 5094 | if (fgOutgoingArgTemps == nullptr) |
| 5095 | { |
| 5096 | fgOutgoingArgTemps = hashBv::Create(this); |
| 5097 | } |
| 5098 | |
| 5099 | unsigned tmp = 0; |
| 5100 | bool found = false; |
| 5101 | |
| 5102 | // Attempt to find a local we have already used for an outgoing struct and reuse it. |
| 5103 | // We do not reuse within a statement. |
| 5104 | if (!opts.MinOpts()) |
| 5105 | { |
| 5106 | indexType lclNum; |
| 5107 | FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps) |
| 5108 | { |
| 5109 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 5110 | if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) && |
| 5111 | !fgCurrentlyInUseArgTemps->testBit(lclNum)) |
| 5112 | { |
| 5113 | tmp = (unsigned)lclNum; |
| 5114 | found = true; |
| 5115 | JITDUMP("reusing outgoing struct arg"); |
| 5116 | break; |
| 5117 | } |
| 5118 | } |
| 5119 | NEXT_HBV_BIT_SET; |
| 5120 | } |
| 5121 | |
| 5122 | // Create the CopyBlk tree and insert it. |
| 5123 | if (!found) |
| 5124 | { |
| 5125 | // Get a new temp |
| 5126 | // Here we don't need the unsafe value class check, since the address of this temp is used only in the copyblk. |
| 5127 | tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument")); |
| 5128 | lvaSetStruct(tmp, copyBlkClass, false); |
| 5129 | if (call->IsVarargs()) |
| 5130 | { |
| 5131 | lvaSetStructUsedAsVarArg(tmp); |
| 5132 | } |
| 5133 | |
| 5134 | fgOutgoingArgTemps->setBit(tmp); |
| 5135 | } |
| 5136 | |
| 5137 | fgCurrentlyInUseArgTemps->setBit(tmp); |
| 5138 | |
| 5139 | // TYP_SIMD structs should not be enregistered, since the ABI requires them to be |
| 5140 | // allocated on the stack and their address to be passed. |
| 5141 | if (lclVarIsSIMDType(tmp)) |
| 5142 | { |
| 5143 | lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct)); |
| 5144 | } |
| 5145 | |
| 5146 | // Create a reference to the temp |
| 5147 | GenTree* dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType); |
| 5148 | dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction. |
| 5149 | |
| 5150 | if (argx->gtOper == GT_OBJ) |
| 5151 | { |
| 5152 | argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT); |
| 5153 | argx->SetIndirExceptionFlags(this); |
| 5154 | } |
| 5155 | else |
| 5156 | { |
| 5157 | argx->gtFlags |= GTF_DONT_CSE; |
| 5158 | } |
| 5159 | |
| 5160 | // Copy the valuetype to the temp |
| 5161 | unsigned size = info.compCompHnd->getClassSize(copyBlkClass); |
| 5162 | GenTree* copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */); |
| 5163 | copyBlk = fgMorphCopyBlock(copyBlk); |
| 5164 | |
| 5165 | #if FEATURE_FIXED_OUT_ARGS |
| 5166 | |
| 5167 | // Do the copy early, and evaluate the temp later (see EvalArgsToTemps) |
| 5168 | // When on Unix, create a LCL_FLD for structs passed in more than one register. See fgMakeTmpArgNode |
| 5169 | GenTree* arg = copyBlk; |
| 5170 | |
| 5171 | #else // FEATURE_FIXED_OUT_ARGS |
| 5172 | |
| 5173 | // Structs are always on the stack, and thus never need temps |
| 5174 | // so we have to put the copy and temp all into one expression. |
| 5175 | argEntry->tmpNum = tmp; |
| 5176 | GenTree* arg = fgMakeTmpArgNode(argEntry); |
| 5177 | |
| 5178 | // Change the expression to "(tmp=val),tmp" |
| 5179 | arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); |
| 5180 | |
| 5181 | #endif // FEATURE_FIXED_OUT_ARGS |
| 5182 | |
| 5183 | args->gtOp.gtOp1 = arg; |
| 5184 | call->fgArgInfo->EvalToTmp(argEntry, tmp, arg); |
| 5185 | |
| 5186 | return; |
| 5187 | } |
| 5188 | |
| 5189 | #ifdef _TARGET_ARM_ |
| 5190 | // See declaration for specification comment. |
| 5191 | void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc, |
| 5192 | unsigned firstArgRegNum, |
| 5193 | regMaskTP* pArgSkippedRegMask) |
| 5194 | { |
| 5195 | assert(varDsc->lvPromoted); |
| 5196 | // There's no way to do these calculations without breaking abstraction and assuming that |
| 5197 | // integer register arguments are consecutive ints. They are on ARM. |
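| | // For example (an illustrative sketch under the ARM assumptions above): for a promoted |
| | // struct { int a; long long b; } whose first argument register is r0, field 'a' ends in r0 |
| | // while field 'b' (at offset 8) starts at register offset 2, so r1 would be added to |
| | // *pArgSkippedRegMask by the loop below. |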
| 5198 | |
| 5199 | // To start, figure out what register contains the last byte of the first argument. |
| 5200 | LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart]; |
| 5201 | unsigned lastFldRegOfLastByte = |
| 5202 | (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE; |
| 5204 | |
| 5205 | // Now we're keeping track of the register that the last field ended in; see what registers |
| 5206 | // subsequent fields start in, and whether any are skipped. |
| 5207 | // (We assume here the invariant that the fields are sorted in offset order.) |
| 5208 | for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++) |
| 5209 | { |
| 5210 | unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset; |
| 5211 | LclVarDsc* fldVarDsc = &lvaTable[fldVarNum]; |
| 5212 | unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE; |
| 5213 | assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields. |
| 5214 | // This loop enumerates the offsets of any registers that are skipped: starting at the |
| 5215 | // register after the one containing the last byte of the previous field, and stopping |
| 5216 | // before the first register of the current field. |
| 5217 | for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset; |
| 5218 | skippedRegOffsets++) |
| 5219 | { |
| 5220 | // If the register number would not be an arg reg, we're done. |
| 5221 | if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG) |
| 5222 | return; |
| 5223 | *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets)); |
| 5224 | } |
| 5225 | lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE; |
| 5226 | } |
| 5227 | } |
| 5228 | |
| 5229 | #endif // _TARGET_ARM_ |
| 5230 | |
| 5231 | //**************************************************************************** |
| 5232 | // fgFixupStructReturn: |
| 5233 | // The companion to impFixupCallStructReturn. Now that the importer is done, |
| 5234 | // change the gtType to the precomputed native return type. |
| 5235 | // Requires that callNode currently has a struct type. |
| 5236 | // |
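| | // For example (illustrative), a call whose struct return fits in a single register is re-typed |
| | // here from TYP_STRUCT to the primitive type chosen by getReturnTypeForStruct (widened via |
| | // genActualType), while a call that returns via a return buffer is left unchanged; the asserts |
| | // below check the resulting invariants. |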
| 5237 | void Compiler::fgFixupStructReturn(GenTree* callNode) |
| 5238 | { |
| 5239 | assert(varTypeIsStruct(callNode)); |
| 5240 | |
| 5241 | GenTreeCall* call = callNode->AsCall(); |
| 5242 | bool callHasRetBuffArg = call->HasRetBufArg(); |
| 5243 | bool isHelperCall = call->IsHelperCall(); |
| 5244 | |
| 5245 | // Decide on the proper return type for this call that currently returns a struct |
| 5246 | // |
| 5247 | CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd; |
| 5248 | Compiler::structPassingKind howToReturnStruct; |
| 5249 | var_types returnType; |
| 5250 | |
| 5251 | // There are a couple of Helper Calls that say they return a TYP_STRUCT but they |
| 5252 | // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType) |
| 5253 | // |
| 5254 | // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD |
| 5255 | // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD |
| 5256 | // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL |
| 5257 | // |
| 5258 | if (isHelperCall) |
| 5259 | { |
| 5260 | assert(!callHasRetBuffArg); |
| 5261 | assert(retClsHnd == NO_CLASS_HANDLE); |
| 5262 | |
| 5263 | // Now that we are past the importer, re-type this node |
| 5264 | howToReturnStruct = SPK_PrimitiveType; |
| 5265 | returnType = (var_types)call->gtReturnType; |
| 5266 | } |
| 5267 | else |
| 5268 | { |
| 5269 | returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct); |
| 5270 | } |
| 5271 | |
| 5272 | if (howToReturnStruct == SPK_ByReference) |
| 5273 | { |
| 5274 | assert(returnType == TYP_UNKNOWN); |
| 5275 | assert(callHasRetBuffArg); |
| 5276 | } |
| 5277 | else |
| 5278 | { |
| 5279 | assert(returnType != TYP_UNKNOWN); |
| 5280 | |
| 5281 | if (!varTypeIsStruct(returnType)) |
| 5282 | { |
| 5283 | // Widen the primitive type if necessary |
| 5284 | returnType = genActualType(returnType); |
| 5285 | } |
| 5286 | call->gtType = returnType; |
| 5287 | } |
| 5288 | |
| 5289 | #if FEATURE_MULTIREG_RET |
| 5290 | // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer. |
| 5291 | assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg); |
| 5292 | #else // !FEATURE_MULTIREG_RET |
| 5293 | // No more struct returns |
| 5294 | assert(call->TypeGet() != TYP_STRUCT); |
| 5295 | #endif |
| 5296 | |
| 5297 | #if !defined(UNIX_AMD64_ABI) |
| 5298 | // If it was a struct return, it has been transformed into a call |
| 5299 | // with a return buffer (that returns TYP_VOID) or into a return |
| 5300 | // of a primitive/enregisterable type |
| 5301 | assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID)); |
| 5302 | #endif |
| 5303 | } |
| 5304 | |
| 5305 | /***************************************************************************** |
| 5306 | * |
| 5307 | * A little helper used to rearrange nested commutative operations. The |
| 5308 | * effect is that nested associative, commutative operations are transformed |
| 5309 | * into a 'left-deep' tree, i.e. into something like this: |
| 5310 | * |
| 5311 | * (((a op b) op c) op d) op... |
| 5312 | */ |
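| | // For example (illustrative), "x + (y + (z + w))" is rewritten into "((x + y) + z) + w" by |
| | // repeatedly applying the "(x op (y op z)) => ((x op y) op z)" transformation below. |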
| 5313 | |
| 5314 | #if REARRANGE_ADDS |
| 5315 | |
| 5316 | void Compiler::fgMoveOpsLeft(GenTree* tree) |
| 5317 | { |
| 5318 | GenTree* op1; |
| 5319 | GenTree* op2; |
| 5320 | genTreeOps oper; |
| 5321 | |
| 5322 | do |
| 5323 | { |
| 5324 | op1 = tree->gtOp.gtOp1; |
| 5325 | op2 = tree->gtOp.gtOp2; |
| 5326 | oper = tree->OperGet(); |
| 5327 | |
| 5328 | noway_assert(GenTree::OperIsCommutative(oper)); |
| 5329 | noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL); |
| 5330 | noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder); |
| 5331 | noway_assert(oper == op2->gtOper); |
| 5332 | |
| 5333 | // Commutativity doesn't hold if overflow checks are needed |
| 5334 | |
| 5335 | if (tree->gtOverflowEx() || op2->gtOverflowEx()) |
| 5336 | { |
| 5337 | return; |
| 5338 | } |
| 5339 | |
| 5340 | if (gtIsActiveCSE_Candidate(op2)) |
| 5341 | { |
| 5342 | // If we have marked op2 as a CSE candidate, |
| 5343 | // we can't perform a commutative reordering |
| 5344 | // because any value numbers that we computed for op2 |
| 5345 | // will be incorrect after performing a commutative reordering |
| 5346 | // |
| 5347 | return; |
| 5348 | } |
| 5349 | |
| 5350 | if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT)) |
| 5351 | { |
| 5352 | return; |
| 5353 | } |
| 5354 | |
| 5355 | // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators |
| 5356 | if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)) |
| 5357 | { |
| 5358 | return; |
| 5359 | } |
| 5360 | |
| 5361 | if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN) |
| 5362 | { |
| 5363 | // We could deal with this, but we were always broken and just hit the assert |
| 5364 | // below regarding flags, which means it's not frequent, so will just bail out. |
| 5365 | // See #195514 |
| 5366 | return; |
| 5367 | } |
| 5368 | |
| 5369 | noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx()); |
| 5370 | |
| 5371 | GenTree* ad1 = op2->gtOp.gtOp1; |
| 5372 | GenTree* ad2 = op2->gtOp.gtOp2; |
| 5373 | |
| 5374 | // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a GT_INT |
| 5375 | // We cannot reorder such GT_OR trees |
| 5376 | // |
| 5377 | if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet())) |
| 5378 | { |
| 5379 | break; |
| 5380 | } |
| 5381 | |
| 5382 | // Don't split up a byref calculation and create a new byref. E.g., |
| 5383 | // [byref]+ (ref, [int]+ (int, int)) => [byref]+ ([byref]+ (ref, int), int). |
| 5384 | // Doing this transformation could create a situation where the first |
| 5385 | // addition (that is, [byref]+ (ref, int) ) creates a byref pointer that |
| 5386 | // no longer points within the ref object. If a GC happens, the byref won't |
| 5387 | // get updated. This can happen, for instance, if one of the int components |
| 5388 | // is negative. It also requires the address generation be in a fully-interruptible |
| 5389 | // code region. |
| 5390 | // |
| 5391 | if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL) |
| 5392 | { |
| 5393 | assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD)); |
| 5394 | break; |
| 5395 | } |
| 5396 | |
| 5397 | /* Change "(x op (y op z))" to "(x op y) op z" */ |
| 5398 | /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */ |
| 5399 | |
| 5400 | GenTree* new_op1 = op2; |
| 5401 | |
| 5402 | new_op1->gtOp.gtOp1 = op1; |
| 5403 | new_op1->gtOp.gtOp2 = ad1; |
| 5404 | |
| 5405 | /* Change the flags. */ |
| 5406 | |
| 5407 | // Make sure we aren't throwing away any flags |
| 5408 | noway_assert((new_op1->gtFlags & |
| 5409 | ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag. |
| 5410 | GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated |
| 5411 | GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0); |
| 5412 | |
| 5413 | new_op1->gtFlags = |
| 5414 | (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag. |
| 5415 | (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT); |
| 5416 | |
| 5417 | /* Retype new_op1 if it has now become (or is no longer) a GC ptr. */ |
| 5418 | |
| 5419 | if (varTypeIsGC(op1->TypeGet())) |
| 5420 | { |
| 5421 | noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && |
| 5422 | oper == GT_ADD) || // byref(ref + (int+int)) |
| 5423 | (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && |
| 5424 | oper == GT_OR)); // int(gcref | int(gcref|intval)) |
| 5425 | |
| 5426 | new_op1->gtType = tree->gtType; |
| 5427 | } |
| 5428 | else if (varTypeIsGC(ad2->TypeGet())) |
| 5429 | { |
| 5430 | // Neither ad1 nor op1 is GC, so new_op1 isn't either |
| 5431 | noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL); |
| 5432 | new_op1->gtType = TYP_I_IMPL; |
| 5433 | } |
| 5434 | |
| 5435 | // If new_op1 is a new expression, assign it a new unique value number. |
| 5436 | // vnStore is null before the ValueNumber phase has run |
| 5437 | if (vnStore != nullptr) |
| 5438 | { |
| 5439 | // We can only keep the old value number on new_op1 if both op1 and ad2 |
| 5440 | // have the same non-NoVN value numbers. Since op is commutative, comparing |
| 5441 | // only ad2 and op1 is enough. |
| 5442 | if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) || |
| 5443 | (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) || |
| 5444 | (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal())) |
| 5445 | { |
| 5446 | new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet())); |
| 5447 | } |
| 5448 | } |
| 5449 | |
| 5450 | tree->gtOp.gtOp1 = new_op1; |
| 5451 | tree->gtOp.gtOp2 = ad2; |
| 5452 | |
| 5453 | /* If 'new_op1' is now the same nested op, process it recursively */ |
| 5454 | |
| 5455 | if ((ad1->gtOper == oper) && !ad1->gtOverflowEx()) |
| 5456 | { |
| 5457 | fgMoveOpsLeft(new_op1); |
| 5458 | } |
| 5459 | |
| 5460 | /* If 'ad2' is now the same nested op, process it |
| 5461 | * Instead of recursion, we set up op1 and op2 for the next loop. |
| 5462 | */ |
| 5463 | |
| 5464 | op1 = new_op1; |
| 5465 | op2 = ad2; |
| 5466 | } while ((op2->gtOper == oper) && !op2->gtOverflowEx()); |
| 5467 | |
| 5468 | return; |
| 5469 | } |
| 5470 | |
| 5471 | #endif |
| 5472 | |
| 5473 | /*****************************************************************************/ |
| 5474 | |
| 5475 | void Compiler::fgSetRngChkTarget(GenTree* tree, bool delay) |
| 5476 | { |
| 5477 | if (tree->OperIsBoundsCheck()) |
| 5478 | { |
| 5479 | GenTreeBoundsChk* const boundsChk = tree->AsBoundsChk(); |
| 5480 | BasicBlock* const failBlock = fgSetRngChkTargetInner(boundsChk->gtThrowKind, delay); |
| 5481 | if (failBlock != nullptr) |
| 5482 | { |
| 5483 | boundsChk->gtIndRngFailBB = gtNewCodeRef(failBlock); |
| 5484 | } |
| 5485 | } |
| 5486 | else if (tree->OperIs(GT_INDEX_ADDR)) |
| 5487 | { |
| 5488 | GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr(); |
| 5489 | BasicBlock* const failBlock = fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay); |
| 5490 | if (failBlock != nullptr) |
| 5491 | { |
| 5492 | indexAddr->gtIndRngFailBB = gtNewCodeRef(failBlock); |
| 5493 | } |
| 5494 | } |
| 5495 | else |
| 5496 | { |
| 5497 | noway_assert(tree->OperIs(GT_ARR_ELEM, GT_ARR_INDEX)); |
| 5498 | fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay); |
| 5499 | } |
| 5500 | } |
| 5501 | |
| 5502 | BasicBlock* Compiler::fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay) |
| 5503 | { |
| 5504 | if (opts.MinOpts()) |
| 5505 | { |
| 5506 | delay = false; |
| 5507 | } |
| 5508 | |
| 5509 | if (!opts.compDbgCode) |
| 5510 | { |
| 5511 | if (!delay && !compIsForInlining()) |
| 5512 | { |
| 5513 | // Create/find the appropriate "range-fail" label |
| 5514 | return fgRngChkTarget(compCurBB, kind); |
| 5515 | } |
| 5516 | } |
| 5517 | |
| 5518 | return nullptr; |
| 5519 | } |
| 5520 | |
| 5521 | /***************************************************************************** |
| 5522 | * |
| 5523 | * Expand a GT_INDEX node and fully morph the child operands |
| 5524 | * |
| 5525 | * The original GT_INDEX node is bashed into the GT_IND node that accesses |
| 5526 | * the array element. We expand the GT_INDEX node into a larger tree that |
| 5527 | * evaluates the array base and index. The simplest expansion is a GT_COMMA |
| 5528 | * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag. |
| 5529 | * For complex array or index expressions one or more GT_COMMA assignments |
| 5530 | * are inserted so that we only evaluate the array or index expressions once. |
| 5531 | * |
| 5532 | * The fully expanded tree is then morphed. This causes gtFoldExpr to |
| 5533 | * perform local constant prop and reorder the constants in the tree and |
| 5534 | * fold them. |
| 5535 | * |
| 5536 | * We then parse the resulting array element expression in order to locate |
| 5537 | * and label the constants and variables that occur in the tree. |
| 5538 | */ |
| 5539 | |
| 5540 | const int MAX_ARR_COMPLEXITY = 4; |
| 5541 | const int MAX_INDEX_COMPLEXITY = 4; |
| 5542 | |
| 5543 | GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) |
| 5544 | { |
| 5545 | noway_assert(tree->gtOper == GT_INDEX); |
| 5546 | GenTreeIndex* asIndex = tree->AsIndex(); |
| 5547 | |
| 5548 | var_types elemTyp = tree->TypeGet(); |
| 5549 | unsigned elemSize = tree->gtIndex.gtIndElemSize; |
| 5550 | CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass; |
| 5551 | |
| 5552 | noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr); |
| 5553 | |
| 5554 | #ifdef FEATURE_SIMD |
| 5555 | if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= maxSIMDStructBytes()) |
| 5556 | { |
| 5557 | // If this is a SIMD type, this is the point at which we lose the type information, |
| 5558 | // so we need to set the correct type on the GT_IND. |
| 5559 | // (We don't care about the base type here, so we only check, but don't retain, the return value). |
| 5560 | unsigned simdElemSize = 0; |
| 5561 | if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN) |
| 5562 | { |
| 5563 | assert(simdElemSize == elemSize); |
| 5564 | elemTyp = getSIMDTypeForSize(elemSize); |
| 5565 | // This is the new type of the node. |
| 5566 | tree->gtType = elemTyp; |
| 5567 | // Now set elemStructType to null so that we don't confuse value numbering. |
| 5568 | elemStructType = nullptr; |
| 5569 | } |
| 5570 | } |
| 5571 | #endif // FEATURE_SIMD |
| 5572 | |
| 5573 | // Set up the array length's offset into lenOffs |
| 5574 | // and the first element's offset into elemOffs |
| 5575 | ssize_t lenOffs; |
| 5576 | ssize_t elemOffs; |
| 5577 | if (tree->gtFlags & GTF_INX_STRING_LAYOUT) |
| 5578 | { |
| 5579 | lenOffs = OFFSETOF__CORINFO_String__stringLen; |
| 5580 | elemOffs = OFFSETOF__CORINFO_String__chars; |
| 5581 | tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE |
| 5582 | } |
| 5583 | else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT) |
| 5584 | { |
| 5585 | lenOffs = OFFSETOF__CORINFO_Array__length; |
| 5586 | elemOffs = eeGetEEInfo()->offsetOfObjArrayData; |
| 5587 | } |
| 5588 | else // We have a standard array |
| 5589 | { |
| 5590 | lenOffs = OFFSETOF__CORINFO_Array__length; |
| 5591 | elemOffs = OFFSETOF__CORINFO_Array__data; |
| 5592 | } |
| 5593 | |
| 5594 | // In minopts, we expand GT_INDEX to GT_IND(GT_INDEX_ADDR) in order to minimize the size of the IR. As minopts |
| 5595 | // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down. |
| 5596 | // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion |
| 5597 | // performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in |
| 5598 | // minopts). |
| 5599 | // |
| 5600 | // When we *are* optimizing, we fully expand GT_INDEX to: |
| 5601 | // 1. Evaluate the array address expression and store the result in a temp if the expression is complex or |
| 5602 | // side-effecting. |
| 5603 | // 2. Evaluate the array index expression and store the result in a temp if the expression is complex or |
| 5604 | // side-effecting. |
| 5605 | // 3. Perform an explicit bounds check: GT_ARR_BOUNDS_CHK(index, GT_ARR_LENGTH(array)) |
| 5606 | // 4. Compute the address of the element that will be accessed: |
| 5607 | // GT_ADD(GT_ADD(array, firstElementOffset), GT_MUL(index, elementSize)) |
| 5608 | // 5. Dereference the address with a GT_IND. |
| 5609 | // |
| 5610 | // This expansion explicitly exposes the bounds check and the address calculation to the optimizer, which allows |
| 5611 | // for more straightforward bounds-check removal, CSE, etc. |
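| | // For example (an illustrative sketch), when optimizing, "a[i]" with 4-byte elements ends up |
| | // roughly as: |
| | //     COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(a)), |
| | //           IND(ADD(a, ADD(MUL(i, 4), elemOffs)))) |
| | // possibly wrapped in further COMMAs that assign 'a' and 'i' to temps when they are complex. |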
| 5612 | if (opts.MinOpts()) |
| 5613 | { |
| 5614 | GenTree* const array = fgMorphTree(asIndex->Arr()); |
| 5615 | GenTree* const index = fgMorphTree(asIndex->Index()); |
| 5616 | |
| 5617 | GenTreeIndexAddr* const indexAddr = |
| 5618 | new (this, GT_INDEX_ADDR) GenTreeIndexAddr(array, index, elemTyp, elemStructType, elemSize, |
| 5619 | static_cast<unsigned>(lenOffs), static_cast<unsigned>(elemOffs)); |
| 5620 | indexAddr->gtFlags |= (array->gtFlags | index->gtFlags) & GTF_ALL_EFFECT; |
| 5621 | |
| 5622 | // Mark the indirection node as needing a range check if necessary. |
| 5623 | // Note this will always be true unless JitSkipArrayBoundCheck() is used |
| 5624 | if ((indexAddr->gtFlags & GTF_INX_RNGCHK) != 0) |
| 5625 | { |
| 5626 | fgSetRngChkTarget(indexAddr); |
| 5627 | } |
| 5628 | |
| 5629 | // Change `tree` into an indirection and return. |
| 5630 | tree->ChangeOper(GT_IND); |
| 5631 | GenTreeIndir* const indir = tree->AsIndir(); |
| 5632 | indir->Addr() = indexAddr; |
| 5633 | indir->gtFlags = GTF_IND_ARR_INDEX | (indexAddr->gtFlags & GTF_ALL_EFFECT); |
| 5634 | |
| 5635 | #ifdef DEBUG |
| 5636 | indexAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 5637 | #endif // DEBUG |
| 5638 | |
| 5639 | return indir; |
| 5640 | } |
| 5641 | |
| 5642 | GenTree* arrRef = asIndex->Arr(); |
| 5643 | GenTree* index = asIndex->Index(); |
| 5644 | |
| 5645 | bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled |
| 5646 | bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0); |
| 5647 | |
| 5648 | GenTree* arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression |
| 5649 | GenTree* indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression |
| 5650 | GenTree* bndsChk = nullptr; |
| 5651 | |
| 5652 | // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address. |
| 5653 | if (chkd) |
| 5654 | { |
| 5655 | GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression |
| 5656 | GenTree* index2 = nullptr; |
| 5657 | |
| 5658 | // If the arrRef expression involves an assignment, a call or reads from global memory, |
| 5659 | // then we *must* allocate a temporary in which to "localize" those values, |
| 5660 | // to ensure that the same values are used in the bounds check and the actual |
| 5661 | // dereference. |
| 5662 | // Also we allocate the temporary when the arrRef is sufficiently complex/expensive. |
| 5663 | // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true |
| 5664 | // complexity is not exposed. (Without that condition there are cases of local struct |
| 5665 | // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was |
| 5666 | // fixed, there were some regressions that were mostly ameliorated by adding this condition.) |
| 5667 | // |
| 5668 | if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || |
| 5669 | gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD)) |
| 5670 | { |
| 5671 | unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr")); |
| 5672 | arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef); |
| 5673 | arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet()); |
| 5674 | arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet()); |
| 5675 | } |
| 5676 | else |
| 5677 | { |
| 5678 | arrRef2 = gtCloneExpr(arrRef); |
| 5679 | noway_assert(arrRef2 != nullptr); |
| 5680 | } |
| 5681 | |
| 5682 | // If the index expression involves an assignment, a call or reads from global memory, |
| 5683 | // we *must* allocate a temporary in which to "localize" those values, |
| 5684 | // to ensure that the same values are used in the bounds check and the actual |
| 5685 | // dereference. |
| 5686 | // Also we allocate the temporary when the index is sufficiently complex/expensive. |
| 5687 | // |
| 5688 | if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) || |
| 5689 | (arrRef->OperGet() == GT_FIELD)) |
| 5690 | { |
| 5691 | unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr")); |
| 5692 | indexDefn = gtNewTempAssign(indexTmpNum, index); |
| 5693 | index = gtNewLclvNode(indexTmpNum, index->TypeGet()); |
| 5694 | index2 = gtNewLclvNode(indexTmpNum, index->TypeGet()); |
| 5695 | } |
| 5696 | else |
| 5697 | { |
| 5698 | index2 = gtCloneExpr(index); |
| 5699 | noway_assert(index2 != nullptr); |
| 5700 | } |
| 5701 | |
| 5702 | // Next introduce a GT_ARR_BOUNDS_CHECK node |
| 5703 | var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. |
| 5704 | |
| 5705 | #ifdef _TARGET_64BIT_ |
| 5706 | // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case |
| 5707 | // of a 64-bit architecture this means the array index can potentially be a TYP_LONG, so for this case, |
| 5708 | // the comparison will have to be widened to 64 bits. |
| 5709 | if (index->TypeGet() == TYP_I_IMPL) |
| 5710 | { |
| 5711 | bndsChkType = TYP_I_IMPL; |
| 5712 | } |
| 5713 | #endif // _TARGET_64BIT_ |
| 5714 | |
| 5715 | GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs); |
| 5716 | |
| 5717 | if (bndsChkType != TYP_INT) |
| 5718 | { |
| 5719 | arrLen = gtNewCastNode(bndsChkType, arrLen, false, bndsChkType); |
| 5720 | } |
| 5721 | |
| 5722 | GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK) |
| 5723 | GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL); |
| 5724 | |
| 5725 | bndsChk = arrBndsChk; |
| 5726 | |
| 5727 | // Now we'll switch to using the second copies for arrRef and index |
| 5728 | // to compute the address expression |
| 5729 | |
| 5730 | arrRef = arrRef2; |
| 5731 | index = index2; |
| 5732 | } |
| 5733 | |
| 5734 | // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))" |
| 5735 | |
| 5736 | GenTree* addr; |
| 5737 | |
| 5738 | #ifdef _TARGET_64BIT_ |
| 5739 | // Widen 'index' on 64-bit targets |
| 5740 | if (index->TypeGet() != TYP_I_IMPL) |
| 5741 | { |
| 5742 | if (index->OperGet() == GT_CNS_INT) |
| 5743 | { |
| 5744 | index->gtType = TYP_I_IMPL; |
| 5745 | } |
| 5746 | else |
| 5747 | { |
| 5748 | index = gtNewCastNode(TYP_I_IMPL, index, false, TYP_I_IMPL); |
| 5749 | } |
| 5750 | } |
| 5751 | #endif // _TARGET_64BIT_ |
| 5752 | |
| 5753 | /* Scale the index value if necessary */ |
| 5754 | if (elemSize > 1) |
| 5755 | { |
| 5756 | GenTree* size = gtNewIconNode(elemSize, TYP_I_IMPL); |
| 5757 | |
| 5758 | // Fix 392756 WP7 Crossgen |
| 5759 | // |
| 5760 | // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node |
| 5761 | // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar. |
| 5762 | // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE. |
| 5763 | // |
| 5764 | size->gtFlags |= GTF_DONT_CSE; |
| 5765 | |
| 5766 | /* Multiply by the array element size */ |
| 5767 | addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size); |
| 5768 | } |
| 5769 | else |
| 5770 | { |
| 5771 | addr = index; |
| 5772 | } |
| 5773 | |
| 5774 | // Be careful to only create the byref pointer when the full index expression is added to the array reference. |
| 5775 | // We don't want to create a partial byref address expression that doesn't include the full index offset: |
| 5776 | // a byref must point within the containing object. It is dangerous (especially when optimizations come into |
| 5777 | // play) to create a "partial" byref that doesn't point exactly to the correct object; there is risk that |
| 5778 | // the partial byref will not point within the object, and thus not get updated correctly during a GC. |
| 5779 | // This is mostly a risk in fully-interruptible code regions. |
| 5780 | // |
| 5781 | // NOTE: the tree form created here is pattern matched by optExtractArrIndex(), so changes here must |
| 5782 | // be reflected there. |
| 5783 | |
| 5784 | /* Add the first element's offset */ |
| 5785 | |
| 5786 | GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL); |
| 5787 | |
| 5788 | addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns); |
| 5789 | |
| 5790 | /* Add the object ref to the element's offset */ |
| 5791 | |
| 5792 | addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr); |
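| | // At this point 'addr' has the shape: |
| | // |
| | //    ADD (TYP_BYREF) |
| | //      arrRef |
| | //      ADD (TYP_I_IMPL) |
| | //        MUL (TYP_I_IMPL)   [omitted when elemSize == 1] |
| | //          index |
| | //          elemSize |
| | //        elemOffs |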
| 5793 | |
| 5794 | #if SMALL_TREE_NODES |
| 5795 | assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL); |
| 5796 | #endif |
| 5797 | |
| 5798 | // Change the original GT_INDEX node into a GT_IND node |
| 5799 | tree->SetOper(GT_IND); |
| 5800 | |
| 5801 | // If the index node is a floating-point type, notify the compiler |
| 5802 | // we'll potentially use floating point registers at the time of codegen. |
| 5803 | if (varTypeIsFloating(tree->gtType)) |
| 5804 | { |
| 5805 | this->compFloatingPointUsed = true; |
| 5806 | } |
| 5807 | |
| 5808 | // We've now consumed the GTF_INX_RNGCHK, and the node |
| 5809 | // is no longer a GT_INDEX node. |
| 5810 | tree->gtFlags &= ~GTF_INX_RNGCHK; |
| 5811 | |
| 5812 | tree->gtOp.gtOp1 = addr; |
| 5813 | |
| 5814 | // This is an array index expression. |
| 5815 | tree->gtFlags |= GTF_IND_ARR_INDEX; |
| 5816 | |
| 5817 | /* An indirection will cause a GPF if the address is null */ |
| 5818 | tree->gtFlags |= GTF_EXCEPT; |
| 5819 | |
| 5820 | if (nCSE) |
| 5821 | { |
| 5822 | tree->gtFlags |= GTF_DONT_CSE; |
| 5823 | } |
| 5824 | |
| 5825 | // Store information about it. |
| 5826 | GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType)); |
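| | // Later phases (value numbering in particular) look this ArrayInfo entry up to recover the |
| | // element type, element size, and first-element offset of the access from the GT_IND node. |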
| 5827 | |
| 5828 | // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it. |
| 5829 | |
| 5830 | GenTree* indTree = tree; |
| 5831 | |
| 5832 | // Did we create a bndsChk tree? |
| 5833 | if (bndsChk) |
| 5834 | { |
| 5835 | // Use a GT_COMMA node to prepend the array bound check |
| 5836 | // |
| 5837 | tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree); |
| 5838 | |
| 5839 | /* Mark the indirection node as needing a range check */ |
| 5840 | fgSetRngChkTarget(bndsChk); |
| 5841 | } |
| 5842 | |
| 5843 | if (indexDefn != nullptr) |
| 5844 | { |
| 5845 | // Use a GT_COMMA node to prepend the index assignment |
| 5846 | // |
| 5847 | tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree); |
| 5848 | } |
| 5849 | if (arrRefDefn != nullptr) |
| 5850 | { |
| 5851 | // Use a GT_COMMA node to prepend the arrRef assignment |
| 5852 | // |
| 5853 | tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree); |
| 5854 | } |
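| | // When both temps and a bounds check were created, the overall shape is now: |
| | // |
| | //    COMMA(arrRefDefn, COMMA(indexDefn, COMMA(bndsChk, IND(addr)))) |
| | // |
| | // i.e. the temp assignments are evaluated first, then the bounds check, and |
| | // finally the element access itself. |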
| 5855 | |
| 5856 | // Currently we morph the tree to perform some folding operations prior |
| 5857 | // to attaching fieldSeq info and labeling constant array index contributions |
| 5858 | // |
| 5859 | fgMorphTree(tree); |
| 5860 | |
| 5861 | // Ideally we just want to proceed to attaching fieldSeq info and labeling the |
| 5862 | // constant array index contributions, but the morphing operation may have changed |
| 5863 | // the 'tree' into something that now unconditionally throws an exception. |
| 5864 | // |
| 5865 | // In such a case the gtEffectiveVal could be a new tree, or its gtOper could have been modified, |
| 5866 | // or it could be left unchanged. If it is unchanged then we should not return early; |
| 5867 | // instead we should proceed to attaching the fieldSeq info, etc... |
| 5868 | // |
| 5869 | GenTree* arrElem = tree->gtEffectiveVal(); |
| 5870 | |
| 5871 | if (fgIsCommaThrow(tree)) |
| 5872 | { |
| 5873 | if ((arrElem != indTree) || // A new tree node may have been created |
| 5874 | (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT |
| 5875 | { |
| 5876 | return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc.. |
| 5877 | } |
| 5878 | } |
| 5879 | |
| 5880 | assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED)); |
| 5881 | |
| 5882 | addr = arrElem->gtOp.gtOp1; |
| 5883 | |
| 5884 | assert(addr->TypeGet() == TYP_BYREF); |
| 5885 | |
| 5886 | GenTree* cnsOff = nullptr; |
| 5887 | if (addr->OperGet() == GT_ADD) |
| 5888 | { |
| 5889 | assert(addr->TypeGet() == TYP_BYREF); |
| 5890 | assert(addr->gtOp.gtOp1->TypeGet() == TYP_REF); |
| 5891 | |
| 5892 | addr = addr->gtOp.gtOp2; |
| 5893 | |
| 5894 | // Look for the constant [#FirstElem] node here, or as the RHS of an ADD. |
| 5895 | |
| 5896 | if (addr->gtOper == GT_CNS_INT) |
| 5897 | { |
| 5898 | cnsOff = addr; |
| 5899 | addr = nullptr; |
| 5900 | } |
| 5901 | else |
| 5902 | { |
| 5903 | if ((addr->OperGet() == GT_ADD) && (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)) |
| 5904 | { |
| 5905 | cnsOff = addr->gtOp.gtOp2; |
| 5906 | addr = addr->gtOp.gtOp1; |
| 5907 | } |
| 5908 | |
| 5909 | // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX |
| 5910 | addr->LabelIndex(this); |
| 5911 | } |
| 5912 | } |
| 5913 | else if (addr->OperGet() == GT_CNS_INT) |
| 5914 | { |
| 5915 | cnsOff = addr; |
| 5916 | } |
| 5917 | |
| 5918 | FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField); |
| 5919 | |
| 5920 | if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs)) |
| 5921 | { |
| 5922 | // Assign it the [#FirstElem] field sequence |
| 5923 | // |
| 5924 | cnsOff->gtIntCon.gtFieldSeq = firstElemFseq; |
| 5925 | } |
| 5926 | else // We have folded the first element's offset with the index expression |
| 5927 | { |
| 5928 | // Build the [#ConstantIndex, #FirstElem] field sequence |
| 5929 | // |
| 5930 | FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField); |
| 5931 | FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq); |
| 5932 | |
| 5933 | if (cnsOff == nullptr) // It must have folded into a zero offset |
| 5934 | { |
| 5935 | // Record in the general zero-offset map. |
| 5936 | GetZeroOffsetFieldMap()->Set(addr, fieldSeq); |
| 5937 | } |
| 5938 | else |
| 5939 | { |
| 5940 | cnsOff->gtIntCon.gtFieldSeq = fieldSeq; |
| 5941 | } |
| 5942 | } |
| 5943 | |
| 5944 | return tree; |
| 5945 | } |
| 5946 | |
| 5947 | #ifdef _TARGET_X86_ |
| 5948 | /***************************************************************************** |
| 5949 | * |
| 5950 | * Wrap fixed stack arguments for varargs functions to go through varargs |
| 5951 | * cookie to access them, except for the cookie itself. |
| 5952 | * |
| 5953 | * Non-x86 platforms are allowed to access all arguments directly |
| 5954 | * so we don't need this code. |
| 5955 | * |
| 5956 | */ |
| 5957 | GenTree* Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs) |
| 5958 | { |
| 5959 | /* For the fixed stack arguments of a varargs function, we need to go |
| 5960 | through the varargs cookies to access them, except for the |
| 5961 | cookie itself */ |
| 5962 | |
| 5963 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 5964 | |
| 5965 | if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg) |
| 5966 | { |
| 5967 | // Create a node representing the local pointing to the base of the args |
| 5968 | GenTree* ptrArg = |
| 5969 | gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL), |
| 5970 | gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES + |
| 5971 | lclOffs)); |
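| | // The constant subtracted from the varargs base pointer is the argument's recorded frame |
| | // offset, minus the size of the incoming register-argument area, plus the caller-requested |
| | // additional offset into the argument (lclOffs). |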
| 5972 | |
| 5973 | // Access the argument through the local |
| 5974 | GenTree* tree; |
| 5975 | if (varTypeIsStruct(varType)) |
| 5976 | { |
| 5977 | tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize); |
| 5978 | } |
| 5979 | else |
| 5980 | { |
| 5981 | tree = gtNewOperNode(GT_IND, varType, ptrArg); |
| 5982 | } |
| 5983 | tree->gtFlags |= GTF_IND_TGTANYWHERE; |
| 5984 | |
| 5985 | if (varDsc->lvAddrExposed) |
| 5986 | { |
| 5987 | tree->gtFlags |= GTF_GLOB_REF; |
| 5988 | } |
| 5989 | |
| 5990 | return fgMorphTree(tree); |
| 5991 | } |
| 5992 | |
| 5993 | return nullptr; |
| 5994 | } |
| 5995 | #endif |
| 5996 | |
| 5997 | /***************************************************************************** |
| 5998 | * |
| 5999 | * Transform the given GT_LCL_VAR tree for code generation. |
| 6000 | */ |
| 6001 | |
| 6002 | GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph) |
| 6003 | { |
| 6004 | assert(tree->gtOper == GT_LCL_VAR); |
| 6005 | |
| 6006 | unsigned lclNum = tree->gtLclVarCommon.gtLclNum; |
| 6007 | var_types varType = lvaGetRealType(lclNum); |
| 6008 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 6009 | |
| 6010 | if (varDsc->lvAddrExposed) |
| 6011 | { |
| 6012 | tree->gtFlags |= GTF_GLOB_REF; |
| 6013 | } |
| 6014 | |
| 6015 | #ifdef _TARGET_X86_ |
| 6016 | if (info.compIsVarArgs) |
| 6017 | { |
| 6018 | GenTree* newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0); |
| 6019 | if (newTree != nullptr) |
| 6020 | { |
| 6021 | if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) |
| 6022 | { |
| 6023 | fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType); |
| 6024 | } |
| 6025 | return newTree; |
| 6026 | } |
| 6027 | } |
| 6028 | #endif // _TARGET_X86_ |
| 6029 | |
| 6030 | /* If we're not in the global morphing phase (and a re-morph wasn't forced), bail */ |
| 6031 | |
| 6032 | if (!fgGlobalMorph && !forceRemorph) |
| 6033 | { |
| 6034 | return tree; |
| 6035 | } |
| 6036 | |
| 6037 | bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0; |
| 6038 | |
| 6039 | noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr |
| 6040 | |
| 6041 | if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad()) |
| 6042 | { |
| 6043 | #if LOCAL_ASSERTION_PROP |
| 6044 | /* Assertion prop can tell us to omit adding a cast here */ |
| 6045 | if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX) |
| 6046 | { |
| 6047 | return tree; |
| 6048 | } |
| 6049 | #endif |
| 6050 | /* Small-typed arguments and aliased locals are normalized on load. |
| 6051 | Other small-typed locals are normalized on store. |
| 6052 | Small-typed locals are also normalized on load under the debugger, since the debugger could write to the variable. |
| 6053 | If this is one of the former, insert a narrowing cast on the load. |
| 6054 | ie. Convert: var-short --> cast-short(var-int) */ |
| 6055 | |
| 6056 | tree->gtType = TYP_INT; |
| 6057 | fgMorphTreeDone(tree); |
| 6058 | tree = gtNewCastNode(TYP_INT, tree, false, varType); |
| 6059 | fgMorphTreeDone(tree); |
| 6060 | return tree; |
| 6061 | } |
| 6062 | |
| 6063 | return tree; |
| 6064 | } |
| 6065 | |
| 6066 | /***************************************************************************** |
| 6067 | Grab a temp for big offset morphing. |
| 6068 | This method will grab a new temp if no temp of this "type" has been created yet; |
| 6069 | otherwise it will return the cached one. |
| 6070 | */ |
| 6071 | unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type) |
| 6072 | { |
| 6073 | unsigned lclNum = fgBigOffsetMorphingTemps[type]; |
| 6074 | |
| 6075 | if (lclNum == BAD_VAR_NUM) |
| 6076 | { |
| 6077 | // We haven't created a temp for this kind of type. Create one now. |
| 6078 | lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing")); |
| 6079 | fgBigOffsetMorphingTemps[type] = lclNum; |
| 6080 | } |
| 6081 | else |
| 6082 | { |
| 6083 | // We better get the right type. |
| 6084 | noway_assert(lvaTable[lclNum].TypeGet() == type); |
| 6085 | } |
| 6086 | |
| 6087 | noway_assert(lclNum != BAD_VAR_NUM); |
| 6088 | return lclNum; |
| 6089 | } |
| 6090 | |
| 6091 | /***************************************************************************** |
| 6092 | * |
| 6093 | * Transform the given GT_FIELD tree for code generation. |
| 6094 | */ |
| 6095 | |
| 6096 | GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac) |
| 6097 | { |
| 6098 | assert(tree->gtOper == GT_FIELD); |
| 6099 | |
| 6100 | CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd; |
| 6101 | unsigned fldOffset = tree->gtField.gtFldOffset; |
| 6102 | GenTree* objRef = tree->gtField.gtFldObj; |
| 6103 | bool fieldMayOverlap = false; |
| 6104 | bool objIsLocal = false; |
| 6105 | |
| 6106 | if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR)) |
| 6107 | { |
| 6108 | // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter. |
| 6109 | // If it is, fgMorphImplicitByRefArgs may change it to a different opcode, which the |
| 6110 | // SIMD field rewrites are sensitive to. |
| 6111 | fgMorphImplicitByRefArgs(objRef); |
| 6112 | } |
| 6113 | |
| 6114 | noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) || |
| 6115 | ((tree->gtFlags & GTF_GLOB_REF) != 0)); |
| 6116 | |
| 6117 | if (tree->gtField.gtFldMayOverlap) |
| 6118 | { |
| 6119 | fieldMayOverlap = true; |
| 6120 | // Reset the flag because we may reuse the node. |
| 6121 | tree->gtField.gtFldMayOverlap = false; |
| 6122 | } |
| 6123 | |
| 6124 | #ifdef FEATURE_SIMD |
| 6125 | // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic. |
| 6126 | if (mac == nullptr) |
| 6127 | { |
| 6128 | GenTree* newTree = fgMorphFieldToSIMDIntrinsicGet(tree); |
| 6129 | if (newTree != tree) |
| 6130 | { |
| 6131 | newTree = fgMorphSmpOp(newTree); |
| 6132 | return newTree; |
| 6133 | } |
| 6134 | } |
| 6135 | else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1())) |
| 6136 | { |
| 6137 | GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr(); |
| 6138 | if (lcl != nullptr) |
| 6139 | { |
| 6140 | lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField)); |
| 6141 | } |
| 6142 | } |
| 6143 | #endif |
| 6144 | |
| 6145 | /* Is this an instance data member? */ |
| 6146 | |
| 6147 | if (objRef) |
| 6148 | { |
| 6149 | GenTree* addr; |
| 6150 | objIsLocal = objRef->IsLocal(); |
| 6151 | |
| 6152 | if (tree->gtFlags & GTF_IND_TLS_REF) |
| 6153 | { |
| 6154 | NO_WAY("instance field can not be a TLS ref."); |
| 6155 | } |
| 6156 | |
| 6157 | /* We'll create the expression "*(objRef + mem_offs)" */ |
| 6158 | |
| 6159 | noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL); |
| 6160 | |
| 6161 | // An optimization for Contextful classes: |
| 6162 | // we unwrap the proxy when we have a 'this reference' |
| 6163 | if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef)) |
| 6164 | { |
| 6165 | objRef = fgUnwrapProxy(objRef); |
| 6166 | } |
| 6167 | |
| 6168 | /* |
| 6169 | Now we have a tree like this: |
| 6170 | |
| 6171 | +--------------------+ |
| 6172 | | GT_FIELD | tree |
| 6173 | +----------+---------+ |
| 6174 | | |
| 6175 | +--------------+-------------+ |
| 6176 | | tree->gtField.gtFldObj | |
| 6177 | +--------------+-------------+ |
| 6178 | |
| 6179 | |
| 6180 | We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): |
| 6181 | |
| 6182 | +--------------------+ |
| 6183 | | GT_IND/GT_OBJ | tree |
| 6184 | +---------+----------+ |
| 6185 | | |
| 6186 | | |
| 6187 | +---------+----------+ |
| 6188 | | GT_ADD | addr |
| 6189 | +---------+----------+ |
| 6190 | | |
| 6191 | / \ |
| 6192 | / \ |
| 6193 | / \ |
| 6194 | +-------------------+ +----------------------+ |
| 6195 | | objRef | | fldOffset | |
| 6196 | | | | (when fldOffset !=0) | |
| 6197 | +-------------------+ +----------------------+ |
| 6198 | |
| 6199 | |
| 6200 | or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): |
| 6201 | |
| 6202 | |
| 6203 | +--------------------+ |
| 6204 | | GT_IND/GT_OBJ | tree |
| 6205 | +----------+---------+ |
| 6206 | | |
| 6207 | +----------+---------+ |
| 6208 | | GT_COMMA | comma2 |
| 6209 | +----------+---------+ |
| 6210 | | |
| 6211 | / \ |
| 6212 | / \ |
| 6213 | / \ |
| 6214 | / \ |
| 6215 | +---------+----------+ +---------+----------+ |
| 6216 | comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr |
| 6217 | +---------+----------+ +---------+----------+ |
| 6218 | | | |
| 6219 | / \ / \ |
| 6220 | / \ / \ |
| 6221 | / \ / \ |
| 6222 | +-----+-----+ +-----+-----+ +---------+ +-----------+ |
| 6223 | asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset | |
| 6224 | +-----+-----+ +-----+-----+ +---------+ +-----------+ |
| 6225 | | | |
| 6226 | / \ | |
| 6227 | / \ | |
| 6228 | / \ | |
| 6229 | +-----+-----+ +-----+-----+ +-----------+ |
| 6230 | | tmpLcl | | objRef | | tmpLcl | |
| 6231 | +-----------+ +-----------+ +-----------+ |
| 6232 | |
| 6233 | |
| 6234 | */ |
| 6235 | |
| 6236 | var_types objRefType = objRef->TypeGet(); |
| 6237 | |
| 6238 | GenTree* comma = nullptr; |
| 6239 | |
| 6240 | // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field, |
| 6241 | // and thus is equivalent to a MACK_Ind with zero offset. |
| 6242 | MorphAddrContext defMAC(MACK_Ind); |
| 6243 | if (mac == nullptr) |
| 6244 | { |
| 6245 | mac = &defMAC; |
| 6246 | } |
| 6247 | |
| 6248 | // This flag is set to enable the "conservative" style of explicit null-check insertion. |
| 6249 | // This means that we insert an explicit null check whenever we create byref by adding a |
| 6250 | // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately |
| 6251 | // dereferenced). The alternative is "aggressive", which would not insert such checks (for |
| 6252 | // small offsets); in this plan, we would transfer some null-checking responsibility to |
| 6253 | // callees of methods taking byref parameters. They would have to add explicit null checks |
| 6254 | // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in |
| 6255 | // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too |
| 6256 | // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null |
| 6257 | // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs. |
| 6258 | // This is left here to point out how to implement it. |
| 6259 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 6260 | |
| 6261 | #define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1 |
| 6262 | |
| 6263 | bool addExplicitNullCheck = false; |
| 6264 | |
| 6265 | // Implicit byref locals are never null. |
| 6266 | if (!((objRef->gtOper == GT_LCL_VAR) && lvaIsImplicitByRefLocal(objRef->gtLclVarCommon.gtLclNum))) |
| 6267 | { |
| 6268 | // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression |
| 6269 | // whose address is being taken is either a local or static variable, whose address is necessarily |
| 6270 | // non-null, or else it is a field dereference, which will do its own null checking if necessary. |
| 6271 | if (objRef->gtOper != GT_ADDR && (mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind)) |
| 6272 | { |
| 6273 | if (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)) |
| 6274 | { |
| 6275 | addExplicitNullCheck = true; |
| 6276 | } |
| 6277 | else |
| 6278 | { |
| 6279 | // In R2R mode the field offset for some fields may change when the code |
| 6280 | // is loaded. So we can't rely on a zero offset here to suppress the null check. |
| 6281 | // |
| 6282 | // See GitHub issue #16454. |
| 6283 | bool fieldHasChangeableOffset = false; |
| 6284 | |
| 6285 | #ifdef FEATURE_READYTORUN_COMPILER |
| 6286 | fieldHasChangeableOffset = (tree->gtField.gtFieldLookup.addr != nullptr); |
| 6287 | #endif |
| 6288 | |
| 6289 | #if CONSERVATIVE_NULL_CHECK_BYREF_CREATION |
| 6290 | addExplicitNullCheck = (mac->m_kind == MACK_Addr) && |
| 6291 | ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset); |
| 6292 | #else |
| 6293 | addExplicitNullCheck = (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr && |
| 6294 | ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset)); |
| 6295 | #endif |
| 6296 | } |
| 6297 | } |
| 6298 | } |
| 6299 | |
| 6300 | if (addExplicitNullCheck) |
| 6301 | { |
| 6302 | #ifdef DEBUG |
| 6303 | if (verbose) |
| 6304 | { |
| 6305 | printf("Before explicit null check morphing:\n"); |
| 6306 | gtDispTree(tree); |
| 6307 | } |
| 6308 | #endif |
| 6309 | |
| 6310 | // |
| 6311 | // Create the "comma" subtree |
| 6312 | // |
| 6313 | GenTree* asg = nullptr; |
| 6314 | GenTree* nullchk; |
| 6315 | |
| 6316 | unsigned lclNum; |
| 6317 | |
| 6318 | if (objRef->gtOper != GT_LCL_VAR) |
| 6319 | { |
| 6320 | lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet())); |
| 6321 | |
| 6322 | // Create the "asg" node |
| 6323 | asg = gtNewTempAssign(lclNum, objRef); |
| 6324 | } |
| 6325 | else |
| 6326 | { |
| 6327 | lclNum = objRef->gtLclVarCommon.gtLclNum; |
| 6328 | } |
| 6329 | |
| 6330 | // Create the "nullchk" node. |
| 6331 | // Make it TYP_BYTE so we only dereference it for 1 byte. |
| 6332 | GenTree* lclVar = gtNewLclvNode(lclNum, objRefType); |
| 6333 | nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr); |
| 6334 | |
| 6335 | nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections |
| 6336 | |
| 6337 | // An indirection will cause a GPF if the address is null. |
| 6338 | nullchk->gtFlags |= GTF_EXCEPT; |
| 6339 | |
| 6340 | compCurBB->bbFlags |= BBF_HAS_NULLCHECK; |
| 6341 | optMethodFlags |= OMF_HAS_NULLCHECK; |
| 6342 | |
| 6343 | if (asg) |
| 6344 | { |
| 6345 | // Create the "comma" node. |
| 6346 | comma = gtNewOperNode(GT_COMMA, |
| 6347 | TYP_VOID, // We don't want to return anything from this "comma" node. |
| 6348 | // Set the type to TYP_VOID, so we can select "cmp" instruction |
| 6349 | // instead of "mov" instruction later on. |
| 6350 | asg, nullchk); |
| 6351 | } |
| 6352 | else |
| 6353 | { |
| 6354 | comma = nullchk; |
| 6355 | } |
| 6356 | |
| 6357 | addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node. |
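| | // At this point 'comma' evaluates the (optional) temp assignment followed by the TYP_BYTE |
| | // null-check indirection, and 'addr' re-reads the same local so that the field address |
| | // below is computed from the value that was just null checked. |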
| 6358 | } |
| 6359 | else if (fldOffset == 0) |
| 6360 | { |
| 6361 | // Generate the "addr" node. |
| 6362 | addr = objRef; |
| 6363 | FieldSeqNode* fieldSeq = |
| 6364 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
| 6365 | GetZeroOffsetFieldMap()->Set(addr, fieldSeq); |
| 6366 | } |
| 6367 | else |
| 6368 | { |
| 6369 | addr = objRef; |
| 6370 | } |
| 6371 | |
| 6372 | #ifdef FEATURE_READYTORUN_COMPILER |
| 6373 | if (tree->gtField.gtFieldLookup.addr != nullptr) |
| 6374 | { |
| 6375 | GenTree* offsetNode = nullptr; |
| 6376 | if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE) |
| 6377 | { |
| 6378 | offsetNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)tree->gtField.gtFieldLookup.addr, |
| 6379 | GTF_ICON_FIELD_HDL, false); |
| 6380 | } |
| 6381 | else |
| 6382 | { |
| 6383 | noway_assert(!"unexpected accessType for R2R field access"); |
| 6384 | } |
| 6385 | |
| 6386 | var_types addType = (objRefType == TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF; |
| 6387 | addr = gtNewOperNode(GT_ADD, addType, addr, offsetNode); |
| 6388 | } |
| 6389 | #endif |
| 6390 | if (fldOffset != 0) |
| 6391 | { |
| 6392 | // Generate the "addr" node. |
| 6393 | /* Add the member offset to the object's address */ |
| 6394 | FieldSeqNode* fieldSeq = |
| 6395 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
| 6396 | addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, |
| 6397 | gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq)); |
| 6398 | } |
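| | // 'addr' now adds the constant field offset (tagged with the field sequence) to the address |
| | // computed above: the raw objRef, the null-checked temp, or the R2R-adjusted address. |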
| 6399 | |
| 6400 | // Now let's set the "tree" as a GT_IND tree. |
| 6401 | |
| 6402 | tree->SetOper(GT_IND); |
| 6403 | tree->gtOp.gtOp1 = addr; |
| 6404 | |
| 6405 | tree->gtFlags &= (~GTF_EXCEPT | addr->gtFlags); |
| 6406 | tree->SetIndirExceptionFlags(this); |
| 6407 | |
| 6408 | if (addExplicitNullCheck) |
| 6409 | { |
| 6410 | // |
| 6411 | // Create "comma2" node and link it to "tree". |
| 6412 | // |
| 6413 | GenTree* comma2; |
| 6414 | comma2 = gtNewOperNode(GT_COMMA, |
| 6415 | addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node. |
| 6416 | comma, addr); |
| 6417 | tree->gtOp.gtOp1 = comma2; |
| 6418 | } |
| 6419 | |
| 6420 | #ifdef DEBUG |
| 6421 | if (verbose) |
| 6422 | { |
| 6423 | if (addExplicitNullCheck) |
| 6424 | { |
| 6425 | printf("After adding explicit null check:\n"); |
| 6426 | gtDispTree(tree); |
| 6427 | } |
| 6428 | } |
| 6429 | #endif |
| 6430 | } |
| 6431 | else /* This is a static data member */ |
| 6432 | { |
| 6433 | if (tree->gtFlags & GTF_IND_TLS_REF) |
| 6434 | { |
| 6435 | // Thread Local Storage static field reference |
| 6436 | // |
| 6437 | // Field ref is a TLS 'Thread-Local-Storage' reference |
| 6438 | // |
| 6439 | // Build this tree: IND(*) # |
| 6440 | // | |
| 6441 | // ADD(I_IMPL) |
| 6442 | // / \ |
| 6443 | // / CNS(fldOffset) |
| 6444 | // / |
| 6445 | // / |
| 6446 | // / |
| 6447 | // IND(I_IMPL) == [Base of this DLL's TLS] |
| 6448 | // | |
| 6449 | // ADD(I_IMPL) |
| 6450 | // / \ |
| 6451 | // / CNS(IdValue*4) or MUL |
| 6452 | // / / \ |
| 6453 | // IND(I_IMPL) / CNS(4) |
| 6454 | // | / |
| 6455 | // CNS(TLS_HDL,0x2C) IND |
| 6456 | // | |
| 6457 | // CNS(pIdAddr) |
| 6458 | // |
| 6459 | // # Denotes the original node |
| 6460 | // |
| 6461 | void** pIdAddr = nullptr; |
| 6462 | unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr); |
| 6463 | |
| 6464 | // |
| 6465 | // If we can access the TLS DLL index ID value directly |
| 6466 | // then pIdAddr will be NULL and |
| 6467 | // IdValue will be the actual TLS DLL index ID |
| 6468 | // |
| 6469 | GenTree* dllRef = nullptr; |
| 6470 | if (pIdAddr == nullptr) |
| 6471 | { |
| 6472 | if (IdValue != 0) |
| 6473 | { |
| 6474 | dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL); |
| 6475 | } |
| 6476 | } |
| 6477 | else |
| 6478 | { |
| 6479 | dllRef = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pIdAddr, GTF_ICON_STATIC_HDL, true); |
| 6480 | |
| 6481 | // Next we multiply by 4 |
| 6482 | dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL)); |
| 6483 | } |
| 6484 | |
| 6485 | #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides |
| 6486 | |
| 6487 | // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] |
| 6488 | |
| 6489 | GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL); |
| 6490 | |
| 6491 | // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS |
| 6492 | if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) |
| 6493 | { |
| 6494 | tree->gtFlags &= ~GTF_FLD_INITCLASS; |
| 6495 | tlsRef->gtFlags |= GTF_ICON_INITCLASS; |
| 6496 | } |
| 6497 | |
| 6498 | tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); |
| 6499 | |
| 6500 | if (dllRef != nullptr) |
| 6501 | { |
| 6502 | /* Add the dllRef */ |
| 6503 | tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef); |
| 6504 | } |
| 6505 | |
| 6506 | /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */ |
| 6507 | tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); |
| 6508 | |
| 6509 | if (fldOffset != 0) |
| 6510 | { |
| 6511 | FieldSeqNode* fieldSeq = |
| 6512 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
| 6513 | GenTree* fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq); |
| 6514 | |
| 6515 | /* Add the TLS static field offset to the address */ |
| 6516 | |
| 6517 | tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode); |
| 6518 | } |
| 6519 | |
| 6520 | // Final indirect to get to actual value of TLS static field |
| 6521 | |
| 6522 | tree->SetOper(GT_IND); |
| 6523 | tree->gtOp.gtOp1 = tlsRef; |
| 6524 | |
| 6525 | noway_assert(tree->gtFlags & GTF_IND_TLS_REF); |
| 6526 | } |
| 6527 | else |
| 6528 | { |
| 6529 | // Normal static field reference |
| 6530 | |
| 6531 | // |
| 6532 | // If we can access the static's address directly |
| 6533 | // then pFldAddr will be NULL and |
| 6534 | // fldAddr will be the actual address of the static field |
| 6535 | // |
| 6536 | void** pFldAddr = nullptr; |
| 6537 | void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr); |
| 6538 | |
| 6539 | if (pFldAddr == nullptr) |
| 6540 | { |
| 6541 | #ifdef _TARGET_64BIT_ |
| 6542 | if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr)) |
| 6543 | { |
| 6544 | // The address is not directly addressable, so force it into a |
| 6545 | // constant so that we handle it properly |
| 6546 | |
| 6547 | GenTree* addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL); |
| 6548 | addr->gtType = TYP_I_IMPL; |
| 6549 | FieldSeqNode* fieldSeq = |
| 6550 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
| 6551 | addr->gtIntCon.gtFieldSeq = fieldSeq; |
| 6552 | // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS |
| 6553 | if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) |
| 6554 | { |
| 6555 | tree->gtFlags &= ~GTF_FLD_INITCLASS; |
| 6556 | addr->gtFlags |= GTF_ICON_INITCLASS; |
| 6557 | } |
| 6558 | |
| 6559 | tree->SetOper(GT_IND); |
| 6560 | tree->gtOp.gtOp1 = addr; |
| 6561 | |
| 6562 | return fgMorphSmpOp(tree); |
| 6563 | } |
| 6564 | else |
| 6565 | #endif // _TARGET_64BIT_ |
| 6566 | { |
| 6567 | // Only volatile or classinit could be set, and they map over |
| 6568 | noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0); |
| 6569 | static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE); |
| 6570 | static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS); |
| 6571 | tree->SetOper(GT_CLS_VAR); |
| 6572 | tree->gtClsVar.gtClsVarHnd = symHnd; |
| 6573 | FieldSeqNode* fieldSeq = |
| 6574 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
| 6575 | tree->gtClsVar.gtFieldSeq = fieldSeq; |
| 6576 | } |
| 6577 | |
| 6578 | return tree; |
| 6579 | } |
| 6580 | else |
| 6581 | { |
| 6582 | GenTree* addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL); |
| 6583 | |
| 6584 | // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS |
| 6585 | if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) |
| 6586 | { |
| 6587 | tree->gtFlags &= ~GTF_FLD_INITCLASS; |
| 6588 | addr->gtFlags |= GTF_ICON_INITCLASS; |
| 6589 | } |
| 6590 | |
| 6591 | // There are two cases here: either the static is RVA-based, |
| 6592 | // in which case the type of the FIELD node is not a GC type |
| 6593 | // and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is |
| 6594 | // a GC type and the handle to it is a TYP_BYREF into the GC heap, |
| 6595 | // because handles to statics now go into the large object heap |
| 6596 | |
| 6597 | var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL); |
| 6598 | GenTree* op1 = gtNewOperNode(GT_IND, handleTyp, addr); |
| 6599 | op1->gtFlags |= GTF_IND_INVARIANT; |
| 6600 | |
| 6601 | tree->SetOper(GT_IND); |
| 6602 | tree->gtOp.gtOp1 = op1; |
| 6603 | } |
| 6604 | } |
| 6605 | } |
| 6606 | noway_assert(tree->gtOper == GT_IND); |
| 6607 | |
| 6608 | GenTree* res = fgMorphSmpOp(tree); |
| 6609 | |
| 6610 | // If we have a struct type, this node would previously have been under a GT_ADDR, |
| 6611 | // and therefore would have been marked GTF_DONT_CSE. |
| 6612 | // TODO-1stClassStructs: revisit this. |
| 6613 | if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal) |
| 6614 | { |
| 6615 | res->gtFlags |= GTF_DONT_CSE; |
| 6616 | } |
| 6617 | |
| 6618 | if (fldOffset == 0 && res->OperGet() == GT_IND) |
| 6619 | { |
| 6620 | GenTree* addr = res->gtOp.gtOp1; |
| 6621 | // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node. |
| 6622 | FieldSeqNode* fieldSeq = |
| 6623 | fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); |
| 6624 | fgAddFieldSeqForZeroOffset(addr, fieldSeq); |
| 6625 | } |
| 6626 | |
| 6627 | return res; |
| 6628 | } |
| 6629 | |
| 6630 | //------------------------------------------------------------------------------ |
| 6631 | // fgMorphCallInline: attempt to inline a call |
| 6632 | // |
| 6633 | // Arguments: |
| 6634 | // call - call expression to inline, inline candidate |
| 6635 | // inlineResult - result tracking and reporting |
| 6636 | // |
| 6637 | // Notes: |
| 6638 | // Attempts to inline the call. |
| 6639 | // |
| 6640 | // If successful, callee's IR is inserted in place of the call, and |
| 6641 | // is marked with an InlineContext. |
| 6642 | // |
| 6643 | // If unsuccessful, the transformations done in anticipation of a |
| 6644 | // possible inline are undone, and the candidate flag on the call |
| 6645 | // is cleared. |
| 6646 | |
| 6647 | void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult) |
| 6648 | { |
| 6649 | bool inliningFailed = false; |
| 6650 | |
| 6651 | // Is this call an inline candidate? |
| 6652 | if (call->IsInlineCandidate()) |
| 6653 | { |
| 6654 | // Attempt the inline |
| 6655 | fgMorphCallInlineHelper(call, inlineResult); |
| 6656 | |
| 6657 | // We should have made up our minds one way or another.... |
| 6658 | assert(inlineResult->IsDecided()); |
| 6659 | |
| 6660 | // If we failed to inline, we have a bit of work to do to cleanup |
| 6661 | if (inlineResult->IsFailure()) |
| 6662 | { |
| 6663 | |
| 6664 | #ifdef DEBUG |
| 6665 | |
| 6666 | // Before we do any cleanup, create a failing InlineContext to |
| 6667 | // capture details of the inlining attempt. |
| 6668 | m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult); |
| 6669 | |
| 6670 | #endif |
| 6671 | |
| 6672 | inliningFailed = true; |
| 6673 | |
| 6674 | // Clear the Inline Candidate flag so we can ensure later we tried |
| 6675 | // inlining all candidates. |
| 6676 | // |
| 6677 | call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE; |
| 6678 | } |
| 6679 | } |
| 6680 | else |
| 6681 | { |
| 6682 | // This wasn't an inline candidate. So it must be a GDV candidate. |
| 6683 | assert(call->IsGuardedDevirtualizationCandidate()); |
| 6684 | |
| 6685 | // We already know we can't inline this call, so don't even bother to try. |
| 6686 | inliningFailed = true; |
| 6687 | } |
| 6688 | |
| 6689 | // If we failed to inline (or didn't even try), do some cleanup. |
| 6690 | if (inliningFailed) |
| 6691 | { |
| 6692 | if (call->gtReturnType != TYP_VOID) |
| 6693 | { |
| 6694 | JITDUMP("Inlining [%06u] failed, so bashing [%06u] to NOP\n", dspTreeID(call), dspTreeID(fgMorphStmt)); |
| 6695 | |
| 6696 | // Detach the GT_CALL tree from the original statement by |
| 6697 | // replacing it with a "nothing" node. Later the "nothing" node will be removed |
| 6698 | // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node. |
| 6699 | |
| 6700 | noway_assert(fgMorphStmt->gtStmtExpr == call); |
| 6701 | fgMorphStmt->gtStmtExpr = gtNewNothingNode(); |
| 6702 | } |
| 6703 | } |
| 6704 | } |
| 6705 | |
| 6706 | /***************************************************************************** |
| 6707 | * Helper to attempt to inline a call |
| 6708 | * Sets success/failure in inline result |
| 6709 | * If success, modifies current method's IR with inlinee's IR |
| 6710 | * If failed, undoes any speculative modifications to current method |
| 6711 | */ |
| 6712 | |
| 6713 | void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) |
| 6714 | { |
| 6715 | // Don't expect any surprises here. |
| 6716 | assert(result->IsCandidate()); |
| 6717 | |
| 6718 | if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING) |
| 6719 | { |
| 6720 | // For now, attributing this to call site, though it's really |
| 6721 | // more of a budget issue (lvaCount currently includes all |
| 6722 | // caller and prospective callee locals). We still might be |
| 6723 | // able to inline other callees into this caller, or inline |
| 6724 | // this callee in other callers. |
| 6725 | result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS); |
| 6726 | return; |
| 6727 | } |
| 6728 | |
| 6729 | if (call->IsVirtual()) |
| 6730 | { |
| 6731 | result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL); |
| 6732 | return; |
| 6733 | } |
| 6734 | |
| 6735 | // Re-check this because guarded devirtualization may allow these through. |
| 6736 | if (gtIsRecursiveCall(call) && call->IsImplicitTailCall()) |
| 6737 | { |
| 6738 | result->NoteFatal(InlineObservation::CALLSITE_IMPLICIT_REC_TAIL_CALL); |
| 6739 | return; |
| 6740 | } |
| 6741 | |
| 6742 | // impMarkInlineCandidate() is expected not to mark tail prefixed calls |
| 6743 | // and recursive tail calls as inline candidates. |
| 6744 | noway_assert(!call->IsTailPrefixedCall()); |
| 6745 | noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call)); |
| 6746 | |
| 6747 | /* If the caller's stack frame is marked, then we can't do any inlining. Period. |
| 6748 | Although we have checked this in impCanInline, it is possible that later IL instructions |
| 6749 | might cause compNeedSecurityCheck to be set. Therefore we need to check it here again. |
| 6750 | */ |
| 6751 | |
| 6752 | if (opts.compNeedSecurityCheck) |
| 6753 | { |
| 6754 | result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK); |
| 6755 | return; |
| 6756 | } |
| 6757 | |
| 6758 | // |
| 6759 | // Calling inlinee's compiler to inline the method. |
| 6760 | // |
| 6761 | |
| 6762 | unsigned startVars = lvaCount; |
| 6763 | |
| 6764 | #ifdef DEBUG |
| 6765 | if (verbose) |
| 6766 | { |
| 6767 | printf("Expanding INLINE_CANDIDATE in statement "); |
| 6768 | printTreeID(fgMorphStmt); |
| 6769 | printf(" in " FMT_BB ":\n", compCurBB->bbNum); |
| 6770 | gtDispTree(fgMorphStmt); |
| 6771 | if (call->IsImplicitTailCall()) |
| 6772 | { |
| 6773 | printf("Note: candidate is implicit tail call\n"); |
| 6774 | } |
| 6775 | } |
| 6776 | #endif |
| 6777 | |
| 6778 | impInlineRoot()->m_inlineStrategy->NoteAttempt(result); |
| 6779 | |
| 6780 | // |
| 6781 | // Invoke the compiler to inline the call. |
| 6782 | // |
| 6783 | |
| 6784 | fgInvokeInlineeCompiler(call, result); |
| 6785 | |
| 6786 | if (result->IsFailure()) |
| 6787 | { |
| 6788 | // Undo some changes made in anticipation of inlining... |
| 6789 | |
| 6790 | // Zero out the used locals |
| 6791 | memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable)); |
| 6792 | for (unsigned i = startVars; i < lvaCount; i++) |
| 6793 | { |
| 6794 | new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(); // call the constructor. |
| 6795 | } |
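| | // (memset wipes the abandoned entries, and placement-new then re-runs the LclVarDsc |
| | // constructor so those slots look freshly allocated if they are reused later.) |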
| 6796 | |
| 6797 | lvaCount = startVars; |
| 6798 | |
| 6799 | #ifdef DEBUG |
| 6800 | if (verbose) |
| 6801 | { |
| 6802 | // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount); |
| 6803 | } |
| 6804 | #endif |
| 6805 | |
| 6806 | return; |
| 6807 | } |
| 6808 | |
| 6809 | #ifdef DEBUG |
| 6810 | if (verbose) |
| 6811 | { |
| 6812 | // printf("After inlining lvaCount=%d.\n", lvaCount); |
| 6813 | } |
| 6814 | #endif |
| 6815 | } |
| 6816 | |
| 6817 | //------------------------------------------------------------------------ |
| 6818 | // fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp. |
| 6819 | // |
| 6820 | // Arguments: |
| 6821 | // callee - The callee to check |
| 6822 | // |
| 6823 | // Return Value: |
| 6824 | // Returns true or false based on whether the callee can be fastTailCalled |
| 6825 | // |
| 6826 | // Notes: |
| 6827 | // This function is target specific and each target will make the fastTailCall |
| 6828 | // decision differently. See the notes below. |
| 6829 | // |
| 6830 | // |
| 6831 | // Windows Amd64: |
| 6832 | // A fast tail call can be made whenever the number of callee arguments |
| 6833 | // is less than or equal to the number of caller arguments, or we have four |
| 6834 | // or fewer callee arguments. This is because, on Windows AMD64, each |
| 6835 | // argument uses exactly one register or one 8-byte stack slot. Thus, we only |
| 6836 | // need to count arguments, and not be concerned with the size of each |
| 6837 | // incoming or outgoing argument. |
| 6838 | // |
| 6839 | // Can fast tail call examples (amd64 Windows): |
| 6840 | // |
| 6841 | // -- Callee will have all register arguments -- |
| 6842 | // caller(int, int, int, int) |
| 6843 | // callee(int, int, float, int) |
| 6844 | // |
| 6845 | // -- Callee requires stack space that is equal to the caller -- |
| 6846 | // caller(struct, struct, struct, struct, struct, struct) |
| 6847 | // callee(int, int, int, int, int, int) |
| 6848 | // |
| 6849 | // -- Callee requires stack space that is less than the caller -- |
| 6850 | // caller(struct, double, struct, float, struct, struct) |
| 6851 | // callee(int, int, int, int, int) |
| 6852 | // |
| 6853 | // -- Callee will have all register arguments -- |
| 6854 | // caller(int) |
| 6855 | // callee(int, int, int, int) |
| 6856 | // |
| 6857 | // Cannot fast tail call examples (amd64 Windows): |
| 6858 | // |
| 6859 | // -- Callee requires stack space that is larger than the caller -- |
| 6860 | // caller(struct, double, struct, float, struct, struct) |
| 6861 | // callee(int, int, int, int, int, double, double, double) |
| 6862 | // |
| 6863 | // Unix Amd64 && Arm64: |
| 6864 | // A fastTailCall decision can be made whenever the callee's stack space is |
| 6865 | // less than or equal to the caller's stack space. There are many permutations |
| 6866 | // of when the caller and callee have different stack sizes if there are |
| 6867 | // structs being passed to either the caller or callee. |
| 6868 | // |
| 6869 | // Exceptions: |
| 6870 | // 1) If the callee has structs which cannot be enregistered it will be |
| 6871 | // reported as cannot fast tail call. This is an implementation limitation |
| 6872 | // where the callee only is checked for non enregisterable structs. This is |
| 6873 | // tracked with https://github.com/dotnet/coreclr/issues/12644. |
| 6874 | // |
| 6875 | // 2) If the caller or callee has stack arguments and the callee has more |
| 6876 | // arguments than the caller, it will be reported as cannot fast tail call. |
| 6877 | // This is due to a bug in LowerFastTailCall which assumes that |
| 6878 | // nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This |
| 6879 | // is tracked with https://github.com/dotnet/coreclr/issues/12468. |
| 6880 | // |
| 6881 | // 3) If the callee has a 9 to 16 byte struct argument and the callee has |
| 6882 | // stack arguments, the decision will be to not fast tail call. This is |
| 6883 | // because, before fgMorphArgs is done, it is unknown whether the struct |
| 6884 | // will be placed on the stack or enregistered. Therefore, the conservative |
| 6885 | // decision not to fast tail call is taken. This limitation should be |
| 6886 | // removed if/when fgMorphArgs no longer depends on fgCanFastTailCall. |
| 6887 | // |
| 6888 | // 4) Arm64 Only, if there are HFA arguments and the callee has stack |
| 6889 | // arguments, the decision will be reported as cannot fast tail call. |
| 6890 | // This is because, before fgMorphArgs is done, it is unknown whether the struct |
| 6891 | // will be placed on the stack or enregistered. Therefore, the conservative |
| 6892 | // decision not to fast tail call is taken. |
| 6893 | // |
| 6894 | // Can fast tail call examples (amd64 Unix): |
| 6895 | // |
| 6896 | // -- Callee will have all register arguments -- |
| 6897 | // caller(int, int, int, int) |
| 6898 | // callee(int, int, float, int) |
| 6899 | // |
| 6900 | // -- Callee requires stack space that is equal to the caller -- |
| 6901 | // caller({ int, int }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack |
| 6902 | // space |
| 6903 | // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space |
| 6904 | // |
| 6905 | // -- Callee requires stack space that is less than the caller -- |
| 6906 | // caller({ int, int }, int, { int, int }, int, { int, int }, { int, int }) 6 int register arguments, 32 byte stack |
| 6907 | // space |
| 6908 | // callee(int, int, int, int, int, int, { int, int } ) // 6 int register arguments, 16 byte stack space |
| 6909 | // |
| 6910 | // -- Callee will have all register arguments -- |
| 6911 | // caller(int) |
| 6912 | // callee(int, int, int, int) |
| 6913 | // |
| 6914 | // Cannot fast tail call examples (amd64 Unix): |
| 6915 | // |
| 6916 | // -- Callee requires stack space that is larger than the caller -- |
| 6917 | // caller(float, float, float, float, float, float, float, float) -- 8 float register arguments |
| 6918 | // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space |
| 6919 | // |
| 6920 | // -- Callee has structs which cannot be enregistered (Implementation Limitation) -- |
| 6921 | // caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register |
| 6922 | // arguments, 24 byte stack space |
| 6923 | // callee({ double, double, double }) -- 24 bytes stack space |
| 6924 | // |
| 6925 | // -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) -- |
| 6926 | // caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space |
| 6927 | // callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space |
| 6928 | // |
| 6929 | // -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) -- |
| 6930 | // caller({ double, double, double, double, double, double }) // 48 byte stack |
| 6931 | // callee(int, int) -- 2 int registers |
| 6932 | |
| 6933 | bool Compiler::fgCanFastTailCall(GenTreeCall* callee) |
| 6934 | { |
| 6935 | #if FEATURE_FASTTAILCALL |
| 6936 | // To reach here means that the return types of the caller and callee are tail call compatible. |
| 6937 | // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type. |
| 6938 | // |
| 6939 | // In an implicit tail call case callSig may not be available but it is guaranteed to be available |
| 6940 | // for explicit tail call cases. The reason callSig may not be available in the implicit case is that |
| 6941 | // a call node might be marked as an inline candidate and could fail to be inlined, in which case |
| 6942 | // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which |
| 6943 | // currently does not copy/set callSig. |
| 6944 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 6945 | |
| 6946 | #ifdef DEBUG |
| 6947 | if (callee->IsTailPrefixedCall()) |
| 6948 | { |
| 6949 | assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass, |
| 6950 | (var_types)callee->gtReturnType, callee->callSig->retTypeClass)); |
| 6951 | } |
| 6952 | #endif |
| 6953 | |
| 6954 | auto reportFastTailCallDecision = [this, callee](const char* msg, size_t callerStackSize, size_t calleeStackSize) { |
| 6955 | #if DEBUG |
| 6956 | if ((JitConfig.JitReportFastTailCallDecisions()) == 1) |
| 6957 | { |
| 6958 | if (callee->gtCallType != CT_INDIRECT) |
| 6959 | { |
| 6960 | const char* methodName; |
| 6961 | |
| 6962 | methodName = eeGetMethodFullName(callee->gtCallMethHnd); |
| 6963 | |
| 6964 | printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ", |
| 6965 | info.compFullName, methodName); |
| 6966 | } |
| 6967 | else |
| 6968 | { |
| 6969 | printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- " |
| 6970 | "Decision: ", |
| 6971 | info.compFullName); |
| 6972 | } |
| 6973 | |
| 6974 | if (callerStackSize != -1) |
| 6975 | { |
| 6976 | printf("%s (CallerStackSize: %d, CalleeStackSize: %d)\n\n", msg, callerStackSize, calleeStackSize); |
| 6977 | } |
| 6978 | else |
| 6979 | { |
| 6980 | printf("%s\n\n", msg); |
| 6981 | } |
| 6982 | } |
| 6983 | else |
| 6984 | { |
| 6985 | JITDUMP("[Fast tailcall decision]: %s\n", msg); |
| 6986 | } |
| 6987 | #else |
| 6988 | (void)this; |
| 6989 | (void)callee; |
| 6990 | #endif // DEBUG |
| 6991 | }; |
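| | // The early-out paths below report their reason through reportFastTailCallDecision; when the |
| | // caller/callee stack sizes have not been computed yet (e.g. the RetBuf check) zeros are passed. |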
| 6992 | |
| 6993 | // Note on vararg methods: |
| 6994 | // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller. |
| 6995 | // But we can be sure that the in-coming arg area of the vararg caller is sufficient to hold its |
| 6996 | // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the |
| 6997 | // out-going area required for the callee is bounded by the caller's fixed argument space. |
| 6998 | // |
| 6999 | // Note that the callee being a vararg method is not a problem since we can account for the params being passed. |
| 7000 | unsigned nCallerArgs = info.compArgsCount; |
| 7001 | |
| 7002 | size_t callerArgRegCount = codeGen->intRegState.rsCalleeRegArgCount; |
| 7003 | size_t callerFloatArgRegCount = codeGen->floatRegState.rsCalleeRegArgCount; |
| 7004 | |
| 7005 | // Count the callee args including implicit and hidden. |
| 7006 | // Note that GenericContext and VarargCookie are added by importer while |
| 7007 | // importing the call to gtCallArgs list along with explicit user args. |
| 7008 | size_t calleeArgRegCount = 0; |
| 7009 | size_t calleeFloatArgRegCount = 0; |
| 7010 | |
| 7011 | if (callee->gtCallObjp) // thisPtr |
| 7012 | { |
| 7013 | ++calleeArgRegCount; |
| 7014 | } |
| 7015 | |
| 7016 | if (callee->HasRetBufArg()) // RetBuf |
| 7017 | { |
| 7018 | // We don't increment calleeArgRegCount here, since it is already in callee->gtCallArgs. |
| 7019 | |
| 7020 | // If callee has RetBuf param, caller too must have it. |
| 7021 | // Otherwise go the slow route. |
| 7022 | if (info.compRetBuffArg == BAD_VAR_NUM) |
| 7023 | { |
| 7024 | reportFastTailCallDecision("Callee has RetBuf but caller does not.", 0, 0); |
| 7025 | return false; |
| 7026 | } |
| 7027 | } |
| 7028 | |
| 7029 | // Count user args while tracking whether any of them is a multi-byte param |
| 7030 | // that cannot be passed in a register. Note that we don't need to count |
| 7031 | // non-standard and secret params passed in registers (e.g. R10, R11) since |
| 7032 | // these won't contribute to out-going arg size. |
| 7033 | // For each struct arg, hasMultiByteStackArgs will track if it can be passed in registers. |
| 7034 | // If it cannot we will break the loop and not fastTailCall. This is an implementation limitation |
| 7035 | // where the callee only is checked for non enregisterable structs. |
| 7036 | // It is tracked with https://github.com/dotnet/coreclr/issues/12644. |
| 7037 | bool hasMultiByteStackArgs = false; |
| 7038 | bool hasTwoSlotSizedStruct = false; |
| 7039 | bool hasHfaArg = false; |
| 7040 | size_t nCalleeArgs = calleeArgRegCount; // Keep track of how many args we have. |
| 7041 | size_t calleeStackSize = 0; |
| 7042 | for (GenTree* args = callee->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2) |
| 7043 | { |
| 7044 | ++nCalleeArgs; |
| 7045 | assert(args->OperIsList()); |
| 7046 | GenTree* argx = args->gtOp.gtOp1; |
| 7047 | |
| 7048 | if (varTypeIsStruct(argx)) |
| 7049 | { |
| 7050 | // Actual arg may be a child of a GT_COMMA. Skip over comma opers. |
| 7051 | argx = argx->gtEffectiveVal(true /*commaOnly*/); |
| 7052 | |
| 7053 | // Get the size of the struct and see if it is register passable. |
| 7054 | CORINFO_CLASS_HANDLE objClass = nullptr; |
| 7055 | |
| 7056 | if (argx->OperGet() == GT_OBJ) |
| 7057 | { |
| 7058 | objClass = argx->AsObj()->gtClass; |
| 7059 | } |
| 7060 | else if (argx->IsLocal()) |
| 7061 | { |
| 7062 | objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle(); |
| 7063 | } |
| 7064 | if (objClass != nullptr) |
| 7065 | { |
| 7066 | #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) |
| 7067 | |
| 7068 | unsigned typeSize = 0; |
| 7069 | // We should have already broken out of the loop if we've set hasMultiByteStackArgs to true. |
| 7070 | assert(!hasMultiByteStackArgs); |
| 7071 | hasMultiByteStackArgs = |
| 7072 | !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false, false); |
| 7073 | |
| 7074 | #if defined(UNIX_AMD64_ABI) |
| 7075 | SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; |
| 7076 | |
| 7077 | assert(objClass != nullptr); |
| 7078 | eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); |
| 7079 | |
| 7080 | if (structDesc.passedInRegisters) |
| 7081 | { |
| 7082 | if (structDesc.eightByteCount == 2) |
| 7083 | { |
| 7084 | hasTwoSlotSizedStruct = true; |
| 7085 | } |
| 7086 | |
| 7087 | for (unsigned int i = 0; i < structDesc.eightByteCount; i++) |
| 7088 | { |
| 7089 | if (structDesc.IsIntegralSlot(i)) |
| 7090 | { |
| 7091 | ++calleeArgRegCount; |
| 7092 | } |
| 7093 | else if (structDesc.IsSseSlot(i)) |
| 7094 | { |
| 7095 | ++calleeFloatArgRegCount; |
| 7096 | } |
| 7097 | else |
| 7098 | { |
| 7099 | assert(false && "Invalid eightbyte classification type."); |
| 7100 | break; |
| 7101 | } |
| 7102 | } |
| 7103 | } |
| 7104 | else |
| 7105 | { |
| 7106 | calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE); |
| 7107 | hasMultiByteStackArgs = true; |
| 7108 | } |
| 7109 | |
| 7110 | #elif defined(_TARGET_ARM64_) // ARM64 |
| 7111 | var_types hfaType = GetHfaType(argx); |
| 7112 | bool isHfaArg = varTypeIsFloating(hfaType); |
| 7113 | size_t size = 1; |
| 7114 | |
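| | // For example, a struct with three float fields is an HFA of three elements; it consumes |
| | // three floating-point argument registers here rather than general-purpose slots. |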
| 7115 | if (isHfaArg) |
| 7116 | { |
| 7117 | hasHfaArg = true; |
| 7118 | |
| 7119 | calleeFloatArgRegCount += GetHfaCount(argx); |
| 7120 | } |
| 7121 | else |
| 7122 | { |
| 7123 | // Structs are either passed in 1 or 2 (64-bit) slots |
| 7124 | size_t roundupSize = roundUp(typeSize, TARGET_POINTER_SIZE); |
| 7125 | size = roundupSize / TARGET_POINTER_SIZE; |
| 7126 | |
| 7127 | if (size > 2) |
| 7128 | { |
| 7129 | size = 1; |
| 7130 | } |
| 7131 | |
| 7132 | else if (size == 2) |
| 7133 | { |
| 7134 | hasTwoSlotSizedStruct = true; |
| 7135 | } |
| 7136 | |
| 7137 | calleeArgRegCount += size; |
| 7138 | } |
| 7139 | |
| 7140 | #elif defined(WINDOWS_AMD64_ABI) |
| 7141 | |
| 7142 | ++calleeArgRegCount; |
| 7143 | |
| 7144 | #endif // UNIX_AMD64_ABI |
| 7145 | |
| 7146 | #else |
| 7147 | assert(!"Target platform ABI rules regarding passing struct type args in registers"); |
| 7148 | unreached(); |
| 7149 | #endif //_TARGET_AMD64_ || _TARGET_ARM64_ |
| 7150 | } |
| 7151 | else |
| 7152 | { |
| 7153 | hasMultiByteStackArgs = true; |
| 7154 | } |
| 7155 | } |
| 7156 | else |
| 7157 | { |
| 7158 | varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount; |
| 7159 | } |
| 7160 | |
| 7161 | // We can break early on multiByte cases. |
| 7162 | if (hasMultiByteStackArgs) |
| 7163 | { |
| 7164 | break; |
| 7165 | } |
| 7166 | } |
| 7167 | |
| 7168 | const unsigned maxRegArgs = MAX_REG_ARG; |
| 7169 | |
| 7170 | // Reaching here means that the callee has only argument types which can be passed in |
| 7171 | // a register and which, if passed on the stack, occupy exactly one stack slot in the out-going arg area. |
| 7172 | // If we are passing args on the stack for the callee and it has more stack args than |
| 7173 | // the caller, then a fast tail call cannot be performed. |
| 7174 | // |
| 7175 | // Note that the GC'ness of on stack args need not match since the arg setup area is marked |
| 7176 | // as non-interruptible for fast tail calls. |
| 7177 | |
| 7178 | #ifdef WINDOWS_AMD64_ABI |
| 7179 | assert(calleeStackSize == 0); |
| 7180 | size_t calleeStackSlots = ((calleeArgRegCount + calleeFloatArgRegCount) > maxRegArgs) |
| 7181 | ? (calleeArgRegCount + calleeFloatArgRegCount) - maxRegArgs |
| 7182 | : 0; |
| 7183 | calleeStackSize = calleeStackSlots * TARGET_POINTER_SIZE; |
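| | // For example, with MAX_REG_ARG == 4 on Windows x64, a callee with 6 register-classified |
| | // arguments spills 2 of them to the stack, giving calleeStackSize == 2 * TARGET_POINTER_SIZE (16 bytes). |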
| 7184 | size_t callerStackSize = info.compArgStackSize; |
| 7185 | |
| 7186 | bool hasStackArgs = false; |
| 7187 | |
| 7188 | if (callerStackSize > 0 || calleeStackSize > 0) |
| 7189 | { |
| 7190 | hasStackArgs = true; |
| 7191 | } |
| 7192 | |
| 7193 | // Go the slow route if it has multi-byte params. This is an implementation
| 7194 | // limitation; see https://github.com/dotnet/coreclr/issues/12644.
| 7195 | if (hasMultiByteStackArgs) |
| 7196 | { |
| 7197 | reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs" , callerStackSize, calleeStackSize); |
| 7198 | return false; |
| 7199 | } |
| 7200 | |
| 7201 | // x64 Windows: If we have more callee registers used than MAX_REG_ARG, then
| 7202 | // make sure the callee's incoming argument count does not exceed the caller's.
| 7203 | if (hasStackArgs && (nCalleeArgs > nCallerArgs)) |
| 7204 | { |
| 7205 | reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)" , callerStackSize, |
| 7206 | calleeStackSize); |
| 7207 | return false; |
| 7208 | } |
| 7209 | |
| 7210 | #elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) |
| 7211 | |
| 7212 | // For *nix Amd64 and Arm64, check to see if all arguments for the callee
| 7213 | // and caller are passed in registers. If not, ensure that the outgoing argument stack size
| 7214 | // requirement for the callee is less than or equal to the caller's entire stack frame usage.
| 7215 | //
| 7216 | // Also, in the case that we have to pass arguments on the stack, make sure
| 7217 | // that we are not dealing with structs that are >8 bytes.
| 7218 | |
| 7219 | bool hasStackArgs = false; |
| 7220 | size_t maxFloatRegArgs = MAX_FLOAT_REG_ARG; |
| 7221 | |
| 7222 | size_t calleeIntStackArgCount = calleeArgRegCount > maxRegArgs ? calleeArgRegCount - maxRegArgs : 0; |
| 7223 | size_t calleeFloatStackArgCount = |
| 7224 | calleeFloatArgRegCount > maxFloatRegArgs ? calleeFloatArgRegCount - maxFloatRegArgs : 0; |
| 7225 | |
| 7226 | size_t calleeStackArgCount = calleeIntStackArgCount + calleeFloatStackArgCount; |
| 7227 | size_t callerStackSize = info.compArgStackSize; |
| 7228 | calleeStackSize += calleeStackArgCount * TARGET_POINTER_SIZE; |
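| | // For example (illustrative): under the SysV AMD64 ABI (six integer and eight
| | // floating-point argument registers), a callee taking eight integer args has
| | // calleeIntStackArgCount == 2 and therefore adds 16 bytes to calleeStackSize.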
| 7229 | |
| 7230 | if (callerStackSize > 0 || calleeStackSize > 0) |
| 7231 | { |
| 7232 | hasStackArgs = true; |
| 7233 | } |
| 7234 | |
| 7235 | // Go the slow route if it has multi-byte params. This is an implementation
| 7236 | // limitation; see https://github.com/dotnet/coreclr/issues/12644.
| 7237 | if (hasMultiByteStackArgs) |
| 7238 | { |
| 7239 | reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize);
| 7240 | return false; |
| 7241 | } |
| 7242 | |
| 7243 | // Callee has a >8 and <=16 byte struct and arguments that have to go on the stack. Do not fastTailCall.
| 7244 | if (calleeStackSize > 0 && hasTwoSlotSizedStruct) |
| 7245 | { |
| 7246 | reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasTwoSlotSizedStruct",
| 7247 | callerStackSize, calleeStackSize); |
| 7248 | return false; |
| 7249 | } |
| 7250 | |
| 7251 | // Callee has an HFA struct and arguments that have to go on the stack. Do not fastTailCall.
| 7252 | if (calleeStackSize > 0 && hasHfaArg) |
| 7253 | { |
| 7254 | reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasHfaArg", callerStackSize,
| 7255 | calleeStackSize); |
| 7256 | return false; |
| 7257 | } |
| 7258 | |
| 7259 | // TODO-AMD64-Unix |
| 7260 | // TODO-ARM64 |
| 7261 | // |
| 7262 | // LowerFastTailCall currently assumes nCalleeArgs <= nCallerArgs. This is |
| 7263 | // not true in many cases on x64 linux, remove this pessimization when |
| 7264 | // LowerFastTailCall is fixed. See https://github.com/dotnet/coreclr/issues/12468 |
| 7265 | // for more information. |
| 7266 | if (hasStackArgs && (nCalleeArgs > nCallerArgs)) |
| 7267 | { |
| 7268 | reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize,
| 7269 | calleeStackSize); |
| 7270 | return false; |
| 7271 | } |
| 7272 | |
| 7273 | if (calleeStackSize > callerStackSize) |
| 7274 | { |
| 7275 | reportFastTailCallDecision("Will not fastTailCall calleeStackSize > callerStackSize", callerStackSize,
| 7276 | calleeStackSize); |
| 7277 | return false; |
| 7278 | } |
| 7279 | |
| 7280 | #else |
| 7281 | |
| 7282 | NYI("fastTailCall not supported on this Architecture." ); |
| 7283 | |
| 7284 | #endif // WINDOWS_AMD64_ABI |
| 7285 | |
| 7286 | reportFastTailCallDecision("Will fastTailCall" , callerStackSize, calleeStackSize); |
| 7287 | return true; |
| 7288 | #else // FEATURE_FASTTAILCALL |
| 7289 | return false; |
| 7290 | #endif |
| 7291 | } |
| 7292 | |
| 7293 | /***************************************************************************** |
| 7294 | * |
| 7295 | * Transform the given GT_CALL tree for tail call code generation. |
| 7296 | */ |
| 7297 | void Compiler::fgMorphTailCall(GenTreeCall* call, void* pfnCopyArgs) |
| 7298 | { |
| 7299 | JITDUMP("fgMorphTailCall (before):\n" ); |
| 7300 | DISPTREE(call); |
| 7301 | |
| 7302 | // The runtime requires that we perform a null check on the `this` argument before |
| 7303 | // tail calling to a virtual dispatch stub. This requirement is a consequence of limitations |
| 7304 | // in the runtime's ability to map an AV to a NullReferenceException if |
| 7305 | // the AV occurs in a dispatch stub that has an unmanaged caller.
| 7306 | if (call->IsVirtualStub()) |
| 7307 | { |
| 7308 | call->gtFlags |= GTF_CALL_NULLCHECK; |
| 7309 | } |
| 7310 | |
| 7311 | #if defined(_TARGET_ARM_) |
| 7312 | // For the helper-assisted tail calls, we need to push all the arguments |
| 7313 | // into a single list, and then add a few extra at the beginning |
| 7314 | |
| 7315 | // Check for PInvoke call types that we don't handle in codegen yet. |
| 7316 | assert(!call->IsUnmanaged()); |
| 7317 | assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL)); |
| 7318 | |
| 7319 | // First move the this pointer (if any) onto the regular arg list |
| 7320 | GenTree* thisPtr = NULL; |
| 7321 | if (call->gtCallObjp) |
| 7322 | { |
| 7323 | GenTree* objp = call->gtCallObjp; |
| 7324 | call->gtCallObjp = NULL; |
| 7325 | |
| 7326 | if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable()) |
| 7327 | { |
| 7328 | thisPtr = gtClone(objp, true); |
| 7329 | var_types vt = objp->TypeGet(); |
| 7330 | if (thisPtr == NULL) |
| 7331 | { |
| 7332 | // Too complex, so use a temp |
| 7333 | unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
| 7334 | GenTree* asg = gtNewTempAssign(lclNum, objp); |
| 7335 | if (!call->IsVirtualVtable()) |
| 7336 | { |
| 7337 | // Add an indirection to get the nullcheck |
| 7338 | GenTree* tmp = gtNewLclvNode(lclNum, vt); |
| 7339 | GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp); |
| 7340 | asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind); |
| 7341 | } |
| 7342 | objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt)); |
| 7343 | thisPtr = gtNewLclvNode(lclNum, vt); |
| 7344 | } |
| 7345 | else if (!call->IsVirtualVtable()) |
| 7346 | { |
| 7347 | GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr); |
| 7348 | objp = gtNewOperNode(GT_COMMA, vt, ind, objp); |
| 7349 | thisPtr = gtClone(thisPtr, true); |
| 7350 | } |
| 7351 | |
| 7352 | call->gtFlags &= ~GTF_CALL_NULLCHECK; |
| 7353 | } |
| 7354 | |
| 7355 | call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs); |
| 7356 | } |
| 7357 | |
| 7358 | // Add the extra VSD parameter if needed |
| 7359 | if (call->IsVirtualStub()) |
| 7360 | { |
| 7361 | GenTree* stubAddrArg = fgGetStubAddrArg(call); |
| 7362 | // And push the stub address onto the list of arguments |
| 7363 | call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs); |
| 7364 | } |
| 7365 | else if (call->IsVirtualVtable()) |
| 7366 | { |
| 7367 | noway_assert(thisPtr != NULL); |
| 7368 | |
| 7369 | GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL)); |
| 7370 | GenTree* vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add); |
| 7371 | vtbl->gtFlags |= GTF_EXCEPT; |
| 7372 | |
| 7373 | unsigned vtabOffsOfIndirection; |
| 7374 | unsigned vtabOffsAfterIndirection; |
| 7375 | bool isRelative; |
| 7376 | info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection, |
| 7377 | &isRelative); |
| 7378 | |
| 7379 | /* Get the appropriate vtable chunk */ |
| 7380 | |
| 7381 | if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK) |
| 7382 | { |
| 7383 | add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL)); |
| 7384 | |
| 7385 | GenTree* indOffTree = nullptr; |
| 7386 | |
| 7387 | if (isRelative) |
| 7388 | { |
| 7389 | indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, |
| 7390 | nullptr DEBUGARG("virtual table call"));
| 7391 | } |
| 7392 | |
| 7393 | vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add); |
| 7394 | |
| 7395 | if (isRelative) |
| 7396 | { |
| 7397 | vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree); |
| 7398 | } |
| 7399 | } |
| 7400 | |
| 7401 | /* Now the appropriate vtable slot */ |
| 7402 | |
| 7403 | add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL)); |
| 7404 | |
| 7405 | GenTree* indOffTree = nullptr; |
| 7406 | |
| 7407 | if (isRelative) |
| 7408 | { |
| 7409 | indOffTree = impCloneExpr(add, &add, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, |
| 7410 | nullptr DEBUGARG("virtual table call 2"));
| 7411 | } |
| 7412 | |
| 7413 | vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add); |
| 7414 | |
| 7415 | if (isRelative) |
| 7416 | { |
| 7417 | vtbl = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, indOffTree); |
| 7418 | } |
| 7419 | |
| 7420 | // Switch this to a plain indirect call |
| 7421 | call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; |
| 7422 | assert(!call->IsVirtual()); |
| 7423 | call->gtCallType = CT_INDIRECT; |
| 7424 | |
| 7425 | call->gtCallAddr = vtbl; |
| 7426 | call->gtCallCookie = NULL; |
| 7427 | call->gtFlags |= GTF_EXCEPT; |
| 7428 | } |
| 7429 | |
| 7430 | // Now inject a placeholder for the real call target that codegen will generate |
| 7431 | GenTree* arg = gtNewIconNode(0, TYP_I_IMPL); |
| 7432 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
| 7433 | |
| 7434 | // Lastly inject the pointer for the copy routine |
| 7435 | noway_assert(pfnCopyArgs != nullptr); |
| 7436 | arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR); |
| 7437 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
| 7438 | |
| 7439 | // It is now a varargs tail call |
| 7440 | call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; |
| 7441 | call->gtFlags &= ~GTF_CALL_POP_ARGS; |
| 7442 | |
| 7443 | #elif defined(_TARGET_XARCH_) |
| 7444 | |
| 7445 | // For the helper-assisted tail calls, we need to push all the arguments |
| 7446 | // into a single list, and then add a few extra at the beginning or end. |
| 7447 | // |
| 7448 | // For AMD64, the tailcall helper (JIT_TailCall) is defined as: |
| 7449 | // |
| 7450 | // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>) |
| 7451 | // |
| 7452 | // We need to add "copyRoutine" and "callTarget" extra params at the beginning. |
| 7453 | // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg |
| 7454 | // for callTarget here which will be replaced later with callTarget in tail call lowering. |
| 7455 | // |
| 7456 | // For x86, the tailcall helper is defined as: |
| 7457 | // |
| 7458 | // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* |
| 7459 | // callTarget) |
| 7460 | // |
| 7461 | // Note that the special arguments are on the stack, whereas the function arguments follow |
| 7462 | // the normal convention: there might be register arguments in ECX and EDX. The stack will |
| 7463 | // look like (highest address at the top): |
| 7464 | // first normal stack argument |
| 7465 | // ... |
| 7466 | // last normal stack argument |
| 7467 | // numberOfOldStackArgs |
| 7468 | // numberOfNewStackArgs |
| 7469 | // flags |
| 7470 | // callTarget |
| 7471 | // |
| 7472 | // Each special arg is 4 bytes. |
| 7473 | // |
| 7474 | // 'flags' is a bitmask where: |
| 7475 | // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all |
| 7476 | // callee-saved registers for tailcall functions. Note that the helper assumes |
| 7477 | // that the callee-saved registers live immediately below EBP, and must have been |
| 7478 | // pushed in this order: EDI, ESI, EBX. |
| 7479 | // 2 == call target is a virtual stub dispatch. |
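| | //      For example, a virtual stub dispatch tail call that also restores the
| | //      callee-saved registers would pass flags == 3 (1 | 2).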
| 7480 | // |
| 7481 | // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details |
| 7482 | // on the custom calling convention. |
| 7483 | |
| 7484 | // Check for PInvoke call types that we don't handle in codegen yet. |
| 7485 | assert(!call->IsUnmanaged()); |
| 7486 | assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr)); |
| 7487 | |
| 7488 | // Don't support tail calling helper methods |
| 7489 | assert(call->gtCallType != CT_HELPER); |
| 7490 | |
| 7491 | // We come down this route only for tail-prefixed calls that cannot be dispatched as
| 7492 | // fast tail calls.
| 7493 | assert(!call->IsImplicitTailCall()); |
| 7494 | assert(!fgCanFastTailCall(call)); |
| 7495 | |
| 7496 | // First move the 'this' pointer (if any) onto the regular arg list. We do this because |
| 7497 | // we are going to prepend special arguments onto the argument list (for non-x86 platforms), |
| 7498 | // and thus shift where the 'this' pointer will be passed to a later argument slot. In |
| 7499 | // addition, for all platforms, we are going to change the call into a helper call. Our code |
| 7500 | // generation code for handling calls to helpers does not handle 'this' pointers. So, when we |
| 7501 | // do this transformation, we must explicitly create a null 'this' pointer check, if required, |
| 7502 | // since special 'this' pointer handling will no longer kick in. |
| 7503 | // |
| 7504 | // Some call types, such as virtual vtable calls, require creating a call address expression |
| 7505 | // that involves the "this" pointer. Lowering will sometimes create an embedded statement |
| 7506 | // to create a temporary that is assigned to the "this" pointer expression, and then use |
| 7507 | // that temp to create the call address expression. This temp creation embedded statement |
| 7508 | // will occur immediately before the "this" pointer argument, and then will be used for both |
| 7509 | // the "this" pointer argument as well as the call address expression. In the normal ordering, |
| 7510 | // the embedded statement establishing the "this" pointer temp will execute before both uses |
| 7511 | // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the |
| 7512 | // normal call argument list, and insert a placeholder which will hold the call address |
| 7513 | // expression. For non-x86, things are ok, because the order of execution of these is not |
| 7514 | // altered. However, for x86, the call address expression is inserted as the *last* argument |
| 7515 | // in the argument list, *after* the "this" pointer. It will be put on the stack, and be |
| 7516 | // evaluated first. To ensure we don't end up with out-of-order temp definition and use, |
| 7517 | // for those cases where call lowering creates an embedded form temp of "this", we will |
| 7518 | // create a temp here, early, that will later get morphed correctly. |
| 7519 | |
| 7520 | if (call->gtCallObjp) |
| 7521 | { |
| 7522 | GenTree* thisPtr = nullptr; |
| 7523 | GenTree* objp = call->gtCallObjp; |
| 7524 | call->gtCallObjp = nullptr; |
| 7525 | |
| 7526 | #ifdef _TARGET_X86_ |
| 7527 | if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal()) |
| 7528 | { |
| 7529 | // tmp = "this" |
| 7530 | unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
| 7531 | GenTree* asg = gtNewTempAssign(lclNum, objp); |
| 7532 | |
| 7533 | // COMMA(tmp = "this", tmp) |
| 7534 | var_types vt = objp->TypeGet(); |
| 7535 | GenTree* tmp = gtNewLclvNode(lclNum, vt); |
| 7536 | thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp); |
| 7537 | |
| 7538 | objp = thisPtr; |
| 7539 | } |
| 7540 | #endif // _TARGET_X86_ |
| 7541 | |
| 7542 | if (call->NeedsNullCheck()) |
| 7543 | { |
| 7544 | // clone "this" if "this" has no side effects. |
| 7545 | if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT)) |
| 7546 | { |
| 7547 | thisPtr = gtClone(objp, true); |
| 7548 | } |
| 7549 | |
| 7550 | var_types vt = objp->TypeGet(); |
| 7551 | if (thisPtr == nullptr) |
| 7552 | { |
| 7553 | // create a temp if either "this" has side effects or "this" is too complex to clone. |
| 7554 | |
| 7555 | // tmp = "this" |
| 7556 | unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
| 7557 | GenTree* asg = gtNewTempAssign(lclNum, objp); |
| 7558 | |
| 7559 | // COMMA(tmp = "this", deref(tmp)) |
| 7560 | GenTree* tmp = gtNewLclvNode(lclNum, vt); |
| 7561 | GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, tmp); |
| 7562 | asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind); |
| 7563 | |
| 7564 | // COMMA(COMMA(tmp = "this", deref(tmp)), tmp) |
| 7565 | thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt)); |
| 7566 | } |
| 7567 | else |
| 7568 | { |
| 7569 | // thisPtr = COMMA(deref("this"), "this") |
| 7570 | GenTree* ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr); |
| 7571 | thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true)); |
| 7572 | } |
| 7573 | |
| 7574 | call->gtFlags &= ~GTF_CALL_NULLCHECK; |
| 7575 | } |
| 7576 | else |
| 7577 | { |
| 7578 | thisPtr = objp; |
| 7579 | } |
| 7580 | |
| 7581 | // During rationalization, tmp="this" and the null check will
| 7582 | // materialize as embedded stmts in the right execution order.
| 7583 | assert(thisPtr != nullptr); |
| 7584 | call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs); |
| 7585 | } |
| 7586 | |
| 7587 | #if defined(_TARGET_AMD64_) |
| 7588 | |
| 7589 | // Add the extra VSD parameter to arg list in case of VSD calls. |
| 7590 | // Tail call arg copying thunk will move this extra VSD parameter |
| 7591 | // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk() |
| 7592 | // in Stublinkerx86.cpp for more details. |
| 7593 | if (call->IsVirtualStub()) |
| 7594 | { |
| 7595 | GenTree* stubAddrArg = fgGetStubAddrArg(call); |
| 7596 | // And push the stub address onto the list of arguments |
| 7597 | call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs); |
| 7598 | } |
| 7599 | |
| 7600 | // Now inject a placeholder for the real call target that Lower phase will generate. |
| 7601 | GenTree* arg = gtNewIconNode(0, TYP_I_IMPL); |
| 7602 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
| 7603 | |
| 7604 | // Inject the pointer for the copy routine to be used for struct copying |
| 7605 | noway_assert(pfnCopyArgs != nullptr); |
| 7606 | arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR); |
| 7607 | call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); |
| 7608 | |
| 7609 | #else // !_TARGET_AMD64_ |
| 7610 | |
| 7611 | // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will |
| 7612 | // append to the list. |
| 7613 | GenTreeArgList** ppArg = &call->gtCallArgs; |
| 7614 | for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest()) |
| 7615 | { |
| 7616 | ppArg = (GenTreeArgList**)&args->gtOp2; |
| 7617 | } |
| 7618 | assert(ppArg != nullptr); |
| 7619 | assert(*ppArg == nullptr); |
| 7620 | |
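| | // For illustration (assuming compArgSize counts all incoming args, including the
| | // ones passed in ECX/EDX): a caller with five int args has compArgSize == 20 and
| | // two register args, so nOldStkArgsWords == (20 - 2*4) / 4 == 3.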
| 7621 | unsigned nOldStkArgsWords = |
| 7622 | (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES; |
| 7623 | GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL); |
| 7624 | *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs |
| 7625 | ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2); |
| 7626 | |
| 7627 | // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate. |
| 7628 | // The constant will be replaced. |
| 7629 | GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL); |
| 7630 | *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs |
| 7631 | ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2); |
| 7632 | |
| 7633 | // Inject a placeholder for the flags. |
| 7634 | // The constant will be replaced. |
| 7635 | GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL); |
| 7636 | *ppArg = gtNewListNode(arg1, nullptr); |
| 7637 | ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2); |
| 7638 | |
| 7639 | // Inject a placeholder for the real call target that the Lowering phase will generate. |
| 7640 | // The constant will be replaced. |
| 7641 | GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL); |
| 7642 | *ppArg = gtNewListNode(arg0, nullptr); |
| 7643 | |
| 7644 | #endif // !_TARGET_AMD64_ |
| 7645 | |
| 7646 | // It is now a varargs tail call dispatched via helper. |
| 7647 | call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; |
| 7648 | call->gtFlags &= ~GTF_CALL_POP_ARGS; |
| 7649 | |
| 7650 | #elif defined(_TARGET_ARM64_) |
| 7651 | NYI_ARM64("Tail calls via stub are unsupported on this platform.");
| 7652 | #endif // _TARGET_ARM64_ |
| 7653 | |
| 7654 | // The function is responsible for doing explicit null check when it is necessary. |
| 7655 | assert(!call->NeedsNullCheck()); |
| 7656 | |
| 7657 | JITDUMP("fgMorphTailCall (after):\n" ); |
| 7658 | DISPTREE(call); |
| 7659 | } |
| 7660 | |
| 7661 | //------------------------------------------------------------------------ |
| 7662 | // fgGetStubAddrArg: Return the virtual stub address for the given call. |
| 7663 | // |
| 7664 | // Notes: |
| 7665 | // The JIT must place the address of the stub used to load the call target,
| 7666 | // the "stub indirection cell", in a special call argument passed in a special register.
| 7667 | // |
| 7668 | // Arguments: |
| 7669 | // call - a call that needs virtual stub dispatching. |
| 7670 | // |
| 7671 | // Return Value: |
| 7672 | // addr tree with the register requirement set.
| 7673 | // |
| 7674 | GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call) |
| 7675 | { |
| 7676 | assert(call->IsVirtualStub()); |
| 7677 | GenTree* stubAddrArg; |
| 7678 | if (call->gtCallType == CT_INDIRECT) |
| 7679 | { |
| 7680 | stubAddrArg = gtClone(call->gtCallAddr, true); |
| 7681 | } |
| 7682 | else |
| 7683 | { |
| 7684 | assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT); |
| 7685 | ssize_t addr = ssize_t(call->gtStubCallStubAddr); |
| 7686 | stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR); |
| 7687 | } |
| 7688 | assert(stubAddrArg != nullptr); |
| 7689 | stubAddrArg->gtRegNum = virtualStubParamInfo->GetReg(); |
| 7690 | return stubAddrArg; |
| 7691 | } |
| 7692 | |
| 7693 | //------------------------------------------------------------------------------ |
| 7694 | // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop. |
| 7695 | // |
| 7696 | // |
| 7697 | // Arguments: |
| 7698 | // block - basic block ending with a recursive fast tail call |
| 7699 | // recursiveTailCall - recursive tail call to transform |
| 7700 | // |
| 7701 | // Notes: |
| 7702 | // The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop. |
| 7703 | |
| 7704 | void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall) |
| 7705 | { |
| 7706 | assert(recursiveTailCall->IsTailCallConvertibleToLoop()); |
| 7707 | GenTree* last = block->lastStmt(); |
| 7708 | assert(recursiveTailCall == last->gtStmt.gtStmtExpr); |
| 7709 | |
| 7710 | // Transform recursive tail call into a loop. |
| 7711 | |
| 7712 | GenTree* earlyArgInsertionPoint = last; |
| 7713 | IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx; |
| 7714 | |
| 7715 | // Hoist arg setup statement for the 'this' argument. |
| 7716 | GenTree* thisArg = recursiveTailCall->gtCallObjp; |
| 7717 | if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode()) |
| 7718 | { |
| 7719 | GenTree* thisArgStmt = gtNewStmt(thisArg, callILOffset); |
| 7720 | fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt); |
| 7721 | } |
| 7722 | |
| 7723 | // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first; |
| 7724 | // then the temps need to be assigned to the method parameters. This is done so that the caller |
| 7725 | // parameters are not re-assigned before call arguments depending on them are evaluated. |
| 7726 | // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of |
| 7727 | // where the next temp or parameter assignment should be inserted. |
| 7728 | |
| 7729 | // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first |
| 7730 | // while the second call argument (const 1) doesn't. |
| 7731 | // Basic block before tail recursion elimination: |
| 7732 | // ***** BB04, stmt 1 (top level) |
| 7733 | // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013) |
| 7734 | // [000033] --C - G------ - \--* call void RecursiveMethod |
| 7735 | // [000030] ------------ | / --* const int - 1 |
| 7736 | // [000031] ------------arg0 in rcx + --* +int |
| 7737 | // [000029] ------------ | \--* lclVar int V00 arg1 |
| 7738 | // [000032] ------------arg1 in rdx \--* const int 1 |
| 7739 | // |
| 7740 | // |
| 7741 | // Basic block after tail recursion elimination : |
| 7742 | // ***** BB04, stmt 1 (top level) |
| 7743 | // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) |
| 7744 | // [000030] ------------ | / --* const int - 1 |
| 7745 | // [000031] ------------ | / --* +int |
| 7746 | // [000029] ------------ | | \--* lclVar int V00 arg1 |
| 7747 | // [000050] - A---------- \--* = int |
| 7748 | // [000049] D------N---- \--* lclVar int V02 tmp0 |
| 7749 | // |
| 7750 | // ***** BB04, stmt 2 (top level) |
| 7751 | // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) |
| 7752 | // [000052] ------------ | / --* lclVar int V02 tmp0 |
| 7753 | // [000054] - A---------- \--* = int |
| 7754 | // [000053] D------N---- \--* lclVar int V00 arg0 |
| 7755 | |
| 7756 | // ***** BB04, stmt 3 (top level) |
| 7757 | // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) |
| 7758 | // [000032] ------------ | / --* const int 1 |
| 7759 | // [000057] - A---------- \--* = int |
| 7760 | // [000056] D------N---- \--* lclVar int V01 arg1 |
| 7761 | |
| 7762 | GenTree* tmpAssignmentInsertionPoint = last; |
| 7763 | GenTree* paramAssignmentInsertionPoint = last; |
| 7764 | |
| 7765 | // Process early args. They may contain both setup statements for late args and actual args. |
| 7766 | // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum |
| 7767 | // below has the correct second argument. |
| 7768 | int earlyArgIndex = (thisArg == nullptr) ? 0 : 1; |
| 7769 | for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr; |
| 7770 | (earlyArgIndex++, earlyArgs = earlyArgs->Rest())) |
| 7771 | { |
| 7772 | GenTree* earlyArg = earlyArgs->Current(); |
| 7773 | if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode()) |
| 7774 | { |
| 7775 | if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0) |
| 7776 | { |
| 7777 | // This is a setup node so we need to hoist it. |
| 7778 | GenTree* earlyArgStmt = gtNewStmt(earlyArg, callILOffset); |
| 7779 | fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt); |
| 7780 | } |
| 7781 | else |
| 7782 | { |
| 7783 | // This is an actual argument that needs to be assigned to the corresponding caller parameter. |
| 7784 | fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex); |
| 7785 | GenTree* paramAssignStmt = |
| 7786 | fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset, |
| 7787 | tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); |
| 7788 | if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr)) |
| 7789 | { |
| 7790 | // All temp assignments will happen before the first param assignment. |
| 7791 | tmpAssignmentInsertionPoint = paramAssignStmt; |
| 7792 | } |
| 7793 | } |
| 7794 | } |
| 7795 | } |
| 7796 | |
| 7797 | // Process late args. |
| 7798 | int lateArgIndex = 0; |
| 7799 | for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr; |
| 7800 | (lateArgIndex++, lateArgs = lateArgs->Rest())) |
| 7801 | { |
| 7802 | // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter. |
| 7803 | GenTree* lateArg = lateArgs->Current(); |
| 7804 | fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex); |
| 7805 | GenTree* paramAssignStmt = |
| 7806 | fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset, |
| 7807 | tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); |
| 7808 | |
| 7809 | if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr)) |
| 7810 | { |
| 7811 | // All temp assignments will happen before the first param assignment. |
| 7812 | tmpAssignmentInsertionPoint = paramAssignStmt; |
| 7813 | } |
| 7814 | } |
| 7815 | |
| 7816 | // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that |
| 7817 | // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that |
| 7818 | // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here. |
| 7819 | if (!info.compIsStatic && (lvaArg0Var != info.compThisArg)) |
| 7820 | { |
| 7821 | var_types thisType = lvaTable[info.compThisArg].TypeGet(); |
| 7822 | GenTree* arg0 = gtNewLclvNode(lvaArg0Var, thisType); |
| 7823 | GenTree* arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType)); |
| 7824 | GenTree* arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset); |
| 7825 | fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt); |
| 7826 | } |
| 7827 | |
| 7828 | // If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog |
| 7829 | // but this loop can't include the prolog. Since we don't have liveness information, we insert zero-initialization |
| 7830 | // for all non-parameter IL locals as well as temp structs with GC fields. |
| 7831 | // Liveness phase will remove unnecessary initializations. |
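| | // For example (illustrative): an int local gets an explicit "lcl = 0" assignment,
| | // while a struct local with GC fields gets a zero-init block covering lvSize() bytes.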
| 7832 | if (info.compInitMem) |
| 7833 | { |
| 7834 | unsigned varNum; |
| 7835 | LclVarDsc* varDsc; |
| 7836 | for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++) |
| 7837 | { |
| 7838 | #if FEATURE_FIXED_OUT_ARGS |
| 7839 | if (varNum == lvaOutgoingArgSpaceVar) |
| 7840 | { |
| 7841 | continue; |
| 7842 | } |
| 7843 | #endif // FEATURE_FIXED_OUT_ARGS |
| 7844 | if (!varDsc->lvIsParam) |
| 7845 | { |
| 7846 | var_types lclType = varDsc->TypeGet(); |
| 7847 | bool isUserLocal = (varNum < info.compLocalsCount); |
| 7848 | bool structWithGCFields = ((lclType == TYP_STRUCT) && (varDsc->lvStructGcCount > 0)); |
| 7849 | if (isUserLocal || structWithGCFields) |
| 7850 | { |
| 7851 | GenTree* lcl = gtNewLclvNode(varNum, lclType); |
| 7852 | GenTree* init = nullptr; |
| 7853 | if (varTypeIsStruct(lclType)) |
| 7854 | { |
| 7855 | const bool isVolatile = false; |
| 7856 | const bool isCopyBlock = false; |
| 7857 | init = gtNewBlkOpNode(lcl, gtNewIconNode(0), varDsc->lvSize(), isVolatile, isCopyBlock); |
| 7858 | init = fgMorphInitBlock(init); |
| 7859 | } |
| 7860 | else |
| 7861 | { |
| 7862 | GenTree* zero = gtNewZeroConNode(genActualType(lclType)); |
| 7863 | init = gtNewAssignNode(lcl, zero); |
| 7864 | } |
| 7865 | GenTree* initStmt = gtNewStmt(init, callILOffset); |
| 7866 | fgInsertStmtBefore(block, last, initStmt); |
| 7867 | } |
| 7868 | } |
| 7869 | } |
| 7870 | } |
| 7871 | |
| 7872 | // Remove the call |
| 7873 | fgRemoveStmt(block, last); |
| 7874 | |
| 7875 | // Set the loop edge. Ensure we have a scratch block and then target the |
| 7876 | // next block. Loop detection needs to see a pred out of the loop, so |
| 7877 | // mark the scratch block BBF_DONT_REMOVE to prevent empty block removal |
| 7878 | // on it. |
| 7879 | fgEnsureFirstBBisScratch(); |
| 7880 | fgFirstBB->bbFlags |= BBF_DONT_REMOVE; |
| 7881 | block->bbJumpKind = BBJ_ALWAYS; |
| 7882 | block->bbJumpDest = fgFirstBB->bbNext; |
| 7883 | fgAddRefPred(block->bbJumpDest, block); |
| 7884 | block->bbFlags &= ~BBF_HAS_JMP; |
| 7885 | } |
| 7886 | |
| 7887 | //------------------------------------------------------------------------------ |
| 7888 | // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter. |
| 7889 | // |
| 7890 | // |
| 7891 | // Arguments: |
| 7892 | // arg - argument to assign |
| 7893 | // argTabEntry - argument table entry corresponding to arg |
| 7894 | // block --- basic block the call is in |
| 7895 | // callILOffset - IL offset of the call |
| 7896 | // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary) |
| 7897 | // paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted |
| 7898 | // |
| 7899 | // Return Value: |
| 7900 | // parameter assignment statement if one was inserted; nullptr otherwise. |
| 7901 | |
| 7902 | GenTree* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, |
| 7903 | fgArgTabEntry* argTabEntry, |
| 7904 | BasicBlock* block, |
| 7905 | IL_OFFSETX callILOffset, |
| 7906 | GenTree* tmpAssignmentInsertionPoint, |
| 7907 | GenTree* paramAssignmentInsertionPoint) |
| 7908 | { |
| 7909 | // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because |
| 7910 | // some argument trees may reference parameters directly. |
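| | // For example (illustrative): for a recursive call Foo(y, x) inside Foo(x, y),
| | // assigning x = y directly would clobber the value of x that the second argument
| | // still needs; routing the arguments through temps (t0 = y; t1 = x; then x = t0;
| | // y = t1) preserves the correct values.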
| 7911 | |
| 7912 | GenTree* argInTemp = nullptr; |
| 7913 | unsigned originalArgNum = argTabEntry->argNum; |
| 7914 | bool needToAssignParameter = true; |
| 7915 | |
| 7916 | // TODO-CQ: enable calls with struct arguments passed in registers. |
| 7917 | noway_assert(!varTypeIsStruct(arg->TypeGet())); |
| 7918 | |
| 7919 | if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl()) |
| 7920 | { |
| 7921 | // The argument is already assigned to a temp or is a const. |
| 7922 | argInTemp = arg; |
| 7923 | } |
| 7924 | else if (arg->OperGet() == GT_LCL_VAR) |
| 7925 | { |
| 7926 | unsigned lclNum = arg->AsLclVar()->gtLclNum; |
| 7927 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 7928 | if (!varDsc->lvIsParam) |
| 7929 | { |
| 7930 | // The argument is a non-parameter local so it doesn't need to be assigned to a temp. |
| 7931 | argInTemp = arg; |
| 7932 | } |
| 7933 | else if (lclNum == originalArgNum) |
| 7934 | { |
| 7935 | // The argument is the same parameter local that we were about to assign so |
| 7936 | // we can skip the assignment. |
| 7937 | needToAssignParameter = false; |
| 7938 | } |
| 7939 | } |
| 7940 | |
| 7941 | // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve |
| 7942 | // any caller parameters. Some common cases are handled above but we may be able to eliminate |
| 7943 | // more temp assignments. |
| 7944 | |
| 7945 | GenTree* paramAssignStmt = nullptr; |
| 7946 | if (needToAssignParameter) |
| 7947 | { |
| 7948 | if (argInTemp == nullptr) |
| 7949 | { |
| 7950 | // The argument is not assigned to a temp. We need to create a new temp and insert an assignment. |
| 7951 | // TODO: we can avoid a temp assignment if we can prove that the argument tree |
| 7952 | // doesn't involve any caller parameters. |
| 7953 | unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
| 7954 | lvaTable[tmpNum].lvType = arg->gtType; |
| 7955 | GenTree* tempSrc = arg; |
| 7956 | GenTree* tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType); |
| 7957 | GenTree* tmpAssignNode = gtNewAssignNode(tempDest, tempSrc); |
| 7958 | GenTree* tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset); |
| 7959 | fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt); |
| 7960 | argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType); |
| 7961 | } |
| 7962 | |
| 7963 | // Now assign the temp to the parameter. |
| 7964 | LclVarDsc* paramDsc = lvaTable + originalArgNum; |
| 7965 | assert(paramDsc->lvIsParam); |
| 7966 | GenTree* paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType); |
| 7967 | GenTree* paramAssignNode = gtNewAssignNode(paramDest, argInTemp); |
| 7968 | paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset); |
| 7969 | |
| 7970 | fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt); |
| 7971 | } |
| 7972 | return paramAssignStmt; |
| 7973 | } |
| 7974 | |
| 7975 | /***************************************************************************** |
| 7976 | * |
| 7977 | * Transform the given GT_CALL tree for code generation. |
| 7978 | */ |
| 7979 | |
| 7980 | GenTree* Compiler::fgMorphCall(GenTreeCall* call) |
| 7981 | { |
| 7982 | if (varTypeIsStruct(call)) |
| 7983 | { |
| 7984 | fgFixupStructReturn(call); |
| 7985 | } |
| 7986 | if (call->CanTailCall()) |
| 7987 | { |
| 7988 | // It should either be an explicit (i.e. tail prefixed) or an implicit tail call |
| 7989 | assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall()); |
| 7990 | |
| 7991 | // It cannot be an inline candidate |
| 7992 | assert(!call->IsInlineCandidate()); |
| 7993 | |
| 7994 | const char* szFailReason = nullptr; |
| 7995 | bool hasStructParam = false; |
| 7996 | if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) |
| 7997 | { |
| 7998 | szFailReason = "Might turn into an intrinsic" ; |
| 7999 | } |
| 8000 | |
| 8001 | if (opts.compNeedSecurityCheck) |
| 8002 | { |
| 8003 | szFailReason = "Needs security check" ; |
| 8004 | } |
| 8005 | else if (compLocallocUsed || compLocallocOptimized) |
| 8006 | { |
| 8007 | szFailReason = "Localloc used" ; |
| 8008 | } |
| 8009 | #ifdef _TARGET_AMD64_ |
| 8010 | // Needed for Jit64 compat. |
| 8011 | // In future, enabling tail calls from methods that need GS cookie check |
| 8012 | // would require codegen side work to emit GS cookie check before a tail |
| 8013 | // call. |
| 8014 | else if (getNeedsGSSecurityCookie()) |
| 8015 | { |
| 8016 | szFailReason = "GS Security cookie check" ; |
| 8017 | } |
| 8018 | #endif |
| 8019 | #ifdef DEBUG |
| 8020 | // DDB 99324: Just disable tailcall under compGcChecks stress mode. |
| 8021 | else if (opts.compGcChecks) |
| 8022 | { |
| 8023 | szFailReason = "GcChecks" ; |
| 8024 | } |
| 8025 | #endif |
| 8026 | #if FEATURE_TAILCALL_OPT |
| 8027 | else |
| 8028 | { |
| 8029 | // We are still not sure whether it can be a tail call, because when converting
| 8030 | // a call to an implicit tail call we must check that there are no locals with
| 8031 | // their address taken. If this is the case, we have to assume that the address
| 8032 | // has been leaked and the current stack frame must live until after the final |
| 8033 | // call. |
| 8034 | |
| 8035 | // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
| 8036 | // that lvHasLdAddrOp is much more conservative. We cannot just base it on |
| 8037 | // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs |
| 8038 | // during morph stage. The reason for also checking lvAddrExposed is that in case |
| 8039 | // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp. |
| 8040 | // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, allows us
| 8041 | // never to be incorrect.
| 8042 | // |
| 8043 | // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose |
| 8044 | // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed |
| 8045 | // is set. This avoids the need for iterating through all lcl vars of the current |
| 8046 | // method. Right now throughout the code base we are not consistently using 'set' |
| 8047 | // method to set lvHasLdAddrOp and lvAddrExposed flags. |
| 8048 | unsigned varNum; |
| 8049 | LclVarDsc* varDsc; |
| 8050 | bool hasAddrExposedVars = false; |
| 8051 | bool hasStructPromotedParam = false; |
| 8052 | bool hasPinnedVars = false; |
| 8053 | |
| 8054 | for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++) |
| 8055 | { |
| 8056 | // If the method is marked as an explicit tail call we will skip the |
| 8057 | // following three hazard checks. |
| 8058 | // We still must check for any struct parameters and set 'hasStructParam' |
| 8059 | // so that we won't transform the recursive tail call into a loop. |
| 8060 | // |
| 8061 | if (call->IsImplicitTailCall()) |
| 8062 | { |
| 8063 | if (varDsc->lvHasLdAddrOp) |
| 8064 | { |
| 8065 | hasAddrExposedVars = true; |
| 8066 | break; |
| 8067 | } |
| 8068 | if (varDsc->lvAddrExposed) |
| 8069 | { |
| 8070 | if (lvaIsImplicitByRefLocal(varNum)) |
| 8071 | { |
| 8072 | // The address of the implicit-byref is a non-address use of the pointer parameter. |
| 8073 | } |
| 8074 | else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl)) |
| 8075 | { |
| 8076 | // The address of the implicit-byref's field is likewise a non-address use of the pointer |
| 8077 | // parameter. |
| 8078 | } |
| 8079 | else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum)) |
| 8080 | { |
| 8081 | // This temp was used for struct promotion bookkeeping. It will not be used, and will have |
| 8082 | // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs. |
| 8083 | assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl)); |
| 8084 | assert(fgGlobalMorph); |
| 8085 | } |
| 8086 | else |
| 8087 | { |
| 8088 | hasAddrExposedVars = true; |
| 8089 | break; |
| 8090 | } |
| 8091 | } |
| 8092 | if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum)) |
| 8093 | { |
| 8094 | hasStructPromotedParam = true; |
| 8095 | break; |
| 8096 | } |
| 8097 | if (varDsc->lvPinned) |
| 8098 | { |
| 8099 | // A tail call removes the method from the stack, which means the pinning |
| 8100 | // goes away for the callee. We can't allow that. |
| 8101 | hasPinnedVars = true; |
| 8102 | break; |
| 8103 | } |
| 8104 | } |
| 8105 | if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam) |
| 8106 | { |
| 8107 | hasStructParam = true; |
| 8108 | // This prevents transforming a recursive tail call into a loop |
| 8109 | // but doesn't prevent tail call optimization so we need to |
| 8110 | // look at the rest of the parameters.
| 8111 | continue; |
| 8112 | } |
| 8113 | } |
| 8114 | |
| 8115 | if (hasAddrExposedVars)
| 8116 | {
| 8117 | szFailReason = "Local address taken";
| 8118 | }
| 8119 | if (hasStructPromotedParam)
| 8120 | {
| 8121 | szFailReason = "Has Struct Promoted Param";
| 8122 | }
| 8123 | if (hasPinnedVars)
| 8124 | {
| 8125 | szFailReason = "Has Pinned Vars";
| 8126 | }
| 8127 | } |
| 8128 | #endif // FEATURE_TAILCALL_OPT |
| 8129 | |
| 8130 | var_types callType = call->TypeGet(); |
| 8131 | |
| 8132 | // We have to ensure that we pass the incoming retValBuf as the
| 8133 | // outgoing one. Using a temp will not do, as this function will
| 8134 | // not regain control to do the copy.
| 8135 | |
| 8136 | if (info.compRetBuffArg != BAD_VAR_NUM) |
| 8137 | { |
| 8138 | noway_assert(callType == TYP_VOID); |
| 8139 | GenTree* retValBuf = call->gtCallArgs->gtOp.gtOp1; |
| 8140 | if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg) |
| 8141 | { |
| 8142 | szFailReason = "Need to copy return buffer" ; |
| 8143 | } |
| 8144 | } |
| 8145 | |
| 8146 | // If this is an opportunistic tail call and cannot be dispatched as a
| 8147 | // fast tail call, go the non-tail call route. This is done for perf
| 8148 | // reasons.
| 8149 | //
| 8150 | // Avoid the cost of determining whether it can be dispatched as a fast tail
| 8151 | // call if we already know that the tail call cannot be honored for other
| 8152 | // reasons.
| 8153 | bool canFastTailCall = false; |
| 8154 | if (szFailReason == nullptr) |
| 8155 | { |
| 8156 | canFastTailCall = fgCanFastTailCall(call); |
| 8157 | if (!canFastTailCall) |
| 8158 | { |
| 8159 | // Implicit or opportunistic tail calls are always dispatched via the fast tail call
| 8160 | // mechanism and never via the tail call helper, for perf reasons.
| 8161 | if (call->IsImplicitTailCall()) |
| 8162 | { |
| 8163 | szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp" ; |
| 8164 | } |
| 8165 | else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this)) |
| 8166 | { |
| 8167 | // If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
| 8168 | // dispatched as a fast tail call.
| 8169 | 
| 8170 | // Methods with non-standard args will have an indirection cell or cookie param passed
| 8171 | // in a callee trash register (e.g. R11). The tail call helper doesn't preserve it before
| 8172 | // tail calling the target method and hence the ".tail" prefix on such calls needs to be
| 8173 | // ignored.
| 8174 | //
| 8175 | // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require an
| 8176 | // extra stub param (e.g. in R11 on Amd64), they can still be called via the tail call helper.
| 8177 | // This is done by adding stubAddr as an additional arg before the original list of
| 8178 | // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
| 8179 | // in Stublinkerx86.cpp.
| 8180 | szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
| 8181 | "called via helper";
| 8182 | } |
| 8183 | #ifdef _TARGET_ARM64_ |
| 8184 | else |
| 8185 | { |
| 8186 | // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER. |
| 8187 | // So, bail out if we can't make fast tail call. |
| 8188 | szFailReason = "Non-qualified fast tail call" ; |
| 8189 | } |
| 8190 | #endif |
| 8191 | } |
| 8192 | } |
| 8193 | |
| 8194 | // Clear these flags before calling fgMorphCall() to avoid recursion. |
| 8195 | bool isTailPrefixed = call->IsTailPrefixedCall(); |
| 8196 | call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL; |
| 8197 | |
| 8198 | #if FEATURE_TAILCALL_OPT |
| 8199 | call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL; |
| 8200 | #endif |
| 8201 | |
| 8202 | if (szFailReason == nullptr) |
| 8203 | { |
| 8204 | if (!fgCheckStmtAfterTailCall()) |
| 8205 | { |
| 8206 | szFailReason = "Unexpected statements after the tail call" ; |
| 8207 | } |
| 8208 | } |
| 8209 | |
| 8210 | void* pfnCopyArgs = nullptr; |
| 8211 | #if !defined(_TARGET_X86_) |
| 8212 | if (!canFastTailCall && szFailReason == nullptr) |
| 8213 | { |
| 8214 | pfnCopyArgs = |
| 8215 | info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, call->IsVirtualStub() |
| 8216 | ? CORINFO_TAILCALL_STUB_DISPATCH_ARG |
| 8217 | : CORINFO_TAILCALL_NORMAL); |
| 8218 | if (pfnCopyArgs == nullptr) |
| 8219 | { |
| 8220 | if (!info.compMatchedVM) |
| 8221 | { |
| 8222 | // If we don't have a matched VM, we won't get valid results when asking for a thunk. |
| 8223 | pfnCopyArgs = UlongToPtr(0xCA11CA11); // "callcall" |
| 8224 | } |
| 8225 | else |
| 8226 | { |
| 8227 | szFailReason = "TailCallCopyArgsThunk not available." ; |
| 8228 | } |
| 8229 | } |
| 8230 | } |
| 8231 | #endif // !_TARGET_X86_ |
| 8232 | |
| 8233 | if (szFailReason != nullptr) |
| 8234 | { |
| 8235 | #ifdef DEBUG |
| 8236 | if (verbose) |
| 8237 | { |
| 8238 | printf("\nRejecting tail call late for call " ); |
| 8239 | printTreeID(call); |
| 8240 | printf(": %s\n" , szFailReason); |
| 8241 | } |
| 8242 | #endif |
| 8243 | |
| 8244 | // for non user funcs, we have no handles to report |
| 8245 | info.compCompHnd->reportTailCallDecision(nullptr, |
| 8246 | (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, |
| 8247 | isTailPrefixed, TAILCALL_FAIL, szFailReason); |
| 8248 | |
| 8249 | goto NO_TAIL_CALL; |
| 8250 | } |
| 8251 | |
| 8252 | #if !FEATURE_TAILCALL_OPT_SHARED_RETURN |
| 8253 | // We enable shared-ret tail call optimization for recursive calls even if |
| 8254 | // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined. |
| 8255 | if (gtIsRecursiveCall(call)) |
| 8256 | #endif |
| 8257 | { |
| 8258 | // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN, |
| 8259 | // but if the call falls through to a ret, and we are doing a tailcall, change it here. |
| 8260 | if (compCurBB->bbJumpKind != BBJ_RETURN) |
| 8261 | { |
| 8262 | compCurBB->bbJumpKind = BBJ_RETURN; |
| 8263 | } |
| 8264 | } |
| 8265 | |
| 8266 | // Set this flag before calling fgMorphCall() to prevent inlining this call. |
| 8267 | call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL; |
| 8268 | |
| 8269 | bool fastTailCallToLoop = false; |
| 8270 | #if FEATURE_TAILCALL_OPT |
| 8271 | // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register |
| 8272 | // or return type is a struct that can be passed in a register. |
| 8273 | // |
| 8274 | // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through |
| 8275 | // hidden generic context param or through keep alive thisptr), then while transforming a recursive |
| 8276 | // call to such a method requires that the generic context stored on stack slot be updated. Right now, |
| 8277 | // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming |
| 8278 | // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the |
| 8279 | // generic type parameters of both caller and callee generic method are the same. |
| 8280 | if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() && |
| 8281 | !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && |
| 8282 | !varTypeIsStruct(call->TypeGet()) && ((info.compClassAttr & CORINFO_FLG_MARSHAL_BYREF) == 0)) |
| 8283 | { |
| 8284 | call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP; |
| 8285 | fastTailCallToLoop = true; |
| 8286 | } |
| 8287 | #endif |
| 8288 | |
| 8289 | // Do some target-specific transformations (before we process the args, etc.) |
| 8290 | // This is needed only for tail prefixed calls that cannot be dispatched as |
| 8291 | // fast calls. |
| 8292 | if (!canFastTailCall) |
| 8293 | { |
| 8294 | fgMorphTailCall(call, pfnCopyArgs); |
| 8295 | } |
| 8296 | |
| 8297 | // Implementation note: If we optimize tailcall to do a direct jump
| 8298 | // to the target function (after stomping on the return address, etc), |
| 8299 | // without using CORINFO_HELP_TAILCALL, we have to make certain that |
| 8300 | // we don't starve the hijacking logic (by stomping on the hijacked |
| 8301 | // return address etc). |
| 8302 | |
| 8303 | // At this point, we are committed to do the tailcall. |
| 8304 | compTailCallUsed = true; |
| 8305 | |
| 8306 | CorInfoTailCall tailCallResult; |
| 8307 | |
| 8308 | if (fastTailCallToLoop) |
| 8309 | { |
| 8310 | tailCallResult = TAILCALL_RECURSIVE; |
| 8311 | } |
| 8312 | else if (canFastTailCall) |
| 8313 | { |
| 8314 | tailCallResult = TAILCALL_OPTIMIZED; |
| 8315 | } |
| 8316 | else |
| 8317 | { |
| 8318 | tailCallResult = TAILCALL_HELPER; |
| 8319 | } |
| 8320 | |
| 8321 | // for non user funcs, we have no handles to report |
| 8322 | info.compCompHnd->reportTailCallDecision(nullptr, |
| 8323 | (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, |
| 8324 | isTailPrefixed, tailCallResult, nullptr); |
| 8325 | |
| 8326 | // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
| 8327 | // to avoid doing any extra work for the return value.
| 8328 | call->gtType = TYP_VOID; |
| 8329 | |
| 8330 | #ifdef DEBUG |
| 8331 | if (verbose) |
| 8332 | { |
| 8333 | printf("\nGTF_CALL_M_TAILCALL bit set for call " ); |
| 8334 | printTreeID(call); |
| 8335 | printf("\n" ); |
| 8336 | if (fastTailCallToLoop) |
| 8337 | { |
| 8338 | printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call " ); |
| 8339 | printTreeID(call); |
| 8340 | printf("\n" ); |
| 8341 | } |
| 8342 | } |
| 8343 | #endif |
| 8344 | |
| 8345 | GenTree* stmtExpr = fgMorphStmt->gtStmtExpr; |
| 8346 | |
| 8347 | #ifdef DEBUG |
| 8348 | // Tail call needs to be in one of the following IR forms |
| 8349 | // Either a call stmt or |
| 8350 | // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..))) |
| 8351 | // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..))) |
| 8352 | // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP) |
| 8353 | // In the above, |
| 8354 | // GT_CASTS may be nested. |
| 8355 | genTreeOps stmtOper = stmtExpr->gtOper; |
| 8356 | if (stmtOper == GT_CALL) |
| 8357 | { |
| 8358 | assert(stmtExpr == call); |
| 8359 | } |
| 8360 | else |
| 8361 | { |
| 8362 | assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA); |
| 8363 | GenTree* treeWithCall; |
| 8364 | if (stmtOper == GT_RETURN) |
| 8365 | { |
| 8366 | treeWithCall = stmtExpr->gtGetOp1(); |
| 8367 | } |
| 8368 | else if (stmtOper == GT_COMMA) |
| 8369 | { |
| 8370 | // Second operation must be nop. |
| 8371 | assert(stmtExpr->gtGetOp2()->IsNothingNode()); |
| 8372 | treeWithCall = stmtExpr->gtGetOp1(); |
| 8373 | } |
| 8374 | else |
| 8375 | { |
| 8376 | treeWithCall = stmtExpr->gtGetOp2(); |
| 8377 | } |
| 8378 | |
| 8379 | // Peel off casts |
| 8380 | while (treeWithCall->gtOper == GT_CAST) |
| 8381 | { |
| 8382 | assert(!treeWithCall->gtOverflow()); |
| 8383 | treeWithCall = treeWithCall->gtGetOp1(); |
| 8384 | } |
| 8385 | |
| 8386 | assert(treeWithCall == call); |
| 8387 | } |
| 8388 | #endif |
| 8389 | GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt; |
| 8390 | // Remove all stmts after the call. |
| 8391 | while (nextMorphStmt != nullptr) |
| 8392 | { |
| 8393 | GenTreeStmt* stmtToRemove = nextMorphStmt; |
| 8394 | nextMorphStmt = stmtToRemove->gtNextStmt; |
| 8395 | fgRemoveStmt(compCurBB, stmtToRemove); |
| 8396 | } |
| 8397 | |
| 8398 | fgMorphStmt->gtStmtExpr = call; |
| 8399 | |
| 8400 | // Tail call via helper: The VM can't use return address hijacking if we're |
| 8401 | // not going to return and the helper doesn't have enough info to safely poll, |
| 8402 | // so we poll before the tail call, if the block isn't already safe. Since |
| 8403 | // tail call via helper is a slow mechanism, it doesn't matter whether we emit a
| 8404 | // GC poll. This is done to be in parity with Jit64. Also this avoids a GC info
| 8405 | // size increase if almost all methods are expected to be tail calls (e.g. F#).
| 8406 | // |
| 8407 | // Note that we can avoid emitting GC-poll if we know that the current BB is |
| 8408 | // dominated by a Gc-SafePoint block. But we don't have dominator info at this |
| 8409 | // point. One option is to just add a place holder node for GC-poll (e.g. GT_GCPOLL) |
| 8410 | // here and remove it in lowering if the block is dominated by a GC-SafePoint. For |
| 8411 | // now it is not clear whether optimizing slow tail calls is worth the effort. As a
| 8412 | // low cost check, we check whether the first and current basic blocks are |
| 8413 | // GC-SafePoints. |
| 8414 | // |
| 8415 | // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder() |
| 8416 | // is going to mark the method as fully interruptible if the block containing this tail |
| 8417 | // call is reachable without executing any call. |
| 8418 | if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) || |
| 8419 | !fgCreateGCPoll(GCPOLL_INLINE, compCurBB)) |
| 8420 | { |
| 8421 | // We didn't insert a poll block, so we need to morph the call now |
| 8422 | // (Normally it will get morphed when we get to the split poll block) |
| 8423 | GenTree* temp = fgMorphCall(call); |
| 8424 | noway_assert(temp == call); |
| 8425 | } |
| 8426 | |
| 8427 | // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to |
| 8428 | // the target. So we don't need an epilog - just like CORINFO_HELP_THROW. |
| 8429 | // |
| 8430 | // Fast tail call: in case of fast tail calls, we need a jmp epilog and |
| 8431 | // hence mark it as BBJ_RETURN with BBF_JMP flag set. |
| 8432 | noway_assert(compCurBB->bbJumpKind == BBJ_RETURN); |
| 8433 | |
| 8434 | if (canFastTailCall) |
| 8435 | { |
| 8436 | compCurBB->bbFlags |= BBF_HAS_JMP; |
| 8437 | } |
| 8438 | else |
| 8439 | { |
| 8440 | compCurBB->bbJumpKind = BBJ_THROW; |
| 8441 | } |
| 8442 | |
// For non-void calls, we return a placeholder which will be
// used by the parent GT_RETURN node of this call.
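// For example, if this call was originally under GT_RETURN(GT_CAST(GT_CALL(..))), the
// zero constant created below stands in for the call's value at that use; the real
// result is produced by the tail call itself.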
| 8445 | |
| 8446 | GenTree* result = call; |
| 8447 | if (callType != TYP_VOID && info.compRetType != TYP_VOID) |
| 8448 | { |
| 8449 | #ifdef FEATURE_HFA |
| 8450 | // Return a dummy node, as the return is already removed. |
| 8451 | if (callType == TYP_STRUCT) |
| 8452 | { |
| 8453 | // This is a HFA, use float 0. |
| 8454 | callType = TYP_FLOAT; |
| 8455 | } |
| 8456 | #elif defined(UNIX_AMD64_ABI) |
| 8457 | // Return a dummy node, as the return is already removed. |
| 8458 | if (varTypeIsStruct(callType)) |
| 8459 | { |
| 8460 | // This is a register-returned struct. Return a 0. |
// The actual return registers are hacked in lowering and the register allocator.
| 8462 | callType = TYP_INT; |
| 8463 | } |
| 8464 | #endif |
| 8465 | #ifdef FEATURE_SIMD |
| 8466 | // Return a dummy node, as the return is already removed. |
| 8467 | if (varTypeIsSIMD(callType)) |
| 8468 | { |
| 8469 | callType = TYP_DOUBLE; |
| 8470 | } |
| 8471 | #endif |
| 8472 | result = gtNewZeroConNode(genActualType(callType)); |
| 8473 | result = fgMorphTree(result); |
| 8474 | } |
| 8475 | |
| 8476 | return result; |
| 8477 | } |
| 8478 | |
| 8479 | NO_TAIL_CALL: |
| 8480 | |
| 8481 | if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 && |
| 8482 | (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR) |
| 8483 | #ifdef FEATURE_READYTORUN_COMPILER |
| 8484 | || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR) |
| 8485 | #endif |
| 8486 | ) && |
| 8487 | (call == fgMorphStmt->gtStmtExpr)) |
| 8488 | { |
// This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
| 8490 | // Transform it into a null check. |
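// (This typically arises when the result of an ldvirtftn-style sequence is popped;
// only the implicit null check of the 'this' pointer needs to be preserved.)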
| 8491 | |
| 8492 | GenTree* thisPtr = call->gtCallArgs->gtOp.gtOp1; |
| 8493 | |
| 8494 | GenTree* nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr); |
| 8495 | nullCheck->gtFlags |= GTF_EXCEPT; |
| 8496 | |
| 8497 | return fgMorphTree(nullCheck); |
| 8498 | } |
| 8499 | |
| 8500 | noway_assert(call->gtOper == GT_CALL); |
| 8501 | |
| 8502 | // |
| 8503 | // Only count calls once (only in the global morph phase) |
| 8504 | // |
| 8505 | if (fgGlobalMorph) |
| 8506 | { |
| 8507 | if (call->gtCallType == CT_INDIRECT) |
| 8508 | { |
| 8509 | optCallCount++; |
| 8510 | optIndirectCallCount++; |
| 8511 | } |
| 8512 | else if (call->gtCallType == CT_USER_FUNC) |
| 8513 | { |
| 8514 | optCallCount++; |
| 8515 | if (call->IsVirtual()) |
| 8516 | { |
| 8517 | optIndirectCallCount++; |
| 8518 | } |
| 8519 | } |
| 8520 | } |
| 8521 | |
| 8522 | // Couldn't inline - remember that this BB contains method calls |
| 8523 | |
| 8524 | // If this is a 'regular' call, mark the basic block as |
| 8525 | // having a call (for computing full interruptibility). |
| 8526 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 8527 | |
| 8528 | if (IsGcSafePoint(call)) |
| 8529 | { |
| 8530 | compCurBB->bbFlags |= BBF_GC_SAFE_POINT; |
| 8531 | } |
| 8532 | |
| 8533 | // Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag |
| 8534 | // |
| 8535 | // We need to do these before the arguments are morphed |
| 8536 | if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)) |
| 8537 | { |
| 8538 | // See if this is foldable |
| 8539 | GenTree* optTree = gtFoldExprCall(call); |
| 8540 | |
| 8541 | // If we optimized, morph the result |
| 8542 | if (optTree != call) |
| 8543 | { |
| 8544 | return fgMorphTree(optTree); |
| 8545 | } |
| 8546 | } |
| 8547 | |
// Make sure that return buffers for structs containing GC pointers (that aren't too large) are pointers into the stack.
| 8549 | GenTree* origDest = nullptr; // Will only become non-null if we do the transformation (and thus require |
| 8550 | // copy-back). |
| 8551 | unsigned retValTmpNum = BAD_VAR_NUM; |
| 8552 | CORINFO_CLASS_HANDLE structHnd = nullptr; |
| 8553 | if (call->HasRetBufArg() && |
| 8554 | call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null). |
| 8555 | { |
// We're enforcing the invariant that return buffer pointers (at least for
| 8557 | // struct return types containing GC pointers) are never pointers into the heap. |
| 8558 | // The large majority of cases are address of local variables, which are OK. |
| 8559 | // Otherwise, allocate a local of the given struct type, pass its address, |
| 8560 | // then assign from that into the proper destination. (We don't need to do this |
| 8561 | // if we're passing the caller's ret buff arg to the callee, since the caller's caller |
| 8562 | // will maintain the same invariant.) |
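//
// A sketch of the transformation (the heap destination below is illustrative):
//    call(&heapObj.field, args...)
// becomes
//    call(&retValTmp, args...)
// followed by a copy-back from retValTmp into the original destination, which is
// built near the end of this method when origDest != nullptr.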
| 8563 | |
| 8564 | GenTree* dest = call->gtCallArgs->gtOp.gtOp1; |
| 8565 | assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above. |
| 8566 | if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)) |
| 8567 | { |
| 8568 | // We'll exempt helper calls from this, assuming that the helper implementation |
| 8569 | // follows the old convention, and does whatever barrier is required. |
| 8570 | if (call->gtCallType != CT_HELPER) |
| 8571 | { |
| 8572 | structHnd = call->gtRetClsHnd; |
| 8573 | if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) && |
| 8574 | !(dest->OperGet() == GT_LCL_VAR && dest->gtLclVar.gtLclNum == info.compRetBuffArg)) |
| 8575 | { |
| 8576 | origDest = dest; |
| 8577 | |
retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
| 8579 | lvaSetStruct(retValTmpNum, structHnd, true); |
| 8580 | dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); |
| 8581 | } |
| 8582 | } |
| 8583 | } |
| 8584 | |
| 8585 | call->gtCallArgs->gtOp.gtOp1 = dest; |
| 8586 | } |
| 8587 | |
| 8588 | /* Process the "normal" argument list */ |
| 8589 | call = fgMorphArgs(call); |
| 8590 | noway_assert(call->gtOper == GT_CALL); |
| 8591 | |
// Morph a stelem.ref helper call that stores a null value into a direct array store that doesn't use the helper.
// This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
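// For instance (a sketch, not the exact tree shape):
//    CORINFO_HELP_ARRADDR_ST(array, index, null)
// becomes an ordinary array store
//    array[index] = null
// which is safe because storing a null reference never needs the covariance check
// that the helper otherwise performs.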
| 8594 | if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST))) |
| 8595 | { |
| 8596 | GenTree* value = gtArgEntryByArgNum(call, 2)->node; |
| 8597 | if (value->IsIntegralConst(0)) |
| 8598 | { |
| 8599 | assert(value->OperGet() == GT_CNS_INT); |
| 8600 | |
| 8601 | GenTree* arr = gtArgEntryByArgNum(call, 0)->node; |
| 8602 | GenTree* index = gtArgEntryByArgNum(call, 1)->node; |
| 8603 | |
| 8604 | // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy |
| 8605 | // the spill trees as well if necessary. |
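// The loop below chains any such setup assignments under 'argSetup' - roughly
// COMMA(COMMA(nop, asg1), asg2) - so that they still execute before the array
// store that replaces the helper call.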
| 8606 | GenTreeOp* argSetup = nullptr; |
| 8607 | for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest()) |
| 8608 | { |
| 8609 | GenTree* const arg = earlyArgs->Current(); |
| 8610 | if (arg->OperGet() != GT_ASG) |
| 8611 | { |
| 8612 | continue; |
| 8613 | } |
| 8614 | |
| 8615 | assert(arg != arr); |
| 8616 | assert(arg != index); |
| 8617 | |
| 8618 | arg->gtFlags &= ~GTF_LATE_ARG; |
| 8619 | |
| 8620 | GenTree* op1 = argSetup; |
| 8621 | if (op1 == nullptr) |
| 8622 | { |
| 8623 | op1 = gtNewNothingNode(); |
| 8624 | #if DEBUG |
| 8625 | op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 8626 | #endif // DEBUG |
| 8627 | } |
| 8628 | |
| 8629 | argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg); |
| 8630 | |
| 8631 | #if DEBUG |
| 8632 | argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 8633 | #endif // DEBUG |
| 8634 | } |
| 8635 | |
| 8636 | #ifdef DEBUG |
| 8637 | auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult { |
| 8638 | (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
| 8639 | return WALK_CONTINUE; |
| 8640 | }; |
| 8641 | |
| 8642 | fgWalkTreePost(&arr, resetMorphedFlag); |
| 8643 | fgWalkTreePost(&index, resetMorphedFlag); |
| 8644 | fgWalkTreePost(&value, resetMorphedFlag); |
| 8645 | #endif // DEBUG |
| 8646 | |
| 8647 | GenTree* const nullCheckedArr = impCheckForNullPointer(arr); |
| 8648 | GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index); |
| 8649 | GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value); |
| 8650 | arrStore->gtFlags |= GTF_ASG; |
| 8651 | |
| 8652 | GenTree* result = fgMorphTree(arrStore); |
| 8653 | if (argSetup != nullptr) |
| 8654 | { |
| 8655 | result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result); |
| 8656 | #if DEBUG |
| 8657 | result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 8658 | #endif // DEBUG |
| 8659 | } |
| 8660 | |
| 8661 | return result; |
| 8662 | } |
| 8663 | } |
| 8664 | |
| 8665 | // Optimize get_ManagedThreadId(get_CurrentThread) |
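// i.e. the Thread.CurrentThread.ManagedThreadId pattern is collapsed into a single
// call to the CORINFO_HELP_GETCURRENTMANAGEDTHREADID helper.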
| 8666 | if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) && |
| 8667 | info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId) |
| 8668 | { |
| 8669 | noway_assert(origDest == nullptr); |
| 8670 | noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr); |
| 8671 | |
| 8672 | GenTree* innerCall = call->gtCallLateArgs->gtOp.gtOp1; |
| 8673 | |
| 8674 | if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) && |
| 8675 | info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) == |
| 8676 | CORINFO_INTRINSIC_GetCurrentManagedThread) |
| 8677 | { |
| 8678 | // substitute expression with call to helper |
| 8679 | GenTree* newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT); |
| 8680 | JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n" ); |
| 8681 | return fgMorphTree(newCall); |
| 8682 | } |
| 8683 | } |
| 8684 | |
| 8685 | if (origDest != nullptr) |
| 8686 | { |
| 8687 | GenTree* retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); |
| 8688 | // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused |
| 8689 | // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the |
| 8690 | // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to |
| 8691 | // be correct. |
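// e.g. ASG(lcl, ...) becomes COMMA(ASG(lcl, ...), LCL_VAR lcl), so the trailing use
// keeps the assignment alive.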
| 8692 | if (origDest->OperGet() == GT_ASG) |
| 8693 | { |
| 8694 | if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR) |
| 8695 | { |
| 8696 | GenTree* var = origDest->gtOp.gtOp1; |
| 8697 | origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest, |
| 8698 | gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet())); |
| 8699 | } |
| 8700 | } |
| 8701 | GenTree* copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false); |
| 8702 | copyBlk = fgMorphTree(copyBlk); |
| 8703 | GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk); |
| 8704 | #ifdef DEBUG |
| 8705 | result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 8706 | #endif |
| 8707 | return result; |
| 8708 | } |
| 8709 | |
| 8710 | if (call->IsNoReturn()) |
| 8711 | { |
| 8712 | // |
| 8713 | // If we know that the call does not return then we can set fgRemoveRestOfBlock |
| 8714 | // to remove all subsequent statements and change the call's basic block to BBJ_THROW. |
| 8715 | // As a result the compiler won't need to preserve live registers across the call. |
| 8716 | // |
// This isn't needed for tail calls, as there shouldn't be any code after the call anyway.
| 8718 | // Besides, the tail call code is part of the epilog and converting the block to |
| 8719 | // BBJ_THROW would result in the tail call being dropped as the epilog is generated |
| 8720 | // only for BBJ_RETURN blocks. |
| 8721 | // |
| 8722 | // Currently this doesn't work for non-void callees. Some of the code that handles |
| 8723 | // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes |
| 8724 | // do not have this flag by default. We could add the flag here but the proper solution |
| 8725 | // would be to replace the return expression with a local var node during inlining |
| 8726 | // so the rest of the call tree stays in a separate statement. That statement can then |
| 8727 | // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere. |
| 8728 | // |
| 8729 | |
| 8730 | if (!call->IsTailCall() && call->TypeGet() == TYP_VOID) |
| 8731 | { |
| 8732 | fgRemoveRestOfBlock = true; |
| 8733 | } |
| 8734 | } |
| 8735 | |
| 8736 | return call; |
| 8737 | } |
| 8738 | |
| 8739 | /***************************************************************************** |
| 8740 | * |
| 8741 | * Transform the given GTK_CONST tree for code generation. |
| 8742 | */ |
| 8743 | |
| 8744 | GenTree* Compiler::fgMorphConst(GenTree* tree) |
| 8745 | { |
| 8746 | assert(tree->OperKind() & GTK_CONST); |
| 8747 | |
| 8748 | /* Clear any exception flags or other unnecessary flags |
| 8749 | * that may have been set before folding this node to a constant */ |
| 8750 | |
| 8751 | tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS); |
| 8752 | |
| 8753 | if (tree->OperGet() != GT_CNS_STR) |
| 8754 | { |
| 8755 | return tree; |
| 8756 | } |
| 8757 | |
// TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently would
// guarantee slow performance for that block. Instead, cache the return value
// of CORINFO_HELP_STRCNS and check the cache first, giving reasonable perf.
| 8761 | |
| 8762 | if (compCurBB->bbJumpKind == BBJ_THROW) |
| 8763 | { |
| 8764 | CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd); |
| 8765 | if (helper != CORINFO_HELP_UNDEF) |
| 8766 | { |
// For unimportant blocks, we want to construct the string lazily
| 8768 | |
| 8769 | GenTreeArgList* args; |
| 8770 | if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE) |
| 8771 | { |
| 8772 | args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT)); |
| 8773 | } |
| 8774 | else |
| 8775 | { |
| 8776 | args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT), |
| 8777 | gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd)); |
| 8778 | } |
| 8779 | |
| 8780 | tree = gtNewHelperCallNode(helper, TYP_REF, args); |
| 8781 | return fgMorphTree(tree); |
| 8782 | } |
| 8783 | } |
| 8784 | |
| 8785 | assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd)); |
| 8786 | |
| 8787 | LPVOID pValue; |
| 8788 | InfoAccessType iat = |
| 8789 | info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue); |
| 8790 | |
| 8791 | tree = gtNewStringLiteralNode(iat, pValue); |
| 8792 | |
| 8793 | return fgMorphTree(tree); |
| 8794 | } |
| 8795 | |
| 8796 | /***************************************************************************** |
| 8797 | * |
| 8798 | * Transform the given GTK_LEAF tree for code generation. |
| 8799 | */ |
| 8800 | |
| 8801 | GenTree* Compiler::fgMorphLeaf(GenTree* tree) |
| 8802 | { |
| 8803 | assert(tree->OperKind() & GTK_LEAF); |
| 8804 | |
| 8805 | if (tree->gtOper == GT_LCL_VAR) |
| 8806 | { |
| 8807 | const bool forceRemorph = false; |
| 8808 | return fgMorphLocalVar(tree, forceRemorph); |
| 8809 | } |
| 8810 | #ifdef _TARGET_X86_ |
| 8811 | else if (tree->gtOper == GT_LCL_FLD) |
| 8812 | { |
| 8813 | if (info.compIsVarArgs) |
| 8814 | { |
| 8815 | GenTree* newTree = |
| 8816 | fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs); |
| 8817 | if (newTree != nullptr) |
| 8818 | { |
| 8819 | if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) |
| 8820 | { |
| 8821 | fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType); |
| 8822 | } |
| 8823 | return newTree; |
| 8824 | } |
| 8825 | } |
| 8826 | } |
| 8827 | #endif // _TARGET_X86_ |
| 8828 | else if (tree->gtOper == GT_FTN_ADDR) |
| 8829 | { |
| 8830 | CORINFO_CONST_LOOKUP addrInfo; |
| 8831 | |
| 8832 | #ifdef FEATURE_READYTORUN_COMPILER |
| 8833 | if (tree->gtFptrVal.gtEntryPoint.addr != nullptr) |
| 8834 | { |
| 8835 | addrInfo = tree->gtFptrVal.gtEntryPoint; |
| 8836 | } |
| 8837 | else |
| 8838 | #endif |
| 8839 | { |
| 8840 | info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo); |
| 8841 | } |
| 8842 | |
| 8843 | // Refer to gtNewIconHandleNode() as the template for constructing a constant handle |
| 8844 | // |
| 8845 | tree->SetOper(GT_CNS_INT); |
| 8846 | tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle)); |
| 8847 | tree->gtFlags |= GTF_ICON_FTN_ADDR; |
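
// The access type tells us how many indirections are needed to reach the actual
// entry point: IAT_VALUE is the address itself, IAT_PVALUE needs one indirection,
// and IAT_PPVALUE needs two (the first dereference of the handle being invariant).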
| 8848 | |
| 8849 | switch (addrInfo.accessType) |
| 8850 | { |
| 8851 | case IAT_PPVALUE: |
| 8852 | tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree); |
| 8853 | tree->gtFlags |= GTF_IND_INVARIANT; |
| 8854 | |
| 8855 | __fallthrough; |
| 8856 | |
| 8857 | case IAT_PVALUE: |
| 8858 | tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree); |
| 8859 | break; |
| 8860 | |
| 8861 | case IAT_VALUE: |
| 8862 | tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding |
| 8863 | break; |
| 8864 | |
| 8865 | default: |
| 8866 | noway_assert(!"Unknown addrInfo.accessType" ); |
| 8867 | } |
| 8868 | |
| 8869 | return fgMorphTree(tree); |
| 8870 | } |
| 8871 | |
| 8872 | return tree; |
| 8873 | } |
| 8874 | |
| 8875 | void Compiler::fgAssignSetVarDef(GenTree* tree) |
| 8876 | { |
| 8877 | GenTreeLclVarCommon* lclVarCmnTree; |
| 8878 | bool isEntire = false; |
| 8879 | if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire)) |
| 8880 | { |
| 8881 | if (isEntire) |
| 8882 | { |
| 8883 | lclVarCmnTree->gtFlags |= GTF_VAR_DEF; |
| 8884 | } |
| 8885 | else |
| 8886 | { |
// We consider partial definitions to be modeled as uses followed by definitions.
// This captures the idea that preceding defs are not necessarily made redundant
// by this definition.
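// For example, a store to a single field of a struct local leaves the other fields'
// earlier definitions live, so the node is flagged as both a use and a def
// (GTF_VAR_USEASG) of the whole variable.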
| 8890 | lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG); |
| 8891 | } |
| 8892 | } |
| 8893 | } |
| 8894 | |
| 8895 | //------------------------------------------------------------------------ |
| 8896 | // fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment |
| 8897 | // |
| 8898 | // Arguments: |
| 8899 | // tree - The block assignment to be possibly morphed |
| 8900 | // |
| 8901 | // Return Value: |
| 8902 | // The modified tree if successful, nullptr otherwise. |
| 8903 | // |
| 8904 | // Assumptions: |
| 8905 | // 'tree' must be a block assignment. |
| 8906 | // |
| 8907 | // Notes: |
| 8908 | // If successful, this method always returns the incoming tree, modifying only |
| 8909 | // its arguments. |
| 8910 | |
| 8911 | GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) |
| 8912 | { |
| 8913 | // This must be a block assignment. |
| 8914 | noway_assert(tree->OperIsBlkOp()); |
| 8915 | var_types asgType = tree->TypeGet(); |
| 8916 | |
| 8917 | GenTree* asg = tree; |
| 8918 | GenTree* dest = asg->gtGetOp1(); |
| 8919 | GenTree* src = asg->gtGetOp2(); |
| 8920 | unsigned destVarNum = BAD_VAR_NUM; |
| 8921 | LclVarDsc* destVarDsc = nullptr; |
| 8922 | GenTree* destLclVarTree = nullptr; |
| 8923 | bool isCopyBlock = asg->OperIsCopyBlkOp(); |
| 8924 | bool isInitBlock = !isCopyBlock; |
| 8925 | |
| 8926 | unsigned size; |
| 8927 | CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; |
| 8928 | #ifdef FEATURE_SIMD |
// The importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD/GT_HWIntrinsic).
// The SIMD type in question could be Vector2f, which is 8 bytes in size.
// The check below makes sure that we don't turn that copyblk
// into an assignment, since rationalizer logic will transform the
// copyblk appropriately. Otherwise, the transformation made in this
// routine would defeat the rationalizer logic and we might end up with a
// GT_ADDR(GT_SIMD/GT_HWIntrinsic) node post rationalization, leading to a noway assert
// in codegen.
| 8937 | // TODO-1stClassStructs: This is here to preserve old behavior. |
| 8938 | // It should be eliminated. |
| 8939 | if (src->OperIsSIMDorSimdHWintrinsic()) |
| 8940 | { |
| 8941 | return nullptr; |
| 8942 | } |
| 8943 | #endif |
| 8944 | |
| 8945 | if (dest->gtEffectiveVal()->OperIsBlk()) |
| 8946 | { |
| 8947 | GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk(); |
| 8948 | size = lhsBlk->Size(); |
| 8949 | if (impIsAddressInLocal(lhsBlk->Addr(), &destLclVarTree)) |
| 8950 | { |
| 8951 | destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum; |
| 8952 | destVarDsc = &(lvaTable[destVarNum]); |
| 8953 | } |
| 8954 | if (lhsBlk->OperGet() == GT_OBJ) |
| 8955 | { |
| 8956 | clsHnd = lhsBlk->AsObj()->gtClass; |
| 8957 | } |
| 8958 | } |
| 8959 | else |
| 8960 | { |
| 8961 | // Is this an enregisterable struct that is already a simple assignment? |
| 8962 | // This can happen if we are re-morphing. |
| 8963 | if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock) |
| 8964 | { |
| 8965 | return tree; |
| 8966 | } |
| 8967 | noway_assert(dest->OperIsLocal()); |
| 8968 | destLclVarTree = dest; |
| 8969 | destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum; |
| 8970 | destVarDsc = &(lvaTable[destVarNum]); |
| 8971 | if (isCopyBlock) |
| 8972 | { |
| 8973 | clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle(); |
| 8974 | size = info.compCompHnd->getClassSize(clsHnd); |
| 8975 | } |
| 8976 | else |
| 8977 | { |
| 8978 | size = destVarDsc->lvExactSize; |
| 8979 | } |
| 8980 | } |
| 8981 | |
| 8982 | // |
| 8983 | // See if we can do a simple transformation: |
| 8984 | // |
| 8985 | // GT_ASG <TYP_size> |
| 8986 | // / \ |
| 8987 | // GT_IND GT_IND or CNS_INT |
| 8988 | // | | |
| 8989 | // [dest] [src] |
| 8990 | // |
| 8991 | |
| 8992 | if (asgType == TYP_STRUCT) |
| 8993 | { |
| 8994 | if (size == REGSIZE_BYTES) |
| 8995 | { |
| 8996 | if (clsHnd == NO_CLASS_HANDLE) |
| 8997 | { |
// A register-sized cpblk can be treated as an integer assignment.
| 8999 | asgType = TYP_I_IMPL; |
| 9000 | } |
| 9001 | else |
| 9002 | { |
| 9003 | BYTE gcPtr; |
| 9004 | info.compCompHnd->getClassGClayout(clsHnd, &gcPtr); |
| 9005 | asgType = getJitGCType(gcPtr); |
| 9006 | } |
| 9007 | } |
| 9008 | else |
| 9009 | { |
| 9010 | switch (size) |
| 9011 | { |
| 9012 | case 1: |
| 9013 | asgType = TYP_BYTE; |
| 9014 | break; |
| 9015 | case 2: |
| 9016 | asgType = TYP_SHORT; |
| 9017 | break; |
| 9018 | |
| 9019 | #ifdef _TARGET_64BIT_ |
| 9020 | case 4: |
| 9021 | asgType = TYP_INT; |
| 9022 | break; |
| 9023 | #endif // _TARGET_64BIT_ |
| 9024 | } |
| 9025 | } |
| 9026 | } |
| 9027 | |
| 9028 | if ((destVarDsc != nullptr) && varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted) |
| 9029 | { |
| 9030 | // Let fgMorphCopyBlock handle it. |
| 9031 | return nullptr; |
| 9032 | } |
| 9033 | |
| 9034 | GenTree* srcLclVarTree = nullptr; |
| 9035 | LclVarDsc* srcVarDsc = nullptr; |
| 9036 | if (isCopyBlock) |
| 9037 | { |
| 9038 | if (src->OperGet() == GT_LCL_VAR) |
| 9039 | { |
| 9040 | srcLclVarTree = src; |
| 9041 | srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]); |
| 9042 | } |
| 9043 | else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &srcLclVarTree)) |
| 9044 | { |
| 9045 | srcVarDsc = &(lvaTable[srcLclVarTree->AsLclVarCommon()->gtLclNum]); |
| 9046 | } |
| 9047 | if ((srcVarDsc != nullptr) && varTypeIsStruct(srcLclVarTree) && srcVarDsc->lvPromoted) |
| 9048 | { |
| 9049 | // Let fgMorphCopyBlock handle it. |
| 9050 | return nullptr; |
| 9051 | } |
| 9052 | } |
| 9053 | |
| 9054 | if (asgType != TYP_STRUCT) |
| 9055 | { |
| 9056 | noway_assert((size <= REGSIZE_BYTES) || varTypeIsSIMD(asgType)); |
| 9057 | |
// For initBlk, a non-constant source is not going to allow us to fiddle
// with the bits to create a single assignment.
| 9060 | // Nor do we (for now) support transforming an InitBlock of SIMD type. |
| 9061 | if (isInitBlock && (!src->IsConstInitVal() || varTypeIsSIMD(asgType))) |
| 9062 | { |
| 9063 | return nullptr; |
| 9064 | } |
| 9065 | |
| 9066 | if (destVarDsc != nullptr) |
| 9067 | { |
| 9068 | #if LOCAL_ASSERTION_PROP |
| 9069 | // Kill everything about dest |
| 9070 | if (optLocalAssertionProp) |
| 9071 | { |
| 9072 | if (optAssertionCount > 0) |
| 9073 | { |
| 9074 | fgKillDependentAssertions(destVarNum DEBUGARG(tree)); |
| 9075 | } |
| 9076 | } |
| 9077 | #endif // LOCAL_ASSERTION_PROP |
| 9078 | |
// A previous incarnation of this code also required the local not to be
// address-exposed (i.e. address-taken). That seems orthogonal to the decision of whether
// to do field-wise assignments: being address-exposed will cause it to be
// "dependently" promoted, so it will be in the right memory location. One possible
// further reason for avoiding field-wise stores is that the struct might have alignment-induced
// holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
// concern, then we could compromise and say that being address-exposed, together with fields
// that do not completely cover the memory of the struct, prevents field-wise assignments.
// The same situation exists for the "src" decision.
| 9087 | if (varTypeIsStruct(destLclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType())) |
| 9088 | { |
| 9089 | // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.) |
| 9090 | return nullptr; |
| 9091 | } |
| 9092 | else if (!varTypeIsFloating(destLclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc))) |
| 9093 | { |
| 9094 | // Use the dest local var directly, as well as its type. |
| 9095 | dest = destLclVarTree; |
| 9096 | asgType = destVarDsc->lvType; |
| 9097 | |
| 9098 | // If the block operation had been a write to a local var of a small int type, |
| 9099 | // of the exact size of the small int type, and the var is NormalizeOnStore, |
| 9100 | // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't |
| 9101 | // have done that normalization. If we're now making it into an assignment, |
| 9102 | // the NormalizeOnStore will work, and it can be a full def. |
| 9103 | if (destVarDsc->lvNormalizeOnStore()) |
| 9104 | { |
| 9105 | dest->gtFlags &= (~GTF_VAR_USEASG); |
| 9106 | } |
| 9107 | } |
| 9108 | else |
| 9109 | { |
| 9110 | // Could be a non-promoted struct, or a floating point type local, or |
| 9111 | // an int subject to a partial write. Don't enregister. |
| 9112 | lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField)); |
| 9113 | |
| 9114 | // Mark the local var tree as a definition point of the local. |
| 9115 | destLclVarTree->gtFlags |= GTF_VAR_DEF; |
| 9116 | if (size < destVarDsc->lvExactSize) |
| 9117 | { // If it's not a full-width assignment.... |
| 9118 | destLclVarTree->gtFlags |= GTF_VAR_USEASG; |
| 9119 | } |
| 9120 | |
| 9121 | if (dest == destLclVarTree) |
| 9122 | { |
| 9123 | dest = gtNewIndir(asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest)); |
| 9124 | } |
| 9125 | } |
| 9126 | } |
| 9127 | |
| 9128 | // Check to ensure we don't have a reducible *(& ... ) |
| 9129 | if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR) |
| 9130 | { |
// If dest is an Indir or Block, and it has a child that is an Addr node
| 9132 | // |
| 9133 | GenTree* addrNode = dest->AsIndir()->Addr(); // known to be a GT_ADDR |
| 9134 | |
| 9135 | // Can we just remove the Ind(Addr(destOp)) and operate directly on 'destOp'? |
| 9136 | // |
| 9137 | GenTree* destOp = addrNode->gtGetOp1(); |
| 9138 | var_types destOpType = destOp->TypeGet(); |
| 9139 | |
| 9140 | // We can if we have a primitive integer type and the sizes are exactly the same. |
| 9141 | // |
| 9142 | if ((varTypeIsIntegralOrI(destOp) && (size == genTypeSize(destOpType)))) |
| 9143 | { |
| 9144 | dest = destOp; |
| 9145 | asgType = destOpType; |
| 9146 | } |
| 9147 | } |
| 9148 | |
| 9149 | if (dest->gtEffectiveVal()->OperIsIndir()) |
| 9150 | { |
| 9151 | // If we have no information about the destination, we have to assume it could |
| 9152 | // live anywhere (not just in the GC heap). |
| 9153 | // Mark the GT_IND node so that we use the correct write barrier helper in case |
| 9154 | // the field is a GC ref. |
| 9155 | |
| 9156 | if (!fgIsIndirOfAddrOfLocal(dest)) |
| 9157 | { |
| 9158 | dest->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE); |
| 9159 | tree->gtFlags |= GTF_GLOB_REF; |
| 9160 | } |
| 9161 | |
| 9162 | dest->gtFlags &= (~GTF_EXCEPT | dest->AsIndir()->Addr()->gtFlags); |
| 9163 | dest->SetIndirExceptionFlags(this); |
| 9164 | tree->gtFlags |= (dest->gtFlags & GTF_EXCEPT); |
| 9165 | } |
| 9166 | |
| 9167 | if (isCopyBlock) |
| 9168 | { |
| 9169 | if (srcVarDsc != nullptr) |
| 9170 | { |
| 9171 | // Handled above. |
| 9172 | assert(!varTypeIsStruct(srcLclVarTree) || !srcVarDsc->lvPromoted); |
| 9173 | if (!varTypeIsFloating(srcLclVarTree->TypeGet()) && |
| 9174 | size == genTypeSize(genActualType(srcLclVarTree->TypeGet()))) |
| 9175 | { |
| 9176 | // Use the src local var directly. |
| 9177 | src = srcLclVarTree; |
| 9178 | } |
| 9179 | else |
| 9180 | { |
// The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar))
// or indir(lclVarAddr) in rational form, and liveness won't account for these uses. Hence,
// we have to mark this local as address exposed so we don't delete it as a dead store later on.
| 9184 | unsigned lclVarNum = srcLclVarTree->gtLclVarCommon.gtLclNum; |
| 9185 | lvaTable[lclVarNum].lvAddrExposed = true; |
| 9186 | lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed)); |
| 9187 | GenTree* srcAddr; |
| 9188 | if (src == srcLclVarTree) |
| 9189 | { |
| 9190 | srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src); |
| 9191 | src = gtNewOperNode(GT_IND, asgType, srcAddr); |
| 9192 | } |
| 9193 | else |
| 9194 | { |
| 9195 | assert(src->OperIsIndir()); |
| 9196 | } |
| 9197 | } |
| 9198 | } |
| 9199 | |
| 9200 | if (src->OperIsIndir()) |
| 9201 | { |
| 9202 | if (!fgIsIndirOfAddrOfLocal(src)) |
| 9203 | { |
| 9204 | // If we have no information about the src, we have to assume it could |
| 9205 | // live anywhere (not just in the GC heap). |
| 9206 | // Mark the GT_IND node so that we use the correct write barrier helper in case |
| 9207 | // the field is a GC ref. |
| 9208 | src->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE); |
| 9209 | } |
| 9210 | |
| 9211 | src->gtFlags &= (~GTF_EXCEPT | src->AsIndir()->Addr()->gtFlags); |
| 9212 | src->SetIndirExceptionFlags(this); |
| 9213 | } |
| 9214 | } |
| 9215 | else |
| 9216 | { |
| 9217 | // InitBlk |
| 9218 | #if FEATURE_SIMD |
| 9219 | if (varTypeIsSIMD(asgType)) |
| 9220 | { |
| 9221 | assert(!isCopyBlock); // Else we would have returned the tree above. |
| 9222 | noway_assert(src->IsIntegralConst(0)); |
| 9223 | noway_assert(destVarDsc != nullptr); |
| 9224 | |
| 9225 | src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size); |
| 9226 | tree->gtOp.gtOp2 = src; |
| 9227 | return tree; |
| 9228 | } |
| 9229 | else |
| 9230 | #endif |
| 9231 | { |
| 9232 | if (src->OperIsInitVal()) |
| 9233 | { |
| 9234 | src = src->gtGetOp1(); |
| 9235 | } |
| 9236 | assert(src->IsCnsIntOrI()); |
| 9237 | // This will mutate the integer constant, in place, to be the correct |
| 9238 | // value for the type we are using in the assignment. |
| 9239 | src->AsIntCon()->FixupInitBlkValue(asgType); |
| 9240 | } |
| 9241 | } |
| 9242 | |
| 9243 | // Ensure that the dest is setup appropriately. |
| 9244 | if (dest->gtEffectiveVal()->OperIsIndir()) |
| 9245 | { |
| 9246 | dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/); |
| 9247 | } |
| 9248 | |
| 9249 | // Ensure that the rhs is setup appropriately. |
| 9250 | if (isCopyBlock) |
| 9251 | { |
| 9252 | src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/); |
| 9253 | } |
| 9254 | |
| 9255 | // Set the lhs and rhs on the assignment. |
| 9256 | if (dest != tree->gtOp.gtOp1) |
| 9257 | { |
| 9258 | asg->gtOp.gtOp1 = dest; |
| 9259 | } |
| 9260 | if (src != asg->gtOp.gtOp2) |
| 9261 | { |
| 9262 | asg->gtOp.gtOp2 = src; |
| 9263 | } |
| 9264 | |
| 9265 | asg->ChangeType(asgType); |
| 9266 | dest->gtFlags |= GTF_DONT_CSE; |
| 9267 | asg->gtFlags &= ~GTF_EXCEPT; |
| 9268 | asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT); |
| 9269 | // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate. |
| 9270 | asg->gtFlags &= ~GTF_REVERSE_OPS; |
| 9271 | |
| 9272 | #ifdef DEBUG |
| 9273 | if (verbose) |
| 9274 | { |
| 9275 | printf("fgMorphOneAsgBlock (after):\n" ); |
| 9276 | gtDispTree(tree); |
| 9277 | } |
| 9278 | #endif |
| 9279 | return tree; |
| 9280 | } |
| 9281 | |
| 9282 | return nullptr; |
| 9283 | } |
| 9284 | |
| 9285 | //------------------------------------------------------------------------ |
| 9286 | // fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node |
| 9287 | // |
| 9288 | // Arguments: |
// tree - a GT_ASG tree that performs a block initialization (i.e. tree->OperIsInitBlkOp())
// the child nodes for tree have already been morphed
| 9291 | // |
| 9292 | // Return Value: |
// We can return the original GT_INITBLK unmodified (least desirable, but always correct)
| 9294 | // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable) |
| 9295 | // If we have performed struct promotion of the Dest() then we will try to |
| 9296 | // perform a field by field assignment for each of the promoted struct fields |
| 9297 | // |
| 9298 | // Notes: |
| 9299 | // If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp |
// if the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
// cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
| 9302 | |
| 9303 | GenTree* Compiler::fgMorphInitBlock(GenTree* tree) |
| 9304 | { |
| 9305 | // We must have the GT_ASG form of InitBlkOp. |
| 9306 | noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp()); |
| 9307 | #ifdef DEBUG |
| 9308 | bool morphed = false; |
| 9309 | #endif // DEBUG |
| 9310 | |
| 9311 | GenTree* asg = tree; |
| 9312 | GenTree* src = tree->gtGetOp2(); |
| 9313 | GenTree* origDest = tree->gtGetOp1(); |
| 9314 | |
| 9315 | GenTree* dest = fgMorphBlkNode(origDest, true); |
| 9316 | if (dest != origDest) |
| 9317 | { |
| 9318 | tree->gtOp.gtOp1 = dest; |
| 9319 | } |
| 9320 | tree->gtType = dest->TypeGet(); |
| 9321 | // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its |
| 9322 | // type will be the type of the original lclVar, in which case we will change it to TYP_INT). |
| 9323 | if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src)) |
| 9324 | { |
| 9325 | src->gtType = TYP_INT; |
| 9326 | } |
| 9327 | JITDUMP("\nfgMorphInitBlock:" ); |
| 9328 | |
| 9329 | GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree); |
| 9330 | if (oneAsgTree) |
| 9331 | { |
| 9332 | JITDUMP(" using oneAsgTree.\n" ); |
| 9333 | tree = oneAsgTree; |
| 9334 | } |
| 9335 | else |
| 9336 | { |
| 9337 | GenTree* destAddr = nullptr; |
| 9338 | GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src; |
| 9339 | GenTree* blockSize = nullptr; |
| 9340 | unsigned blockWidth = 0; |
| 9341 | FieldSeqNode* destFldSeq = nullptr; |
| 9342 | LclVarDsc* destLclVar = nullptr; |
| 9343 | bool destDoFldAsg = false; |
| 9344 | unsigned destLclNum = BAD_VAR_NUM; |
| 9345 | bool blockWidthIsConst = false; |
| 9346 | GenTreeLclVarCommon* lclVarTree = nullptr; |
| 9347 | if (dest->IsLocal()) |
| 9348 | { |
| 9349 | lclVarTree = dest->AsLclVarCommon(); |
| 9350 | } |
| 9351 | else |
| 9352 | { |
| 9353 | if (dest->OperIsBlk()) |
| 9354 | { |
| 9355 | destAddr = dest->AsBlk()->Addr(); |
| 9356 | blockWidth = dest->AsBlk()->gtBlkSize; |
| 9357 | } |
| 9358 | else |
| 9359 | { |
| 9360 | assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT)); |
| 9361 | destAddr = dest->gtGetOp1(); |
| 9362 | blockWidth = genTypeSize(dest->TypeGet()); |
| 9363 | } |
| 9364 | } |
| 9365 | if (lclVarTree != nullptr) |
| 9366 | { |
| 9367 | destLclNum = lclVarTree->gtLclNum; |
| 9368 | destLclVar = &lvaTable[destLclNum]; |
| 9369 | blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar); |
| 9370 | blockWidthIsConst = true; |
| 9371 | } |
| 9372 | else |
| 9373 | { |
| 9374 | if (dest->gtOper == GT_DYN_BLK) |
| 9375 | { |
| 9376 | // The size must be an integer type |
| 9377 | blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize; |
| 9378 | assert(varTypeIsIntegral(blockSize->gtType)); |
| 9379 | } |
| 9380 | else |
| 9381 | { |
| 9382 | assert(blockWidth != 0); |
| 9383 | blockWidthIsConst = true; |
| 9384 | } |
| 9385 | |
| 9386 | if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq)) |
| 9387 | { |
| 9388 | destLclNum = lclVarTree->gtLclNum; |
| 9389 | destLclVar = &lvaTable[destLclNum]; |
| 9390 | } |
| 9391 | } |
| 9392 | if (destLclNum != BAD_VAR_NUM) |
| 9393 | { |
| 9394 | #if LOCAL_ASSERTION_PROP |
| 9395 | // Kill everything about destLclNum (and its field locals) |
| 9396 | if (optLocalAssertionProp) |
| 9397 | { |
| 9398 | if (optAssertionCount > 0) |
| 9399 | { |
| 9400 | fgKillDependentAssertions(destLclNum DEBUGARG(tree)); |
| 9401 | } |
| 9402 | } |
| 9403 | #endif // LOCAL_ASSERTION_PROP |
| 9404 | |
| 9405 | if (destLclVar->lvPromoted && blockWidthIsConst) |
| 9406 | { |
| 9407 | assert(initVal->OperGet() == GT_CNS_INT); |
| 9408 | noway_assert(varTypeIsStruct(destLclVar)); |
| 9409 | noway_assert(!opts.MinOpts()); |
if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
{
JITDUMP(" dest is address exposed");
| 9413 | } |
| 9414 | else |
| 9415 | { |
| 9416 | if (blockWidth == destLclVar->lvExactSize) |
| 9417 | { |
| 9418 | JITDUMP(" (destDoFldAsg=true)" ); |
| 9419 | // We may decide later that a copyblk is required when this struct has holes |
| 9420 | destDoFldAsg = true; |
| 9421 | } |
| 9422 | else |
| 9423 | { |
| 9424 | JITDUMP(" with mismatched size" ); |
| 9425 | } |
| 9426 | } |
| 9427 | } |
| 9428 | } |
| 9429 | |
| 9430 | // Can we use field by field assignment for the dest? |
| 9431 | if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles) |
| 9432 | { |
| 9433 | JITDUMP(" dest contains holes" ); |
| 9434 | destDoFldAsg = false; |
| 9435 | } |
| 9436 | |
JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
| 9438 | |
| 9439 | // If we're doing an InitBlock and we've transformed the dest to a non-Blk |
| 9440 | // we need to change it back. |
| 9441 | if (!destDoFldAsg && !dest->OperIsBlk()) |
| 9442 | { |
| 9443 | noway_assert(blockWidth != 0); |
| 9444 | tree->gtOp.gtOp1 = origDest; |
| 9445 | tree->gtType = origDest->gtType; |
| 9446 | } |
| 9447 | |
| 9448 | if (!destDoFldAsg && (destLclVar != nullptr)) |
| 9449 | { |
| 9450 | // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister. |
| 9451 | if (!destLclVar->lvRegStruct) |
| 9452 | { |
| 9453 | // Mark it as DoNotEnregister. |
| 9454 | lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp)); |
| 9455 | } |
| 9456 | } |
| 9457 | |
// If we are not doing field-by-field assignments, canonicalize the dest as a block
// operand; the dest local (if any) was already marked DoNotEnregister above.
//
| 9462 | if (!destDoFldAsg) |
| 9463 | { |
| 9464 | dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true); |
| 9465 | tree->gtOp.gtOp1 = dest; |
| 9466 | tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); |
| 9467 | } |
| 9468 | else |
| 9469 | { |
| 9470 | // The initVal must be a constant of TYP_INT |
| 9471 | noway_assert(initVal->OperGet() == GT_CNS_INT); |
| 9472 | noway_assert(genActualType(initVal->gtType) == TYP_INT); |
| 9473 | |
| 9474 | // The dest must be of a struct type. |
| 9475 | noway_assert(varTypeIsStruct(destLclVar)); |
| 9476 | |
| 9477 | // |
| 9478 | // Now, convert InitBlock to individual assignments |
| 9479 | // |
| 9480 | |
| 9481 | tree = nullptr; |
| 9482 | INDEBUG(morphed = true); |
| 9483 | |
| 9484 | GenTree* dest; |
| 9485 | GenTree* srcCopy; |
| 9486 | unsigned fieldLclNum; |
| 9487 | unsigned fieldCnt = destLclVar->lvFieldCnt; |
| 9488 | |
| 9489 | for (unsigned i = 0; i < fieldCnt; ++i) |
| 9490 | { |
| 9491 | fieldLclNum = destLclVar->lvFieldLclStart + i; |
| 9492 | dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet()); |
| 9493 | |
| 9494 | noway_assert(lclVarTree->gtOper == GT_LCL_VAR); |
// If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
| 9496 | dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG)); |
| 9497 | |
| 9498 | srcCopy = gtCloneExpr(initVal); |
| 9499 | noway_assert(srcCopy != nullptr); |
| 9500 | |
// The constant's type needs to match the type of the field being assigned.
| 9502 | if (dest->gtType == TYP_LONG) |
| 9503 | { |
| 9504 | srcCopy->ChangeOperConst(GT_CNS_NATIVELONG); |
| 9505 | // copy and extend the value |
| 9506 | srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue()); |
/* Change the type of srcCopy to TYP_LONG */
| 9508 | srcCopy->gtType = TYP_LONG; |
| 9509 | } |
| 9510 | else if (varTypeIsFloating(dest->gtType)) |
| 9511 | { |
| 9512 | srcCopy->ChangeOperConst(GT_CNS_DBL); |
| 9513 | // setup the bit pattern |
| 9514 | memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal, |
| 9515 | sizeof(srcCopy->gtDblCon.gtDconVal)); |
/* Change the type of srcCopy to TYP_DOUBLE */
| 9517 | srcCopy->gtType = TYP_DOUBLE; |
| 9518 | } |
| 9519 | else |
| 9520 | { |
| 9521 | noway_assert(srcCopy->gtOper == GT_CNS_INT); |
| 9522 | noway_assert(srcCopy->TypeGet() == TYP_INT); |
| 9523 | // setup the bit pattern |
| 9524 | memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal, |
| 9525 | sizeof(srcCopy->gtIntCon.gtIconVal)); |
| 9526 | } |
| 9527 | |
| 9528 | srcCopy->gtType = dest->TypeGet(); |
| 9529 | |
| 9530 | asg = gtNewAssignNode(dest, srcCopy); |
| 9531 | |
| 9532 | #if LOCAL_ASSERTION_PROP |
| 9533 | if (optLocalAssertionProp) |
| 9534 | { |
| 9535 | optAssertionGen(asg); |
| 9536 | } |
| 9537 | #endif // LOCAL_ASSERTION_PROP |
| 9538 | |
| 9539 | if (tree) |
| 9540 | { |
| 9541 | tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg); |
| 9542 | } |
| 9543 | else |
| 9544 | { |
| 9545 | tree = asg; |
| 9546 | } |
| 9547 | } |
| 9548 | } |
| 9549 | } |
| 9550 | |
| 9551 | #ifdef DEBUG |
| 9552 | if (morphed) |
| 9553 | { |
| 9554 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 9555 | |
| 9556 | if (verbose) |
| 9557 | { |
| 9558 | printf("fgMorphInitBlock (after):\n" ); |
| 9559 | gtDispTree(tree); |
| 9560 | } |
| 9561 | } |
| 9562 | #endif |
| 9563 | |
| 9564 | return tree; |
| 9565 | } |
| 9566 | |
| 9567 | //------------------------------------------------------------------------ |
| 9568 | // fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type |
| 9569 | // |
| 9570 | // Arguments: |
| 9571 | // tree - the node to be modified. |
| 9572 | // type - the type of indirection to change it to. |
| 9573 | // |
| 9574 | // Return Value: |
| 9575 | // Returns the node, modified in place. |
| 9576 | // |
| 9577 | // Notes: |
| 9578 | // This doesn't really warrant a separate method, but is here to abstract |
| 9579 | // the fact that these nodes can be modified in-place. |
| 9580 | |
| 9581 | GenTree* Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type) |
| 9582 | { |
| 9583 | tree->SetOper(GT_IND); |
| 9584 | tree->gtType = type; |
| 9585 | return tree; |
| 9586 | } |
| 9587 | |
| 9588 | //------------------------------------------------------------------------ |
| 9589 | // fgMorphGetStructAddr: Gets the address of a struct object |
| 9590 | // |
| 9591 | // Arguments: |
| 9592 | // pTree - the parent's pointer to the struct object node |
| 9593 | // clsHnd - the class handle for the struct type |
| 9594 | // isRValue - true if this is a source (not dest) |
| 9595 | // |
| 9596 | // Return Value: |
| 9597 | // Returns the address of the struct value, possibly modifying the existing tree to |
| 9598 | // sink the address below any comma nodes (this is to canonicalize for value numbering). |
// If this is a source, it will morph it to a GT_IND before taking its address,
| 9600 | // since it may not be remorphed (and we don't want blk nodes as rvalues). |
| 9601 | |
| 9602 | GenTree* Compiler::fgMorphGetStructAddr(GenTree** pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue) |
| 9603 | { |
| 9604 | GenTree* addr; |
| 9605 | GenTree* tree = *pTree; |
| 9606 | // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we |
| 9607 | // need to hang onto that for the purposes of value numbering. |
| 9608 | if (tree->OperIsIndir()) |
| 9609 | { |
| 9610 | if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0) |
| 9611 | { |
| 9612 | addr = tree->gtOp.gtOp1; |
| 9613 | } |
| 9614 | else |
| 9615 | { |
| 9616 | if (isRValue && tree->OperIsBlk()) |
| 9617 | { |
| 9618 | tree->ChangeOper(GT_IND); |
| 9619 | } |
| 9620 | addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree); |
| 9621 | } |
| 9622 | } |
| 9623 | else if (tree->gtOper == GT_COMMA) |
| 9624 | { |
| 9625 | // If this is a comma, we're going to "sink" the GT_ADDR below it. |
| 9626 | (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue); |
| 9627 | tree->gtType = TYP_BYREF; |
| 9628 | addr = tree; |
| 9629 | } |
| 9630 | else |
| 9631 | { |
| 9632 | switch (tree->gtOper) |
| 9633 | { |
| 9634 | case GT_LCL_FLD: |
| 9635 | case GT_LCL_VAR: |
| 9636 | case GT_INDEX: |
| 9637 | case GT_FIELD: |
| 9638 | case GT_ARR_ELEM: |
| 9639 | addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree); |
| 9640 | break; |
| 9641 | case GT_INDEX_ADDR: |
| 9642 | addr = tree; |
| 9643 | break; |
| 9644 | default: |
| 9645 | { |
| 9646 | // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're |
| 9647 | // not going to use "temp" |
| 9648 | GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd); |
| 9649 | addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue); |
| 9650 | break; |
| 9651 | } |
| 9652 | } |
| 9653 | } |
| 9654 | *pTree = addr; |
| 9655 | return addr; |
| 9656 | } |
| 9657 | |
| 9658 | //------------------------------------------------------------------------ |
| 9659 | // fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment |
| 9660 | // |
| 9661 | // Arguments: |
| 9662 | // tree - The struct type node |
| 9663 | // isDest - True if this is the destination of the assignment |
| 9664 | // |
| 9665 | // Return Value: |
| 9666 | // Returns the possibly-morphed node. The caller is responsible for updating |
// the parent of this node.
| 9668 | |
| 9669 | GenTree* Compiler::fgMorphBlkNode(GenTree* tree, bool isDest) |
| 9670 | { |
| 9671 | GenTree* handleTree = nullptr; |
| 9672 | GenTree* addr = nullptr; |
| 9673 | if (tree->OperIs(GT_COMMA)) |
| 9674 | { |
| 9675 | // In order to CSE and value number array index expressions and bounds checks, |
| 9676 | // the commas in which they are contained need to match. |
| 9677 | // The pattern is that the COMMA should be the address expression. |
| 9678 | // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind. |
| 9679 | // TODO-1stClassStructs: Consider whether this can be improved. |
| 9680 | // Also consider whether some of this can be included in gtNewBlockVal (though note |
| 9681 | // that doing so may cause us to query the type system before we otherwise would). |
| 9682 | // Example: |
| 9683 | // before: [3] comma struct <- [2] comma struct <- [1] LCL_VAR struct |
| 9684 | // after: [3] comma byref <- [2] comma byref <- [4] addr byref <- [1] LCL_VAR struct |
| 9685 | |
| 9686 | addr = tree; |
| 9687 | GenTree* effectiveVal = tree->gtEffectiveVal(); |
| 9688 | |
| 9689 | GenTreePtrStack commas(getAllocator(CMK_ArrayStack)); |
| 9690 | for (GenTree* comma = tree; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2()) |
| 9691 | { |
| 9692 | commas.Push(comma); |
| 9693 | } |
| 9694 | |
| 9695 | GenTree* lastComma = commas.Top(); |
| 9696 | noway_assert(lastComma->gtGetOp2() == effectiveVal); |
| 9697 | GenTree* effectiveValAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); |
| 9698 | #ifdef DEBUG |
| 9699 | effectiveValAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 9700 | #endif |
| 9701 | lastComma->gtOp.gtOp2 = effectiveValAddr; |
| 9702 | |
| 9703 | while (!commas.Empty()) |
| 9704 | { |
| 9705 | GenTree* comma = commas.Pop(); |
| 9706 | comma->gtType = TYP_BYREF; |
| 9707 | gtUpdateNodeSideEffects(comma); |
| 9708 | } |
| 9709 | |
| 9710 | handleTree = effectiveVal; |
| 9711 | } |
| 9712 | else if (tree->OperIs(GT_IND) && tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR)) |
| 9713 | { |
| 9714 | handleTree = tree; |
| 9715 | addr = tree->AsIndir()->Addr(); |
| 9716 | } |
| 9717 | |
| 9718 | if (addr != nullptr) |
| 9719 | { |
| 9720 | var_types structType = handleTree->TypeGet(); |
| 9721 | if (structType == TYP_STRUCT) |
| 9722 | { |
| 9723 | CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(handleTree); |
| 9724 | if (structHnd == NO_CLASS_HANDLE) |
| 9725 | { |
| 9726 | tree = gtNewOperNode(GT_IND, structType, addr); |
| 9727 | } |
| 9728 | else |
| 9729 | { |
| 9730 | tree = gtNewObjNode(structHnd, addr); |
| 9731 | if (tree->OperGet() == GT_OBJ) |
| 9732 | { |
| 9733 | gtSetObjGcInfo(tree->AsObj()); |
| 9734 | } |
| 9735 | } |
| 9736 | } |
| 9737 | else |
| 9738 | { |
| 9739 | tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType)); |
| 9740 | } |
| 9741 | |
| 9742 | gtUpdateNodeSideEffects(tree); |
| 9743 | #ifdef DEBUG |
| 9744 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 9745 | #endif |
| 9746 | } |
| 9747 | |
| 9748 | if (!tree->OperIsBlk()) |
| 9749 | { |
| 9750 | return tree; |
| 9751 | } |
| 9752 | GenTreeBlk* blkNode = tree->AsBlk(); |
| 9753 | if (blkNode->OperGet() == GT_DYN_BLK) |
| 9754 | { |
| 9755 | if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI()) |
| 9756 | { |
| 9757 | unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue(); |
| 9758 | // A GT_BLK with size of zero is not supported, |
| 9759 | // so if we encounter such a thing we just leave it as a GT_DYN_BLK |
| 9760 | if (size != 0) |
| 9761 | { |
| 9762 | blkNode->AsDynBlk()->gtDynamicSize = nullptr; |
| 9763 | blkNode->ChangeOper(GT_BLK); |
| 9764 | blkNode->gtBlkSize = size; |
| 9765 | } |
| 9766 | else |
| 9767 | { |
| 9768 | return tree; |
| 9769 | } |
| 9770 | } |
| 9771 | else |
| 9772 | { |
| 9773 | return tree; |
| 9774 | } |
| 9775 | } |
| 9776 | if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) && |
| 9777 | (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR)) |
| 9778 | { |
| 9779 | GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon(); |
| 9780 | if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode))) |
| 9781 | { |
| 9782 | lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr)); |
| 9783 | } |
| 9784 | } |
| 9785 | |
| 9786 | return tree; |
| 9787 | } |
| 9788 | |
| 9789 | //------------------------------------------------------------------------ |
| 9790 | // fgMorphBlockOperand: Canonicalize an operand of a block assignment |
| 9791 | // |
| 9792 | // Arguments: |
| 9793 | // tree - The block operand |
| 9794 | // asgType - The type of the assignment |
| 9795 | // blockWidth - The size of the block |
| 9796 | // isDest - true iff this is the destination of the assignment |
| 9797 | // |
| 9798 | // Return Value: |
| 9799 | // Returns the morphed block operand |
| 9800 | // |
| 9801 | // Notes: |
| 9802 | // This does the following: |
| 9803 | // - Ensures that a struct operand is a block node or lclVar. |
| 9804 | // - Ensures that any COMMAs are above ADDR nodes. |
| 9805 | // Although 'tree' WAS an operand of a block assignment, the assignment |
| 9806 | // may have been retyped to be a scalar assignment. |
| 9807 | |
| 9808 | GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest) |
| 9809 | { |
| 9810 | GenTree* effectiveVal = tree->gtEffectiveVal(); |
| 9811 | |
| 9812 | if (asgType != TYP_STRUCT) |
| 9813 | { |
| 9814 | if (effectiveVal->OperIsIndir()) |
| 9815 | { |
| 9816 | GenTree* addr = effectiveVal->AsIndir()->Addr(); |
| 9817 | if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType)) |
| 9818 | { |
| 9819 | effectiveVal = addr->gtGetOp1(); |
| 9820 | } |
| 9821 | else if (effectiveVal->OperIsBlk()) |
| 9822 | { |
| 9823 | effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType); |
| 9824 | } |
| 9825 | else |
| 9826 | { |
| 9827 | effectiveVal->gtType = asgType; |
| 9828 | } |
| 9829 | } |
| 9830 | else if (effectiveVal->TypeGet() != asgType) |
| 9831 | { |
| 9832 | GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); |
| 9833 | effectiveVal = gtNewIndir(asgType, addr); |
| 9834 | } |
| 9835 | } |
| 9836 | else |
| 9837 | { |
| 9838 | GenTreeIndir* indirTree = nullptr; |
| 9839 | GenTreeLclVarCommon* lclNode = nullptr; |
| 9840 | bool needsIndirection = true; |
| 9841 | |
| 9842 | if (effectiveVal->OperIsIndir()) |
| 9843 | { |
| 9844 | indirTree = effectiveVal->AsIndir(); |
| 9845 | GenTree* addr = effectiveVal->AsIndir()->Addr(); |
| 9846 | if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR)) |
| 9847 | { |
| 9848 | lclNode = addr->gtGetOp1()->AsLclVarCommon(); |
| 9849 | } |
| 9850 | } |
| 9851 | else if (effectiveVal->OperGet() == GT_LCL_VAR) |
| 9852 | { |
| 9853 | lclNode = effectiveVal->AsLclVarCommon(); |
| 9854 | } |
| 9855 | #ifdef FEATURE_SIMD |
| 9856 | if (varTypeIsSIMD(asgType)) |
| 9857 | { |
| 9858 | if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) && |
| 9859 | (indirTree->Addr()->gtGetOp1()->OperIsSIMDorSimdHWintrinsic())) |
| 9860 | { |
| 9861 | assert(!isDest); |
| 9862 | needsIndirection = false; |
| 9863 | effectiveVal = indirTree->Addr()->gtGetOp1(); |
| 9864 | } |
| 9865 | if (effectiveVal->OperIsSIMDorSimdHWintrinsic()) |
| 9866 | { |
| 9867 | needsIndirection = false; |
| 9868 | } |
| 9869 | } |
| 9870 | #endif // FEATURE_SIMD |
| 9871 | if (lclNode != nullptr) |
| 9872 | { |
| 9873 | LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]); |
| 9874 | if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth) && (varDsc->lvType == asgType)) |
| 9875 | { |
| 9876 | if (effectiveVal != lclNode) |
| 9877 | { |
| 9878 | JITDUMP("Replacing block node [%06d] with lclVar V%02u\n" , dspTreeID(tree), lclNode->gtLclNum); |
| 9879 | effectiveVal = lclNode; |
| 9880 | } |
| 9881 | needsIndirection = false; |
| 9882 | } |
| 9883 | else |
| 9884 | { |
| 9885 | // This may be a lclVar that was determined to be address-exposed. |
| 9886 | effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT); |
| 9887 | } |
| 9888 | } |
| 9889 | if (needsIndirection) |
| 9890 | { |
| 9891 | if (indirTree != nullptr) |
| 9892 | { |
| 9893 | // We should never find a struct indirection on the lhs of an assignment. |
| 9894 | assert(!isDest || indirTree->OperIsBlk()); |
| 9895 | if (!isDest && indirTree->OperIsBlk()) |
| 9896 | { |
| 9897 | (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType); |
| 9898 | } |
| 9899 | } |
| 9900 | else |
| 9901 | { |
| 9902 | GenTree* newTree; |
| 9903 | GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); |
| 9904 | if (isDest) |
| 9905 | { |
| 9906 | CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal); |
| 9907 | if (clsHnd == NO_CLASS_HANDLE) |
| 9908 | { |
| 9909 | newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth); |
| 9910 | } |
| 9911 | else |
| 9912 | { |
| 9913 | newTree = gtNewObjNode(clsHnd, addr); |
| 9914 | if (isDest && (newTree->OperGet() == GT_OBJ)) |
| 9915 | { |
| 9916 | gtSetObjGcInfo(newTree->AsObj()); |
| 9917 | } |
| 9918 | if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0)) |
| 9919 | { |
| 9920 | // This is not necessarily a global reference, though gtNewObjNode always assumes it is. |
| 9921 | // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor, |
| 9922 | // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled |
| 9923 | // separately now to avoid excess diffs. |
| 9924 | newTree->gtFlags &= ~(GTF_GLOB_EFFECT); |
| 9925 | } |
| 9926 | } |
| 9927 | } |
| 9928 | else |
| 9929 | { |
| 9930 | newTree = gtNewIndir(asgType, addr); |
| 9931 | } |
| 9932 | effectiveVal = newTree; |
| 9933 | } |
| 9934 | } |
| 9935 | } |
| 9936 | tree = effectiveVal; |
| 9937 | return tree; |
| 9938 | } |
| 9939 | |
| 9940 | //------------------------------------------------------------------------ |
| 9941 | // fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk |
| 9942 | // |
| 9943 | // Arguments: |
| 9944 | // dest - the GT_OBJ or GT_STORE_OBJ |
| 9945 | // |
| 9946 | // Assumptions: |
| 9947 | // The destination must be known (by the caller) to be on the stack. |
| 9948 | // |
| 9949 | // Notes: |
| 9950 | // If we have a CopyObj with a dest on the stack, and its size is small enough |
| 9951 | // to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a |
| 9952 | // GC Unsafe CopyBlk that is non-interruptible. |
| 9953 | // This is not supported for the JIT32_GCENCODER, in which case this method is a no-op. |
| 9954 | // |
| 9955 | void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest) |
| 9956 | { |
| 9957 | #if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER) |
| 9958 | assert(dest->gtGcPtrCount != 0); |
| 9959 | unsigned blockWidth = dest->AsBlk()->gtBlkSize; |
| 9960 | #ifdef DEBUG |
| 9961 | bool destOnStack = false; |
| 9962 | GenTree* destAddr = dest->Addr(); |
| 9963 | assert(destAddr->IsLocalAddrExpr() != nullptr); |
| 9964 | #endif |
| 9965 | if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT)) |
| 9966 | { |
| 9967 | genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK; |
| 9968 | dest->SetOper(newOper); |
| 9969 | dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block |
| 9970 | } |
| 9971 | #endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER) |
| 9972 | } |
| 9973 | |
| 9974 | //------------------------------------------------------------------------ |
| 9975 | // fgMorphCopyBlock: Perform the Morphing of block copy |
| 9976 | // |
| 9977 | // Arguments: |
| 9978 | // tree - a block copy (i.e. an assignment with a block op on the lhs). |
| 9979 | // |
| 9980 | // Return Value: |
| 9981 | // We can return the orginal block copy unmodified (least desirable, but always correct) |
| 9982 | // We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable). |
| 9983 | // If we have performed struct promotion of the Source() or the Dest() then we will try to |
| 9984 | // perform a field by field assignment for each of the promoted struct fields. |
| 9985 | // |
| 9986 | // Assumptions: |
| 9987 | // The child nodes for tree have already been Morphed. |
| 9988 | // |
| 9989 | // Notes: |
| 9990 | // If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest(). |
| 9991 | // When performing a field by field assignment we can have one of Source() or Dest treated as a blob of bytes |
| 9992 | // and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes. |
| 9993 | // if the Source() or Dest() is a a struct that has a "CustomLayout" and "ConstainsHoles" then we |
| 9994 | // can not use a field by field assignment and must leave the orginal block copy unmodified. |
| 9995 | |
| 9996 | GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) |
| 9997 | { |
| 9998 | noway_assert(tree->OperIsCopyBlkOp()); |
| 9999 | |
| 10000 | JITDUMP("\nfgMorphCopyBlock:" ); |
| 10001 | |
| 10002 | bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0; |
| 10003 | |
| 10004 | GenTree* asg = tree; |
| 10005 | GenTree* rhs = asg->gtGetOp2(); |
| 10006 | GenTree* dest = asg->gtGetOp1(); |
| 10007 | |
| 10008 | #if FEATURE_MULTIREG_RET |
| 10009 | // If this is a multi-reg return, we will not do any morphing of this node. |
| 10010 | if (rhs->IsMultiRegCall()) |
| 10011 | { |
| 10012 | assert(dest->OperGet() == GT_LCL_VAR); |
| 10013 | JITDUMP(" not morphing a multireg call return\n" ); |
| 10014 | return tree; |
| 10015 | } |
| 10016 | #endif // FEATURE_MULTIREG_RET |
| 10017 | |
| 10018 | // If we have an array index on the lhs, we need to create an obj node. |
| 10019 | |
| 10020 | dest = fgMorphBlkNode(dest, true); |
| 10021 | if (dest != asg->gtGetOp1()) |
| 10022 | { |
| 10023 | asg->gtOp.gtOp1 = dest; |
| 10024 | if (dest->IsLocal()) |
| 10025 | { |
| 10026 | dest->gtFlags |= GTF_VAR_DEF; |
| 10027 | } |
| 10028 | } |
| 10029 | asg->gtType = dest->TypeGet(); |
| 10030 | rhs = fgMorphBlkNode(rhs, false); |
| 10031 | |
| 10032 | asg->gtOp.gtOp2 = rhs; |
| 10033 | |
| 10034 | GenTree* oldTree = tree; |
| 10035 | GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree); |
| 10036 | |
| 10037 | if (oneAsgTree) |
| 10038 | { |
| 10039 | JITDUMP(" using oneAsgTree.\n" ); |
| 10040 | tree = oneAsgTree; |
| 10041 | } |
| 10042 | else |
| 10043 | { |
| 10044 | unsigned blockWidth; |
| 10045 | bool blockWidthIsConst = false; |
| 10046 | GenTreeLclVarCommon* lclVarTree = nullptr; |
| 10047 | GenTreeLclVarCommon* srcLclVarTree = nullptr; |
| 10048 | unsigned destLclNum = BAD_VAR_NUM; |
| 10049 | LclVarDsc* destLclVar = nullptr; |
| 10050 | FieldSeqNode* destFldSeq = nullptr; |
| 10051 | bool destDoFldAsg = false; |
| 10052 | GenTree* destAddr = nullptr; |
| 10053 | GenTree* srcAddr = nullptr; |
| 10054 | bool destOnStack = false; |
| 10055 | bool hasGCPtrs = false; |
| 10056 | |
| 10057 | JITDUMP("block assignment to morph:\n" ); |
| 10058 | DISPTREE(asg); |
| 10059 | |
| 10060 | if (dest->IsLocal()) |
| 10061 | { |
| 10062 | blockWidthIsConst = true; |
| 10063 | destOnStack = true; |
| 10064 | if (dest->gtOper == GT_LCL_VAR) |
| 10065 | { |
| 10066 | lclVarTree = dest->AsLclVarCommon(); |
| 10067 | destLclNum = lclVarTree->gtLclNum; |
| 10068 | destLclVar = &lvaTable[destLclNum]; |
| 10069 | if (destLclVar->lvType == TYP_STRUCT) |
| 10070 | { |
| 10071 | // It would be nice if lvExactSize always corresponded to the size of the struct, |
| 10072 | // but it doesn't always for the temps that the importer creates when it spills side |
| 10073 | // effects. |
| 10074 | // TODO-Cleanup: Determine when this happens, and whether it can be changed. |
| 10075 | blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle()); |
| 10076 | } |
| 10077 | else |
| 10078 | { |
| 10079 | blockWidth = genTypeSize(destLclVar->lvType); |
| 10080 | } |
| 10081 | hasGCPtrs = destLclVar->lvStructGcCount != 0; |
| 10082 | } |
| 10083 | else |
| 10084 | { |
| 10085 | assert(dest->TypeGet() != TYP_STRUCT); |
| 10086 | assert(dest->gtOper == GT_LCL_FLD); |
| 10087 | blockWidth = genTypeSize(dest->TypeGet()); |
| 10088 | destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); |
| 10089 | destFldSeq = dest->AsLclFld()->gtFieldSeq; |
| 10090 | } |
| 10091 | } |
| 10092 | else |
| 10093 | { |
| 10094 | GenTree* effectiveDest = dest->gtEffectiveVal(); |
| 10095 | if (effectiveDest->OperGet() == GT_IND) |
| 10096 | { |
| 10097 | assert(dest->TypeGet() != TYP_STRUCT); |
| 10098 | blockWidth = genTypeSize(effectiveDest->TypeGet()); |
| 10099 | blockWidthIsConst = true; |
| 10100 | if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0)) |
| 10101 | { |
| 10102 | destAddr = dest->gtGetOp1(); |
| 10103 | } |
| 10104 | } |
| 10105 | else |
| 10106 | { |
| 10107 | assert(effectiveDest->OperIsBlk()); |
| 10108 | GenTreeBlk* blk = effectiveDest->AsBlk(); |
| 10109 | |
| 10110 | blockWidth = blk->gtBlkSize; |
| 10111 | blockWidthIsConst = (blk->gtOper != GT_DYN_BLK); |
| 10112 | if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0)) |
| 10113 | { |
| 10114 | destAddr = blk->Addr(); |
| 10115 | } |
| 10116 | } |
| 10117 | if (destAddr != nullptr) |
| 10118 | { |
| 10119 | noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL); |
| 10120 | if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq)) |
| 10121 | { |
| 10122 | destOnStack = true; |
| 10123 | destLclNum = lclVarTree->gtLclNum; |
| 10124 | destLclVar = &lvaTable[destLclNum]; |
| 10125 | } |
| 10126 | } |
| 10127 | } |
| 10128 | |
| 10129 | if (destLclVar != nullptr) |
| 10130 | { |
| 10131 | #if LOCAL_ASSERTION_PROP |
| 10132 | // Kill everything about destLclNum (and its field locals) |
| 10133 | if (optLocalAssertionProp) |
| 10134 | { |
| 10135 | if (optAssertionCount > 0) |
| 10136 | { |
| 10137 | fgKillDependentAssertions(destLclNum DEBUGARG(tree)); |
| 10138 | } |
| 10139 | } |
| 10140 | #endif // LOCAL_ASSERTION_PROP |
| 10141 | |
| 10142 | if (destLclVar->lvPromoted && blockWidthIsConst) |
| 10143 | { |
| 10144 | noway_assert(varTypeIsStruct(destLclVar)); |
| 10145 | noway_assert(!opts.MinOpts()); |
| 10146 | |
| 10147 | if (blockWidth == destLclVar->lvExactSize) |
| 10148 | { |
| 10149 | JITDUMP(" (destDoFldAsg=true)" ); |
| 10150 | // We may decide later that a copyblk is required when this struct has holes |
| 10151 | destDoFldAsg = true; |
| 10152 | } |
| 10153 | else |
| 10154 | { |
| 10155 | JITDUMP(" with mismatched dest size" ); |
| 10156 | } |
| 10157 | } |
| 10158 | } |
| 10159 | |
| 10160 | FieldSeqNode* srcFldSeq = nullptr; |
| 10161 | unsigned srcLclNum = BAD_VAR_NUM; |
| 10162 | LclVarDsc* srcLclVar = nullptr; |
| 10163 | bool srcDoFldAsg = false; |
| 10164 | |
| 10165 | if (rhs->IsLocal()) |
| 10166 | { |
| 10167 | srcLclVarTree = rhs->AsLclVarCommon(); |
| 10168 | srcLclNum = srcLclVarTree->gtLclNum; |
| 10169 | if (rhs->OperGet() == GT_LCL_FLD) |
| 10170 | { |
| 10171 | srcFldSeq = rhs->AsLclFld()->gtFieldSeq; |
| 10172 | } |
| 10173 | } |
| 10174 | else if (rhs->OperIsIndir()) |
| 10175 | { |
| 10176 | if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq)) |
| 10177 | { |
| 10178 | srcLclNum = srcLclVarTree->gtLclNum; |
| 10179 | } |
| 10180 | else |
| 10181 | { |
| 10182 | srcAddr = rhs->gtOp.gtOp1; |
| 10183 | } |
| 10184 | } |
| 10185 | |
| 10186 | if (srcLclNum != BAD_VAR_NUM) |
| 10187 | { |
| 10188 | srcLclVar = &lvaTable[srcLclNum]; |
| 10189 | |
| 10190 | if (srcLclVar->lvPromoted && blockWidthIsConst) |
| 10191 | { |
| 10192 | noway_assert(varTypeIsStruct(srcLclVar)); |
| 10193 | noway_assert(!opts.MinOpts()); |
| 10194 | |
| 10195 | if (blockWidth == srcLclVar->lvExactSize) |
| 10196 | { |
| 10197 | JITDUMP(" (srcDoFldAsg=true)" ); |
| 10198 | // We may decide later that a copyblk is required when this struct has holes |
| 10199 | srcDoFldAsg = true; |
| 10200 | } |
| 10201 | else |
| 10202 | { |
| 10203 | JITDUMP(" with mismatched src size" ); |
| 10204 | } |
| 10205 | } |
| 10206 | } |
| 10207 | |
| 10208 | // Check to see if we are doing a copy to/from the same local block. |
| 10209 | // If so, morph it to a nop. |
| 10210 | if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq) && |
| 10211 | destFldSeq != FieldSeqStore::NotAField()) |
| 10212 | { |
| 10213 | JITDUMP("Self-copy; replaced with a NOP.\n" ); |
| 10214 | GenTree* nop = gtNewNothingNode(); |
| 10215 | INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); |
| 10216 | return nop; |
| 10217 | } |
| 10218 | |
| 10219 | // Check to see if we are required to do a copy block because the struct contains holes |
| 10220 | // and either the src or dest is externally visible |
| 10221 | // |
| 10222 | bool requiresCopyBlock = false; |
| 10223 | bool srcSingleLclVarAsg = false; |
| 10224 | bool destSingleLclVarAsg = false; |
| 10225 | |
| 10226 | // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock. |
| 10227 | if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct)) |
| 10228 | { |
| 10229 | requiresCopyBlock = true; |
| 10230 | } |
| 10231 | |
| 10232 | // Can we use field by field assignment for the dest? |
| 10233 | if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles) |
| 10234 | { |
| 10235 | JITDUMP(" dest contains custom layout and contains holes" ); |
| 10236 | // C++ style CopyBlock with holes |
| 10237 | requiresCopyBlock = true; |
| 10238 | } |
| 10239 | |
| 10240 | // Can we use field by field assignment for the src? |
| 10241 | if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles) |
| 10242 | { |
| 10243 | JITDUMP(" src contains custom layout and contains holes" ); |
| 10244 | // C++ style CopyBlock with holes |
| 10245 | requiresCopyBlock = true; |
| 10246 | } |
| 10247 | |
| 10248 | #if defined(_TARGET_ARM_) |
| 10249 | if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED)) |
| 10250 | { |
| 10251 | JITDUMP(" rhs is unaligned" ); |
| 10252 | requiresCopyBlock = true; |
| 10253 | } |
| 10254 | |
| 10255 | if (asg->gtFlags & GTF_BLK_UNALIGNED) |
| 10256 | { |
| 10257 | JITDUMP(" asg is unaligned" ); |
| 10258 | requiresCopyBlock = true; |
| 10259 | } |
| 10260 | #endif // _TARGET_ARM_ |
| 10261 | |
| 10262 | if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe) |
| 10263 | { |
| 10264 | requiresCopyBlock = true; |
| 10265 | } |
| 10266 | |
| 10267 | // Can't use field by field assignment if the src is a call. |
| 10268 | if (rhs->OperGet() == GT_CALL) |
| 10269 | { |
| 10270 | JITDUMP(" src is a call" ); |
| 10271 | // C++ style CopyBlock with holes |
| 10272 | requiresCopyBlock = true; |
| 10273 | } |
| 10274 | |
| 10275 | // If we passed the above checks, then we will check these two |
| 10276 | if (!requiresCopyBlock) |
| 10277 | { |
| 10278 | // Are both dest and src promoted structs? |
| 10279 | if (destDoFldAsg && srcDoFldAsg) |
| 10280 | { |
| 10281 | // Both structs should be of the same type, or each have a single field of the same type. |
| 10282 | // If not we will use a copy block. |
| 10283 | if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() != |
| 10284 | lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle()) |
| 10285 | { |
| 10286 | unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart; |
| 10287 | unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart; |
| 10288 | if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) || |
| 10289 | (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType)) |
| 10290 | { |
| 10291 | requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock |
| 10292 | JITDUMP(" with mismatched types" ); |
| 10293 | } |
| 10294 | } |
| 10295 | } |
| 10296 | // Are neither dest or src promoted structs? |
| 10297 | else if (!destDoFldAsg && !srcDoFldAsg) |
| 10298 | { |
| 10299 | requiresCopyBlock = true; // Leave as a CopyBlock |
| 10300 | JITDUMP(" with no promoted structs" ); |
| 10301 | } |
| 10302 | else if (destDoFldAsg) |
| 10303 | { |
| 10304 | // Match the following kinds of trees: |
| 10305 | // fgMorphTree BB01, stmt 9 (before) |
| 10306 | // [000052] ------------ const int 8 |
| 10307 | // [000053] -A--G------- copyBlk void |
| 10308 | // [000051] ------------ addr byref |
| 10309 | // [000050] ------------ lclVar long V07 loc5 |
| 10310 | // [000054] --------R--- <list> void |
| 10311 | // [000049] ------------ addr byref |
| 10312 | // [000048] ------------ lclVar struct(P) V06 loc4 |
| 10313 | // long V06.h (offs=0x00) -> V17 tmp9 |
| 10314 | // Yields this transformation |
| 10315 | // fgMorphCopyBlock (after): |
| 10316 | // [000050] ------------ lclVar long V07 loc5 |
| 10317 | // [000085] -A---------- = long |
| 10318 | // [000083] D------N---- lclVar long V17 tmp9 |
| 10319 | // |
| 10320 | if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) && |
| 10321 | (blockWidth == genTypeSize(srcLclVar->TypeGet()))) |
| 10322 | { |
| 10323 | // Reject the following tree: |
| 10324 | // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe |
| 10325 | // |
| 10326 | // fgMorphTree BB01, stmt 6 (before) |
| 10327 | // [000038] ------------- const int 4 |
| 10328 | // [000039] -A--G-------- copyBlk void |
| 10329 | // [000037] ------------- addr byref |
| 10330 | // [000036] ------------- lclVar int V05 loc3 |
| 10331 | // [000040] --------R---- <list> void |
| 10332 | // [000035] ------------- addr byref |
| 10333 | // [000034] ------------- lclVar struct(P) V04 loc2 |
| 10334 | // float V04.f1 (offs=0x00) -> V13 tmp6 |
| 10335 | // As this would framsform into |
| 10336 | // float V13 = int V05 |
| 10337 | // |
| 10338 | unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart; |
| 10339 | var_types destType = lvaTable[fieldLclNum].TypeGet(); |
| 10340 | if (srcLclVar->TypeGet() == destType) |
| 10341 | { |
| 10342 | srcSingleLclVarAsg = true; |
| 10343 | } |
| 10344 | } |
| 10345 | } |
| 10346 | else |
| 10347 | { |
| 10348 | assert(srcDoFldAsg); |
| 10349 | // Check for the symmetric case (which happens for the _pointer field of promoted spans): |
| 10350 | // |
| 10351 | // [000240] -----+------ /--* lclVar struct(P) V18 tmp9 |
| 10352 | // /--* byref V18._value (offs=0x00) -> V30 tmp21 |
| 10353 | // [000245] -A------R--- * = struct (copy) |
| 10354 | // [000244] -----+------ \--* obj(8) struct |
| 10355 | // [000243] -----+------ \--* addr byref |
| 10356 | // [000242] D----+-N---- \--* lclVar byref V28 tmp19 |
| 10357 | // |
| 10358 | if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) && |
| 10359 | (blockWidth == genTypeSize(destLclVar->TypeGet()))) |
| 10360 | { |
| 10361 | // Check for type agreement |
| 10362 | unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart; |
| 10363 | var_types srcType = lvaTable[fieldLclNum].TypeGet(); |
| 10364 | if (destLclVar->TypeGet() == srcType) |
| 10365 | { |
| 10366 | destSingleLclVarAsg = true; |
| 10367 | } |
| 10368 | } |
| 10369 | } |
| 10370 | } |
| 10371 | |
| 10372 | // If we require a copy block the set both of the field assign bools to false |
| 10373 | if (requiresCopyBlock) |
| 10374 | { |
| 10375 | // If a copy block is required then we won't do field by field assignments |
| 10376 | destDoFldAsg = false; |
| 10377 | srcDoFldAsg = false; |
| 10378 | } |
| 10379 | |
JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
| 10381 | |
| 10382 | // Mark the dest/src structs as DoNotEnreg when they are not being fully referenced as the same type. |
| 10383 | // |
| 10384 | if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg) |
| 10385 | { |
| 10386 | if (!destLclVar->lvRegStruct || (destLclVar->lvType != dest->TypeGet())) |
| 10387 | { |
| 10388 | // Mark it as DoNotEnregister. |
| 10389 | lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp)); |
| 10390 | } |
| 10391 | } |
| 10392 | |
| 10393 | if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg) |
| 10394 | { |
| 10395 | if (!srcLclVar->lvRegStruct || (srcLclVar->lvType != dest->TypeGet())) |
| 10396 | { |
| 10397 | lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp)); |
| 10398 | } |
| 10399 | } |
| 10400 | |
| 10401 | if (requiresCopyBlock) |
| 10402 | { |
| 10403 | var_types asgType = dest->TypeGet(); |
| 10404 | dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/); |
| 10405 | asg->gtOp.gtOp1 = dest; |
| 10406 | asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); |
| 10407 | |
| 10408 | // Note that the unrolling of CopyBlk is only implemented on some platforms. |
| 10409 | // Currently that includes x64 and ARM but not x86: the code generation for this |
| 10410 | // construct requires the ability to mark certain regions of the generated code |
| 10411 | // as non-interruptible, and the GC encoding for the latter platform does not |
| 10412 | // have this capability. |
| 10413 | |
| 10414 | // If we have a CopyObj with a dest on the stack |
| 10415 | // we will convert it into an GC Unsafe CopyBlk that is non-interruptible |
| 10416 | // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes). |
| 10417 | // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.) |
| 10418 | // |
| 10419 | if (destOnStack && (dest->OperGet() == GT_OBJ)) |
| 10420 | { |
| 10421 | fgMorphUnsafeBlk(dest->AsObj()); |
| 10422 | } |
| 10423 | |
| 10424 | // Eliminate the "OBJ or BLK" node on the rhs. |
| 10425 | rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/); |
| 10426 | asg->gtOp.gtOp2 = rhs; |
| 10427 | |
| 10428 | // Formerly, liveness did not consider copyblk arguments of simple types as being |
| 10429 | // a use or def, so these variables were marked as address-exposed. |
| 10430 | // TODO-1stClassStructs: This should no longer be needed. |
| 10431 | if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar)) |
| 10432 | { |
| 10433 | JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n" , srcLclNum); |
| 10434 | lvaTable[srcLclNum].lvAddrExposed = true; |
| 10435 | } |
| 10436 | |
| 10437 | if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar)) |
| 10438 | { |
| 10439 | JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n" , destLclNum); |
| 10440 | lvaTable[destLclNum].lvAddrExposed = true; |
| 10441 | } |
| 10442 | |
| 10443 | goto _Done; |
| 10444 | } |
| 10445 | |
| 10446 | // |
| 10447 | // Otherwise we convert this CopyBlock into individual field by field assignments |
| 10448 | // |
| 10449 | tree = nullptr; |
| 10450 | |
| 10451 | GenTree* src; |
| 10452 | GenTree* addrSpill = nullptr; |
| 10453 | unsigned addrSpillTemp = BAD_VAR_NUM; |
| 10454 | bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame |
| 10455 | |
| 10456 | unsigned fieldCnt = DUMMY_INIT(0); |
| 10457 | |
| 10458 | if (destDoFldAsg && srcDoFldAsg) |
| 10459 | { |
| 10460 | // To do fieldwise assignments for both sides, they'd better be the same struct type! |
| 10461 | // All of these conditions were checked above... |
| 10462 | assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM); |
| 10463 | assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt); |
| 10464 | |
| 10465 | fieldCnt = destLclVar->lvFieldCnt; |
| 10466 | goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field |
| 10467 | // assignments. |
| 10468 | } |
| 10469 | else if (destDoFldAsg) |
| 10470 | { |
| 10471 | fieldCnt = destLclVar->lvFieldCnt; |
| 10472 | rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/); |
| 10473 | if (srcAddr == nullptr) |
| 10474 | { |
| 10475 | srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */); |
| 10476 | } |
| 10477 | } |
| 10478 | else |
| 10479 | { |
| 10480 | assert(srcDoFldAsg); |
| 10481 | fieldCnt = srcLclVar->lvFieldCnt; |
| 10482 | dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/); |
| 10483 | if (dest->OperIsBlk()) |
| 10484 | { |
| 10485 | (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT); |
| 10486 | } |
| 10487 | destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); |
| 10488 | } |
| 10489 | |
| 10490 | if (destDoFldAsg) |
| 10491 | { |
| 10492 | noway_assert(!srcDoFldAsg); |
| 10493 | if (gtClone(srcAddr)) |
| 10494 | { |
| 10495 | // srcAddr is simple expression. No need to spill. |
| 10496 | noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); |
| 10497 | } |
| 10498 | else |
| 10499 | { |
| 10500 | // srcAddr is complex expression. Clone and spill it (unless the destination is |
| 10501 | // a struct local that only has one field, in which case we'd only use the |
| 10502 | // address value once...) |
| 10503 | if (destLclVar->lvFieldCnt > 1) |
| 10504 | { |
| 10505 | addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr' |
| 10506 | noway_assert(addrSpill != nullptr); |
| 10507 | } |
| 10508 | } |
| 10509 | } |
| 10510 | |
| 10511 | if (srcDoFldAsg) |
| 10512 | { |
| 10513 | noway_assert(!destDoFldAsg); |
| 10514 | |
| 10515 | // If we're doing field-wise stores, to an address within a local, and we copy |
| 10516 | // the address into "addrSpill", do *not* declare the original local var node in the |
| 10517 | // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the |
| 10518 | // field-wise assignments as an "indirect" assignment to the local. |
| 10519 | // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before |
| 10520 | // we clone it.) |
| 10521 | if (lclVarTree != nullptr) |
| 10522 | { |
| 10523 | lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG); |
| 10524 | } |
| 10525 | |
| 10526 | if (gtClone(destAddr)) |
| 10527 | { |
| 10528 | // destAddr is simple expression. No need to spill |
| 10529 | noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); |
| 10530 | } |
| 10531 | else |
| 10532 | { |
| 10533 | // destAddr is complex expression. Clone and spill it (unless |
| 10534 | // the source is a struct local that only has one field, in which case we'd only |
| 10535 | // use the address value once...) |
| 10536 | if (srcLclVar->lvFieldCnt > 1) |
| 10537 | { |
| 10538 | addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr' |
| 10539 | noway_assert(addrSpill != nullptr); |
| 10540 | } |
| 10541 | |
| 10542 | // TODO-CQ: this should be based on a more general |
| 10543 | // "BaseAddress" method, that handles fields of structs, before or after |
| 10544 | // morphing. |
| 10545 | if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR) |
| 10546 | { |
| 10547 | if (addrSpill->gtOp.gtOp1->IsLocal()) |
| 10548 | { |
| 10549 | // We will *not* consider this to define the local, but rather have each individual field assign |
| 10550 | // be a definition. |
| 10551 | addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK); |
| 10552 | assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) != |
| 10553 | PROMOTION_TYPE_INDEPENDENT); |
| 10554 | addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our |
| 10555 | // local stack frame |
| 10556 | } |
| 10557 | } |
| 10558 | } |
| 10559 | } |
| 10560 | |
| 10561 | if (addrSpill != nullptr) |
| 10562 | { |
| 10563 | // Spill the (complex) address to a BYREF temp. |
| 10564 | // Note, at most one address may need to be spilled. |
addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
| 10566 | |
| 10567 | lvaTable[addrSpillTemp].lvType = TYP_BYREF; |
| 10568 | |
| 10569 | if (addrSpillIsStackDest) |
| 10570 | { |
| 10571 | lvaTable[addrSpillTemp].lvStackByref = true; |
| 10572 | } |
| 10573 | |
| 10574 | tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill); |
| 10575 | |
| 10576 | // If we are assigning the address of a LclVar here |
| 10577 | // liveness does not account for this kind of address taken use. |
| 10578 | // |
| 10579 | // We have to mark this local as address exposed so |
| 10580 | // that we don't delete the definition for this LclVar |
| 10581 | // as a dead store later on. |
| 10582 | // |
| 10583 | if (addrSpill->OperGet() == GT_ADDR) |
| 10584 | { |
| 10585 | GenTree* addrOp = addrSpill->gtOp.gtOp1; |
| 10586 | if (addrOp->IsLocal()) |
| 10587 | { |
| 10588 | unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum; |
| 10589 | lvaTable[lclVarNum].lvAddrExposed = true; |
| 10590 | lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed)); |
| 10591 | } |
| 10592 | } |
| 10593 | } |
| 10594 | |
| 10595 | _AssignFields: |
| 10596 | |
| 10597 | // We may have allocated a temp above, and that may have caused the lvaTable to be expanded. |
| 10598 | // So, beyond this point we cannot rely on the old values of 'srcLclVar' and 'destLclVar'. |
| 10599 | for (unsigned i = 0; i < fieldCnt; ++i) |
| 10600 | { |
| 10601 | FieldSeqNode* curFieldSeq = nullptr; |
| 10602 | if (destDoFldAsg) |
| 10603 | { |
| 10604 | noway_assert(destLclNum != BAD_VAR_NUM); |
| 10605 | unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i; |
| 10606 | dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet()); |
| 10607 | // If it had been labeled a "USEASG", assignments to the the individual promoted fields are not. |
| 10608 | if (destAddr != nullptr) |
| 10609 | { |
| 10610 | noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR); |
| 10611 | dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG); |
| 10612 | } |
| 10613 | else |
| 10614 | { |
| 10615 | noway_assert(lclVarTree != nullptr); |
| 10616 | dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG); |
| 10617 | } |
| 10618 | // Don't CSE the lhs of an assignment. |
| 10619 | dest->gtFlags |= GTF_DONT_CSE; |
| 10620 | } |
| 10621 | else |
| 10622 | { |
| 10623 | noway_assert(srcDoFldAsg); |
| 10624 | noway_assert(srcLclNum != BAD_VAR_NUM); |
| 10625 | unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i; |
| 10626 | |
| 10627 | if (destSingleLclVarAsg) |
| 10628 | { |
| 10629 | noway_assert(fieldCnt == 1); |
| 10630 | noway_assert(destLclVar != nullptr); |
| 10631 | noway_assert(addrSpill == nullptr); |
| 10632 | |
| 10633 | dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet()); |
| 10634 | } |
| 10635 | else |
| 10636 | { |
| 10637 | if (addrSpill) |
| 10638 | { |
| 10639 | assert(addrSpillTemp != BAD_VAR_NUM); |
| 10640 | dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF); |
| 10641 | } |
| 10642 | else |
| 10643 | { |
| 10644 | dest = gtCloneExpr(destAddr); |
| 10645 | noway_assert(dest != nullptr); |
| 10646 | |
| 10647 | // Is the address of a local? |
| 10648 | GenTreeLclVarCommon* lclVarTree = nullptr; |
| 10649 | bool isEntire = false; |
| 10650 | bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr); |
| 10651 | if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire)) |
| 10652 | { |
| 10653 | lclVarTree->gtFlags |= GTF_VAR_DEF; |
| 10654 | if (!isEntire) |
| 10655 | { |
| 10656 | lclVarTree->gtFlags |= GTF_VAR_USEASG; |
| 10657 | } |
| 10658 | } |
| 10659 | } |
| 10660 | |
| 10661 | GenTree* fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL); |
| 10662 | // Have to set the field sequence -- which means we need the field handle. |
| 10663 | CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle(); |
| 10664 | CORINFO_FIELD_HANDLE fieldHnd = |
| 10665 | info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal); |
| 10666 | curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd); |
| 10667 | fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq; |
| 10668 | |
| 10669 | dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode); |
| 10670 | |
| 10671 | dest = gtNewIndir(lvaTable[fieldLclNum].TypeGet(), dest); |
| 10672 | |
| 10673 | // !!! The destination could be on stack. !!! |
| 10674 | // This flag will let us choose the correct write barrier. |
| 10675 | dest->gtFlags |= GTF_IND_TGTANYWHERE; |
| 10676 | } |
| 10677 | } |
| 10678 | |
| 10679 | if (srcDoFldAsg) |
| 10680 | { |
| 10681 | noway_assert(srcLclNum != BAD_VAR_NUM); |
| 10682 | unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i; |
| 10683 | src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet()); |
| 10684 | |
| 10685 | noway_assert(srcLclVarTree != nullptr); |
| 10686 | src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK; |
| 10687 | // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE, |
| 10688 | // but they are when they are under a GT_ADDR. |
| 10689 | src->gtFlags |= GTF_DONT_CSE; |
| 10690 | } |
| 10691 | else |
| 10692 | { |
| 10693 | noway_assert(destDoFldAsg); |
| 10694 | noway_assert(destLclNum != BAD_VAR_NUM); |
| 10695 | unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i; |
| 10696 | |
| 10697 | if (srcSingleLclVarAsg) |
| 10698 | { |
| 10699 | noway_assert(fieldCnt == 1); |
| 10700 | noway_assert(srcLclNum != BAD_VAR_NUM); |
| 10701 | noway_assert(addrSpill == nullptr); |
| 10702 | |
| 10703 | src = gtNewLclvNode(srcLclNum, lvaGetDesc(srcLclNum)->TypeGet()); |
| 10704 | } |
| 10705 | else |
| 10706 | { |
| 10707 | if (addrSpill) |
| 10708 | { |
| 10709 | assert(addrSpillTemp != BAD_VAR_NUM); |
| 10710 | src = gtNewLclvNode(addrSpillTemp, TYP_BYREF); |
| 10711 | } |
| 10712 | else |
| 10713 | { |
| 10714 | src = gtCloneExpr(srcAddr); |
| 10715 | noway_assert(src != nullptr); |
| 10716 | } |
| 10717 | |
| 10718 | CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle(); |
| 10719 | CORINFO_FIELD_HANDLE fieldHnd = |
| 10720 | info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal); |
| 10721 | curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd); |
| 10722 | var_types destType = lvaGetDesc(fieldLclNum)->lvType; |
| 10723 | |
| 10724 | bool done = false; |
| 10725 | if (lvaGetDesc(fieldLclNum)->lvFldOffset == 0) |
| 10726 | { |
| 10727 | // If this is a full-width use of the src via a different type, we need to create a GT_LCL_FLD. |
| 10728 | // (Note that if it was the same type, 'srcSingleLclVarAsg' would be true.) |
| 10729 | if (srcLclNum != BAD_VAR_NUM) |
| 10730 | { |
| 10731 | noway_assert(srcLclVarTree != nullptr); |
| 10732 | assert(destType != TYP_STRUCT); |
| 10733 | unsigned destSize = genTypeSize(destType); |
| 10734 | srcLclVar = lvaGetDesc(srcLclNum); |
| 10735 | unsigned srcSize = |
| 10736 | (srcLclVar->lvType == TYP_STRUCT) ? srcLclVar->lvExactSize : genTypeSize(srcLclVar); |
| 10737 | if (destSize == srcSize) |
| 10738 | { |
| 10739 | srcLclVarTree->gtFlags |= GTF_VAR_CAST; |
| 10740 | srcLclVarTree->ChangeOper(GT_LCL_FLD); |
| 10741 | srcLclVarTree->gtType = destType; |
| 10742 | srcLclVarTree->AsLclFld()->gtFieldSeq = curFieldSeq; |
| 10743 | src = srcLclVarTree; |
| 10744 | done = true; |
| 10745 | } |
| 10746 | } |
| 10747 | } |
| 10748 | else // if (lvaGetDesc(fieldLclNum)->lvFldOffset != 0) |
| 10749 | { |
| 10750 | src = gtNewOperNode(GT_ADD, TYP_BYREF, src, |
| 10751 | new (this, GT_CNS_INT) |
| 10752 | GenTreeIntCon(TYP_I_IMPL, lvaGetDesc(fieldLclNum)->lvFldOffset, |
| 10753 | curFieldSeq)); |
| 10754 | } |
| 10755 | if (!done) |
| 10756 | { |
| 10757 | src = gtNewIndir(destType, src); |
| 10758 | } |
| 10759 | } |
| 10760 | } |
| 10761 | |
| 10762 | noway_assert(dest->TypeGet() == src->TypeGet()); |
| 10763 | |
| 10764 | asg = gtNewAssignNode(dest, src); |
| 10765 | |
| 10766 | // If we spilled the address, and we didn't do individual field assignments to promoted fields, |
| 10767 | // and it was of a local, ensure that the destination local variable has been marked as address |
| 10768 | // exposed. Neither liveness nor SSA are able to track this kind of indirect assignments. |
| 10769 | if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM) |
| 10770 | { |
| 10771 | noway_assert(lvaGetDesc(destLclNum)->lvAddrExposed); |
| 10772 | } |
| 10773 | |
| 10774 | #if LOCAL_ASSERTION_PROP |
| 10775 | if (optLocalAssertionProp) |
| 10776 | { |
| 10777 | optAssertionGen(asg); |
| 10778 | } |
| 10779 | #endif // LOCAL_ASSERTION_PROP |
| 10780 | |
| 10781 | if (tree) |
| 10782 | { |
| 10783 | tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg); |
| 10784 | } |
| 10785 | else |
| 10786 | { |
| 10787 | tree = asg; |
| 10788 | } |
| 10789 | } |
| 10790 | } |
| 10791 | |
| 10792 | if (isLateArg) |
| 10793 | { |
| 10794 | tree->gtFlags |= GTF_LATE_ARG; |
| 10795 | } |
| 10796 | |
| 10797 | #ifdef DEBUG |
| 10798 | if (tree != oldTree) |
| 10799 | { |
| 10800 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 10801 | } |
| 10802 | |
| 10803 | if (verbose) |
| 10804 | { |
| 10805 | printf("\nfgMorphCopyBlock (after):\n" ); |
| 10806 | gtDispTree(tree); |
| 10807 | } |
| 10808 | #endif |
| 10809 | |
| 10810 | _Done: |
| 10811 | return tree; |
| 10812 | } |
| 10813 | |
| 10814 | // insert conversions and normalize to make tree amenable to register |
| 10815 | // FP architectures |
| 10816 | GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree) |
| 10817 | { |
| 10818 | if (tree->OperIsArithmetic()) |
| 10819 | { |
| 10820 | if (varTypeIsFloating(tree)) |
| 10821 | { |
| 10822 | GenTree* op1 = tree->gtOp.gtOp1; |
| 10823 | GenTree* op2 = tree->gtGetOp2(); |
| 10824 | |
| 10825 | assert(varTypeIsFloating(op1->TypeGet()) && varTypeIsFloating(op2->TypeGet())); |
| 10826 | |
| 10827 | if (op1->TypeGet() != tree->TypeGet()) |
| 10828 | { |
| 10829 | tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, false, tree->TypeGet()); |
| 10830 | } |
| 10831 | if (op2->TypeGet() != tree->TypeGet()) |
| 10832 | { |
| 10833 | tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, false, tree->TypeGet()); |
| 10834 | } |
| 10835 | } |
| 10836 | } |
| 10837 | else if (tree->OperIsCompare()) |
| 10838 | { |
| 10839 | GenTree* op1 = tree->gtOp.gtOp1; |
| 10840 | |
| 10841 | if (varTypeIsFloating(op1)) |
| 10842 | { |
| 10843 | GenTree* op2 = tree->gtGetOp2(); |
| 10844 | assert(varTypeIsFloating(op2)); |
| 10845 | |
| 10846 | if (op1->TypeGet() != op2->TypeGet()) |
| 10847 | { |
| 10848 | // both had better be floating, just one bigger than other |
| 10849 | if (op1->TypeGet() == TYP_FLOAT) |
| 10850 | { |
| 10851 | assert(op2->TypeGet() == TYP_DOUBLE); |
| 10852 | tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE); |
| 10853 | } |
| 10854 | else if (op2->TypeGet() == TYP_FLOAT) |
| 10855 | { |
| 10856 | assert(op1->TypeGet() == TYP_DOUBLE); |
| 10857 | tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE); |
| 10858 | } |
| 10859 | } |
| 10860 | } |
| 10861 | } |
| 10862 | |
| 10863 | return tree; |
| 10864 | } |
| 10865 | |
| 10866 | //-------------------------------------------------------------------------------------------------------------- |
| 10867 | // fgMorphRecognizeBoxNullable: |
| 10868 | // Recognize this pattern: |
| 10869 | // |
| 10870 | // stmtExpr void (IL 0x000... ???) |
| 10871 | // return int |
| 10872 | // CNS_INT ref null |
| 10873 | // EQ/NE/GT int |
| 10874 | // CALL help ref HELPER.CORINFO_HELP_BOX_NULLABLE |
| 10875 | // CNS_INT(h) long 0x7fed96836c8 class |
| 10876 | // ADDR byref |
| 10877 | // FIELD struct value |
| 10878 | // LCL_VAR ref V00 this |
| 10879 | // |
| 10880 | // which comes from this code: |
| 10881 | // |
| 10882 | // return this.value==null; |
| 10883 | // |
| 10884 | // and transform it into |
| 10885 | // |
| 10886 | // stmtExpr void (IL 0x000... ???) |
| 10887 | // return int |
| 10888 | // CNS_INT ref null |
| 10889 | // EQ/NE/GT int |
| 10890 | // IND bool |
| 10891 | // ADDR byref |
| 10892 | // FIELD struct value |
| 10893 | // LCL_VAR ref V00 this |
| 10894 | // |
| 10895 | // Arguments: |
| 10896 | // compare - Compare tree to optimize. |
| 10897 | // |
| 10898 | // return value: |
| 10899 | // A tree that has a call to CORINFO_HELP_BOX_NULLABLE optimized away if the pattern is found; |
| 10900 | // the original tree otherwise. |
| 10901 | // |
| 10902 | |
| 10903 | GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare) |
| 10904 | { |
| 10905 | GenTree* op1 = compare->gtOp.gtOp1; |
| 10906 | GenTree* op2 = compare->gtOp.gtOp2; |
| 10907 | GenTree* opCns; |
| 10908 | GenTreeCall* opCall; |
| 10909 | |
| 10910 | if (op1->IsCnsIntOrI() && op2->IsHelperCall()) |
| 10911 | { |
| 10912 | opCns = op1; |
| 10913 | opCall = op2->AsCall(); |
| 10914 | } |
| 10915 | else if (op1->IsHelperCall() && op2->IsCnsIntOrI()) |
| 10916 | { |
| 10917 | opCns = op2; |
| 10918 | opCall = op1->AsCall(); |
| 10919 | } |
| 10920 | else |
| 10921 | { |
| 10922 | return compare; |
| 10923 | } |
| 10924 | |
| 10925 | if (!opCns->IsIntegralConst(0)) |
| 10926 | { |
| 10927 | return compare; |
| 10928 | } |
| 10929 | |
| 10930 | if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE) |
| 10931 | { |
| 10932 | return compare; |
| 10933 | } |
| 10934 | |
| 10935 | // Get the nullable struct argument |
| 10936 | GenTree* arg = opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1; |
| 10937 | |
| 10938 | // Check for cases that are unsafe to optimize and return the unchanged tree |
| 10939 | if (arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || ((arg->gtFlags & GTF_LATE_ARG) != 0)) |
| 10940 | { |
| 10941 | return compare; |
| 10942 | } |
| 10943 | |
| 10944 | // Replace the box with an access of the nullable 'hasValue' field which is at the zero offset |
| 10945 | GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, arg); |
| 10946 | |
| 10947 | if (opCall == op1) |
| 10948 | { |
| 10949 | compare->gtOp.gtOp1 = newOp; |
| 10950 | } |
| 10951 | else |
| 10952 | { |
| 10953 | compare->gtOp.gtOp2 = newOp; |
| 10954 | } |
| 10955 | |
| 10956 | opCns->gtType = TYP_INT; |
| 10957 | |
| 10958 | return compare; |
| 10959 | } |
| 10960 | |
| 10961 | #ifdef FEATURE_SIMD |
| 10962 | |
| 10963 | //-------------------------------------------------------------------------------------------------------------- |
| 10964 | // getSIMDStructFromField: |
| 10965 | // Checking whether the field belongs to a simd struct or not. If it is, return the GenTree* for |
| 10966 | // the struct node, also base type, field index and simd size. If it is not, just return nullptr. |
| 10967 | // Usually if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, then we |
| 10968 | // should return nullptr, since in this case we should treat SIMD struct as a regular struct. |
| 10969 | // However if no matter what, you just want get simd struct node, you can set the ignoreUsedInSIMDIntrinsic |
| 10970 | // as true. Then there will be no IsUsedInSIMDIntrinsic checking, and it will return SIMD struct node |
| 10971 | // if the struct is a SIMD struct. |
| 10972 | // |
| 10973 | // Arguments: |
| 10974 | // tree - GentreePtr. This node will be checked to see this is a field which belongs to a simd |
| 10975 | // struct used for simd intrinsic or not. |
| 10976 | // pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut |
| 10977 | // to simd lclvar's base type. |
| 10978 | // indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut |
| 10979 | // equals to the index number of this field. |
| 10980 | // simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut |
| 10981 | // equals to the simd struct size which this tree belongs to. |
| 10982 | // ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore |
| 10983 | // the UsedInSIMDIntrinsic check. |
| 10984 | // |
| 10985 | // return value: |
| 10986 | // A GenTree* which points the simd lclvar tree belongs to. If the tree is not the simd |
| 10987 | // instrinic related field, return nullptr. |
| 10988 | // |
| 10989 | |
| 10990 | GenTree* Compiler::getSIMDStructFromField(GenTree* tree, |
| 10991 | var_types* pBaseTypeOut, |
| 10992 | unsigned* indexOut, |
| 10993 | unsigned* simdSizeOut, |
| 10994 | bool ignoreUsedInSIMDIntrinsic /*false*/) |
| 10995 | { |
| 10996 | GenTree* ret = nullptr; |
| 10997 | if (tree->OperGet() == GT_FIELD) |
| 10998 | { |
| 10999 | GenTree* objRef = tree->gtField.gtFldObj; |
| 11000 | if (objRef != nullptr) |
| 11001 | { |
| 11002 | GenTree* obj = nullptr; |
| 11003 | if (objRef->gtOper == GT_ADDR) |
| 11004 | { |
| 11005 | obj = objRef->gtOp.gtOp1; |
| 11006 | } |
| 11007 | else if (ignoreUsedInSIMDIntrinsic) |
| 11008 | { |
| 11009 | obj = objRef; |
| 11010 | } |
| 11011 | else |
| 11012 | { |
| 11013 | return nullptr; |
| 11014 | } |
| 11015 | |
| 11016 | if (isSIMDTypeLocal(obj)) |
| 11017 | { |
| 11018 | unsigned lclNum = obj->gtLclVarCommon.gtLclNum; |
| 11019 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 11020 | if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic) |
| 11021 | { |
| 11022 | *simdSizeOut = varDsc->lvExactSize; |
| 11023 | *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj); |
| 11024 | ret = obj; |
| 11025 | } |
| 11026 | } |
| 11027 | else if (obj->OperGet() == GT_SIMD) |
| 11028 | { |
| 11029 | ret = obj; |
| 11030 | GenTreeSIMD* simdNode = obj->AsSIMD(); |
| 11031 | *simdSizeOut = simdNode->gtSIMDSize; |
| 11032 | *pBaseTypeOut = simdNode->gtSIMDBaseType; |
| 11033 | } |
| 11034 | #ifdef FEATURE_HW_INTRINSICS |
| 11035 | else if (obj->OperIsSimdHWIntrinsic()) |
| 11036 | { |
| 11037 | ret = obj; |
| 11038 | GenTreeHWIntrinsic* simdNode = obj->AsHWIntrinsic(); |
| 11039 | *simdSizeOut = simdNode->gtSIMDSize; |
| 11040 | *pBaseTypeOut = simdNode->gtSIMDBaseType; |
| 11041 | } |
| 11042 | #endif // FEATURE_HW_INTRINSICS |
| 11043 | } |
| 11044 | } |
| 11045 | if (ret != nullptr) |
| 11046 | { |
| 11047 | unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut); |
| 11048 | *indexOut = tree->gtField.gtFldOffset / BaseTypeSize; |
| 11049 | } |
| 11050 | return ret; |
| 11051 | } |
| 11052 | |
| 11053 | /***************************************************************************** |
| 11054 | * If a read operation tries to access simd struct field, then transform the |
| 11055 | * operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree. |
| 11056 | * Otherwise, return the old tree. |
| 11057 | * Argument: |
| 11058 | * tree - GenTree*. If this pointer points to simd struct which is used for simd |
| 11059 | * intrinsic, we will morph it as simd intrinsic SIMDIntrinsicGetItem. |
| 11060 | * Return: |
| 11061 | * A GenTree* which points to the new tree. If the tree is not for simd intrinsic, |
| 11062 | * return nullptr. |
| 11063 | */ |
| 11064 | |
| 11065 | GenTree* Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTree* tree) |
| 11066 | { |
| 11067 | unsigned index = 0; |
| 11068 | var_types baseType = TYP_UNKNOWN; |
| 11069 | unsigned simdSize = 0; |
| 11070 | GenTree* simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize); |
| 11071 | if (simdStructNode != nullptr) |
| 11072 | { |
| 11073 | assert(simdSize >= ((index + 1) * genTypeSize(baseType))); |
| 11074 | GenTree* op2 = gtNewIconNode(index); |
| 11075 | tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize); |
| 11076 | #ifdef DEBUG |
| 11077 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 11078 | #endif |
| 11079 | } |
| 11080 | return tree; |
| 11081 | } |
| 11082 | |
| 11083 | /***************************************************************************** |
| 11084 | * Transform an assignment of a SIMD struct field to SIMD intrinsic |
| 11085 | * SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment, |
| 11086 | * then return the old tree. |
| 11087 | * Argument: |
| 11088 | * tree - GenTree*. If this pointer points to simd struct which is used for simd |
| 11089 | * intrinsic, we will morph it as simd intrinsic set. |
| 11090 | * Return: |
| 11091 | * A GenTree* which points to the new tree. If the tree is not for simd intrinsic, |
| 11092 | * return nullptr. |
| 11093 | */ |
| 11094 | |
| 11095 | GenTree* Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTree* tree) |
| 11096 | { |
| 11097 | assert(tree->OperGet() == GT_ASG); |
| 11098 | GenTree* op1 = tree->gtGetOp1(); |
| 11099 | GenTree* op2 = tree->gtGetOp2(); |
| 11100 | |
| 11101 | unsigned index = 0; |
| 11102 | var_types baseType = TYP_UNKNOWN; |
| 11103 | unsigned simdSize = 0; |
| 11104 | GenTree* simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize); |
| 11105 | if (simdOp1Struct != nullptr) |
| 11106 | { |
| 11107 | // Generate the simd set intrinsic |
| 11108 | assert(simdSize >= ((index + 1) * genTypeSize(baseType))); |
| 11109 | |
| 11110 | SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid; |
| 11111 | switch (index) |
| 11112 | { |
| 11113 | case 0: |
| 11114 | simdIntrinsicID = SIMDIntrinsicSetX; |
| 11115 | break; |
| 11116 | case 1: |
| 11117 | simdIntrinsicID = SIMDIntrinsicSetY; |
| 11118 | break; |
| 11119 | case 2: |
| 11120 | simdIntrinsicID = SIMDIntrinsicSetZ; |
| 11121 | break; |
| 11122 | case 3: |
| 11123 | simdIntrinsicID = SIMDIntrinsicSetW; |
| 11124 | break; |
| 11125 | default: |
| 11126 | noway_assert(!"There is no set intrinsic for index bigger than 3" ); |
| 11127 | } |
| 11128 | |
| 11129 | GenTree* target = gtClone(simdOp1Struct); |
| 11130 | assert(target != nullptr); |
| 11131 | var_types simdType = target->gtType; |
| 11132 | GenTree* simdTree = gtNewSIMDNode(simdType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize); |
| 11133 | |
| 11134 | tree->gtOp.gtOp1 = target; |
| 11135 | tree->gtOp.gtOp2 = simdTree; |
| 11136 | |
| 11137 | // fgMorphTree has already called fgMorphImplicitByRefArgs() on this assignment, but the source |
| 11138 | // and target have not yet been morphed. |
| 11139 | // Therefore, in case the source and/or target are now implicit byrefs, we need to call it again. |
| 11140 | if (fgMorphImplicitByRefArgs(tree)) |
| 11141 | { |
| 11142 | if (tree->gtGetOp1()->OperIsBlk()) |
| 11143 | { |
| 11144 | assert(tree->gtGetOp1()->TypeGet() == simdType); |
| 11145 | fgMorphBlkToInd(tree->gtGetOp1()->AsBlk(), simdType); |
| 11146 | } |
| 11147 | } |
| 11148 | #ifdef DEBUG |
| 11149 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 11150 | #endif |
| 11151 | } |
| 11152 | |
| 11153 | return tree; |
| 11154 | } |
| 11155 | |
| 11156 | #endif // FEATURE_SIMD |
| 11157 | |
| 11158 | /***************************************************************************** |
| 11159 | * |
| 11160 | * Transform the given GTK_SMPOP tree for code generation. |
| 11161 | */ |
| 11162 | |
| 11163 | #ifdef _PREFAST_ |
| 11164 | #pragma warning(push) |
| 11165 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
| 11166 | #endif |
| 11167 | GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) |
| 11168 | { |
| 11169 | ALLOCA_CHECK(); |
| 11170 | assert(tree->OperKind() & GTK_SMPOP); |
| 11171 | |
| 11172 | /* The steps in this function are : |
| 11173 | o Perform required preorder processing |
| 11174 | o Process the first, then second operand, if any |
| 11175 | o Perform required postorder morphing |
| 11176 | o Perform optional postorder morphing if optimizing |
| 11177 | */ |
| 11178 | |
| 11179 | bool isQmarkColon = false; |
| 11180 | |
| 11181 | #if LOCAL_ASSERTION_PROP |
| 11182 | AssertionIndex origAssertionCount = DUMMY_INIT(0); |
| 11183 | AssertionDsc* origAssertionTab = DUMMY_INIT(NULL); |
| 11184 | |
| 11185 | AssertionIndex thenAssertionCount = DUMMY_INIT(0); |
| 11186 | AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL); |
| 11187 | #endif |
| 11188 | |
| 11189 | if (fgGlobalMorph) |
| 11190 | { |
| 11191 | tree = fgMorphForRegisterFP(tree); |
| 11192 | } |
| 11193 | |
| 11194 | genTreeOps oper = tree->OperGet(); |
| 11195 | var_types typ = tree->TypeGet(); |
| 11196 | GenTree* op1 = tree->gtOp.gtOp1; |
| 11197 | GenTree* op2 = tree->gtGetOp2IfPresent(); |
| 11198 | |
| 11199 | /*------------------------------------------------------------------------- |
| 11200 | * First do any PRE-ORDER processing |
| 11201 | */ |
| 11202 | |
| 11203 | switch (oper) |
| 11204 | { |
| 11205 | // Some arithmetic operators need to use a helper call to the EE |
| 11206 | int helper; |
| 11207 | |
| 11208 | case GT_ASG: |
| 11209 | tree = fgDoNormalizeOnStore(tree); |
| 11210 | /* fgDoNormalizeOnStore can change op2 */ |
| 11211 | noway_assert(op1 == tree->gtOp.gtOp1); |
| 11212 | op2 = tree->gtOp.gtOp2; |
| 11213 | |
| 11214 | #ifdef FEATURE_SIMD |
| 11215 | { |
| 11216 | // We should check whether op2 should be assigned to a SIMD field or not. |
| 11217 | // If it is, we should tranlate the tree to simd intrinsic. |
| 11218 | assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0)); |
| 11219 | GenTree* newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree); |
| 11220 | typ = tree->TypeGet(); |
| 11221 | op1 = tree->gtGetOp1(); |
| 11222 | op2 = tree->gtGetOp2(); |
| 11223 | #ifdef DEBUG |
| 11224 | assert((tree == newTree) && (tree->OperGet() == oper)); |
| 11225 | if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0) |
| 11226 | { |
| 11227 | tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
| 11228 | } |
| 11229 | #endif // DEBUG |
| 11230 | } |
| 11231 | #endif |
| 11232 | |
| 11233 | // We can't CSE the LHS of an assignment. Only r-values can be CSEed. |
| 11234 | // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former |
| 11235 | // behavior, allow CSE'ing if is a struct type (or a TYP_REF transformed from a struct type) |
| 11236 | // TODO-1stClassStructs: improve this. |
| 11237 | if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT)) |
| 11238 | { |
| 11239 | op1->gtFlags |= GTF_DONT_CSE; |
| 11240 | } |
| 11241 | break; |
| 11242 | |
| 11243 | case GT_ADDR: |
| 11244 | |
| 11245 | /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */ |
| 11246 | op1->gtFlags |= GTF_DONT_CSE; |
| 11247 | break; |
| 11248 | |
| 11249 | case GT_QMARK: |
| 11250 | case GT_JTRUE: |
| 11251 | |
| 11252 | noway_assert(op1); |
| 11253 | |
| 11254 | if (op1->OperKind() & GTK_RELOP) |
| 11255 | { |
| 11256 | noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK)); |
| 11257 | /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does |
| 11258 | not need to materialize the result as a 0 or 1. */ |
| 11259 | |
| 11260 | /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */ |
| 11261 | op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); |
| 11262 | |
| 11263 | // Request that the codegen for op1 sets the condition flags |
| 11264 | // when it generates the code for op1. |
| 11265 | // |
| 11266 | // Codegen for op1 must set the condition flags if |
| 11267 | // this method returns true. |
| 11268 | // |
| 11269 | op1->gtRequestSetFlags(); |
| 11270 | } |
| 11271 | else |
| 11272 | { |
| 11273 | GenTree* effOp1 = op1->gtEffectiveVal(); |
| 11274 | noway_assert((effOp1->gtOper == GT_CNS_INT) && |
| 11275 | (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); |
| 11276 | } |
| 11277 | break; |
| 11278 | |
| 11279 | case GT_COLON: |
| 11280 | #if LOCAL_ASSERTION_PROP |
| 11281 | if (optLocalAssertionProp) |
| 11282 | #endif |
| 11283 | { |
| 11284 | isQmarkColon = true; |
| 11285 | } |
| 11286 | break; |
| 11287 | |
| 11288 | case GT_INDEX: |
| 11289 | return fgMorphArrayIndex(tree); |
| 11290 | |
| 11291 | case GT_CAST: |
| 11292 | return fgMorphCast(tree); |
| 11293 | |
| 11294 | case GT_MUL: |
| 11295 | |
| 11296 | #ifndef _TARGET_64BIT_ |
| 11297 | if (typ == TYP_LONG) |
| 11298 | { |
| 11299 | /* For (long)int1 * (long)int2, we dont actually do the |
| 11300 | casts, and just multiply the 32 bit values, which will |
| 11301 | give us the 64 bit result in edx:eax */ |
| 11302 | |
| 11303 | noway_assert(op2); |
| 11304 | if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST && |
| 11305 | genActualType(op1->CastFromType()) == TYP_INT && genActualType(op2->CastFromType()) == TYP_INT) && |
| 11306 | !op1->gtOverflow() && !op2->gtOverflow()) |
| 11307 | { |
| 11308 | // The casts have to be of the same signedness. |
| 11309 | if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED)) |
| 11310 | { |
| 11311 | // We see if we can force an int constant to change its signedness |
| 11312 | GenTree* constOp; |
| 11313 | if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT) |
| 11314 | constOp = op1; |
| 11315 | else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT) |
| 11316 | constOp = op2; |
| 11317 | else |
| 11318 | goto NO_MUL_64RSLT; |
| 11319 | |
| 11320 | if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000))) |
| 11321 | constOp->gtFlags ^= GTF_UNSIGNED; |
| 11322 | else |
| 11323 | goto NO_MUL_64RSLT; |
| 11324 | } |
| 11325 | |
| 11326 | // The only combination that can overflow |
| 11327 | if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED)) |
| 11328 | goto NO_MUL_64RSLT; |
| 11329 | |
| 11330 | /* Remaining combinations can never overflow during long mul. */ |
| 11331 | |
| 11332 | tree->gtFlags &= ~GTF_OVERFLOW; |
| 11333 | |
| 11334 | /* Do unsigned mul only if the casts were unsigned */ |
| 11335 | |
| 11336 | tree->gtFlags &= ~GTF_UNSIGNED; |
| 11337 | tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED; |
| 11338 | |
| 11339 | /* Since we are committing to GTF_MUL_64RSLT, we don't want |
| 11340 | the casts to be folded away. So morph the castees directly */ |
| 11341 | |
| 11342 | op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1); |
| 11343 | op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1); |
| 11344 | |
| 11345 | // Propagate side effect flags up the tree |
| 11346 | op1->gtFlags &= ~GTF_ALL_EFFECT; |
| 11347 | op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
| 11348 | op2->gtFlags &= ~GTF_ALL_EFFECT; |
| 11349 | op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
| 11350 | |
| 11351 | // If the GT_MUL can be altogether folded away, we should do that. |
| 11352 | |
| 11353 | if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) && |
| 11354 | opts.OptEnabled(CLFLG_CONSTANTFOLD)) |
| 11355 | { |
| 11356 | tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1); |
| 11357 | tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2); |
| 11358 | noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST); |
| 11359 | tree = gtFoldExprConst(tree); |
| 11360 | noway_assert(tree->OperIsConst()); |
| 11361 | return tree; |
| 11362 | } |
| 11363 | |
| 11364 | tree->gtFlags |= GTF_MUL_64RSLT; |
| 11365 | |
| 11366 | // If op1 and op2 are unsigned casts, we need to do an unsigned mult |
| 11367 | tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED); |
| 11368 | |
// Insert GT_NOP nodes for the cast operands so that they do not get folded away,
// and propagate the new flags. We don't want to CSE the casts because
// codegen expects GTF_MUL_64RSLT muls to have a certain layout.
| 11372 | |
| 11373 | if (op1->gtCast.CastOp()->OperGet() != GT_NOP) |
| 11374 | { |
| 11375 | op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp()); |
| 11376 | op1->gtFlags &= ~GTF_ALL_EFFECT; |
| 11377 | op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT); |
| 11378 | } |
| 11379 | |
| 11380 | if (op2->gtCast.CastOp()->OperGet() != GT_NOP) |
| 11381 | { |
| 11382 | op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp()); |
| 11383 | op2->gtFlags &= ~GTF_ALL_EFFECT; |
| 11384 | op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT); |
| 11385 | } |
| 11386 | |
| 11387 | op1->gtFlags |= GTF_DONT_CSE; |
| 11388 | op2->gtFlags |= GTF_DONT_CSE; |
| 11389 | |
| 11390 | tree->gtFlags &= ~GTF_ALL_EFFECT; |
| 11391 | tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT); |
| 11392 | |
| 11393 | goto DONE_MORPHING_CHILDREN; |
| 11394 | } |
| 11395 | else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0) |
| 11396 | { |
| 11397 | NO_MUL_64RSLT: |
| 11398 | if (tree->gtOverflow()) |
| 11399 | helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF; |
| 11400 | else |
| 11401 | helper = CORINFO_HELP_LMUL; |
| 11402 | |
| 11403 | goto USE_HELPER_FOR_ARITH; |
| 11404 | } |
| 11405 | else |
| 11406 | { |
| 11407 | /* We are seeing this node again. We have decided to use |
| 11408 | GTF_MUL_64RSLT, so leave it alone. */ |
| 11409 | |
| 11410 | assert(tree->gtIsValid64RsltMul()); |
| 11411 | } |
| 11412 | } |
| 11413 | #endif // !_TARGET_64BIT_ |
| 11414 | break; |
| 11415 | |
| 11416 | case GT_DIV: |
| 11417 | |
| 11418 | #ifndef _TARGET_64BIT_ |
| 11419 | if (typ == TYP_LONG) |
| 11420 | { |
| 11421 | helper = CORINFO_HELP_LDIV; |
| 11422 | goto USE_HELPER_FOR_ARITH; |
| 11423 | } |
| 11424 | |
| 11425 | #if USE_HELPERS_FOR_INT_DIV |
| 11426 | if (typ == TYP_INT) |
| 11427 | { |
| 11428 | helper = CORINFO_HELP_DIV; |
| 11429 | goto USE_HELPER_FOR_ARITH; |
| 11430 | } |
| 11431 | #endif |
| 11432 | #endif // !_TARGET_64BIT_ |
| 11433 | |
| 11434 | if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI()) |
| 11435 | { |
| 11436 | op2 = gtFoldExprConst(op2); |
| 11437 | } |
| 11438 | break; |
| 11439 | |
| 11440 | case GT_UDIV: |
| 11441 | |
| 11442 | #ifndef _TARGET_64BIT_ |
| 11443 | if (typ == TYP_LONG) |
| 11444 | { |
| 11445 | helper = CORINFO_HELP_ULDIV; |
| 11446 | goto USE_HELPER_FOR_ARITH; |
| 11447 | } |
| 11448 | #if USE_HELPERS_FOR_INT_DIV |
| 11449 | if (typ == TYP_INT) |
| 11450 | { |
| 11451 | helper = CORINFO_HELP_UDIV; |
| 11452 | goto USE_HELPER_FOR_ARITH; |
| 11453 | } |
| 11454 | #endif |
#endif // !_TARGET_64BIT_
| 11456 | break; |
| 11457 | |
| 11458 | case GT_MOD: |
| 11459 | |
| 11460 | if (varTypeIsFloating(typ)) |
| 11461 | { |
| 11462 | helper = CORINFO_HELP_DBLREM; |
| 11463 | noway_assert(op2); |
| 11464 | if (op1->TypeGet() == TYP_FLOAT) |
| 11465 | { |
| 11466 | if (op2->TypeGet() == TYP_FLOAT) |
| 11467 | { |
| 11468 | helper = CORINFO_HELP_FLTREM; |
| 11469 | } |
| 11470 | else |
| 11471 | { |
| 11472 | tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE); |
| 11473 | } |
| 11474 | } |
| 11475 | else if (op2->TypeGet() == TYP_FLOAT) |
| 11476 | { |
| 11477 | tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE); |
| 11478 | } |
| 11479 | goto USE_HELPER_FOR_ARITH; |
| 11480 | } |
| 11481 | |
// Do not use this optimization for signed mod (unlike UMOD, whose idiv optimization is done during codegen).
// A similar optimization for signed mod will not work for a negative, perfectly divisible
// HI-word. To make it correct, we would need to divide without the sign and then flip the
// result sign after the mod. This requires 18 opcodes + flow, making it not worth inlining.
| 11486 | goto ASSIGN_HELPER_FOR_MOD; |
| 11487 | |
| 11488 | case GT_UMOD: |
| 11489 | |
| 11490 | #ifdef _TARGET_ARMARCH_ |
| 11491 | // |
| 11492 | // Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization |
| 11493 | // |
| 11494 | #else // _TARGET_XARCH |
/* If this is an unsigned long mod whose op2 is a cast to long from a
   constant int, then don't morph to a call to the helper. This can be done
   faster inline using idiv.
*/
| 11499 | |
| 11500 | noway_assert(op2); |
| 11501 | if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) && |
| 11502 | ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) && |
| 11503 | ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED))) |
| 11504 | { |
| 11505 | if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT && |
| 11506 | op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 && |
| 11507 | op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff && |
| 11508 | (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED)) |
| 11509 | { |
| 11510 | tree->gtOp.gtOp2 = op2 = fgMorphCast(op2); |
| 11511 | noway_assert(op2->gtOper == GT_CNS_NATIVELONG); |
| 11512 | } |
| 11513 | |
| 11514 | if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 && |
| 11515 | op2->gtIntConCommon.LngValue() <= 0x3fffffff) |
| 11516 | { |
| 11517 | tree->gtOp.gtOp1 = op1 = fgMorphTree(op1); |
| 11518 | noway_assert(op1->TypeGet() == TYP_LONG); |
| 11519 | |
| 11520 | // Update flags for op1 morph |
| 11521 | tree->gtFlags &= ~GTF_ALL_EFFECT; |
| 11522 | |
| 11523 | tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant |
| 11524 | |
| 11525 | // If op1 is a constant, then do constant folding of the division operator |
| 11526 | if (op1->gtOper == GT_CNS_NATIVELONG) |
| 11527 | { |
| 11528 | tree = gtFoldExpr(tree); |
| 11529 | } |
| 11530 | return tree; |
| 11531 | } |
| 11532 | } |
| 11533 | #endif // _TARGET_XARCH |
| 11534 | |
| 11535 | ASSIGN_HELPER_FOR_MOD: |
| 11536 | |
| 11537 | // For "val % 1", return 0 if op1 doesn't have any side effects |
| 11538 | // and we are not in the CSE phase, we cannot discard 'tree' |
| 11539 | // because it may contain CSE expressions that we haven't yet examined. |
| 11540 | // |
| 11541 | if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase) |
| 11542 | { |
| 11543 | if (op2->IsIntegralConst(1)) |
| 11544 | { |
| 11545 | GenTree* zeroNode = gtNewZeroConNode(typ); |
| 11546 | #ifdef DEBUG |
| 11547 | zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 11548 | #endif |
| 11549 | DEBUG_DESTROY_NODE(tree); |
| 11550 | return zeroNode; |
| 11551 | } |
| 11552 | } |
| 11553 | |
| 11554 | #ifndef _TARGET_64BIT_ |
| 11555 | if (typ == TYP_LONG) |
| 11556 | { |
| 11557 | helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD; |
| 11558 | goto USE_HELPER_FOR_ARITH; |
| 11559 | } |
| 11560 | |
| 11561 | #if USE_HELPERS_FOR_INT_DIV |
| 11562 | if (typ == TYP_INT) |
| 11563 | { |
| 11564 | if (oper == GT_UMOD) |
| 11565 | { |
| 11566 | helper = CORINFO_HELP_UMOD; |
| 11567 | goto USE_HELPER_FOR_ARITH; |
| 11568 | } |
| 11569 | else if (oper == GT_MOD) |
| 11570 | { |
| 11571 | helper = CORINFO_HELP_MOD; |
| 11572 | goto USE_HELPER_FOR_ARITH; |
| 11573 | } |
| 11574 | } |
| 11575 | #endif |
| 11576 | #endif // !_TARGET_64BIT_ |
| 11577 | |
| 11578 | if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI()) |
| 11579 | { |
| 11580 | op2 = gtFoldExprConst(op2); |
| 11581 | } |
| 11582 | |
| 11583 | #ifdef _TARGET_ARM64_ |
// For ARM64 we don't have a remainder instruction, so the architecture
// manual suggests the following transformation to generate code for
// such an operator:
//
// a % b = a - (a / b) * b;
//
// TODO: there are special cases where it can be done better, for example
// when the modulo operation is unsigned and the divisor is an
// integer constant power of two. In this case, we can make the transform:
//
// a % b = a & (b - 1);
//
// Lowering supports it for all cases except when `a` is a constant, but
// in Morph we can't guarantee that `a` won't be transformed into a constant,
// so we can't guarantee that lowering will be able to do this optimization.
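// For example (illustrative): an unsigned "a % 8" can become "a & 7" in lowering,
// while the general transform below produces "a - (a / b) * b", which ARM64 codegen
// can emit as a divide followed by a multiply-subtract (msub).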
| 11599 | { |
| 11600 | // Do "a % b = a - (a / b) * b" morph always, see TODO before this block. |
| 11601 | bool doMorphModToSubMulDiv = true; |
| 11602 | |
| 11603 | if (doMorphModToSubMulDiv) |
| 11604 | { |
| 11605 | assert(!optValnumCSE_phase); |
| 11606 | |
| 11607 | tree = fgMorphModToSubMulDiv(tree->AsOp()); |
| 11608 | op1 = tree->gtOp.gtOp1; |
| 11609 | op2 = tree->gtOp.gtOp2; |
| 11610 | } |
| 11611 | } |
| 11612 | #else // !_TARGET_ARM64_ |
| 11613 | // If b is not a power of 2 constant then lowering replaces a % b |
| 11614 | // with a - (a / b) * b and applies magic division optimization to |
| 11615 | // a / b. The code may already contain an a / b expression (e.g. |
| 11616 | // x = a / 10; y = a % 10;) and then we end up with redundant code. |
| 11617 | // If we convert % to / here we give CSE the opportunity to eliminate |
| 11618 | // the redundant division. If there's no redundant division then |
// nothing is lost; lowering would have done this transform anyway.
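// For example (illustrative): for "x = a / 10; y = a % 10;" the transform below turns
// the GT_MOD into SUB(a, MUL(DIV(a, 10), 10)), and CSE can then reuse the existing
// DIV(a, 10) instead of computing the division twice.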
| 11620 | |
| 11621 | if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst())) |
| 11622 | { |
| 11623 | ssize_t divisorValue = op2->AsIntCon()->IconValue(); |
| 11624 | size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) |
| 11625 | : static_cast<size_t>(abs(divisorValue)); |
| 11626 | |
| 11627 | if (!isPow2(absDivisorValue)) |
| 11628 | { |
| 11629 | tree = fgMorphModToSubMulDiv(tree->AsOp()); |
| 11630 | op1 = tree->gtOp.gtOp1; |
| 11631 | op2 = tree->gtOp.gtOp2; |
| 11632 | } |
| 11633 | } |
| 11634 | #endif // !_TARGET_ARM64_ |
| 11635 | break; |
| 11636 | |
| 11637 | USE_HELPER_FOR_ARITH: |
| 11638 | { |
| 11639 | // TODO: this comment is wrong now, do an appropriate fix. |
| 11640 | /* We have to morph these arithmetic operations into helper calls |
| 11641 | before morphing the arguments (preorder), else the arguments |
| 11642 | won't get correct values of fgPtrArgCntCur. |
| 11643 | However, try to fold the tree first in case we end up with a |
| 11644 | simple node which won't need a helper call at all */ |
| 11645 | |
| 11646 | noway_assert(tree->OperIsBinary()); |
| 11647 | |
| 11648 | GenTree* oldTree = tree; |
| 11649 | |
| 11650 | tree = gtFoldExpr(tree); |
| 11651 | |
// Were we able to fold it?
| 11653 | // Note that gtFoldExpr may return a non-leaf even if successful |
| 11654 | // e.g. for something like "expr / 1" - see also bug #290853 |
| 11655 | if (tree->OperIsLeaf() || (oldTree != tree)) |
| 11656 | { |
| 11657 | return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree); |
| 11658 | } |
| 11659 | |
| 11660 | // Did we fold it into a comma node with throw? |
| 11661 | if (tree->gtOper == GT_COMMA) |
| 11662 | { |
| 11663 | noway_assert(fgIsCommaThrow(tree)); |
| 11664 | return fgMorphTree(tree); |
| 11665 | } |
| 11666 | } |
| 11667 | return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2)); |
| 11668 | |
| 11669 | case GT_RETURN: |
| 11670 | // normalize small integer return values |
| 11671 | if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) && (op1->TypeGet() != TYP_VOID) && |
| 11672 | fgCastNeeded(op1, info.compRetType)) |
| 11673 | { |
| 11674 | // Small-typed return values are normalized by the callee |
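// For example (illustrative): if the method's return type is 'byte', the return
// expression is wrapped in a CAST<byte> here so the value is already truncated
// (normalized) in the callee.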
| 11675 | op1 = gtNewCastNode(TYP_INT, op1, false, info.compRetType); |
| 11676 | |
| 11677 | // Propagate GTF_COLON_COND |
| 11678 | op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND); |
| 11679 | |
| 11680 | tree->gtOp.gtOp1 = fgMorphCast(op1); |
| 11681 | |
| 11682 | // Propagate side effect flags |
| 11683 | tree->gtFlags &= ~GTF_ALL_EFFECT; |
| 11684 | tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); |
| 11685 | |
| 11686 | return tree; |
| 11687 | } |
| 11688 | break; |
| 11689 | |
| 11690 | case GT_EQ: |
| 11691 | case GT_NE: |
| 11692 | { |
| 11693 | GenTree* optimizedTree = gtFoldTypeCompare(tree); |
| 11694 | |
| 11695 | if (optimizedTree != tree) |
| 11696 | { |
| 11697 | return fgMorphTree(optimizedTree); |
| 11698 | } |
| 11699 | } |
| 11700 | |
| 11701 | __fallthrough; |
| 11702 | |
| 11703 | case GT_GT: |
| 11704 | |
| 11705 | // Try to optimize away calls to CORINFO_HELP_BOX_NULLABLE for GT_EQ, GT_NE, and unsigned GT_GT. |
| 11706 | if ((oper != GT_GT) || tree->IsUnsigned()) |
| 11707 | { |
| 11708 | fgMorphRecognizeBoxNullable(tree); |
| 11709 | } |
| 11710 | |
| 11711 | op1 = tree->gtOp.gtOp1; |
| 11712 | op2 = tree->gtGetOp2IfPresent(); |
| 11713 | |
| 11714 | break; |
| 11715 | |
| 11716 | case GT_RUNTIMELOOKUP: |
| 11717 | return fgMorphTree(op1); |
| 11718 | |
| 11719 | #ifdef _TARGET_ARM_ |
| 11720 | case GT_INTRINSIC: |
| 11721 | if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) |
| 11722 | { |
| 11723 | switch (tree->TypeGet()) |
| 11724 | { |
| 11725 | case TYP_DOUBLE: |
| 11726 | return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1)); |
| 11727 | case TYP_FLOAT: |
| 11728 | return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1)); |
| 11729 | default: |
| 11730 | unreached(); |
| 11731 | } |
| 11732 | } |
| 11733 | break; |
| 11734 | #endif |
| 11735 | case GT_LIST: |
| 11736 | // Special handling for the arg list. |
| 11737 | return fgMorphArgList(tree->AsArgList(), mac); |
| 11738 | |
| 11739 | default: |
| 11740 | break; |
| 11741 | } |
| 11742 | |
| 11743 | #if !CPU_HAS_FP_SUPPORT |
| 11744 | tree = fgMorphToEmulatedFP(tree); |
| 11745 | #endif |
| 11746 | |
| 11747 | /*------------------------------------------------------------------------- |
| 11748 | * Process the first operand, if any |
| 11749 | */ |
| 11750 | |
| 11751 | if (op1) |
| 11752 | { |
| 11753 | |
| 11754 | #if LOCAL_ASSERTION_PROP |
| 11755 | // If we are entering the "then" part of a Qmark-Colon we must |
| 11756 | // save the state of the current copy assignment table |
| 11757 | // so that we can restore this state when entering the "else" part |
| 11758 | if (isQmarkColon) |
| 11759 | { |
| 11760 | noway_assert(optLocalAssertionProp); |
| 11761 | if (optAssertionCount) |
| 11762 | { |
| 11763 | noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea |
| 11764 | unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); |
| 11765 | origAssertionTab = (AssertionDsc*)ALLOCA(tabSize); |
| 11766 | origAssertionCount = optAssertionCount; |
| 11767 | memcpy(origAssertionTab, optAssertionTabPrivate, tabSize); |
| 11768 | } |
| 11769 | else |
| 11770 | { |
| 11771 | origAssertionCount = 0; |
| 11772 | origAssertionTab = nullptr; |
| 11773 | } |
| 11774 | } |
| 11775 | #endif // LOCAL_ASSERTION_PROP |
| 11776 | |
| 11777 | // We might need a new MorphAddressContext context. (These are used to convey |
| 11778 | // parent context about how addresses being calculated will be used; see the |
| 11779 | // specification comment for MorphAddrContext for full details.) |
| 11780 | // Assume it's an Ind context to start. |
| 11781 | MorphAddrContext subIndMac1(MACK_Ind); |
| 11782 | MorphAddrContext* subMac1 = mac; |
| 11783 | if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind) |
| 11784 | { |
| 11785 | switch (tree->gtOper) |
| 11786 | { |
| 11787 | case GT_ADDR: |
| 11788 | if (subMac1 == nullptr) |
| 11789 | { |
| 11790 | subMac1 = &subIndMac1; |
| 11791 | subMac1->m_kind = MACK_Addr; |
| 11792 | } |
| 11793 | break; |
| 11794 | case GT_COMMA: |
| 11795 | // In a comma, the incoming context only applies to the rightmost arg of the |
| 11796 | // comma list. The left arg (op1) gets a fresh context. |
| 11797 | subMac1 = nullptr; |
| 11798 | break; |
| 11799 | case GT_OBJ: |
| 11800 | case GT_BLK: |
| 11801 | case GT_DYN_BLK: |
| 11802 | case GT_IND: |
| 11803 | subMac1 = &subIndMac1; |
| 11804 | break; |
| 11805 | default: |
| 11806 | break; |
| 11807 | } |
| 11808 | } |
| 11809 | |
| 11810 | // For additions, if we're in an IND context keep track of whether |
| 11811 | // all offsets added to the address are constant, and their sum. |
| 11812 | if (tree->gtOper == GT_ADD && subMac1 != nullptr) |
| 11813 | { |
| 11814 | assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock. |
| 11815 | GenTree* otherOp = tree->gtOp.gtOp2; |
// Is the other operand a constant?
| 11817 | if (otherOp->IsCnsIntOrI()) |
| 11818 | { |
| 11819 | ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset); |
| 11820 | totalOffset += otherOp->gtIntConCommon.IconValue(); |
| 11821 | if (totalOffset.IsOverflow()) |
| 11822 | { |
| 11823 | // We will consider an offset so large as to overflow as "not a constant" -- |
| 11824 | // we will do a null check. |
| 11825 | subMac1->m_allConstantOffsets = false; |
| 11826 | } |
| 11827 | else |
| 11828 | { |
| 11829 | subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue(); |
| 11830 | } |
| 11831 | } |
| 11832 | else |
| 11833 | { |
| 11834 | subMac1->m_allConstantOffsets = false; |
| 11835 | } |
| 11836 | } |
| 11837 | |
| 11838 | // If gtOp1 is a GT_FIELD, we need to pass down the mac if |
| 11839 | // its parent is GT_ADDR, since the address of the field |
| 11840 | // is part of an ongoing address computation. Otherwise |
| 11841 | // op1 represents the value of the field and so any address |
| 11842 | // calculations it does are in a new context. |
| 11843 | if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR)) |
| 11844 | { |
| 11845 | subMac1 = nullptr; |
| 11846 | |
| 11847 | // The impact of this field's value to any ongoing |
| 11848 | // address computation is handled below when looking |
| 11849 | // at op2. |
| 11850 | } |
| 11851 | |
| 11852 | tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1); |
| 11853 | |
| 11854 | #if LOCAL_ASSERTION_PROP |
| 11855 | // If we are exiting the "then" part of a Qmark-Colon we must |
| 11856 | // save the state of the current copy assignment table |
| 11857 | // so that we can merge this state with the "else" part exit |
| 11858 | if (isQmarkColon) |
| 11859 | { |
| 11860 | noway_assert(optLocalAssertionProp); |
| 11861 | if (optAssertionCount) |
| 11862 | { |
| 11863 | noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea |
| 11864 | unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); |
| 11865 | thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize); |
| 11866 | thenAssertionCount = optAssertionCount; |
| 11867 | memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize); |
| 11868 | } |
| 11869 | else |
| 11870 | { |
| 11871 | thenAssertionCount = 0; |
| 11872 | thenAssertionTab = nullptr; |
| 11873 | } |
| 11874 | } |
| 11875 | #endif // LOCAL_ASSERTION_PROP |
| 11876 | |
| 11877 | /* Morphing along with folding and inlining may have changed the |
| 11878 | * side effect flags, so we have to reset them |
| 11879 | * |
| 11880 | * NOTE: Don't reset the exception flags on nodes that may throw */ |
| 11881 | |
| 11882 | assert(tree->gtOper != GT_CALL); |
| 11883 | |
| 11884 | if (!tree->OperRequiresCallFlag(this)) |
| 11885 | { |
| 11886 | tree->gtFlags &= ~GTF_CALL; |
| 11887 | } |
| 11888 | |
| 11889 | /* Propagate the new flags */ |
| 11890 | tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); |
| 11891 | |
// &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar itself does
| 11893 | // Similarly for clsVar |
| 11894 | if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR)) |
| 11895 | { |
| 11896 | tree->gtFlags &= ~GTF_GLOB_REF; |
| 11897 | } |
| 11898 | } // if (op1) |
| 11899 | |
| 11900 | /*------------------------------------------------------------------------- |
| 11901 | * Process the second operand, if any |
| 11902 | */ |
| 11903 | |
| 11904 | if (op2) |
| 11905 | { |
| 11906 | |
| 11907 | #if LOCAL_ASSERTION_PROP |
| 11908 | // If we are entering the "else" part of a Qmark-Colon we must |
| 11909 | // reset the state of the current copy assignment table |
| 11910 | if (isQmarkColon) |
| 11911 | { |
| 11912 | noway_assert(optLocalAssertionProp); |
| 11913 | optAssertionReset(0); |
| 11914 | if (origAssertionCount) |
| 11915 | { |
| 11916 | size_t tabSize = origAssertionCount * sizeof(AssertionDsc); |
| 11917 | memcpy(optAssertionTabPrivate, origAssertionTab, tabSize); |
| 11918 | optAssertionReset(origAssertionCount); |
| 11919 | } |
| 11920 | } |
| 11921 | #endif // LOCAL_ASSERTION_PROP |
| 11922 | |
| 11923 | // We might need a new MorphAddressContext context to use in evaluating op2. |
| 11924 | // (These are used to convey parent context about how addresses being calculated |
| 11925 | // will be used; see the specification comment for MorphAddrContext for full details.) |
| 11926 | // Assume it's an Ind context to start. |
| 11927 | switch (tree->gtOper) |
| 11928 | { |
| 11929 | case GT_ADD: |
| 11930 | if (mac != nullptr && mac->m_kind == MACK_Ind) |
| 11931 | { |
| 11932 | GenTree* otherOp = tree->gtOp.gtOp1; |
// Is the other operand a constant?
| 11934 | if (otherOp->IsCnsIntOrI()) |
| 11935 | { |
| 11936 | mac->m_totalOffset += otherOp->gtIntConCommon.IconValue(); |
| 11937 | } |
| 11938 | else |
| 11939 | { |
| 11940 | mac->m_allConstantOffsets = false; |
| 11941 | } |
| 11942 | } |
| 11943 | break; |
| 11944 | default: |
| 11945 | break; |
| 11946 | } |
| 11947 | |
| 11948 | // If gtOp2 is a GT_FIELD, we must be taking its value, |
| 11949 | // so it should evaluate its address in a new context. |
| 11950 | if (op2->gtOper == GT_FIELD) |
| 11951 | { |
| 11952 | // The impact of this field's value to any ongoing |
| 11953 | // address computation is handled above when looking |
| 11954 | // at op1. |
| 11955 | mac = nullptr; |
| 11956 | } |
| 11957 | |
| 11958 | tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac); |
| 11959 | |
| 11960 | /* Propagate the side effect flags from op2 */ |
| 11961 | |
| 11962 | tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT); |
| 11963 | |
| 11964 | #if LOCAL_ASSERTION_PROP |
| 11965 | // If we are exiting the "else" part of a Qmark-Colon we must |
| 11966 | // merge the state of the current copy assignment table with |
| 11967 | // that of the exit of the "then" part. |
| 11968 | if (isQmarkColon) |
| 11969 | { |
| 11970 | noway_assert(optLocalAssertionProp); |
| 11971 | // If either exit table has zero entries then |
| 11972 | // the merged table also has zero entries |
| 11973 | if (optAssertionCount == 0 || thenAssertionCount == 0) |
| 11974 | { |
| 11975 | optAssertionReset(0); |
| 11976 | } |
| 11977 | else |
| 11978 | { |
| 11979 | size_t tabSize = optAssertionCount * sizeof(AssertionDsc); |
| 11980 | if ((optAssertionCount != thenAssertionCount) || |
| 11981 | (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0)) |
| 11982 | { |
// Yes, they are different, so we have to find the merged set.
// Iterate over the copy assignment table, removing any entries
// that do not have an exact match in thenAssertionTab.
| 11986 | AssertionIndex index = 1; |
| 11987 | while (index <= optAssertionCount) |
| 11988 | { |
| 11989 | AssertionDsc* curAssertion = optGetAssertion(index); |
| 11990 | |
| 11991 | for (unsigned j = 0; j < thenAssertionCount; j++) |
| 11992 | { |
| 11993 | AssertionDsc* thenAssertion = &thenAssertionTab[j]; |
| 11994 | |
| 11995 | // Do the left sides match? |
| 11996 | if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) && |
| 11997 | (curAssertion->assertionKind == thenAssertion->assertionKind)) |
| 11998 | { |
| 11999 | // Do the right sides match? |
| 12000 | if ((curAssertion->op2.kind == thenAssertion->op2.kind) && |
| 12001 | (curAssertion->op2.lconVal == thenAssertion->op2.lconVal)) |
| 12002 | { |
| 12003 | goto KEEP; |
| 12004 | } |
| 12005 | else |
| 12006 | { |
| 12007 | goto REMOVE; |
| 12008 | } |
| 12009 | } |
| 12010 | } |
| 12011 | // |
// If we fall out of the loop above then we didn't find
// any matching entry in thenAssertionTab, so the assertion must
// have been killed on that path; we remove it here.
| 12015 | // |
| 12016 | REMOVE: |
| 12017 | // The data at optAssertionTabPrivate[i] is to be removed |
| 12018 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 12019 | #ifdef DEBUG |
| 12020 | if (verbose) |
| 12021 | { |
| 12022 | printf("The QMARK-COLON " ); |
| 12023 | printTreeID(tree); |
| 12024 | printf(" removes assertion candidate #%d\n" , index); |
| 12025 | } |
| 12026 | #endif |
| 12027 | optAssertionRemove(index); |
| 12028 | continue; |
| 12029 | KEEP: |
| 12030 | // The data at optAssertionTabPrivate[i] is to be kept |
| 12031 | index++; |
| 12032 | } |
| 12033 | } |
| 12034 | } |
| 12035 | } |
| 12036 | #endif // LOCAL_ASSERTION_PROP |
| 12037 | } // if (op2) |
| 12038 | |
| 12039 | DONE_MORPHING_CHILDREN: |
| 12040 | |
| 12041 | if (tree->OperMayThrow(this)) |
| 12042 | { |
| 12043 | // Mark the tree node as potentially throwing an exception |
| 12044 | tree->gtFlags |= GTF_EXCEPT; |
| 12045 | } |
| 12046 | else |
| 12047 | { |
| 12048 | if (tree->OperIsIndirOrArrLength()) |
| 12049 | { |
| 12050 | tree->gtFlags |= GTF_IND_NONFAULTING; |
| 12051 | } |
| 12052 | if (((op1 == nullptr) || ((op1->gtFlags & GTF_EXCEPT) == 0)) && |
| 12053 | ((op2 == nullptr) || ((op2->gtFlags & GTF_EXCEPT) == 0))) |
| 12054 | { |
| 12055 | tree->gtFlags &= ~GTF_EXCEPT; |
| 12056 | } |
| 12057 | } |
| 12058 | |
| 12059 | if (tree->OperRequiresAsgFlag()) |
| 12060 | { |
| 12061 | tree->gtFlags |= GTF_ASG; |
| 12062 | } |
| 12063 | else |
| 12064 | { |
| 12065 | if (((op1 == nullptr) || ((op1->gtFlags & GTF_ASG) == 0)) && |
| 12066 | ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0))) |
| 12067 | { |
| 12068 | tree->gtFlags &= ~GTF_ASG; |
| 12069 | } |
| 12070 | } |
| 12071 | |
| 12072 | if (tree->OperRequiresCallFlag(this)) |
| 12073 | { |
| 12074 | tree->gtFlags |= GTF_CALL; |
| 12075 | } |
| 12076 | else |
| 12077 | { |
| 12078 | if (((op1 == nullptr) || ((op1->gtFlags & GTF_CALL) == 0)) && |
| 12079 | ((op2 == nullptr) || ((op2->gtFlags & GTF_CALL) == 0))) |
| 12080 | { |
| 12081 | tree->gtFlags &= ~GTF_CALL; |
| 12082 | } |
| 12083 | } |
| 12084 | /*------------------------------------------------------------------------- |
| 12085 | * Now do POST-ORDER processing |
| 12086 | */ |
| 12087 | |
| 12088 | if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) && (op2 && !varTypeIsGC(op2->TypeGet()))) |
| 12089 | { |
| 12090 | // The tree is really not GC but was marked as such. Now that the |
| 12091 | // children have been unmarked, unmark the tree too. |
| 12092 | |
// Remember that GT_COMMA inherits its type only from op2
| 12094 | if (tree->gtOper == GT_COMMA) |
| 12095 | { |
| 12096 | tree->gtType = genActualType(op2->TypeGet()); |
| 12097 | } |
| 12098 | else |
| 12099 | { |
| 12100 | tree->gtType = genActualType(op1->TypeGet()); |
| 12101 | } |
| 12102 | } |
| 12103 | |
| 12104 | GenTree* oldTree = tree; |
| 12105 | |
| 12106 | GenTree* qmarkOp1 = nullptr; |
| 12107 | GenTree* qmarkOp2 = nullptr; |
| 12108 | |
| 12109 | if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON)) |
| 12110 | { |
| 12111 | qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1; |
| 12112 | qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2; |
| 12113 | } |
| 12114 | |
// Try to fold it; maybe we get lucky.
| 12116 | tree = gtFoldExpr(tree); |
| 12117 | |
| 12118 | if (oldTree != tree) |
| 12119 | { |
| 12120 | /* if gtFoldExpr returned op1 or op2 then we are done */ |
| 12121 | if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2)) |
| 12122 | { |
| 12123 | return tree; |
| 12124 | } |
| 12125 | |
| 12126 | /* If we created a comma-throw tree then we need to morph op1 */ |
| 12127 | if (fgIsCommaThrow(tree)) |
| 12128 | { |
| 12129 | tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1); |
| 12130 | fgMorphTreeDone(tree); |
| 12131 | return tree; |
| 12132 | } |
| 12133 | |
| 12134 | return tree; |
| 12135 | } |
| 12136 | else if (tree->OperKind() & GTK_CONST) |
| 12137 | { |
| 12138 | return tree; |
| 12139 | } |
| 12140 | |
| 12141 | /* gtFoldExpr could have used setOper to change the oper */ |
| 12142 | oper = tree->OperGet(); |
| 12143 | typ = tree->TypeGet(); |
| 12144 | |
| 12145 | /* gtFoldExpr could have changed op1 and op2 */ |
| 12146 | op1 = tree->gtOp.gtOp1; |
| 12147 | op2 = tree->gtGetOp2IfPresent(); |
| 12148 | |
| 12149 | // Do we have an integer compare operation? |
| 12150 | // |
| 12151 | if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet())) |
| 12152 | { |
| 12153 | // Are we comparing against zero? |
| 12154 | // |
| 12155 | if (op2->IsIntegralConst(0)) |
| 12156 | { |
| 12157 | // Request that the codegen for op1 sets the condition flags |
| 12158 | // when it generates the code for op1. |
| 12159 | // |
| 12160 | // Codegen for op1 must set the condition flags if |
| 12161 | // this method returns true. |
| 12162 | // |
| 12163 | op1->gtRequestSetFlags(); |
| 12164 | } |
| 12165 | } |
| 12166 | /*------------------------------------------------------------------------- |
| 12167 | * Perform the required oper-specific postorder morphing |
| 12168 | */ |
| 12169 | |
| 12170 | GenTree* temp; |
| 12171 | GenTree* cns1; |
| 12172 | GenTree* cns2; |
| 12173 | size_t ival1, ival2; |
| 12174 | GenTree* lclVarTree; |
| 12175 | GenTree* effectiveOp1; |
| 12176 | FieldSeqNode* fieldSeq = nullptr; |
| 12177 | |
| 12178 | switch (oper) |
| 12179 | { |
| 12180 | case GT_ASG: |
| 12181 | |
| 12182 | lclVarTree = fgIsIndirOfAddrOfLocal(op1); |
| 12183 | if (lclVarTree != nullptr) |
| 12184 | { |
| 12185 | lclVarTree->gtFlags |= GTF_VAR_DEF; |
| 12186 | } |
| 12187 | |
| 12188 | effectiveOp1 = op1->gtEffectiveVal(); |
| 12189 | |
| 12190 | if (effectiveOp1->OperIsConst()) |
| 12191 | { |
| 12192 | op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1); |
| 12193 | tree->gtOp.gtOp1 = op1; |
| 12194 | } |
| 12195 | |
| 12196 | /* If we are storing a small type, we might be able to omit a cast */ |
| 12197 | if ((effectiveOp1->gtOper == GT_IND) && varTypeIsSmall(effectiveOp1->TypeGet())) |
| 12198 | { |
| 12199 | if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow()) |
| 12200 | { |
| 12201 | var_types castType = op2->CastToType(); |
| 12202 | |
| 12203 | // If we are performing a narrowing cast and |
| 12204 | // castType is larger or the same as op1's type |
| 12205 | // then we can discard the cast. |
| 12206 | |
| 12207 | if (varTypeIsSmall(castType) && (genTypeSize(castType) >= genTypeSize(effectiveOp1->TypeGet()))) |
| 12208 | { |
| 12209 | tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp(); |
| 12210 | } |
| 12211 | } |
| 12212 | else if (op2->OperIsCompare() && varTypeIsByte(effectiveOp1->TypeGet())) |
| 12213 | { |
| 12214 | /* We don't need to zero extend the setcc instruction */ |
| 12215 | op2->gtType = TYP_BYTE; |
| 12216 | } |
| 12217 | } |
// If we introduced a CSE we may need to undo the optimization above
// (i.e. "op2->gtType = TYP_BYTE;", which depends upon op1 being a GT_IND of a byte type)
// When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
| 12221 | else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR)) |
| 12222 | { |
| 12223 | unsigned varNum = op1->gtLclVarCommon.gtLclNum; |
| 12224 | LclVarDsc* varDsc = &lvaTable[varNum]; |
| 12225 | |
| 12226 | /* We again need to zero extend the setcc instruction */ |
| 12227 | op2->gtType = varDsc->TypeGet(); |
| 12228 | } |
| 12229 | fgAssignSetVarDef(tree); |
| 12230 | |
| 12231 | /* We can't CSE the LHS of an assignment */ |
/* We must also set this in the pre-morphing phase, otherwise assertionProp doesn't see it */
| 12233 | if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT)) |
| 12234 | { |
| 12235 | op1->gtFlags |= GTF_DONT_CSE; |
| 12236 | } |
| 12237 | break; |
| 12238 | |
| 12239 | case GT_EQ: |
| 12240 | case GT_NE: |
| 12241 | |
| 12242 | /* Make sure we're allowed to do this */ |
| 12243 | |
| 12244 | if (optValnumCSE_phase) |
| 12245 | { |
| 12246 | // It is not safe to reorder/delete CSE's |
| 12247 | break; |
| 12248 | } |
| 12249 | |
| 12250 | cns2 = op2; |
| 12251 | |
| 12252 | /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */ |
| 12253 | |
| 12254 | if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0) |
| 12255 | { |
| 12256 | op1 = tree->gtOp.gtOp1; |
| 12257 | |
| 12258 | /* Since this can occur repeatedly we use a while loop */ |
| 12259 | |
| 12260 | while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && |
| 12261 | (op1->gtType == TYP_INT) && (op1->gtOverflow() == false)) |
| 12262 | { |
| 12263 | /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */ |
| 12264 | |
| 12265 | ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal; |
| 12266 | ival2 = cns2->gtIntCon.gtIconVal; |
| 12267 | |
| 12268 | if (op1->gtOper == GT_ADD) |
| 12269 | { |
| 12270 | ival2 -= ival1; |
| 12271 | } |
| 12272 | else |
| 12273 | { |
| 12274 | ival2 += ival1; |
| 12275 | } |
| 12276 | cns2->gtIntCon.gtIconVal = ival2; |
| 12277 | |
| 12278 | #ifdef _TARGET_64BIT_ |
| 12279 | // we need to properly re-sign-extend or truncate as needed. |
| 12280 | cns2->AsIntCon()->TruncateOrSignExtend32(); |
| 12281 | #endif // _TARGET_64BIT_ |
| 12282 | |
| 12283 | op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1; |
| 12284 | } |
| 12285 | } |
| 12286 | |
| 12287 | // |
| 12288 | // Here we look for the following tree |
| 12289 | // |
| 12290 | // EQ/NE |
| 12291 | // / \ |
| 12292 | // op1 CNS 0/1 |
| 12293 | // |
| 12294 | ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1 |
| 12295 | |
| 12296 | // cast to unsigned allows test for both 0 and 1 |
| 12297 | if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U)) |
| 12298 | { |
| 12299 | ival2 = (size_t)cns2->gtIntConCommon.IconValue(); |
| 12300 | } |
| 12301 | else // cast to UINT64 allows test for both 0 and 1 |
| 12302 | if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL)) |
| 12303 | { |
| 12304 | ival2 = (size_t)cns2->gtIntConCommon.LngValue(); |
| 12305 | } |
| 12306 | |
| 12307 | if (ival2 != INT_MAX) |
| 12308 | { |
| 12309 | // If we don't have a comma and relop, we can't do this optimization |
| 12310 | // |
| 12311 | if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare())) |
| 12312 | { |
| 12313 | // Here we look for the following transformation |
| 12314 | // |
| 12315 | // EQ/NE Possible REVERSE(RELOP) |
| 12316 | // / \ / \ |
| 12317 | // COMMA CNS 0/1 -> COMMA relop_op2 |
| 12318 | // / \ / \ |
| 12319 | // x RELOP x relop_op1 |
| 12320 | // / \ |
| 12321 | // relop_op1 relop_op2 |
| 12322 | // |
| 12323 | // |
| 12324 | // |
| 12325 | GenTree* comma = op1; |
| 12326 | GenTree* relop = comma->gtOp.gtOp2; |
| 12327 | |
| 12328 | GenTree* relop_op1 = relop->gtOp.gtOp1; |
| 12329 | |
| 12330 | bool reverse = ((ival2 == 0) == (oper == GT_EQ)); |
| 12331 | |
| 12332 | if (reverse) |
| 12333 | { |
| 12334 | gtReverseCond(relop); |
| 12335 | } |
| 12336 | |
| 12337 | relop->gtOp.gtOp1 = comma; |
| 12338 | comma->gtOp.gtOp2 = relop_op1; |
| 12339 | |
| 12340 | // Comma now has fewer nodes underneath it, so we need to regenerate its flags |
| 12341 | comma->gtFlags &= ~GTF_ALL_EFFECT; |
| 12342 | comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT; |
| 12343 | comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT; |
| 12344 | |
| 12345 | noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0); |
| 12346 | noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0); |
| 12347 | relop->gtFlags |= |
| 12348 | tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT); |
| 12349 | |
| 12350 | return relop; |
| 12351 | } |
| 12352 | |
| 12353 | if (op1->gtOper == GT_COMMA) |
| 12354 | { |
| 12355 | // Here we look for the following tree |
| 12356 | // and when the LCL_VAR is a temp we can fold the tree: |
| 12357 | // |
| 12358 | // EQ/NE EQ/NE |
| 12359 | // / \ / \ |
| 12360 | // COMMA CNS 0/1 -> RELOP CNS 0/1 |
| 12361 | // / \ / \ |
| 12362 | // ASG LCL_VAR |
| 12363 | // / \ |
| 12364 | // LCL_VAR RELOP |
| 12365 | // / \ |
| 12366 | // |
| 12367 | |
| 12368 | GenTree* asg = op1->gtOp.gtOp1; |
| 12369 | GenTree* lcl = op1->gtOp.gtOp2; |
| 12370 | |
| 12371 | /* Make sure that the left side of the comma is the assignment of the LCL_VAR */ |
| 12372 | if (asg->gtOper != GT_ASG) |
| 12373 | { |
| 12374 | goto SKIP; |
| 12375 | } |
| 12376 | |
| 12377 | /* The right side of the comma must be a LCL_VAR temp */ |
| 12378 | if (lcl->gtOper != GT_LCL_VAR) |
| 12379 | { |
| 12380 | goto SKIP; |
| 12381 | } |
| 12382 | |
| 12383 | unsigned lclNum = lcl->gtLclVarCommon.gtLclNum; |
| 12384 | noway_assert(lclNum < lvaCount); |
| 12385 | |
/* If the LCL_VAR is not a temp then bail; a temp has a single def */
| 12387 | if (!lvaTable[lclNum].lvIsTemp) |
| 12388 | { |
| 12389 | goto SKIP; |
| 12390 | } |
| 12391 | |
| 12392 | #if FEATURE_ANYCSE |
/* If the LCL_VAR is a CSE temp then bail; it could have multiple defs/uses */
| 12394 | // Fix 383856 X86/ARM ILGEN |
| 12395 | if (lclNumIsCSE(lclNum)) |
| 12396 | { |
| 12397 | goto SKIP; |
| 12398 | } |
| 12399 | #endif |
| 12400 | |
| 12401 | /* We also must be assigning the result of a RELOP */ |
| 12402 | if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR) |
| 12403 | { |
| 12404 | goto SKIP; |
| 12405 | } |
| 12406 | |
/* Both of the LCL_VARs must match */
| 12408 | if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum) |
| 12409 | { |
| 12410 | goto SKIP; |
| 12411 | } |
| 12412 | |
| 12413 | /* If right side of asg is not a RELOP then skip */ |
| 12414 | if (!asg->gtOp.gtOp2->OperIsCompare()) |
| 12415 | { |
| 12416 | goto SKIP; |
| 12417 | } |
| 12418 | |
| 12419 | LclVarDsc* varDsc = lvaTable + lclNum; |
| 12420 | |
| 12421 | /* Set op1 to the right side of asg, (i.e. the RELOP) */ |
| 12422 | op1 = asg->gtOp.gtOp2; |
| 12423 | |
| 12424 | DEBUG_DESTROY_NODE(asg->gtOp.gtOp1); |
| 12425 | DEBUG_DESTROY_NODE(lcl); |
| 12426 | } |
| 12427 | |
| 12428 | if (op1->OperIsCompare()) |
| 12429 | { |
| 12430 | // Here we look for the following tree |
| 12431 | // |
| 12432 | // EQ/NE -> RELOP/!RELOP |
| 12433 | // / \ / \ |
| 12434 | // RELOP CNS 0/1 |
| 12435 | // / \ |
| 12436 | // |
| 12437 | // Note that we will remove/destroy the EQ/NE node and move |
// the RELOP up into its location.
| 12439 | |
| 12440 | /* Here we reverse the RELOP if necessary */ |
| 12441 | |
| 12442 | bool reverse = ((ival2 == 0) == (oper == GT_EQ)); |
| 12443 | |
| 12444 | if (reverse) |
| 12445 | { |
| 12446 | gtReverseCond(op1); |
| 12447 | } |
| 12448 | |
| 12449 | /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */ |
| 12450 | op1->gtType = tree->gtType; |
| 12451 | |
| 12452 | noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0); |
| 12453 | op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE); |
| 12454 | |
| 12455 | DEBUG_DESTROY_NODE(tree); |
| 12456 | return op1; |
| 12457 | } |
| 12458 | |
| 12459 | // |
| 12460 | // Now we check for a compare with the result of an '&' operator |
| 12461 | // |
| 12462 | // Here we look for the following transformation: |
| 12463 | // |
| 12464 | // EQ/NE EQ/NE |
| 12465 | // / \ / \ |
| 12466 | // AND CNS 0/1 -> AND CNS 0 |
| 12467 | // / \ / \ |
| 12468 | // RSZ/RSH CNS 1 x CNS (1 << y) |
| 12469 | // / \ |
| 12470 | // x CNS_INT +y |
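//
// For example: "((x >> 5) & 1) == 1" becomes "(x & 0x20) != 0" (the compare is
// reversed below because the original constant was 1).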
| 12471 | |
| 12472 | if (op1->gtOper == GT_AND) |
| 12473 | { |
| 12474 | GenTree* andOp = op1; |
| 12475 | GenTree* rshiftOp = andOp->gtOp.gtOp1; |
| 12476 | |
| 12477 | if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH)) |
| 12478 | { |
| 12479 | goto SKIP; |
| 12480 | } |
| 12481 | |
| 12482 | if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI()) |
| 12483 | { |
| 12484 | goto SKIP; |
| 12485 | } |
| 12486 | |
| 12487 | ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal; |
| 12488 | |
| 12489 | if (shiftAmount < 0) |
| 12490 | { |
| 12491 | goto SKIP; |
| 12492 | } |
| 12493 | |
| 12494 | if (!andOp->gtOp.gtOp2->IsIntegralConst(1)) |
| 12495 | { |
| 12496 | goto SKIP; |
| 12497 | } |
| 12498 | |
| 12499 | if (andOp->gtType == TYP_INT) |
| 12500 | { |
| 12501 | if (shiftAmount > 31) |
| 12502 | { |
| 12503 | goto SKIP; |
| 12504 | } |
| 12505 | |
| 12506 | UINT32 newAndOperand = ((UINT32)1) << shiftAmount; |
| 12507 | |
| 12508 | andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand; |
| 12509 | |
| 12510 | // Reverse the cond if necessary |
| 12511 | if (ival2 == 1) |
| 12512 | { |
| 12513 | gtReverseCond(tree); |
| 12514 | cns2->gtIntCon.gtIconVal = 0; |
| 12515 | oper = tree->gtOper; |
| 12516 | } |
| 12517 | } |
| 12518 | else if (andOp->gtType == TYP_LONG) |
| 12519 | { |
| 12520 | if (shiftAmount > 63) |
| 12521 | { |
| 12522 | goto SKIP; |
| 12523 | } |
| 12524 | |
| 12525 | UINT64 newAndOperand = ((UINT64)1) << shiftAmount; |
| 12526 | |
| 12527 | andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand); |
| 12528 | |
| 12529 | // Reverse the cond if necessary |
| 12530 | if (ival2 == 1) |
| 12531 | { |
| 12532 | gtReverseCond(tree); |
| 12533 | cns2->gtIntConCommon.SetLngValue(0); |
| 12534 | oper = tree->gtOper; |
| 12535 | } |
| 12536 | } |
| 12537 | |
| 12538 | andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1; |
| 12539 | |
| 12540 | DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2); |
| 12541 | DEBUG_DESTROY_NODE(rshiftOp); |
| 12542 | } |
| 12543 | } // END if (ival2 != INT_MAX) |
| 12544 | |
| 12545 | SKIP: |
| 12546 | /* Now check for compares with small constant longs that can be cast to int */ |
| 12547 | |
| 12548 | if (!cns2->OperIsConst()) |
| 12549 | { |
| 12550 | goto COMPARE; |
| 12551 | } |
| 12552 | |
| 12553 | if (cns2->TypeGet() != TYP_LONG) |
| 12554 | { |
| 12555 | goto COMPARE; |
| 12556 | } |
| 12557 | |
| 12558 | /* Is the constant 31 bits or smaller? */ |
| 12559 | |
| 12560 | if ((cns2->gtIntConCommon.LngValue() >> 31) != 0) |
| 12561 | { |
| 12562 | goto COMPARE; |
| 12563 | } |
| 12564 | |
/* Is the first comparand a mask (AND) operation of type long? */
| 12566 | |
| 12567 | if (op1->gtOper != GT_AND) |
| 12568 | { |
| 12569 | /* Another interesting case: cast from int */ |
| 12570 | |
| 12571 | if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT && |
| 12572 | !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate |
| 12573 | !op1->gtOverflow()) // cannot be an overflow checking cast |
| 12574 | { |
| 12575 | /* Simply make this into an integer comparison */ |
| 12576 | |
| 12577 | tree->gtOp.gtOp1 = op1->gtCast.CastOp(); |
| 12578 | tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT); |
| 12579 | } |
| 12580 | |
| 12581 | goto COMPARE; |
| 12582 | } |
| 12583 | |
| 12584 | noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND); |
| 12585 | |
/* Is the result of the mask effectively an INT? */
| 12587 | |
| 12588 | GenTree* andMask; |
| 12589 | andMask = op1->gtOp.gtOp2; |
| 12590 | if (andMask->gtOper != GT_CNS_NATIVELONG) |
| 12591 | { |
| 12592 | goto COMPARE; |
| 12593 | } |
| 12594 | if ((andMask->gtIntConCommon.LngValue() >> 32) != 0) |
| 12595 | { |
| 12596 | goto COMPARE; |
| 12597 | } |
| 12598 | |
| 12599 | /* Now we know that we can cast gtOp.gtOp1 of AND to int */ |
| 12600 | |
| 12601 | op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, false, TYP_INT); |
| 12602 | |
| 12603 | /* now replace the mask node (gtOp.gtOp2 of AND node) */ |
| 12604 | |
| 12605 | noway_assert(andMask == op1->gtOp.gtOp2); |
| 12606 | |
| 12607 | ival1 = (int)andMask->gtIntConCommon.LngValue(); |
| 12608 | andMask->SetOper(GT_CNS_INT); |
| 12609 | andMask->gtType = TYP_INT; |
| 12610 | andMask->gtIntCon.gtIconVal = ival1; |
| 12611 | |
| 12612 | /* now change the type of the AND node */ |
| 12613 | |
| 12614 | op1->gtType = TYP_INT; |
| 12615 | |
| 12616 | /* finally we replace the comparand */ |
| 12617 | |
| 12618 | ival2 = (int)cns2->gtIntConCommon.LngValue(); |
| 12619 | cns2->SetOper(GT_CNS_INT); |
| 12620 | cns2->gtType = TYP_INT; |
| 12621 | |
| 12622 | noway_assert(cns2 == op2); |
| 12623 | cns2->gtIntCon.gtIconVal = ival2; |
| 12624 | |
| 12625 | goto COMPARE; |
| 12626 | |
| 12627 | case GT_LT: |
| 12628 | case GT_LE: |
| 12629 | case GT_GE: |
| 12630 | case GT_GT: |
| 12631 | |
| 12632 | if ((tree->gtFlags & GTF_UNSIGNED) == 0) |
| 12633 | { |
| 12634 | if (op2->gtOper == GT_CNS_INT) |
| 12635 | { |
| 12636 | cns2 = op2; |
| 12637 | /* Check for "expr relop 1" */ |
| 12638 | if (cns2->IsIntegralConst(1)) |
| 12639 | { |
| 12640 | /* Check for "expr >= 1" */ |
| 12641 | if (oper == GT_GE) |
| 12642 | { |
| 12643 | /* Change to "expr > 0" */ |
| 12644 | oper = GT_GT; |
| 12645 | goto SET_OPER; |
| 12646 | } |
| 12647 | /* Check for "expr < 1" */ |
| 12648 | else if (oper == GT_LT) |
| 12649 | { |
| 12650 | /* Change to "expr <= 0" */ |
| 12651 | oper = GT_LE; |
| 12652 | goto SET_OPER; |
| 12653 | } |
| 12654 | } |
| 12655 | /* Check for "expr relop -1" */ |
| 12656 | else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT))) |
| 12657 | { |
| 12658 | /* Check for "expr <= -1" */ |
| 12659 | if (oper == GT_LE) |
| 12660 | { |
| 12661 | /* Change to "expr < 0" */ |
| 12662 | oper = GT_LT; |
| 12663 | goto SET_OPER; |
| 12664 | } |
| 12665 | /* Check for "expr > -1" */ |
| 12666 | else if (oper == GT_GT) |
| 12667 | { |
| 12668 | /* Change to "expr >= 0" */ |
| 12669 | oper = GT_GE; |
| 12670 | |
| 12671 | SET_OPER: |
// If we get here we should be changing 'oper'
| 12673 | assert(tree->OperGet() != oper); |
| 12674 | |
| 12675 | // Keep the old ValueNumber for 'tree' as the new expr |
| 12676 | // will still compute the same value as before |
| 12677 | tree->SetOper(oper, GenTree::PRESERVE_VN); |
| 12678 | cns2->gtIntCon.gtIconVal = 0; |
| 12679 | |
| 12680 | // vnStore is null before the ValueNumber phase has run |
| 12681 | if (vnStore != nullptr) |
| 12682 | { |
| 12683 | // Update the ValueNumber for 'cns2', as we just changed it to 0 |
| 12684 | fgValueNumberTreeConst(cns2); |
| 12685 | } |
| 12686 | |
| 12687 | op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2); |
| 12688 | } |
| 12689 | } |
| 12690 | } |
| 12691 | } |
| 12692 | else // we have an unsigned comparison |
| 12693 | { |
| 12694 | if (op2->IsIntegralConst(0)) |
| 12695 | { |
| 12696 | if ((oper == GT_GT) || (oper == GT_LE)) |
| 12697 | { |
// IL doesn't have a cne instruction, so compilers use cgt.un instead. The JIT
// recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
// if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
// and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
// occurs as a result of branch inversion.
| 12703 | oper = (oper == GT_LE) ? GT_EQ : GT_NE; |
| 12704 | tree->SetOper(oper, GenTree::PRESERVE_VN); |
| 12705 | tree->gtFlags &= ~GTF_UNSIGNED; |
| 12706 | } |
| 12707 | } |
| 12708 | } |
| 12709 | |
| 12710 | COMPARE: |
| 12711 | |
| 12712 | noway_assert(tree->OperKind() & GTK_RELOP); |
| 12713 | break; |
| 12714 | |
| 12715 | case GT_MUL: |
| 12716 | |
| 12717 | #ifndef _TARGET_64BIT_ |
| 12718 | if (typ == TYP_LONG) |
| 12719 | { |
| 12720 | // This must be GTF_MUL_64RSLT |
| 12721 | assert(tree->gtIsValid64RsltMul()); |
| 12722 | return tree; |
| 12723 | } |
| 12724 | #endif // _TARGET_64BIT_ |
| 12725 | goto CM_OVF_OP; |
| 12726 | |
| 12727 | case GT_SUB: |
| 12728 | |
| 12729 | if (tree->gtOverflow()) |
| 12730 | { |
| 12731 | goto CM_OVF_OP; |
| 12732 | } |
| 12733 | |
| 12734 | // TODO #4104: there are a lot of other places where |
| 12735 | // this condition is not checked before transformations. |
| 12736 | if (fgGlobalMorph) |
| 12737 | { |
| 12738 | /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */ |
| 12739 | |
| 12740 | noway_assert(op2); |
| 12741 | if (op2->IsCnsIntOrI()) |
| 12742 | { |
| 12743 | /* Negate the constant and change the node to be "+" */ |
| 12744 | |
| 12745 | op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue()); |
| 12746 | oper = GT_ADD; |
| 12747 | tree->ChangeOper(oper); |
| 12748 | goto CM_ADD_OP; |
| 12749 | } |
| 12750 | |
| 12751 | /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */ |
| 12752 | |
| 12753 | noway_assert(op1); |
| 12754 | if (op1->IsCnsIntOrI()) |
| 12755 | { |
| 12756 | noway_assert(varTypeIsIntOrI(tree)); |
| 12757 | |
| 12758 | // The type of the new GT_NEG node cannot just be op2->TypeGet(). |
| 12759 | // Otherwise we may sign-extend incorrectly in cases where the GT_NEG |
| 12760 | // node ends up feeding directly into a cast, for example in |
| 12761 | // GT_CAST<ubyte>(GT_SUB(0, s_1.ubyte)) |
| 12762 | tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2); |
| 12763 | fgMorphTreeDone(op2); |
| 12764 | |
| 12765 | oper = GT_ADD; |
| 12766 | tree->ChangeOper(oper); |
| 12767 | goto CM_ADD_OP; |
| 12768 | } |
| 12769 | |
| 12770 | /* No match - exit */ |
| 12771 | } |
| 12772 | break; |
| 12773 | |
| 12774 | #ifdef _TARGET_ARM64_ |
| 12775 | case GT_DIV: |
| 12776 | if (!varTypeIsFloating(tree->gtType)) |
| 12777 | { |
| 12778 | // Codegen for this instruction needs to be able to throw two exceptions: |
| 12779 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); |
| 12780 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); |
| 12781 | } |
| 12782 | break; |
| 12783 | case GT_UDIV: |
| 12784 | // Codegen for this instruction needs to be able to throw one exception: |
| 12785 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); |
| 12786 | break; |
| 12787 | #endif |
| 12788 | |
| 12789 | case GT_ADD: |
| 12790 | |
| 12791 | CM_OVF_OP: |
| 12792 | if (tree->gtOverflow()) |
| 12793 | { |
| 12794 | tree->gtRequestSetFlags(); |
| 12795 | |
// Add the exception-throwing basic block to jump to on overflow
| 12797 | |
| 12798 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); |
| 12799 | |
| 12800 | // We can't do any commutative morphing for overflow instructions |
| 12801 | |
| 12802 | break; |
| 12803 | } |
| 12804 | |
| 12805 | CM_ADD_OP: |
| 12806 | |
| 12807 | case GT_OR: |
| 12808 | case GT_XOR: |
| 12809 | case GT_AND: |
| 12810 | |
| 12811 | /* Commute any non-REF constants to the right */ |
| 12812 | |
| 12813 | noway_assert(op1); |
| 12814 | if (op1->OperIsConst() && (op1->gtType != TYP_REF)) |
| 12815 | { |
| 12816 | // TODO-Review: We used to assert here that |
| 12817 | // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD)); |
| 12818 | // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation, |
| 12819 | // and would sometimes hit this assertion. This may indicate a missed "remorph". |
| 12820 | // Task is to re-enable this assertion and investigate. |
| 12821 | |
| 12822 | /* Swap the operands */ |
| 12823 | tree->gtOp.gtOp1 = op2; |
| 12824 | tree->gtOp.gtOp2 = op1; |
| 12825 | |
| 12826 | op1 = op2; |
| 12827 | op2 = tree->gtOp.gtOp2; |
| 12828 | } |
| 12829 | |
| 12830 | /* See if we can fold GT_ADD nodes. */ |
| 12831 | |
| 12832 | if (oper == GT_ADD) |
| 12833 | { |
| 12834 | /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */ |
| 12835 | |
| 12836 | if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) && |
| 12837 | op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT && |
| 12838 | !op1->gtOverflow() && !op2->gtOverflow()) |
| 12839 | { |
| 12840 | cns1 = op1->gtOp.gtOp2; |
| 12841 | cns2 = op2->gtOp.gtOp2; |
| 12842 | cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal; |
| 12843 | #ifdef _TARGET_64BIT_ |
| 12844 | if (cns1->TypeGet() == TYP_INT) |
| 12845 | { |
| 12846 | // we need to properly re-sign-extend or truncate after adding two int constants above |
| 12847 | cns1->AsIntCon()->TruncateOrSignExtend32(); |
| 12848 | } |
| 12849 | #endif //_TARGET_64BIT_ |
| 12850 | |
| 12851 | tree->gtOp.gtOp2 = cns1; |
| 12852 | DEBUG_DESTROY_NODE(cns2); |
| 12853 | |
| 12854 | op1->gtOp.gtOp2 = op2->gtOp.gtOp1; |
| 12855 | op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT); |
| 12856 | DEBUG_DESTROY_NODE(op2); |
| 12857 | op2 = tree->gtOp.gtOp2; |
| 12858 | } |
| 12859 | |
| 12860 | if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ)) |
| 12861 | { |
| 12862 | /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */ |
| 12863 | CLANG_FORMAT_COMMENT_ANCHOR; |
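// For example: "(x + 3) + 4" becomes "x + 7".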
| 12864 | |
| 12865 | if (op1->gtOper == GT_ADD && // |
| 12866 | !gtIsActiveCSE_Candidate(op1) && // |
| 12867 | !op1->gtOverflow() && // |
| 12868 | op1->gtOp.gtOp2->IsCnsIntOrI() && // |
| 12869 | (op1->gtOp.gtOp2->OperGet() == op2->OperGet()) && // |
| 12870 | (op1->gtOp.gtOp2->TypeGet() != TYP_REF) && // Don't fold REFs |
| 12871 | (op2->TypeGet() != TYP_REF)) // Don't fold REFs |
| 12872 | { |
| 12873 | cns1 = op1->gtOp.gtOp2; |
| 12874 | op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() + |
| 12875 | op2->gtIntConCommon.IconValue()); |
| 12876 | #ifdef _TARGET_64BIT_ |
| 12877 | if (op2->TypeGet() == TYP_INT) |
| 12878 | { |
| 12879 | // we need to properly re-sign-extend or truncate after adding two int constants above |
| 12880 | op2->AsIntCon()->TruncateOrSignExtend32(); |
| 12881 | } |
| 12882 | #endif //_TARGET_64BIT_ |
| 12883 | |
| 12884 | if (cns1->OperGet() == GT_CNS_INT) |
| 12885 | { |
| 12886 | op2->gtIntCon.gtFieldSeq = |
| 12887 | GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq); |
| 12888 | } |
| 12889 | DEBUG_DESTROY_NODE(cns1); |
| 12890 | |
| 12891 | tree->gtOp.gtOp1 = op1->gtOp.gtOp1; |
| 12892 | DEBUG_DESTROY_NODE(op1); |
| 12893 | op1 = tree->gtOp.gtOp1; |
| 12894 | } |
| 12895 | |
| 12896 | // Fold (x + 0). |
| 12897 | |
| 12898 | if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree)) |
| 12899 | { |
| 12900 | |
| 12901 | // If this addition is adding an offset to a null pointer, |
| 12902 | // avoid the work and yield the null pointer immediately. |
| 12903 | // Dereferencing the pointer in either case will have the |
| 12904 | // same effect. |
| 12905 | |
| 12906 | if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) && |
| 12907 | ((op1->gtFlags & GTF_ALL_EFFECT) == 0)) |
| 12908 | { |
| 12909 | op2->gtType = tree->gtType; |
| 12910 | DEBUG_DESTROY_NODE(op1); |
| 12911 | DEBUG_DESTROY_NODE(tree); |
| 12912 | return op2; |
| 12913 | } |
| 12914 | |
| 12915 | // Remove the addition iff it won't change the tree type |
| 12916 | // to TYP_REF. |
| 12917 | |
| 12918 | if (!gtIsActiveCSE_Candidate(op2) && |
| 12919 | ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF))) |
| 12920 | { |
| 12921 | if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) && |
| 12922 | (op2->gtIntCon.gtFieldSeq != nullptr) && |
| 12923 | (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField())) |
| 12924 | { |
| 12925 | fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq); |
| 12926 | } |
| 12927 | |
| 12928 | DEBUG_DESTROY_NODE(op2); |
| 12929 | DEBUG_DESTROY_NODE(tree); |
| 12930 | |
| 12931 | return op1; |
| 12932 | } |
| 12933 | } |
| 12934 | } |
| 12935 | } |
| 12936 | /* See if we can fold GT_MUL by const nodes */ |
| 12937 | else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase) |
| 12938 | { |
| 12939 | #ifndef _TARGET_64BIT_ |
| 12940 | noway_assert(typ <= TYP_UINT); |
| 12941 | #endif // _TARGET_64BIT_ |
| 12942 | noway_assert(!tree->gtOverflow()); |
| 12943 | |
| 12944 | ssize_t mult = op2->gtIntConCommon.IconValue(); |
| 12945 | bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr && |
| 12946 | op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq(); |
| 12947 | |
| 12948 | assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr); |
| 12949 | |
| 12950 | if (mult == 0) |
| 12951 | { |
| 12952 | // We may be able to throw away op1 (unless it has side-effects) |
| 12953 | |
| 12954 | if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0) |
| 12955 | { |
| 12956 | DEBUG_DESTROY_NODE(op1); |
| 12957 | DEBUG_DESTROY_NODE(tree); |
| 12958 | return op2; // Just return the "0" node |
| 12959 | } |
| 12960 | |
| 12961 | // We need to keep op1 for the side-effects. Hang it off |
| 12962 | // a GT_COMMA node |
| 12963 | |
| 12964 | tree->ChangeOper(GT_COMMA); |
| 12965 | return tree; |
| 12966 | } |
| 12967 | |
| 12968 | size_t abs_mult = (mult >= 0) ? mult : -mult; |
| 12969 | size_t lowestBit = genFindLowestBit(abs_mult); |
| 12970 | bool changeToShift = false; |
| 12971 | |
| 12972 | // is it a power of two? (positive or negative) |
| 12973 | if (abs_mult == lowestBit) |
| 12974 | { |
| 12975 | // If the multiplier is negative, negate op1 (min-int does not need negation)
| 12976 | if (mult < 0 && mult != SSIZE_T_MIN) |
| 12977 | { |
| 12978 | // The type of the new GT_NEG node cannot just be op1->TypeGet(). |
| 12979 | // Otherwise we may sign-extend incorrectly in cases where the GT_NEG |
| 12980 | // node ends up feeding directly a cast, for example in |
| 12981 | // GT_CAST<ubyte>(GT_MUL(-1, s_1.ubyte)) |
| 12982 | tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1); |
| 12983 | fgMorphTreeDone(op1); |
| 12984 | } |
| 12985 | |
| 12986 | // If "op2" is a constant array index, the other multiplicand must be a constant. |
| 12987 | // Transfer the annotation to the other one. |
| 12988 | if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr && |
| 12989 | op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq()) |
| 12990 | { |
| 12991 | assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr); |
| 12992 | GenTree* otherOp = op1; |
| 12993 | if (otherOp->OperGet() == GT_NEG) |
| 12994 | { |
| 12995 | otherOp = otherOp->gtOp.gtOp1; |
| 12996 | } |
| 12997 | assert(otherOp->OperGet() == GT_CNS_INT); |
| 12998 | assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField()); |
| 12999 | otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq; |
| 13000 | } |
| 13001 | |
| 13002 | if (abs_mult == 1) |
| 13003 | { |
| 13004 | DEBUG_DESTROY_NODE(op2); |
| 13005 | DEBUG_DESTROY_NODE(tree); |
| 13006 | return op1; |
| 13007 | } |
| 13008 | |
| 13009 | /* Change the multiplication into a shift by log2(abs_mult) bits */
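| | // For example, x * 8 becomes x << 3, and x * -8 becomes (-x) << 3 (the negation was added above).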
| 13010 | op2->gtIntConCommon.SetIconValue(genLog2(abs_mult)); |
| 13011 | changeToShift = true; |
| 13012 | } |
| 13013 | #if LEA_AVAILABLE |
| 13014 | else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult()) |
| 13015 | { |
| 13016 | int shift = genLog2(lowestBit); |
| 13017 | ssize_t factor = abs_mult >> shift; |
| 13018 | |
| 13019 | if (factor == 3 || factor == 5 || factor == 9) |
| 13020 | { |
| 13021 | // If the multiplier is negative, negate op1 (min-int does not need negation)
| 13022 | if (mult < 0 && mult != SSIZE_T_MIN) |
| 13023 | { |
| 13024 | tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1); |
| 13025 | fgMorphTreeDone(op1); |
| 13026 | } |
| 13027 | |
| 13028 | GenTree* factorIcon = gtNewIconNode(factor, TYP_I_IMPL); |
| 13029 | if (op2IsConstIndex) |
| 13030 | { |
| 13031 | factorIcon->AsIntCon()->gtFieldSeq = |
| 13032 | GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField); |
| 13033 | } |
| 13034 | |
| 13035 | // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift |
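| | // For example, x * 24 becomes (x * 3) << 3; the multiply by 3 can then be encoded as a scaled
| | // address computation (LEA) on targets where that is cheaper than an integer multiply.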
| 13036 | tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon); |
| 13037 | fgMorphTreeDone(op1); |
| 13038 | |
| 13039 | op2->gtIntConCommon.SetIconValue(shift); |
| 13040 | changeToShift = true; |
| 13041 | } |
| 13042 | } |
| 13043 | #endif // LEA_AVAILABLE |
| 13044 | if (changeToShift) |
| 13045 | { |
| 13046 | // vnStore is null before the ValueNumber phase has run |
| 13047 | if (vnStore != nullptr) |
| 13048 | { |
| 13049 | // Update the ValueNumber for 'op2', as we just changed the constant |
| 13050 | fgValueNumberTreeConst(op2); |
| 13051 | } |
| 13052 | oper = GT_LSH; |
| 13053 | // Keep the old ValueNumber for 'tree' as the new expr |
| 13054 | // will still compute the same value as before |
| 13055 | tree->ChangeOper(oper, GenTree::PRESERVE_VN); |
| 13056 | |
| 13057 | goto DONE_MORPHING_CHILDREN; |
| 13058 | } |
| 13059 | } |
| 13060 | else if (fgOperIsBitwiseRotationRoot(oper)) |
| 13061 | { |
| 13062 | tree = fgRecognizeAndMorphBitwiseRotation(tree); |
| 13063 | |
| 13064 | // fgRecognizeAndMorphBitwiseRotation may return a new tree |
| 13065 | oper = tree->OperGet(); |
| 13066 | typ = tree->TypeGet(); |
| 13067 | op1 = tree->gtOp.gtOp1; |
| 13068 | op2 = tree->gtOp.gtOp2; |
| 13069 | } |
| 13070 | |
| 13071 | break; |
| 13072 | |
| 13073 | case GT_NOT: |
| 13074 | case GT_NEG: |
| 13075 | |
| 13076 | /* Any constant cases should have been folded earlier */ |
| 13077 | noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase); |
| 13078 | break; |
| 13079 | |
| 13080 | case GT_CKFINITE: |
| 13081 | |
| 13082 | noway_assert(varTypeIsFloating(op1->TypeGet())); |
| 13083 | |
| 13084 | fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN); |
| 13085 | break; |
| 13086 | |
| 13087 | case GT_OBJ: |
| 13088 | // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on |
| 13089 | // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X |
| 13090 | // is a local or clsVar, even if it has been address-exposed. |
| 13091 | if (op1->OperGet() == GT_ADDR) |
| 13092 | { |
| 13093 | tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF); |
| 13094 | } |
| 13095 | break; |
| 13096 | |
| 13097 | case GT_IND: |
| 13098 | |
| 13099 | // Can not remove a GT_IND if it is currently a CSE candidate. |
| 13100 | if (gtIsActiveCSE_Candidate(tree)) |
| 13101 | { |
| 13102 | break; |
| 13103 | } |
| 13104 | |
| 13105 | bool foldAndReturnTemp; |
| 13106 | foldAndReturnTemp = false; |
| 13107 | temp = nullptr; |
| 13108 | ival1 = 0; |
| 13109 | |
| 13110 | // Don't remove a volatile GT_IND, even if the address points to a local variable. |
| 13111 | if ((tree->gtFlags & GTF_IND_VOLATILE) == 0) |
| 13112 | { |
| 13113 | /* Try to Fold *(&X) into X */ |
| 13114 | if (op1->gtOper == GT_ADDR) |
| 13115 | { |
| 13116 | // Can not remove a GT_ADDR if it is currently a CSE candidate. |
| 13117 | if (gtIsActiveCSE_Candidate(op1)) |
| 13118 | { |
| 13119 | break; |
| 13120 | } |
| 13121 | |
| 13122 | temp = op1->gtOp.gtOp1; // X |
| 13123 | |
| 13124 | // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that |
| 13125 | // they are the *same* struct type. In fact, they almost certainly aren't. If the |
| 13126 | // address has an associated field sequence, that identifies this case; go through |
| 13127 | // the "lcl_fld" path rather than this one. |
| 13128 | FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below. |
| 13129 | if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq)) |
| 13130 | { |
| 13131 | foldAndReturnTemp = true; |
| 13132 | } |
| 13133 | else if (temp->OperIsLocal()) |
| 13134 | { |
| 13135 | unsigned lclNum = temp->gtLclVarCommon.gtLclNum; |
| 13136 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 13137 | |
| 13138 | // We will try to optimize when we have a promoted struct with a zero lvFldOffset
| 13139 | if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0)) |
| 13140 | { |
| 13141 | noway_assert(varTypeIsStruct(varDsc)); |
| 13142 | |
| 13143 | // We will try to optimize when we have a single field struct that is being struct promoted |
| 13144 | if (varDsc->lvFieldCnt == 1) |
| 13145 | { |
| 13146 | unsigned lclNumFld = varDsc->lvFieldLclStart; |
| 13147 | // just grab the promoted field |
| 13148 | LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld]; |
| 13149 | |
| 13150 | // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
| 13151 | // is zero |
| 13152 | if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0)) |
| 13153 | { |
| 13154 | // We can just use the existing promoted field LclNum |
| 13155 | temp->gtLclVarCommon.SetLclNum(lclNumFld); |
| 13156 | temp->gtType = fieldVarDsc->TypeGet(); |
| 13157 | |
| 13158 | foldAndReturnTemp = true; |
| 13159 | } |
| 13160 | } |
| 13161 | } |
| 13162 | // If the type of the IND (typ) is a "small int", and the type of the local has the |
| 13163 | // same width, then we can reduce to just the local variable -- it will be |
| 13164 | // correctly normalized, and signed/unsigned differences won't matter. |
| 13165 | // |
| 13166 | // The below transformation cannot be applied if the local var needs to be normalized on load. |
| 13167 | else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) && |
| 13168 | !lvaTable[lclNum].lvNormalizeOnLoad()) |
| 13169 | { |
| 13170 | tree->gtType = typ = temp->TypeGet(); |
| 13171 | foldAndReturnTemp = true; |
| 13172 | } |
| 13173 | else if (!varTypeIsStruct(typ) && (lvaTable[lclNum].lvType == typ) && |
| 13174 | !lvaTable[lclNum].lvNormalizeOnLoad()) |
| 13175 | { |
| 13176 | tree->gtType = typ = temp->TypeGet(); |
| 13177 | foldAndReturnTemp = true; |
| 13178 | } |
| 13179 | else |
| 13180 | { |
| 13181 | // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. |
| 13182 | // nullptr) |
| 13183 | assert(fieldSeq == nullptr); |
| 13184 | bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq); |
| 13185 | assert(b || fieldSeq == nullptr); |
| 13186 | |
| 13187 | if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD)) |
| 13188 | { |
| 13189 | // Append the field sequence, change the type. |
| 13190 | temp->AsLclFld()->gtFieldSeq = |
| 13191 | GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq); |
| 13192 | temp->gtType = typ; |
| 13193 | |
| 13194 | foldAndReturnTemp = true; |
| 13195 | } |
| 13196 | } |
| 13197 | // Otherwise we will fold this into a GT_LCL_FLD below
| 13198 | // where we check (temp != nullptr) |
| 13199 | } |
| 13200 | else // !temp->OperIsLocal() |
| 13201 | { |
| 13202 | // We don't try to fold away the GT_IND/GT_ADDR for this case |
| 13203 | temp = nullptr; |
| 13204 | } |
| 13205 | } |
| 13206 | else if (op1->OperGet() == GT_ADD) |
| 13207 | { |
| 13208 | /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */ |
| 13209 | |
| 13210 | if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT && |
| 13211 | opts.OptimizationEnabled()) |
| 13212 | { |
| 13213 | // No overflow arithmetic with pointers |
| 13214 | noway_assert(!op1->gtOverflow()); |
| 13215 | |
| 13216 | temp = op1->gtOp.gtOp1->gtOp.gtOp1; |
| 13217 | if (!temp->OperIsLocal()) |
| 13218 | { |
| 13219 | temp = nullptr; |
| 13220 | break; |
| 13221 | } |
| 13222 | |
| 13223 | // Can not remove the GT_ADDR if it is currently a CSE candidate. |
| 13224 | if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1)) |
| 13225 | { |
| 13226 | break; |
| 13227 | } |
| 13228 | |
| 13229 | ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal; |
| 13230 | fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq; |
| 13231 | |
| 13232 | // Does the address have an associated zero-offset field sequence? |
| 13233 | FieldSeqNode* addrFieldSeq = nullptr; |
| 13234 | if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq)) |
| 13235 | { |
| 13236 | fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq); |
| 13237 | } |
| 13238 | |
| 13239 | if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT) |
| 13240 | { |
| 13241 | noway_assert(!varTypeIsGC(temp->TypeGet())); |
| 13242 | foldAndReturnTemp = true; |
| 13243 | } |
| 13244 | else |
| 13245 | { |
| 13246 | // The emitter can't handle large offsets |
| 13247 | if (ival1 != (unsigned short)ival1) |
| 13248 | { |
| 13249 | break; |
| 13250 | } |
| 13251 | |
| 13252 | // The emitter can get confused by invalid offsets |
| 13253 | if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum)) |
| 13254 | { |
| 13255 | break; |
| 13256 | } |
| 13257 | |
| 13258 | #ifdef _TARGET_ARM_ |
| 13259 | // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field |
| 13260 | // |
| 13261 | if (varTypeIsFloating(typ)) |
| 13262 | { |
| 13263 | if ((ival1 % emitTypeSize(typ)) != 0) |
| 13264 | { |
| 13265 | tree->gtFlags |= GTF_IND_UNALIGNED; |
| 13266 | break; |
| 13267 | } |
| 13268 | } |
| 13269 | #endif |
| 13270 | } |
| 13271 | // Now we can fold this into a GT_LCL_FLD below |
| 13272 | // where we check (temp != nullptr) |
| 13273 | } |
| 13274 | } |
| 13275 | } |
| 13276 | |
| 13277 | // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging: |
| 13278 | // - We may have a load of a local where the load has a different type than the local |
| 13279 | // - We may have a load of a local plus an offset |
| 13280 | // |
| 13281 | // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and |
| 13282 | // offset if doing so is legal. The only cases in which this transformation is illegal are if the load |
| 13283 | // begins before the local or if the load extends beyond the end of the local (i.e. if the load is |
| 13284 | // out-of-bounds w.r.t. the local). |
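| | // For example, an IND<int> of "ADDR(structLocal) + 4" can become a LCL_FLD<int> of that local at
| | // offset 4, provided offset 4 plus the 4 bytes read stays within the local's size.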
| 13285 | if ((temp != nullptr) && !foldAndReturnTemp) |
| 13286 | { |
| 13287 | assert(temp->OperIsLocal()); |
| 13288 | |
| 13289 | const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum; |
| 13290 | LclVarDsc* const varDsc = &lvaTable[lclNum]; |
| 13291 | |
| 13292 | const var_types tempTyp = temp->TypeGet(); |
| 13293 | const bool useExactSize = varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK); |
| 13294 | const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp); |
| 13295 | |
| 13296 | // Make sure we do not enregister this lclVar. |
| 13297 | lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); |
| 13298 | |
| 13299 | // If the size of the load is greater than the size of the lclVar, we cannot fold this access into |
| 13300 | // a lclFld: the access represented by an lclFld node must begin at or after the start of the |
| 13301 | // lclVar and must not extend beyond the end of the lclVar. |
| 13302 | if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize)) |
| 13303 | { |
| 13304 | // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival1'
| 13305 | // or if we already have a GT_LCL_FLD we will adjust its gtLclOffs by adding 'ival1'
| 13306 | // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
| 13307 | // |
| 13308 | if (temp->OperGet() == GT_LCL_FLD) |
| 13309 | { |
| 13310 | temp->AsLclFld()->gtLclOffs += (unsigned short)ival1; |
| 13311 | temp->AsLclFld()->gtFieldSeq = |
| 13312 | GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq); |
| 13313 | } |
| 13314 | else |
| 13315 | { |
| 13316 | temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"... |
| 13317 | temp->AsLclFld()->gtLclOffs = (unsigned short)ival1; |
| 13318 | if (fieldSeq != nullptr) |
| 13319 | { // If it does represent a field, note that. |
| 13320 | temp->AsLclFld()->gtFieldSeq = fieldSeq; |
| 13321 | } |
| 13322 | } |
| 13323 | temp->gtType = tree->gtType; |
| 13324 | foldAndReturnTemp = true; |
| 13325 | } |
| 13326 | } |
| 13327 | |
| 13328 | if (foldAndReturnTemp) |
| 13329 | { |
| 13330 | assert(temp != nullptr); |
| 13331 | assert(temp->TypeGet() == typ); |
| 13332 | assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR)); |
| 13333 | |
| 13334 | // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for |
| 13335 | // 'temp' because a GT_ADDR always marks it for its operand. |
| 13336 | temp->gtFlags &= ~GTF_DONT_CSE; |
| 13337 | temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE); |
| 13338 | |
| 13339 | if (op1->OperGet() == GT_ADD) |
| 13340 | { |
| 13341 | DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR |
| 13342 | DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT |
| 13343 | } |
| 13344 | DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR |
| 13345 | DEBUG_DESTROY_NODE(tree); // GT_IND |
| 13346 | |
| 13347 | // If the result of the fold is a local var, we may need to perform further adjustments e.g. for |
| 13348 | // normalization. |
| 13349 | if (temp->OperIs(GT_LCL_VAR)) |
| 13350 | { |
| 13351 | #ifdef DEBUG |
| 13352 | // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear |
| 13353 | // and the node in question must have this bit set (as it has already been morphed). |
| 13354 | temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
| 13355 | #endif // DEBUG |
| 13356 | const bool forceRemorph = true; |
| 13357 | temp = fgMorphLocalVar(temp, forceRemorph); |
| 13358 | #ifdef DEBUG |
| 13359 | // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
| 13360 | // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function |
| 13361 | // returns. |
| 13362 | temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 13363 | #endif // DEBUG |
| 13364 | } |
| 13365 | |
| 13366 | return temp; |
| 13367 | } |
| 13368 | |
| 13369 | // Only do this optimization when we are in the global optimizer. Doing this after value numbering |
| 13370 | // could result in an invalid value number for the newly generated GT_IND node. |
| 13371 | if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph) |
| 13372 | { |
| 13373 | // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)). |
| 13374 | // TBD: this transformation is currently necessary for correctness -- it might |
| 13375 | // be good to analyze the failures that result if we don't do this, and fix them |
| 13376 | // in other ways. Ideally, this should be optional. |
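| | // For example, IND(COMMA(asg, addr)) becomes COMMA(asg, IND(addr)): the indirection is pushed
| | // down onto the address produced by the last operand of the comma.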
| 13377 | GenTree* commaNode = op1; |
| 13378 | unsigned treeFlags = tree->gtFlags; |
| 13379 | commaNode->gtType = typ; |
| 13380 | commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is |
| 13381 | // dangerous, clear the GTF_REVERSE_OPS at |
| 13382 | // least. |
| 13383 | #ifdef DEBUG |
| 13384 | commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 13385 | #endif |
| 13386 | while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA) |
| 13387 | { |
| 13388 | commaNode = commaNode->gtOp.gtOp2; |
| 13389 | commaNode->gtType = typ; |
| 13390 | commaNode->gtFlags = |
| 13391 | (treeFlags & ~GTF_REVERSE_OPS & ~GTF_ASG & ~GTF_CALL); // Bashing the GT_COMMA flags here is |
| 13392 | // dangerous, clear the GTF_REVERSE_OPS, GTF_ASG, and GTF_CALL at
| 13393 | // least. |
| 13394 | commaNode->gtFlags |= |
| 13395 | ((commaNode->gtOp.gtOp1->gtFlags | commaNode->gtOp.gtOp2->gtFlags) & (GTF_ASG | GTF_CALL)); |
| 13396 | #ifdef DEBUG |
| 13397 | commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 13398 | #endif |
| 13399 | } |
| 13400 | bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0; |
| 13401 | ArrayInfo arrInfo; |
| 13402 | if (wasArrIndex) |
| 13403 | { |
| 13404 | bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo); |
| 13405 | assert(b); |
| 13406 | GetArrayInfoMap()->Remove(tree); |
| 13407 | } |
| 13408 | tree = op1; |
| 13409 | GenTree* addr = commaNode->gtOp.gtOp2; |
| 13410 | op1 = gtNewIndir(typ, addr); |
| 13411 | // This is very conservative |
| 13412 | op1->gtFlags |= treeFlags & ~GTF_ALL_EFFECT & ~GTF_IND_NONFAULTING; |
| 13413 | op1->gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT); |
| 13414 | |
| 13415 | if (wasArrIndex) |
| 13416 | { |
| 13417 | GetArrayInfoMap()->Set(op1, arrInfo); |
| 13418 | } |
| 13419 | #ifdef DEBUG |
| 13420 | op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 13421 | #endif |
| 13422 | commaNode->gtOp.gtOp2 = op1; |
| 13423 | commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); |
| 13424 | return tree; |
| 13425 | } |
| 13426 | |
| 13427 | break; |
| 13428 | |
| 13429 | case GT_ADDR: |
| 13430 | |
| 13431 | // Can not remove op1 if it is currently a CSE candidate. |
| 13432 | if (gtIsActiveCSE_Candidate(op1)) |
| 13433 | { |
| 13434 | break; |
| 13435 | } |
| 13436 | |
| 13437 | if (op1->OperGet() == GT_IND) |
| 13438 | { |
| 13439 | if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0) |
| 13440 | { |
| 13441 | // Can not remove a GT_ADDR if it is currently a CSE candidate. |
| 13442 | if (gtIsActiveCSE_Candidate(tree)) |
| 13443 | { |
| 13444 | break; |
| 13445 | } |
| 13446 | |
| 13447 | // Perform the transform ADDR(IND(...)) == (...). |
| 13448 | GenTree* addr = op1->gtOp.gtOp1; |
| 13449 | |
| 13450 | noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL); |
| 13451 | |
| 13452 | DEBUG_DESTROY_NODE(op1); |
| 13453 | DEBUG_DESTROY_NODE(tree); |
| 13454 | |
| 13455 | return addr; |
| 13456 | } |
| 13457 | } |
| 13458 | else if (op1->OperGet() == GT_OBJ) |
| 13459 | { |
| 13460 | // Can not remove a GT_ADDR if it is currently a CSE candidate. |
| 13461 | if (gtIsActiveCSE_Candidate(tree)) |
| 13462 | { |
| 13463 | break; |
| 13464 | } |
| 13465 | |
| 13466 | // Perform the transform ADDR(OBJ(...)) == (...). |
| 13467 | GenTree* addr = op1->AsObj()->Addr(); |
| 13468 | |
| 13469 | noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL); |
| 13470 | |
| 13471 | DEBUG_DESTROY_NODE(op1); |
| 13472 | DEBUG_DESTROY_NODE(tree); |
| 13473 | |
| 13474 | return addr; |
| 13475 | } |
| 13476 | else if (op1->gtOper == GT_CAST) |
| 13477 | { |
| 13478 | GenTree* casting = op1->gtCast.CastOp(); |
| 13479 | if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR) |
| 13480 | { |
| 13481 | DEBUG_DESTROY_NODE(op1); |
| 13482 | tree->gtOp.gtOp1 = op1 = casting; |
| 13483 | } |
| 13484 | } |
| 13485 | else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase) |
| 13486 | { |
| 13487 | // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)). |
| 13488 | // (Be sure to mark "z" as an l-value...) |
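| | // For example, ADDR(COMMA(asg, x)) becomes COMMA(asg, ADDR(x)), with 'x' marked GTF_DONT_CSE
| | // since it is now the operand of an address-of node.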
| 13489 | |
| 13490 | GenTreePtrStack commas(getAllocator(CMK_ArrayStack)); |
| 13491 | for (GenTree* comma = op1; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2()) |
| 13492 | { |
| 13493 | commas.Push(comma); |
| 13494 | } |
| 13495 | GenTree* commaNode = commas.Top(); |
| 13496 | |
| 13497 | // The top-level addr might be annotated with a zeroOffset field. |
| 13498 | FieldSeqNode* zeroFieldSeq = nullptr; |
| 13499 | bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq); |
| 13500 | tree = op1; |
| 13501 | commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE; |
| 13502 | |
| 13503 | // If the node we're about to put under a GT_ADDR is an indirection, it |
| 13504 | // doesn't need to be materialized, since we only want the addressing mode. Because |
| 13505 | // of this, this GT_IND is not a faulting indirection and we don't have to extract it |
| 13506 | // as a side effect. |
| 13507 | GenTree* commaOp2 = commaNode->gtOp.gtOp2; |
| 13508 | if (commaOp2->OperIsBlk()) |
| 13509 | { |
| 13510 | commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet()); |
| 13511 | } |
| 13512 | if (commaOp2->gtOper == GT_IND) |
| 13513 | { |
| 13514 | commaOp2->gtFlags |= GTF_IND_NONFAULTING; |
| 13515 | commaOp2->gtFlags &= ~GTF_EXCEPT; |
| 13516 | commaOp2->gtFlags |= (commaOp2->gtOp.gtOp1->gtFlags & GTF_EXCEPT); |
| 13517 | } |
| 13518 | |
| 13519 | op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2); |
| 13520 | |
| 13521 | if (isZeroOffset) |
| 13522 | { |
| 13523 | // Transfer the annotation to the new GT_ADDR node. |
| 13524 | GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq); |
| 13525 | } |
| 13526 | commaNode->gtOp.gtOp2 = op1; |
| 13527 | // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform |
| 13528 | // might give op1 a type different from byref (like, say, native int). So now go back and give |
| 13529 | // all the comma nodes the type of op1. |
| 13530 | // TODO: the comma flag update below is conservative and can be improved. |
| 13531 | // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to |
| 13532 | // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
| 13533 | |
| 13534 | while (!commas.Empty()) |
| 13535 | { |
| 13536 | GenTree* comma = commas.Pop(); |
| 13537 | comma->gtType = op1->gtType; |
| 13538 | comma->gtFlags |= op1->gtFlags; |
| 13539 | #ifdef DEBUG |
| 13540 | comma->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 13541 | #endif |
| 13542 | gtUpdateNodeSideEffects(comma); |
| 13543 | } |
| 13544 | |
| 13545 | return tree; |
| 13546 | } |
| 13547 | |
| 13548 | /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */ |
| 13549 | op1->gtFlags |= GTF_DONT_CSE; |
| 13550 | break; |
| 13551 | |
| 13552 | case GT_COLON: |
| 13553 | if (fgGlobalMorph) |
| 13554 | { |
| 13555 | /* Mark the nodes that are conditionally executed */ |
| 13556 | fgWalkTreePre(&tree, gtMarkColonCond); |
| 13557 | } |
| 13558 | /* Since we're doing this postorder we clear this if it got set by a child */ |
| 13559 | fgRemoveRestOfBlock = false; |
| 13560 | break; |
| 13561 | |
| 13562 | case GT_COMMA: |
| 13563 | |
| 13564 | /* Special case: trees that don't produce a value */ |
| 13565 | if (op2->OperIs(GT_ASG) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2)) |
| 13566 | { |
| 13567 | typ = tree->gtType = TYP_VOID; |
| 13568 | } |
| 13569 | |
| 13570 | // If we are in the Valuenum CSE phase then don't morph away anything as these |
| 13571 | // nodes may have CSE defs/uses in them. |
| 13572 | // |
| 13573 | if (!optValnumCSE_phase) |
| 13574 | { |
| 13575 | // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this |
| 13576 | // is all we need. |
| 13577 | |
| 13578 | GenTree* op1SideEffects = nullptr; |
| 13579 | // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example) |
| 13580 | // hoisted expressions in loops. |
| 13581 | gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE)); |
| 13582 | if (op1SideEffects) |
| 13583 | { |
| 13584 | // Replace the left hand side with the side effect list. |
| 13585 | tree->gtOp.gtOp1 = op1SideEffects; |
| 13586 | gtUpdateNodeSideEffects(tree); |
| 13587 | } |
| 13588 | else |
| 13589 | { |
| 13590 | op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); |
| 13591 | DEBUG_DESTROY_NODE(tree); |
| 13592 | DEBUG_DESTROY_NODE(op1); |
| 13593 | return op2; |
| 13594 | } |
| 13595 | |
| 13596 | /* If the right operand is just a void nop node, throw it away */ |
| 13597 | if (op2->IsNothingNode() && op1->gtType == TYP_VOID) |
| 13598 | { |
| 13599 | op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); |
| 13600 | DEBUG_DESTROY_NODE(tree); |
| 13601 | DEBUG_DESTROY_NODE(op2); |
| 13602 | return op1; |
| 13603 | } |
| 13604 | } |
| 13605 | |
| 13606 | break; |
| 13607 | |
| 13608 | case GT_JTRUE: |
| 13609 | |
| 13610 | /* Special case if fgRemoveRestOfBlock is set to true */ |
| 13611 | if (fgRemoveRestOfBlock) |
| 13612 | { |
| 13613 | if (fgIsCommaThrow(op1, true)) |
| 13614 | { |
| 13615 | GenTree* throwNode = op1->gtOp.gtOp1; |
| 13616 | noway_assert(throwNode->gtType == TYP_VOID); |
| 13617 | |
| 13618 | JITDUMP("Removing [%06d] GT_JTRUE as the block now unconditionally throws an exception.\n" , |
| 13619 | dspTreeID(tree)); |
| 13620 | DEBUG_DESTROY_NODE(tree); |
| 13621 | |
| 13622 | return throwNode; |
| 13623 | } |
| 13624 | |
| 13625 | noway_assert(op1->OperKind() & GTK_RELOP); |
| 13626 | noway_assert(op1->gtFlags & GTF_EXCEPT); |
| 13627 | |
| 13628 | // We need to keep op1 for the side-effects. Hang it off |
| 13629 | // a GT_COMMA node |
| 13630 | |
| 13631 | JITDUMP("Keeping side-effects by bashing [%06d] GT_JTRUE into a GT_COMMA.\n" , dspTreeID(tree)); |
| 13632 | |
| 13633 | tree->ChangeOper(GT_COMMA); |
| 13634 | tree->gtOp.gtOp2 = op2 = gtNewNothingNode(); |
| 13635 | |
| 13636 | // Additionally since we're eliminating the JTRUE |
| 13637 | // codegen won't like it if op1 is a RELOP of longs, floats or doubles. |
| 13638 | // So we change it into a GT_COMMA as well. |
| 13639 | JITDUMP("Also bashing [%06d] (a relop) into a GT_COMMA.\n" , dspTreeID(op1)); |
| 13640 | op1->ChangeOper(GT_COMMA); |
| 13641 | op1->gtFlags &= ~GTF_UNSIGNED; // Clear the unsigned flag if it was set on the relop |
| 13642 | op1->gtType = op1->gtOp.gtOp1->gtType; |
| 13643 | |
| 13644 | return tree; |
| 13645 | } |
| 13646 | |
| 13647 | default: |
| 13648 | break; |
| 13649 | } |
| 13650 | |
| 13651 | assert(oper == tree->gtOper); |
| 13652 | |
| 13653 | // If we are in the Valuenum CSE phase then don't morph away anything as these |
| 13654 | // nodes may have CSE defs/uses in them. |
| 13655 | // |
| 13656 | if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList()) |
| 13657 | { |
| 13658 | /* Check for op1 as a GT_COMMA with an unconditional throw node */
| 13659 | if (op1 && fgIsCommaThrow(op1, true)) |
| 13660 | { |
| 13661 | if ((op1->gtFlags & GTF_COLON_COND) == 0) |
| 13662 | { |
| 13663 | /* We can safely throw out the rest of the statements */ |
| 13664 | fgRemoveRestOfBlock = true; |
| 13665 | } |
| 13666 | |
| 13667 | GenTree* throwNode = op1->gtOp.gtOp1; |
| 13668 | noway_assert(throwNode->gtType == TYP_VOID); |
| 13669 | |
| 13670 | if (oper == GT_COMMA) |
| 13671 | { |
| 13672 | /* Both tree and op1 are GT_COMMA nodes */ |
| 13673 | /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */ |
| 13674 | tree->gtOp.gtOp1 = throwNode; |
| 13675 | |
| 13676 | // Possibly reset the assignment flag |
| 13677 | if (((throwNode->gtFlags & GTF_ASG) == 0) && ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0))) |
| 13678 | { |
| 13679 | tree->gtFlags &= ~GTF_ASG; |
| 13680 | } |
| 13681 | |
| 13682 | return tree; |
| 13683 | } |
| 13684 | else if (oper != GT_NOP) |
| 13685 | { |
| 13686 | if (genActualType(typ) == genActualType(op1->gtType)) |
| 13687 | { |
| 13688 | /* The types match so, return the comma throw node as the new tree */ |
| 13689 | return op1; |
| 13690 | } |
| 13691 | else |
| 13692 | { |
| 13693 | if (typ == TYP_VOID) |
| 13694 | { |
| 13695 | // Return the throw node |
| 13696 | return throwNode; |
| 13697 | } |
| 13698 | else |
| 13699 | { |
| 13700 | GenTree* commaOp2 = op1->gtOp.gtOp2; |
| 13701 | |
| 13702 | // need type of oper to be same as tree |
| 13703 | if (typ == TYP_LONG) |
| 13704 | { |
| 13705 | commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); |
| 13706 | commaOp2->gtIntConCommon.SetLngValue(0); |
| 13707 | /* Change the types of oper and commaOp2 to TYP_LONG */ |
| 13708 | op1->gtType = commaOp2->gtType = TYP_LONG; |
| 13709 | } |
| 13710 | else if (varTypeIsFloating(typ)) |
| 13711 | { |
| 13712 | commaOp2->ChangeOperConst(GT_CNS_DBL); |
| 13713 | commaOp2->gtDblCon.gtDconVal = 0.0; |
| 13714 | /* Change the types of oper and commaOp2 to TYP_DOUBLE */ |
| 13715 | op1->gtType = commaOp2->gtType = TYP_DOUBLE; |
| 13716 | } |
| 13717 | else |
| 13718 | { |
| 13719 | commaOp2->ChangeOperConst(GT_CNS_INT); |
| 13720 | commaOp2->gtIntConCommon.SetIconValue(0); |
| 13721 | /* Change the types of oper and commaOp2 to TYP_INT */ |
| 13722 | op1->gtType = commaOp2->gtType = TYP_INT; |
| 13723 | } |
| 13724 | |
| 13725 | /* Return the GT_COMMA node as the new tree */ |
| 13726 | return op1; |
| 13727 | } |
| 13728 | } |
| 13729 | } |
| 13730 | } |
| 13731 | |
| 13732 | /* Check for op2 as a GT_COMMA with an unconditional throw */
| 13733 | |
| 13734 | if (op2 && fgIsCommaThrow(op2, true)) |
| 13735 | { |
| 13736 | if ((op2->gtFlags & GTF_COLON_COND) == 0) |
| 13737 | { |
| 13738 | /* We can safely throw out the rest of the statements */ |
| 13739 | fgRemoveRestOfBlock = true; |
| 13740 | } |
| 13741 | |
| 13742 | // If op1 has no side-effects |
| 13743 | if ((op1->gtFlags & GTF_ALL_EFFECT) == 0) |
| 13744 | { |
| 13745 | // If tree is an asg node |
| 13746 | if (tree->OperIs(GT_ASG)) |
| 13747 | { |
| 13748 | /* Return the throw node as the new tree */ |
| 13749 | return op2->gtOp.gtOp1; |
| 13750 | } |
| 13751 | |
| 13752 | if (tree->OperGet() == GT_ARR_BOUNDS_CHECK) |
| 13753 | { |
| 13754 | /* Return the throw node as the new tree */ |
| 13755 | return op2->gtOp.gtOp1; |
| 13756 | } |
| 13757 | |
| 13758 | // If tree is a comma node |
| 13759 | if (tree->OperGet() == GT_COMMA) |
| 13760 | { |
| 13761 | /* Return the throw node as the new tree */ |
| 13762 | return op2->gtOp.gtOp1; |
| 13763 | } |
| 13764 | |
| 13765 | /* for the shift nodes the type of op2 can differ from the tree type */ |
| 13766 | if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT)) |
| 13767 | { |
| 13768 | noway_assert(GenTree::OperIsShiftOrRotate(oper)); |
| 13769 | |
| 13770 | GenTree* commaOp2 = op2->gtOp.gtOp2; |
| 13771 | |
| 13772 | commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); |
| 13773 | commaOp2->gtIntConCommon.SetLngValue(0); |
| 13774 | |
| 13775 | /* Change the types of oper and commaOp2 to TYP_LONG */ |
| 13776 | op2->gtType = commaOp2->gtType = TYP_LONG; |
| 13777 | } |
| 13778 | |
| 13779 | if ((genActualType(typ) == TYP_INT) && |
| 13780 | (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet()))) |
| 13781 | { |
| 13782 | // An example case is comparison (say GT_GT) of two longs or floating point values. |
| 13783 | |
| 13784 | GenTree* commaOp2 = op2->gtOp.gtOp2; |
| 13785 | |
| 13786 | commaOp2->ChangeOperConst(GT_CNS_INT); |
| 13787 | commaOp2->gtIntCon.gtIconVal = 0; |
| 13788 | /* Change the types of oper and commaOp2 to TYP_INT */ |
| 13789 | op2->gtType = commaOp2->gtType = TYP_INT; |
| 13790 | } |
| 13791 | |
| 13792 | if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL)) |
| 13793 | { |
| 13794 | noway_assert(tree->OperGet() == GT_ADD); |
| 13795 | |
| 13796 | GenTree* commaOp2 = op2->gtOp.gtOp2; |
| 13797 | |
| 13798 | commaOp2->ChangeOperConst(GT_CNS_INT); |
| 13799 | commaOp2->gtIntCon.gtIconVal = 0; |
| 13800 | /* Change the types of oper and commaOp2 to TYP_BYREF */ |
| 13801 | op2->gtType = commaOp2->gtType = TYP_BYREF; |
| 13802 | } |
| 13803 | |
| 13804 | /* types should now match */ |
| 13805 | noway_assert((genActualType(typ) == genActualType(op2->gtType))); |
| 13806 | |
| 13807 | /* Return the GT_COMMA node as the new tree */ |
| 13808 | return op2; |
| 13809 | } |
| 13810 | } |
| 13811 | } |
| 13812 | |
| 13813 | /*------------------------------------------------------------------------- |
| 13814 | * Optional morphing is done if tree transformations are permitted
| 13815 | */ |
| 13816 | |
| 13817 | if ((opts.compFlags & CLFLG_TREETRANS) == 0) |
| 13818 | { |
| 13819 | return tree; |
| 13820 | } |
| 13821 | |
| 13822 | tree = fgMorphSmpOpOptional(tree->AsOp()); |
| 13823 | |
| 13824 | return tree; |
| 13825 | } |
| 13826 | #ifdef _PREFAST_ |
| 13827 | #pragma warning(pop) |
| 13828 | #endif |
| 13829 | |
| 13830 | GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) |
| 13831 | { |
| 13832 | genTreeOps oper = tree->gtOper; |
| 13833 | GenTree* op1 = tree->gtOp1; |
| 13834 | GenTree* op2 = tree->gtOp2; |
| 13835 | var_types typ = tree->TypeGet(); |
| 13836 | |
| 13837 | if (fgGlobalMorph && GenTree::OperIsCommutative(oper)) |
| 13838 | { |
| 13839 | /* Swap the operands so that the more expensive one is 'op1' */ |
| 13840 | |
| 13841 | if (tree->gtFlags & GTF_REVERSE_OPS) |
| 13842 | { |
| 13843 | tree->gtOp1 = op2; |
| 13844 | tree->gtOp2 = op1; |
| 13845 | |
| 13846 | op2 = op1; |
| 13847 | op1 = tree->gtOp1; |
| 13848 | |
| 13849 | tree->gtFlags &= ~GTF_REVERSE_OPS; |
| 13850 | } |
| 13851 | |
| 13852 | if (oper == op2->gtOper) |
| 13853 | { |
| 13854 | /* Reorder nested operators at the same precedence level to be |
| 13855 | left-recursive. For example, change "(a+(b+c))" to the |
| 13856 | equivalent expression "((a+b)+c)". |
| 13857 | */ |
| 13858 | |
| 13859 | /* Things are handled differently for floating-point operators */ |
| 13860 | |
| 13861 | if (!varTypeIsFloating(tree->TypeGet())) |
| 13862 | { |
| 13863 | fgMoveOpsLeft(tree); |
| 13864 | op1 = tree->gtOp1; |
| 13865 | op2 = tree->gtOp2; |
| 13866 | } |
| 13867 | } |
| 13868 | } |
| 13869 | |
| 13870 | #if REARRANGE_ADDS |
| 13871 | |
| 13872 | /* Change "((x+icon)+y)" to "((x+y)+icon)" |
| 13873 | Don't reorder floating-point operations */ |
| 13874 | |
| 13875 | if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() && |
| 13876 | varTypeIsIntegralOrI(typ)) |
| 13877 | { |
| 13878 | GenTree* ad2 = op1->gtOp.gtOp2; |
| 13879 | |
| 13880 | if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0) |
| 13881 | { |
| 13882 | // This takes |
| 13883 | // + (tree) |
| 13884 | // / \ |
| 13885 | // / \ |
| 13886 | // / \ |
| 13887 | // + (op1) op2 |
| 13888 | // / \ |
| 13889 | // \ |
| 13890 | // ad2 |
| 13891 | // |
| 13892 | // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is |
| 13893 | // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node |
| 13894 | // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same |
| 13895 | // type as (tree). |
| 13896 | // |
| 13897 | // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is |
| 13898 | // necessary |
| 13899 | |
| 13900 | if (varTypeIsGC(op2->TypeGet())) |
| 13901 | { |
| 13902 | noway_assert(varTypeIsGC(typ)); |
| 13903 | op1->gtType = typ; |
| 13904 | } |
| 13905 | tree->gtOp2 = ad2; |
| 13906 | |
| 13907 | op1->gtOp.gtOp2 = op2; |
| 13908 | op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT; |
| 13909 | |
| 13910 | op2 = tree->gtOp2; |
| 13911 | } |
| 13912 | } |
| 13913 | |
| 13914 | #endif |
| 13915 | |
| 13916 | /*------------------------------------------------------------------------- |
| 13917 | * Perform optional oper-specific postorder morphing |
| 13918 | */ |
| 13919 | |
| 13920 | switch (oper) |
| 13921 | { |
| 13922 | case GT_ASG: |
| 13923 | if (varTypeIsStruct(typ) && !tree->IsPhiDefn()) |
| 13924 | { |
| 13925 | if (tree->OperIsCopyBlkOp()) |
| 13926 | { |
| 13927 | return fgMorphCopyBlock(tree); |
| 13928 | } |
| 13929 | else |
| 13930 | { |
| 13931 | return fgMorphInitBlock(tree); |
| 13932 | } |
| 13933 | } |
| 13934 | |
| 13935 | if (typ == TYP_LONG) |
| 13936 | { |
| 13937 | break; |
| 13938 | } |
| 13939 | |
| 13940 | /* Make sure we're allowed to do this */ |
| 13941 | |
| 13942 | if (optValnumCSE_phase) |
| 13943 | { |
| 13944 | // It is not safe to reorder/delete CSE's |
| 13945 | break; |
| 13946 | } |
| 13947 | |
| 13948 | if (op2->gtFlags & GTF_ASG) |
| 13949 | { |
| 13950 | break; |
| 13951 | } |
| 13952 | |
| 13953 | if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT)) |
| 13954 | { |
| 13955 | break; |
| 13956 | } |
| 13957 | |
| 13958 | /* Special case: a cast that can be thrown away */ |
| 13959 | |
| 13960 | // TODO-Cleanup: fgMorphSmp does a similar optimization. However, it removes only |
| 13961 | // one cast and sometimes there is another one after it that gets removed by this |
| 13962 | // code. fgMorphSmp should be improved to remove all redundant casts so this code |
| 13963 | // can be removed. |
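| | // For example, in IND<short>(addr) = CAST<int>(x), the cast can be dropped: the 16-bit store
| | // only uses the low 16 bits of the value, and those are unchanged by the cast (the size checks
| | // below guarantee this).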
| 13964 | |
| 13965 | if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow()) |
| 13966 | { |
| 13967 | var_types srct; |
| 13968 | var_types cast; |
| 13969 | var_types dstt; |
| 13970 | |
| 13971 | srct = op2->gtCast.CastOp()->TypeGet(); |
| 13972 | cast = (var_types)op2->CastToType(); |
| 13973 | dstt = op1->TypeGet(); |
| 13974 | |
| 13975 | /* Make sure these are all ints and precision is not lost */ |
| 13976 | |
| 13977 | if (genTypeSize(cast) >= genTypeSize(dstt) && dstt <= TYP_INT && srct <= TYP_INT) |
| 13978 | { |
| 13979 | op2 = tree->gtOp2 = op2->gtCast.CastOp(); |
| 13980 | } |
| 13981 | } |
| 13982 | |
| 13983 | break; |
| 13984 | |
| 13985 | case GT_MUL: |
| 13986 | |
| 13987 | /* Check for the case "(val + icon) * icon" */ |
| 13988 | |
| 13989 | if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD) |
| 13990 | { |
| 13991 | GenTree* add = op1->gtOp.gtOp2; |
| 13992 | |
| 13993 | if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0)) |
| 13994 | { |
| 13995 | if (tree->gtOverflow() || op1->gtOverflow()) |
| 13996 | { |
| 13997 | break; |
| 13998 | } |
| 13999 | |
| 14000 | ssize_t imul = op2->gtIntCon.gtIconVal; |
| 14001 | ssize_t iadd = add->gtIntCon.gtIconVal; |
| 14002 | |
| 14003 | /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */ |
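| | // For example, (x + 2) * 4 becomes (x * 4) + 8.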
| 14004 | |
| 14005 | oper = GT_ADD; |
| 14006 | tree->ChangeOper(oper); |
| 14007 | |
| 14008 | op2->gtIntCon.gtIconVal = iadd * imul; |
| 14009 | |
| 14010 | op1->ChangeOper(GT_MUL); |
| 14011 | |
| 14012 | add->gtIntCon.gtIconVal = imul; |
| 14013 | #ifdef _TARGET_64BIT_ |
| 14014 | if (add->gtType == TYP_INT) |
| 14015 | { |
| 14016 | // we need to properly re-sign-extend or truncate after multiplying two int constants above |
| 14017 | add->AsIntCon()->TruncateOrSignExtend32(); |
| 14018 | } |
| 14019 | #endif //_TARGET_64BIT_ |
| 14020 | } |
| 14021 | } |
| 14022 | |
| 14023 | break; |
| 14024 | |
| 14025 | case GT_DIV: |
| 14026 | |
| 14027 | /* For "val / 1", just return "val" */ |
| 14028 | |
| 14029 | if (op2->IsIntegralConst(1)) |
| 14030 | { |
| 14031 | DEBUG_DESTROY_NODE(tree); |
| 14032 | return op1; |
| 14033 | } |
| 14034 | |
| 14035 | break; |
| 14036 | |
| 14037 | case GT_LSH: |
| 14038 | |
| 14039 | /* Check for the case "(val + icon) << icon" */ |
| 14040 | |
| 14041 | if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow()) |
| 14042 | { |
| 14043 | GenTree* cns = op1->gtOp.gtOp2; |
| 14044 | |
| 14045 | if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0)) |
| 14046 | { |
| 14047 | ssize_t ishf = op2->gtIntConCommon.IconValue(); |
| 14048 | ssize_t iadd = cns->gtIntConCommon.IconValue(); |
| 14049 | |
| 14050 | // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n"); |
| 14051 | |
| 14052 | /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */ |
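| | // For example, (x + 2) << 3 becomes (x << 3) + 16.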
| 14053 | |
| 14054 | tree->ChangeOper(GT_ADD); |
| 14055 | ssize_t result = iadd << ishf; |
| 14056 | op2->gtIntConCommon.SetIconValue(result); |
| 14057 | #ifdef _TARGET_64BIT_ |
| 14058 | if (op1->gtType == TYP_INT) |
| 14059 | { |
| 14060 | op2->AsIntCon()->TruncateOrSignExtend32(); |
| 14061 | } |
| 14062 | #endif // _TARGET_64BIT_ |
| 14063 | |
| 14064 | // we are reusing the shift amount node here, but the type we want is that of the shift result |
| 14065 | op2->gtType = op1->gtType; |
| 14066 | |
| 14067 | if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr && |
| 14068 | cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq()) |
| 14069 | { |
| 14070 | assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr); |
| 14071 | op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq; |
| 14072 | } |
| 14073 | |
| 14074 | op1->ChangeOper(GT_LSH); |
| 14075 | |
| 14076 | cns->gtIntConCommon.SetIconValue(ishf); |
| 14077 | } |
| 14078 | } |
| 14079 | |
| 14080 | break; |
| 14081 | |
| 14082 | case GT_XOR: |
| 14083 | |
| 14084 | if (!optValnumCSE_phase) |
| 14085 | { |
| 14086 | /* "x ^ -1" is "~x" */ |
| 14087 | |
| 14088 | if (op2->IsIntegralConst(-1)) |
| 14089 | { |
| 14090 | tree->ChangeOper(GT_NOT); |
| 14091 | tree->gtOp2 = nullptr; |
| 14092 | DEBUG_DESTROY_NODE(op2); |
| 14093 | } |
| 14094 | else if (op2->IsIntegralConst(1) && op1->OperIsCompare()) |
| 14095 | { |
| 14096 | /* "binaryVal ^ 1" is "!binaryVal" */ |
| 14097 | gtReverseCond(op1); |
| 14098 | DEBUG_DESTROY_NODE(op2); |
| 14099 | DEBUG_DESTROY_NODE(tree); |
| 14100 | return op1; |
| 14101 | } |
| 14102 | } |
| 14103 | |
| 14104 | break; |
| 14105 | |
| 14106 | case GT_INIT_VAL: |
| 14107 | // Initialization values for initBlk have special semantics - their lower |
| 14108 | // byte is used to fill the struct. However, we allow 0 as a "bare" value, |
| 14109 | // which enables them to get a VNForZero, and be propagated. |
| 14110 | if (op1->IsIntegralConst(0)) |
| 14111 | { |
| 14112 | return op1; |
| 14113 | } |
| 14114 | break; |
| 14115 | |
| 14116 | default: |
| 14117 | break; |
| 14118 | } |
| 14119 | return tree; |
| 14120 | } |
| 14121 | |
| 14122 | //------------------------------------------------------------------------ |
| 14123 | // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b |
| 14124 | // (see ECMA III 3.55 and III.3.56). |
| 14125 | // |
| 14126 | // Arguments: |
| 14127 | // tree - The GT_MOD/GT_UMOD tree to morph |
| 14128 | // |
| 14129 | // Returns: |
| 14130 | // The morphed tree |
| 14131 | // |
| 14132 | // Notes: |
| 14133 | // For ARM64 we don't have a remainder instruction so this transform is |
| 14134 | // always done. For XARCH this transform is done if we know that magic |
| 14135 | // division will be used, in that case this transform allows CSE to |
| 14136 | // eliminate the redundant div from code like "x = a / 3; y = a % 3;". |
| 14137 | // |
| 14138 | // This method will produce the above expression if 'a' and 'b' are
| 14139 | // leaf nodes; otherwise, if either of them is not a leaf, it will spill
| 14140 | // its value into a temporary variable, for example:
| 14141 | // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) ) |
| 14142 | // |
| 14143 | GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) |
| 14144 | { |
| 14145 | if (tree->OperGet() == GT_MOD) |
| 14146 | { |
| 14147 | tree->SetOper(GT_DIV); |
| 14148 | } |
| 14149 | else if (tree->OperGet() == GT_UMOD) |
| 14150 | { |
| 14151 | tree->SetOper(GT_UDIV); |
| 14152 | } |
| 14153 | else |
| 14154 | { |
| 14155 | noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
| 14156 | } |
| 14157 | |
| 14158 | var_types type = tree->gtType; |
| 14159 | GenTree* denominator = tree->gtOp2; |
| 14160 | GenTree* numerator = tree->gtOp1; |
| 14161 | |
| 14162 | if (!numerator->OperIsLeaf()) |
| 14163 | { |
| 14164 | numerator = fgMakeMultiUse(&tree->gtOp1); |
| 14165 | } |
| 14166 | |
| 14167 | if (!denominator->OperIsLeaf()) |
| 14168 | { |
| 14169 | denominator = fgMakeMultiUse(&tree->gtOp2); |
| 14170 | } |
| 14171 | |
| 14172 | // The numerator and denominator may have been assigned to temps, in which case |
| 14173 | // their defining assignments are in the current tree. Therefore, we need to |
| 14174 | // set the execution order accordingly on the nodes we create.
| 14175 | // That is, the "mul" will be evaluated in "normal" order, and the "sub" must |
| 14176 | // be set to be evaluated in reverse order. |
| 14177 | // |
| 14178 | GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator)); |
| 14179 | assert(!mul->IsReverseOp()); |
| 14180 | GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul); |
| 14181 | sub->gtFlags |= GTF_REVERSE_OPS; |
| 14182 | |
| 14183 | #ifdef DEBUG |
| 14184 | sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 14185 | #endif |
| 14186 | |
| 14187 | return sub; |
| 14188 | } |
| 14189 | |
| 14190 | //------------------------------------------------------------------------------ |
| 14191 | // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree. |
| 14192 | // |
| 14193 | // |
| 14194 | // Arguments: |
| 14195 | // oper - Operation to check |
| 14196 | // |
| 14197 | // Return Value: |
| 14198 | // True if the operation can be a root of a bitwise rotation tree; false otherwise. |
| 14199 | |
| 14200 | bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper) |
| 14201 | { |
| 14202 | return (oper == GT_OR) || (oper == GT_XOR); |
| 14203 | } |
| 14204 | |
| 14205 | //------------------------------------------------------------------------------ |
| 14206 | // fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return |
| 14207 | // an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree. |
| 14208 | // |
| 14209 | // Arguments: |
| 14210 | // tree - tree to check for a rotation pattern |
| 14211 | // |
| 14212 | // Return Value: |
| 14213 | // An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise. |
| 14214 | // |
| 14215 | // Assumption: |
| 14216 | // The input is a GT_OR or a GT_XOR tree. |
| 14217 | |
| 14218 | GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree) |
| 14219 | { |
| 14220 | // |
| 14221 | // Check for a rotation pattern, e.g., |
| 14222 | // |
| 14223 | // OR ROL |
| 14224 | // / \ / \ |
| 14225 | // LSH RSZ -> x y |
| 14226 | // / \ / \ |
| 14227 | // x AND x AND |
| 14228 | // / \ / \ |
| 14229 | // y 31 ADD 31 |
| 14230 | // / \ |
| 14231 | // NEG 32 |
| 14232 | // | |
| 14233 | // y |
| 14234 | // The patterns recognized: |
| 14235 | // (x << (y & M)) op (x >>> ((-y + N) & M)) |
| 14236 | // (x >>> ((-y + N) & M)) op (x << (y & M)) |
| 14237 | // |
| 14238 | // (x << y) op (x >>> (-y + N)) |
| 14239 | // (x >>> (-y + N)) op (x << y)
| 14240 | // |
| 14241 | // (x >>> (y & M)) op (x << ((-y + N) & M)) |
| 14242 | // (x << ((-y + N) & M)) op (x >>> (y & M)) |
| 14243 | // |
| 14244 | // (x >>> y) op (x << (-y + N)) |
| 14245 | // (x << (-y + N)) op (x >>> y) |
| 14246 | // |
| 14247 | // (x << c1) op (x >>> c2) |
| 14248 | // (x >>> c1) op (x << c2) |
| 14249 | // |
| 14250 | // where |
| 14251 | // c1 and c2 are const |
| 14252 | // c1 + c2 == bitsize(x) |
| 14253 | // N == bitsize(x) |
| 14254 | // M is const |
| 14255 | // M & (N - 1) == N - 1 |
| 14256 | // op is either | or ^ |
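| | // For example, with a 32-bit 'x', (x << 3) | (x >>> 29) is recognized and morphed into ROL(x, 3).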
| 14257 | |
| 14258 | if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0)) |
| 14259 | { |
| 14260 | // We can't do anything if the tree has assignments, calls, or volatile |
| 14261 | // reads. Note that we allow GTF_EXCEPT side effect since any exceptions |
| 14262 | // thrown by the original tree will be thrown by the transformed tree as well. |
| 14263 | return tree; |
| 14264 | } |
| 14265 | |
| 14266 | genTreeOps oper = tree->OperGet(); |
| 14267 | assert(fgOperIsBitwiseRotationRoot(oper)); |
| 14268 | |
| 14269 | // Check if we have an LSH on one side of the OR and an RSZ on the other side. |
| 14270 | GenTree* op1 = tree->gtGetOp1(); |
| 14271 | GenTree* op2 = tree->gtGetOp2(); |
| 14272 | GenTree* leftShiftTree = nullptr; |
| 14273 | GenTree* rightShiftTree = nullptr; |
| 14274 | if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ)) |
| 14275 | { |
| 14276 | leftShiftTree = op1; |
| 14277 | rightShiftTree = op2; |
| 14278 | } |
| 14279 | else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH)) |
| 14280 | { |
| 14281 | leftShiftTree = op2; |
| 14282 | rightShiftTree = op1; |
| 14283 | } |
| 14284 | else |
| 14285 | { |
| 14286 | return tree; |
| 14287 | } |
| 14288 | |
| 14289 | // Check if the trees representing the value to shift are identical. |
| 14290 | // We already checked that there are no side effects above. |
| 14291 | if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1())) |
| 14292 | { |
| 14293 | GenTree* rotatedValue = leftShiftTree->gtGetOp1(); |
| 14294 | var_types rotatedValueActualType = genActualType(rotatedValue->gtType); |
| 14295 | ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8; |
| 14296 | noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64)); |
| 14297 | GenTree* leftShiftIndex = leftShiftTree->gtGetOp2(); |
| 14298 | GenTree* rightShiftIndex = rightShiftTree->gtGetOp2(); |
| 14299 | |
| 14300 | // The shift index may be masked. The mask must keep at least the low (rotatedValueBitSize - 1)
| 14301 | // bits of the shift index for the transformation to be valid. If additional
| 14302 | // higher bits are also kept, the transformation is still valid since the result
| 14303 | // of MSIL shift instructions is unspecified if the shift amount is greater than or
| 14304 | // equal to the width of the value being shifted.
| 14305 | ssize_t minimalMask = rotatedValueBitSize - 1; |
| 14306 | ssize_t leftShiftMask = -1; |
| 14307 | ssize_t rightShiftMask = -1; |
| 14308 | |
| 14309 | if ((leftShiftIndex->OperGet() == GT_AND)) |
| 14310 | { |
| 14311 | if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI()) |
| 14312 | { |
| 14313 | leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal; |
| 14314 | leftShiftIndex = leftShiftIndex->gtGetOp1(); |
| 14315 | } |
| 14316 | else |
| 14317 | { |
| 14318 | return tree; |
| 14319 | } |
| 14320 | } |
| 14321 | |
| 14322 | if ((rightShiftIndex->OperGet() == GT_AND)) |
| 14323 | { |
| 14324 | if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI()) |
| 14325 | { |
| 14326 | rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal; |
| 14327 | rightShiftIndex = rightShiftIndex->gtGetOp1(); |
| 14328 | } |
| 14329 | else |
| 14330 | { |
| 14331 | return tree; |
| 14332 | } |
| 14333 | } |
| 14334 | |
| 14335 | if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask)) |
| 14336 | { |
| 14337 | // The shift index is overmasked, e.g., we have |
| 14338 | // something like (x << (y & 15)) or
| 14339 | // (x >>> ((32 - y) & 15)) with a 32-bit x.
| 14340 | // The transformation is not valid. |
| 14341 | return tree; |
| 14342 | } |
| 14343 | |
| 14344 | GenTree* shiftIndexWithAdd = nullptr; |
| 14345 | GenTree* shiftIndexWithoutAdd = nullptr; |
| 14346 | genTreeOps rotateOp = GT_NONE; |
| 14347 | GenTree* rotateIndex = nullptr; |
| 14348 | |
| 14349 | if (leftShiftIndex->OperGet() == GT_ADD) |
| 14350 | { |
| 14351 | shiftIndexWithAdd = leftShiftIndex; |
| 14352 | shiftIndexWithoutAdd = rightShiftIndex; |
| 14353 | rotateOp = GT_ROR; |
| 14354 | } |
| 14355 | else if (rightShiftIndex->OperGet() == GT_ADD) |
| 14356 | { |
| 14357 | shiftIndexWithAdd = rightShiftIndex; |
| 14358 | shiftIndexWithoutAdd = leftShiftIndex; |
| 14359 | rotateOp = GT_ROL; |
| 14360 | } |
| 14361 | |
| 14362 | if (shiftIndexWithAdd != nullptr) |
| 14363 | { |
| 14364 | if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI()) |
| 14365 | { |
| 14366 | if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize) |
| 14367 | { |
| 14368 | if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG) |
| 14369 | { |
| 14370 | if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd)) |
| 14371 | { |
| 14372 | // We found one of these patterns: |
| 14373 | // (x << (y & M)) | (x >>> ((-y + N) & M)) |
| 14374 | // (x << y) | (x >>> (-y + N)) |
| 14375 | // (x >>> (y & M)) | (x << ((-y + N) & M)) |
| 14376 | // (x >>> y) | (x << (-y + N)) |
| 14377 | // where N == bitsize(x), M is const, and |
| 14378 | // M & (N - 1) == N - 1 |
| 14379 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 14380 | |
| 14381 | #ifndef _TARGET_64BIT_ |
| 14382 | if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64)) |
| 14383 | { |
| 14384 | // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86. |
| 14385 | // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need |
| 14386 | // to add helpers for GT_ROL and GT_ROR. |
| 14387 | return tree; |
| 14388 | } |
| 14389 | #endif |
| 14390 | |
| 14391 | rotateIndex = shiftIndexWithoutAdd; |
| 14392 | } |
| 14393 | } |
| 14394 | } |
| 14395 | } |
| 14396 | } |
| 14397 | else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI())) |
| 14398 | { |
| 14399 | if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize) |
| 14400 | { |
| 14401 | // We found this pattern: |
| 14402 | // (x << c1) | (x >>> c2) |
| 14403 | // where c1 and c2 are const and c1 + c2 == bitsize(x) |
| 14404 | rotateOp = GT_ROL; |
| 14405 | rotateIndex = leftShiftIndex; |
| 14406 | } |
| 14407 | } |
| 14408 | |
| 14409 | if (rotateIndex != nullptr) |
| 14410 | { |
| 14411 | noway_assert(GenTree::OperIsRotate(rotateOp)); |
| 14412 | |
| 14413 | unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT; |
| 14414 | |
| 14415 | // We can use the same tree only during global morph; reusing the tree in a later morph |
| 14416 | // may invalidate value numbers. |
| 14417 | if (fgGlobalMorph) |
| 14418 | { |
| 14419 | tree->gtOp.gtOp1 = rotatedValue; |
| 14420 | tree->gtOp.gtOp2 = rotateIndex; |
| 14421 | tree->ChangeOper(rotateOp); |
| 14422 | |
| 14423 | unsigned childFlags = 0; |
| 14424 | for (GenTree* op : tree->Operands()) |
| 14425 | { |
| 14426 | childFlags |= (op->gtFlags & GTF_ALL_EFFECT); |
| 14427 | } |
| 14428 | |
| 14429 | // The parent's flags should be a superset of its operands' flags |
| 14430 | noway_assert((inputTreeEffects & childFlags) == childFlags); |
| 14431 | } |
| 14432 | else |
| 14433 | { |
| 14434 | tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex); |
| 14435 | noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT)); |
| 14436 | } |
| 14437 | |
| 14438 | return tree; |
| 14439 | } |
| 14440 | } |
| 14441 | return tree; |
| 14442 | } |
| 14443 | |
| 14444 | #if !CPU_HAS_FP_SUPPORT |
| 14445 | GenTree* Compiler::fgMorphToEmulatedFP(GenTree* tree) |
| 14446 | { |
| 14447 | |
| 14448 | genTreeOps oper = tree->OperGet(); |
| 14449 | var_types typ = tree->TypeGet(); |
| 14450 | GenTree* op1 = tree->gtOp.gtOp1; |
| 14451 | GenTree* op2 = tree->gtGetOp2IfPresent(); |
| 14452 | |
| 14453 | /* |
| 14454 | We have to use helper calls for all FP operations: |
| 14455 | |
| 14456 | FP operators that operate on FP values |
| 14457 | casts to and from FP |
| 14458 | comparisons of FP values |
| 14459 | */ |
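| | // For example, a float addition "a + b" is rewritten below into a call to the
| | // CPX_R4_ADD helper (CPX_R8_ADD for doubles); note that for binary operators the
| | // argument list is built as gtNewArgList(op2, op1), i.e. in reverse order.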
| 14460 | |
| 14461 | if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet()))) |
| 14462 | { |
| 14463 | int helper; |
| 14464 | GenTree* args; |
| 14465 | |
| 14466 | /* Not all FP operations need helper calls */ |
| 14467 | |
| 14468 | switch (oper) |
| 14469 | { |
| 14470 | case GT_ASG: |
| 14471 | case GT_IND: |
| 14472 | case GT_LIST: |
| 14473 | case GT_ADDR: |
| 14474 | case GT_COMMA: |
| 14475 | return tree; |
| 14476 | } |
| 14477 | |
| 14478 | #ifdef DEBUG |
| 14479 | |
| 14480 | /* If the result isn't FP, it better be a compare or cast */ |
| 14481 | |
| 14482 | if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST)) |
| 14483 | gtDispTree(tree); |
| 14484 | |
| 14485 | noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST); |
| 14486 | #endif |
| 14487 | |
| 14488 | /* Figure out which helper to call and build its argument list */
| 14489 | |
| 14490 | /* Is this a binary operator? */ |
| 14491 | |
| 14492 | if (op2) |
| 14493 | { |
| 14494 | /* What kind of an operator do we have? */ |
| 14495 | |
| 14496 | switch (oper) |
| 14497 | { |
| 14498 | case GT_ADD: |
| 14499 | helper = CPX_R4_ADD; |
| 14500 | break; |
| 14501 | case GT_SUB: |
| 14502 | helper = CPX_R4_SUB; |
| 14503 | break; |
| 14504 | case GT_MUL: |
| 14505 | helper = CPX_R4_MUL; |
| 14506 | break; |
| 14507 | case GT_DIV: |
| 14508 | helper = CPX_R4_DIV; |
| 14509 | break; |
| 14510 | // case GT_MOD: helper = CPX_R4_REM; break; |
| 14511 | |
| 14512 | case GT_EQ: |
| 14513 | helper = CPX_R4_EQ; |
| 14514 | break; |
| 14515 | case GT_NE: |
| 14516 | helper = CPX_R4_NE; |
| 14517 | break; |
| 14518 | case GT_LT: |
| 14519 | helper = CPX_R4_LT; |
| 14520 | break; |
| 14521 | case GT_LE: |
| 14522 | helper = CPX_R4_LE; |
| 14523 | break; |
| 14524 | case GT_GE: |
| 14525 | helper = CPX_R4_GE; |
| 14526 | break; |
| 14527 | case GT_GT: |
| 14528 | helper = CPX_R4_GT; |
| 14529 | break; |
| 14530 | |
| 14531 | default: |
| 14532 | #ifdef DEBUG |
| 14533 | gtDispTree(tree); |
| 14534 | #endif |
| 14535 | noway_assert(!"unexpected FP binary op" ); |
| 14536 | break; |
| 14537 | } |
| 14538 | |
| 14539 | args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1); |
| 14540 | } |
| 14541 | else |
| 14542 | { |
| 14543 | switch (oper) |
| 14544 | { |
| 14545 | case GT_RETURN: |
| 14546 | return tree; |
| 14547 | |
| 14548 | case GT_CAST: |
| 14549 | noway_assert(!"FP cast" ); |
| 14550 | |
| 14551 | case GT_NEG: |
| 14552 | helper = CPX_R4_NEG; |
| 14553 | break; |
| 14554 | |
| 14555 | default: |
| 14556 | #ifdef DEBUG |
| 14557 | gtDispTree(tree); |
| 14558 | #endif |
| 14559 | noway_assert(!"unexpected FP unary op" ); |
| 14560 | break; |
| 14561 | } |
| 14562 | |
| 14563 | args = gtNewArgList(tree->gtOp.gtOp1); |
| 14564 | } |
| 14565 | |
| 14566 | /* If we have double result/operands, modify the helper */ |
| 14567 | |
| 14568 | if (typ == TYP_DOUBLE) |
| 14569 | { |
| 14570 | static_assert_no_msg(CPX_R4_NEG + 1 == CPX_R8_NEG); |
| 14571 | static_assert_no_msg(CPX_R4_ADD + 1 == CPX_R8_ADD); |
| 14572 | static_assert_no_msg(CPX_R4_SUB + 1 == CPX_R8_SUB); |
| 14573 | static_assert_no_msg(CPX_R4_MUL + 1 == CPX_R8_MUL); |
| 14574 | static_assert_no_msg(CPX_R4_DIV + 1 == CPX_R8_DIV); |
| 14575 | |
| 14576 | helper++; |
| 14577 | } |
| 14578 | else |
| 14579 | { |
| 14580 | noway_assert(tree->OperIsCompare()); |
| 14581 | |
| 14582 | static_assert_no_msg(CPX_R4_EQ + 1 == CPX_R8_EQ); |
| 14583 | static_assert_no_msg(CPX_R4_NE + 1 == CPX_R8_NE); |
| 14584 | static_assert_no_msg(CPX_R4_LT + 1 == CPX_R8_LT); |
| 14585 | static_assert_no_msg(CPX_R4_LE + 1 == CPX_R8_LE); |
| 14586 | static_assert_no_msg(CPX_R4_GE + 1 == CPX_R8_GE); |
| 14587 | static_assert_no_msg(CPX_R4_GT + 1 == CPX_R8_GT); |
| 14588 | } |
| 14589 | |
| 14590 | tree = fgMorphIntoHelperCall(tree, helper, args); |
| 14591 | |
| 14592 | return tree;
| 14593 | }
| 14594 | else if (oper == GT_RETURN)
| 14595 | {
| 14596 | if (op1) |
| 14597 | { |
| 14598 | |
| 14599 | if (compCurBB == genReturnBB) |
| 14600 | { |
| 14601 | /* This is the 'exitCrit' call at the exit label */ |
| 14602 | |
| 14603 | noway_assert(op1->gtType == TYP_VOID); |
| 14604 | noway_assert(op2 == 0); |
| 14605 | |
| 14606 | tree->gtOp.gtOp1 = op1 = fgMorphTree(op1); |
| 14607 | |
| 14608 | return tree; |
| 14609 | } |
| 14610 | |
| 14611 | /* This is a (real) return value -- check its type */ |
| 14612 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 14613 | |
| 14614 | #ifdef DEBUG |
| 14615 | if (genActualType(op1->TypeGet()) != genActualType(info.compRetType)) |
| 14616 | { |
| 14617 | bool allowMismatch = false; |
| 14618 | |
| 14619 | // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa |
| 14620 | if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) || |
| 14621 | (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL)) |
| 14622 | allowMismatch = true; |
| 14623 | |
| 14624 | if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet())) |
| 14625 | allowMismatch = true; |
| 14626 | |
| 14627 | if (!allowMismatch) |
| 14628 | NO_WAY("Return type mismatch" ); |
| 14629 | } |
| 14630 | #endif |
| 14631 | } |
| 14632 | }
| 14634 | return tree; |
| 14635 | } |
| 14636 | #endif |
| 14637 | |
| 14638 | /***************************************************************************** |
| 14639 | * |
| 14640 | * Transform the given tree for code generation and return an equivalent tree. |
| 14641 | */ |
| 14642 | |
| 14643 | GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac) |
| 14644 | { |
| 14645 | assert(tree); |
| 14646 | assert(tree->gtOper != GT_STMT); |
| 14647 | |
| 14648 | #ifdef DEBUG |
| 14649 | if (verbose) |
| 14650 | { |
| 14651 | if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID) |
| 14652 | { |
| 14653 | noway_assert(!"JitBreakMorphTree hit" ); |
| 14654 | } |
| 14655 | } |
| 14656 | #endif |
| 14657 | |
| 14658 | #ifdef DEBUG |
| 14659 | int thisMorphNum = 0; |
| 14660 | if (verbose && treesBeforeAfterMorph) |
| 14661 | { |
| 14662 | thisMorphNum = morphNum++; |
| 14663 | printf("\nfgMorphTree (before %d):\n" , thisMorphNum); |
| 14664 | gtDispTree(tree); |
| 14665 | } |
| 14666 | #endif |
| 14667 | |
| 14668 | if (fgGlobalMorph) |
| 14669 | { |
| 14670 | // Apply any rewrites for implicit byref arguments before morphing the |
| 14671 | // tree. |
| 14672 | |
| 14673 | if (fgMorphImplicitByRefArgs(tree)) |
| 14674 | { |
| 14675 | #ifdef DEBUG |
| 14676 | if (verbose && treesBeforeAfterMorph) |
| 14677 | { |
| 14678 | printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n" , thisMorphNum); |
| 14679 | gtDispTree(tree); |
| 14680 | } |
| 14681 | #endif |
| 14682 | } |
| 14683 | } |
| 14684 | |
| 14685 | /*------------------------------------------------------------------------- |
| 14686 | * fgMorphTree() can potentially replace a tree with another, and the |
| 14687 | * caller has to store the return value correctly. |
| 14688 | * Turn this on to always make copy of "tree" here to shake out |
| 14689 | * hidden/unupdated references. |
| 14690 | */ |
| 14691 | |
| 14692 | #ifdef DEBUG |
| 14693 | |
| 14694 | if (compStressCompile(STRESS_GENERIC_CHECK, 0)) |
| 14695 | { |
| 14696 | GenTree* copy; |
| 14697 | |
| 14698 | #ifdef SMALL_TREE_NODES |
| 14699 | if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL) |
| 14700 | { |
| 14701 | copy = gtNewLargeOperNode(GT_ADD, TYP_INT); |
| 14702 | } |
| 14703 | else |
| 14704 | #endif |
| 14705 | { |
| 14706 | copy = new (this, GT_CALL) GenTreeCall(TYP_INT); |
| 14707 | } |
| 14708 | |
| 14709 | copy->ReplaceWith(tree, this); |
| 14710 | |
| 14711 | #if defined(LATE_DISASM) |
| 14712 | // GT_CNS_INT is considered small, so ReplaceWith() won't copy all fields |
| 14713 | if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle()) |
| 14714 | { |
| 14715 | copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle; |
| 14716 | } |
| 14717 | #endif |
| 14718 | |
| 14719 | DEBUG_DESTROY_NODE(tree); |
| 14720 | tree = copy; |
| 14721 | } |
| 14722 | #endif // DEBUG |
| 14723 | |
| 14724 | if (fgGlobalMorph) |
| 14725 | { |
| 14726 | /* Ensure that we haven't morphed this node already */ |
| 14727 | assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
| 14728 | |
| 14729 | #if LOCAL_ASSERTION_PROP |
| 14730 | /* Before morphing the tree, we try to propagate any active assertions */ |
| 14731 | if (optLocalAssertionProp) |
| 14732 | { |
| 14733 | /* Do we have any active assertions? */ |
| 14734 | |
| 14735 | if (optAssertionCount > 0) |
| 14736 | { |
| 14737 | GenTree* newTree = tree; |
| 14738 | while (newTree != nullptr) |
| 14739 | { |
| 14740 | tree = newTree; |
| 14741 | /* newTree is non-Null if we propagated an assertion */ |
| 14742 | newTree = optAssertionProp(apFull, tree, nullptr); |
| 14743 | } |
| 14744 | assert(tree != nullptr); |
| 14745 | } |
| 14746 | } |
| 14747 | PREFAST_ASSUME(tree != nullptr); |
| 14748 | #endif |
| 14749 | } |
| 14750 | |
| 14751 | /* Save the original un-morphed tree for fgMorphTreeDone */ |
| 14752 | |
| 14753 | GenTree* oldTree = tree; |
| 14754 | |
| 14755 | /* Figure out what kind of a node we have */ |
| 14756 | |
| 14757 | unsigned kind = tree->OperKind(); |
| 14758 | |
| 14759 | /* Is this a constant node? */ |
| 14760 | |
| 14761 | if (kind & GTK_CONST) |
| 14762 | { |
| 14763 | tree = fgMorphConst(tree); |
| 14764 | goto DONE; |
| 14765 | } |
| 14766 | |
| 14767 | /* Is this a leaf node? */ |
| 14768 | |
| 14769 | if (kind & GTK_LEAF) |
| 14770 | { |
| 14771 | tree = fgMorphLeaf(tree); |
| 14772 | goto DONE; |
| 14773 | } |
| 14774 | |
| 14775 | /* Is it a 'simple' unary/binary operator? */ |
| 14776 | |
| 14777 | if (kind & GTK_SMPOP) |
| 14778 | { |
| 14779 | tree = fgMorphSmpOp(tree, mac); |
| 14780 | goto DONE; |
| 14781 | } |
| 14782 | |
| 14783 | /* See what kind of a special operator we have here */ |
| 14784 | |
| 14785 | switch (tree->OperGet()) |
| 14786 | { |
| 14787 | case GT_FIELD: |
| 14788 | tree = fgMorphField(tree, mac); |
| 14789 | break; |
| 14790 | |
| 14791 | case GT_CALL: |
| 14792 | if (tree->OperMayThrow(this)) |
| 14793 | { |
| 14794 | tree->gtFlags |= GTF_EXCEPT; |
| 14795 | } |
| 14796 | else |
| 14797 | { |
| 14798 | tree->gtFlags &= ~GTF_EXCEPT; |
| 14799 | } |
| 14800 | tree = fgMorphCall(tree->AsCall()); |
| 14801 | break; |
| 14802 | |
| 14803 | case GT_ARR_BOUNDS_CHECK: |
| 14804 | #ifdef FEATURE_SIMD |
| 14805 | case GT_SIMD_CHK: |
| 14806 | #endif // FEATURE_SIMD |
| 14807 | #ifdef FEATURE_HW_INTRINSICS |
| 14808 | case GT_HW_INTRINSIC_CHK: |
| 14809 | #endif // FEATURE_HW_INTRINSICS |
| 14810 | { |
| 14811 | fgSetRngChkTarget(tree); |
| 14812 | |
| 14813 | GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); |
| 14814 | bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex); |
| 14815 | bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen); |
| 14816 | // If the index is a comma(throw, x), just return that. |
| 14817 | if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex)) |
| 14818 | { |
| 14819 | tree = bndsChk->gtIndex; |
| 14820 | } |
| 14821 | |
| 14822 | bndsChk->gtFlags &= ~GTF_CALL; |
| 14823 | |
| 14824 | // Propagate effects flags upwards |
| 14825 | bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT); |
| 14826 | bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT); |
| 14827 | |
| 14828 | // Otherwise, we don't change the tree. |
| 14829 | } |
| 14830 | break; |
| 14831 | |
| 14832 | case GT_ARR_ELEM: |
| 14833 | tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj); |
| 14834 | |
| 14835 | unsigned dim; |
| 14836 | for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) |
| 14837 | { |
| 14838 | tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]); |
| 14839 | } |
| 14840 | |
| 14841 | tree->gtFlags &= ~GTF_CALL; |
| 14842 | |
| 14843 | tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT; |
| 14844 | |
| 14845 | for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) |
| 14846 | { |
| 14847 | tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT; |
| 14848 | } |
| 14849 | |
| 14850 | if (fgGlobalMorph) |
| 14851 | { |
| 14852 | fgSetRngChkTarget(tree, false); |
| 14853 | } |
| 14854 | break; |
| 14855 | |
| 14856 | case GT_ARR_OFFSET: |
| 14857 | tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset); |
| 14858 | tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex); |
| 14859 | tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj); |
| 14860 | |
| 14861 | tree->gtFlags &= ~GTF_CALL; |
| 14862 | tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT; |
| 14863 | tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT; |
| 14864 | tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT; |
| 14865 | if (fgGlobalMorph) |
| 14866 | { |
| 14867 | fgSetRngChkTarget(tree, false); |
| 14868 | } |
| 14869 | break; |
| 14870 | |
| 14871 | case GT_CMPXCHG: |
| 14872 | tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation); |
| 14873 | tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue); |
| 14874 | tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand); |
| 14875 | |
| 14876 | tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL); |
| 14877 | |
| 14878 | tree->gtFlags |= tree->gtCmpXchg.gtOpLocation->gtFlags & GTF_ALL_EFFECT; |
| 14879 | tree->gtFlags |= tree->gtCmpXchg.gtOpValue->gtFlags & GTF_ALL_EFFECT; |
| 14880 | tree->gtFlags |= tree->gtCmpXchg.gtOpComparand->gtFlags & GTF_ALL_EFFECT; |
| 14881 | break; |
| 14882 | |
| 14883 | case GT_STORE_DYN_BLK: |
| 14884 | case GT_DYN_BLK: |
| 14885 | if (tree->OperGet() == GT_STORE_DYN_BLK) |
| 14886 | { |
| 14887 | tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data()); |
| 14888 | } |
| 14889 | tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr()); |
| 14890 | tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize); |
| 14891 | |
| 14892 | tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL); |
| 14893 | tree->SetIndirExceptionFlags(this); |
| 14894 | |
| 14895 | if (tree->OperGet() == GT_STORE_DYN_BLK) |
| 14896 | { |
| 14897 | tree->gtFlags |= tree->gtDynBlk.Data()->gtFlags & GTF_ALL_EFFECT; |
| 14898 | } |
| 14899 | tree->gtFlags |= tree->gtDynBlk.Addr()->gtFlags & GTF_ALL_EFFECT; |
| 14900 | tree->gtFlags |= tree->gtDynBlk.gtDynamicSize->gtFlags & GTF_ALL_EFFECT; |
| 14901 | break; |
| 14902 | |
| 14903 | case GT_INDEX_ADDR: |
| 14904 | GenTreeIndexAddr* indexAddr; |
| 14905 | indexAddr = tree->AsIndexAddr(); |
| 14906 | indexAddr->Index() = fgMorphTree(indexAddr->Index()); |
| 14907 | indexAddr->Arr() = fgMorphTree(indexAddr->Arr()); |
| 14908 | |
| 14909 | tree->gtFlags &= ~GTF_CALL; |
| 14910 | |
| 14911 | tree->gtFlags |= indexAddr->Index()->gtFlags & GTF_ALL_EFFECT; |
| 14912 | tree->gtFlags |= indexAddr->Arr()->gtFlags & GTF_ALL_EFFECT; |
| 14913 | break; |
| 14914 | |
| 14915 | default: |
| 14916 | #ifdef DEBUG |
| 14917 | gtDispTree(tree); |
| 14918 | #endif |
| 14919 | noway_assert(!"unexpected operator" ); |
| 14920 | } |
| 14921 | DONE: |
| 14922 | |
| 14923 | fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum)); |
| 14924 | |
| 14925 | return tree; |
| 14926 | } |
| 14927 | |
| 14928 | #if LOCAL_ASSERTION_PROP |
| 14929 | //------------------------------------------------------------------------ |
| 14930 | // fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum |
| 14931 | // |
| 14932 | // Arguments: |
| 14933 | // lclNum - The varNum of the lclVar for which we're killing assertions. |
| 14934 | // tree - (DEBUG only) the tree responsible for killing its assertions. |
| 14935 | // |
| 14936 | void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* tree)) |
| 14937 | { |
| 14938 | /* All dependent assertions are killed here */ |
| 14939 | |
| 14940 | ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum)); |
| 14941 | |
| 14942 | if (killed) |
| 14943 | { |
| 14944 | AssertionIndex index = optAssertionCount; |
| 14945 | while (killed && (index > 0)) |
| 14946 | { |
| 14947 | if (BitVecOps::IsMember(apTraits, killed, index - 1)) |
| 14948 | { |
| 14949 | #ifdef DEBUG |
| 14950 | AssertionDsc* curAssertion = optGetAssertion(index); |
| 14951 | noway_assert((curAssertion->op1.lcl.lclNum == lclNum) || |
| 14952 | ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum))); |
| 14953 | if (verbose) |
| 14954 | { |
| 14955 | printf("\nThe assignment " ); |
| 14956 | printTreeID(tree); |
| 14957 | printf(" using V%02u removes: " , curAssertion->op1.lcl.lclNum); |
| 14958 | optPrintAssertion(curAssertion); |
| 14959 | } |
| 14960 | #endif |
| 14961 | // Remove this bit from the killed mask |
| 14962 | BitVecOps::RemoveElemD(apTraits, killed, index - 1); |
| 14963 | |
| 14964 | optAssertionRemove(index); |
| 14965 | } |
| 14966 | |
| 14967 | index--; |
| 14968 | } |
| 14969 | |
| 14970 | // killed mask should now be zero |
| 14971 | noway_assert(BitVecOps::IsEmpty(apTraits, killed)); |
| 14972 | } |
| 14973 | } |
| 14974 | //------------------------------------------------------------------------ |
| 14975 | // fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum. |
| 14976 | // |
| 14977 | // Arguments: |
| 14978 | // lclNum - The varNum of the lclVar for which we're killing assertions. |
| 14979 | // tree - (DEBUG only) the tree responsible for killing its assertions. |
| 14980 | // |
| 14981 | // Notes: |
| 14982 | // For structs and struct fields, it will invalidate the children and parent |
| 14983 | // respectively. |
| 14984 | // Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar. |
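| | //    For example, if V02 is a promoted struct with field locals V05..V07, an assignment
| | //    to V02 kills the assertions that depend on V05, V06, V07 and on V02 itself, while an
| | //    assignment to V06 kills those for V06 and for its parent V02 (local numbers are
| | //    illustrative only).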
| 14985 | // |
| 14986 | void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree)) |
| 14987 | { |
| 14988 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 14989 | |
| 14990 | if (varDsc->lvPromoted) |
| 14991 | { |
| 14992 | noway_assert(varTypeIsStruct(varDsc)); |
| 14993 | |
| 14994 | // Kill the field locals. |
| 14995 | for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) |
| 14996 | { |
| 14997 | fgKillDependentAssertionsSingle(i DEBUGARG(tree)); |
| 14998 | } |
| 14999 | |
| 15000 | // Kill the struct local itself. |
| 15001 | fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); |
| 15002 | } |
| 15003 | else if (varDsc->lvIsStructField) |
| 15004 | { |
| 15005 | // Kill the field local. |
| 15006 | fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); |
| 15007 | |
| 15008 | // Kill the parent struct. |
| 15009 | fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree)); |
| 15010 | } |
| 15011 | else |
| 15012 | { |
| 15013 | fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); |
| 15014 | } |
| 15015 | } |
| 15016 | #endif // LOCAL_ASSERTION_PROP |
| 15017 | |
| 15018 | /***************************************************************************** |
| 15019 | * |
| 15020 | * This function is called to complete the morphing of a tree node |
| 15021 | * It should only be called once for each node. |
| 15022 | * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated, |
| 15023 | * to enforce the invariant that each node is only morphed once. |
| 15024 | * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced |
| 15025 | * by an equivalent tree. |
| 15026 | * |
| 15027 | */ |
| 15028 | |
| 15029 | void Compiler::fgMorphTreeDone(GenTree* tree, |
| 15030 | GenTree* oldTree /* == NULL */ |
| 15031 | DEBUGARG(int morphNum)) |
| 15032 | { |
| 15033 | #ifdef DEBUG |
| 15034 | if (verbose && treesBeforeAfterMorph) |
| 15035 | { |
| 15036 | printf("\nfgMorphTree (after %d):\n" , morphNum); |
| 15037 | gtDispTree(tree); |
| 15038 | printf("" ); // in our logic this causes a flush |
| 15039 | } |
| 15040 | #endif |
| 15041 | |
| 15042 | if (!fgGlobalMorph) |
| 15043 | { |
| 15044 | return; |
| 15045 | } |
| 15046 | |
| 15047 | if ((oldTree != nullptr) && (oldTree != tree)) |
| 15048 | { |
| 15049 | /* Ensure that we have morphed this node */ |
| 15050 | assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
| 15051 | |
| 15052 | #ifdef DEBUG |
| 15053 | TransferTestDataToNode(oldTree, tree); |
| 15054 | #endif |
| 15055 | } |
| 15056 | else |
| 15057 | { |
| 15058 | // Ensure that we haven't morphed this node already |
| 15059 | assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
| 15060 | } |
| 15061 | |
| 15062 | if (tree->OperKind() & GTK_CONST) |
| 15063 | { |
| 15064 | goto DONE; |
| 15065 | } |
| 15066 | |
| 15067 | #if LOCAL_ASSERTION_PROP |
| 15068 | |
| 15069 | if (!optLocalAssertionProp) |
| 15070 | { |
| 15071 | goto DONE; |
| 15072 | } |
| 15073 | |
| 15074 | /* Do we have any active assertions? */ |
| 15075 | |
| 15076 | if (optAssertionCount > 0) |
| 15077 | { |
| 15078 | /* Is this an assignment to a local variable */ |
| 15079 | GenTreeLclVarCommon* lclVarTree = nullptr; |
| 15080 | if (tree->DefinesLocal(this, &lclVarTree)) |
| 15081 | { |
| 15082 | unsigned lclNum = lclVarTree->gtLclNum; |
| 15083 | noway_assert(lclNum < lvaCount); |
| 15084 | fgKillDependentAssertions(lclNum DEBUGARG(tree)); |
| 15085 | } |
| 15086 | } |
| 15087 | |
| 15088 | /* If this tree makes a new assertion - make it available */ |
| 15089 | optAssertionGen(tree); |
| 15090 | |
| 15091 | #endif // LOCAL_ASSERTION_PROP |
| 15092 | |
| 15093 | DONE:; |
| 15094 | |
| 15095 | #ifdef DEBUG |
| 15096 | /* Mark this node as being morphed */ |
| 15097 | tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
| 15098 | #endif |
| 15099 | } |
| 15100 | |
| 15101 | /***************************************************************************** |
| 15102 | * |
| 15103 | * Check for BBJ_COND and BBJ_SWITCH blocks whose condition has folded to a constant, and fold the jump
| 15104 | * Returns true if we modified the flow graph |
| 15105 | */ |
| 15106 | |
| 15107 | bool Compiler::fgFoldConditional(BasicBlock* block) |
| 15108 | { |
| 15109 | bool result = false; |
| 15110 | |
| 15111 | // Don't fold conditionals when optimizations are disabled; we don't want to make any code unreachable
| 15112 | if (opts.OptimizationDisabled()) |
| 15113 | { |
| 15114 | return false; |
| 15115 | } |
| 15116 | |
| 15117 | if (block->bbJumpKind == BBJ_COND) |
| 15118 | { |
| 15119 | noway_assert(block->bbTreeList && block->bbTreeList->gtPrev); |
| 15120 | |
| 15121 | GenTree* stmt = block->bbTreeList->gtPrev; |
| 15122 | |
| 15123 | noway_assert(stmt->gtNext == nullptr); |
| 15124 | |
| 15125 | if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL) |
| 15126 | { |
| 15127 | noway_assert(fgRemoveRestOfBlock); |
| 15128 | |
| 15129 | /* Unconditional throw - transform the basic block into a BBJ_THROW */ |
| 15130 | fgConvertBBToThrowBB(block); |
| 15131 | |
| 15132 | /* Remove 'block' from the predecessor list of 'block->bbNext' */ |
| 15133 | fgRemoveRefPred(block->bbNext, block); |
| 15134 | |
| 15135 | /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */ |
| 15136 | fgRemoveRefPred(block->bbJumpDest, block); |
| 15137 | |
| 15138 | #ifdef DEBUG |
| 15139 | if (verbose) |
| 15140 | { |
| 15141 | printf("\nConditional folded at " FMT_BB "\n" , block->bbNum); |
| 15142 | printf(FMT_BB " becomes a BBJ_THROW\n" , block->bbNum); |
| 15143 | } |
| 15144 | #endif |
| 15145 | goto DONE_COND; |
| 15146 | } |
| 15147 | |
| 15148 | noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); |
| 15149 | |
| 15150 | /* Did we fold the conditional */ |
| 15151 | |
| 15152 | noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1); |
| 15153 | GenTree* cond; |
| 15154 | cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1; |
| 15155 | |
| 15156 | if (cond->OperKind() & GTK_CONST) |
| 15157 | { |
| 15158 | /* Yippee - we folded the conditional!
| 15159 | * Remove the conditional statement */
| 15160 | |
| 15161 | noway_assert(cond->gtOper == GT_CNS_INT); |
| 15162 | noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0)); |
| 15163 | |
| 15164 | /* Remove the statement from bbTreeList - no need to update
| 15165 | * the reference counts since there are no lcl vars */
| 15166 | fgRemoveStmt(block, stmt); |
| 15167 | |
| 15168 | // block is a BBJ_COND that we are folding the conditional for |
| 15169 | // bTaken is the path that will always be taken from block |
| 15170 | // bNotTaken is the path that will never be taken from block |
| 15171 | // |
| 15172 | BasicBlock* bTaken; |
| 15173 | BasicBlock* bNotTaken; |
| 15174 | |
| 15175 | if (cond->gtIntCon.gtIconVal != 0) |
| 15176 | { |
| 15177 | /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */ |
| 15178 | block->bbJumpKind = BBJ_ALWAYS; |
| 15179 | bTaken = block->bbJumpDest; |
| 15180 | bNotTaken = block->bbNext; |
| 15181 | } |
| 15182 | else |
| 15183 | { |
| 15184 | /* Unmark the loop if we are removing a backwards branch */ |
| 15185 | /* dest block must also be marked as a loop head and */ |
| 15186 | /* We must be able to reach the backedge block */ |
| 15187 | if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && |
| 15188 | fgReachable(block->bbJumpDest, block)) |
| 15189 | { |
| 15190 | optUnmarkLoopBlocks(block->bbJumpDest, block); |
| 15191 | } |
| 15192 | |
| 15193 | /* JTRUE 0 - transform the basic block into a BBJ_NONE */ |
| 15194 | block->bbJumpKind = BBJ_NONE; |
| 15195 | noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL)); |
| 15196 | bTaken = block->bbNext; |
| 15197 | bNotTaken = block->bbJumpDest; |
| 15198 | } |
| 15199 | |
| 15200 | if (fgHaveValidEdgeWeights) |
| 15201 | { |
| 15202 | // We are removing an edge from block to bNotTaken |
| 15203 | // and we have already computed the edge weights, so |
| 15204 | // we will try to adjust some of the weights |
| 15205 | // |
| 15206 | flowList* edgeTaken = fgGetPredForBlock(bTaken, block); |
| 15207 | BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block |
| 15208 | |
| 15209 | // We examine the taken edge (block -> bTaken) |
| 15210 | // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight |
| 15211 | // else if bTaken has valid profile weight and block does not we try to adjust block's weight |
| 15212 | // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken |
| 15213 | // |
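| | // For example, if block has a profile weight of 100 and bTaken has no profile
| | // weight but only this one incoming edge, bTaken inherits the weight of 100;
| | // conversely, if bTaken has a profile weight and a single in-edge while block
| | // does not, block inherits bTaken's weight.
| | //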
| 15214 | if (block->hasProfileWeight()) |
| 15215 | { |
| 15216 | // The edge weights for (block -> bTaken) are 100% of block's weight |
| 15217 | edgeTaken->flEdgeWeightMin = block->bbWeight; |
| 15218 | edgeTaken->flEdgeWeightMax = block->bbWeight; |
| 15219 | |
| 15220 | if (!bTaken->hasProfileWeight()) |
| 15221 | { |
| 15222 | if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight)) |
| 15223 | { |
| 15224 | // Update the weight of bTaken |
| 15225 | bTaken->inheritWeight(block); |
| 15226 | bUpdated = bTaken; |
| 15227 | } |
| 15228 | } |
| 15229 | } |
| 15230 | else if (bTaken->hasProfileWeight()) |
| 15231 | { |
| 15232 | if (bTaken->countOfInEdges() == 1) |
| 15233 | { |
| 15234 | // There is only one in edge to bTaken |
| 15235 | edgeTaken->flEdgeWeightMin = bTaken->bbWeight; |
| 15236 | edgeTaken->flEdgeWeightMax = bTaken->bbWeight; |
| 15237 | |
| 15238 | // Update the weight of block |
| 15239 | block->inheritWeight(bTaken); |
| 15240 | bUpdated = block; |
| 15241 | } |
| 15242 | } |
| 15243 | |
| 15244 | if (bUpdated != nullptr) |
| 15245 | { |
| 15246 | flowList* edge; |
| 15247 | // Now fix the weights of the edges out of 'bUpdated' |
| 15248 | switch (bUpdated->bbJumpKind) |
| 15249 | { |
| 15250 | case BBJ_NONE: |
| 15251 | edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); |
| 15252 | edge->flEdgeWeightMax = bUpdated->bbWeight; |
| 15253 | break; |
| 15254 | case BBJ_COND: |
| 15255 | edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); |
| 15256 | edge->flEdgeWeightMax = bUpdated->bbWeight; |
| 15257 | __fallthrough; |
| 15258 | case BBJ_ALWAYS: |
| 15259 | edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated); |
| 15260 | edge->flEdgeWeightMax = bUpdated->bbWeight; |
| 15261 | break; |
| 15262 | default: |
| 15263 | // We don't handle BBJ_SWITCH |
| 15264 | break; |
| 15265 | } |
| 15266 | } |
| 15267 | } |
| 15268 | |
| 15269 | /* modify the flow graph */ |
| 15270 | |
| 15271 | /* Remove 'block' from the predecessor list of 'bNotTaken' */ |
| 15272 | fgRemoveRefPred(bNotTaken, block); |
| 15273 | |
| 15274 | #ifdef DEBUG |
| 15275 | if (verbose) |
| 15276 | { |
| 15277 | printf("\nConditional folded at " FMT_BB "\n" , block->bbNum); |
| 15278 | printf(FMT_BB " becomes a %s" , block->bbNum, |
| 15279 | block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE" ); |
| 15280 | if (block->bbJumpKind == BBJ_ALWAYS) |
| 15281 | { |
| 15282 | printf(" to " FMT_BB, block->bbJumpDest->bbNum); |
| 15283 | } |
| 15284 | printf("\n" ); |
| 15285 | } |
| 15286 | #endif |
| 15287 | |
| 15288 | /* if the block was a loop condition we may have to modify |
| 15289 | * the loop table */ |
| 15290 | |
| 15291 | for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) |
| 15292 | { |
| 15293 | /* Some loops may have been already removed by |
| 15294 | * loop unrolling or conditional folding */ |
| 15295 | |
| 15296 | if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED) |
| 15297 | { |
| 15298 | continue; |
| 15299 | } |
| 15300 | |
| 15301 | /* We are only interested in the loop bottom */ |
| 15302 | |
| 15303 | if (optLoopTable[loopNum].lpBottom == block) |
| 15304 | { |
| 15305 | if (cond->gtIntCon.gtIconVal == 0) |
| 15306 | { |
| 15307 | /* This was a bogus loop (condition always false) |
| 15308 | * Remove the loop from the table */ |
| 15309 | |
| 15310 | optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; |
| 15311 | #ifdef DEBUG |
| 15312 | if (verbose) |
| 15313 | { |
| 15314 | printf("Removing loop L%02u (from " FMT_BB " to " FMT_BB ")\n\n" , loopNum, |
| 15315 | optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum); |
| 15316 | } |
| 15317 | #endif |
| 15318 | } |
| 15319 | } |
| 15320 | } |
| 15321 | DONE_COND: |
| 15322 | result = true; |
| 15323 | } |
| 15324 | } |
| 15325 | else if (block->bbJumpKind == BBJ_SWITCH) |
| 15326 | { |
| 15327 | noway_assert(block->bbTreeList && block->bbTreeList->gtPrev); |
| 15328 | |
| 15329 | GenTree* stmt = block->bbTreeList->gtPrev; |
| 15330 | |
| 15331 | noway_assert(stmt->gtNext == nullptr); |
| 15332 | |
| 15333 | if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL) |
| 15334 | { |
| 15335 | noway_assert(fgRemoveRestOfBlock); |
| 15336 | |
| 15337 | /* Unconditional throw - transform the basic block into a BBJ_THROW */ |
| 15338 | fgConvertBBToThrowBB(block); |
| 15339 | |
| 15340 | /* update the flow graph */ |
| 15341 | |
| 15342 | unsigned jumpCnt = block->bbJumpSwt->bbsCount; |
| 15343 | BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab; |
| 15344 | |
| 15345 | for (unsigned val = 0; val < jumpCnt; val++, jumpTab++) |
| 15346 | { |
| 15347 | BasicBlock* curJump = *jumpTab; |
| 15348 | |
| 15349 | /* Remove 'block' from the predecessor list of 'curJump' */ |
| 15350 | fgRemoveRefPred(curJump, block); |
| 15351 | } |
| 15352 | |
| 15353 | #ifdef DEBUG |
| 15354 | if (verbose) |
| 15355 | { |
| 15356 | printf("\nConditional folded at " FMT_BB "\n" , block->bbNum); |
| 15357 | printf(FMT_BB " becomes a BBJ_THROW\n" , block->bbNum); |
| 15358 | } |
| 15359 | #endif |
| 15360 | goto DONE_SWITCH; |
| 15361 | } |
| 15362 | |
| 15363 | noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH); |
| 15364 | |
| 15365 | /* Did we fold the conditional */ |
| 15366 | |
| 15367 | noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1); |
| 15368 | GenTree* cond; |
| 15369 | cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1; |
| 15370 | |
| 15371 | if (cond->OperKind() & GTK_CONST) |
| 15372 | { |
| 15373 | /* Yippee - we folded the conditional!
| 15374 | * Remove the conditional statement */
| 15375 | |
| 15376 | noway_assert(cond->gtOper == GT_CNS_INT); |
| 15377 | |
| 15378 | /* Remove the statement from bbTreeList - no need to update
| 15379 | * the reference counts since there are no lcl vars */
| 15380 | fgRemoveStmt(block, stmt); |
| 15381 | |
| 15382 | /* modify the flow graph */ |
| 15383 | |
| 15384 | /* Find the actual jump target */ |
| 15385 | unsigned switchVal; |
| 15386 | switchVal = (unsigned)cond->gtIntCon.gtIconVal; |
| 15387 | unsigned jumpCnt; |
| 15388 | jumpCnt = block->bbJumpSwt->bbsCount; |
| 15389 | BasicBlock** jumpTab; |
| 15390 | jumpTab = block->bbJumpSwt->bbsDstTab; |
| 15391 | bool foundVal; |
| 15392 | foundVal = false; |
| 15393 | |
| 15394 | for (unsigned val = 0; val < jumpCnt; val++, jumpTab++) |
| 15395 | { |
| 15396 | BasicBlock* curJump = *jumpTab; |
| 15397 | |
| 15398 | assert(curJump->countOfInEdges() > 0); |
| 15399 | |
| 15400 | // If val matches switchVal or we are at the last entry and |
| 15401 | // we never found the switch value then set the new jump dest |
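| | // For example, if the switch folded to the constant 2 and there are four entries
| | // in jumpTab, only jumpTab[2] is kept as the jump destination; if the constant is
| | // out of range, the last jump table entry is used instead, and every other target
| | // loses this block as a predecessor.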
| 15402 | |
| 15403 | if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1))) |
| 15404 | { |
| 15405 | if (curJump != block->bbNext) |
| 15406 | { |
| 15407 | /* transform the basic block into a BBJ_ALWAYS */ |
| 15408 | block->bbJumpKind = BBJ_ALWAYS; |
| 15409 | block->bbJumpDest = curJump; |
| 15410 | |
| 15411 | // A GC poll is only needed for backward jumps; if the new target is ahead of us, clear the flag.
| 15412 | if (curJump->bbNum > block->bbNum) |
| 15413 | { |
| 15414 | block->bbFlags &= ~BBF_NEEDS_GCPOLL; |
| 15415 | } |
| 15416 | } |
| 15417 | else |
| 15418 | { |
| 15419 | /* transform the basic block into a BBJ_NONE */ |
| 15420 | block->bbJumpKind = BBJ_NONE; |
| 15421 | block->bbFlags &= ~BBF_NEEDS_GCPOLL; |
| 15422 | } |
| 15423 | foundVal = true; |
| 15424 | } |
| 15425 | else |
| 15426 | { |
| 15427 | /* Remove 'block' from the predecessor list of 'curJump' */ |
| 15428 | fgRemoveRefPred(curJump, block); |
| 15429 | } |
| 15430 | } |
| 15431 | #ifdef DEBUG |
| 15432 | if (verbose) |
| 15433 | { |
| 15434 | printf("\nConditional folded at " FMT_BB "\n" , block->bbNum); |
| 15435 | printf(FMT_BB " becomes a %s" , block->bbNum, |
| 15436 | block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE" ); |
| 15437 | if (block->bbJumpKind == BBJ_ALWAYS) |
| 15438 | { |
| 15439 | printf(" to " FMT_BB, block->bbJumpDest->bbNum); |
| 15440 | } |
| 15441 | printf("\n" ); |
| 15442 | } |
| 15443 | #endif |
| 15444 | DONE_SWITCH: |
| 15445 | result = true; |
| 15446 | } |
| 15447 | } |
| 15448 | return result; |
| 15449 | } |
| 15450 | |
| 15451 | //***************************************************************************** |
| 15452 | // |
| 15453 | // Morphs a single statement in a block. |
| 15454 | // Can be called anytime, unlike fgMorphStmts() which should only be called once. |
| 15455 | // |
| 15456 | // Returns true if 'stmt' was removed from the block. |
| 15457 | // Returns false if 'stmt' is still in the block (even if other statements were removed). |
| 15458 | // |
| 15459 | |
| 15460 | bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg)) |
| 15461 | { |
| 15462 | assert(block != nullptr); |
| 15463 | assert(stmt != nullptr); |
| 15464 | |
| 15465 | compCurBB = block; |
| 15466 | compCurStmt = stmt; |
| 15467 | |
| 15468 | GenTree* morph = fgMorphTree(stmt->gtStmtExpr); |
| 15469 | |
| 15470 | // Bug 1106830 - During the CSE phase we can't just remove |
| 15471 | // morph->gtOp.gtOp2 as it could contain CSE expressions. |
| 15472 | // Removing it would lead to a noway_assert in OptCSE.cpp when
| 15473 | // searching for the removed CSE ref (using gtFindLink).
| 15474 | // |
| 15475 | if (!optValnumCSE_phase) |
| 15476 | { |
| 15477 | // Check for morph as a GT_COMMA with an unconditional throw |
| 15478 | if (fgIsCommaThrow(morph, true)) |
| 15479 | { |
| 15480 | #ifdef DEBUG |
| 15481 | if (verbose) |
| 15482 | { |
| 15483 | printf("Folding a top-level fgIsCommaThrow stmt\n" ); |
| 15484 | printf("Removing op2 as unreachable:\n" ); |
| 15485 | gtDispTree(morph->gtOp.gtOp2); |
| 15486 | printf("\n" ); |
| 15487 | } |
| 15488 | #endif |
| 15489 | // Use the call as the new stmt |
| 15490 | morph = morph->gtOp.gtOp1; |
| 15491 | noway_assert(morph->gtOper == GT_CALL); |
| 15492 | } |
| 15493 | |
| 15494 | // we can get a throw as a statement root |
| 15495 | if (fgIsThrow(morph)) |
| 15496 | { |
| 15497 | #ifdef DEBUG |
| 15498 | if (verbose) |
| 15499 | { |
| 15500 | printf("We have a top-level fgIsThrow stmt\n" ); |
| 15501 | printf("Removing the rest of block as unreachable:\n" ); |
| 15502 | } |
| 15503 | #endif |
| 15504 | noway_assert((morph->gtFlags & GTF_COLON_COND) == 0); |
| 15505 | fgRemoveRestOfBlock = true; |
| 15506 | } |
| 15507 | } |
| 15508 | |
| 15509 | stmt->gtStmtExpr = morph; |
| 15510 | |
| 15511 | // Can the entire tree be removed? |
| 15512 | bool removedStmt = false; |
| 15513 | |
| 15514 | // Defer removing statements during CSE so we don't inadvertently remove any CSE defs. |
| 15515 | if (!optValnumCSE_phase) |
| 15516 | { |
| 15517 | removedStmt = fgCheckRemoveStmt(block, stmt); |
| 15518 | } |
| 15519 | |
| 15520 | // Or is this the last statement of a conditional branch that was just folded?
| 15521 | if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock) |
| 15522 | { |
| 15523 | if (fgFoldConditional(block)) |
| 15524 | { |
| 15525 | if (block->bbJumpKind != BBJ_THROW) |
| 15526 | { |
| 15527 | removedStmt = true; |
| 15528 | } |
| 15529 | } |
| 15530 | } |
| 15531 | |
| 15532 | if (!removedStmt) |
| 15533 | { |
| 15534 | // Have to re-do the evaluation order since, for example, some later code does not expect constants as op1
| 15535 | gtSetStmtInfo(stmt); |
| 15536 | |
| 15537 | // Have to re-link the nodes for this statement |
| 15538 | fgSetStmtSeq(stmt); |
| 15539 | } |
| 15540 | |
| 15541 | #ifdef DEBUG |
| 15542 | if (verbose) |
| 15543 | { |
| 15544 | printf("%s %s tree:\n" , msg, (removedStmt ? "removed" : "morphed" )); |
| 15545 | gtDispTree(morph); |
| 15546 | printf("\n" ); |
| 15547 | } |
| 15548 | #endif |
| 15549 | |
| 15550 | if (fgRemoveRestOfBlock) |
| 15551 | { |
| 15552 | // Remove the rest of the stmts in the block |
| 15553 | for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt()) |
| 15554 | { |
| 15555 | fgRemoveStmt(block, stmt); |
| 15556 | } |
| 15557 | |
| 15558 | // The rest of block has been removed and we will always throw an exception. |
| 15559 | |
| 15560 | // Update successors of block
| 15561 | fgRemoveBlockAsPred(block); |
| 15562 | |
| 15563 | // For compDbgCode, we prepend an empty BBJ_NONE block as the firstBB;
| 15564 | // we should not convert it to a throw block.
| 15565 | if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0)) |
| 15566 | { |
| 15567 | // Convert block to a throw bb |
| 15568 | fgConvertBBToThrowBB(block); |
| 15569 | } |
| 15570 | |
| 15571 | #ifdef DEBUG |
| 15572 | if (verbose) |
| 15573 | { |
| 15574 | printf("\n%s Block " FMT_BB " becomes a throw block.\n" , msg, block->bbNum); |
| 15575 | } |
| 15576 | #endif |
| 15577 | fgRemoveRestOfBlock = false; |
| 15578 | } |
| 15579 | |
| 15580 | return removedStmt; |
| 15581 | } |
| 15582 | |
| 15583 | /***************************************************************************** |
| 15584 | * |
| 15585 | * Morph the statements of the given block. |
| 15586 | * This function should be called just once for a block. Use fgMorphBlockStmt() |
| 15587 | * for reentrant calls. |
| 15588 | */ |
| 15589 | |
| 15590 | void Compiler::fgMorphStmts(BasicBlock* block, bool* lnot, bool* loadw) |
| 15591 | { |
| 15592 | fgRemoveRestOfBlock = false; |
| 15593 | |
| 15594 | /* Make the current basic block address available globally */ |
| 15595 | |
| 15596 | compCurBB = block; |
| 15597 | |
| 15598 | *lnot = *loadw = false; |
| 15599 | |
| 15600 | fgCurrentlyInUseArgTemps = hashBv::Create(this); |
| 15601 | |
| 15602 | GenTreeStmt* stmt = block->firstStmt(); |
| 15603 | GenTree* prev = nullptr; |
| 15604 | for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt) |
| 15605 | { |
| 15606 | assert(stmt->gtOper == GT_STMT); |
| 15607 | |
| 15608 | if (fgRemoveRestOfBlock) |
| 15609 | { |
| 15610 | fgRemoveStmt(block, stmt); |
| 15611 | continue; |
| 15612 | } |
| 15613 | #ifdef FEATURE_SIMD |
| 15614 | if (opts.OptimizationEnabled() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && |
| 15615 | stmt->gtStmtExpr->OperGet() == GT_ASG) |
| 15616 | { |
| 15617 | fgMorphCombineSIMDFieldAssignments(block, stmt); |
| 15618 | } |
| 15619 | #endif |
| 15620 | |
| 15621 | fgMorphStmt = stmt; |
| 15622 | compCurStmt = stmt; |
| 15623 | GenTree* tree = stmt->gtStmtExpr; |
| 15624 | |
| 15625 | #ifdef DEBUG |
| 15626 | compCurStmtNum++; |
| 15627 | if (stmt == block->bbTreeList) |
| 15628 | { |
| 15629 | block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum |
| 15630 | } |
| 15631 | |
| 15632 | unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0); |
| 15633 | |
| 15634 | if (verbose) |
| 15635 | { |
| 15636 | printf("\nfgMorphTree " FMT_BB ", stmt %d (before)\n" , block->bbNum, compCurStmtNum); |
| 15637 | gtDispTree(tree); |
| 15638 | } |
| 15639 | #endif |
| 15640 | |
| 15641 | /* Morph this statement tree */ |
| 15642 | |
| 15643 | GenTree* morph = fgMorphTree(tree); |
| 15644 | |
| 15645 | // mark any outgoing arg temps as free so we can reuse them in the next statement. |
| 15646 | |
| 15647 | fgCurrentlyInUseArgTemps->ZeroAll(); |
| 15648 | |
| 15649 | // Has fgMorphStmt been sneakily changed ? |
| 15650 | |
| 15651 | if (stmt->gtStmtExpr != tree) |
| 15652 | { |
| 15653 | /* This must be a tail call. Ignore 'morph' and carry on with
| 15654 | the tail-call node */
| 15655 | |
| 15656 | morph = stmt->gtStmtExpr; |
| 15657 | noway_assert(compTailCallUsed); |
| 15658 | noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall()); |
| 15659 | noway_assert(stmt->gtNextStmt == nullptr); |
| 15660 | |
| 15661 | GenTreeCall* call = morph->AsCall(); |
| 15662 | // Could either be |
| 15663 | // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or |
| 15664 | // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing |
| 15665 | // a jmp. |
| 15666 | noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || |
| 15667 | (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && |
| 15668 | (compCurBB->bbFlags & BBF_HAS_JMP))); |
| 15669 | } |
| 15670 | else if (block != compCurBB) |
| 15671 | { |
| 15672 | /* This must be a tail call that caused a GCPoll to get |
| 15673 | injected. We haven't actually morphed the call yet |
| 15674 | but the flag still got set, clear it here... */ |
| 15675 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 15676 | |
| 15677 | #ifdef DEBUG |
| 15678 | tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; |
| 15679 | #endif |
| 15680 | |
| 15681 | noway_assert(compTailCallUsed); |
| 15682 | noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall()); |
| 15683 | noway_assert(stmt->gtNextStmt == nullptr); |
| 15684 | |
| 15685 | GenTreeCall* call = morph->AsCall(); |
| 15686 | |
| 15687 | // Could either be |
| 15688 | // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or |
| 15689 | // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing |
| 15690 | // a jmp. |
| 15691 | noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || |
| 15692 | (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && |
| 15693 | (compCurBB->bbFlags & BBF_HAS_JMP))); |
| 15694 | } |
| 15695 | |
| 15696 | #ifdef DEBUG |
| 15697 | if (compStressCompile(STRESS_CLONE_EXPR, 30)) |
| 15698 | { |
| 15699 | // Clone all the trees to stress gtCloneExpr() |
| 15700 | |
| 15701 | if (verbose) |
| 15702 | { |
| 15703 | printf("\nfgMorphTree (stressClone from):\n" ); |
| 15704 | gtDispTree(morph); |
| 15705 | } |
| 15706 | |
| 15707 | morph = gtCloneExpr(morph); |
| 15708 | noway_assert(morph); |
| 15709 | |
| 15710 | if (verbose) |
| 15711 | { |
| 15712 | printf("\nfgMorphTree (stressClone to):\n" ); |
| 15713 | gtDispTree(morph); |
| 15714 | } |
| 15715 | } |
| 15716 | |
| 15717 | /* If the hash value changed, we modified the tree during morphing */
| 15718 | if (verbose) |
| 15719 | { |
| 15720 | unsigned newHash = gtHashValue(morph); |
| 15721 | if (newHash != oldHash) |
| 15722 | { |
| 15723 | printf("\nfgMorphTree " FMT_BB ", stmt %d (after)\n" , block->bbNum, compCurStmtNum); |
| 15724 | gtDispTree(morph); |
| 15725 | } |
| 15726 | } |
| 15727 | #endif |
| 15728 | |
| 15729 | /* Check for morph as a GT_COMMA with an unconditional throw */ |
| 15730 | if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true)) |
| 15731 | { |
| 15732 | /* Use the call as the new stmt */ |
| 15733 | morph = morph->gtOp.gtOp1; |
| 15734 | noway_assert(morph->gtOper == GT_CALL); |
| 15735 | noway_assert((morph->gtFlags & GTF_COLON_COND) == 0); |
| 15736 | |
| 15737 | fgRemoveRestOfBlock = true; |
| 15738 | } |
| 15739 | |
| 15740 | stmt->gtStmtExpr = tree = morph; |
| 15741 | |
| 15742 | if (fgRemoveRestOfBlock) |
| 15743 | { |
| 15744 | continue; |
| 15745 | } |
| 15746 | |
| 15747 | /* Has the statement been optimized away */ |
| 15748 | |
| 15749 | if (fgCheckRemoveStmt(block, stmt)) |
| 15750 | { |
| 15751 | continue; |
| 15752 | } |
| 15753 | |
| 15754 | /* Check if this block ends with a conditional branch that can be folded */ |
| 15755 | |
| 15756 | if (fgFoldConditional(block)) |
| 15757 | { |
| 15758 | continue; |
| 15759 | } |
| 15760 | |
| 15761 | if (ehBlockHasExnFlowDsc(block)) |
| 15762 | { |
| 15763 | continue; |
| 15764 | } |
| 15765 | } |
| 15766 | |
| 15767 | if (fgRemoveRestOfBlock) |
| 15768 | { |
| 15769 | if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH)) |
| 15770 | { |
| 15771 | GenTree* first = block->bbTreeList; |
| 15772 | noway_assert(first); |
| 15773 | GenTree* last = first->gtPrev; |
| 15774 | noway_assert(last && last->gtNext == nullptr); |
| 15775 | GenTree* lastStmt = last->gtStmt.gtStmtExpr; |
| 15776 | |
| 15777 | if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) || |
| 15778 | ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH))) |
| 15779 | { |
| 15780 | GenTree* op1 = lastStmt->gtOp.gtOp1; |
| 15781 | |
| 15782 | if (op1->OperKind() & GTK_RELOP) |
| 15783 | { |
| 15784 | /* Unmark the comparison node with GTF_RELOP_JMP_USED */ |
| 15785 | op1->gtFlags &= ~GTF_RELOP_JMP_USED; |
| 15786 | } |
| 15787 | |
| 15788 | last->gtStmt.gtStmtExpr = fgMorphTree(op1); |
| 15789 | } |
| 15790 | } |
| 15791 | |
| 15792 | /* Mark block as a BBJ_THROW block */ |
| 15793 | fgConvertBBToThrowBB(block); |
| 15794 | } |
| 15795 | |
| 15796 | #if FEATURE_FASTTAILCALL |
| 15797 | GenTree* recursiveTailCall = nullptr; |
| 15798 | if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall)) |
| 15799 | { |
| 15800 | fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall()); |
| 15801 | } |
| 15802 | #endif |
| 15803 | |
| 15804 | #ifdef DEBUG |
| 15805 | compCurBB = (BasicBlock*)INVALID_POINTER_VALUE; |
| 15806 | #endif |
| 15807 | |
| 15808 | // Reset this back so that it doesn't leak out impacting other blocks |
| 15809 | fgRemoveRestOfBlock = false; |
| 15810 | } |
| 15811 | |
| 15812 | /***************************************************************************** |
| 15813 | * |
| 15814 | * Morph the blocks of the method. |
| 15816 | * This function should be called just once. |
| 15817 | */ |
| 15818 | |
| 15819 | void Compiler::fgMorphBlocks() |
| 15820 | { |
| 15821 | #ifdef DEBUG |
| 15822 | if (verbose) |
| 15823 | { |
| 15824 | printf("\n*************** In fgMorphBlocks()\n" ); |
| 15825 | } |
| 15826 | #endif |
| 15827 | |
| 15828 | /* Since fgMorphTree can be called after various optimizations to re-arrange
| 15829 | * the nodes, we need a global flag to signal whether we are in the one-pass
| 15830 | * global morphing phase */
| 15831 | |
| 15832 | fgGlobalMorph = true; |
| 15833 | |
| 15834 | #if LOCAL_ASSERTION_PROP |
| 15835 | // |
| 15836 | // Local assertion prop is enabled if we are optimized |
| 15837 | // |
| 15838 | optLocalAssertionProp = opts.OptimizationEnabled(); |
| 15839 | |
| 15840 | if (optLocalAssertionProp) |
| 15841 | { |
| 15842 | // |
| 15843 | // Initialize for local assertion prop |
| 15844 | // |
| 15845 | optAssertionInit(true); |
| 15846 | } |
| 15847 | #elif ASSERTION_PROP |
| 15848 | // |
| 15849 | // If LOCAL_ASSERTION_PROP is not set |
| 15850 | // and we have global assertion prop |
| 15851 | // then local assertion prop is always off |
| 15852 | // |
| 15853 | optLocalAssertionProp = false; |
| 15854 | |
| 15855 | #endif |
| 15856 | |
| 15857 | /*------------------------------------------------------------------------- |
| 15858 | * Process all basic blocks in the function |
| 15859 | */ |
| 15860 | |
| 15861 | BasicBlock* block = fgFirstBB; |
| 15862 | noway_assert(block); |
| 15863 | |
| 15864 | #ifdef DEBUG |
| 15865 | compCurStmtNum = 0; |
| 15866 | #endif |
| 15867 | |
| 15868 | do |
| 15869 | { |
| 15870 | #if OPT_BOOL_OPS |
| 15871 | bool lnot = false; |
| 15872 | #endif |
| 15873 | |
| 15874 | bool loadw = false; |
| 15875 | |
| 15876 | #ifdef DEBUG |
| 15877 | if (verbose) |
| 15878 | { |
| 15879 | printf("\nMorphing " FMT_BB " of '%s'\n" , block->bbNum, info.compFullName); |
| 15880 | } |
| 15881 | #endif |
| 15882 | |
| 15883 | #if LOCAL_ASSERTION_PROP |
| 15884 | if (optLocalAssertionProp) |
| 15885 | { |
| 15886 | // |
| 15887 | // Clear out any currently recorded assertion candidates |
| 15888 | // before processing each basic block, |
| 15889 | // also we must handle QMARK-COLON specially |
| 15890 | // |
| 15891 | optAssertionReset(0); |
| 15892 | } |
| 15893 | #endif |
| 15894 | |
| 15895 | /* Process all statement trees in the basic block */ |
| 15896 | |
| 15897 | fgMorphStmts(block, &lnot, &loadw); |
| 15898 | |
| 15899 | /* Are we using a single return block? */ |
| 15900 | |
| 15901 | if (block->bbJumpKind == BBJ_RETURN) |
| 15902 | { |
| 15903 | if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0)) |
| 15904 | { |
| 15905 | |
| 15906 | // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN. |
| 15907 | // For example a method returning void could have an empty block with jump kind BBJ_RETURN. |
| 15908 | // Such blocks do materialize as part of inlining.
| 15909 | // |
| 15910 | // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN. |
| 15911 | // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC. |
| 15912 | // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal |
| 15913 | // is BAD_VAR_NUM. |
| 15914 | // |
| 15915 | // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN. |
| 15916 | |
| 15917 | GenTree* last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr; |
| 15918 | GenTree* ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr; |
| 15919 | |
| 15920 | if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0)) |
| 15921 | { |
| 15922 | // This return was generated during epilog merging, so leave it alone |
| 15923 | } |
| 15924 | else |
| 15925 | { |
| 15926 | /* We'll jump to the genReturnBB */ |
| 15927 | CLANG_FORMAT_COMMENT_ANCHOR; |
| 15928 | |
| 15929 | #if !defined(_TARGET_X86_) |
| 15930 | if (info.compFlags & CORINFO_FLG_SYNCH) |
| 15931 | { |
| 15932 | fgConvertSyncReturnToLeave(block); |
| 15933 | } |
| 15934 | else |
| 15935 | #endif // !_TARGET_X86_ |
| 15936 | { |
| 15937 | block->bbJumpKind = BBJ_ALWAYS; |
| 15938 | block->bbJumpDest = genReturnBB; |
| 15939 | fgReturnCount--; |
| 15940 | } |
| 15941 | if (genReturnLocal != BAD_VAR_NUM) |
| 15942 | { |
| 15943 | // replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal. |
| 15944 | |
| 15945 | // Method must be returning a value other than TYP_VOID. |
| 15946 | noway_assert(compMethodHasRetVal()); |
| 15947 | |
| 15948 | // This block must be ending with a GT_RETURN |
| 15949 | noway_assert(last != nullptr); |
| 15950 | noway_assert(last->gtOper == GT_STMT); |
| 15951 | noway_assert(last->gtNext == nullptr); |
| 15952 | noway_assert(ret != nullptr); |
| 15953 | |
| 15954 | // GT_RETURN must have non-null operand as the method is returning the value assigned to |
| 15955 | // genReturnLocal |
| 15956 | noway_assert(ret->OperGet() == GT_RETURN); |
| 15957 | noway_assert(ret->gtGetOp1() != nullptr); |
| 15958 | |
| 15959 | GenTree* pAfterStatement = last; |
| 15960 | IL_OFFSETX offset = last->AsStmt()->gtStmtILoffsx; |
| 15961 | GenTree* tree = |
| 15962 | gtNewTempAssign(genReturnLocal, ret->gtGetOp1(), &pAfterStatement, offset, block); |
| 15963 | if (tree->OperIsCopyBlkOp()) |
| 15964 | { |
| 15965 | tree = fgMorphCopyBlock(tree); |
| 15966 | } |
| 15967 | |
| 15968 | if (pAfterStatement == last) |
| 15969 | { |
| 15970 | last->gtStmt.gtStmtExpr = tree; |
| 15971 | } |
| 15972 | else |
| 15973 | { |
| 15974 | // gtNewTempAssign inserted additional statements after last |
| 15975 | fgRemoveStmt(block, last); |
| 15976 | last = fgInsertStmtAfter(block, pAfterStatement, gtNewStmt(tree, offset)); |
| 15977 | } |
| 15978 | |
| 15979 | // make sure that copy-prop ignores this assignment. |
| 15980 | last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE; |
| 15981 | } |
| 15982 | else if (ret != nullptr && ret->OperGet() == GT_RETURN) |
| 15983 | { |
| 15984 | // This block ends with a GT_RETURN |
| 15985 | noway_assert(last != nullptr); |
| 15986 | noway_assert(last->gtOper == GT_STMT); |
| 15987 | noway_assert(last->gtNext == nullptr); |
| 15988 | |
| 15989 | // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn |
| 15990 | // block |
| 15991 | noway_assert(ret->TypeGet() == TYP_VOID); |
| 15992 | noway_assert(ret->gtGetOp1() == nullptr); |
| 15993 | |
| 15994 | fgRemoveStmt(block, last); |
| 15995 | } |
| 15996 | #ifdef DEBUG |
| 15997 | if (verbose) |
| 15998 | { |
| 15999 | printf("morph " FMT_BB " to point at onereturn. New block is\n" , block->bbNum); |
| 16000 | fgTableDispBasicBlock(block); |
| 16001 | } |
| 16002 | #endif |
| 16003 | } |
| 16004 | } |
| 16005 | } |
| 16006 | block = block->bbNext; |
| 16007 | } while (block); |
| 16008 | |
| 16009 | /* We are done with the global morphing phase */ |
| 16010 | |
| 16011 | fgGlobalMorph = false; |
| 16012 | |
| 16013 | #ifdef DEBUG |
| 16014 | if (verboseTrees) |
| 16015 | { |
| 16016 | fgDispBasicBlocks(true); |
| 16017 | } |
| 16018 | #endif |
| 16019 | } |
| 16020 | |
| 16021 | /***************************************************************************** |
| 16022 | * |
| 16023 | * Make some decisions about the kind of code to generate. |
| 16024 | */ |
| 16025 | |
| 16026 | void Compiler::fgSetOptions() |
| 16027 | { |
| 16028 | #ifdef DEBUG |
| 16029 | /* Should we force fully interruptible code ? */ |
| 16030 | if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30)) |
| 16031 | { |
| 16032 | noway_assert(!codeGen->isGCTypeFixed()); |
| 16033 | genInterruptible = true; |
| 16034 | } |
| 16035 | #endif |
| 16036 | |
| 16037 | if (opts.compDbgCode) |
| 16038 | { |
| 16039 | assert(!codeGen->isGCTypeFixed()); |
| 16040 | genInterruptible = true; // debugging is easier this way ... |
| 16041 | } |
| 16042 | |
| 16043 | /* Assume we won't need an explicit stack frame if this is allowed */ |
| 16044 | |
| 16045 | // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of |
| 16046 | // the callee-saved registers. |
| 16047 | noway_assert(!compTailCallUsed || !compLocallocUsed); |
| 16048 | |
| 16049 | if (compLocallocUsed) |
| 16050 | { |
| 16051 | codeGen->setFramePointerRequired(true); |
| 16052 | } |
| 16053 | |
| 16054 | #ifdef _TARGET_X86_ |
| 16055 | |
| 16056 | if (compTailCallUsed) |
| 16057 | codeGen->setFramePointerRequired(true); |
| 16058 | |
| 16059 | #endif // _TARGET_X86_ |
| 16060 | |
| 16061 | if (!opts.genFPopt) |
| 16062 | { |
| 16063 | codeGen->setFramePointerRequired(true); |
| 16064 | } |
| 16065 | |
| 16066 | // Assert that the EH table has been initialized by now. Note that |
| 16067 | // compHndBBtabAllocCount never decreases; it is a high-water mark |
| 16068 | // of table allocation. In contrast, compHndBBtabCount does shrink |
| 16069 | // if we delete a dead EH region, and if it shrinks to zero, the |
| 16070 | // table pointer compHndBBtab is unreliable. |
| 16071 | assert(compHndBBtabAllocCount >= info.compXcptnsCount); |
| 16072 | |
| 16073 | #ifdef _TARGET_X86_ |
| 16074 | |
| 16075 | // Note: this case, and the !X86 case below, should both use the |
| 16076 | // !X86 path. This would require a few more changes for X86 to use |
| 16077 | // compHndBBtabCount (the current number of EH clauses) instead of |
| 16078 | // info.compXcptnsCount (the number of EH clauses in IL), such as |
// in ehNeedsShadowSPslots(). This is because sometimes the IL has
// an EH clause that we delete as statically dead code before we
// get here, leaving no EH clauses and thus no EH-related requirement
// to use a frame pointer. But until all the code uses the same test,
// leave info.compXcptnsCount here.
| 16084 | if (info.compXcptnsCount > 0) |
| 16085 | { |
| 16086 | codeGen->setFramePointerRequiredEH(true); |
| 16087 | } |
| 16088 | |
| 16089 | #else // !_TARGET_X86_ |
| 16090 | |
| 16091 | if (compHndBBtabCount > 0) |
| 16092 | { |
| 16093 | codeGen->setFramePointerRequiredEH(true); |
| 16094 | } |
| 16095 | |
| 16096 | #endif // _TARGET_X86_ |
| 16097 | |
| 16098 | #ifdef UNIX_X86_ABI |
| 16099 | if (info.compXcptnsCount > 0) |
| 16100 | { |
| 16101 | assert(!codeGen->isGCTypeFixed()); |
| 16102 | // Enforce fully interruptible codegen for funclet unwinding |
| 16103 | genInterruptible = true; |
| 16104 | } |
| 16105 | #endif // UNIX_X86_ABI |
| 16106 | |
| 16107 | if (info.compCallUnmanaged) |
| 16108 | { |
| 16109 | codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame |
| 16110 | } |
| 16111 | |
| 16112 | if (info.compPublishStubParam) |
| 16113 | { |
| 16114 | codeGen->setFramePointerRequiredGCInfo(true); |
| 16115 | } |
| 16116 | |
| 16117 | if (opts.compNeedSecurityCheck) |
| 16118 | { |
| 16119 | codeGen->setFramePointerRequiredGCInfo(true); |
| 16120 | |
| 16121 | #ifndef JIT32_GCENCODER |
| 16122 | |
| 16123 | // The decoder only reports objects in frames with exceptions if the frame |
| 16124 | // is fully interruptible. |
// Even if there is no catch or other way to resume execution in this frame,
// the VM requires the security object to remain alive until later, so
// frames with security objects must be fully interruptible.
| 16128 | genInterruptible = true; |
| 16129 | |
| 16130 | #endif // JIT32_GCENCODER |
| 16131 | } |
| 16132 | |
| 16133 | if (compIsProfilerHookNeeded()) |
| 16134 | { |
| 16135 | codeGen->setFramePointerRequired(true); |
| 16136 | } |
| 16137 | |
| 16138 | if (info.compIsVarArgs) |
| 16139 | { |
| 16140 | // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative. |
| 16141 | codeGen->setFramePointerRequiredGCInfo(true); |
| 16142 | } |
| 16143 | |
| 16144 | if (lvaReportParamTypeArg()) |
| 16145 | { |
| 16146 | codeGen->setFramePointerRequiredGCInfo(true); |
| 16147 | } |
| 16148 | |
| 16149 | // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not"); |
| 16150 | } |
| 16151 | |
| 16152 | /*****************************************************************************/ |
| 16153 | |
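// fgInitThisClass: Build a tree that ensures the class of the method being compiled
// (which may be shared generic code) is initialized, selecting the helper and its
// arguments based on how the generic context is available (no runtime lookup, the
// 'this' object, a class parameter, or a method desc parameter).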
| 16154 | GenTree* Compiler::fgInitThisClass() |
| 16155 | { |
| 16156 | noway_assert(!compIsForInlining()); |
| 16157 | |
| 16158 | CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd); |
| 16159 | |
| 16160 | if (!kind.needsRuntimeLookup) |
| 16161 | { |
| 16162 | return fgGetSharedCCtor(info.compClassHnd); |
| 16163 | } |
| 16164 | else |
| 16165 | { |
| 16166 | #ifdef FEATURE_READYTORUN_COMPILER |
| 16167 | // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR. |
| 16168 | if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI)) |
| 16169 | { |
| 16170 | CORINFO_RESOLVED_TOKEN resolvedToken; |
| 16171 | memset(&resolvedToken, 0, sizeof(resolvedToken)); |
| 16172 | |
| 16173 | // We are in a shared method body, but maybe we don't need a runtime lookup after all. |
| 16174 | // This covers the case of a generic method on a non-generic type. |
| 16175 | if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST)) |
| 16176 | { |
| 16177 | resolvedToken.hClass = info.compClassHnd; |
| 16178 | return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF); |
| 16179 | } |
| 16180 | |
| 16181 | // We need a runtime lookup. |
| 16182 | GenTree* ctxTree = getRuntimeContextTree(kind.runtimeLookupKind); |
| 16183 | |
| 16184 | // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static |
| 16185 | // base of the class that owns the method being compiled". If we're in this method, it means we're not |
| 16186 | // inlining and there's no ambiguity. |
| 16187 | return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF, |
| 16188 | gtNewArgList(ctxTree), &kind); |
| 16189 | } |
| 16190 | #endif |
| 16191 | |
// Collectible types require that, for shared generic code, we report any use of the generic context
// parameter. (This is a conservative approach; we could detect some cases, particularly when the
// context parameter is 'this', where the eager reporting logic is not needed.)
| 16195 | lvaGenericsContextUseCount++; |
| 16196 | |
| 16197 | switch (kind.runtimeLookupKind) |
| 16198 | { |
| 16199 | case CORINFO_LOOKUP_THISOBJ: |
// This code takes a 'this' pointer, but we need to pass the static method desc to get the right point
// in the hierarchy.
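// The tree built below is effectively:
//     CORINFO_HELP_INITINSTCLASS(*this /* method table of 'this' */, methodDesc)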
| 16202 | { |
| 16203 | GenTree* vtTree = gtNewLclvNode(info.compThisArg, TYP_REF); |
| 16204 | // Vtable pointer of this object |
| 16205 | vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree); |
| 16206 | vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception |
| 16207 | GenTree* methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd); |
| 16208 | |
| 16209 | return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, gtNewArgList(vtTree, methodHnd)); |
| 16210 | } |
| 16211 | |
| 16212 | case CORINFO_LOOKUP_CLASSPARAM: |
| 16213 | { |
| 16214 | GenTree* vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); |
| 16215 | return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, gtNewArgList(vtTree)); |
| 16216 | } |
| 16217 | |
| 16218 | case CORINFO_LOOKUP_METHODPARAM: |
| 16219 | { |
| 16220 | GenTree* methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); |
| 16221 | return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, |
| 16222 | gtNewArgList(gtNewIconNode(0), methHndTree)); |
| 16223 | } |
| 16224 | } |
| 16225 | } |
| 16226 | |
| 16227 | noway_assert(!"Unknown LOOKUP_KIND" ); |
| 16228 | UNREACHABLE(); |
| 16229 | } |
| 16230 | |
| 16231 | #ifdef DEBUG |
| 16232 | /***************************************************************************** |
| 16233 | * |
* Tree walk callback to make sure no GT_QMARK nodes are present in the tree;
* any GT_QMARK encountered here is disallowed (see fgCheckQmarkAllowedForm).
| 16236 | */ |
| 16237 | Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTree** tree, fgWalkData* data) |
| 16238 | { |
| 16239 | if ((*tree)->OperGet() == GT_QMARK) |
| 16240 | { |
| 16241 | fgCheckQmarkAllowedForm(*tree); |
| 16242 | } |
| 16243 | return WALK_CONTINUE; |
| 16244 | } |
| 16245 | |
| 16246 | void Compiler::fgCheckQmarkAllowedForm(GenTree* tree) |
| 16247 | { |
| 16248 | assert(tree->OperGet() == GT_QMARK); |
| 16249 | assert(!"Qmarks beyond morph disallowed." ); |
| 16250 | } |
| 16251 | |
| 16252 | /***************************************************************************** |
| 16253 | * |
| 16254 | * Verify that the importer has created GT_QMARK nodes in a way we can |
| 16255 | * process them. The following is allowed: |
| 16256 | * |
| 16257 | * 1. A top level qmark. Top level qmark is of the form: |
| 16258 | * a) (bool) ? (void) : (void) OR |
| 16259 | * b) V0N = (bool) ? (type) : (type) |
| 16260 | * |
| 16261 | * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child |
| 16262 | * of either op1 of colon or op2 of colon but not a child of any other |
| 16263 | * operator. |
| 16264 | */ |
| 16265 | void Compiler::fgPreExpandQmarkChecks(GenTree* expr) |
| 16266 | { |
| 16267 | GenTree* topQmark = fgGetTopLevelQmark(expr); |
| 16268 | |
| 16269 | // If the top level Qmark is null, then scan the tree to make sure |
| 16270 | // there are no qmarks within it. |
| 16271 | if (topQmark == nullptr) |
| 16272 | { |
| 16273 | fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr); |
| 16274 | } |
| 16275 | else |
| 16276 | { |
// We could probably expand the cond node also, but we don't think the extra effort is necessary,
// so let's just assert that the cond node of a top level qmark doesn't have further top level qmarks.
| 16279 | fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr); |
| 16280 | |
| 16281 | fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1); |
| 16282 | fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2); |
| 16283 | } |
| 16284 | } |
| 16285 | #endif // DEBUG |
| 16286 | |
| 16287 | /***************************************************************************** |
| 16288 | * |
| 16289 | * Get the top level GT_QMARK node in a given "expr", return NULL if such a |
| 16290 | * node is not present. If the top level GT_QMARK node is assigned to a |
| 16291 | * GT_LCL_VAR, then return the lcl node in ppDst. |
| 16292 | * |
| 16293 | */ |
| 16294 | GenTree* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */) |
| 16295 | { |
| 16296 | if (ppDst != nullptr) |
| 16297 | { |
| 16298 | *ppDst = nullptr; |
| 16299 | } |
| 16300 | |
| 16301 | GenTree* topQmark = nullptr; |
| 16302 | if (expr->gtOper == GT_QMARK) |
| 16303 | { |
| 16304 | topQmark = expr; |
| 16305 | } |
| 16306 | else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR) |
| 16307 | { |
| 16308 | topQmark = expr->gtOp.gtOp2; |
| 16309 | if (ppDst != nullptr) |
| 16310 | { |
| 16311 | *ppDst = expr->gtOp.gtOp1; |
| 16312 | } |
| 16313 | } |
| 16314 | return topQmark; |
| 16315 | } |
| 16316 | |
| 16317 | /********************************************************************************* |
| 16318 | * |
| 16319 | * For a castclass helper call, |
* the importer creates the following tree:
* tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
*
* This method splits the qmark expression created by the importer into the
* following blocks: (block, asg, cond1, cond2, helper, remainder)
* Notice that op1 is the result for both conditions, so we coalesce these
* assignments into a single block instead of two blocks that would form a nested diamond.
| 16327 | * |
| 16328 | * +---------->-----------+ |
| 16329 | * | | | |
| 16330 | * ^ ^ v |
| 16331 | * | | | |
| 16332 | * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder |
| 16333 | * |
| 16334 | * We expect to achieve the following codegen: |
| 16335 | * mov rsi, rdx tmp = op1 // asgBlock |
| 16336 | * test rsi, rsi goto skip if tmp == null ? // cond1Block |
| 16337 | * je SKIP |
| 16338 | * mov rcx, 0x76543210 cns = op2 // cond2Block |
| 16339 | * cmp qword ptr [rsi], rcx goto skip if *tmp == op2 |
| 16340 | * je SKIP |
| 16341 | * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock |
| 16342 | * mov rsi, rax |
| 16343 | * SKIP: // remainderBlock |
| 16344 | * tmp has the result. |
| 16345 | * |
| 16346 | */ |
| 16347 | void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTree* stmt) |
| 16348 | { |
| 16349 | #ifdef DEBUG |
| 16350 | if (verbose) |
| 16351 | { |
| 16352 | printf("\nExpanding CastInstOf qmark in " FMT_BB " (before)\n" , block->bbNum); |
| 16353 | fgDispBasicBlocks(block, block, true); |
| 16354 | } |
| 16355 | #endif // DEBUG |
| 16356 | |
| 16357 | GenTree* expr = stmt->gtStmt.gtStmtExpr; |
| 16358 | |
| 16359 | GenTree* dst = nullptr; |
| 16360 | GenTree* qmark = fgGetTopLevelQmark(expr, &dst); |
| 16361 | noway_assert(dst != nullptr); |
| 16362 | |
| 16363 | assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF); |
| 16364 | |
| 16365 | // Get cond, true, false exprs for the qmark. |
| 16366 | GenTree* condExpr = qmark->gtGetOp1(); |
| 16367 | GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode(); |
| 16368 | GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode(); |
| 16369 | |
| 16370 | // Get cond, true, false exprs for the nested qmark. |
| 16371 | GenTree* nestedQmark = falseExpr; |
| 16372 | GenTree* cond2Expr; |
| 16373 | GenTree* true2Expr; |
| 16374 | GenTree* false2Expr; |
| 16375 | |
| 16376 | if (nestedQmark->gtOper == GT_QMARK) |
| 16377 | { |
| 16378 | cond2Expr = nestedQmark->gtGetOp1(); |
| 16379 | true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode(); |
| 16380 | false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode(); |
| 16381 | |
| 16382 | assert(cond2Expr->gtFlags & GTF_RELOP_QMARK); |
| 16383 | cond2Expr->gtFlags &= ~GTF_RELOP_QMARK; |
| 16384 | } |
| 16385 | else |
| 16386 | { |
// This is a rare case that arises when we are doing minopts and encounter isinst of null.
// gtFoldExpr was still able to optimize away part of the tree (but not all of it),
// which means it does not match our pattern.
| 16390 | |
| 16391 | // Rather than write code to handle this case, just fake up some nodes to make it match the common |
| 16392 | // case. Synthesize a comparison that is always true, and for the result-on-true, use the |
| 16393 | // entire subtree we expected to be the nested question op. |
| 16394 | |
| 16395 | cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL)); |
| 16396 | true2Expr = nestedQmark; |
| 16397 | false2Expr = gtNewIconNode(0, TYP_I_IMPL); |
| 16398 | } |
| 16399 | assert(false2Expr->OperGet() == trueExpr->OperGet()); |
| 16400 | |
| 16401 | // Clear flags as they are now going to be part of JTRUE. |
| 16402 | assert(condExpr->gtFlags & GTF_RELOP_QMARK); |
| 16403 | condExpr->gtFlags &= ~GTF_RELOP_QMARK; |
| 16404 | |
| 16405 | // Create the chain of blocks. See method header comment. |
| 16406 | // The order of blocks after this is the following: |
| 16407 | // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock |
| 16408 | // |
| 16409 | // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock', |
// if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
| 16411 | // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely |
| 16412 | // remainderBlock will still be GC safe. |
| 16413 | unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT; |
| 16414 | BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt); |
| 16415 | fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock. |
| 16416 | |
| 16417 | BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true); |
| 16418 | BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true); |
| 16419 | BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true); |
| 16420 | BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true); |
| 16421 | |
| 16422 | remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags; |
| 16423 | |
| 16424 | // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). |
| 16425 | // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. |
| 16426 | if ((block->bbFlags & BBF_INTERNAL) == 0) |
| 16427 | { |
| 16428 | helperBlock->bbFlags &= ~BBF_INTERNAL; |
| 16429 | cond2Block->bbFlags &= ~BBF_INTERNAL; |
| 16430 | cond1Block->bbFlags &= ~BBF_INTERNAL; |
| 16431 | asgBlock->bbFlags &= ~BBF_INTERNAL; |
| 16432 | helperBlock->bbFlags |= BBF_IMPORTED; |
| 16433 | cond2Block->bbFlags |= BBF_IMPORTED; |
| 16434 | cond1Block->bbFlags |= BBF_IMPORTED; |
| 16435 | asgBlock->bbFlags |= BBF_IMPORTED; |
| 16436 | } |
| 16437 | |
| 16438 | // Chain the flow correctly. |
| 16439 | fgAddRefPred(asgBlock, block); |
| 16440 | fgAddRefPred(cond1Block, asgBlock); |
| 16441 | fgAddRefPred(cond2Block, cond1Block); |
| 16442 | fgAddRefPred(helperBlock, cond2Block); |
| 16443 | fgAddRefPred(remainderBlock, helperBlock); |
| 16444 | fgAddRefPred(remainderBlock, cond1Block); |
| 16445 | fgAddRefPred(remainderBlock, cond2Block); |
| 16446 | |
| 16447 | cond1Block->bbJumpDest = remainderBlock; |
| 16448 | cond2Block->bbJumpDest = remainderBlock; |
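// Thus, when either condition evaluates true we jump straight to remainderBlock (skipping the
// helper); otherwise control falls through cond1Block -> cond2Block -> helperBlock, and
// helperBlock (BBJ_NONE) falls through into remainderBlock.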
| 16449 | |
| 16450 | // Set the weights; some are guesses. |
| 16451 | asgBlock->inheritWeight(block); |
| 16452 | cond1Block->inheritWeight(block); |
| 16453 | cond2Block->inheritWeightPercentage(cond1Block, 50); |
| 16454 | helperBlock->inheritWeightPercentage(cond2Block, 50); |
| 16455 | |
| 16456 | // Append cond1 as JTRUE to cond1Block |
| 16457 | GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr); |
| 16458 | GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx); |
| 16459 | fgInsertStmtAtEnd(cond1Block, jmpStmt); |
| 16460 | |
| 16461 | // Append cond2 as JTRUE to cond2Block |
| 16462 | jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr); |
| 16463 | jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx); |
| 16464 | fgInsertStmtAtEnd(cond2Block, jmpStmt); |
| 16465 | |
| 16466 | // AsgBlock should get tmp = op1 assignment. |
| 16467 | trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr); |
| 16468 | GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx); |
| 16469 | fgInsertStmtAtEnd(asgBlock, trueStmt); |
| 16470 | |
| 16471 | // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper. |
| 16472 | gtReverseCond(cond2Expr); |
| 16473 | GenTree* helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr); |
| 16474 | GenTree* helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx); |
| 16475 | fgInsertStmtAtEnd(helperBlock, helperStmt); |
| 16476 | |
| 16477 | // Finally remove the nested qmark stmt. |
| 16478 | fgRemoveStmt(block, stmt); |
| 16479 | |
| 16480 | #ifdef DEBUG |
| 16481 | if (verbose) |
| 16482 | { |
| 16483 | printf("\nExpanding CastInstOf qmark in " FMT_BB " (after)\n" , block->bbNum); |
| 16484 | fgDispBasicBlocks(block, remainderBlock, true); |
| 16485 | } |
| 16486 | #endif // DEBUG |
| 16487 | } |
| 16488 | |
| 16489 | /***************************************************************************** |
| 16490 | * |
| 16491 | * Expand a statement with a top level qmark node. There are three cases, based |
| 16492 | * on whether the qmark has both "true" and "false" arms, or just one of them. |
| 16493 | * |
| 16494 | * S0; |
| 16495 | * C ? T : F; |
| 16496 | * S1; |
| 16497 | * |
| 16498 | * Generates ===> |
| 16499 | * |
| 16500 | * bbj_always |
| 16501 | * +---->------+ |
| 16502 | * false | | |
| 16503 | * S0 -->-- ~C -->-- T F -->-- S1 |
| 16504 | * | | |
| 16505 | * +--->--------+ |
| 16506 | * bbj_cond(true) |
| 16507 | * |
| 16508 | * ----------------------------------------- |
| 16509 | * |
| 16510 | * S0; |
| 16511 | * C ? T : NOP; |
| 16512 | * S1; |
| 16513 | * |
| 16514 | * Generates ===> |
| 16515 | * |
| 16516 | * false |
| 16517 | * S0 -->-- ~C -->-- T -->-- S1 |
| 16518 | * | | |
| 16519 | * +-->-------------+ |
| 16520 | * bbj_cond(true) |
| 16521 | * |
| 16522 | * ----------------------------------------- |
| 16523 | * |
| 16524 | * S0; |
| 16525 | * C ? NOP : F; |
| 16526 | * S1; |
| 16527 | * |
| 16528 | * Generates ===> |
| 16529 | * |
| 16530 | * false |
| 16531 | * S0 -->-- C -->-- F -->-- S1 |
| 16532 | * | | |
| 16533 | * +-->------------+ |
| 16534 | * bbj_cond(true) |
| 16535 | * |
* If the qmark assigns to a variable, then the "then" and "else" results are
* assigned into that variable in the corresponding blocks.
| 16538 | */ |
| 16539 | void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTree* stmt) |
| 16540 | { |
| 16541 | GenTree* expr = stmt->gtStmt.gtStmtExpr; |
| 16542 | |
| 16543 | // Retrieve the Qmark node to be expanded. |
| 16544 | GenTree* dst = nullptr; |
| 16545 | GenTree* qmark = fgGetTopLevelQmark(expr, &dst); |
| 16546 | if (qmark == nullptr) |
| 16547 | { |
| 16548 | return; |
| 16549 | } |
| 16550 | |
| 16551 | if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF) |
| 16552 | { |
| 16553 | fgExpandQmarkForCastInstOf(block, stmt); |
| 16554 | return; |
| 16555 | } |
| 16556 | |
| 16557 | #ifdef DEBUG |
| 16558 | if (verbose) |
| 16559 | { |
| 16560 | printf("\nExpanding top-level qmark in " FMT_BB " (before)\n" , block->bbNum); |
| 16561 | fgDispBasicBlocks(block, block, true); |
| 16562 | } |
| 16563 | #endif // DEBUG |
| 16564 | |
| 16565 | // Retrieve the operands. |
| 16566 | GenTree* condExpr = qmark->gtGetOp1(); |
| 16567 | GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode(); |
| 16568 | GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode(); |
| 16569 | |
| 16570 | assert(condExpr->gtFlags & GTF_RELOP_QMARK); |
| 16571 | condExpr->gtFlags &= ~GTF_RELOP_QMARK; |
| 16572 | |
| 16573 | assert(!varTypeIsFloating(condExpr->TypeGet())); |
| 16574 | |
| 16575 | bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP); |
| 16576 | bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP); |
| 16577 | assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark! |
| 16578 | |
| 16579 | // Create remainder, cond and "else" blocks. After this, the blocks are in this order: |
| 16580 | // block ... condBlock ... elseBlock ... remainderBlock |
| 16581 | // |
| 16582 | // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock', |
// if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
| 16584 | // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely |
| 16585 | // remainderBlock will still be GC safe. |
| 16586 | unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT; |
| 16587 | BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt); |
| 16588 | fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock. |
| 16589 | |
| 16590 | BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true); |
| 16591 | BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true); |
| 16592 | |
| 16593 | // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). |
| 16594 | // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. |
| 16595 | if ((block->bbFlags & BBF_INTERNAL) == 0) |
| 16596 | { |
| 16597 | condBlock->bbFlags &= ~BBF_INTERNAL; |
| 16598 | elseBlock->bbFlags &= ~BBF_INTERNAL; |
| 16599 | condBlock->bbFlags |= BBF_IMPORTED; |
| 16600 | elseBlock->bbFlags |= BBF_IMPORTED; |
| 16601 | } |
| 16602 | |
| 16603 | remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags; |
| 16604 | |
| 16605 | condBlock->inheritWeight(block); |
| 16606 | |
| 16607 | fgAddRefPred(condBlock, block); |
| 16608 | fgAddRefPred(elseBlock, condBlock); |
| 16609 | fgAddRefPred(remainderBlock, elseBlock); |
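// At this point the layout and flow is block -> condBlock -> elseBlock -> remainderBlock.
// The cases below add a 'then' block and/or retarget condBlock's taken branch, depending
// on which arms of the qmark are non-empty.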
| 16610 | |
| 16611 | BasicBlock* thenBlock = nullptr; |
| 16612 | if (hasTrueExpr && hasFalseExpr) |
| 16613 | { |
| 16614 | // bbj_always |
| 16615 | // +---->------+ |
| 16616 | // false | | |
| 16617 | // S0 -->-- ~C -->-- T F -->-- S1 |
| 16618 | // | | |
| 16619 | // +--->--------+ |
| 16620 | // bbj_cond(true) |
| 16621 | // |
| 16622 | gtReverseCond(condExpr); |
| 16623 | condBlock->bbJumpDest = elseBlock; |
| 16624 | |
| 16625 | thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true); |
| 16626 | thenBlock->bbJumpDest = remainderBlock; |
| 16627 | if ((block->bbFlags & BBF_INTERNAL) == 0) |
| 16628 | { |
| 16629 | thenBlock->bbFlags &= ~BBF_INTERNAL; |
| 16630 | thenBlock->bbFlags |= BBF_IMPORTED; |
| 16631 | } |
| 16632 | |
| 16633 | elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL); |
| 16634 | |
| 16635 | fgAddRefPred(thenBlock, condBlock); |
| 16636 | fgAddRefPred(remainderBlock, thenBlock); |
| 16637 | |
| 16638 | thenBlock->inheritWeightPercentage(condBlock, 50); |
| 16639 | elseBlock->inheritWeightPercentage(condBlock, 50); |
| 16640 | } |
| 16641 | else if (hasTrueExpr) |
| 16642 | { |
| 16643 | // false |
| 16644 | // S0 -->-- ~C -->-- T -->-- S1 |
| 16645 | // | | |
| 16646 | // +-->-------------+ |
| 16647 | // bbj_cond(true) |
| 16648 | // |
| 16649 | gtReverseCond(condExpr); |
| 16650 | condBlock->bbJumpDest = remainderBlock; |
| 16651 | fgAddRefPred(remainderBlock, condBlock); |
| 16652 | // Since we have no false expr, use the one we'd already created. |
| 16653 | thenBlock = elseBlock; |
| 16654 | elseBlock = nullptr; |
| 16655 | |
| 16656 | thenBlock->inheritWeightPercentage(condBlock, 50); |
| 16657 | } |
| 16658 | else if (hasFalseExpr) |
| 16659 | { |
| 16660 | // false |
| 16661 | // S0 -->-- C -->-- F -->-- S1 |
| 16662 | // | | |
| 16663 | // +-->------------+ |
| 16664 | // bbj_cond(true) |
| 16665 | // |
| 16666 | condBlock->bbJumpDest = remainderBlock; |
| 16667 | fgAddRefPred(remainderBlock, condBlock); |
| 16668 | |
| 16669 | elseBlock->inheritWeightPercentage(condBlock, 50); |
| 16670 | } |
| 16671 | |
| 16672 | GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1()); |
| 16673 | GenTree* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx); |
| 16674 | fgInsertStmtAtEnd(condBlock, jmpStmt); |
| 16675 | |
| 16676 | // Remove the original qmark statement. |
| 16677 | fgRemoveStmt(block, stmt); |
| 16678 | |
// Since this is a top level qmark, it either has a dst, in which case we assign
// the "then"/"else" results into that local below, or it is void and there is
// nothing to assign.
| 16682 | unsigned lclNum = BAD_VAR_NUM; |
| 16683 | if (dst != nullptr) |
| 16684 | { |
| 16685 | assert(dst->gtOper == GT_LCL_VAR); |
| 16686 | lclNum = dst->gtLclVar.gtLclNum; |
| 16687 | } |
| 16688 | else |
| 16689 | { |
| 16690 | assert(qmark->TypeGet() == TYP_VOID); |
| 16691 | } |
| 16692 | |
| 16693 | if (hasTrueExpr) |
| 16694 | { |
| 16695 | if (dst != nullptr) |
| 16696 | { |
| 16697 | trueExpr = gtNewTempAssign(lclNum, trueExpr); |
| 16698 | } |
| 16699 | GenTree* trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx); |
| 16700 | fgInsertStmtAtEnd(thenBlock, trueStmt); |
| 16701 | } |
| 16702 | |
| 16703 | // Assign the falseExpr into the dst or tmp, insert in elseBlock |
| 16704 | if (hasFalseExpr) |
| 16705 | { |
| 16706 | if (dst != nullptr) |
| 16707 | { |
| 16708 | falseExpr = gtNewTempAssign(lclNum, falseExpr); |
| 16709 | } |
| 16710 | GenTree* falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx); |
| 16711 | fgInsertStmtAtEnd(elseBlock, falseStmt); |
| 16712 | } |
| 16713 | |
| 16714 | #ifdef DEBUG |
| 16715 | if (verbose) |
| 16716 | { |
| 16717 | printf("\nExpanding top-level qmark in " FMT_BB " (after)\n" , block->bbNum); |
| 16718 | fgDispBasicBlocks(block, remainderBlock, true); |
| 16719 | } |
| 16720 | #endif // DEBUG |
| 16721 | } |
| 16722 | |
| 16723 | /***************************************************************************** |
| 16724 | * |
| 16725 | * Expand GT_QMARK nodes from the flow graph into basic blocks. |
| 16726 | * |
| 16727 | */ |
| 16728 | |
| 16729 | void Compiler::fgExpandQmarkNodes() |
| 16730 | { |
| 16731 | if (compQmarkUsed) |
| 16732 | { |
| 16733 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
| 16734 | { |
| 16735 | for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) |
| 16736 | { |
| 16737 | GenTree* expr = stmt->gtStmt.gtStmtExpr; |
| 16738 | #ifdef DEBUG |
| 16739 | fgPreExpandQmarkChecks(expr); |
| 16740 | #endif |
| 16741 | fgExpandQmarkStmt(block, stmt); |
| 16742 | } |
| 16743 | } |
| 16744 | #ifdef DEBUG |
| 16745 | fgPostExpandQmarkChecks(); |
| 16746 | #endif |
| 16747 | } |
| 16748 | compQmarkRationalized = true; |
| 16749 | } |
| 16750 | |
| 16751 | #ifdef DEBUG |
| 16752 | /***************************************************************************** |
| 16753 | * |
| 16754 | * Make sure we don't have any more GT_QMARK nodes. |
| 16755 | * |
| 16756 | */ |
| 16757 | void Compiler::fgPostExpandQmarkChecks() |
| 16758 | { |
| 16759 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
| 16760 | { |
| 16761 | for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) |
| 16762 | { |
| 16763 | GenTree* expr = stmt->gtStmt.gtStmtExpr; |
| 16764 | fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr); |
| 16765 | } |
| 16766 | } |
| 16767 | } |
| 16768 | #endif |
| 16769 | |
| 16770 | /***************************************************************************** |
| 16771 | * |
| 16772 | * Transform all basic blocks for codegen. |
| 16773 | */ |
| 16774 | |
| 16775 | void Compiler::fgMorph() |
| 16776 | { |
| 16777 | noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here. |
| 16778 | |
| 16779 | fgOutgoingArgTemps = nullptr; |
| 16780 | |
| 16781 | #ifdef DEBUG |
| 16782 | if (verbose) |
| 16783 | { |
| 16784 | printf("*************** In fgMorph()\n" ); |
| 16785 | } |
| 16786 | if (verboseTrees) |
| 16787 | { |
| 16788 | fgDispBasicBlocks(true); |
| 16789 | } |
| 16790 | #endif // DEBUG |
| 16791 | |
| 16792 | // Insert call to class constructor as the first basic block if |
| 16793 | // we were asked to do so. |
| 16794 | if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */, |
| 16795 | impTokenLookupContextHandle /* context */) & |
| 16796 | CORINFO_INITCLASS_USE_HELPER) |
| 16797 | { |
| 16798 | fgEnsureFirstBBisScratch(); |
| 16799 | fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass()); |
| 16800 | } |
| 16801 | |
| 16802 | #ifdef DEBUG |
| 16803 | if (opts.compGcChecks) |
| 16804 | { |
| 16805 | for (unsigned i = 0; i < info.compArgsCount; i++) |
| 16806 | { |
| 16807 | if (lvaTable[i].TypeGet() == TYP_REF) |
| 16808 | { |
| 16809 | // confirm that the argument is a GC pointer (for debugging (GC stress)) |
| 16810 | GenTree* op = gtNewLclvNode(i, TYP_REF); |
| 16811 | GenTreeArgList* args = gtNewArgList(op); |
| 16812 | op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, args); |
| 16813 | |
| 16814 | fgEnsureFirstBBisScratch(); |
| 16815 | fgInsertStmtAtEnd(fgFirstBB, op); |
| 16816 | } |
| 16817 | } |
| 16818 | } |
| 16819 | #endif // DEBUG |
| 16820 | |
| 16821 | #if defined(DEBUG) && defined(_TARGET_XARCH_) |
| 16822 | if (opts.compStackCheckOnRet) |
| 16823 | { |
lvaReturnSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnSpCheck"));
| 16825 | lvaTable[lvaReturnSpCheck].lvType = TYP_I_IMPL; |
| 16826 | } |
| 16827 | #endif // defined(DEBUG) && defined(_TARGET_XARCH_) |
| 16828 | |
| 16829 | #if defined(DEBUG) && defined(_TARGET_X86_) |
| 16830 | if (opts.compStackCheckOnCall) |
| 16831 | { |
lvaCallSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallSpCheck"));
| 16833 | lvaTable[lvaCallSpCheck].lvType = TYP_I_IMPL; |
| 16834 | } |
| 16835 | #endif // defined(DEBUG) && defined(_TARGET_X86_) |
| 16836 | |
| 16837 | /* Filter out unimported BBs */ |
| 16838 | |
| 16839 | fgRemoveEmptyBlocks(); |
| 16840 | |
| 16841 | #ifdef DEBUG |
| 16842 | /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */ |
| 16843 | fgDebugCheckBBlist(false, false); |
| 16844 | #endif // DEBUG |
| 16845 | |
| 16846 | EndPhase(PHASE_MORPH_INIT); |
| 16847 | |
| 16848 | /* Inline */ |
| 16849 | fgInline(); |
| 16850 | #if 0 |
| 16851 | JITDUMP("trees after inlining\n" ); |
| 16852 | DBEXEC(VERBOSE, fgDispBasicBlocks(true)); |
| 16853 | #endif |
| 16854 | |
| 16855 | RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time. |
| 16856 | |
| 16857 | EndPhase(PHASE_MORPH_INLINE); |
| 16858 | |
| 16859 | // Transform each GT_ALLOCOBJ node into either an allocation helper call or |
| 16860 | // local variable allocation on the stack. |
| 16861 | ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS |
| 16862 | |
| 16863 | // TODO-ObjectStackAllocation: Enable the optimization for architectures using |
| 16864 | // JIT32_GCENCODER (i.e., x86). |
| 16865 | #ifndef JIT32_GCENCODER |
| 16866 | if (JitConfig.JitObjectStackAllocation() && opts.OptimizationEnabled()) |
| 16867 | { |
| 16868 | objectAllocator.EnableObjectStackAllocation(); |
| 16869 | } |
| 16870 | #endif // JIT32_GCENCODER |
| 16871 | |
| 16872 | objectAllocator.Run(); |
| 16873 | |
| 16874 | /* Add any internal blocks/trees we may need */ |
| 16875 | |
| 16876 | fgAddInternal(); |
| 16877 | |
| 16878 | #ifdef DEBUG |
| 16879 | /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */ |
| 16880 | fgDebugCheckBBlist(false, false); |
| 16881 | /* Inliner could clone some trees. */ |
| 16882 | fgDebugCheckNodesUniqueness(); |
| 16883 | #endif // DEBUG |
| 16884 | |
| 16885 | fgRemoveEmptyTry(); |
| 16886 | |
| 16887 | EndPhase(PHASE_EMPTY_TRY); |
| 16888 | |
| 16889 | fgRemoveEmptyFinally(); |
| 16890 | |
| 16891 | EndPhase(PHASE_EMPTY_FINALLY); |
| 16892 | |
| 16893 | fgMergeFinallyChains(); |
| 16894 | |
| 16895 | EndPhase(PHASE_MERGE_FINALLY_CHAINS); |
| 16896 | |
| 16897 | fgCloneFinally(); |
| 16898 | |
| 16899 | EndPhase(PHASE_CLONE_FINALLY); |
| 16900 | |
| 16901 | fgUpdateFinallyTargetFlags(); |
| 16902 | |
| 16903 | /* For x64 and ARM64 we need to mark irregular parameters */ |
| 16904 | |
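// Ref counts are maintained in "early" mode from here until fgMarkDemotedImplicitByRefArgs
// has run below, so that implicit by-ref processing can reason about the number of
// appearances of each parameter.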
| 16905 | lvaRefCountState = RCS_EARLY; |
| 16906 | fgMarkImplicitByRefArgs(); |
| 16907 | |
| 16908 | /* Promote struct locals if necessary */ |
| 16909 | fgPromoteStructs(); |
| 16910 | |
/* Now it is time to figure out which locals are address-taken. */
| 16912 | fgMarkAddressExposedLocals(); |
| 16913 | |
| 16914 | EndPhase(PHASE_STR_ADRLCL); |
| 16915 | |
| 16916 | /* Apply the type update to implicit byref parameters; also choose (based on address-exposed |
| 16917 | analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */ |
| 16918 | fgRetypeImplicitByRefArgs(); |
| 16919 | |
| 16920 | #ifdef DEBUG |
/* Now that address-taken locals and implicit byrefs are marked, we can safely apply stress. */
| 16922 | lvaStressLclFld(); |
| 16923 | fgStress64RsltMul(); |
| 16924 | #endif // DEBUG |
| 16925 | |
| 16926 | EndPhase(PHASE_MORPH_IMPBYREF); |
| 16927 | |
| 16928 | /* Morph the trees in all the blocks of the method */ |
| 16929 | |
| 16930 | fgMorphBlocks(); |
| 16931 | |
| 16932 | /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */ |
| 16933 | fgMarkDemotedImplicitByRefArgs(); |
| 16934 | lvaRefCountState = RCS_INVALID; |
| 16935 | |
| 16936 | EndPhase(PHASE_MORPH_GLOBAL); |
| 16937 | |
| 16938 | #if 0 |
| 16939 | JITDUMP("trees after fgMorphBlocks\n" ); |
| 16940 | DBEXEC(VERBOSE, fgDispBasicBlocks(true)); |
| 16941 | #endif |
| 16942 | |
| 16943 | #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) |
| 16944 | if (fgNeedToAddFinallyTargetBits) |
| 16945 | { |
| 16946 | // We previously wiped out the BBF_FINALLY_TARGET bits due to some morphing; add them back. |
| 16947 | fgAddFinallyTargetFlags(); |
| 16948 | fgNeedToAddFinallyTargetBits = false; |
| 16949 | } |
| 16950 | #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) |
| 16951 | |
| 16952 | /* Decide the kind of code we want to generate */ |
| 16953 | |
| 16954 | fgSetOptions(); |
| 16955 | |
| 16956 | fgExpandQmarkNodes(); |
| 16957 | |
| 16958 | #ifdef DEBUG |
| 16959 | compCurBB = nullptr; |
| 16960 | #endif // DEBUG |
| 16961 | } |
| 16962 | |
| 16963 | /***************************************************************************** |
| 16964 | * |
| 16965 | * Promoting struct locals |
| 16966 | */ |
| 16967 | void Compiler::fgPromoteStructs() |
| 16968 | { |
| 16969 | #ifdef DEBUG |
| 16970 | if (verbose) |
| 16971 | { |
| 16972 | printf("*************** In fgPromoteStructs()\n" ); |
| 16973 | } |
| 16974 | #endif // DEBUG |
| 16975 | |
| 16976 | if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE)) |
| 16977 | { |
| 16978 | JITDUMP(" promotion opt flag not enabled\n" ); |
| 16979 | return; |
| 16980 | } |
| 16981 | |
| 16982 | if (fgNoStructPromotion) |
| 16983 | { |
| 16984 | JITDUMP(" promotion disabled by JitNoStructPromotion\n" ); |
| 16985 | return; |
| 16986 | } |
| 16987 | |
| 16988 | #if 0 |
| 16989 | // The code in this #if has been useful in debugging struct promotion issues, by |
| 16990 | // enabling selective enablement of the struct promotion optimization according to |
| 16991 | // method hash. |
| 16992 | #ifdef DEBUG |
| 16993 | unsigned methHash = info.compMethodHash(); |
char* lostr = getenv("structpromohashlo");
unsigned methHashLo = 0;
if (lostr != NULL)
{
sscanf_s(lostr, "%x", &methHashLo);
}
char* histr = getenv("structpromohashhi");
unsigned methHashHi = UINT32_MAX;
if (histr != NULL)
{
sscanf_s(histr, "%x", &methHashHi);
| 17005 | } |
| 17006 | if (methHash < methHashLo || methHash > methHashHi) |
| 17007 | { |
| 17008 | return; |
| 17009 | } |
| 17010 | else |
| 17011 | { |
| 17012 | printf("Promoting structs for method %s, hash = 0x%x.\n" , |
| 17013 | info.compFullName, info.compMethodHash()); |
| 17014 | printf("" ); // in our logic this causes a flush |
| 17015 | } |
| 17016 | #endif // DEBUG |
| 17017 | #endif // 0 |
| 17018 | |
| 17019 | if (info.compIsVarArgs) |
| 17020 | { |
| 17021 | JITDUMP(" promotion disabled because of varargs\n" ); |
| 17022 | return; |
| 17023 | } |
| 17024 | |
| 17025 | #ifdef DEBUG |
| 17026 | if (verbose) |
| 17027 | { |
| 17028 | printf("\nlvaTable before fgPromoteStructs\n" ); |
| 17029 | lvaTableDump(); |
| 17030 | } |
| 17031 | #endif // DEBUG |
| 17032 | |
// The lvaTable might grow as we grab temps. Make a local copy of the current count here.
| 17034 | unsigned startLvaCount = lvaCount; |
| 17035 | |
| 17036 | // |
| 17037 | // Loop through the original lvaTable. Looking for struct locals to be promoted. |
| 17038 | // |
| 17039 | lvaStructPromotionInfo structPromotionInfo; |
| 17040 | bool tooManyLocalsReported = false; |
| 17041 | |
| 17042 | for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++) |
| 17043 | { |
| 17044 | // Whether this var got promoted |
| 17045 | bool promotedVar = false; |
| 17046 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 17047 | |
| 17048 | // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote |
| 17049 | // its fields. Instead, we will attempt to enregister the entire struct. |
| 17050 | if (varDsc->lvIsSIMDType() && (varDsc->lvIsUsedInSIMDIntrinsic() || isOpaqueSIMDLclVar(varDsc))) |
| 17051 | { |
| 17052 | varDsc->lvRegStruct = true; |
| 17053 | } |
| 17054 | // Don't promote if we have reached the tracking limit. |
| 17055 | else if (lvaHaveManyLocals()) |
| 17056 | { |
// Print the message the first time we detect this condition
| 17058 | if (!tooManyLocalsReported) |
| 17059 | { |
| 17060 | JITDUMP("Stopped promoting struct fields, due to too many locals.\n" ); |
| 17061 | } |
| 17062 | tooManyLocalsReported = true; |
| 17063 | } |
| 17064 | else if (varTypeIsStruct(varDsc)) |
| 17065 | { |
| 17066 | assert(structPromotionHelper != nullptr); |
| 17067 | promotedVar = structPromotionHelper->TryPromoteStructVar(lclNum); |
| 17068 | } |
| 17069 | |
| 17070 | if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed) |
| 17071 | { |
| 17072 | // Even if we have not used this in a SIMD intrinsic, if it is not being promoted, |
| 17073 | // we will treat it as a reg struct. |
| 17074 | varDsc->lvRegStruct = true; |
| 17075 | } |
| 17076 | } |
| 17077 | |
| 17078 | #ifdef _TARGET_ARM_ |
| 17079 | if (structPromotionHelper->GetRequiresScratchVar()) |
| 17080 | { |
| 17081 | // Ensure that the scratch variable is allocated, in case we |
| 17082 | // pass a promoted struct as an argument. |
| 17083 | if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM) |
| 17084 | { |
| 17085 | lvaPromotedStructAssemblyScratchVar = |
lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
| 17087 | lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL; |
| 17088 | } |
| 17089 | } |
| 17090 | #endif // _TARGET_ARM_ |
| 17091 | |
| 17092 | #ifdef DEBUG |
| 17093 | if (verbose) |
| 17094 | { |
| 17095 | printf("\nlvaTable after fgPromoteStructs\n" ); |
| 17096 | lvaTableDump(); |
| 17097 | } |
| 17098 | #endif // DEBUG |
| 17099 | } |
| 17100 | |
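//------------------------------------------------------------------------
// fgMorphStructField: If a GT_FIELD accesses a promoted struct local (or a
//    "normed" struct local), rewrite it in place as a GT_LCL_VAR of the
//    corresponding field local (or of the struct local itself), when the
//    offset and types allow it.
//
// Arguments:
//    tree   - the GT_FIELD node
//    parent - the user of 'tree'; used to set GTF_VAR_DEF/GTF_DONT_CSE for stores
//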
| 17101 | void Compiler::fgMorphStructField(GenTree* tree, GenTree* parent) |
| 17102 | { |
| 17103 | noway_assert(tree->OperGet() == GT_FIELD); |
| 17104 | |
| 17105 | GenTreeField* field = tree->AsField(); |
| 17106 | GenTree* objRef = field->gtFldObj; |
| 17107 | GenTree* obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr; |
| 17108 | noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))); |
| 17109 | |
| 17110 | /* Is this an instance data member? */ |
| 17111 | |
| 17112 | if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)) |
| 17113 | { |
| 17114 | unsigned lclNum = obj->gtLclVarCommon.gtLclNum; |
| 17115 | const LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 17116 | |
| 17117 | if (varTypeIsStruct(obj)) |
| 17118 | { |
| 17119 | if (varDsc->lvPromoted) |
| 17120 | { |
| 17121 | // Promoted struct |
| 17122 | unsigned fldOffset = field->gtFldOffset; |
| 17123 | unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); |
| 17124 | |
| 17125 | if (fieldLclIndex == BAD_VAR_NUM) |
| 17126 | { |
// We are accessing a promoted struct's field with an offset that doesn't correspond to any field.
// This can happen if the struct was cast to another struct with different field offsets.
| 17129 | return; |
| 17130 | } |
| 17131 | |
| 17132 | const LclVarDsc* fieldDsc = &lvaTable[fieldLclIndex]; |
| 17133 | var_types fieldType = fieldDsc->TypeGet(); |
| 17134 | |
| 17135 | assert(fieldType != TYP_STRUCT); // promoted LCL_VAR can't have a struct type. |
| 17136 | if (tree->TypeGet() != fieldType) |
| 17137 | { |
| 17138 | if (tree->TypeGet() != TYP_STRUCT) |
| 17139 | { |
| 17140 | // This is going to be an incorrect instruction promotion. |
// For example, when we try to read an int as a long.
| 17142 | return; |
| 17143 | } |
| 17144 | |
| 17145 | if (field->gtFldHnd != fieldDsc->lvFieldHnd) |
| 17146 | { |
| 17147 | CORINFO_CLASS_HANDLE fieldTreeClass = nullptr, fieldDscClass = nullptr; |
| 17148 | |
| 17149 | CorInfoType fieldTreeType = info.compCompHnd->getFieldType(field->gtFldHnd, &fieldTreeClass); |
| 17150 | CorInfoType fieldDscType = info.compCompHnd->getFieldType(fieldDsc->lvFieldHnd, &fieldDscClass); |
| 17151 | if (fieldTreeType != fieldDscType || fieldTreeClass != fieldDscClass) |
| 17152 | { |
// The promoted field is accessed with a different class handle, so we can't check that the types match.
| 17154 | return; |
| 17155 | } |
| 17156 | // Access the promoted field as a field of a non-promoted struct with the same class handle. |
| 17157 | } |
| 17158 | #ifdef DEBUG |
| 17159 | else if (tree->TypeGet() == TYP_STRUCT) |
| 17160 | { |
// The field tree accesses it as a struct, but the promoted LCL_VAR for the field
// has another type. This can happen only if struct promotion faked the field type
// for a struct whose single scalar field is aligned at its natural boundary.
| 17164 | assert(structPromotionHelper != nullptr); |
| 17165 | structPromotionHelper->CheckRetypedAsScalar(field->gtFldHnd, fieldType); |
| 17166 | } |
| 17167 | #endif // DEBUG |
| 17168 | } |
| 17169 | |
| 17170 | tree->SetOper(GT_LCL_VAR); |
| 17171 | tree->gtLclVarCommon.SetLclNum(fieldLclIndex); |
| 17172 | tree->gtType = fieldType; |
| 17173 | tree->gtFlags &= GTF_NODE_MASK; |
| 17174 | tree->gtFlags &= ~GTF_GLOB_REF; |
| 17175 | |
| 17176 | if (parent->gtOper == GT_ASG) |
| 17177 | { |
| 17178 | if (parent->gtOp.gtOp1 == tree) |
| 17179 | { |
| 17180 | tree->gtFlags |= GTF_VAR_DEF; |
| 17181 | tree->gtFlags |= GTF_DONT_CSE; |
| 17182 | } |
| 17183 | |
| 17184 | // Promotion of struct containing struct fields where the field |
| 17185 | // is a struct with a single pointer sized scalar type field: in |
| 17186 | // this case struct promotion uses the type of the underlying |
| 17187 | // scalar field as the type of struct field instead of recursively |
| 17188 | // promoting. This can lead to a case where we have a block-asgn |
| 17189 | // with its RHS replaced with a scalar type. Mark RHS value as |
| 17190 | // DONT_CSE so that assertion prop will not do const propagation. |
// This is required because if the RHS of a block assignment is a
// constant, it would incorrectly be interpreted as an init-block.
| 17193 | // |
| 17194 | // TODO - This can also be avoided if we implement recursive struct |
| 17195 | // promotion, tracked by #10019. |
| 17196 | if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree)) |
| 17197 | { |
| 17198 | tree->gtFlags |= GTF_DONT_CSE; |
| 17199 | } |
| 17200 | } |
| 17201 | #ifdef DEBUG |
| 17202 | if (verbose) |
| 17203 | { |
| 17204 | printf("Replacing the field in promoted struct with local var V%02u\n" , fieldLclIndex); |
| 17205 | } |
| 17206 | #endif // DEBUG |
| 17207 | } |
| 17208 | } |
| 17209 | else |
| 17210 | { |
| 17211 | // Normed struct |
| 17212 | // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if |
| 17213 | // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 |
| 17214 | // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However, |
| 17215 | // there is one extremely rare case where that won't be true. An enum type is a special value type |
// that contains exactly one element of a primitive integer type (that, for CLS programs, is named
| 17217 | // "value__"). The VM tells us that a local var of that enum type is the primitive type of the |
| 17218 | // enum's single field. It turns out that it is legal for IL to access this field using ldflda or |
| 17219 | // ldfld. For example: |
| 17220 | // |
| 17221 | // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum |
| 17222 | // { |
| 17223 | // .field public specialname rtspecialname int16 value__ |
| 17224 | // .field public static literal valuetype mynamespace.e_t one = int16(0x0000) |
| 17225 | // } |
| 17226 | // .method public hidebysig static void Main() cil managed |
| 17227 | // { |
| 17228 | // .locals init (valuetype mynamespace.e_t V_0) |
| 17229 | // ... |
| 17230 | // ldloca.s V_0 |
| 17231 | // ldflda int16 mynamespace.e_t::value__ |
| 17232 | // ... |
| 17233 | // } |
| 17234 | // |
| 17235 | // Normally, compilers will not generate the ldflda, since it is superfluous. |
| 17236 | // |
| 17237 | // In the example, the lclVar is short, but the JIT promotes all trees using this local to the |
| 17238 | // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type |
| 17239 | // mismatch like this, don't do this morphing. The local var may end up getting marked as |
| 17240 | // address taken, and the appropriate SHORT load will be done from memory in that case. |
| 17241 | |
| 17242 | if (tree->TypeGet() == obj->TypeGet()) |
| 17243 | { |
| 17244 | tree->ChangeOper(GT_LCL_VAR); |
| 17245 | tree->gtLclVarCommon.SetLclNum(lclNum); |
| 17246 | tree->gtFlags &= GTF_NODE_MASK; |
| 17247 | |
| 17248 | if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree)) |
| 17249 | { |
| 17250 | tree->gtFlags |= GTF_VAR_DEF; |
| 17251 | tree->gtFlags |= GTF_DONT_CSE; |
| 17252 | } |
| 17253 | #ifdef DEBUG |
| 17254 | if (verbose) |
| 17255 | { |
| 17256 | printf("Replacing the field in normed struct with local var V%02u\n" , lclNum); |
| 17257 | } |
| 17258 | #endif // DEBUG |
| 17259 | } |
| 17260 | } |
| 17261 | } |
| 17262 | } |
| 17263 | |
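//------------------------------------------------------------------------
// fgMorphLocalField: If a GT_LCL_FLD accesses a promoted struct local at an
//    offset and size that match one of its field locals, rewrite it as a
//    GT_LCL_VAR of that field local; otherwise force the struct to stay in
//    memory (do-not-enregister).
//
// Arguments:
//    tree   - the GT_LCL_FLD node
//    parent - the user of 'tree'; used to set GTF_VAR_DEF/GTF_DONT_CSE for stores
//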
| 17264 | void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) |
| 17265 | { |
| 17266 | noway_assert(tree->OperGet() == GT_LCL_FLD); |
| 17267 | |
| 17268 | unsigned lclNum = tree->gtLclFld.gtLclNum; |
| 17269 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 17270 | |
| 17271 | if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted)) |
| 17272 | { |
| 17273 | // Promoted struct |
| 17274 | unsigned fldOffset = tree->gtLclFld.gtLclOffs; |
| 17275 | unsigned fieldLclIndex = 0; |
| 17276 | LclVarDsc* fldVarDsc = nullptr; |
| 17277 | |
| 17278 | if (fldOffset != BAD_VAR_NUM) |
| 17279 | { |
| 17280 | fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); |
| 17281 | noway_assert(fieldLclIndex != BAD_VAR_NUM); |
| 17282 | fldVarDsc = &lvaTable[fieldLclIndex]; |
| 17283 | } |
| 17284 | |
| 17285 | if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType) |
| 17286 | #ifdef _TARGET_X86_ |
| 17287 | && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType) |
| 17288 | #endif |
| 17289 | ) |
| 17290 | { |
| 17291 | // There is an existing sub-field we can use. |
| 17292 | tree->gtLclFld.SetLclNum(fieldLclIndex); |
| 17293 | |
| 17294 | // The field must be an enregisterable type; otherwise it would not be a promoted field. |
| 17295 | // The tree type may not match, e.g. for return types that have been morphed, but both |
| 17296 | // must be enregisterable types. |
| 17297 | // TODO-Cleanup: varTypeCanReg should presumably return true for SIMD types, but |
| 17298 | // there may be places where that would violate existing assumptions. |
| 17299 | var_types treeType = tree->TypeGet(); |
| 17300 | var_types fieldType = fldVarDsc->TypeGet(); |
| 17301 | assert((varTypeCanReg(treeType) || varTypeIsSIMD(treeType)) && |
| 17302 | (varTypeCanReg(fieldType) || varTypeIsSIMD(fieldType))); |
| 17303 | |
| 17304 | tree->ChangeOper(GT_LCL_VAR); |
| 17305 | assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex); |
| 17306 | tree->gtType = fldVarDsc->TypeGet(); |
| 17307 | #ifdef DEBUG |
| 17308 | if (verbose) |
| 17309 | { |
| 17310 | printf("Replacing the GT_LCL_FLD in promoted struct with local var V%02u\n" , fieldLclIndex); |
| 17311 | } |
| 17312 | #endif // DEBUG |
| 17313 | |
| 17314 | if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree)) |
| 17315 | { |
| 17316 | tree->gtFlags |= GTF_VAR_DEF; |
| 17317 | tree->gtFlags |= GTF_DONT_CSE; |
| 17318 | } |
| 17319 | } |
| 17320 | else |
| 17321 | { |
// There is no existing field that has all the parts that we need,
// so we must ensure that the struct lives in memory.
| 17324 | lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); |
| 17325 | |
| 17326 | #ifdef DEBUG |
// We can't retype this local (e.g., to a float) because it really does have its
// address taken.
| 17329 | varDsc->lvKeepType = 1; |
| 17330 | #endif // DEBUG |
| 17331 | } |
| 17332 | } |
| 17333 | } |
| 17334 | |
| 17335 | //------------------------------------------------------------------------ |
| 17336 | // fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference"; |
| 17337 | // i.e. which the ABI requires to be passed by making a copy in the caller and |
| 17338 | // passing its address to the callee. Mark their `LclVarDsc`s such that |
| 17339 | // `lvaIsImplicitByRefLocal` will return true for them. |
| 17340 | |
| 17341 | void Compiler::fgMarkImplicitByRefArgs() |
| 17342 | { |
| 17343 | #if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) |
| 17344 | #ifdef DEBUG |
| 17345 | if (verbose) |
| 17346 | { |
| 17347 | printf("\n*************** In fgMarkImplicitByRefs()\n" ); |
| 17348 | } |
| 17349 | #endif // DEBUG |
| 17350 | |
| 17351 | for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) |
| 17352 | { |
| 17353 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 17354 | |
| 17355 | if (varDsc->lvIsParam && varTypeIsStruct(varDsc)) |
| 17356 | { |
| 17357 | size_t size; |
| 17358 | |
| 17359 | if (varDsc->lvSize() > REGSIZE_BYTES) |
| 17360 | { |
| 17361 | size = varDsc->lvSize(); |
| 17362 | } |
| 17363 | else |
| 17364 | { |
| 17365 | CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); |
| 17366 | size = info.compCompHnd->getClassSize(typeHnd); |
| 17367 | } |
| 17368 | |
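// On Windows x64 a struct is passed by value in a register only if its size is 1, 2, 4
// or 8 bytes; any other size is passed by reference to a caller-allocated copy, which is
// what the size/power-of-two test below detects. The ARM64 test likewise treats structs
// larger than a pointer that are not multi-reg structs as implicit by-ref.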
| 17369 | #if defined(_TARGET_AMD64_) |
| 17370 | if (size > REGSIZE_BYTES || (size & (size - 1)) != 0) |
| 17371 | #elif defined(_TARGET_ARM64_) |
| 17372 | if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc, this->info.compIsVarArgs)) |
| 17373 | #endif |
| 17374 | { |
// Previously nobody was ever setting lvIsParam and lvIsTemp on the same local,
// so we now use that combination to indicate that this is one of the weird
// implicit by-ref locals.
// The address-taken cleanup will look for references to locals marked like
// this, and transform them appropriately.
| 17380 | varDsc->lvIsTemp = 1; |
| 17381 | |
| 17382 | // Clear the ref count field; fgMarkAddressTakenLocals will increment it per |
| 17383 | // appearance of implicit-by-ref param so that call arg morphing can do an |
| 17384 | // optimization for single-use implicit-by-ref params whose single use is as |
| 17385 | // an outgoing call argument. |
| 17386 | varDsc->setLvRefCnt(0, RCS_EARLY); |
| 17387 | } |
| 17388 | } |
| 17389 | } |
| 17390 | |
| 17391 | #endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
| 17392 | } |
| 17393 | |
| 17394 | //------------------------------------------------------------------------ |
| 17395 | // fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from |
| 17396 | // struct to pointer). Also choose (based on address-exposed analysis) |
| 17397 | // which struct promotions of implicit byrefs to keep or discard. |
| 17398 | // For those which are kept, insert the appropriate initialization code. |
| 17399 | // For those which are to be discarded, annotate the promoted field locals |
| 17400 | // so that fgMorphImplicitByRefArgs will know to rewrite their appearances |
| 17401 | // using indirections off the pointer parameters. |
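//
// Notes:
//    A sketch of the overall effect (V01/V0N are placeholder local numbers): a struct
//    parameter V01 is retyped to TYP_BYREF; if its promotion is kept, a new struct temp
//    V0N is created, the promoted field locals are re-parented to V0N, and an assignment
//    "V0N = indir(V01)" is inserted at method entry; if the promotion is discarded, the
//    fields stay parented to V01 so that fgMorphImplicitByRefArgs can rewrite their
//    appearances as indirections off the pointer.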
| 17402 | |
| 17403 | void Compiler::fgRetypeImplicitByRefArgs() |
| 17404 | { |
| 17405 | #if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) |
| 17406 | #ifdef DEBUG |
| 17407 | if (verbose) |
| 17408 | { |
| 17409 | printf("\n*************** In fgRetypeImplicitByRefArgs()\n" ); |
| 17410 | } |
| 17411 | #endif // DEBUG |
| 17412 | |
| 17413 | for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) |
| 17414 | { |
| 17415 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 17416 | |
| 17417 | if (lvaIsImplicitByRefLocal(lclNum)) |
| 17418 | { |
| 17419 | size_t size; |
| 17420 | |
| 17421 | if (varDsc->lvSize() > REGSIZE_BYTES) |
| 17422 | { |
| 17423 | size = varDsc->lvSize(); |
| 17424 | } |
| 17425 | else |
| 17426 | { |
| 17427 | CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); |
| 17428 | size = info.compCompHnd->getClassSize(typeHnd); |
| 17429 | } |
| 17430 | |
| 17431 | if (varDsc->lvPromoted) |
| 17432 | { |
| 17433 | // This implicit-by-ref was promoted; create a new temp to represent the |
| 17434 | // promoted struct before rewriting this parameter as a pointer. |
unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
| 17436 | lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true); |
| 17437 | if (info.compIsVarArgs) |
| 17438 | { |
| 17439 | lvaSetStructUsedAsVarArg(newLclNum); |
| 17440 | } |
| 17441 | |
| 17442 | // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array. |
| 17443 | varDsc = &lvaTable[lclNum]; |
| 17444 | |
| 17445 | // Copy the struct promotion annotations to the new temp. |
| 17446 | LclVarDsc* newVarDsc = &lvaTable[newLclNum]; |
| 17447 | newVarDsc->lvPromoted = true; |
| 17448 | newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart; |
| 17449 | newVarDsc->lvFieldCnt = varDsc->lvFieldCnt; |
| 17450 | newVarDsc->lvContainsHoles = varDsc->lvContainsHoles; |
| 17451 | newVarDsc->lvCustomLayout = varDsc->lvCustomLayout; |
| 17452 | #ifdef DEBUG |
| 17453 | newVarDsc->lvKeepType = true; |
| 17454 | #endif // DEBUG |
| 17455 | |
| 17456 | // Propagate address-taken-ness and do-not-enregister-ness. |
| 17457 | newVarDsc->lvAddrExposed = varDsc->lvAddrExposed; |
| 17458 | newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister; |
| 17459 | #ifdef DEBUG |
| 17460 | newVarDsc->lvLclBlockOpAddr = varDsc->lvLclBlockOpAddr; |
| 17461 | newVarDsc->lvLclFieldExpr = varDsc->lvLclFieldExpr; |
| 17462 | newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr; |
| 17463 | newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr; |
| 17464 | newVarDsc->lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall; |
| 17465 | #endif // DEBUG |
| 17466 | |
| 17467 | // If the promotion is dependent, the promoted temp would just be committed |
| 17468 | // to memory anyway, so we'll rewrite its appearances to be indirections |
| 17469 | // through the pointer parameter, the same as we'd do for this |
| 17470 | // parameter if it weren't promoted at all (otherwise the initialization |
| 17471 | // of the new temp would just be a needless memcpy at method entry). |
| 17472 | bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) || |
| 17473 | (varDsc->lvRefCnt(RCS_EARLY) <= varDsc->lvFieldCnt); |
| 17474 | |
| 17475 | if (!undoPromotion) |
| 17476 | { |
| 17477 | // Insert IR that initializes the temp from the parameter. |
| 17478 | // LHS is a simple reference to the temp. |
| 17479 | fgEnsureFirstBBisScratch(); |
| 17480 | GenTree* lhs = gtNewLclvNode(newLclNum, varDsc->lvType); |
| 17481 | // RHS is an indirection (using GT_OBJ) off the parameter. |
| 17482 | GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF); |
| 17483 | GenTree* rhs = gtNewBlockVal(addr, (unsigned)size); |
| 17484 | GenTree* assign = gtNewAssignNode(lhs, rhs); |
| 17485 | fgInsertStmtAtBeg(fgFirstBB, assign); |
| 17486 | } |
| 17487 | |
| 17488 | // Update the locals corresponding to the promoted fields. |
| 17489 | unsigned fieldLclStart = varDsc->lvFieldLclStart; |
| 17490 | unsigned fieldCount = varDsc->lvFieldCnt; |
| 17491 | unsigned fieldLclStop = fieldLclStart + fieldCount; |
| 17492 | |
| 17493 | for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum) |
| 17494 | { |
| 17495 | LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; |
| 17496 | |
| 17497 | if (undoPromotion) |
| 17498 | { |
| 17499 | // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs |
| 17500 | // will know to rewrite appearances of this local. |
| 17501 | assert(fieldVarDsc->lvParentLcl == lclNum); |
| 17502 | } |
| 17503 | else |
| 17504 | { |
| 17505 | // Set the new parent. |
| 17506 | fieldVarDsc->lvParentLcl = newLclNum; |
// Clear the ref count field; it is used to communicate the number of references
| 17508 | // to the implicit byref parameter when morphing calls that pass the implicit byref |
| 17509 | // out as an outgoing argument value, but that doesn't pertain to this field local |
| 17510 | // which is now a field of a non-arg local. |
| 17511 | fieldVarDsc->setLvRefCnt(0, RCS_EARLY); |
| 17512 | } |
| 17513 | |
| 17514 | fieldVarDsc->lvIsParam = false; |
| 17515 | // The fields shouldn't inherit any register preferences from |
| 17516 | // the parameter which is really a pointer to the struct. |
| 17517 | fieldVarDsc->lvIsRegArg = false; |
| 17518 | fieldVarDsc->lvIsMultiRegArg = false; |
| 17519 | fieldVarDsc->lvSetIsHfaRegArg(false); |
| 17520 | fieldVarDsc->lvArgReg = REG_NA; |
| 17521 | #if FEATURE_MULTIREG_ARGS |
| 17522 | fieldVarDsc->lvOtherArgReg = REG_NA; |
| 17523 | #endif |
| 17524 | } |
| 17525 | |
| 17526 | // Hijack lvFieldLclStart to record the new temp number. |
| 17527 | // It will get fixed up in fgMarkDemotedImplicitByRefArgs. |
| 17528 | varDsc->lvFieldLclStart = newLclNum; |
| 17529 | // Go ahead and clear lvFieldCnt -- either we're promoting |
| 17530 | // a replacement temp or we're not promoting this arg, and |
| 17531 | // in either case the parameter is now a pointer that doesn't |
| 17532 | // have these fields. |
| 17533 | varDsc->lvFieldCnt = 0; |
| 17534 | |
| 17535 | // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs |
| 17536 | // whether references to the struct should be rewritten as |
| 17537 | // indirections off the pointer (not promoted) or references |
| 17538 | // to the new struct local (promoted). |
| 17539 | varDsc->lvPromoted = !undoPromotion; |
| 17540 | } |
| 17541 | else |
| 17542 | { |
| 17543 | // The "undo promotion" path above clears lvPromoted for args that struct |
| 17544 | // promotion wanted to promote but that aren't considered profitable to |
| 17545 | // rewrite. It hijacks lvFieldLclStart to communicate to |
| 17546 | // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left |
| 17547 | // on such args for fgMorphImplicitByRefArgs to consult in the interim. |
| 17548 | // Here we have an arg that was simply never promoted, so make sure it doesn't |
| 17549 | // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs |
| 17550 | // and fgMarkDemotedImplicitByRefArgs. |
| 17551 | assert(varDsc->lvFieldLclStart == 0); |
| 17552 | } |
| 17553 | |
| 17554 | // Since the parameter in this position is really a pointer, its type is TYP_BYREF. |
| 17555 | varDsc->lvType = TYP_BYREF; |
| 17556 | |
// Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF,
// make sure that the following flag is not set, as it would force SSA to
// exclude this LclVar from tracking/enregistering. (see SsaBuilder::IncludeInSsa)
| 17560 | // |
| 17561 | varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it. |
| 17562 | |
| 17563 | // The struct parameter may have had its address taken, but the pointer parameter |
| 17564 | // cannot -- any uses of the struct parameter's address are uses of the pointer |
| 17565 | // parameter's value, and there's no way for the MSIL to reference the pointer |
| 17566 | // parameter's address. So clear the address-taken bit for the parameter. |
| 17567 | varDsc->lvAddrExposed = 0; |
| 17568 | varDsc->lvDoNotEnregister = 0; |
| 17569 | |
| 17570 | #ifdef DEBUG |
| 17571 | // This should not be converted to a double in stress mode, |
| 17572 | // because it is really a pointer |
| 17573 | varDsc->lvKeepType = 1; |
| 17574 | |
| 17575 | if (verbose) |
| 17576 | { |
| 17577 | printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n" , lclNum); |
| 17578 | } |
| 17579 | #endif // DEBUG |
| 17580 | } |
| 17581 | } |
| 17582 | |
| 17583 | #endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
| 17584 | } |
| 17585 | |
| 17586 | //------------------------------------------------------------------------ |
| 17587 | // fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion |
| 17588 | // asked to promote. Appearances of these have now been rewritten |
| 17589 | // (by fgMorphImplicitByRefArgs) using indirections from the pointer |
| 17590 | // parameter or references to the promotion temp, as appropriate. |
| 17591 | |
| 17592 | void Compiler::fgMarkDemotedImplicitByRefArgs() |
| 17593 | { |
| 17594 | #if (defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) |
| 17595 | |
| 17596 | for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) |
| 17597 | { |
| 17598 | LclVarDsc* varDsc = &lvaTable[lclNum]; |
| 17599 | |
| 17600 | if (lvaIsImplicitByRefLocal(lclNum)) |
| 17601 | { |
| 17602 | if (varDsc->lvPromoted) |
| 17603 | { |
| 17604 | // The parameter is simply a pointer now, so clear lvPromoted. It was left set |
| 17605 | // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that |
| 17606 | // appearances of this arg needed to be rewritten to a new promoted struct local. |
| 17607 | varDsc->lvPromoted = false; |
| 17608 | |
| 17609 | // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs |
| 17610 | // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one. |
| 17611 | varDsc->lvFieldLclStart = 0; |
| 17612 | } |
| 17613 | else if (varDsc->lvFieldLclStart != 0) |
| 17614 | { |
| 17615 | // We created new temps to represent a promoted struct corresponding to this |
| 17616 | // parameter, but decided not to go through with the promotion and have |
| 17617 | // rewritten all uses as indirections off the pointer parameter. |
| 17618 | // We stashed the pointer to the new struct temp in lvFieldLclStart; make |
| 17619 | // note of that and clear the annotation. |
| 17620 | unsigned structLclNum = varDsc->lvFieldLclStart; |
| 17621 | varDsc->lvFieldLclStart = 0; |
| 17622 | |
| 17623 | // Clear the arg's ref count; this was set during address-taken analysis so that |
| 17624 | // call morphing could identify single-use implicit byrefs; we're done with |
| 17625 | // that, and want it to be in its default state of zero when we go to set |
| 17626 | // real ref counts for all variables. |
| 17627 | varDsc->setLvRefCnt(0, RCS_EARLY); |
| 17628 | |
| 17629 | // The temp struct is now unused; set flags appropriately so that we |
| 17630 | // won't allocate space for it on the stack. |
| 17631 | LclVarDsc* structVarDsc = &lvaTable[structLclNum]; |
| 17632 | structVarDsc->setLvRefCnt(0, RCS_EARLY); |
| 17633 | structVarDsc->lvAddrExposed = false; |
| 17634 | #ifdef DEBUG |
| 17635 | structVarDsc->lvUnusedStruct = true; |
| 17636 | #endif // DEBUG |
| 17637 | |
| 17638 | unsigned fieldLclStart = structVarDsc->lvFieldLclStart; |
| 17639 | unsigned fieldCount = structVarDsc->lvFieldCnt; |
| 17640 | unsigned fieldLclStop = fieldLclStart + fieldCount; |
| 17641 | |
| 17642 | for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum) |
| 17643 | { |
| 17644 | // Fix the pointer to the parent local. |
| 17645 | LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; |
| 17646 | assert(fieldVarDsc->lvParentLcl == lclNum); |
| 17647 | fieldVarDsc->lvParentLcl = structLclNum; |
| 17648 | |
| 17649 | // The field local is now unused; set flags appropriately so that |
| 17650 | // we won't allocate stack space for it. |
| 17651 | fieldVarDsc->setLvRefCnt(0, RCS_EARLY); |
| 17652 | fieldVarDsc->lvAddrExposed = false; |
| 17653 | } |
| 17654 | } |
| 17655 | } |
| 17656 | } |
| 17657 | |
| 17658 | #endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
| 17659 | } |
| 17660 | |
| 17661 | /***************************************************************************** |
| 17662 | * |
| 17663 | * Morph irregular parameters |
*    for x64 and ARM64 this means turning them into byrefs and adding extra indirections.
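*
*  For example (illustrative local numbers), a use of an implicit by-ref struct parameter
*      LCL_VAR<struct V01>        becomes  OBJ(LCL_VAR<byref V01>)
*  taking its address
*      ADDR(LCL_VAR<struct V01>)  becomes  LCL_VAR<byref V01>
*  and a use of a dependently promoted field local V02 of V01
*      LCL_VAR<int V02>           becomes  FIELD(LCL_VAR<byref V01>, f)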
| 17665 | */ |
| 17666 | bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) |
| 17667 | { |
| 17668 | #if (!defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)) && !defined(_TARGET_ARM64_) |
| 17669 | |
| 17670 | return false; |
| 17671 | |
| 17672 | #else // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
| 17673 | |
| 17674 | bool changed = false; |
| 17675 | |
| 17676 | // Implicit byref morphing needs to know if the reference to the parameter is a |
| 17677 | // child of GT_ADDR or not, so this method looks one level down and does the |
| 17678 | // rewrite whenever a child is a reference to an implicit byref parameter. |
| 17679 | if (tree->gtOper == GT_ADDR) |
| 17680 | { |
| 17681 | if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) |
| 17682 | { |
| 17683 | GenTree* morphedTree = fgMorphImplicitByRefArgs(tree, true); |
| 17684 | changed = (morphedTree != nullptr); |
| 17685 | assert(!changed || (morphedTree == tree)); |
| 17686 | } |
| 17687 | } |
| 17688 | else |
| 17689 | { |
| 17690 | for (GenTree** pTree : tree->UseEdges()) |
| 17691 | { |
| 17692 | GenTree* childTree = *pTree; |
| 17693 | if (childTree->gtOper == GT_LCL_VAR) |
| 17694 | { |
| 17695 | GenTree* newChildTree = fgMorphImplicitByRefArgs(childTree, false); |
| 17696 | if (newChildTree != nullptr) |
| 17697 | { |
| 17698 | changed = true; |
| 17699 | *pTree = newChildTree; |
| 17700 | } |
| 17701 | } |
| 17702 | } |
| 17703 | } |
| 17704 | |
| 17705 | return changed; |
| 17706 | #endif // (_TARGET_AMD64_ && !UNIX_AMD64_ABI) || _TARGET_ARM64_ |
| 17707 | } |
| 17708 | |
| 17709 | GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) |
| 17710 | { |
| 17711 | assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR))); |
| 17712 | assert(isAddr == (tree->gtOper == GT_ADDR)); |
| 17713 | |
| 17714 | GenTree* lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree; |
| 17715 | unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum; |
| 17716 | LclVarDsc* lclVarDsc = &lvaTable[lclNum]; |
| 17717 | |
| 17718 | CORINFO_FIELD_HANDLE fieldHnd; |
| 17719 | unsigned fieldOffset = 0; |
| 17720 | var_types fieldRefType = TYP_UNKNOWN; |
| 17721 | |
| 17722 | if (lvaIsImplicitByRefLocal(lclNum)) |
| 17723 | { |
| 17724 | // The SIMD transformation to coalesce contiguous references to SIMD vector fields will |
| 17725 | // re-invoke the traversal to mark address-taken locals. |
| 17726 | // So, we may encounter a tree that has already been transformed to TYP_BYREF. |
| 17727 | // If we do, leave it as-is. |
| 17728 | if (!varTypeIsStruct(lclVarTree)) |
| 17729 | { |
| 17730 | assert(lclVarTree->TypeGet() == TYP_BYREF); |
| 17731 | |
| 17732 | return nullptr; |
| 17733 | } |
| 17734 | else if (lclVarDsc->lvPromoted) |
| 17735 | { |
| 17736 | // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this |
| 17737 | // arg. Rewrite this to refer to the new local. |
| 17738 | assert(lclVarDsc->lvFieldLclStart != 0); |
| 17739 | lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart); |
| 17740 | return tree; |
| 17741 | } |
| 17742 | |
| 17743 | fieldHnd = nullptr; |
| 17744 | } |
| 17745 | else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl)) |
| 17746 | { |
| 17747 | // This was a field reference to an implicit-by-reference struct parameter that was |
| 17748 | // dependently promoted; update it to a field reference off the pointer. |
| 17749 | // Grab the field handle from the struct field lclVar. |
| 17750 | fieldHnd = lclVarDsc->lvFieldHnd; |
| 17751 | fieldOffset = lclVarDsc->lvFldOffset; |
| 17752 | assert(fieldHnd != nullptr); |
| 17753 | // Update lclNum/lclVarDsc to refer to the parameter |
| 17754 | lclNum = lclVarDsc->lvParentLcl; |
| 17755 | lclVarDsc = &lvaTable[lclNum]; |
| 17756 | fieldRefType = lclVarTree->TypeGet(); |
| 17757 | } |
| 17758 | else |
| 17759 | { |
// We only need to transform the 'marked' implicit by ref parameters
| 17761 | return nullptr; |
| 17762 | } |
| 17763 | |
| 17764 | // This is no longer a def of the lclVar, even if it WAS a def of the struct. |
| 17765 | lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK); |
| 17766 | |
| 17767 | if (isAddr) |
| 17768 | { |
| 17769 | if (fieldHnd == nullptr) |
| 17770 | { |
| 17771 | // change &X into just plain X |
| 17772 | tree->ReplaceWith(lclVarTree, this); |
| 17773 | tree->gtType = TYP_BYREF; |
| 17774 | } |
| 17775 | else |
| 17776 | { |
| 17777 | // change &(X.f) [i.e. GT_ADDR of local for promoted arg field] |
| 17778 | // into &(X, f) [i.e. GT_ADDR of GT_FIELD off ptr param] |
| 17779 | lclVarTree->gtLclVarCommon.SetLclNum(lclNum); |
| 17780 | lclVarTree->gtType = TYP_BYREF; |
| 17781 | tree->gtOp.gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset); |
| 17782 | } |
| 17783 | |
| 17784 | #ifdef DEBUG |
| 17785 | if (verbose) |
| 17786 | { |
| 17787 | printf("Replacing address of implicit by ref struct parameter with byref:\n" ); |
| 17788 | } |
| 17789 | #endif // DEBUG |
| 17790 | } |
| 17791 | else |
| 17792 | { |
| 17793 | // Change X into OBJ(X) or FIELD(X, f) |
| 17794 | var_types structType = tree->gtType; |
| 17795 | tree->gtType = TYP_BYREF; |
| 17796 | |
| 17797 | if (fieldHnd) |
| 17798 | { |
| 17799 | tree->gtLclVarCommon.SetLclNum(lclNum); |
| 17800 | tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset); |
| 17801 | } |
| 17802 | else |
| 17803 | { |
| 17804 | tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree); |
| 17805 | } |
| 17806 | |
| 17807 | if (structType == TYP_STRUCT) |
| 17808 | { |
| 17809 | gtSetObjGcInfo(tree->AsObj()); |
| 17810 | } |
| 17811 | |
| 17812 | // TODO-CQ: If the VM ever stops violating the ABI and passing heap references |
| 17813 | // we could remove TGTANYWHERE |
| 17814 | tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE); |
| 17815 | |
| 17816 | #ifdef DEBUG |
| 17817 | if (verbose) |
| 17818 | { |
| 17819 | printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n" ); |
| 17820 | } |
| 17821 | #endif // DEBUG |
| 17822 | } |
| 17823 | |
| 17824 | #ifdef DEBUG |
| 17825 | if (verbose) |
| 17826 | { |
| 17827 | gtDispTree(tree); |
| 17828 | } |
| 17829 | #endif // DEBUG |
| 17830 | |
| 17831 | return tree; |
| 17832 | } |
| 17833 | |
| 17834 | class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor> |
| 17835 | { |
| 17836 | // During tree traversal every GenTree node produces a "value" that represents: |
| 17837 | // - the memory location associated with a local variable, including an offset |
| 17838 | // accumulated from GT_LCL_FLD and GT_FIELD nodes. |
| 17839 | // - the address of local variable memory location, including an offset as well. |
| 17840 | // - an unknown value - the result of a node we don't know how to process. This |
| 17841 | // also includes the result of TYP_VOID nodes (or any other nodes that don't |
| 17842 | // actually produce values in IR) in order to support the invariant that every |
| 17843 | // node produces a value. |
| 17844 | // |
| 17845 | // The existence of GT_ADDR nodes and their use together with GT_FIELD to form |
| 17846 | // FIELD/ADDR/FIELD/ADDR/LCL_VAR sequences complicate things a bit. A typical |
| 17847 | // GT_FIELD node acts like an indirection and should produce an unknown value, |
| 17848 | // local address analysis doesn't know or care what value the field stores. |
| 17849 | // But a GT_FIELD can also be used as an operand for a GT_ADDR node and then |
// the GT_FIELD node does not perform an indirection; it just represents a
| 17851 | // location, similar to GT_LCL_VAR and GT_LCL_FLD. |
| 17852 | // |
// To avoid this issue, the semantics of GT_FIELD (and, for simplicity's sake, any other
// indirection) nodes deviate slightly from the IR semantics - an indirection does not
| 17855 | // actually produce an unknown value but a location value, if the indirection address |
| 17856 | // operand is an address value. |
| 17857 | // |
| 17858 | // The actual indirection is performed when the indirection's user node is processed: |
| 17859 | // - A GT_ADDR user turns the location value produced by the indirection back |
| 17860 | // into an address value. |
| 17861 | // - Any other user node performs the indirection and produces an unknown value. |
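//
// For example, in ADDR(FIELD(ADDR(LCL_VAR V01), f)) the LCL_VAR produces LOCATION(V01, 0),
// the inner ADDR turns that into ADDRESS(V01, 0), the FIELD (not being a real indirection
// here) produces LOCATION(V01, f's offset), and the outer ADDR finally produces
// ADDRESS(V01, f's offset) for its own user to consume or escape.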
| 17862 | // |
| 17863 | class Value |
| 17864 | { |
| 17865 | GenTree* m_node; |
| 17866 | unsigned m_lclNum; |
| 17867 | unsigned m_offset; |
| 17868 | bool m_address; |
| 17869 | INDEBUG(bool m_consumed;) |
| 17870 | |
| 17871 | public: |
| 17872 | // Produce an unknown value associated with the specified node. |
| 17873 | Value(GenTree* node) |
| 17874 | : m_node(node) |
| 17875 | , m_lclNum(BAD_VAR_NUM) |
| 17876 | , m_offset(0) |
| 17877 | , m_address(false) |
| 17878 | #ifdef DEBUG |
| 17879 | , m_consumed(false) |
| 17880 | #endif // DEBUG |
| 17881 | { |
| 17882 | } |
| 17883 | |
| 17884 | // Get the node that produced this value. |
| 17885 | GenTree* Node() const |
| 17886 | { |
| 17887 | return m_node; |
| 17888 | } |
| 17889 | |
| 17890 | // Does this value represent a location? |
| 17891 | bool IsLocation() const |
| 17892 | { |
| 17893 | return (m_lclNum != BAD_VAR_NUM) && !m_address; |
| 17894 | } |
| 17895 | |
| 17896 | // Does this value represent the address of a location? |
| 17897 | bool IsAddress() const |
| 17898 | { |
| 17899 | assert((m_lclNum != BAD_VAR_NUM) || !m_address); |
| 17900 | |
| 17901 | return m_address; |
| 17902 | } |
| 17903 | |
| 17904 | // Get the location's variable number. |
| 17905 | unsigned LclNum() const |
| 17906 | { |
| 17907 | assert(IsLocation() || IsAddress()); |
| 17908 | |
| 17909 | return m_lclNum; |
| 17910 | } |
| 17911 | |
| 17912 | // Get the location's byte offset. |
| 17913 | unsigned Offset() const |
| 17914 | { |
| 17915 | assert(IsLocation() || IsAddress()); |
| 17916 | |
| 17917 | return m_offset; |
| 17918 | } |
| 17919 | |
| 17920 | //------------------------------------------------------------------------ |
| 17921 | // Location: Produce a location value. |
| 17922 | // |
| 17923 | // Arguments: |
| 17924 | // lclNum - the local variable number |
| 17925 | // offset - the byte offset of the location (used for GT_LCL_FLD nodes) |
| 17926 | // |
| 17927 | // Notes: |
| 17928 | // - (lclnum, offset) => LOCATION(lclNum, offset) |
| 17929 | // |
| 17930 | void Location(unsigned lclNum, unsigned offset = 0) |
| 17931 | { |
| 17932 | assert(!IsLocation() && !IsAddress()); |
| 17933 | |
| 17934 | m_lclNum = lclNum; |
| 17935 | m_offset = offset; |
| 17936 | } |
| 17937 | |
| 17938 | //------------------------------------------------------------------------ |
| 17939 | // Address: Produce an address value from a location value. |
| 17940 | // |
| 17941 | // Arguments: |
| 17942 | // val - the input value |
| 17943 | // |
| 17944 | // Notes: |
| 17945 | // - LOCATION(lclNum, offset) => ADDRESS(lclNum, offset) |
| 17946 | // - ADDRESS(lclNum, offset) => invalid, we should never encounter something like ADDR(ADDR(...)) |
| 17947 | // - UNKNOWN => UNKNOWN |
| 17948 | // |
| 17949 | void Address(Value& val) |
| 17950 | { |
| 17951 | assert(!IsLocation() && !IsAddress()); |
| 17952 | assert(!val.IsAddress()); |
| 17953 | |
| 17954 | if (val.IsLocation()) |
| 17955 | { |
| 17956 | m_address = true; |
| 17957 | m_lclNum = val.m_lclNum; |
| 17958 | m_offset = val.m_offset; |
| 17959 | } |
| 17960 | |
| 17961 | INDEBUG(val.Consume();) |
| 17962 | } |
| 17963 | |
| 17964 | //------------------------------------------------------------------------ |
| 17965 | // Field: Produce a location value from an address value. |
| 17966 | // |
| 17967 | // Arguments: |
| 17968 | // val - the input value |
| 17969 | // offset - the offset to add to the existing location offset |
| 17970 | // |
| 17971 | // Return Value: |
| 17972 | // `true` if the value was consumed. `false` if the input value |
// cannot be consumed because it is itself a location or because
| 17974 | // the offset overflowed. In this case the caller is expected |
| 17975 | // to escape the input value. |
| 17976 | // |
| 17977 | // Notes: |
| 17978 | // - LOCATION(lclNum, offset) => not representable, must escape |
| 17979 | // - ADDRESS(lclNum, offset) => LOCATION(lclNum, offset + field.Offset) |
| 17980 | // if the offset overflows then location is not representable, must escape |
| 17981 | // - UNKNOWN => UNKNOWN |
| 17982 | // |
| 17983 | bool Field(Value& val, unsigned offset) |
| 17984 | { |
| 17985 | assert(!IsLocation() && !IsAddress()); |
| 17986 | |
| 17987 | if (val.IsLocation()) |
| 17988 | { |
| 17989 | return false; |
| 17990 | } |
| 17991 | |
| 17992 | if (val.IsAddress()) |
| 17993 | { |
| 17994 | ClrSafeInt<unsigned> newOffset = ClrSafeInt<unsigned>(val.m_offset) + ClrSafeInt<unsigned>(offset); |
| 17995 | |
| 17996 | if (newOffset.IsOverflow()) |
| 17997 | { |
| 17998 | return false; |
| 17999 | } |
| 18000 | |
| 18001 | m_lclNum = val.m_lclNum; |
| 18002 | m_offset = newOffset.Value(); |
| 18003 | } |
| 18004 | |
| 18005 | INDEBUG(val.Consume();) |
| 18006 | return true; |
| 18007 | } |
| 18008 | |
| 18009 | //------------------------------------------------------------------------ |
| 18010 | // Indir: Produce a location value from an address value. |
| 18011 | // |
| 18012 | // Arguments: |
| 18013 | // val - the input value |
| 18014 | // |
| 18015 | // Return Value: |
| 18016 | // `true` if the value was consumed. `false` if the input value |
// cannot be consumed because it is itself a location. In this
| 18018 | // case the caller is expected to escape the input value. |
| 18019 | // |
| 18020 | // Notes: |
| 18021 | // - LOCATION(lclNum, offset) => not representable, must escape |
| 18022 | // - ADDRESS(lclNum, offset) => LOCATION(lclNum, offset) |
| 18023 | // - UNKNOWN => UNKNOWN |
| 18024 | // |
| 18025 | bool Indir(Value& val) |
| 18026 | { |
| 18027 | assert(!IsLocation() && !IsAddress()); |
| 18028 | |
| 18029 | if (val.IsLocation()) |
| 18030 | { |
| 18031 | return false; |
| 18032 | } |
| 18033 | |
| 18034 | if (val.IsAddress()) |
| 18035 | { |
| 18036 | m_lclNum = val.m_lclNum; |
| 18037 | m_offset = val.m_offset; |
| 18038 | } |
| 18039 | |
| 18040 | INDEBUG(val.Consume();) |
| 18041 | return true; |
| 18042 | } |
| 18043 | |
| 18044 | #ifdef DEBUG |
| 18045 | void Consume() |
| 18046 | { |
| 18047 | assert(!m_consumed); |
| 18048 | // Mark the value as consumed so that PopValue can ensure that values |
| 18049 | // aren't popped from the stack without being processed appropriately. |
| 18050 | m_consumed = true; |
| 18051 | } |
| 18052 | |
| 18053 | bool IsConsumed() |
| 18054 | { |
| 18055 | return m_consumed; |
| 18056 | } |
| 18057 | #endif // DEBUG |
| 18058 | }; |
| 18059 | |
| 18060 | ArrayStack<Value> m_valueStack; |
| 18061 | INDEBUG(bool m_stmtModified;) |
| 18062 | |
| 18063 | public: |
| 18064 | enum |
| 18065 | { |
| 18066 | DoPreOrder = true, |
| 18067 | DoPostOrder = true, |
| 18068 | ComputeStack = true, |
| 18069 | DoLclVarsOnly = false, |
| 18070 | UseExecutionOrder = false, |
| 18071 | }; |
| 18072 | |
| 18073 | LocalAddressVisitor(Compiler* comp) |
| 18074 | : GenTreeVisitor<LocalAddressVisitor>(comp), m_valueStack(comp->getAllocator(CMK_LocalAddressVisitor)) |
| 18075 | { |
| 18076 | } |
| 18077 | |
| 18078 | void VisitStmt(GenTreeStmt* stmt) |
| 18079 | { |
| 18080 | #ifdef DEBUG |
| 18081 | if (m_compiler->verbose) |
| 18082 | { |
| 18083 | printf("LocalAddressVisitor visiting statement:\n" ); |
| 18084 | m_compiler->gtDispTree(stmt); |
| 18085 | m_stmtModified = false; |
| 18086 | } |
| 18087 | #endif // DEBUG |
| 18088 | |
| 18089 | WalkTree(&stmt->gtStmtExpr, nullptr); |
| 18090 | |
// We could have something like STMT(IND(ADDR(LCL_VAR))) so we need to escape
| 18092 | // the location here. This doesn't seem to happen often, if ever. The importer |
| 18093 | // tends to wrap such a tree in a COMMA. |
| 18094 | if (TopValue(0).IsLocation()) |
| 18095 | { |
| 18096 | EscapeLocation(TopValue(0), stmt); |
| 18097 | } |
| 18098 | else |
| 18099 | { |
| 18100 | // If we have an address on the stack then we don't need to do anything. |
| 18101 | // The address tree isn't actually used and it will be discarded during |
| 18102 | // morphing. So just mark any value as consumed to keep PopValue happy. |
| 18103 | INDEBUG(TopValue(0).Consume();) |
| 18104 | } |
| 18105 | |
| 18106 | PopValue(); |
| 18107 | assert(m_valueStack.Empty()); |
| 18108 | |
| 18109 | #ifdef DEBUG |
| 18110 | if (m_compiler->verbose) |
| 18111 | { |
| 18112 | if (m_stmtModified) |
| 18113 | { |
| 18114 | printf("LocalAddressVisitor modified statement:\n" ); |
| 18115 | m_compiler->gtDispTree(stmt); |
| 18116 | } |
| 18117 | |
| 18118 | printf("\n" ); |
| 18119 | } |
| 18120 | #endif // DEBUG |
| 18121 | } |
| 18122 | |
// Morph promoted struct fields and count implicit byref argument occurrences.
| 18124 | // Also create and push the value produced by the visited node. This is done here |
| 18125 | // rather than in PostOrderVisit because it makes it easy to handle nodes with an |
| 18126 | // arbitrary number of operands - just pop values until the value corresponding |
| 18127 | // to the visited node is encountered. |
| 18128 | fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) |
| 18129 | { |
| 18130 | GenTree* node = *use; |
| 18131 | |
| 18132 | if (node->OperIs(GT_FIELD)) |
| 18133 | { |
| 18134 | MorphStructField(node, user); |
| 18135 | } |
| 18136 | else if (node->OperIs(GT_LCL_FLD)) |
| 18137 | { |
| 18138 | MorphLocalField(node, user); |
| 18139 | } |
| 18140 | |
| 18141 | if (node->OperIsLocal()) |
| 18142 | { |
| 18143 | unsigned lclNum = node->AsLclVarCommon()->GetLclNum(); |
| 18144 | |
| 18145 | LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); |
| 18146 | if (varDsc->lvIsStructField) |
| 18147 | { |
| 18148 | // Promoted field, increase counter for the parent lclVar. |
| 18149 | assert(!m_compiler->lvaIsImplicitByRefLocal(lclNum)); |
| 18150 | unsigned parentLclNum = varDsc->lvParentLcl; |
| 18151 | UpdateEarlyRefCountForImplicitByRef(parentLclNum); |
| 18152 | } |
| 18153 | else |
| 18154 | { |
| 18155 | UpdateEarlyRefCountForImplicitByRef(lclNum); |
| 18156 | } |
| 18157 | } |
| 18158 | |
| 18159 | PushValue(node); |
| 18160 | |
| 18161 | return Compiler::WALK_CONTINUE; |
| 18162 | } |
| 18163 | |
| 18164 | // Evaluate a node. Since this is done in postorder, the node's operands have already been |
| 18165 | // evaluated and are available on the value stack. The value produced by the visited node |
| 18166 | // is left on the top of the evaluation stack. |
| 18167 | fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) |
| 18168 | { |
| 18169 | GenTree* node = *use; |
| 18170 | |
| 18171 | switch (node->OperGet()) |
| 18172 | { |
| 18173 | case GT_LCL_VAR: |
| 18174 | assert(TopValue(0).Node() == node); |
| 18175 | |
| 18176 | TopValue(0).Location(node->AsLclVar()->GetLclNum()); |
| 18177 | break; |
| 18178 | |
| 18179 | case GT_LCL_FLD: |
| 18180 | assert(TopValue(0).Node() == node); |
| 18181 | |
| 18182 | TopValue(0).Location(node->AsLclFld()->GetLclNum(), node->AsLclFld()->gtLclOffs); |
| 18183 | break; |
| 18184 | |
| 18185 | case GT_ADDR: |
| 18186 | assert(TopValue(1).Node() == node); |
| 18187 | assert(TopValue(0).Node() == node->gtGetOp1()); |
| 18188 | |
| 18189 | TopValue(1).Address(TopValue(0)); |
| 18190 | PopValue(); |
| 18191 | break; |
| 18192 | |
| 18193 | case GT_FIELD: |
| 18194 | if (node->AsField()->gtFldObj != nullptr) |
| 18195 | { |
| 18196 | assert(TopValue(1).Node() == node); |
| 18197 | assert(TopValue(0).Node() == node->AsField()->gtFldObj); |
| 18198 | |
| 18199 | if (!TopValue(1).Field(TopValue(0), node->AsField()->gtFldOffset)) |
| 18200 | { |
| 18201 | // Either the address comes from a location value (e.g. FIELD(IND(...))) |
| 18202 | // or the field offset has overflowed. |
| 18203 | EscapeValue(TopValue(0), node); |
| 18204 | } |
| 18205 | |
| 18206 | PopValue(); |
| 18207 | } |
| 18208 | else |
| 18209 | { |
| 18210 | assert(TopValue(0).Node() == node); |
| 18211 | } |
| 18212 | break; |
| 18213 | |
| 18214 | case GT_OBJ: |
| 18215 | case GT_BLK: |
| 18216 | case GT_IND: |
| 18217 | assert(TopValue(1).Node() == node); |
| 18218 | assert(TopValue(0).Node() == node->gtGetOp1()); |
| 18219 | |
| 18220 | if ((node->gtFlags & GTF_IND_VOLATILE) != 0) |
| 18221 | { |
| 18222 | // Volatile indirections must not be removed so the address, |
| 18223 | // if any, must be escaped. |
| 18224 | EscapeValue(TopValue(0), node); |
| 18225 | } |
| 18226 | else if (!TopValue(1).Indir(TopValue(0))) |
| 18227 | { |
// If the address comes from another indirection (e.g. IND(IND(...)))
| 18229 | // then we need to escape the location. |
| 18230 | EscapeLocation(TopValue(0), node); |
| 18231 | } |
| 18232 | |
| 18233 | PopValue(); |
| 18234 | break; |
| 18235 | |
| 18236 | case GT_DYN_BLK: |
| 18237 | assert(TopValue(2).Node() == node); |
| 18238 | assert(TopValue(1).Node() == node->AsDynBlk()->Addr()); |
| 18239 | assert(TopValue(0).Node() == node->AsDynBlk()->gtDynamicSize); |
| 18240 | |
| 18241 | // The block size may be the result of an indirection so we need |
| 18242 | // to escape the location that may be associated with it. |
| 18243 | EscapeValue(TopValue(0), node); |
| 18244 | |
| 18245 | if (!TopValue(2).Indir(TopValue(1))) |
| 18246 | { |
// If the address comes from another indirection (e.g. DYN_BLK(IND(...)))
| 18248 | // then we need to escape the location. |
| 18249 | EscapeLocation(TopValue(1), node); |
| 18250 | } |
| 18251 | |
| 18252 | PopValue(); |
| 18253 | PopValue(); |
| 18254 | break; |
| 18255 | |
| 18256 | default: |
| 18257 | while (TopValue(0).Node() != node) |
| 18258 | { |
| 18259 | EscapeValue(TopValue(0), node); |
| 18260 | PopValue(); |
| 18261 | } |
| 18262 | break; |
| 18263 | } |
| 18264 | |
| 18265 | assert(TopValue(0).Node() == node); |
| 18266 | return Compiler::WALK_CONTINUE; |
| 18267 | } |
| 18268 | |
| 18269 | private: |
| 18270 | void PushValue(GenTree* node) |
| 18271 | { |
| 18272 | m_valueStack.Push(node); |
| 18273 | } |
| 18274 | |
| 18275 | Value& TopValue(unsigned index) |
| 18276 | { |
| 18277 | return m_valueStack.IndexRef(index); |
| 18278 | } |
| 18279 | |
| 18280 | void PopValue() |
| 18281 | { |
| 18282 | assert(TopValue(0).IsConsumed()); |
| 18283 | m_valueStack.Pop(); |
| 18284 | } |
| 18285 | |
| 18286 | //------------------------------------------------------------------------ |
| 18287 | // EscapeValue: Process an escaped value |
| 18288 | // |
| 18289 | // Arguments: |
| 18290 | // val - the escaped address value |
| 18291 | // user - the node that uses the escaped value |
| 18292 | // |
| 18293 | void EscapeValue(Value& val, GenTree* user) |
| 18294 | { |
| 18295 | if (val.IsLocation()) |
| 18296 | { |
| 18297 | EscapeLocation(val, user); |
| 18298 | } |
| 18299 | else if (val.IsAddress()) |
| 18300 | { |
| 18301 | EscapeAddress(val, user); |
| 18302 | } |
| 18303 | else |
| 18304 | { |
| 18305 | INDEBUG(val.Consume();) |
| 18306 | } |
| 18307 | } |
| 18308 | |
| 18309 | //------------------------------------------------------------------------ |
| 18310 | // EscapeAddress: Process an escaped address value |
| 18311 | // |
| 18312 | // Arguments: |
| 18313 | // val - the escaped address value |
| 18314 | // user - the node that uses the address value |
| 18315 | // |
| 18316 | void EscapeAddress(Value& val, GenTree* user) |
| 18317 | { |
| 18318 | assert(val.IsAddress()); |
| 18319 | |
| 18320 | LclVarDsc* varDsc = m_compiler->lvaGetDesc(val.LclNum()); |
| 18321 | |
| 18322 | // In general we don't know how an exposed struct field address will be used - it may be used to |
| 18323 | // access only that specific field or it may be used to access other fields in the same struct |
// by using pointer/ref arithmetic. It seems reasonable to make an exception for the "this" arg
// of calls - it would be highly unusual for a struct member method to attempt to access memory
| 18326 | // beyond "this" instance. And calling struct member methods is common enough that attempting to |
| 18327 | // mark the entire struct as address exposed results in CQ regressions. |
| 18328 | bool isThisArg = user->IsCall() && (val.Node() == user->AsCall()->gtCallObjp); |
| 18329 | bool exposeParentLcl = varDsc->lvIsStructField && !isThisArg; |
| 18330 | |
| 18331 | m_compiler->lvaSetVarAddrExposed(exposeParentLcl ? varDsc->lvParentLcl : val.LclNum()); |
| 18332 | |
| 18333 | #ifdef _TARGET_64BIT_ |
| 18334 | // If the address of a variable is passed in a call and the allocation size of the variable |
// is 32 bits, we will quirk the size to 64 bits. Some PInvoke signatures incorrectly specify
| 18336 | // a ByRef to an INT32 when they actually write a SIZE_T or INT64. There are cases where |
| 18337 | // overwriting these extra 4 bytes corrupts some data (such as a saved register) that leads |
// to an A/V, whereas previously the JIT64 codegen did not lead to an A/V.
| 18339 | if (!varDsc->lvIsParam && !varDsc->lvIsStructField && (genActualType(varDsc->TypeGet()) == TYP_INT)) |
| 18340 | { |
| 18341 | // TODO-Cleanup: This should simply check if the user is a call node, not if a call ancestor exists. |
| 18342 | if (Compiler::gtHasCallOnStack(&m_ancestors)) |
| 18343 | { |
| 18344 | varDsc->lvQuirkToLong = true; |
| 18345 | JITDUMP("Adding a quirk for the storage size of V%02u of type %s" , val.LclNum(), |
| 18346 | varTypeName(varDsc->TypeGet())); |
| 18347 | } |
| 18348 | } |
| 18349 | #endif // _TARGET_64BIT_ |
| 18350 | |
| 18351 | INDEBUG(val.Consume();) |
| 18352 | } |
| 18353 | |
| 18354 | //------------------------------------------------------------------------ |
| 18355 | // EscapeLocation: Process an escaped location value |
| 18356 | // |
| 18357 | // Arguments: |
| 18358 | // val - the escaped location value |
| 18359 | // user - the node that uses the location value |
| 18360 | // |
| 18361 | // Notes: |
| 18362 | // Unlike EscapeAddress, this does not necessarily mark the lclvar associated |
| 18363 | // with the value as address exposed. This is needed only if the indirection |
| 18364 | // is wider than the lclvar. |
| 18365 | // |
| 18366 | void EscapeLocation(Value& val, GenTree* user) |
| 18367 | { |
| 18368 | assert(val.IsLocation()); |
| 18369 | |
| 18370 | GenTree* node = val.Node(); |
| 18371 | |
| 18372 | if (node->OperIs(GT_LCL_VAR, GT_LCL_FLD)) |
| 18373 | { |
| 18374 | // If the location is accessed directly then we don't need to do anything. |
| 18375 | |
| 18376 | assert(node->AsLclVarCommon()->GetLclNum() == val.LclNum()); |
| 18377 | } |
| 18378 | else |
| 18379 | { |
| 18380 | // Otherwise it must be accessed through some kind of indirection. Usually this is |
| 18381 | // something like IND(ADDR(LCL_VAR)), global morph will change it to GT_LCL_VAR or |
| 18382 | // GT_LCL_FLD so the lclvar does not need to be address exposed. |
| 18383 | // |
| 18384 | // However, it is possible for the indirection to be wider than the lclvar |
| 18385 | // (e.g. *(long*)&int32Var) or to have a field offset that pushes the indirection |
| 18386 | // past the end of the lclvar memory location. In such cases morph doesn't do |
| 18387 | // anything so the lclvar needs to be address exposed. |
| 18388 | // |
| 18389 | // More importantly, if the lclvar is a promoted struct field then the parent lclvar |
| 18390 | // also needs to be address exposed so we get dependent struct promotion. Code like |
| 18391 | // *(long*)&int32Var has undefined behavior and it's practically useless but reading, |
| 18392 | // say, 2 consecutive Int32 struct fields as Int64 has more practical value. |
| 18393 | |
| 18394 | LclVarDsc* varDsc = m_compiler->lvaGetDesc(val.LclNum()); |
| 18395 | unsigned indirSize = GetIndirSize(node, user); |
| 18396 | bool isWide; |
| 18397 | |
| 18398 | if (indirSize == 0) |
| 18399 | { |
| 18400 | // If we can't figure out the indirection size then treat it as a wide indirection. |
| 18401 | isWide = true; |
| 18402 | } |
| 18403 | else |
| 18404 | { |
| 18405 | ClrSafeInt<unsigned> endOffset = ClrSafeInt<unsigned>(val.Offset()) + ClrSafeInt<unsigned>(indirSize); |
| 18406 | |
| 18407 | if (endOffset.IsOverflow()) |
| 18408 | { |
| 18409 | isWide = true; |
| 18410 | } |
| 18411 | else if (varDsc->TypeGet() == TYP_STRUCT) |
| 18412 | { |
| 18413 | isWide = (endOffset.Value() > varDsc->lvExactSize); |
| 18414 | } |
| 18415 | else |
| 18416 | { |
| 18417 | // For small int types use the real type size, not the stack slot size. |
| 18418 | // Morph does manage to transform `*(int*)&byteVar` into just byteVar where |
| 18419 | // the LCL_VAR node has type TYP_INT. But such code is simply bogus and |
| 18420 | // there's no reason to attempt to optimize it. It makes more sense to |
| 18421 | // mark the variable address exposed in such circumstances. |
| 18422 | // |
| 18423 | // Same for "small" SIMD types - SIMD8/12 have 8/12 bytes, even if the |
| 18424 | // stack location may have 16 bytes. |
| 18425 | // |
| 18426 | // For TYP_BLK variables the type size is 0 so they're always address |
| 18427 | // exposed. |
| 18428 | isWide = (endOffset.Value() > genTypeSize(varDsc->TypeGet())); |
| 18429 | } |
| 18430 | } |
| 18431 | |
| 18432 | if (isWide) |
| 18433 | { |
| 18434 | m_compiler->lvaSetVarAddrExposed(varDsc->lvIsStructField ? varDsc->lvParentLcl : val.LclNum()); |
| 18435 | } |
| 18436 | } |
| 18437 | |
| 18438 | INDEBUG(val.Consume();) |
| 18439 | } |
| 18440 | |
| 18441 | //------------------------------------------------------------------------ |
| 18442 | // GetIndirSize: Return the size (in bytes) of an indirection node. |
| 18443 | // |
| 18444 | // Arguments: |
| 18445 | // indir - the indirection node |
| 18446 | // user - the node that uses the indirection |
| 18447 | // |
| 18448 | // Notes: |
| 18449 | // This returns 0 for indirection of unknown size, typically GT_DYN_BLK. |
//    GT_IND nodes that have type TYP_STRUCT are expected to appear only
| 18451 | // on the RHS of an assignment, in which case the LHS size will be used instead. |
| 18452 | // Otherwise 0 is returned as well. |
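//
//    For example, a GT_IND of type TYP_INT yields 4, a struct-typed GT_OBJ/GT_BLK yields its
//    gtBlkSize, and a TYP_STRUCT GT_IND used as the RHS of an assignment takes its size from
//    the assignment's LHS (a LCL_VAR LHS yields the local's lvExactSize).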
| 18453 | // |
| 18454 | unsigned GetIndirSize(GenTree* indir, GenTree* user) |
| 18455 | { |
| 18456 | assert(indir->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_DYN_BLK, GT_FIELD)); |
| 18457 | |
| 18458 | if (indir->TypeGet() != TYP_STRUCT) |
| 18459 | { |
| 18460 | return genTypeSize(indir->TypeGet()); |
| 18461 | } |
| 18462 | |
| 18463 | // A struct indir that is the RHS of an assignment needs special casing: |
| 18464 | // - It can be a GT_IND of type TYP_STRUCT, in which case the size is given by the LHS. |
| 18465 | // - It can be a GT_OBJ that has a correct size, but different than the size of the LHS. |
| 18466 | // The LHS size takes precedence. |
| 18467 | // Just take the LHS size in all cases. |
| 18468 | if (user->OperIs(GT_ASG) && (indir == user->gtGetOp2())) |
| 18469 | { |
| 18470 | indir = user->gtGetOp1(); |
| 18471 | |
| 18472 | if (indir->TypeGet() != TYP_STRUCT) |
| 18473 | { |
| 18474 | return genTypeSize(indir->TypeGet()); |
| 18475 | } |
| 18476 | |
| 18477 | // The LHS may be a LCL_VAR/LCL_FLD, these are not indirections so we need to handle them here. |
// It can also be a GT_INDEX; this is an indirection, but it never applies to lclvar addresses
| 18479 | // so it needs to be handled here as well. |
| 18480 | |
| 18481 | switch (indir->OperGet()) |
| 18482 | { |
| 18483 | case GT_LCL_VAR: |
| 18484 | return m_compiler->lvaGetDesc(indir->AsLclVar())->lvExactSize; |
| 18485 | case GT_LCL_FLD: |
| 18486 | return genTypeSize(indir->TypeGet()); |
| 18487 | case GT_INDEX: |
| 18488 | return indir->AsIndex()->gtIndElemSize; |
| 18489 | default: |
| 18490 | break; |
| 18491 | } |
| 18492 | } |
| 18493 | |
| 18494 | switch (indir->OperGet()) |
| 18495 | { |
| 18496 | case GT_FIELD: |
| 18497 | return m_compiler->info.compCompHnd->getClassSize( |
| 18498 | m_compiler->info.compCompHnd->getFieldClass(indir->AsField()->gtFldHnd)); |
| 18499 | case GT_BLK: |
| 18500 | case GT_OBJ: |
| 18501 | return indir->AsBlk()->gtBlkSize; |
| 18502 | default: |
| 18503 | assert(indir->OperIs(GT_IND, GT_DYN_BLK)); |
| 18504 | return 0; |
| 18505 | } |
| 18506 | } |
| 18507 | |
| 18508 | //------------------------------------------------------------------------ |
| 18509 | // MorphStructField: Replaces a GT_FIELD based promoted/normed struct field access |
| 18510 | // (e.g. FIELD(ADDR(LCL_VAR))) with a GT_LCL_VAR that references the struct field. |
| 18511 | // |
| 18512 | // Arguments: |
| 18513 | // node - the GT_FIELD node |
| 18514 | // user - the node that uses the field |
| 18515 | // |
| 18516 | // Notes: |
| 18517 | // This does not do anything if the field access does not denote |
| 18518 | // a promoted/normed struct field. |
| 18519 | // |
| 18520 | void MorphStructField(GenTree* node, GenTree* user) |
| 18521 | { |
| 18522 | assert(node->OperIs(GT_FIELD)); |
| 18523 | // TODO-Cleanup: Move fgMorphStructField implementation here, it's not used anywhere else. |
| 18524 | m_compiler->fgMorphStructField(node, user); |
| 18525 | INDEBUG(m_stmtModified |= node->OperIs(GT_LCL_VAR);) |
| 18526 | } |
| 18527 | |
| 18528 | //------------------------------------------------------------------------ |
| 18529 | // MorphLocalField: Replaces a GT_LCL_FLD based promoted struct field access |
| 18530 | // with a GT_LCL_VAR that references the struct field. |
| 18531 | // |
| 18532 | // Arguments: |
| 18533 | // node - the GT_LCL_FLD node |
| 18534 | // user - the node that uses the field |
| 18535 | // |
| 18536 | // Notes: |
// This does not do anything if the field access does not involve
// a promoted struct local.
// If the GT_LCL_FLD offset does not have a corresponding promoted struct
// field, then no transformation is done and the struct local's enregistration
| 18541 | // is disabled. |
| 18542 | // |
| 18543 | void MorphLocalField(GenTree* node, GenTree* user) |
| 18544 | { |
| 18545 | assert(node->OperIs(GT_LCL_FLD)); |
| 18546 | // TODO-Cleanup: Move fgMorphLocalField implementation here, it's not used anywhere else. |
| 18547 | m_compiler->fgMorphLocalField(node, user); |
| 18548 | INDEBUG(m_stmtModified |= node->OperIs(GT_LCL_VAR);) |
| 18549 | } |
| 18550 | |
| 18551 | //------------------------------------------------------------------------ |
| 18552 | // UpdateEarlyRefCountForImplicitByRef: updates the ref count for implicit byref params. |
| 18553 | // |
| 18554 | // Arguments: |
| 18555 | // lclNum - the local number to update the count for. |
| 18556 | // |
| 18557 | // Notes: |
| 18558 | // fgMakeOutgoingStructArgCopy checks the ref counts for implicit byref params when it decides |
| 18559 | // if it's legal to elide certain copies of them; |
| 18560 | // fgRetypeImplicitByRefArgs checks the ref counts when it decides to undo promotions. |
| 18561 | // |
| 18562 | void UpdateEarlyRefCountForImplicitByRef(unsigned lclNum) |
| 18563 | { |
| 18564 | if (!m_compiler->lvaIsImplicitByRefLocal(lclNum)) |
| 18565 | { |
| 18566 | return; |
| 18567 | } |
| 18568 | LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); |
| 18569 | JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for V%02d\n" , varDsc->lvRefCnt(RCS_EARLY), |
| 18570 | varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum); |
| 18571 | varDsc->incLvRefCnt(1, RCS_EARLY); |
| 18572 | } |
| 18573 | }; |
| 18574 | |
| 18575 | void Compiler::fgAddFieldSeqForZeroOffset(GenTree* op1, FieldSeqNode* fieldSeq) |
| 18576 | { |
| 18577 | assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF); |
| 18578 | |
| 18579 | switch (op1->OperGet()) |
| 18580 | { |
| 18581 | case GT_ADDR: |
| 18582 | if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD) |
| 18583 | { |
| 18584 | GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld(); |
| 18585 | lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq); |
| 18586 | } |
| 18587 | break; |
| 18588 | |
| 18589 | case GT_ADD: |
| 18590 | if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT) |
| 18591 | { |
| 18592 | FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq; |
| 18593 | if (op1Fs != nullptr) |
| 18594 | { |
| 18595 | op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq); |
| 18596 | op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs; |
| 18597 | } |
| 18598 | } |
| 18599 | else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT) |
| 18600 | { |
| 18601 | FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq; |
| 18602 | if (op2Fs != nullptr) |
| 18603 | { |
| 18604 | op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq); |
| 18605 | op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs; |
| 18606 | } |
| 18607 | } |
| 18608 | break; |
| 18609 | |
| 18610 | case GT_CNS_INT: |
| 18611 | { |
| 18612 | FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq; |
| 18613 | if (op1Fs != nullptr) |
| 18614 | { |
| 18615 | op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq); |
| 18616 | op1->gtIntCon.gtFieldSeq = op1Fs; |
| 18617 | } |
| 18618 | } |
| 18619 | break; |
| 18620 | |
| 18621 | default: |
| 18622 | // Record in the general zero-offset map. |
| 18623 | GetZeroOffsetFieldMap()->Set(op1, fieldSeq); |
| 18624 | break; |
| 18625 | } |
| 18626 | } |
| 18627 | |
| 18628 | //------------------------------------------------------------------------ |
| 18629 | // fgMarkAddressExposedLocals: Traverses the entire method and marks address |
| 18630 | // exposed locals. |
| 18631 | // |
| 18632 | // Notes: |
| 18633 | // Trees such as IND(ADDR(LCL_VAR)), that morph is expected to fold |
| 18634 | // to just LCL_VAR, do not result in the involved local being marked |
| 18635 | // address exposed. |
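//
//    By contrast, an address that escapes the analysis - for example ADDR(LCL_VAR)
//    passed as an ordinary call argument - does mark the involved local address
//    exposed (see LocalAddressVisitor::EscapeAddress for the promoted-field details).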
| 18636 | // |
| 18637 | void Compiler::fgMarkAddressExposedLocals() |
| 18638 | { |
| 18639 | #ifdef DEBUG |
| 18640 | if (verbose) |
| 18641 | { |
| 18642 | printf("\n*************** In fgMarkAddressExposedLocals()\n" ); |
| 18643 | } |
| 18644 | #endif // DEBUG |
| 18645 | |
| 18646 | LocalAddressVisitor visitor(this); |
| 18647 | |
| 18648 | for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) |
| 18649 | { |
| 18650 | // Make the current basic block address available globally |
| 18651 | compCurBB = block; |
| 18652 | |
| 18653 | for (GenTree* stmt = block->bbTreeList; stmt != nullptr; stmt = stmt->gtNext) |
| 18654 | { |
| 18655 | visitor.VisitStmt(stmt->AsStmt()); |
| 18656 | } |
| 18657 | } |
| 18658 | } |
| 18659 | |
| 18660 | #ifdef FEATURE_SIMD |
| 18661 | |
| 18662 | //----------------------------------------------------------------------------------- |
| 18663 | // fgMorphCombineSIMDFieldAssignments: |
//    If the RHS of the input stmt is a read of a SIMD vector's X field, this function
//    keeps inspecting the next few stmts, based on the vector size (2, 3 or 4 elements).
//    If those stmts' LHS locations are contiguous and their RHS locations are also
//    contiguous, the statements are replaced with a single copy block.
//
// Arguments:
//    block - BasicBlock*. the block which the stmt belongs to
//    stmt  - GenTreeStmt*. the stmt node we want to check
//
// Return Value:
//    true if this function successfully optimized the stmts, otherwise false.
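//
// Notes:
//    Illustrative example (assuming a Vector4 source and a contiguous destination):
//        dst.x = v.X; dst.y = v.Y; dst.z = v.Z; dst.w = v.W;
//    spread across 4 consecutive stmts is collapsed into one SIMD-sized struct copy
//    from v to dst.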
| 18676 | |
| 18677 | bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTree* stmt) |
| 18678 | { |
| 18679 | |
| 18680 | noway_assert(stmt->gtOper == GT_STMT); |
| 18681 | GenTree* tree = stmt->gtStmt.gtStmtExpr; |
| 18682 | assert(tree->OperGet() == GT_ASG); |
| 18683 | |
| 18684 | GenTree* originalLHS = tree->gtOp.gtOp1; |
| 18685 | GenTree* prevLHS = tree->gtOp.gtOp1; |
| 18686 | GenTree* prevRHS = tree->gtOp.gtOp2; |
| 18687 | unsigned index = 0; |
| 18688 | var_types baseType = TYP_UNKNOWN; |
| 18689 | unsigned simdSize = 0; |
| 18690 | GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true); |
| 18691 | |
| 18692 | if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT) |
| 18693 | { |
| 18694 | // if the RHS is not from a SIMD vector field X, then there is no need to check further. |
| 18695 | return false; |
| 18696 | } |
| 18697 | |
| 18698 | var_types simdType = getSIMDTypeForSize(simdSize); |
| 18699 | int assignmentsCount = simdSize / genTypeSize(baseType) - 1; |
| 18700 | int remainingAssignments = assignmentsCount; |
| 18701 | GenTree* curStmt = stmt->gtNext; |
| 18702 | GenTree* lastStmt = stmt; |
| 18703 | |
| 18704 | while (curStmt != nullptr && remainingAssignments > 0) |
| 18705 | { |
| 18706 | GenTree* exp = curStmt->gtStmt.gtStmtExpr; |
| 18707 | if (exp->OperGet() != GT_ASG) |
| 18708 | { |
| 18709 | break; |
| 18710 | } |
| 18711 | GenTree* curLHS = exp->gtGetOp1(); |
| 18712 | GenTree* curRHS = exp->gtGetOp2(); |
| 18713 | |
| 18714 | if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS)) |
| 18715 | { |
| 18716 | break; |
| 18717 | } |
| 18718 | |
| 18719 | remainingAssignments--; |
| 18720 | prevLHS = curLHS; |
| 18721 | prevRHS = curRHS; |
| 18722 | |
| 18723 | lastStmt = curStmt; |
| 18724 | curStmt = curStmt->gtNext; |
| 18725 | } |
| 18726 | |
| 18727 | if (remainingAssignments > 0) |
| 18728 | { |
// If there are assignments left over, then the stmts do not assign to
// contiguous memory locations from the same vector, so bail out.
| 18732 | return false; |
| 18733 | } |
| 18734 | #ifdef DEBUG |
| 18735 | if (verbose) |
| 18736 | { |
| 18737 | printf("\nFound contiguous assignments from a SIMD vector to memory.\n" ); |
| 18738 | printf("From " FMT_BB ", stmt" , block->bbNum); |
| 18739 | printTreeID(stmt); |
| 18740 | printf(" to stmt" ); |
| 18741 | printTreeID(lastStmt); |
| 18742 | printf("\n" ); |
| 18743 | } |
| 18744 | #endif |
| 18745 | |
| 18746 | for (int i = 0; i < assignmentsCount; i++) |
| 18747 | { |
| 18748 | fgRemoveStmt(block, stmt->gtNext); |
| 18749 | } |
| 18750 | |
| 18751 | GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize); |
| 18752 | if (simdStructNode->OperIsLocal()) |
| 18753 | { |
| 18754 | setLclRelatedToSIMDIntrinsic(simdStructNode); |
| 18755 | } |
| 18756 | GenTree* copyBlkAddr = copyBlkDst; |
| 18757 | if (copyBlkAddr->gtOper == GT_LEA) |
| 18758 | { |
| 18759 | copyBlkAddr = copyBlkAddr->AsAddrMode()->Base(); |
| 18760 | } |
| 18761 | GenTreeLclVarCommon* localDst = nullptr; |
| 18762 | if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr)) |
| 18763 | { |
| 18764 | setLclRelatedToSIMDIntrinsic(localDst); |
| 18765 | } |
| 18766 | |
| 18767 | if (simdStructNode->TypeGet() == TYP_BYREF) |
| 18768 | { |
| 18769 | assert(simdStructNode->OperIsLocal()); |
| 18770 | assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum)); |
| 18771 | simdStructNode = gtNewIndir(simdType, simdStructNode); |
| 18772 | } |
| 18773 | else |
| 18774 | { |
| 18775 | assert(varTypeIsSIMD(simdStructNode)); |
| 18776 | } |
| 18777 | |
| 18778 | #ifdef DEBUG |
| 18779 | if (verbose) |
| 18780 | { |
| 18781 | printf("\n" FMT_BB " stmt" , block->bbNum); |
| 18782 | printTreeID(stmt); |
| 18783 | printf("(before)\n" ); |
| 18784 | gtDispTree(stmt); |
| 18785 | } |
| 18786 | #endif |
| 18787 | |
| 18788 | // TODO-1stClassStructs: we should be able to simply use a GT_IND here. |
| 18789 | GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize); |
| 18790 | blkNode->gtType = simdType; |
| 18791 | tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize, |
| 18792 | false, // not volatile |
| 18793 | true); // copyBlock |
| 18794 | |
| 18795 | stmt->gtStmt.gtStmtExpr = tree; |
| 18796 | |
| 18797 | // Since we generated a new address node which didn't exist before, |
| 18798 | // we should expose this address manually here. |
| 18799 | LocalAddressVisitor visitor(this); |
| 18800 | visitor.VisitStmt(stmt->AsStmt()); |
| 18801 | |
| 18802 | #ifdef DEBUG |
| 18803 | if (verbose) |
| 18804 | { |
| 18805 | printf("\nReplaced " FMT_BB " stmt" , block->bbNum); |
| 18806 | printTreeID(stmt); |
| 18807 | printf("(after)\n" ); |
| 18808 | gtDispTree(stmt); |
| 18809 | } |
| 18810 | #endif |
| 18811 | return true; |
| 18812 | } |
| 18813 | |
| 18814 | #endif // FEATURE_SIMD |
| 18815 | |
| 18816 | #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_) |
| 18817 | GenTreeStmt* SkipNopStmts(GenTreeStmt* stmt) |
| 18818 | { |
    // Walk past statements whose expression is a "nothing" node (a void GT_NOP),
    // and return the first statement that does real work (or nullptr).
    while ((stmt != nullptr) && stmt->gtStmtExpr->IsNothingNode())
    {
        stmt = stmt->gtNextStmt;
    }
| 18823 | return stmt; |
| 18824 | } |
| 18825 | |
| 18826 | #endif // !FEATURE_CORECLR && _TARGET_AMD64_ |
| 18827 | |
| 18828 | //------------------------------------------------------------------------ |
// fgCheckStmtAfterTailCall: check that the statements after the tail call stmt
// candidate are in one of the expected forms, which are described below.
| 18831 | // |
| 18832 | // Return Value: |
| 18833 | // 'true' if stmts are in the expected form, else 'false'. |
| 18834 | // |
| 18835 | bool Compiler::fgCheckStmtAfterTailCall() |
| 18836 | { |
| 18837 | |
| 18838 | // For void calls, we would have created a GT_CALL in the stmt list. |
| 18839 | // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)). |
| 18840 | // For calls returning structs, we would have a void call, followed by a void return. |
    // For debuggable code, it would be an assignment of the call to a temp.
    // We want to get rid of any of these extra trees, and just leave
    // the call.
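    // Purely illustrative example (not an exact dump): for a non-void call in
    // debuggable code the IR might look like
    //     callStmt:      tmp = call TailCallee(...)
    //     nextMorphStmt: return (cast) tmp
    // and the checks below verify that everything after callStmt has one of the
    // expected shapes.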
| 18844 | GenTreeStmt* callStmt = fgMorphStmt; |
| 18845 | |
| 18846 | GenTreeStmt* nextMorphStmt = callStmt->gtNextStmt; |
| 18847 | |
| 18848 | #if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_) |
| 18849 | // Legacy Jit64 Compat: |
| 18850 | // There could be any number of GT_NOPs between tail call and GT_RETURN. |
    // That is, the tail call pattern could be one of the following:
| 18852 | // 1) tail.call, nop*, ret |
| 18853 | // 2) tail.call, nop*, pop, nop*, ret |
| 18854 | // 3) var=tail.call, nop*, ret(var) |
| 18855 | // 4) var=tail.call, nop*, pop, ret |
| 18856 | // 5) comma(tail.call, nop), nop*, ret |
| 18857 | // |
| 18858 | // See impIsTailCallILPattern() for details on tail call IL patterns |
| 18859 | // that are supported. |
| 18860 | GenTree* callExpr = callStmt->gtStmtExpr; |
| 18861 | |
| 18862 | if (callExpr->gtOper != GT_RETURN) |
| 18863 | { |
| 18864 | // First skip all GT_NOPs after the call |
| 18865 | nextMorphStmt = SkipNopStmts(nextMorphStmt); |
| 18866 | |
| 18867 | // Check to see if there is a pop. |
        // Since the tail call is honored, we can get rid of the stmt corresponding to the pop.
| 18869 | if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN) |
| 18870 | { |
            // Note that the pop opcode may or may not result in a new stmt (for details see
            // impImportBlockCode()). Hence, it is not possible to assert about the IR form
            // generated by the pop, but the pop tree must be side-effect free so that we can
            // delete it safely.
| 18875 | GenTreeStmt* popStmt = nextMorphStmt; |
| 18876 | |
| 18877 | // Side effect flags on a GT_COMMA may be overly pessimistic, so examine |
| 18878 | // the constituent nodes. |
| 18879 | GenTree* popExpr = popStmt->gtStmtExpr; |
| 18880 | bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0; |
| 18881 | if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA)) |
| 18882 | { |
| 18883 | isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) && |
| 18884 | ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0); |
| 18885 | } |
| 18886 | noway_assert(isSideEffectFree); |
| 18887 | |
| 18888 | nextMorphStmt = popStmt->gtNextStmt; |
| 18889 | } |
| 18890 | |
| 18891 | // Next skip any GT_NOP nodes after the pop |
| 18892 | nextMorphStmt = SkipNopStmts(nextMorphStmt); |
| 18893 | } |
| 18894 | #endif // !FEATURE_CORECLR && _TARGET_AMD64_ |
| 18895 | |
    // Check that the remaining stmts in the block are in one of the following patterns:
| 18897 | // 1) ret(void) |
| 18898 | // 2) ret(cast*(callResultLclVar)) |
| 18899 | // 3) lclVar = callResultLclVar, the actual ret(lclVar) in another block |
| 18900 | if (nextMorphStmt != nullptr) |
| 18901 | { |
| 18902 | GenTree* callExpr = callStmt->gtStmtExpr; |
| 18903 | if (callExpr->gtOper != GT_ASG) |
| 18904 | { |
            // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar),
            // where lclVar was the return buffer in the call for structs or simd.
| 18907 | GenTreeStmt* retStmt = nextMorphStmt; |
| 18908 | GenTree* retExpr = retStmt->gtStmtExpr; |
| 18909 | noway_assert(retExpr->gtOper == GT_RETURN); |
| 18910 | |
| 18911 | nextMorphStmt = retStmt->gtNextStmt; |
| 18912 | } |
| 18913 | else |
| 18914 | { |
| 18915 | noway_assert(callExpr->gtGetOp1()->OperIsLocal()); |
| 18916 | unsigned callResultLclNumber = callExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum; |
| 18917 | |
| 18918 | #if FEATURE_TAILCALL_OPT_SHARED_RETURN |
| 18919 | |
| 18920 | // We can have a move from the call result to an lvaInlineeReturnSpillTemp. |
| 18921 | // However, we can't check that this assignment was created there. |
| 18922 | if (nextMorphStmt->gtStmtExpr->gtOper == GT_ASG) |
| 18923 | { |
| 18924 | GenTreeStmt* moveStmt = nextMorphStmt; |
| 18925 | GenTree* moveExpr = nextMorphStmt->gtStmtExpr; |
| 18926 | noway_assert(moveExpr->gtGetOp1()->OperIsLocal() && moveExpr->gtGetOp2()->OperIsLocal()); |
| 18927 | |
| 18928 | unsigned srcLclNum = moveExpr->gtGetOp2()->AsLclVarCommon()->gtLclNum; |
| 18929 | noway_assert(srcLclNum == callResultLclNumber); |
| 18930 | unsigned dstLclNum = moveExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum; |
| 18931 | callResultLclNumber = dstLclNum; |
| 18932 | |
| 18933 | nextMorphStmt = moveStmt->gtNextStmt; |
| 18934 | } |
| 18935 | if (nextMorphStmt != nullptr) |
| 18936 | #endif |
| 18937 | { |
| 18938 | GenTreeStmt* retStmt = nextMorphStmt; |
| 18939 | GenTree* retExpr = nextMorphStmt->gtStmtExpr; |
| 18940 | noway_assert(retExpr->gtOper == GT_RETURN); |
| 18941 | |
| 18942 | GenTree* treeWithLcl = retExpr->gtGetOp1(); |
| 18943 | while (treeWithLcl->gtOper == GT_CAST) |
| 18944 | { |
| 18945 | noway_assert(!treeWithLcl->gtOverflow()); |
| 18946 | treeWithLcl = treeWithLcl->gtGetOp1(); |
| 18947 | } |
| 18948 | |
| 18949 | noway_assert(callResultLclNumber == treeWithLcl->AsLclVarCommon()->gtLclNum); |
| 18950 | |
| 18951 | nextMorphStmt = retStmt->gtNextStmt; |
| 18952 | } |
| 18953 | } |
| 18954 | } |
| 18955 | return nextMorphStmt == nullptr; |
| 18956 | } |
| 18957 | |
| 18958 | static const int numberOfTrackedFlags = 5; |
| 18959 | static const unsigned trackedFlags[numberOfTrackedFlags] = {GTF_ASG, GTF_CALL, GTF_EXCEPT, GTF_GLOB_REF, |
| 18960 | GTF_ORDER_SIDEEFF}; |
| 18961 | |
| 18962 | //------------------------------------------------------------------------ |
| 18963 | // fgMorphArgList: morph argument list tree without recursion. |
| 18964 | // |
| 18965 | // Arguments: |
| 18966 | // args - argument list tree to morph; |
| 18967 | // mac - morph address context, used to morph children. |
| 18968 | // |
| 18969 | // Return Value: |
| 18970 | // morphed argument list. |
| 18971 | // |
| 18972 | GenTreeArgList* Compiler::fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac) |
| 18973 | { |
    // Use a non-recursive algorithm: morph all of the actual list values, memorize
    // the last list node that carries each tracked effect flag, and then, in a second
    // pass, spread each flag to every list node up to (and including) that last node.
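    // Purely illustrative example: for a hypothetical argument list (a, b, c) where,
    // after morphing, only 'b' has GTF_CALL set, the second pass sets GTF_CALL on the
    // list nodes holding 'a' and 'b' (their subtrees contain 'b') and leaves it clear
    // on the list node that holds only 'c'.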
| 18977 | assert((trackedFlags[0] | trackedFlags[1] | trackedFlags[2] | trackedFlags[3] | trackedFlags[4]) == GTF_ALL_EFFECT); |
| 18978 | |
| 18979 | GenTree* memorizedLastNodes[numberOfTrackedFlags] = {nullptr}; |
| 18980 | |
| 18981 | for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) |
| 18982 | { |
| 18983 | // Morph actual list values. |
| 18984 | GenTree*& arg = listNode->Current(); |
| 18985 | arg = fgMorphTree(arg, mac); |
| 18986 | |
| 18987 | // Remember the last list node with each flag. |
| 18988 | for (int i = 0; i < numberOfTrackedFlags; ++i) |
| 18989 | { |
| 18990 | if ((arg->gtFlags & trackedFlags[i]) != 0) |
| 18991 | { |
| 18992 | memorizedLastNodes[i] = listNode; |
| 18993 | } |
| 18994 | } |
| 18995 | } |
| 18996 | |
| 18997 | for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) |
| 18998 | { |
| 18999 | // Clear all old effects from the list node. |
| 19000 | listNode->gtFlags &= ~GTF_ALL_EFFECT; |
| 19001 | |
| 19002 | // Spread each flag to all list nodes (to the prefix) before the memorized last node. |
| 19003 | for (int i = 0; i < numberOfTrackedFlags; ++i) |
| 19004 | { |
| 19005 | if (memorizedLastNodes[i] != nullptr) |
| 19006 | { |
| 19007 | listNode->gtFlags |= trackedFlags[i]; |
| 19008 | } |
| 19009 | if (listNode == memorizedLastNodes[i]) |
| 19010 | { |
| 19011 | memorizedLastNodes[i] = nullptr; |
| 19012 | } |
| 19013 | } |
| 19014 | } |
| 19015 | |
| 19016 | return args; |
| 19017 | } |
| 19018 | |