| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 7 | XX XX |
| 8 | XX RegAlloc XX |
| 9 | XX XX |
| 10 | XX Does the register allocation and puts the remaining lclVars on the stack XX |
| 11 | XX XX |
| 12 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 13 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 14 | */ |
| 15 | |
| 16 | #include "jitpch.h" |
| 17 | #ifdef _MSC_VER |
| 18 | #pragma hdrstop |
| 19 | #endif |
| 20 | #include "regalloc.h" |
| 21 | |
| 22 | #if DOUBLE_ALIGN |
| 23 | DWORD Compiler::getCanDoubleAlign() |
| 24 | { |
| 25 | #ifdef DEBUG |
| 26 | if (compStressCompile(STRESS_DBL_ALN, 20)) |
| 27 | return MUST_DOUBLE_ALIGN; |
| 28 | |
| 29 | return JitConfig.JitDoubleAlign(); |
| 30 | #else |
| 31 | return DEFAULT_DOUBLE_ALIGN; |
| 32 | #endif |
| 33 | } |
| 34 | |
| 35 | //------------------------------------------------------------------------ |
| 36 | // shouldDoubleAlign: Determine whether to double-align the frame |
| 37 | // |
| 38 | // Arguments: |
| 39 | // refCntStk - sum of ref counts for all stack based variables |
| 40 | // refCntEBP - sum of ref counts for EBP enregistered variables |
| 41 | // refCntWtdEBP - sum of wtd ref counts for EBP enregistered variables |
| 42 | // refCntStkParam - sum of ref counts for all stack based parameters |
| 43 | // refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs |
| 44 | // with double fields). |
| 45 | // |
| 46 | // Return Value: |
| 47 | // Returns true if this method estimates that a double-aligned frame would be beneficial |
| 48 | // |
| 49 | // Notes: |
| 50 | // The impact of a double-aligned frame is computed as follows: |
| 51 | // - We save a byte of code for each parameter reference (they are frame-pointer relative) |
| 52 | // - We pay a byte of code for each non-parameter stack reference. |
| 53 | // - We save the misalignment penalty and possible cache-line crossing penalty. |
| 54 | // This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise. |
| 55 | // - We pay 7 extra bytes for: |
| 56 | // MOV EBP,ESP, |
| 57 | // LEA ESP,[EBP-offset] |
| 58 | // AND ESP,-8 to double align ESP |
| 59 | // - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP). |
| 60 | // |
| 61 | // If the misalignment penalty is estimated to be less than the bytes used, we don't double align. |
| 62 | // Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the |
| 63 | // ref count for double-aligned values. |
| 64 | // |
| 65 | bool Compiler::shouldDoubleAlign( |
| 66 | unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl) |
| 67 | { |
| 68 | bool doDoubleAlign = false; |
| 69 | const unsigned DBL_ALIGN_SETUP_SIZE = 7; |
| 70 | |
| 71 | unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE; |
| 72 | unsigned misaligned_weight = 4; |
| 73 | |
| 74 | if (compCodeOpt() == Compiler::SMALL_CODE) |
| 75 | misaligned_weight = 0; |
| 76 | |
| 77 | if (compCodeOpt() == Compiler::FAST_CODE) |
| 78 | misaligned_weight *= 4; |
| 79 | |
| 80 | JITDUMP("\nDouble alignment:\n" ); |
| 81 | JITDUMP(" Bytes that could be saved by not using EBP frame: %i\n" , bytesUsed); |
| 82 | JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %i\n" , refCntWtdEBP); |
| 83 | JITDUMP(" Sum of weighted ref counts for weighted stack based doubles: %i\n" , refCntWtdStkDbl); |
| 84 | |
| 85 | if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT)) |
| 86 | { |
| 87 | JITDUMP(" Predicting not to double-align ESP to save %d bytes of code.\n" , bytesUsed); |
| 88 | } |
| 89 | else if (refCntWtdEBP > refCntWtdStkDbl * 2) |
| 90 | { |
| 91 | // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is |
| 92 | // not double aligned. |
| 93 | // Here are the numbers that make this not double-aligned. |
| 94 | // refCntWtdStkDbl = 0x164 |
| 95 | // refCntWtdEBP = 0x1a4 |
| 96 | // We think we do need to change the heuristic to be in favor of double-align. |
| 97 | |
| 98 | JITDUMP(" Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n" ); |
| 99 | } |
| 100 | else |
| 101 | { |
| 102 | // OK we passed all of the benefit tests, so we'll predict a double aligned frame. |
| 103 | JITDUMP(" Predicting to create a double-aligned frame\n" ); |
| 104 | doDoubleAlign = true; |
| 105 | } |
| 106 | return doDoubleAlign; |
| 107 | } |
| 108 | #endif // DOUBLE_ALIGN |
| 109 | |
| 110 | // The code to set the regState for each arg is outlined for shared use |
| 111 | // by linear scan. (It is not shared for System V AMD64 platform.) |
| 112 | regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc) |
| 113 | { |
| 114 | regNumber inArgReg = argDsc->lvArgReg; |
| 115 | regMaskTP inArgMask = genRegMask(inArgReg); |
| 116 | |
| 117 | if (regState->rsIsFloat) |
| 118 | { |
| 119 | noway_assert(inArgMask & RBM_FLTARG_REGS); |
| 120 | } |
| 121 | else // regState is for the integer registers |
| 122 | { |
| 123 | // This might be the fixed return buffer register argument (on ARM64) |
| 124 | // We check and allow inArgReg to be theFixedRetBuffReg |
| 125 | if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg())) |
| 126 | { |
| 127 | // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg |
| 128 | noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL); |
| 129 | // We should have recorded the variable number for the return buffer arg |
| 130 | noway_assert(info.compRetBuffArg != BAD_VAR_NUM); |
| 131 | } |
| 132 | else // we have a regular arg |
| 133 | { |
| 134 | noway_assert(inArgMask & RBM_ARG_REGS); |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | regState->rsCalleeRegArgMaskLiveIn |= inArgMask; |
| 139 | |
| 140 | #ifdef _TARGET_ARM_ |
| 141 | if (argDsc->lvType == TYP_DOUBLE) |
| 142 | { |
| 143 | if (info.compIsVarArgs || opts.compUseSoftFP) |
| 144 | { |
| 145 | assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); |
| 146 | assert(!regState->rsIsFloat); |
| 147 | } |
| 148 | else |
| 149 | { |
| 150 | assert(regState->rsIsFloat); |
| 151 | assert(emitter::isDoubleReg(inArgReg)); |
| 152 | } |
| 153 | regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); |
| 154 | } |
| 155 | else if (argDsc->lvType == TYP_LONG) |
| 156 | { |
| 157 | assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); |
| 158 | assert(!regState->rsIsFloat); |
| 159 | regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); |
| 160 | } |
| 161 | #endif // _TARGET_ARM_ |
| 162 | |
| 163 | #if FEATURE_MULTIREG_ARGS |
| 164 | if (varTypeIsStruct(argDsc->lvType)) |
| 165 | { |
| 166 | if (argDsc->lvIsHfaRegArg()) |
| 167 | { |
| 168 | assert(regState->rsIsFloat); |
| 169 | unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass()); |
| 170 | for (unsigned i = 1; i < cSlots; i++) |
| 171 | { |
| 172 | assert(inArgReg + i <= LAST_FP_ARGREG); |
| 173 | regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i)); |
| 174 | } |
| 175 | } |
| 176 | else |
| 177 | { |
| 178 | unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE; |
| 179 | for (unsigned i = 1; i < cSlots; i++) |
| 180 | { |
| 181 | regNumber nextArgReg = (regNumber)(inArgReg + i); |
| 182 | if (nextArgReg > REG_ARG_LAST) |
| 183 | { |
| 184 | break; |
| 185 | } |
| 186 | assert(regState->rsIsFloat == false); |
| 187 | regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg); |
| 188 | } |
| 189 | } |
| 190 | } |
| 191 | #endif // FEATURE_MULTIREG_ARGS |
| 192 | |
| 193 | return inArgReg; |
| 194 | } |
| 195 | |
| 196 | /****************************************************************************/ |
| 197 | /* Returns true when we must create an EBP frame |
| 198 | This is used to force most managed methods to have EBP based frames |
| 199 | which allows the ETW kernel stackwalker to walk the stacks of managed code |
| 200 | this allows the kernel to perform light weight profiling |
| 201 | */ |
| 202 | bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason)) |
| 203 | { |
| 204 | bool result = false; |
| 205 | #ifdef DEBUG |
| 206 | const char* reason = nullptr; |
| 207 | #endif |
| 208 | |
| 209 | #if ETW_EBP_FRAMED |
| 210 | if (!result && opts.OptimizationDisabled()) |
| 211 | { |
| 212 | INDEBUG(reason = "Debug Code" ); |
| 213 | result = true; |
| 214 | } |
| 215 | if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE)) |
| 216 | { |
| 217 | INDEBUG(reason = "IL Code Size" ); |
| 218 | result = true; |
| 219 | } |
| 220 | if (!result && (fgBBcount > 3)) |
| 221 | { |
| 222 | INDEBUG(reason = "BasicBlock Count" ); |
| 223 | result = true; |
| 224 | } |
| 225 | if (!result && fgHasLoops) |
| 226 | { |
| 227 | INDEBUG(reason = "Method has Loops" ); |
| 228 | result = true; |
| 229 | } |
| 230 | if (!result && (optCallCount >= 2)) |
| 231 | { |
| 232 | INDEBUG(reason = "Call Count" ); |
| 233 | result = true; |
| 234 | } |
| 235 | if (!result && (optIndirectCallCount >= 1)) |
| 236 | { |
| 237 | INDEBUG(reason = "Indirect Call" ); |
| 238 | result = true; |
| 239 | } |
| 240 | #endif // ETW_EBP_FRAMED |
| 241 | |
| 242 | // VM wants to identify the containing frame of an InlinedCallFrame always |
| 243 | // via the frame register never the stack register so we need a frame. |
| 244 | if (!result && (optNativeCallCount != 0)) |
| 245 | { |
| 246 | INDEBUG(reason = "Uses PInvoke" ); |
| 247 | result = true; |
| 248 | } |
| 249 | |
| 250 | #ifdef _TARGET_ARM64_ |
| 251 | // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame |
| 252 | // pointer frames. |
| 253 | if (!result) |
| 254 | { |
| 255 | INDEBUG(reason = "Temporary ARM64 force frame pointer" ); |
| 256 | result = true; |
| 257 | } |
| 258 | #endif // _TARGET_ARM64_ |
| 259 | |
| 260 | #ifdef DEBUG |
| 261 | if ((result == true) && (wbReason != nullptr)) |
| 262 | { |
| 263 | *wbReason = reason; |
| 264 | } |
| 265 | #endif |
| 266 | |
| 267 | return result; |
| 268 | } |
| 269 | |
| 270 | /***************************************************************************** |
| 271 | * |
| 272 | * Mark all variables as to whether they live on the stack frame |
| 273 | * (part or whole), and if so what the base is (FP or SP). |
| 274 | */ |
| 275 | |
| 276 | void Compiler::raMarkStkVars() |
| 277 | { |
| 278 | unsigned lclNum; |
| 279 | LclVarDsc* varDsc; |
| 280 | |
| 281 | for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) |
| 282 | { |
| 283 | // lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below. |
| 284 | |
| 285 | if (lvaIsFieldOfDependentlyPromotedStruct(varDsc)) |
| 286 | { |
| 287 | noway_assert(!varDsc->lvRegister); |
| 288 | goto ON_STK; |
| 289 | } |
| 290 | |
| 291 | /* Fully enregistered variables don't need any frame space */ |
| 292 | |
| 293 | if (varDsc->lvRegister) |
| 294 | { |
| 295 | goto NOT_STK; |
| 296 | } |
| 297 | /* Unused variables typically don't get any frame space */ |
| 298 | else if (varDsc->lvRefCnt() == 0) |
| 299 | { |
| 300 | bool needSlot = false; |
| 301 | |
| 302 | bool stkFixedArgInVarArgs = |
| 303 | info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg; |
| 304 | |
| 305 | // If its address has been exposed, ignore lvRefCnt. However, exclude |
| 306 | // fixed arguments in varargs method as lvOnFrame shouldn't be set |
| 307 | // for them as we don't want to explicitly report them to GC. |
| 308 | |
| 309 | if (!stkFixedArgInVarArgs) |
| 310 | { |
| 311 | needSlot |= varDsc->lvAddrExposed; |
| 312 | } |
| 313 | |
| 314 | #if FEATURE_FIXED_OUT_ARGS |
| 315 | |
| 316 | /* Is this the dummy variable representing GT_LCLBLK ? */ |
| 317 | needSlot |= (lclNum == lvaOutgoingArgSpaceVar); |
| 318 | |
| 319 | #endif // FEATURE_FIXED_OUT_ARGS |
| 320 | |
| 321 | #ifdef DEBUG |
| 322 | /* For debugging, note that we have to reserve space even for |
| 323 | unused variables if they are ever in scope. However, this is not |
| 324 | an issue as fgExtendDbgLifetimes() adds an initialization and |
| 325 | variables in scope will not have a zero ref-cnt. |
| 326 | */ |
| 327 | if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked) |
| 328 | { |
| 329 | for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++) |
| 330 | { |
| 331 | noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum); |
| 332 | } |
| 333 | } |
| 334 | #endif |
| 335 | /* |
| 336 | For Debug Code, we have to reserve space even if the variable is never |
| 337 | in scope. We will also need to initialize it if it is a GC var. |
| 338 | So we set lvMustInit and verify it has a nonzero ref-cnt. |
| 339 | */ |
| 340 | |
| 341 | if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount) |
| 342 | { |
| 343 | if (varDsc->lvRefCnt() == 0) |
| 344 | { |
| 345 | assert(!"unreferenced local in debug codegen" ); |
| 346 | varDsc->lvImplicitlyReferenced = 1; |
| 347 | } |
| 348 | |
| 349 | needSlot |= true; |
| 350 | |
| 351 | if (!varDsc->lvIsParam) |
| 352 | { |
| 353 | varDsc->lvMustInit = true; |
| 354 | } |
| 355 | } |
| 356 | |
| 357 | varDsc->lvOnFrame = needSlot; |
| 358 | if (!needSlot) |
| 359 | { |
| 360 | /* Clear the lvMustInit flag in case it is set */ |
| 361 | varDsc->lvMustInit = false; |
| 362 | |
| 363 | goto NOT_STK; |
| 364 | } |
| 365 | } |
| 366 | |
| 367 | if (!varDsc->lvOnFrame) |
| 368 | { |
| 369 | goto NOT_STK; |
| 370 | } |
| 371 | |
| 372 | ON_STK: |
| 373 | /* The variable (or part of it) lives on the stack frame */ |
| 374 | |
| 375 | noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN)); |
| 376 | #if FEATURE_FIXED_OUT_ARGS |
| 377 | noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0); |
| 378 | #else // FEATURE_FIXED_OUT_ARGS |
| 379 | noway_assert(lvaLclSize(lclNum) != 0); |
| 380 | #endif // FEATURE_FIXED_OUT_ARGS |
| 381 | |
| 382 | varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the |
| 383 | // stack frame |
| 384 | |
| 385 | NOT_STK:; |
| 386 | varDsc->lvFramePointerBased = codeGen->isFramePointerUsed(); |
| 387 | |
| 388 | #if DOUBLE_ALIGN |
| 389 | |
| 390 | if (codeGen->doDoubleAlign()) |
| 391 | { |
| 392 | noway_assert(codeGen->isFramePointerUsed() == false); |
| 393 | |
| 394 | /* All arguments are off of EBP with double-aligned frames */ |
| 395 | |
| 396 | if (varDsc->lvIsParam && !varDsc->lvIsRegArg) |
| 397 | { |
| 398 | varDsc->lvFramePointerBased = true; |
| 399 | } |
| 400 | } |
| 401 | |
| 402 | #endif |
| 403 | |
| 404 | /* Some basic checks */ |
| 405 | |
| 406 | // It must be in a register, on frame, or have zero references. |
| 407 | |
| 408 | noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt() == 0); |
| 409 | |
| 410 | // We can't have both lvRegister and lvOnFrame |
| 411 | noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame); |
| 412 | |
| 413 | #ifdef DEBUG |
| 414 | |
| 415 | // For varargs functions, there should be no direct references to |
| 416 | // parameter variables except for 'this' (because these were morphed |
| 417 | // in the importer) and the 'arglist' parameter (which is not a GC |
| 418 | // pointer). and the return buffer argument (if we are returning a |
| 419 | // struct). |
| 420 | // This is important because we don't want to try to report them |
| 421 | // to the GC, as the frame offsets in these local varables would |
| 422 | // not be correct. |
| 423 | |
| 424 | if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum)) |
| 425 | { |
| 426 | if (!varDsc->lvPromoted && !varDsc->lvIsStructField) |
| 427 | { |
| 428 | noway_assert(varDsc->lvRefCnt() == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame); |
| 429 | } |
| 430 | } |
| 431 | #endif |
| 432 | } |
| 433 | } |
| 434 | |