1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX RegAlloc XX |
9 | XX XX |
10 | XX Does the register allocation and puts the remaining lclVars on the stack XX |
11 | XX XX |
12 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
13 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
14 | */ |
15 | |
16 | #include "jitpch.h" |
17 | #ifdef _MSC_VER |
18 | #pragma hdrstop |
19 | #endif |
20 | #include "regalloc.h" |
21 | |
22 | #if DOUBLE_ALIGN |
23 | DWORD Compiler::getCanDoubleAlign() |
24 | { |
25 | #ifdef DEBUG |
26 | if (compStressCompile(STRESS_DBL_ALN, 20)) |
27 | return MUST_DOUBLE_ALIGN; |
28 | |
29 | return JitConfig.JitDoubleAlign(); |
30 | #else |
31 | return DEFAULT_DOUBLE_ALIGN; |
32 | #endif |
33 | } |
34 | |
35 | //------------------------------------------------------------------------ |
36 | // shouldDoubleAlign: Determine whether to double-align the frame |
37 | // |
38 | // Arguments: |
39 | // refCntStk - sum of ref counts for all stack based variables |
40 | // refCntEBP - sum of ref counts for EBP enregistered variables |
41 | // refCntWtdEBP - sum of wtd ref counts for EBP enregistered variables |
42 | // refCntStkParam - sum of ref counts for all stack based parameters |
43 | // refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs |
44 | // with double fields). |
45 | // |
46 | // Return Value: |
47 | // Returns true if this method estimates that a double-aligned frame would be beneficial |
48 | // |
49 | // Notes: |
50 | // The impact of a double-aligned frame is computed as follows: |
51 | // - We save a byte of code for each parameter reference (they are frame-pointer relative) |
52 | // - We pay a byte of code for each non-parameter stack reference. |
53 | // - We save the misalignment penalty and possible cache-line crossing penalty. |
54 | // This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise. |
55 | // - We pay 7 extra bytes for: |
56 | // MOV EBP,ESP, |
57 | // LEA ESP,[EBP-offset] |
58 | // AND ESP,-8 to double align ESP |
59 | // - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP). |
60 | // |
61 | // If the misalignment penalty is estimated to be less than the bytes used, we don't double align. |
62 | // Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the |
63 | // ref count for double-aligned values. |
64 | // |
65 | bool Compiler::shouldDoubleAlign( |
66 | unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl) |
67 | { |
68 | bool doDoubleAlign = false; |
69 | const unsigned DBL_ALIGN_SETUP_SIZE = 7; |
70 | |
71 | unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE; |
72 | unsigned misaligned_weight = 4; |
73 | |
74 | if (compCodeOpt() == Compiler::SMALL_CODE) |
75 | misaligned_weight = 0; |
76 | |
77 | if (compCodeOpt() == Compiler::FAST_CODE) |
78 | misaligned_weight *= 4; |
79 | |
80 | JITDUMP("\nDouble alignment:\n" ); |
81 | JITDUMP(" Bytes that could be saved by not using EBP frame: %i\n" , bytesUsed); |
82 | JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %i\n" , refCntWtdEBP); |
83 | JITDUMP(" Sum of weighted ref counts for weighted stack based doubles: %i\n" , refCntWtdStkDbl); |
84 | |
85 | if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT)) |
86 | { |
87 | JITDUMP(" Predicting not to double-align ESP to save %d bytes of code.\n" , bytesUsed); |
88 | } |
89 | else if (refCntWtdEBP > refCntWtdStkDbl * 2) |
90 | { |
91 | // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is |
92 | // not double aligned. |
93 | // Here are the numbers that make this not double-aligned. |
94 | // refCntWtdStkDbl = 0x164 |
95 | // refCntWtdEBP = 0x1a4 |
96 | // We think we do need to change the heuristic to be in favor of double-align. |
97 | |
98 | JITDUMP(" Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n" ); |
99 | } |
100 | else |
101 | { |
102 | // OK we passed all of the benefit tests, so we'll predict a double aligned frame. |
103 | JITDUMP(" Predicting to create a double-aligned frame\n" ); |
104 | doDoubleAlign = true; |
105 | } |
106 | return doDoubleAlign; |
107 | } |
108 | #endif // DOUBLE_ALIGN |
109 | |
110 | // The code to set the regState for each arg is outlined for shared use |
111 | // by linear scan. (It is not shared for System V AMD64 platform.) |
112 | regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc) |
113 | { |
114 | regNumber inArgReg = argDsc->lvArgReg; |
115 | regMaskTP inArgMask = genRegMask(inArgReg); |
116 | |
117 | if (regState->rsIsFloat) |
118 | { |
119 | noway_assert(inArgMask & RBM_FLTARG_REGS); |
120 | } |
121 | else // regState is for the integer registers |
122 | { |
123 | // This might be the fixed return buffer register argument (on ARM64) |
124 | // We check and allow inArgReg to be theFixedRetBuffReg |
125 | if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg())) |
126 | { |
127 | // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg |
128 | noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL); |
129 | // We should have recorded the variable number for the return buffer arg |
130 | noway_assert(info.compRetBuffArg != BAD_VAR_NUM); |
131 | } |
132 | else // we have a regular arg |
133 | { |
134 | noway_assert(inArgMask & RBM_ARG_REGS); |
135 | } |
136 | } |
137 | |
138 | regState->rsCalleeRegArgMaskLiveIn |= inArgMask; |
139 | |
140 | #ifdef _TARGET_ARM_ |
141 | if (argDsc->lvType == TYP_DOUBLE) |
142 | { |
143 | if (info.compIsVarArgs || opts.compUseSoftFP) |
144 | { |
145 | assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); |
146 | assert(!regState->rsIsFloat); |
147 | } |
148 | else |
149 | { |
150 | assert(regState->rsIsFloat); |
151 | assert(emitter::isDoubleReg(inArgReg)); |
152 | } |
153 | regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); |
154 | } |
155 | else if (argDsc->lvType == TYP_LONG) |
156 | { |
157 | assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); |
158 | assert(!regState->rsIsFloat); |
159 | regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); |
160 | } |
161 | #endif // _TARGET_ARM_ |
162 | |
163 | #if FEATURE_MULTIREG_ARGS |
164 | if (varTypeIsStruct(argDsc->lvType)) |
165 | { |
166 | if (argDsc->lvIsHfaRegArg()) |
167 | { |
168 | assert(regState->rsIsFloat); |
169 | unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass()); |
170 | for (unsigned i = 1; i < cSlots; i++) |
171 | { |
172 | assert(inArgReg + i <= LAST_FP_ARGREG); |
173 | regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i)); |
174 | } |
175 | } |
176 | else |
177 | { |
178 | unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE; |
179 | for (unsigned i = 1; i < cSlots; i++) |
180 | { |
181 | regNumber nextArgReg = (regNumber)(inArgReg + i); |
182 | if (nextArgReg > REG_ARG_LAST) |
183 | { |
184 | break; |
185 | } |
186 | assert(regState->rsIsFloat == false); |
187 | regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg); |
188 | } |
189 | } |
190 | } |
191 | #endif // FEATURE_MULTIREG_ARGS |
192 | |
193 | return inArgReg; |
194 | } |
195 | |
196 | /****************************************************************************/ |
197 | /* Returns true when we must create an EBP frame |
198 | This is used to force most managed methods to have EBP based frames |
199 | which allows the ETW kernel stackwalker to walk the stacks of managed code |
200 | this allows the kernel to perform light weight profiling |
201 | */ |
202 | bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason)) |
203 | { |
204 | bool result = false; |
205 | #ifdef DEBUG |
206 | const char* reason = nullptr; |
207 | #endif |
208 | |
209 | #if ETW_EBP_FRAMED |
210 | if (!result && opts.OptimizationDisabled()) |
211 | { |
212 | INDEBUG(reason = "Debug Code" ); |
213 | result = true; |
214 | } |
215 | if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE)) |
216 | { |
217 | INDEBUG(reason = "IL Code Size" ); |
218 | result = true; |
219 | } |
220 | if (!result && (fgBBcount > 3)) |
221 | { |
222 | INDEBUG(reason = "BasicBlock Count" ); |
223 | result = true; |
224 | } |
225 | if (!result && fgHasLoops) |
226 | { |
227 | INDEBUG(reason = "Method has Loops" ); |
228 | result = true; |
229 | } |
230 | if (!result && (optCallCount >= 2)) |
231 | { |
232 | INDEBUG(reason = "Call Count" ); |
233 | result = true; |
234 | } |
235 | if (!result && (optIndirectCallCount >= 1)) |
236 | { |
237 | INDEBUG(reason = "Indirect Call" ); |
238 | result = true; |
239 | } |
240 | #endif // ETW_EBP_FRAMED |
241 | |
242 | // VM wants to identify the containing frame of an InlinedCallFrame always |
243 | // via the frame register never the stack register so we need a frame. |
244 | if (!result && (optNativeCallCount != 0)) |
245 | { |
246 | INDEBUG(reason = "Uses PInvoke" ); |
247 | result = true; |
248 | } |
249 | |
250 | #ifdef _TARGET_ARM64_ |
251 | // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame |
252 | // pointer frames. |
253 | if (!result) |
254 | { |
255 | INDEBUG(reason = "Temporary ARM64 force frame pointer" ); |
256 | result = true; |
257 | } |
258 | #endif // _TARGET_ARM64_ |
259 | |
260 | #ifdef DEBUG |
261 | if ((result == true) && (wbReason != nullptr)) |
262 | { |
263 | *wbReason = reason; |
264 | } |
265 | #endif |
266 | |
267 | return result; |
268 | } |
269 | |
270 | /***************************************************************************** |
271 | * |
272 | * Mark all variables as to whether they live on the stack frame |
273 | * (part or whole), and if so what the base is (FP or SP). |
274 | */ |
275 | |
276 | void Compiler::raMarkStkVars() |
277 | { |
278 | unsigned lclNum; |
279 | LclVarDsc* varDsc; |
280 | |
281 | for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) |
282 | { |
283 | // lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below. |
284 | |
285 | if (lvaIsFieldOfDependentlyPromotedStruct(varDsc)) |
286 | { |
287 | noway_assert(!varDsc->lvRegister); |
288 | goto ON_STK; |
289 | } |
290 | |
291 | /* Fully enregistered variables don't need any frame space */ |
292 | |
293 | if (varDsc->lvRegister) |
294 | { |
295 | goto NOT_STK; |
296 | } |
297 | /* Unused variables typically don't get any frame space */ |
298 | else if (varDsc->lvRefCnt() == 0) |
299 | { |
300 | bool needSlot = false; |
301 | |
302 | bool stkFixedArgInVarArgs = |
303 | info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg; |
304 | |
305 | // If its address has been exposed, ignore lvRefCnt. However, exclude |
306 | // fixed arguments in varargs method as lvOnFrame shouldn't be set |
307 | // for them as we don't want to explicitly report them to GC. |
308 | |
309 | if (!stkFixedArgInVarArgs) |
310 | { |
311 | needSlot |= varDsc->lvAddrExposed; |
312 | } |
313 | |
314 | #if FEATURE_FIXED_OUT_ARGS |
315 | |
316 | /* Is this the dummy variable representing GT_LCLBLK ? */ |
317 | needSlot |= (lclNum == lvaOutgoingArgSpaceVar); |
318 | |
319 | #endif // FEATURE_FIXED_OUT_ARGS |
320 | |
321 | #ifdef DEBUG |
322 | /* For debugging, note that we have to reserve space even for |
323 | unused variables if they are ever in scope. However, this is not |
324 | an issue as fgExtendDbgLifetimes() adds an initialization and |
325 | variables in scope will not have a zero ref-cnt. |
326 | */ |
327 | if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked) |
328 | { |
329 | for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++) |
330 | { |
331 | noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum); |
332 | } |
333 | } |
334 | #endif |
335 | /* |
336 | For Debug Code, we have to reserve space even if the variable is never |
337 | in scope. We will also need to initialize it if it is a GC var. |
338 | So we set lvMustInit and verify it has a nonzero ref-cnt. |
339 | */ |
340 | |
341 | if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount) |
342 | { |
343 | if (varDsc->lvRefCnt() == 0) |
344 | { |
345 | assert(!"unreferenced local in debug codegen" ); |
346 | varDsc->lvImplicitlyReferenced = 1; |
347 | } |
348 | |
349 | needSlot |= true; |
350 | |
351 | if (!varDsc->lvIsParam) |
352 | { |
353 | varDsc->lvMustInit = true; |
354 | } |
355 | } |
356 | |
357 | varDsc->lvOnFrame = needSlot; |
358 | if (!needSlot) |
359 | { |
360 | /* Clear the lvMustInit flag in case it is set */ |
361 | varDsc->lvMustInit = false; |
362 | |
363 | goto NOT_STK; |
364 | } |
365 | } |
366 | |
367 | if (!varDsc->lvOnFrame) |
368 | { |
369 | goto NOT_STK; |
370 | } |
371 | |
372 | ON_STK: |
373 | /* The variable (or part of it) lives on the stack frame */ |
374 | |
375 | noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN)); |
376 | #if FEATURE_FIXED_OUT_ARGS |
377 | noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0); |
378 | #else // FEATURE_FIXED_OUT_ARGS |
379 | noway_assert(lvaLclSize(lclNum) != 0); |
380 | #endif // FEATURE_FIXED_OUT_ARGS |
381 | |
382 | varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the |
383 | // stack frame |
384 | |
385 | NOT_STK:; |
386 | varDsc->lvFramePointerBased = codeGen->isFramePointerUsed(); |
387 | |
388 | #if DOUBLE_ALIGN |
389 | |
390 | if (codeGen->doDoubleAlign()) |
391 | { |
392 | noway_assert(codeGen->isFramePointerUsed() == false); |
393 | |
394 | /* All arguments are off of EBP with double-aligned frames */ |
395 | |
396 | if (varDsc->lvIsParam && !varDsc->lvIsRegArg) |
397 | { |
398 | varDsc->lvFramePointerBased = true; |
399 | } |
400 | } |
401 | |
402 | #endif |
403 | |
404 | /* Some basic checks */ |
405 | |
406 | // It must be in a register, on frame, or have zero references. |
407 | |
408 | noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt() == 0); |
409 | |
410 | // We can't have both lvRegister and lvOnFrame |
411 | noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame); |
412 | |
413 | #ifdef DEBUG |
414 | |
415 | // For varargs functions, there should be no direct references to |
416 | // parameter variables except for 'this' (because these were morphed |
417 | // in the importer) and the 'arglist' parameter (which is not a GC |
418 | // pointer). and the return buffer argument (if we are returning a |
419 | // struct). |
420 | // This is important because we don't want to try to report them |
421 | // to the GC, as the frame offsets in these local varables would |
422 | // not be correct. |
423 | |
424 | if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum)) |
425 | { |
426 | if (!varDsc->lvPromoted && !varDsc->lvIsStructField) |
427 | { |
428 | noway_assert(varDsc->lvRefCnt() == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame); |
429 | } |
430 | } |
431 | #endif |
432 | } |
433 | } |
434 | |