1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX Code Generator Common: XX |
9 | XX Methods common to all architectures and register allocation strategies XX |
10 | XX XX |
11 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
12 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
13 | */ |
14 | |
15 | // TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost |
16 | // identical, and which should probably be moved here. |
17 | |
18 | #include "jitpch.h" |
19 | #ifdef _MSC_VER |
20 | #pragma hdrstop |
21 | #endif |
22 | #include "codegen.h" |
23 | |
24 | #include "gcinfo.h" |
25 | #include "emit.h" |
26 | |
27 | #ifndef JIT32_GCENCODER |
28 | #include "gcinfoencoder.h" |
29 | #endif |
30 | |
31 | /*****************************************************************************/ |
32 | |
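// Per-type lookup tables, indexed by var_types and generated from typelist.h: the size
// in bytes, the required alignment, the number of stack slots used, and the "actual"
// (stack-normalized) type for each type.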
33 | const BYTE genTypeSizes[] = { |
34 | #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz, |
35 | #include "typelist.h" |
36 | #undef DEF_TP |
37 | }; |
38 | |
39 | const BYTE genTypeAlignments[] = { |
40 | #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al, |
41 | #include "typelist.h" |
42 | #undef DEF_TP |
43 | }; |
44 | |
45 | const BYTE genTypeStSzs[] = { |
46 | #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st, |
47 | #include "typelist.h" |
48 | #undef DEF_TP |
49 | }; |
50 | |
51 | const BYTE genActualTypes[] = { |
52 | #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType, |
53 | #include "typelist.h" |
54 | #undef DEF_TP |
55 | }; |
56 | |
57 | void CodeGenInterface::setFramePointerRequiredEH(bool value) |
58 | { |
59 | m_cgFramePointerRequired = value; |
60 | |
61 | #ifndef JIT32_GCENCODER |
62 | if (value) |
63 | { |
64 | // EnumGcRefs will only enumerate slots in aborted frames |
65 | // if they are fully-interruptible. So if we have a catch |
66 | // or finally that will keep frame-vars alive, we need to |
67 | // force fully-interruptible. |
68 | CLANG_FORMAT_COMMENT_ANCHOR; |
69 | |
70 | #ifdef DEBUG |
71 | if (verbose) |
72 | { |
73 | printf("Method has EH, marking method as fully interruptible\n"); |
74 | } |
75 | #endif |
76 | |
77 | m_cgInterruptible = true; |
78 | } |
79 | #endif // JIT32_GCENCODER |
80 | } |
81 | |
82 | /*****************************************************************************/ |
83 | CodeGenInterface* getCodeGenerator(Compiler* comp) |
84 | { |
85 | return new (comp, CMK_Codegen) CodeGen(comp); |
86 | } |
87 | |
88 | // CodeGenInterface constructor |
89 | CodeGenInterface::CodeGenInterface(Compiler* theCompiler) |
90 | : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler), treeLifeUpdater(nullptr) |
91 | { |
92 | } |
93 | |
94 | /*****************************************************************************/ |
95 | |
96 | CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) |
97 | { |
98 | #if defined(_TARGET_XARCH_) |
99 | negBitmaskFlt = nullptr; |
100 | negBitmaskDbl = nullptr; |
101 | absBitmaskFlt = nullptr; |
102 | absBitmaskDbl = nullptr; |
103 | u8ToDblBitmask = nullptr; |
104 | #endif // defined(_TARGET_XARCH_) |
105 | |
106 | #if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_) |
107 | m_stkArgVarNum = BAD_VAR_NUM; |
108 | #endif |
109 | |
110 | #if defined(UNIX_X86_ABI) |
111 | curNestedAlignment = 0; |
112 | maxNestedAlignment = 0; |
113 | #endif |
114 | |
115 | gcInfo.regSet = ®Set; |
116 | m_cgEmitter = new (compiler->getAllocator()) emitter(); |
117 | m_cgEmitter->codeGen = this; |
118 | m_cgEmitter->gcInfo = &gcInfo; |
119 | |
120 | #ifdef DEBUG |
121 | setVerbose(compiler->verbose); |
122 | #endif // DEBUG |
123 | |
124 | regSet.tmpInit(); |
125 | |
126 | instInit(); |
127 | |
128 | #ifdef LATE_DISASM |
129 | getDisAssembler().disInit(compiler); |
130 | #endif |
131 | |
132 | #ifdef DEBUG |
133 | genTempLiveChg = true; |
134 | genTrnslLocalVarCount = 0; |
135 | |
136 | // Shouldn't be used before it is set in genFnProlog() |
137 | compiler->compCalleeRegsPushed = UninitializedWord<unsigned>(compiler); |
138 | |
139 | #if defined(_TARGET_XARCH_) |
140 | // Shouldn't be used before it is set in genFnProlog() |
141 | compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1; |
142 | #endif // defined(_TARGET_XARCH_) |
143 | #endif // DEBUG |
144 | |
145 | #ifdef _TARGET_AMD64_ |
146 | // This will be set before final frame layout. |
147 | compiler->compVSQuirkStackPaddingNeeded = 0; |
148 | |
149 | // Set to true if we perform the Quirk that fixes the PPP issue |
150 | compiler->compQuirkForPPPflag = false; |
151 | #endif // _TARGET_AMD64_ |
152 | |
153 | // Initialize the IP-mapping logic. |
154 | compiler->genIPmappingList = nullptr; |
155 | compiler->genIPmappingLast = nullptr; |
156 | compiler->genCallSite2ILOffsetMap = nullptr; |
157 | |
158 | /* Assume that we are not fully interruptible */ |
159 | |
160 | genInterruptible = false; |
161 | #ifdef _TARGET_ARMARCH_ |
162 | hasTailCalls = false; |
163 | #endif // _TARGET_ARMARCH_ |
164 | #ifdef DEBUG |
165 | genInterruptibleUsed = false; |
166 | genCurDispOffset = (unsigned)-1; |
167 | #endif |
168 | } |
169 | |
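// Record that 'tree' has been evaluated into register 'reg' by setting its assigned register number.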
170 | void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg) |
171 | { |
172 | tree->gtRegNum = reg; |
173 | } |
174 | |
175 | #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
176 | |
177 | //--------------------------------------------------------------------- |
178 | // genTotalFrameSize - return the "total" size of the stack frame, including local size |
179 | // and callee-saved register size. There are a few things "missing" depending on the |
180 | // platform. The function genCallerSPtoInitialSPdelta() includes those things. |
181 | // |
182 | // For ARM, this doesn't include the prespilled registers. |
183 | // |
184 | // For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true. |
185 | // It also doesn't include the pushed return address. |
186 | // |
187 | // Return value: |
188 | // Frame size |
189 | |
190 | int CodeGenInterface::genTotalFrameSize() |
191 | { |
192 | assert(!IsUninitialized(compiler->compCalleeRegsPushed)); |
193 | |
194 | int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; |
195 | |
196 | assert(totalFrameSize >= 0); |
197 | return totalFrameSize; |
198 | } |
199 | |
200 | //--------------------------------------------------------------------- |
201 | // genSPtoFPdelta - return the offset from SP to the frame pointer. |
202 | // This number is going to be positive, since SP must be at the lowest |
203 | // address. |
204 | // |
205 | // There must be a frame pointer to call this function! |
206 | |
207 | int CodeGenInterface::genSPtoFPdelta() |
208 | { |
209 | assert(isFramePointerUsed()); |
210 | |
211 | int delta; |
212 | |
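// Both deltas below are measured from Caller-SP and are non-positive; subtracting the
// Initial-SP delta from the FP delta therefore yields the (non-negative) distance from
// SP up to the frame pointer.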
213 | delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta(); |
214 | |
215 | assert(delta >= 0); |
216 | return delta; |
217 | } |
218 | |
219 | //--------------------------------------------------------------------- |
220 | // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. |
221 | // This number is going to be negative, since the Caller-SP is at a higher |
222 | // address than the frame pointer. |
223 | // |
224 | // There must be a frame pointer to call this function! |
225 | |
226 | int CodeGenInterface::genCallerSPtoFPdelta() |
227 | { |
228 | assert(isFramePointerUsed()); |
229 | int callerSPtoFPdelta = 0; |
230 | |
231 | #if defined(_TARGET_ARM_) |
232 | // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11. |
233 | callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; |
234 | callerSPtoFPdelta -= 2 * REGSIZE_BYTES; |
235 | #elif defined(_TARGET_X86_) |
236 | // Thanks to ebp chaining, the difference between ebp-based addresses |
237 | // and caller-SP-relative addresses is just the 2 pointers: |
238 | // return address |
239 | // pushed ebp |
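// For example, with REGSIZE_BYTES == 4, the saved EBP (which EBP points at) ends up
// 8 bytes below Caller-SP, so the delta is -8.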
240 | callerSPtoFPdelta -= 2 * REGSIZE_BYTES; |
241 | #else |
242 | #error "Unknown _TARGET_" |
243 | #endif // _TARGET_* |
244 | |
245 | assert(callerSPtoFPdelta <= 0); |
246 | return callerSPtoFPdelta; |
247 | } |
248 | |
249 | //--------------------------------------------------------------------- |
250 | // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. |
251 | // |
252 | // This number will be negative. |
253 | |
254 | int CodeGenInterface::genCallerSPtoInitialSPdelta() |
255 | { |
256 | int callerSPtoSPdelta = 0; |
257 | |
258 | #if defined(_TARGET_ARM_) |
259 | callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; |
260 | callerSPtoSPdelta -= genTotalFrameSize(); |
261 | #elif defined(_TARGET_X86_) |
262 | callerSPtoSPdelta -= genTotalFrameSize(); |
263 | callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address |
264 | |
265 | // compCalleeRegsPushed does not account for the frame pointer |
266 | // TODO-Cleanup: shouldn't this be part of genTotalFrameSize? |
267 | if (isFramePointerUsed()) |
268 | { |
269 | callerSPtoSPdelta -= REGSIZE_BYTES; |
270 | } |
271 | #else |
272 | #error "Unknown _TARGET_" |
273 | #endif // _TARGET_* |
274 | |
275 | assert(callerSPtoSPdelta <= 0); |
276 | return callerSPtoSPdelta; |
277 | } |
278 | |
279 | #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
280 | |
281 | /***************************************************************************** |
282 | * Should we round simple operations (assignments, arithmetic operations, etc.)? |
283 | */ |
284 | |
285 | // inline |
286 | // static |
287 | bool CodeGen::genShouldRoundFP() |
288 | { |
289 | RoundLevel roundLevel = getRoundFloatLevel(); |
290 | |
291 | switch (roundLevel) |
292 | { |
293 | case ROUND_NEVER: |
294 | case ROUND_CMP_CONST: |
295 | case ROUND_CMP: |
296 | return false; |
297 | |
298 | default: |
299 | assert(roundLevel == ROUND_ALWAYS); |
300 | return true; |
301 | } |
302 | } |
303 | |
304 | /***************************************************************************** |
305 | * |
306 | * Initialize some global variables. |
307 | */ |
308 | |
309 | void CodeGen::genPrepForCompiler() |
310 | { |
311 | treeLifeUpdater = new (compiler, CMK_bitset) TreeLifeUpdater<true>(compiler); |
312 | |
313 | /* Figure out which non-register variables hold pointers */ |
314 | |
315 | VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler)); |
316 | |
317 | // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live |
318 | // in a register (i.e. they live on the stack for all or part of their lifetime). |
319 | // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime. |
320 | |
321 | unsigned varNum; |
322 | LclVarDsc* varDsc; |
323 | for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) |
324 | { |
325 | if (varDsc->lvTracked || varDsc->lvIsRegCandidate()) |
326 | { |
327 | if (!varDsc->lvRegister && compiler->lvaIsGCTracked(varDsc)) |
328 | { |
329 | VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex); |
330 | } |
331 | } |
332 | } |
333 | VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler)); |
334 | genLastLiveMask = RBM_NONE; |
335 | #ifdef DEBUG |
336 | compiler->fgBBcountAtCodegen = compiler->fgBBcount; |
337 | #endif |
338 | } |
339 | |
340 | /***************************************************************************** |
341 | * To report exception handling information to the VM, we need the size of the exception |
342 | * handling regions. To compute that, we need to emit labels for the beginning block of |
343 | * an EH region, and the block that immediately follows a region. Go through the EH |
344 | * table and mark all these blocks with BBF_HAS_LABEL to make this happen. |
345 | * |
346 | * The beginning blocks of the EH regions already should have this flag set. |
347 | * |
348 | * No blocks should be added or removed after this. |
349 | * |
350 | * This code is closely coupled with genReportEH() in the sense that any block |
351 | * that this procedure has determined it needs to have a label has to be selected |
352 | * using the same logic both here and in genReportEH(), so basically any time there is |
353 | * a change in the way we handle EH reporting, we have to keep the logic of these two |
354 | * methods 'in sync'. |
355 | */ |
356 | |
357 | void CodeGen::genPrepForEHCodegen() |
358 | { |
359 | assert(!compiler->fgSafeBasicBlockCreation); |
360 | |
361 | EHblkDsc* HBtab; |
362 | EHblkDsc* HBtabEnd; |
363 | |
364 | bool anyFinallys = false; |
365 | |
366 | for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount; |
367 | HBtab < HBtabEnd; HBtab++) |
368 | { |
369 | assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL); |
370 | assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL); |
371 | |
372 | if (HBtab->ebdTryLast->bbNext != nullptr) |
373 | { |
374 | HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL; |
375 | } |
376 | |
377 | if (HBtab->ebdHndLast->bbNext != nullptr) |
378 | { |
379 | HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL; |
380 | } |
381 | |
382 | if (HBtab->HasFilter()) |
383 | { |
384 | assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL); |
385 | // The block after the last block of the filter is |
386 | // the handler begin block, which we already asserted |
387 | // has BBF_HAS_LABEL set. |
388 | } |
389 | |
390 | #if FEATURE_EH_CALLFINALLY_THUNKS |
391 | if (HBtab->HasFinallyHandler()) |
392 | { |
393 | anyFinallys = true; |
394 | } |
395 | #endif // FEATURE_EH_CALLFINALLY_THUNKS |
396 | } |
397 | |
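// When call-finally thunks are in use, genReportEH() also needs the address of the block
// that follows each BBJ_CALLFINALLY (skipping the paired BBJ_ALWAYS, if any), so make sure
// those blocks get labels as well.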
398 | #if FEATURE_EH_CALLFINALLY_THUNKS |
399 | if (anyFinallys) |
400 | { |
401 | for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext) |
402 | { |
403 | if (block->bbJumpKind == BBJ_CALLFINALLY) |
404 | { |
405 | BasicBlock* bbToLabel = block->bbNext; |
406 | if (block->isBBCallAlwaysPair()) |
407 | { |
408 | bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS |
409 | } |
410 | if (bbToLabel != nullptr) |
411 | { |
412 | bbToLabel->bbFlags |= BBF_HAS_LABEL; |
413 | } |
414 | } // block is BBJ_CALLFINALLY |
415 | } // for each block |
416 | } // if (anyFinallys) |
417 | #endif // FEATURE_EH_CALLFINALLY_THUNKS |
418 | } |
419 | |
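// Update the current live-variable set (and the related GC tracking state) to reflect the
// liveness changes caused by 'tree'.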
420 | void CodeGenInterface::genUpdateLife(GenTree* tree) |
421 | { |
422 | treeLifeUpdater->UpdateLife(tree); |
423 | } |
424 | |
425 | void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife) |
426 | { |
427 | compiler->compUpdateLife</*ForCodeGen*/ true>(newLife); |
428 | } |
429 | |
430 | // Return the register mask for the given register variable |
431 | // inline |
432 | regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc) |
433 | { |
434 | regMaskTP regMask = RBM_NONE; |
435 | |
436 | assert(varDsc->lvIsInReg()); |
437 | |
438 | if (varTypeIsFloating(varDsc->TypeGet())) |
439 | { |
440 | regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet()); |
441 | } |
442 | else |
443 | { |
444 | regMask = genRegMask(varDsc->lvRegNum); |
445 | } |
446 | return regMask; |
447 | } |
448 | |
449 | // Return the register mask for the given lclVar or regVar tree node |
450 | // inline |
451 | regMaskTP CodeGenInterface::genGetRegMask(GenTree* tree) |
452 | { |
453 | assert(tree->gtOper == GT_LCL_VAR); |
454 | |
455 | regMaskTP regMask = RBM_NONE; |
456 | const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum; |
457 | if (varDsc->lvPromoted) |
458 | { |
459 | for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) |
460 | { |
461 | noway_assert(compiler->lvaTable[i].lvIsStructField); |
462 | if (compiler->lvaTable[i].lvIsInReg()) |
463 | { |
464 | regMask |= genGetRegMask(&compiler->lvaTable[i]); |
465 | } |
466 | } |
467 | } |
468 | else if (varDsc->lvIsInReg()) |
469 | { |
470 | regMask = genGetRegMask(varDsc); |
471 | } |
472 | return regMask; |
473 | } |
474 | |
475 | // The given lclVar is either going live (being born) or dying. |
476 | // It might be both going live and dying (that is, it is a dead store) under MinOpts. |
477 | // Update regSet.rsMaskVars accordingly. |
478 | // inline |
479 | void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTree* tree)) |
480 | { |
481 | regMaskTP regMask = genGetRegMask(varDsc); |
482 | |
483 | #ifdef DEBUG |
484 | if (compiler->verbose) |
485 | { |
486 | printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable)); |
487 | varDsc->PrintVarReg(); |
488 | printf(" is becoming %s ", (isDying) ? "dead" : "live"); |
489 | Compiler::printTreeID(tree); |
490 | printf("\n"); |
491 | } |
492 | #endif // DEBUG |
493 | |
494 | if (isDying) |
495 | { |
496 | // We'd like to be able to assert the following, however if we are walking |
497 | // through a qmark/colon tree, we may encounter multiple last-use nodes. |
498 | // assert((regSet.rsMaskVars & regMask) == regMask); |
499 | regSet.RemoveMaskVars(regMask); |
500 | } |
501 | else |
502 | { |
503 | assert((regSet.rsMaskVars & regMask) == 0); |
504 | regSet.AddMaskVars(regMask); |
505 | } |
506 | } |
507 | |
508 | //---------------------------------------------------------------------- |
509 | // compHelperCallKillSet: Gets a register mask that represents the kill set for a helper call. |
510 | // Not all JIT Helper calls follow the standard ABI on the target architecture. |
511 | // |
512 | // TODO-CQ: Currently this list is incomplete (not all helper calls are |
513 | // enumerated) and not 100% accurate (some killsets are bigger than |
514 | // what they really are). |
515 | // There's some work to be done in several places in the JIT to |
516 | // accurately track the registers that are getting killed by |
517 | // helper calls: |
518 | // a) LSRA needs several changes to accommodate more precise killsets |
519 | // for every helper call it sees (both explicitly [easy] and |
520 | // implicitly [hard]) |
521 | // b) Currently for AMD64, when we generate code for a helper call |
522 | // we're independently over-pessimizing the killsets of the call |
523 | // (independently from LSRA) and this needs changes |
524 | // both in CodeGenAmd64.cpp and emitx86.cpp. |
525 | // |
526 | // The best solution for this problem would be to try to centralize |
527 | // the killset information in a single place but then make the |
528 | // corresponding changes so every code generation phase is in sync |
529 | // about this. |
530 | // |
531 | // The interim solution is to only add known helper calls that don't |
532 | // follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile. |
533 | // |
534 | // Arguments: |
535 | // helper - The helper being inquired about |
536 | // |
537 | // Return Value: |
538 | // Mask of register kills -- registers whose values are no longer guaranteed to be the same. |
539 | // |
540 | regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) |
541 | { |
542 | switch (helper) |
543 | { |
544 | case CORINFO_HELP_ASSIGN_BYREF: |
545 | #if defined(_TARGET_AMD64_) |
546 | return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC; |
547 | #elif defined(_TARGET_ARMARCH_) |
548 | return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF; |
549 | #elif defined(_TARGET_X86_) |
550 | return RBM_ESI | RBM_EDI | RBM_ECX; |
551 | #else |
552 | NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch"); |
553 | return RBM_CALLEE_TRASH; |
554 | #endif |
555 | |
556 | #if defined(_TARGET_ARMARCH_) |
557 | case CORINFO_HELP_ASSIGN_REF: |
558 | case CORINFO_HELP_CHECKED_ASSIGN_REF: |
559 | return RBM_CALLEE_TRASH_WRITEBARRIER; |
560 | #endif |
561 | |
562 | case CORINFO_HELP_PROF_FCN_ENTER: |
563 | #ifdef RBM_PROFILER_ENTER_TRASH |
564 | return RBM_PROFILER_ENTER_TRASH; |
565 | #else |
566 | NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch"); |
567 | #endif |
568 | |
569 | case CORINFO_HELP_PROF_FCN_LEAVE: |
570 | #ifdef RBM_PROFILER_LEAVE_TRASH |
571 | return RBM_PROFILER_LEAVE_TRASH; |
572 | #else |
573 | NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch"); |
574 | #endif |
575 | |
576 | case CORINFO_HELP_PROF_FCN_TAILCALL: |
577 | #ifdef RBM_PROFILER_TAILCALL_TRASH |
578 | return RBM_PROFILER_TAILCALL_TRASH; |
579 | #else |
580 | NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch"); |
581 | #endif |
582 | |
583 | #ifdef _TARGET_X86_ |
584 | case CORINFO_HELP_ASSIGN_REF_EAX: |
585 | case CORINFO_HELP_ASSIGN_REF_ECX: |
586 | case CORINFO_HELP_ASSIGN_REF_EBX: |
587 | case CORINFO_HELP_ASSIGN_REF_EBP: |
588 | case CORINFO_HELP_ASSIGN_REF_ESI: |
589 | case CORINFO_HELP_ASSIGN_REF_EDI: |
590 | |
591 | case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX: |
592 | case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX: |
593 | case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX: |
594 | case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP: |
595 | case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI: |
596 | case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI: |
597 | return RBM_EDX; |
598 | |
599 | #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS |
600 | case CORINFO_HELP_ASSIGN_REF: |
601 | case CORINFO_HELP_CHECKED_ASSIGN_REF: |
602 | return RBM_EAX | RBM_EDX; |
603 | #endif // FEATURE_USE_ASM_GC_WRITE_BARRIERS |
604 | #endif |
605 | |
606 | case CORINFO_HELP_STOP_FOR_GC: |
607 | return RBM_STOP_FOR_GC_TRASH; |
608 | |
609 | case CORINFO_HELP_INIT_PINVOKE_FRAME: |
610 | return RBM_INIT_PINVOKE_FRAME_TRASH; |
611 | |
612 | default: |
613 | return RBM_CALLEE_TRASH; |
614 | } |
615 | } |
616 | |
617 | //---------------------------------------------------------------------- |
618 | // compNoGCHelperCallKillSet: Gets a register mask that represents the set of registers that no longer |
619 | // contain GC or byref pointers, for "NO GC" helper calls. This is used by the emitter when determining |
620 | // what registers to remove from the current live GC/byref sets (and thus what to report as dead in the |
621 | // GC info). Note that for the CORINFO_HELP_ASSIGN_BYREF helper, in particular, the kill set reported by |
622 | // compHelperCallKillSet() doesn't match this kill set. compHelperCallKillSet() reports the dst/src |
623 | // address registers as killed for liveness purposes, since their values change. However, they still are |
624 | // valid byref pointers after the call, so the dst/src address registers are NOT reported as killed here. |
625 | // |
626 | // Note: This list may not be complete; helpers not listed below default to the RBM_CALLEE_TRASH_NOGC registers. |
627 | // |
628 | // Arguments: |
629 | // helper - The helper being inquired about |
630 | // |
631 | // Return Value: |
632 | // Mask of GC register kills |
633 | // |
634 | regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper) |
635 | { |
636 | assert(emitter::emitNoGChelper(helper)); |
637 | |
638 | switch (helper) |
639 | { |
640 | case CORINFO_HELP_ASSIGN_BYREF: |
641 | #if defined(_TARGET_X86_) |
642 | // This helper only trashes ECX. |
643 | return RBM_ECX; |
644 | #elif defined(_TARGET_AMD64_) |
645 | // This uses and defs RDI and RSI. |
646 | return RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI); |
647 | #elif defined(_TARGET_ARMARCH_) |
648 | return RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; |
649 | #else |
650 | assert(!"unknown arch"); |
651 | #endif |
652 | |
653 | #if defined(_TARGET_XARCH_) |
654 | case CORINFO_HELP_PROF_FCN_ENTER: |
655 | return RBM_PROFILER_ENTER_TRASH; |
656 | |
657 | case CORINFO_HELP_PROF_FCN_LEAVE: |
658 | return RBM_PROFILER_LEAVE_TRASH; |
659 | |
660 | case CORINFO_HELP_PROF_FCN_TAILCALL: |
661 | return RBM_PROFILER_TAILCALL_TRASH; |
662 | #endif // defined(_TARGET_XARCH_) |
663 | |
664 | #if defined(_TARGET_ARMARCH_) |
665 | case CORINFO_HELP_ASSIGN_REF: |
666 | case CORINFO_HELP_CHECKED_ASSIGN_REF: |
667 | return RBM_CALLEE_GCTRASH_WRITEBARRIER; |
668 | case CORINFO_HELP_PROF_FCN_LEAVE: |
669 | // In the case of the Leave profiler callback, we need to preserve the liveness of REG_PROFILER_RET_SCRATCH on ARMARCH. |
670 | return RBM_CALLEE_TRASH_NOGC & ~RBM_PROFILER_RET_SCRATCH; |
671 | #endif |
672 | |
673 | #if defined(_TARGET_X86_) |
674 | case CORINFO_HELP_INIT_PINVOKE_FRAME: |
675 | return RBM_INIT_PINVOKE_FRAME_TRASH; |
676 | #endif // defined(_TARGET_X86_) |
677 | |
678 | default: |
679 | return RBM_CALLEE_TRASH_NOGC; |
680 | } |
681 | } |
682 | |
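// compChangeLife: update compCurLife to 'newLife', which must differ from the current set.
// When ForCodeGen is true, also update the register variable mask and the GC ref/byref
// tracking sets that code generation maintains.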
683 | template <bool ForCodeGen> |
684 | void Compiler::compChangeLife(VARSET_VALARG_TP newLife) |
685 | { |
686 | LclVarDsc* varDsc; |
687 | |
688 | #ifdef DEBUG |
689 | if (verbose) |
690 | { |
691 | printf("Change life %s ", VarSetOps::ToString(this, compCurLife)); |
692 | dumpConvertedVarSet(this, compCurLife); |
693 | printf(" -> %s ", VarSetOps::ToString(this, newLife)); |
694 | dumpConvertedVarSet(this, newLife); |
695 | printf("\n"); |
696 | } |
697 | #endif // DEBUG |
698 | |
699 | /* We should only be called when the live set has actually changed */ |
700 | |
701 | noway_assert(!VarSetOps::Equal(this, compCurLife, newLife)); |
702 | |
703 | if (!ForCodeGen) |
704 | { |
705 | VarSetOps::Assign(this, compCurLife, newLife); |
706 | return; |
707 | } |
708 | |
709 | /* Figure out which variables are becoming live/dead at this point */ |
710 | |
711 | // deadSet = compCurLife - newLife |
712 | VARSET_TP deadSet(VarSetOps::Diff(this, compCurLife, newLife)); |
713 | |
714 | // bornSet = newLife - compCurLife |
715 | VARSET_TP bornSet(VarSetOps::Diff(this, newLife, compCurLife)); |
716 | |
717 | /* A variable can't become both live and dead at the same time */ |
718 | |
719 | // (deadSet UNION bornSet) != EMPTY |
720 | noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet)); |
721 | // (deadSet INTERSECTION bornSet) == EMPTY |
722 | noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet)); |
723 | |
724 | VarSetOps::Assign(this, compCurLife, newLife); |
725 | |
726 | // Handle the dying vars first, then the newly live vars. |
727 | // This is because, in the RyuJIT backend case, they may occupy registers that |
728 | // will be occupied by another var that is newly live. |
729 | VarSetOps::Iter deadIter(this, deadSet); |
730 | unsigned deadVarIndex = 0; |
731 | while (deadIter.NextElem(&deadVarIndex)) |
732 | { |
733 | unsigned varNum = lvaTrackedToVarNum[deadVarIndex]; |
734 | varDsc = lvaTable + varNum; |
735 | bool isGCRef = (varDsc->TypeGet() == TYP_REF); |
736 | bool isByRef = (varDsc->TypeGet() == TYP_BYREF); |
737 | |
738 | if (varDsc->lvIsInReg()) |
739 | { |
740 | // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the |
741 | // gc sets |
742 | regMaskTP regMask = varDsc->lvRegMask(); |
743 | if (isGCRef) |
744 | { |
745 | codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask; |
746 | } |
747 | else if (isByRef) |
748 | { |
749 | codeGen->gcInfo.gcRegByrefSetCur &= ~regMask; |
750 | } |
751 | codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr)); |
752 | } |
753 | // This isn't in a register, so update the gcVarPtrSetCur. |
754 | else if (isGCRef || isByRef) |
755 | { |
756 | VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex); |
757 | JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum); |
758 | } |
759 | } |
760 | |
761 | VarSetOps::Iter bornIter(this, bornSet); |
762 | unsigned bornVarIndex = 0; |
763 | while (bornIter.NextElem(&bornVarIndex)) |
764 | { |
765 | unsigned varNum = lvaTrackedToVarNum[bornVarIndex]; |
766 | varDsc = lvaTable + varNum; |
767 | bool isGCRef = (varDsc->TypeGet() == TYP_REF); |
768 | bool isByRef = (varDsc->TypeGet() == TYP_BYREF); |
769 | |
770 | if (varDsc->lvIsInReg()) |
771 | { |
772 | #ifdef DEBUG |
773 | if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex)) |
774 | { |
775 | JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum); |
776 | } |
777 | #endif // DEBUG |
778 | VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex); |
779 | codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr)); |
780 | regMaskTP regMask = varDsc->lvRegMask(); |
781 | if (isGCRef) |
782 | { |
783 | codeGen->gcInfo.gcRegGCrefSetCur |= regMask; |
784 | } |
785 | else if (isByRef) |
786 | { |
787 | codeGen->gcInfo.gcRegByrefSetCur |= regMask; |
788 | } |
789 | } |
790 | // This isn't in a register, so update the gcVarPtrSetCur |
791 | else if (lvaIsGCTracked(varDsc)) |
792 | { |
793 | VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex); |
794 | JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum); |
795 | } |
796 | } |
797 | |
798 | codeGen->siUpdate(); |
799 | } |
800 | |
801 | // Need an explicit instantiation. |
802 | template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife); |
803 | |
804 | /***************************************************************************** |
805 | * |
806 | * Generate a spill. |
807 | */ |
808 | void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg) |
809 | { |
810 | getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0); |
811 | } |
812 | |
813 | /***************************************************************************** |
814 | * |
815 | * Generate a reload. |
816 | */ |
817 | void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg) |
818 | { |
819 | getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0); |
820 | } |
821 | |
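// Return the register in which the 'this' argument is passed to 'call'; this is always the
// first integer argument register.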
822 | // inline |
823 | regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const |
824 | { |
825 | return REG_ARG_0; |
826 | } |
827 | |
828 | //---------------------------------------------------------------------- |
829 | // getSpillTempDsc: get the TempDsc corresponding to a spilled tree. |
830 | // |
831 | // Arguments: |
832 | // tree - spilled GenTree node |
833 | // |
834 | // Return Value: |
835 | // TempDsc corresponding to tree |
836 | TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree) |
837 | { |
838 | // tree must be in spilled state. |
839 | assert((tree->gtFlags & GTF_SPILLED) != 0); |
840 | |
841 | // Get the tree's SpillDsc. |
842 | RegSet::SpillDsc* prevDsc; |
843 | RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc); |
844 | assert(spillDsc != nullptr); |
845 | |
846 | // Get the temp desc. |
847 | TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc); |
848 | return temp; |
849 | } |
850 | |
851 | #ifdef _TARGET_XARCH_ |
852 | |
853 | #ifdef _TARGET_AMD64_ |
854 | // Returns relocation type hint for an addr. |
855 | // Note that there are no reloc hints on x86. |
856 | // |
857 | // Arguments |
858 | // addr - data address |
859 | // |
860 | // Returns |
861 | // relocation type hint |
862 | // |
863 | unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr) |
864 | { |
865 | return compiler->eeGetRelocTypeHint((void*)addr); |
866 | } |
867 | #endif //_TARGET_AMD64_ |
868 | |
869 | // Return true if an absolute indirect data address can be encoded as an IP-relative |
870 | // offset. Note that this method should be used only when the caller knows that |
871 | // the address is an icon value that VM has given and there is no GenTree node |
872 | // representing it. Otherwise, one should always use FitsInAddrBase(). |
873 | // |
874 | // Arguments |
875 | // addr - an absolute indirect data address |
876 | // |
877 | // Returns |
878 | // true if indir data addr could be encoded as IP-relative offset. |
879 | // |
880 | bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr) |
881 | { |
882 | #ifdef _TARGET_AMD64_ |
883 | return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32; |
884 | #else |
885 | // x86: PC-relative addressing is available only for control flow instructions (jmp and call) |
886 | return false; |
887 | #endif |
888 | } |
889 | |
890 | // Return true if an indirect code address can be encoded as IP-relative offset. |
891 | // Note that this method should be used only when the caller knows that the |
892 | // address is an icon value that VM has given and there is no GenTree node |
893 | // representing it. Otherwise, one should always use FitsInAddrBase(). |
894 | // |
895 | // Arguments |
896 | // addr - an absolute indirect code address |
897 | // |
898 | // Returns |
899 | // true if indir code addr could be encoded as IP-relative offset. |
900 | // |
901 | bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr) |
902 | { |
903 | #ifdef _TARGET_AMD64_ |
904 | return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32; |
905 | #else |
906 | // x86: PC-relative addressing is available only for control flow instructions (jmp and call) |
907 | return true; |
908 | #endif |
909 | } |
910 | |
911 | // Return true if an indirect code address can be encoded as 32-bit displacement |
912 | // relative to zero. Note that this method should be used only when the caller |
913 | // knows that the address is an icon value that VM has given and there is no |
914 | // GenTree node representing it. Otherwise, one should always use FitsInAddrBase(). |
915 | // |
916 | // Arguments |
917 | // addr - absolute indirect code address |
918 | // |
919 | // Returns |
920 | // true if absolute indir code addr could be encoded as 32-bit displacement relative to zero. |
921 | // |
922 | bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr) |
923 | { |
924 | return GenTreeIntConCommon::FitsInI32((ssize_t)addr); |
925 | } |
926 | |
927 | // Return true if an absolute indirect code address needs a relocation recorded with VM. |
928 | // |
929 | // Arguments |
930 | // addr - an absolute indirect code address |
931 | // |
932 | // Returns |
933 | // true if indir code addr needs a relocation recorded with VM |
934 | // |
935 | bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr) |
936 | { |
937 | // If generating relocatable ngen code, then all code addr should go through relocation |
938 | if (compiler->opts.compReloc) |
939 | { |
940 | return true; |
941 | } |
942 | |
943 | #ifdef _TARGET_AMD64_ |
944 | // See if the code indir addr can be encoded as 32-bit displacement relative to zero. |
945 | // We don't need a relocation in that case. |
946 | if (genCodeIndirAddrCanBeEncodedAsZeroRelOffset(addr)) |
947 | { |
948 | return false; |
949 | } |
950 | |
951 | // Else we need a relocation. |
952 | return true; |
953 | #else //_TARGET_X86_ |
954 | // On x86 there is no need to record or ask for relocations during jitting, |
955 | // because all addrs fit within 32-bits. |
956 | return false; |
957 | #endif //_TARGET_X86_ |
958 | } |
959 | |
960 | // Return true if a direct code address needs to be marked as relocatable. |
961 | // |
962 | // Arguments |
963 | // addr - absolute direct code address |
964 | // |
965 | // Returns |
966 | // true if direct code addr needs a relocation recorded with VM |
967 | // |
968 | bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr) |
969 | { |
970 | // If generating relocatable ngen code, then all code addr should go through relocation |
971 | if (compiler->opts.compReloc) |
972 | { |
973 | return true; |
974 | } |
975 | |
976 | #ifdef _TARGET_AMD64_ |
977 | // By default all direct code addresses go through relocation so that VM will setup |
978 | // a jump stub if addr cannot be encoded as pc-relative offset. |
979 | return true; |
980 | #else //_TARGET_X86_ |
981 | // On x86 there is no need for recording relocations during jitting, |
982 | // because all addrs fit within 32-bits. |
983 | return false; |
984 | #endif //_TARGET_X86_ |
985 | } |
986 | #endif //_TARGET_XARCH_ |
987 | |
988 | /***************************************************************************** |
989 | * |
990 | * The following can be used to create basic blocks that serve as labels for |
991 | * the emitter. Use with caution - these are not real basic blocks! |
992 | * |
993 | */ |
994 | |
995 | // inline |
996 | BasicBlock* CodeGen::genCreateTempLabel() |
997 | { |
998 | #ifdef DEBUG |
999 | // These blocks don't affect FP |
1000 | compiler->fgSafeBasicBlockCreation = true; |
1001 | #endif |
1002 | |
1003 | BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE); |
1004 | |
1005 | #ifdef DEBUG |
1006 | compiler->fgSafeBasicBlockCreation = false; |
1007 | #endif |
1008 | |
1009 | block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; |
1010 | |
1011 | // Use coldness of current block, as this label will |
1012 | // be contained in it. |
1013 | block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD); |
1014 | |
1015 | #ifdef DEBUG |
1016 | #ifdef UNIX_X86_ABI |
1017 | block->bbTgtStkDepth = (genStackLevel - curNestedAlignment) / sizeof(int); |
1018 | #else |
1019 | block->bbTgtStkDepth = genStackLevel / sizeof(int); |
1020 | #endif |
1021 | #endif |
1022 | return block; |
1023 | } |
1024 | |
1025 | // inline |
1026 | void CodeGen::genDefineTempLabel(BasicBlock* label) |
1027 | { |
1028 | #ifdef DEBUG |
1029 | if (compiler->opts.dspCode) |
1030 | { |
1031 | printf("\n L_M%03u_" FMT_BB ":\n", Compiler::s_compMethodsCount, label->bbNum); |
1032 | } |
1033 | #endif |
1034 | |
1035 | label->bbEmitCookie = |
1036 | getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); |
1037 | } |
1038 | |
1039 | /***************************************************************************** |
1040 | * |
1041 | * Adjust the stack pointer by the given value; assumes that this follows |
1042 | * a call so only callee-saved registers (and registers that may hold a |
1043 | * return value) are used at this point. |
1044 | */ |
1045 | |
1046 | void CodeGen::genAdjustSP(target_ssize_t delta) |
1047 | { |
1048 | #if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI) |
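// On x86, releasing exactly one stack slot with "pop ecx" is smaller than "add esp, 4"; the
// value popped into ECX is simply discarded (ECX is caller-saved and, per the comment above,
// holds nothing live at this point).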
1049 | if (delta == sizeof(int)) |
1050 | inst_RV(INS_pop, REG_ECX, TYP_INT); |
1051 | else |
1052 | #endif |
1053 | inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE); |
1054 | } |
1055 | |
1056 | //------------------------------------------------------------------------ |
1057 | // genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block |
1058 | // |
1059 | // Arguments: |
1060 | // block - The BasicBlock for which we are about to generate code. |
1061 | // |
1062 | // Assumptions: |
1063 | // Must be called just prior to generating code for 'block'. |
1064 | // |
1065 | // Notes: |
1066 | // This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer, |
1067 | // and if 'block' is a throw helper block with a non-zero stack level. |
1068 | |
1069 | void CodeGen::genAdjustStackLevel(BasicBlock* block) |
1070 | { |
1071 | #if !FEATURE_FIXED_OUT_ARGS |
1072 | // Check for inserted throw blocks and adjust genStackLevel. |
1073 | CLANG_FORMAT_COMMENT_ANCHOR; |
1074 | |
1075 | #if defined(UNIX_X86_ABI) |
1076 | if (isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block)) |
1077 | { |
1078 | // x86/Linux requires stack frames to be 16-byte aligned, but SP may be unaligned |
1079 | // at this point if a jump to this block is made in the middle of pushing arguments. |
1080 | // |
1081 | // Here we restore SP to prevent potential stack alignment issues. |
1082 | getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -genSPtoFPdelta()); |
1083 | } |
1084 | #endif |
1085 | |
1086 | if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block)) |
1087 | { |
1088 | noway_assert(block->bbFlags & BBF_JMP_TARGET); |
1089 | |
1090 | SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int)); |
1091 | |
1092 | if (genStackLevel != 0) |
1093 | { |
1094 | #ifdef _TARGET_X86_ |
1095 | getEmitter()->emitMarkStackLvl(genStackLevel); |
1096 | inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE); |
1097 | SetStackLevel(0); |
1098 | #else // _TARGET_X86_ |
1099 | NYI("Need emitMarkStackLvl()"); |
1100 | #endif // _TARGET_X86_ |
1101 | } |
1102 | } |
1103 | #endif // !FEATURE_FIXED_OUT_ARGS |
1104 | } |
1105 | |
1106 | #ifdef _TARGET_ARMARCH_ |
1107 | // InferOpSizeAlign: return the size, in bytes, of the value produced by 'op'. |
1108 | // The required alignment is returned via the 'alignmentWB' out parameter. |
1109 | unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB) |
1110 | { |
1111 | unsigned alignment = 0; |
1112 | unsigned opSize = 0; |
1113 | |
1114 | if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp()) |
1115 | { |
1116 | opSize = InferStructOpSizeAlign(op, &alignment); |
1117 | } |
1118 | else |
1119 | { |
1120 | alignment = genTypeAlignments[op->TypeGet()]; |
1121 | opSize = genTypeSizes[op->TypeGet()]; |
1122 | } |
1123 | |
1124 | assert(opSize != 0); |
1125 | assert(alignment != 0); |
1126 | |
1127 | (*alignmentWB) = alignment; |
1128 | return opSize; |
1129 | } |
1130 | // InferStructOpSizeAlign: return the size, in bytes, of the struct value produced by 'op'. |
1131 | // The required alignment is returned via the 'alignmentWB' out parameter. |
1132 | unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignmentWB) |
1133 | { |
1134 | unsigned alignment = 0; |
1135 | unsigned opSize = 0; |
1136 | |
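// The value of a GT_COMMA is produced by its second operand, so look through any comma nodes.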
1137 | while (op->gtOper == GT_COMMA) |
1138 | { |
1139 | op = op->gtOp.gtOp2; |
1140 | } |
1141 | |
1142 | if (op->gtOper == GT_OBJ) |
1143 | { |
1144 | CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass; |
1145 | opSize = compiler->info.compCompHnd->getClassSize(clsHnd); |
1146 | alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE); |
1147 | } |
1148 | else if (op->gtOper == GT_LCL_VAR) |
1149 | { |
1150 | unsigned varNum = op->gtLclVarCommon.gtLclNum; |
1151 | LclVarDsc* varDsc = compiler->lvaTable + varNum; |
1152 | assert(varDsc->lvType == TYP_STRUCT); |
1153 | opSize = varDsc->lvSize(); |
1154 | #ifndef _TARGET_64BIT_ |
1155 | if (varDsc->lvStructDoubleAlign) |
1156 | { |
1157 | alignment = TARGET_POINTER_SIZE * 2; |
1158 | } |
1159 | else |
1160 | #endif // !_TARGET_64BIT_ |
1161 | { |
1162 | alignment = TARGET_POINTER_SIZE; |
1163 | } |
1164 | } |
1165 | else if (op->OperIsCopyBlkOp()) |
1166 | { |
1167 | GenTree* op2 = op->gtOp.gtOp2; |
1168 | |
1169 | if (op2->OperGet() == GT_CNS_INT) |
1170 | { |
1171 | if (op2->IsIconHandle(GTF_ICON_CLASS_HDL)) |
1172 | { |
1173 | CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal; |
1174 | opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE); |
1175 | alignment = |
1176 | roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE); |
1177 | } |
1178 | else |
1179 | { |
1180 | opSize = (unsigned)op2->gtIntCon.gtIconVal; |
1181 | GenTree* op1 = op->gtOp.gtOp1; |
1182 | assert(op1->OperGet() == GT_LIST); |
1183 | GenTree* dstAddr = op1->gtOp.gtOp1; |
1184 | if (dstAddr->OperGet() == GT_ADDR) |
1185 | { |
1186 | InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment); |
1187 | } |
1188 | else |
1189 | { |
1190 | assert(!"Unhandled dstAddr node"); |
1191 | alignment = TARGET_POINTER_SIZE; |
1192 | } |
1193 | } |
1194 | } |
1195 | else |
1196 | { |
1197 | noway_assert(!"Variable sized COPYBLK register arg!"); |
1198 | opSize = 0; |
1199 | alignment = TARGET_POINTER_SIZE; |
1200 | } |
1201 | } |
1202 | else if (op->gtOper == GT_MKREFANY) |
1203 | { |
1204 | opSize = TARGET_POINTER_SIZE * 2; |
1205 | alignment = TARGET_POINTER_SIZE; |
1206 | } |
1207 | else if (op->IsArgPlaceHolderNode()) |
1208 | { |
1209 | CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd; |
1210 | assert(clsHnd != 0); |
1211 | opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE); |
1212 | alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE); |
1213 | } |
1214 | else |
1215 | { |
1216 | assert(!"Unhandled gtOper"); |
1217 | opSize = TARGET_POINTER_SIZE; |
1218 | alignment = TARGET_POINTER_SIZE; |
1219 | } |
1220 | |
1221 | assert(opSize != 0); |
1222 | assert(alignment != 0); |
1223 | |
1224 | (*alignmentWB) = alignment; |
1225 | return opSize; |
1226 | } |
1227 | |
1228 | #endif // _TARGET_ARMARCH_ |
1229 | |
1230 | /***************************************************************************** |
1231 | * |
1232 | * Take an address expression and try to find the best set of components to |
1233 | * form an address mode; returns true if this is successful. |
1234 | * |
1235 | * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code. |
1236 | * Refactor this code so that the underlying analysis can be used in |
1237 | * the RyuJIT Backend to do lowering, instead of having to call this method with the |
1238 | * option to not generate the code. |
1239 | * |
1240 | * 'fold' specifies if it is OK to fold the array index which hangs off |
1241 | * a GT_NOP node. |
1242 | * |
1243 | * If successful, the parameters will be set to the following values: |
1244 | * |
1245 | * *rv1Ptr ... base operand |
1246 | * *rv2Ptr ... optional operand |
1247 | * *revPtr ... true if rv2 is before rv1 in the evaluation order |
1248 | * #if SCALED_ADDR_MODES |
1249 | * *mulPtr ... optional multiplier (2/4/8) for rv2 |
1250 | * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0. |
1251 | * #endif |
1252 | * *cnsPtr ... integer constant [optional] |
1253 | * |
1254 | * IMPORTANT NOTE: This routine doesn't generate any code, it merely |
1255 | * identifies the components that might be used to |
1256 | * form an address mode later on. |
1257 | */ |
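// For example, on xarch an address tree of the form
//     GT_ADD(GT_ADD(base, GT_LSH(index, 2)), 16)
// would typically decompose into rv1 = base, rv2 = index, mul = 4 and cns = 16,
// i.e. the address mode [base + 4*index + 16].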
1258 | |
1259 | bool CodeGen::genCreateAddrMode(GenTree* addr, |
1260 | bool fold, |
1261 | bool* revPtr, |
1262 | GenTree** rv1Ptr, |
1263 | GenTree** rv2Ptr, |
1264 | #if SCALED_ADDR_MODES |
1265 | unsigned* mulPtr, |
1266 | #endif // SCALED_ADDR_MODES |
1267 | ssize_t* cnsPtr) |
1268 | { |
1269 | /* |
1270 | The following indirections are valid address modes on x86/x64: |
1271 | |
1272 | [ icon] * not handled here |
1273 | [reg ] |
1274 | [reg + icon] |
1275 | [reg1 + reg2 ] |
1276 | [reg1 + reg2 + icon] |
1277 | [reg1 + 2 * reg2 ] |
1278 | [reg1 + 4 * reg2 ] |
1279 | [reg1 + 8 * reg2 ] |
1280 | [ 2 * reg2 + icon] |
1281 | [ 4 * reg2 + icon] |
1282 | [ 8 * reg2 + icon] |
1283 | [reg1 + 2 * reg2 + icon] |
1284 | [reg1 + 4 * reg2 + icon] |
1285 | [reg1 + 8 * reg2 + icon] |
1286 | |
1287 | The following indirections are valid address modes on arm64: |
1288 | |
1289 | [reg] |
1290 | [reg + icon] |
1291 | [reg1 + reg2] |
1292 | [reg1 + reg2 * natural-scale] |
1293 | |
1294 | */ |
1295 | |
1296 | /* All indirect address modes require the address to be an addition */ |
1297 | |
1298 | if (addr->gtOper != GT_ADD) |
1299 | { |
1300 | return false; |
1301 | } |
1302 | |
1303 | // Can't use indirect addressing mode as we need to check for overflow. |
1304 | // Also, can't use 'lea' as it doesn't set the flags. |
1305 | |
1306 | if (addr->gtOverflow()) |
1307 | { |
1308 | return false; |
1309 | } |
1310 | |
1311 | GenTree* rv1 = nullptr; |
1312 | GenTree* rv2 = nullptr; |
1313 | |
1314 | GenTree* op1; |
1315 | GenTree* op2; |
1316 | |
1317 | ssize_t cns; |
1318 | #if SCALED_ADDR_MODES |
1319 | unsigned mul; |
1320 | #endif // SCALED_ADDR_MODES |
1321 | |
1322 | GenTree* tmp; |
1323 | |
1324 | /* What order are the sub-operands to be evaluated */ |
1325 | |
1326 | if (addr->gtFlags & GTF_REVERSE_OPS) |
1327 | { |
1328 | op1 = addr->gtOp.gtOp2; |
1329 | op2 = addr->gtOp.gtOp1; |
1330 | } |
1331 | else |
1332 | { |
1333 | op1 = addr->gtOp.gtOp1; |
1334 | op2 = addr->gtOp.gtOp2; |
1335 | } |
1336 | |
1337 | bool rev = false; // Is op2 first in the evaluation order? |
1338 | |
1339 | /* |
1340 | A complex address mode can combine the following operands: |
1341 | |
1342 | op1 ... base address |
1343 | op2 ... optional scaled index |
1344 | #if SCALED_ADDR_MODES |
1345 | mul ... optional multiplier (2/4/8) for op2 |
1346 | #endif |
1347 | cns ... optional displacement |
1348 | |
1349 | Here we try to find such a set of operands and arrange for these |
1350 | to sit in registers. |
1351 | */ |
1352 | |
1353 | cns = 0; |
1354 | #if SCALED_ADDR_MODES |
1355 | mul = 0; |
1356 | #endif // SCALED_ADDR_MODES |
1357 | |
1358 | AGAIN: |
1359 | /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that |
1360 | constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back |
1361 | here if we find a scaled index. |
1362 | */ |
1363 | CLANG_FORMAT_COMMENT_ANCHOR; |
1364 | |
1365 | #if SCALED_ADDR_MODES |
1366 | assert(mul == 0); |
1367 | #endif // SCALED_ADDR_MODES |
1368 | |
1369 | /* Special case: keep constants as 'op2' */ |
1370 | |
1371 | if (op1->IsCnsIntOrI()) |
1372 | { |
1373 | // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)? |
1374 | tmp = op1; |
1375 | op1 = op2; |
1376 | op2 = tmp; |
1377 | } |
1378 | |
1379 | /* Check for an addition of a constant */ |
1380 | |
1381 | if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue())) |
1382 | { |
1383 | /* We're adding a constant */ |
1384 | |
1385 | cns += op2->gtIntConCommon.IconValue(); |
1386 | |
1387 | #if defined(_TARGET_ARMARCH_) |
1388 | if (cns == 0) |
1389 | #endif |
1390 | { |
1391 | /* Inspect the operand the constant is being added to */ |
1392 | |
1393 | switch (op1->gtOper) |
1394 | { |
1395 | case GT_ADD: |
1396 | |
1397 | if (op1->gtOverflow()) |
1398 | { |
1399 | break; |
1400 | } |
1401 | |
1402 | op2 = op1->gtOp.gtOp2; |
1403 | op1 = op1->gtOp.gtOp1; |
1404 | |
1405 | goto AGAIN; |
1406 | |
1407 | #if SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_) |
1408 | // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. |
1409 | case GT_MUL: |
1410 | if (op1->gtOverflow()) |
1411 | { |
1412 | return false; // Need overflow check |
1413 | } |
1414 | |
1415 | __fallthrough; |
1416 | |
1417 | case GT_LSH: |
1418 | |
1419 | mul = op1->GetScaledIndex(); |
1420 | if (mul) |
1421 | { |
1422 | /* We can use "[mul*rv2 + icon]" */ |
1423 | |
1424 | rv1 = nullptr; |
1425 | rv2 = op1->gtOp.gtOp1; |
1426 | |
1427 | goto FOUND_AM; |
1428 | } |
1429 | break; |
1430 | #endif // SCALED_ADDR_MODES && !defined(_TARGET_ARMARCH_) |
1431 | |
1432 | default: |
1433 | break; |
1434 | } |
1435 | } |
1436 | |
1437 | /* The best we can do is "[rv1 + icon]" */ |
1438 | |
1439 | rv1 = op1; |
1440 | rv2 = nullptr; |
1441 | |
1442 | goto FOUND_AM; |
1443 | } |
1444 | |
1445 | // op2 is not a constant. So keep on trying. |
1446 | |
1447 | /* Neither op1 nor op2 are sitting in a register right now */ |
1448 | |
1449 | switch (op1->gtOper) |
1450 | { |
1451 | #if !defined(_TARGET_ARMARCH_) |
1452 | // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. |
1453 | case GT_ADD: |
1454 | |
1455 | if (op1->gtOverflow()) |
1456 | { |
1457 | break; |
1458 | } |
1459 | |
1460 | if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal)) |
1461 | { |
1462 | cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal; |
1463 | op1 = op1->gtOp.gtOp1; |
1464 | |
1465 | goto AGAIN; |
1466 | } |
1467 | |
1468 | break; |
1469 | |
1470 | #if SCALED_ADDR_MODES |
1471 | |
1472 | case GT_MUL: |
1473 | |
1474 | if (op1->gtOverflow()) |
1475 | { |
1476 | break; |
1477 | } |
1478 | |
1479 | __fallthrough; |
1480 | |
1481 | case GT_LSH: |
1482 | |
1483 | mul = op1->GetScaledIndex(); |
1484 | if (mul) |
1485 | { |
1486 | /* 'op1' is a scaled value */ |
1487 | |
1488 | rv1 = op2; |
1489 | rv2 = op1->gtOp.gtOp1; |
1490 | |
1491 | int argScale; |
1492 | while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0) |
1493 | { |
1494 | if (jitIsScaleIndexMul(argScale * mul)) |
1495 | { |
1496 | mul = mul * argScale; |
1497 | rv2 = rv2->gtOp.gtOp1; |
1498 | } |
1499 | else |
1500 | { |
1501 | break; |
1502 | } |
1503 | } |
1504 | |
1505 | noway_assert(rev == false); |
1506 | rev = true; |
1507 | |
1508 | goto FOUND_AM; |
1509 | } |
1510 | break; |
1511 | |
1512 | #endif // SCALED_ADDR_MODES |
1513 | #endif // !_TARGET_ARMARCH |
1514 | |
1515 | case GT_NOP: |
1516 | |
1517 | op1 = op1->gtOp.gtOp1; |
1518 | goto AGAIN; |
1519 | |
1520 | case GT_COMMA: |
1521 | |
1522 | op1 = op1->gtOp.gtOp2; |
1523 | goto AGAIN; |
1524 | |
1525 | default: |
1526 | break; |
1527 | } |
1528 | |
1529 | noway_assert(op2); |
1530 | switch (op2->gtOper) |
1531 | { |
1532 | #if !defined(_TARGET_ARMARCH_) |
1533 | // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. |
1534 | case GT_ADD: |
1535 | |
1536 | if (op2->gtOverflow()) |
1537 | { |
1538 | break; |
1539 | } |
1540 | |
1541 | if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal)) |
1542 | { |
1543 | cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal; |
1544 | op2 = op2->gtOp.gtOp1; |
1545 | |
1546 | goto AGAIN; |
1547 | } |
1548 | |
1549 | break; |
1550 | |
1551 | #if SCALED_ADDR_MODES |
1552 | |
1553 | case GT_MUL: |
1554 | |
1555 | if (op2->gtOverflow()) |
1556 | { |
1557 | break; |
1558 | } |
1559 | |
1560 | __fallthrough; |
1561 | |
1562 | case GT_LSH: |
1563 | |
1564 | mul = op2->GetScaledIndex(); |
1565 | if (mul) |
1566 | { |
1567 | // 'op2' is a scaled value... is its argument also scaled? |
1568 | int argScale; |
1569 | rv2 = op2->gtOp.gtOp1; |
1570 | while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0) |
1571 | { |
1572 | if (jitIsScaleIndexMul(argScale * mul)) |
1573 | { |
1574 | mul = mul * argScale; |
1575 | rv2 = rv2->gtOp.gtOp1; |
1576 | } |
1577 | else |
1578 | { |
1579 | break; |
1580 | } |
1581 | } |
1582 | |
1583 | rv1 = op1; |
1584 | |
1585 | goto FOUND_AM; |
1586 | } |
1587 | break; |
1588 | |
1589 | #endif // SCALED_ADDR_MODES |
1590 | #endif // !_TARGET_ARMARCH |
1591 | |
1592 | case GT_NOP: |
1593 | |
1594 | op2 = op2->gtOp.gtOp1; |
1595 | goto AGAIN; |
1596 | |
1597 | case GT_COMMA: |
1598 | |
1599 | op2 = op2->gtOp.gtOp2; |
1600 | goto AGAIN; |
1601 | |
1602 | default: |
1603 | break; |
1604 | } |
1605 | |
1606 | /* The best we can do is "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */ |
1607 | |
1608 | rv1 = op1; |
1609 | rv2 = op2; |
1610 | #ifdef _TARGET_ARM64_ |
1611 | assert(cns == 0); |
1612 | #endif |
1613 | |
1614 | FOUND_AM: |
1615 | |
1616 | if (rv2) |
1617 | { |
1618 | /* Make sure a GC address doesn't end up in 'rv2' */ |
1619 | |
1620 | if (varTypeIsGC(rv2->TypeGet())) |
1621 | { |
1622 | noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet())); |
1623 | |
1624 | tmp = rv1; |
1625 | rv1 = rv2; |
1626 | rv2 = tmp; |
1627 | |
1628 | rev = !rev; |
1629 | } |
1630 | |
1631 | /* Special case: constant array index (that is range-checked) */ |
1632 | |
1633 | if (fold) |
1634 | { |
1635 | ssize_t tmpMul; |
1636 | GenTree* index; |
1637 | |
1638 | if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI())) |
1639 | { |
1640 | /* For valuetype arrays where we can't use the scaled address |
1641 | mode, rv2 will point to the scaled index. So we have to do |
1642 | more work */ |
1643 | |
1644 | tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false)); |
1645 | if (mul) |
1646 | { |
1647 | tmpMul *= mul; |
1648 | } |
1649 | } |
1650 | else |
1651 | { |
1652 | /* May be a simple array. rv2 will point to the actual index */ |
1653 | |
1654 | index = rv2; |
1655 | tmpMul = mul; |
1656 | } |
1657 | |
1658 | /* Get hold of the array index and see if it's a constant */ |
1659 | if (index->IsIntCnsFitsInI32()) |
1660 | { |
1661 | /* Get hold of the index value */ |
1662 | ssize_t ixv = index->AsIntConCommon()->IconValue(); |
1663 | |
1664 | #if SCALED_ADDR_MODES |
1665 | /* Scale the index if necessary */ |
1666 | if (tmpMul) |
1667 | { |
1668 | ixv *= tmpMul; |
1669 | } |
1670 | #endif |
1671 | |
1672 | if (FitsIn<INT32>(cns + ixv)) |
1673 | { |
1674 | /* Add the scaled index to the offset value */ |
1675 | |
1676 | cns += ixv; |
1677 | |
1678 | #if SCALED_ADDR_MODES |
1679 | /* There is no scaled operand any more */ |
1680 | mul = 0; |
1681 | #endif |
1682 | rv2 = nullptr; |
1683 | } |
1684 | } |
1685 | } |
1686 | } |
1687 | |
1688 | // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns] |
1689 | noway_assert(rv1 || mul != 1); |
1690 | |
1691 | noway_assert(FitsIn<INT32>(cns)); |
1692 | |
1693 | if (rv1 == nullptr && rv2 == nullptr) |
1694 | { |
1695 | return false; |
1696 | } |
1697 | |
1698 | /* Success - return the various components to the caller */ |
1699 | |
1700 | *revPtr = rev; |
1701 | *rv1Ptr = rv1; |
1702 | *rv2Ptr = rv2; |
1703 | #if SCALED_ADDR_MODES |
1704 | *mulPtr = mul; |
1705 | #endif |
1706 | *cnsPtr = cns; |
1707 | |
1708 | return true; |
1709 | } |
1710 | |
1711 | /***************************************************************************** |
1712 | * The condition to use for (the jmp/set for) the given type of operation |
1713 | * |
1714 | * In case of amd64, this routine should be used when there is no gentree available |
1715 | * and one needs to generate jumps based on integer comparisons. When gentree is |
1716 | * available always use its overloaded version. |
1717 | * |
1718 | */ |
1719 | |
1720 | // static |
1721 | emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind) |
1722 | { |
1723 | const static BYTE genJCCinsSigned[] = { |
1724 | #if defined(_TARGET_XARCH_) |
1725 | EJ_je, // GT_EQ |
1726 | EJ_jne, // GT_NE |
1727 | EJ_jl, // GT_LT |
1728 | EJ_jle, // GT_LE |
1729 | EJ_jge, // GT_GE |
1730 | EJ_jg, // GT_GT |
1731 | EJ_je, // GT_TEST_EQ |
1732 | EJ_jne, // GT_TEST_NE |
1733 | #elif defined(_TARGET_ARMARCH_) |
1734 | EJ_eq, // GT_EQ |
1735 | EJ_ne, // GT_NE |
1736 | EJ_lt, // GT_LT |
1737 | EJ_le, // GT_LE |
1738 | EJ_ge, // GT_GE |
1739 | EJ_gt, // GT_GT |
1740 | #if defined(_TARGET_ARM64_) |
1741 | EJ_eq, // GT_TEST_EQ |
1742 | EJ_ne, // GT_TEST_NE |
1743 | #endif |
1744 | #endif |
1745 | }; |
1746 | |
1747 | const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */ |
1748 | { |
1749 | #if defined(_TARGET_XARCH_) |
1750 | EJ_je, // GT_EQ |
1751 | EJ_jne, // GT_NE |
1752 | EJ_jb, // GT_LT |
1753 | EJ_jbe, // GT_LE |
1754 | EJ_jae, // GT_GE |
1755 | EJ_ja, // GT_GT |
1756 | EJ_je, // GT_TEST_EQ |
1757 | EJ_jne, // GT_TEST_NE |
1758 | #elif defined(_TARGET_ARMARCH_) |
1759 | EJ_eq, // GT_EQ |
1760 | EJ_ne, // GT_NE |
1761 | EJ_lo, // GT_LT |
1762 | EJ_ls, // GT_LE |
1763 | EJ_hs, // GT_GE |
1764 | EJ_hi, // GT_GT |
1765 | #if defined(_TARGET_ARM64_) |
1766 | EJ_eq, // GT_TEST_EQ |
1767 | EJ_ne, // GT_TEST_NE |
1768 | #endif |
1769 | #endif |
1770 | }; |
1771 | |
1772 | const static BYTE genJCCinsLogical[] = /* logical operation */ |
1773 | { |
1774 | #if defined(_TARGET_XARCH_) |
1775 | EJ_je, // GT_EQ (Z == 1) |
1776 | EJ_jne, // GT_NE (Z == 0) |
1777 | EJ_js, // GT_LT (S == 1) |
1778 | EJ_NONE, // GT_LE |
1779 | EJ_jns, // GT_GE (S == 0) |
1780 | EJ_NONE, // GT_GT |
1781 | EJ_NONE, // GT_TEST_EQ |
1782 | EJ_NONE, // GT_TEST_NE |
1783 | #elif defined(_TARGET_ARMARCH_) |
1784 | EJ_eq, // GT_EQ (Z == 1) |
1785 | EJ_ne, // GT_NE (Z == 0) |
1786 | EJ_mi, // GT_LT (N == 1) |
1787 | EJ_NONE, // GT_LE |
1788 | EJ_pl, // GT_GE (N == 0) |
1789 | EJ_NONE, // GT_GT |
1790 | #if defined(_TARGET_ARM64_) |
1791 | EJ_eq, // GT_TEST_EQ |
1792 | EJ_ne, // GT_TEST_NE |
1793 | #endif |
1794 | #endif |
1795 | }; |
1796 | |
1797 | #if defined(_TARGET_XARCH_) |
1798 | assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je); |
1799 | assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne); |
1800 | assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl); |
1801 | assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle); |
1802 | assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge); |
1803 | assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg); |
1804 | assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je); |
1805 | assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne); |
1806 | |
1807 | assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je); |
1808 | assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne); |
1809 | assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb); |
1810 | assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe); |
1811 | assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae); |
1812 | assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja); |
1813 | assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je); |
1814 | assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne); |
1815 | |
1816 | assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je); |
1817 | assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne); |
1818 | assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js); |
1819 | assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns); |
1820 | #elif defined(_TARGET_ARMARCH_) |
1821 | assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq); |
1822 | assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne); |
1823 | assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt); |
1824 | assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le); |
1825 | assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge); |
1826 | assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt); |
1827 | |
1828 | assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq); |
1829 | assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne); |
1830 | assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo); |
1831 | assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls); |
1832 | assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs); |
1833 | assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi); |
1834 | |
1835 | assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq); |
1836 | assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne); |
1837 | assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi); |
1838 | assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl); |
1839 | #else |
1840 | assert(!"unknown arch" ); |
1841 | #endif |
1842 | assert(GenTree::OperIsCompare(cmp)); |
1843 | |
1844 | emitJumpKind result = EJ_COUNT; |
1845 | |
1846 | if (compareKind == CK_UNSIGNED) |
1847 | { |
1848 | result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ]; |
1849 | } |
1850 | else if (compareKind == CK_SIGNED) |
1851 | { |
1852 | result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ]; |
1853 | } |
1854 | else if (compareKind == CK_LOGICAL) |
1855 | { |
1856 | result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ]; |
1857 | } |
1858 | assert(result != EJ_COUNT); |
1859 | return result; |
1860 | } |
1861 | |
1862 | #ifdef _TARGET_ARMARCH_ |
1863 | //------------------------------------------------------------------------ |
1864 | // genEmitGSCookieCheck: Generate code to check that the GS cookie |
1865 | // wasn't thrashed by a buffer overrun. Common code for ARM32 and ARM64. |
1866 | // |
1867 | void CodeGen::genEmitGSCookieCheck(bool pushReg) |
1868 | { |
1869 | noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); |
1870 | |
1871 | // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while |
    // executing the GS cookie check will not collect the object pointed to by REG_INTRET (R0).
1873 | if (!pushReg && (compiler->info.compRetType == TYP_REF)) |
1874 | gcInfo.gcRegGCrefSetCur |= RBM_INTRET; |
1875 | |
1876 | // We need two temporary registers, to load the GS cookie values and compare them. We can't use |
1877 | // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be |
1878 | // callee-trash registers, which should not contain anything interesting at this point. |
1879 | // We don't have any IR node representing this check, so LSRA can't communicate registers |
1880 | // for us to use. |
1881 | |
1882 | regNumber regGSConst = REG_GSCOOKIE_TMP_0; |
1883 | regNumber regGSValue = REG_GSCOOKIE_TMP_1; |
1884 | |
1885 | if (compiler->gsGlobalSecurityCookieAddr == nullptr) |
1886 | { |
1887 | // load the GS cookie constant into a reg |
1888 | // |
1889 | genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); |
1890 | } |
1891 | else |
1892 | { |
1893 | // Ngen case - GS cookie constant needs to be accessed through an indirection. |
1894 | instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); |
1895 | getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0); |
1896 | } |
1897 | // Load this method's GS value from the stack frame |
1898 | getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); |
    // Compare with the GS cookie constant
1900 | getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue); |
1901 | |
1902 | BasicBlock* gsCheckBlk = genCreateTempLabel(); |
1903 | emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); |
1904 | inst_JMP(jmpEqual, gsCheckBlk); |
    // regGSConst and regGSValue aren't needed anymore; we can use them for the helper call
1906 | genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst); |
1907 | genDefineTempLabel(gsCheckBlk); |
1908 | } |
1909 | #endif // _TARGET_ARMARCH_ |
1910 | |
1911 | /***************************************************************************** |
1912 | * |
1913 | * Generate an exit sequence for a return from a method (note: when compiling |
1914 | * for speed there might be multiple exit points). |
1915 | */ |
1916 | |
1917 | void CodeGen::genExitCode(BasicBlock* block) |
1918 | { |
1919 | /* Just wrote the first instruction of the epilog - inform debugger |
1920 | Note that this may result in a duplicate IPmapping entry, and |
1921 | that this is ok */ |
1922 | |
1923 | // For non-optimized debuggable code, there is only one epilog. |
1924 | genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true); |
1925 | |
1926 | bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); |
1927 | if (compiler->getNeedsGSSecurityCookie()) |
1928 | { |
1929 | genEmitGSCookieCheck(jmpEpilog); |
1930 | |
1931 | if (jmpEpilog) |
1932 | { |
            // Dev10 642944 -
            // The GS cookie check created a temp label that has no live
            // incoming GC registers; we need to fix that.
1936 | |
1937 | unsigned varNum; |
1938 | LclVarDsc* varDsc; |
1939 | |
1940 | /* Figure out which register parameters hold pointers */ |
1941 | |
1942 | for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg; |
1943 | varNum++, varDsc++) |
1944 | { |
1945 | noway_assert(varDsc->lvIsParam); |
1946 | |
1947 | gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet()); |
1948 | } |
1949 | |
1950 | getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur; |
1951 | getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur; |
1952 | } |
1953 | } |
1954 | |
1955 | genReserveEpilog(block); |
1956 | } |
1957 | |
1958 | //------------------------------------------------------------------------ |
1959 | // genJumpToThrowHlpBlk: Generate code for an out-of-line exception. |
1960 | // |
1961 | // Notes: |
1962 | // For code that uses throw helper blocks, we share the helper blocks created by fgAddCodeRef(). |
1963 | // Otherwise, we generate the 'throw' inline. |
1964 | // |
1965 | // Arguments: |
1966 | // jumpKind - jump kind to generate; |
1967 | // codeKind - the special throw-helper kind; |
1968 | // failBlk - optional fail target block, if it is already known; |
1969 | // |
1970 | void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTree* failBlk) |
1971 | { |
1972 | bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks(); |
1973 | #if defined(UNIX_X86_ABI) && FEATURE_EH_FUNCLETS |
1974 | // Inline exception-throwing code in funclet to make it possible to unwind funclet frames. |
1975 | useThrowHlpBlk = useThrowHlpBlk && (compiler->funCurrentFunc()->funKind == FUNC_ROOT); |
1976 | #endif // UNIX_X86_ABI && FEATURE_EH_FUNCLETS |
1977 | |
1978 | if (useThrowHlpBlk) |
1979 | { |
1980 | // For code with throw helper blocks, find and use the helper block for |
1981 | // raising the exception. The block may be shared by other trees too. |
1982 | |
1983 | BasicBlock* excpRaisingBlock; |
1984 | |
1985 | if (failBlk != nullptr) |
1986 | { |
1987 | // We already know which block to jump to. Use that. |
1988 | assert(failBlk->gtOper == GT_LABEL); |
1989 | excpRaisingBlock = failBlk->gtLabel.gtLabBB; |
1990 | |
1991 | #ifdef DEBUG |
1992 | Compiler::AddCodeDsc* add = |
1993 | compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB)); |
1994 | assert(excpRaisingBlock == add->acdDstBlk); |
1995 | #if !FEATURE_FIXED_OUT_ARGS |
1996 | assert(add->acdStkLvlInit || isFramePointerUsed()); |
1997 | #endif // !FEATURE_FIXED_OUT_ARGS |
1998 | #endif // DEBUG |
1999 | } |
2000 | else |
2001 | { |
2002 | // Find the helper-block which raises the exception. |
2003 | Compiler::AddCodeDsc* add = |
2004 | compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB)); |
2005 | PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block" )); |
2006 | excpRaisingBlock = add->acdDstBlk; |
2007 | #if !FEATURE_FIXED_OUT_ARGS |
2008 | assert(add->acdStkLvlInit || isFramePointerUsed()); |
2009 | #endif // !FEATURE_FIXED_OUT_ARGS |
2010 | } |
2011 | |
2012 | noway_assert(excpRaisingBlock != nullptr); |
2013 | |
2014 | // Jump to the exception-throwing block on error. |
2015 | inst_JMP(jumpKind, excpRaisingBlock); |
2016 | } |
2017 | else |
2018 | { |
2019 | // The code to throw the exception will be generated inline, and |
2020 | // we will jump around it in the normal non-exception case. |
2021 | |
2022 | BasicBlock* tgtBlk = nullptr; |
2023 | emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind); |
2024 | if (reverseJumpKind != jumpKind) |
2025 | { |
2026 | tgtBlk = genCreateTempLabel(); |
2027 | inst_JMP(reverseJumpKind, tgtBlk); |
2028 | } |
2029 | |
2030 | genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN); |
2031 | |
2032 | // Define the spot for the normal non-exception case to jump to. |
2033 | if (tgtBlk != nullptr) |
2034 | { |
2035 | assert(reverseJumpKind != jumpKind); |
2036 | genDefineTempLabel(tgtBlk); |
2037 | } |
2038 | } |
2039 | } |
2040 | |
2041 | /***************************************************************************** |
2042 | * |
2043 | * The last operation done was generating code for "tree" and that would |
2044 | * have set the flags. Check if the operation caused an overflow. |
2045 | */ |
2046 | |
2047 | // inline |
2048 | void CodeGen::genCheckOverflow(GenTree* tree) |
2049 | { |
    // An overflow check must have been requested for this tree
2051 | noway_assert(tree->gtOverflow()); |
2052 | |
2053 | const var_types type = tree->TypeGet(); |
2054 | |
    // Overflow checks can only occur for the non-small types (i.e., TYP_INT, TYP_LONG)
2056 | noway_assert(!varTypeIsSmall(type)); |
2057 | |
2058 | emitJumpKind jumpKind; |
2059 | |
2060 | #ifdef _TARGET_ARM64_ |
2061 | if (tree->OperGet() == GT_MUL) |
2062 | { |
2063 | jumpKind = EJ_ne; |
2064 | } |
2065 | else |
2066 | #endif |
2067 | { |
2068 | bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0); |
2069 | |
2070 | #if defined(_TARGET_XARCH_) |
2071 | |
2072 | jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo; |
2073 | |
2074 | #elif defined(_TARGET_ARMARCH_) |
2075 | |
2076 | jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs; |
2077 | |
2078 | if (jumpKind == EJ_lo) |
2079 | { |
2080 | if (tree->OperGet() != GT_SUB) |
2081 | { |
2082 | jumpKind = EJ_hs; |
2083 | } |
2084 | } |
2085 | |
2086 | #endif // defined(_TARGET_ARMARCH_) |
2087 | } |
2088 | |
    // Jump to the block which will throw the exception
2090 | |
2091 | genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW); |
2092 | } |
2093 | |
2094 | #if FEATURE_EH_FUNCLETS |
2095 | |
2096 | /***************************************************************************** |
2097 | * |
2098 | * Update the current funclet as needed by calling genUpdateCurrentFunclet(). |
2099 | * For non-BBF_FUNCLET_BEG blocks, it asserts that the current funclet |
2100 | * is up-to-date. |
2101 | * |
2102 | */ |
2103 | |
2104 | void CodeGen::genUpdateCurrentFunclet(BasicBlock* block) |
2105 | { |
2106 | if (block->bbFlags & BBF_FUNCLET_BEG) |
2107 | { |
2108 | compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block)); |
2109 | if (compiler->funCurrentFunc()->funKind == FUNC_FILTER) |
2110 | { |
2111 | assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block); |
2112 | } |
2113 | else |
2114 | { |
2115 | // We shouldn't see FUNC_ROOT |
2116 | assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER); |
2117 | assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block); |
2118 | } |
2119 | } |
2120 | else |
2121 | { |
2122 | assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount); |
2123 | if (compiler->funCurrentFunc()->funKind == FUNC_FILTER) |
2124 | { |
2125 | assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block)); |
2126 | } |
2127 | else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT) |
2128 | { |
2129 | assert(!block->hasHndIndex()); |
2130 | } |
2131 | else |
2132 | { |
2133 | assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER); |
2134 | assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block)); |
2135 | } |
2136 | } |
2137 | } |
2138 | |
2139 | #if defined(_TARGET_ARM_) |
2140 | void CodeGen::genInsertNopForUnwinder(BasicBlock* block) |
2141 | { |
2142 | // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region, |
2143 | // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that |
2144 | // calls the funclet during non-exceptional control flow. |
2145 | if (block->bbFlags & BBF_FINALLY_TARGET) |
2146 | { |
2147 | assert(block->bbFlags & BBF_JMP_TARGET); |
2148 | |
2149 | #ifdef DEBUG |
2150 | if (compiler->verbose) |
2151 | { |
2152 | printf("\nEmitting finally target NOP predecessor for " FMT_BB "\n" , block->bbNum); |
2153 | } |
2154 | #endif |
2155 | // Create a label that we'll use for computing the start of an EH region, if this block is |
2156 | // at the beginning of such a region. If we used the existing bbEmitCookie as is for |
2157 | // determining the EH regions, then this NOP would end up outside of the region, if this |
2158 | // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP |
2159 | // would be executed, which we would prefer not to do. |
2160 | |
2161 | block->bbUnwindNopEmitCookie = |
2162 | getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); |
2163 | |
2164 | instGen(INS_nop); |
2165 | } |
2166 | } |
2167 | #endif |
2168 | |
2169 | #endif // FEATURE_EH_FUNCLETS |
2170 | |
2171 | /***************************************************************************** |
2172 | * |
2173 | * Generate code for the function. |
2174 | */ |
2175 | |
2176 | void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) |
2177 | { |
2178 | #ifdef DEBUG |
2179 | if (verbose) |
2180 | { |
2181 | printf("*************** In genGenerateCode()\n" ); |
2182 | compiler->fgDispBasicBlocks(compiler->verboseTrees); |
2183 | } |
2184 | #endif |
2185 | |
2186 | unsigned codeSize; |
2187 | unsigned prologSize; |
2188 | unsigned epilogSize; |
2189 | |
2190 | void* consPtr; |
2191 | |
2192 | #ifdef DEBUG |
2193 | genInterruptibleUsed = true; |
2194 | |
2195 | #if STACK_PROBES |
2196 | genNeedPrologStackProbe = false; |
2197 | #endif |
2198 | |
2199 | compiler->fgDebugCheckBBlist(); |
2200 | #endif // DEBUG |
2201 | |
2202 | /* This is the real thing */ |
2203 | |
2204 | genPrepForCompiler(); |
2205 | |
2206 | /* Prepare the emitter */ |
2207 | getEmitter()->Init(); |
2208 | #ifdef DEBUG |
2209 | VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler)); |
2210 | #endif |
2211 | |
2212 | #ifdef DEBUG |
2213 | if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg) |
2214 | { |
2215 | compiler->opts.disAsm = true; |
2216 | } |
2217 | |
2218 | if (compiler->opts.disAsm) |
2219 | { |
2220 | printf("; Assembly listing for method %s\n" , compiler->info.compFullName); |
2221 | |
2222 | printf("; Emitting " ); |
2223 | |
2224 | if (compiler->compCodeOpt() == Compiler::SMALL_CODE) |
2225 | { |
2226 | printf("SMALL_CODE" ); |
2227 | } |
2228 | else if (compiler->compCodeOpt() == Compiler::FAST_CODE) |
2229 | { |
2230 | printf("FAST_CODE" ); |
2231 | } |
2232 | else |
2233 | { |
2234 | printf("BLENDED_CODE" ); |
2235 | } |
2236 | |
2237 | printf(" for " ); |
2238 | |
2239 | if (compiler->info.genCPU == CPU_X86) |
2240 | { |
2241 | printf("generic X86 CPU" ); |
2242 | } |
2243 | else if (compiler->info.genCPU == CPU_X86_PENTIUM_4) |
2244 | { |
2245 | printf("Pentium 4" ); |
2246 | } |
2247 | else if (compiler->info.genCPU == CPU_X64) |
2248 | { |
2249 | if (compiler->canUseVexEncoding()) |
2250 | { |
2251 | printf("X64 CPU with AVX" ); |
2252 | } |
2253 | else |
2254 | { |
2255 | printf("X64 CPU with SSE2" ); |
2256 | } |
2257 | } |
2258 | else if (compiler->info.genCPU == CPU_ARM) |
2259 | { |
2260 | printf("generic ARM CPU" ); |
2261 | } |
2262 | else if (compiler->info.genCPU == CPU_ARM64) |
2263 | { |
2264 | printf("generic ARM64 CPU" ); |
2265 | } |
2266 | else |
2267 | { |
2268 | printf("unknown architecture" ); |
2269 | } |
2270 | |
2271 | #if defined(_TARGET_WINDOWS_) |
2272 | printf(" - Windows" ); |
2273 | #elif defined(_TARGET_UNIX_) |
2274 | printf(" - Unix" ); |
2275 | #endif |
2276 | |
2277 | printf("\n" ); |
2278 | |
2279 | if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0)) |
2280 | { |
2281 | printf("; Tier-0 compilation\n" ); |
2282 | } |
2283 | if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1)) |
2284 | { |
2285 | printf("; Tier-1 compilation\n" ); |
2286 | } |
2287 | |
2288 | if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT) |
2289 | { |
2290 | printf("; optimized code\n" ); |
2291 | } |
2292 | else if (compiler->opts.compDbgCode) |
2293 | { |
2294 | printf("; debuggable code\n" ); |
2295 | } |
2296 | else if (compiler->opts.MinOpts()) |
2297 | { |
2298 | printf("; compiler->opts.MinOpts() is true\n" ); |
2299 | } |
2300 | else |
2301 | { |
2302 | printf("; unknown optimization flags\n" ); |
2303 | } |
2304 | |
2305 | #if DOUBLE_ALIGN |
2306 | if (compiler->genDoubleAlign()) |
2307 | printf("; double-aligned frame\n" ); |
2308 | else |
2309 | #endif |
2310 | printf("; %s based frame\n" , isFramePointerUsed() ? STR_FPBASE : STR_SPBASE); |
2311 | |
2312 | if (genInterruptible) |
2313 | { |
2314 | printf("; fully interruptible\n" ); |
2315 | } |
2316 | else |
2317 | { |
2318 | printf("; partially interruptible\n" ); |
2319 | } |
2320 | |
2321 | if (compiler->fgHaveProfileData()) |
2322 | { |
2323 | printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n" , |
2324 | compiler->fgHaveValidEdgeWeights ? "valid" : "invalid" , compiler->fgCalledCount); |
2325 | } |
2326 | |
2327 | if (compiler->fgProfileData_ILSizeMismatch) |
2328 | { |
2329 | printf("; discarded IBC profile data due to mismatch in ILSize\n" ); |
2330 | } |
2331 | } |
2332 | #endif // DEBUG |
2333 | |
2334 | // We compute the final frame layout before code generation. This is because LSRA |
2335 | // has already computed exactly the maximum concurrent number of spill temps of each type that are |
2336 | // required during code generation. So, there is nothing left to estimate: we can be precise in the frame |
2337 | // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of |
2338 | // memory from the VM. |
2339 | |
2340 | genFinalizeFrame(); |
2341 | |
2342 | unsigned maxTmpSize = regSet.tmpGetTotalSize(); // This is precise after LSRA has pre-allocated the temps. |
2343 | |
2344 | getEmitter()->emitBegFN(isFramePointerUsed() |
2345 | #if defined(DEBUG) |
2346 | , |
2347 | (compiler->compCodeOpt() != Compiler::SMALL_CODE) && |
2348 | !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) |
2349 | #endif |
2350 | , |
2351 | maxTmpSize); |
2352 | |
2353 | /* Now generate code for the function */ |
2354 | genCodeForBBlist(); |
2355 | |
2356 | #ifdef DEBUG |
2357 | // After code generation, dump the frame layout again. It should be the same as before code generation, if code |
2358 | // generation hasn't touched it (it shouldn't!). |
2359 | if (verbose) |
2360 | { |
2361 | compiler->lvaTableDump(); |
2362 | } |
2363 | #endif // DEBUG |
2364 | |
2365 | /* We can now generate the function prolog and epilog */ |
2366 | |
2367 | genGeneratePrologsAndEpilogs(); |
2368 | |
2369 | /* Bind jump distances */ |
2370 | |
2371 | getEmitter()->emitJumpDistBind(); |
2372 | |
2373 | /* The code is now complete and final; it should not change after this. */ |
2374 | |
2375 | /* Compute the size of the code sections that we are going to ask the VM |
2376 | to allocate. Note that this might not be precisely the size of the |
2377 | code we emit, though it's fatal if we emit more code than the size we |
2378 | compute here. |
2379 | (Note: an example of a case where we emit less code would be useful.) |
2380 | */ |
2381 | |
2382 | getEmitter()->emitComputeCodeSizes(); |
2383 | |
2384 | #ifdef DEBUG |
2385 | |
2386 | // Code to test or stress our ability to run a fallback compile. |
2387 | // We trigger the fallback here, before asking the VM for any memory, |
2388 | // because if not, we will leak mem, as the current codebase can't free |
2389 | // the mem after the emitter asks the VM for it. As this is only a stress |
2390 | // mode, we only want the functionality, and don't care about the relative |
2391 | // ugliness of having the failure here. |
2392 | if (!compiler->jitFallbackCompile) |
2393 | { |
2394 | // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening, |
2395 | // especially that caused by enabling JIT stress. |
2396 | if (!JitConfig.JitNoForceFallback()) |
2397 | { |
2398 | if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5)) |
2399 | { |
2400 | NO_WAY_NOASSERT("Stress failure" ); |
2401 | } |
2402 | } |
2403 | } |
2404 | |
2405 | #endif // DEBUG |
2406 | |
2407 | /* We've finished collecting all the unwind information for the function. Now reserve |
2408 | space for it from the VM. |
2409 | */ |
2410 | |
2411 | compiler->unwindReserve(); |
2412 | |
2413 | #if DISPLAY_SIZES |
2414 | |
2415 | size_t dataSize = getEmitter()->emitDataSize(); |
2416 | |
2417 | #endif // DISPLAY_SIZES |
2418 | |
2419 | void* coldCodePtr; |
2420 | |
2421 | bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ? |
2422 | |
2423 | #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) |
2424 | trackedStackPtrsContig = false; |
2425 | #elif defined(_TARGET_ARM_) |
2426 | // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous |
2427 | trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded(); |
2428 | #else |
2429 | trackedStackPtrsContig = !compiler->opts.compDbgEnC; |
2430 | #endif |
2431 | |
2432 | #ifdef DEBUG |
2433 | /* We're done generating code for this function */ |
2434 | compiler->compCodeGenDone = true; |
2435 | #endif |
2436 | |
2437 | compiler->EndPhase(PHASE_GENERATE_CODE); |
2438 | |
2439 | codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap, |
2440 | (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount, |
2441 | &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr); |
2442 | |
2443 | compiler->EndPhase(PHASE_EMIT_CODE); |
2444 | |
2445 | #ifdef DEBUG |
2446 | if (compiler->opts.disAsm) |
2447 | { |
2448 | printf("; Total bytes of code %d, prolog size %d for method %s\n" , codeSize, prologSize, |
2449 | compiler->info.compFullName); |
2450 | printf("; ============================================================\n" ); |
2451 | printf("" ); // in our logic this causes a flush |
2452 | } |
2453 | |
2454 | if (verbose) |
2455 | { |
2456 | printf("*************** After end code gen, before unwindEmit()\n" ); |
2457 | getEmitter()->emitDispIGlist(true); |
2458 | } |
2459 | #endif |
2460 | |
2461 | #if EMIT_TRACK_STACK_DEPTH |
2462 | // Check our max stack level. Needed for fgAddCodeRef(). |
2463 | // We need to relax the assert as our estimation won't include code-gen |
2464 | // stack changes (which we know don't affect fgAddCodeRef()). |
2465 | // NOTE: after emitEndCodeGen (including here), emitMaxStackDepth is a |
2466 | // count of DWORD-sized arguments, NOT argument size in bytes. |
2467 | { |
2468 | unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments. |
2469 | compiler->compHndBBtabCount + // Return address for locally-called finallys |
2470 | genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc |
2471 | (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args |
2472 | #if defined(UNIX_X86_ABI) |
2473 | // Convert maxNestedAlignment to DWORD count before adding to maxAllowedStackDepth. |
2474 | assert(maxNestedAlignment % sizeof(int) == 0); |
2475 | maxAllowedStackDepth += maxNestedAlignment / sizeof(int); |
2476 | #endif |
2477 | noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth); |
2478 | } |
2479 | #endif // EMIT_TRACK_STACK_DEPTH |
2480 | |
2481 | *nativeSizeOfCode = codeSize; |
2482 | compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize; |
2483 | |
2484 | // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName); |
2485 | |
2486 | // Make sure that the x86 alignment and cache prefetch optimization rules |
2487 | // were obeyed. |
2488 | |
2489 | // Don't start a method in the last 7 bytes of a 16-byte alignment area |
2490 | // unless we are generating SMALL_CODE |
2491 | // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE)); |
2492 | |
2493 | /* Now that the code is issued, we can finalize and emit the unwind data */ |
2494 | |
2495 | compiler->unwindEmit(*codePtr, coldCodePtr); |
2496 | |
2497 | /* Finalize the line # tracking logic after we know the exact block sizes/offsets */ |
2498 | |
2499 | genIPmappingGen(); |
2500 | |
2501 | /* Finalize the Local Var info in terms of generated code */ |
2502 | |
2503 | genSetScopeInfo(); |
2504 | |
2505 | #ifdef LATE_DISASM |
2506 | unsigned finalHotCodeSize; |
2507 | unsigned finalColdCodeSize; |
2508 | if (compiler->fgFirstColdBlock != nullptr) |
2509 | { |
2510 | // We did some hot/cold splitting. The hot section is always padded out to the |
2511 | // size we thought it would be, but the cold section is not. |
2512 | assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize); |
2513 | assert(compiler->info.compTotalHotCodeSize > 0); |
2514 | assert(compiler->info.compTotalColdCodeSize > 0); |
2515 | finalHotCodeSize = compiler->info.compTotalHotCodeSize; |
2516 | finalColdCodeSize = codeSize - finalHotCodeSize; |
2517 | } |
2518 | else |
2519 | { |
2520 | // No hot/cold splitting |
2521 | assert(codeSize <= compiler->info.compTotalHotCodeSize); |
2522 | assert(compiler->info.compTotalHotCodeSize > 0); |
2523 | assert(compiler->info.compTotalColdCodeSize == 0); |
2524 | finalHotCodeSize = codeSize; |
2525 | finalColdCodeSize = 0; |
2526 | } |
2527 | getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize); |
2528 | #endif // LATE_DISASM |
2529 | |
2530 | /* Report any exception handlers to the VM */ |
2531 | |
2532 | genReportEH(); |
2533 | |
2534 | #ifdef JIT32_GCENCODER |
2535 | #ifdef DEBUG |
2536 | void* infoPtr = |
2537 | #endif // DEBUG |
2538 | #endif |
2539 | // Create and store the GC info for this method. |
2540 | genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr)); |
2541 | |
2542 | #ifdef DEBUG |
2543 | FILE* dmpf = jitstdout; |
2544 | |
2545 | compiler->opts.dmpHex = false; |
    if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for>" ))
2547 | { |
2548 | FILE* codf; |
2549 | errno_t ec = fopen_s(&codf, "C:\\JIT.COD" , "at" ); // NOTE: file append mode |
        if (ec == 0) // fopen_s returns 0 on success; only use the file if it was opened
2551 | { |
2552 | assert(codf); |
2553 | dmpf = codf; |
2554 | compiler->opts.dmpHex = true; |
2555 | } |
2556 | } |
2557 | if (compiler->opts.dmpHex) |
2558 | { |
2559 | size_t consSize = getEmitter()->emitDataSize(); |
2560 | size_t infoSize = compiler->compInfoBlkSize; |
2561 | |
2562 | fprintf(dmpf, "Generated code for %s:\n" , compiler->info.compFullName); |
2563 | fprintf(dmpf, "\n" ); |
2564 | |
2565 | if (codeSize) |
2566 | { |
2567 | fprintf(dmpf, " Code at %p [%04X bytes]\n" , dspPtr(*codePtr), codeSize); |
2568 | } |
2569 | if (consSize) |
2570 | { |
2571 | fprintf(dmpf, " Const at %p [%04X bytes]\n" , dspPtr(consPtr), consSize); |
2572 | } |
2573 | #ifdef JIT32_GCENCODER |
2574 | if (infoSize) |
2575 | fprintf(dmpf, " Info at %p [%04X bytes]\n" , dspPtr(infoPtr), infoSize); |
2576 | #endif // JIT32_GCENCODER |
2577 | |
2578 | fprintf(dmpf, "\n" ); |
2579 | |
2580 | if (codeSize) |
2581 | { |
2582 | hexDump(dmpf, "Code" , (BYTE*)*codePtr, codeSize); |
2583 | } |
2584 | if (consSize) |
2585 | { |
2586 | hexDump(dmpf, "Const" , (BYTE*)consPtr, consSize); |
2587 | } |
2588 | #ifdef JIT32_GCENCODER |
2589 | if (infoSize) |
2590 | hexDump(dmpf, "Info" , (BYTE*)infoPtr, infoSize); |
2591 | #endif // JIT32_GCENCODER |
2592 | |
2593 | fflush(dmpf); |
2594 | } |
2595 | |
2596 | if (dmpf != jitstdout) |
2597 | { |
2598 | fclose(dmpf); |
2599 | } |
2600 | |
2601 | #endif // DEBUG |
2602 | |
2603 | /* Tell the emitter that we're done with this function */ |
2604 | |
2605 | getEmitter()->emitEndFN(); |
2606 | |
2607 | /* Shut down the spill logic */ |
2608 | |
2609 | regSet.rsSpillDone(); |
2610 | |
2611 | /* Shut down the temp logic */ |
2612 | |
2613 | regSet.tmpDone(); |
2614 | |
2615 | #if DISPLAY_SIZES |
2616 | |
2617 | grossVMsize += compiler->info.compILCodeSize; |
2618 | totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize; |
2619 | grossNCsize += codeSize + dataSize; |
2620 | |
2621 | #endif // DISPLAY_SIZES |
2622 | |
2623 | compiler->EndPhase(PHASE_EMIT_GCEH); |
2624 | } |
2625 | |
2626 | /***************************************************************************** |
2627 | * |
2628 | * Report EH clauses to the VM |
2629 | */ |
2630 | |
2631 | void CodeGen::genReportEH() |
2632 | { |
2633 | if (compiler->compHndBBtabCount == 0) |
2634 | { |
2635 | return; |
2636 | } |
2637 | |
2638 | #ifdef DEBUG |
2639 | if (compiler->opts.dspEHTable) |
2640 | { |
2641 | printf("*************** EH table for %s\n" , compiler->info.compFullName); |
2642 | } |
2643 | #endif // DEBUG |
2644 | |
2645 | unsigned XTnum; |
2646 | EHblkDsc* HBtab; |
2647 | EHblkDsc* HBtabEnd; |
2648 | |
2649 | bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI); |
2650 | |
2651 | unsigned EHCount = compiler->compHndBBtabCount; |
2652 | |
2653 | #if FEATURE_EH_FUNCLETS |
2654 | // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the |
2655 | // VM. |
2656 | unsigned duplicateClauseCount = 0; |
2657 | unsigned enclosingTryIndex; |
2658 | |
2659 | // Duplicate clauses are not used by CoreRT ABI |
2660 | if (!isCoreRTABI) |
2661 | { |
2662 | for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++) |
2663 | { |
2664 | for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index, |
2665 | // ignoring 'mutual protect' trys |
2666 | enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX; |
2667 | enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex)) |
2668 | { |
2669 | ++duplicateClauseCount; |
2670 | } |
2671 | } |
2672 | EHCount += duplicateClauseCount; |
2673 | } |
2674 | |
2675 | #if FEATURE_EH_CALLFINALLY_THUNKS |
2676 | unsigned clonedFinallyCount = 0; |
2677 | |
2678 | // Duplicate clauses are not used by CoreRT ABI |
2679 | if (!isCoreRTABI) |
2680 | { |
        // We don't keep track of how many cloned finallys there are. So, go through and count.
2682 | // We do a quick pass first through the EH table to see if there are any try/finally |
2683 | // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY. |
2684 | |
2685 | bool anyFinallys = false; |
2686 | for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount; |
2687 | HBtab < HBtabEnd; HBtab++) |
2688 | { |
2689 | if (HBtab->HasFinallyHandler()) |
2690 | { |
2691 | anyFinallys = true; |
2692 | break; |
2693 | } |
2694 | } |
2695 | if (anyFinallys) |
2696 | { |
2697 | for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext) |
2698 | { |
2699 | if (block->bbJumpKind == BBJ_CALLFINALLY) |
2700 | { |
2701 | ++clonedFinallyCount; |
2702 | } |
2703 | } |
2704 | |
2705 | EHCount += clonedFinallyCount; |
2706 | } |
2707 | } |
2708 | #endif // FEATURE_EH_CALLFINALLY_THUNKS |
2709 | |
2710 | #endif // FEATURE_EH_FUNCLETS |
2711 | |
2712 | #ifdef DEBUG |
2713 | if (compiler->opts.dspEHTable) |
2714 | { |
2715 | #if FEATURE_EH_FUNCLETS |
2716 | #if FEATURE_EH_CALLFINALLY_THUNKS |
2717 | printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n" , |
2718 | compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount); |
2719 | assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount); |
2720 | #else // !FEATURE_EH_CALLFINALLY_THUNKS |
2721 | printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n" , |
2722 | compiler->compHndBBtabCount, duplicateClauseCount, EHCount); |
2723 | assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount); |
2724 | #endif // !FEATURE_EH_CALLFINALLY_THUNKS |
2725 | #else // !FEATURE_EH_FUNCLETS |
2726 | printf("%d EH table entries, %d total EH entries reported to VM\n" , compiler->compHndBBtabCount, EHCount); |
2727 | assert(compiler->compHndBBtabCount == EHCount); |
2728 | #endif // !FEATURE_EH_FUNCLETS |
2729 | } |
2730 | #endif // DEBUG |
2731 | |
2732 | // Tell the VM how many EH clauses to expect. |
2733 | compiler->eeSetEHcount(EHCount); |
2734 | |
2735 | XTnum = 0; // This is the index we pass to the VM |
2736 | |
2737 | for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount; |
2738 | HBtab < HBtabEnd; HBtab++) |
2739 | { |
2740 | UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp; |
2741 | |
2742 | tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg); |
2743 | hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg); |
2744 | |
2745 | tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize |
2746 | : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext); |
2747 | hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize |
2748 | : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext); |
2749 | |
2750 | if (HBtab->HasFilter()) |
2751 | { |
2752 | hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter); |
2753 | } |
2754 | else |
2755 | { |
2756 | hndTyp = HBtab->ebdTyp; |
2757 | } |
2758 | |
2759 | CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType); |
2760 | |
2761 | if (isCoreRTABI && (XTnum > 0)) |
2762 | { |
            // For CoreRT, the CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers the
            // same try block as the previous one. The runtime cannot reliably infer this information
            // from native code offsets, because different try blocks can have the same offsets. An
            // alternative solution to this problem would be to insert extra nops to ensure that
            // different try blocks have different offsets.
2768 | if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1)) |
2769 | { |
2770 | // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is |
2771 | // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in |
2772 | // IL as "try { try {} catch {} catch {} } finally {}". |
2773 | assert(HBtab->HasCatchHandler()); |
2774 | flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY); |
2775 | } |
2776 | } |
2777 | |
2778 | // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of |
2779 | // the fields aren't accurate. |
2780 | |
2781 | CORINFO_EH_CLAUSE clause; |
2782 | clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */ |
2783 | clause.Flags = flags; |
2784 | clause.TryOffset = tryBeg; |
2785 | clause.TryLength = tryEnd; |
2786 | clause.HandlerOffset = hndBeg; |
2787 | clause.HandlerLength = hndEnd; |
2788 | |
2789 | assert(XTnum < EHCount); |
2790 | |
2791 | // Tell the VM about this EH clause. |
2792 | compiler->eeSetEHinfo(XTnum, &clause); |
2793 | |
2794 | ++XTnum; |
2795 | } |
2796 | |
2797 | #if FEATURE_EH_FUNCLETS |
2798 | // Now output duplicated clauses. |
2799 | // |
2800 | // If a funclet has been created by moving a handler out of a try region that it was originally nested |
2801 | // within, then we need to report a "duplicate" clause representing the fact that an exception in that |
2802 | // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region |
2803 | // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is |
2804 | // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler |
2805 | // region as the enclosing try region's handler region. This is the sense in which it is duplicated: |
2806 | // there is now a "duplicate" clause with the same handler region as another, but a different 'try' |
2807 | // region. |
2808 | // |
2809 | // For example, consider this (capital letters represent an unknown code sequence, numbers identify a |
2810 | // try or handler region): |
2811 | // |
2812 | // A |
2813 | // try (1) { |
2814 | // B |
2815 | // try (2) { |
2816 | // C |
2817 | // } catch (3) { |
2818 | // D |
2819 | // } catch (4) { |
2820 | // E |
2821 | // } |
2822 | // F |
2823 | // } catch (5) { |
2824 | // G |
2825 | // } |
2826 | // H |
2827 | // |
2828 | // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected |
2829 | // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D". |
2830 | // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4) |
2831 | // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again |
2832 | // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that. |
2833 | // The code "D" and "E" won't be contiguous with the protected region for try (1) (which |
2834 | // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor |
2835 | // representing try (1) protecting the new funclets catch (3) and (4). |
2836 | // The code will be generated as follows: |
2837 | // |
2838 | // ABCFH // "main" code |
2839 | // D // funclet |
2840 | // E // funclet |
2841 | // G // funclet |
2842 | // |
2843 | // The EH regions are: |
2844 | // |
2845 | // C -> D |
2846 | // C -> E |
2847 | // BCF -> G |
2848 | // D -> G // "duplicate" clause |
2849 | // E -> G // "duplicate" clause |
2850 | // |
2851 | // Note that we actually need to generate one of these additional "duplicate" clauses for every |
2852 | // region the funclet is nested in. Take this example: |
2853 | // |
2854 | // A |
2855 | // try (1) { |
2856 | // B |
2857 | // try (2,3) { |
2858 | // C |
2859 | // try (4) { |
2860 | // D |
2861 | // try (5,6) { |
2862 | // E |
2863 | // } catch { |
2864 | // F |
2865 | // } catch { |
2866 | // G |
2867 | // } |
2868 | // H |
2869 | // } catch { |
2870 | // I |
2871 | // } |
2872 | // J |
2873 | // } catch { |
2874 | // K |
2875 | // } catch { |
2876 | // L |
2877 | // } |
2878 | // M |
2879 | // } catch { |
2880 | // N |
2881 | // } |
2882 | // O |
2883 | // |
2884 | // When we pull out funclets, we get the following generated code: |
2885 | // |
2886 | // ABCDEHJMO // "main" function |
2887 | // F // funclet |
2888 | // G // funclet |
2889 | // I // funclet |
2890 | // K // funclet |
2891 | // L // funclet |
2892 | // N // funclet |
2893 | // |
2894 | // And the EH regions we report to the VM are (in order; main clauses |
2895 | // first in most-to-least nested order, funclets ("duplicated clauses") |
2896 | // last, in most-to-least nested) are: |
2897 | // |
2898 | // E -> F |
2899 | // E -> G |
2900 | // DEH -> I |
2901 | // CDEHJ -> K |
2902 | // CDEHJ -> L |
2903 | // BCDEHJM -> N |
2904 | // F -> I // funclet clause #1 for F |
2905 | // F -> K // funclet clause #2 for F |
2906 | // F -> L // funclet clause #3 for F |
2907 | // F -> N // funclet clause #4 for F |
2908 | // G -> I // funclet clause #1 for G |
2909 | // G -> K // funclet clause #2 for G |
2910 | // G -> L // funclet clause #3 for G |
2911 | // G -> N // funclet clause #4 for G |
2912 | // I -> K // funclet clause #1 for I |
2913 | // I -> L // funclet clause #2 for I |
2914 | // I -> N // funclet clause #3 for I |
2915 | // K -> N // funclet clause #1 for K |
2916 | // L -> N // funclet clause #1 for L |
2917 | // |
2918 | // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM. |
2919 | // Note that due to the nature of 'mutually protect' clauses, it would be incorrect |
2920 | // to add a clause "F -> G" because F is NOT protected by G, but we still have |
2921 | // both "F -> K" and "F -> L" because F IS protected by both of those handlers. |
2922 | // |
2923 | // The overall ordering of the clauses is still the same most-to-least nesting |
2924 | // after front-to-back start offset. Because we place the funclets at the end |
2925 | // these new clauses should also go at the end by this ordering. |
2926 | // |
2927 | |
2928 | if (duplicateClauseCount > 0) |
2929 | { |
2930 | unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported? |
2931 | unsigned XTnum2; |
2932 | for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++) |
2933 | { |
2934 | unsigned enclosingTryIndex; |
2935 | |
2936 | EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2); |
2937 | |
2938 | for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index, |
2939 | // ignoring 'mutual protect' trys |
2940 | enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX; |
2941 | enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex)) |
2942 | { |
2943 | // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet |
2944 | // that will have the enclosing try protecting the funclet. |
2945 | |
2946 | noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a |
2947 | // greater EH table index |
2948 | |
2949 | EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex); |
2950 | |
2951 | // The try region is the handler of the funclet. Note that for filters, we don't protect the |
2952 | // filter region, only the filter handler region. This is because exceptions in filters never |
2953 | // escape; the VM swallows them. |
2954 | |
2955 | BasicBlock* bbTryBeg = fletTab->ebdHndBeg; |
2956 | BasicBlock* bbTryLast = fletTab->ebdHndLast; |
2957 | |
2958 | BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try |
2959 | BasicBlock* bbHndLast = encTab->ebdHndLast; |
2960 | |
2961 | UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp; |
2962 | |
2963 | tryBeg = compiler->ehCodeOffset(bbTryBeg); |
2964 | hndBeg = compiler->ehCodeOffset(bbHndBeg); |
2965 | |
2966 | tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize |
2967 | : compiler->ehCodeOffset(bbTryLast->bbNext); |
2968 | hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize |
2969 | : compiler->ehCodeOffset(bbHndLast->bbNext); |
2970 | |
2971 | if (encTab->HasFilter()) |
2972 | { |
2973 | hndTyp = compiler->ehCodeOffset(encTab->ebdFilter); |
2974 | } |
2975 | else |
2976 | { |
2977 | hndTyp = encTab->ebdTyp; |
2978 | } |
2979 | |
2980 | CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType); |
2981 | |
2982 | // Tell the VM this is an extra clause caused by moving funclets out of line. |
2983 | flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE); |
2984 | |
2985 | // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of |
2986 | // the fields aren't really accurate. For example, we set "TryLength" to the offset of the |
2987 | // instruction immediately after the 'try' body. So, it really could be more accurately named |
2988 | // "TryEndOffset". |
2989 | |
2990 | CORINFO_EH_CLAUSE clause; |
2991 | clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */ |
2992 | clause.Flags = flags; |
2993 | clause.TryOffset = tryBeg; |
2994 | clause.TryLength = tryEnd; |
2995 | clause.HandlerOffset = hndBeg; |
2996 | clause.HandlerLength = hndEnd; |
2997 | |
2998 | assert(XTnum < EHCount); |
2999 | |
3000 | // Tell the VM about this EH clause (a duplicated clause). |
3001 | compiler->eeSetEHinfo(XTnum, &clause); |
3002 | |
3003 | ++XTnum; |
3004 | ++reportedDuplicateClauseCount; |
3005 | |
3006 | #ifndef DEBUG |
3007 | if (duplicateClauseCount == reportedDuplicateClauseCount) |
3008 | { |
3009 | break; // we've reported all of them; no need to continue looking |
3010 | } |
3011 | #endif // !DEBUG |
3012 | |
3013 | } // for each 'true' enclosing 'try' |
3014 | } // for each EH table entry |
3015 | |
3016 | assert(duplicateClauseCount == reportedDuplicateClauseCount); |
3017 | } // if (duplicateClauseCount > 0) |
3018 | |
3019 | #if FEATURE_EH_CALLFINALLY_THUNKS |
3020 | if (clonedFinallyCount > 0) |
3021 | { |
3022 | unsigned reportedClonedFinallyCount = 0; |
3023 | for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext) |
3024 | { |
3025 | if (block->bbJumpKind == BBJ_CALLFINALLY) |
3026 | { |
3027 | UNATIVE_OFFSET hndBeg, hndEnd; |
3028 | |
3029 | hndBeg = compiler->ehCodeOffset(block); |
3030 | |
3031 | // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be |
3032 | // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through. |
3033 | BasicBlock* bbLabel = block->bbNext; |
3034 | if (block->isBBCallAlwaysPair()) |
3035 | { |
3036 | bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS |
3037 | } |
3038 | if (bbLabel == nullptr) |
3039 | { |
3040 | hndEnd = compiler->info.compNativeCodeSize; |
3041 | } |
3042 | else |
3043 | { |
3044 | assert(bbLabel->bbEmitCookie != nullptr); |
3045 | hndEnd = compiler->ehCodeOffset(bbLabel); |
3046 | } |
3047 | |
3048 | CORINFO_EH_CLAUSE clause; |
3049 | clause.ClassToken = 0; // unused |
3050 | clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE); |
3051 | clause.TryOffset = hndBeg; |
3052 | clause.TryLength = hndBeg; |
3053 | clause.HandlerOffset = hndBeg; |
3054 | clause.HandlerLength = hndEnd; |
3055 | |
3056 | assert(XTnum < EHCount); |
3057 | |
3058 | // Tell the VM about this EH clause (a cloned finally clause). |
3059 | compiler->eeSetEHinfo(XTnum, &clause); |
3060 | |
3061 | ++XTnum; |
3062 | ++reportedClonedFinallyCount; |
3063 | |
3064 | #ifndef DEBUG |
3065 | if (clonedFinallyCount == reportedClonedFinallyCount) |
3066 | { |
3067 | break; // we're done; no need to keep looking |
3068 | } |
3069 | #endif // !DEBUG |
3070 | } // block is BBJ_CALLFINALLY |
3071 | } // for each block |
3072 | |
3073 | assert(clonedFinallyCount == reportedClonedFinallyCount); |
3074 | } // if (clonedFinallyCount > 0) |
3075 | #endif // FEATURE_EH_CALLFINALLY_THUNKS |
3076 | |
3077 | #endif // FEATURE_EH_FUNCLETS |
3078 | |
3079 | assert(XTnum == EHCount); |
3080 | } |
3081 | |
3082 | //---------------------------------------------------------------------- |
3083 | // genUseOptimizedWriteBarriers: Determine if an optimized write barrier |
3084 | // helper should be used. |
3085 | // |
3086 | // Arguments: |
3087 | // wbf - The WriteBarrierForm of the write (GT_STOREIND) that is happening. |
3088 | // |
3089 | // Return Value: |
3090 | // true if an optimized write barrier helper should be used, false otherwise. |
3091 | // Note: only x86 implements register-specific source optimized write |
3092 | // barriers currently. |
3093 | // |
3094 | bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf) |
3095 | { |
3096 | #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS |
3097 | #ifdef DEBUG |
3098 | return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method. |
3099 | #else |
3100 | return true; |
3101 | #endif |
3102 | #else |
3103 | return false; |
3104 | #endif |
3105 | } |
3106 | |
3107 | //---------------------------------------------------------------------- |
3108 | // genUseOptimizedWriteBarriers: Determine if an optimized write barrier |
3109 | // helper should be used. |
3110 | // |
3111 | // This has the same functionality as the version of |
3112 | // genUseOptimizedWriteBarriers that takes a WriteBarrierForm, but avoids |
3113 | // determining what the required write barrier form is, if possible. |
3114 | // |
3115 | // Arguments: |
3116 | // tgt - target tree of write (e.g., GT_STOREIND) |
3117 | // assignVal - tree with value to write |
3118 | // |
3119 | // Return Value: |
3120 | // true if an optimized write barrier helper should be used, false otherwise. |
3121 | // Note: only x86 implements register-specific source optimized write |
3122 | // barriers currently. |
3123 | // |
3124 | bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTree* tgt, GenTree* assignVal) |
3125 | { |
3126 | #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS |
3127 | #ifdef DEBUG |
3128 | GCInfo::WriteBarrierForm wbf = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal); |
3129 | return (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method. |
3130 | #else |
3131 | return true; |
3132 | #endif |
3133 | #else |
3134 | return false; |
3135 | #endif |
3136 | } |
3137 | |
3138 | //---------------------------------------------------------------------- |
3139 | // genWriteBarrierHelperForWriteBarrierForm: Given a write node requiring a write |
3140 | // barrier, and the write barrier form required, determine the helper to call. |
3141 | // |
3142 | // Arguments: |
3143 | // tgt - target tree of write (e.g., GT_STOREIND) |
3144 | // wbf - already computed write barrier form to use |
3145 | // |
3146 | // Return Value: |
3147 | // Write barrier helper to use. |
3148 | // |
3149 | // Note: do not call this function to get an optimized write barrier helper (e.g., |
3150 | // for x86). |
3151 | // |
3152 | CorInfoHelpFunc CodeGenInterface::genWriteBarrierHelperForWriteBarrierForm(GenTree* tgt, GCInfo::WriteBarrierForm wbf) |
3153 | { |
3154 | noway_assert(tgt->gtOper == GT_STOREIND); |
3155 | |
3156 | CorInfoHelpFunc helper = CORINFO_HELP_ASSIGN_REF; |
3157 | |
3158 | #ifdef DEBUG |
3159 | if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) |
3160 | { |
3161 | helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP; |
3162 | } |
3163 | else |
3164 | #endif |
3165 | if (tgt->gtOper != GT_CLS_VAR) |
3166 | { |
3167 | if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below. |
3168 | { |
3169 | if (tgt->gtFlags & GTF_IND_TGTANYWHERE) |
3170 | { |
3171 | helper = CORINFO_HELP_CHECKED_ASSIGN_REF; |
3172 | } |
3173 | else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL) |
3174 | { |
3175 | helper = CORINFO_HELP_CHECKED_ASSIGN_REF; |
3176 | } |
3177 | } |
3178 | } |
3179 | assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) || |
3180 | ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) && |
3181 | (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) || |
3182 | ((helper == CORINFO_HELP_ASSIGN_REF) && |
3183 | (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown))); |
3184 | |
3185 | return helper; |
3186 | } |
3187 | |
3188 | //---------------------------------------------------------------------- |
3189 | // genGCWriteBarrier: Generate a write barrier for a node. |
3190 | // |
3191 | // Arguments: |
3192 | // tgt - target tree of write (e.g., GT_STOREIND) |
3193 | // wbf - already computed write barrier form to use |
3194 | // |
3195 | void CodeGen::genGCWriteBarrier(GenTree* tgt, GCInfo::WriteBarrierForm wbf) |
3196 | { |
3197 | CorInfoHelpFunc helper = genWriteBarrierHelperForWriteBarrierForm(tgt, wbf); |
3198 | |
3199 | #ifdef FEATURE_COUNT_GC_WRITE_BARRIERS |
3200 | // We classify the "tgt" trees as follows: |
3201 | // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"): |
    // IND [-> ADDR -> IND] -> { GT_LCL_VAR, ADD({GT_LCL_VAR}, X), ADD(X, {GT_LCL_VAR}) }
3203 | // then let "v" be the GT_LCL_VAR. |
3204 | // * If "v" is the return buffer argument, classify as CWBKind_RetBuf. |
3205 | // * If "v" is another by-ref argument, classify as CWBKind_ByRefArg. |
3206 | // * Otherwise, classify as CWBKind_OtherByRefLocal. |
3207 | // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, clasify as CWBKind_AddrOfLocal. |
3208 | // Otherwise, classify as CWBKind_Unclassified. |
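    //
    // For example (illustrative): a store through an incoming by-ref parameter has the shape
    // IND(GT_LCL_VAR), where the local is a TYP_BYREF parameter; assuming it is not the return
    // buffer, the code below classifies it as CWBKind_ByRefArg.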
3209 | |
3210 | CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified; |
3211 | if (tgt->gtOper == GT_IND) |
3212 | { |
3213 | GenTree* lcl = NULL; |
3214 | |
3215 | GenTree* indArg = tgt->gtOp.gtOp1; |
3216 | if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND) |
3217 | { |
3218 | indArg = indArg->gtOp.gtOp1->gtOp.gtOp1; |
3219 | } |
3220 | if (indArg->gtOper == GT_LCL_VAR) |
3221 | { |
3222 | lcl = indArg; |
3223 | } |
3224 | else if (indArg->gtOper == GT_ADD) |
3225 | { |
3226 | if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR) |
3227 | { |
3228 | lcl = indArg->gtOp.gtOp1; |
3229 | } |
3230 | else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR) |
3231 | { |
3232 | lcl = indArg->gtOp.gtOp2; |
3233 | } |
3234 | } |
3235 | if (lcl != NULL) |
3236 | { |
3237 | wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable. |
3238 | unsigned lclNum = lcl->AsLclVar()->GetLclNum(); |
3239 | if (lclNum == compiler->info.compRetBuffArg) |
3240 | { |
3241 | wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit. |
3242 | } |
3243 | else |
3244 | { |
3245 | LclVarDsc* varDsc = &compiler->lvaTable[lclNum]; |
3246 | if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF) |
3247 | { |
3248 | wbKind = CWBKind_ByRefArg; // Out (or in/out) arg |
3249 | } |
3250 | } |
3251 | } |
3252 | else |
3253 | { |
3254 | // We should have eliminated the barrier for this case. |
3255 | assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR)); |
3256 | } |
3257 | } |
3258 | |
3259 | if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF) |
3260 | { |
3261 | #if 0 |
3262 | #ifdef DEBUG |
3263 | // Enable this to sample the unclassified trees. |
3264 | static int unclassifiedBarrierSite = 0; |
3265 | if (wbKind == CWBKind_Unclassified) |
3266 | { |
3267 | unclassifiedBarrierSite++; |
3268 | printf("unclassifiedBarrierSite = %d:\n" , unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf("" ); printf("\n" ); |
3269 | } |
3270 | #endif // DEBUG |
3271 | #endif // 0 |
3272 | AddStackLevel(4); |
3273 | inst_IV(INS_push, wbKind); |
3274 | genEmitHelperCall(helper, |
3275 | 4, // argSize |
3276 | EA_PTRSIZE); // retSize |
3277 | SubtractStackLevel(4); |
3278 | } |
3279 | else |
3280 | { |
3281 | genEmitHelperCall(helper, |
3282 | 0, // argSize |
3283 | EA_PTRSIZE); // retSize |
3284 | } |
3285 | |
3286 | #else // !FEATURE_COUNT_GC_WRITE_BARRIERS |
3287 | genEmitHelperCall(helper, |
3288 | 0, // argSize |
3289 | EA_PTRSIZE); // retSize |
3290 | #endif // !FEATURE_COUNT_GC_WRITE_BARRIERS |
3291 | } |
3292 | |
3293 | /* |
3294 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
3295 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
3296 | XX XX |
3297 | XX Prolog / Epilog XX |
3298 | XX XX |
3299 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
3300 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
3301 | */ |
3302 | |
3303 | /***************************************************************************** |
3304 | * |
3305 | * Generates code for moving incoming register arguments to their |
3306 | * assigned location, in the function prolog. |
3307 | */ |
3308 | |
3309 | #ifdef _PREFAST_ |
3310 | #pragma warning(push) |
3311 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
3312 | #endif |
3313 | void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) |
3314 | { |
3315 | #ifdef DEBUG |
3316 | if (verbose) |
3317 | { |
3318 | printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n" , regState->rsIsFloat ? "float" : "int" ); |
3319 | } |
3320 | #endif |
3321 | |
3322 | unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg) |
3323 | unsigned argNum; // current argNum, always in [0..argMax-1] |
3324 | unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64) |
3325 | unsigned regArgNum; // index into the regArgTab[] table |
3326 | regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; |
3327 | bool doingFloat = regState->rsIsFloat; |
3328 | |
3329 | // We should be generating the prolog block when we are called |
3330 | assert(compiler->compGeneratingProlog); |
3331 | |
    // We expect to have some registers of the type we are processing that are live on entry;
    // otherwise we don't need to be called.
3333 | noway_assert(regArgMaskLive != 0); |
3334 | |
    // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
    // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid indexes are 0-8
3337 | // |
3338 | // The regArgTab can always have unused entries, |
3339 | // for example if an architecture always increments the arg register number but uses either |
3340 | // an integer register or a floating point register to hold the next argument |
3341 | // then with a mix of float and integer args you could have: |
3342 | // |
3343 | // sampleMethod(int i, float x, int j, float y, int k, float z); |
3344 | // r0, r2 and r4 as valid integer arguments with argMax as 5 |
    //      and f1, f3 and f5 as valid floating point arguments with argMax as 6
3346 | // The first one is doingFloat==false and the second one is doingFloat==true |
3347 | // |
3348 | // If a fixed return buffer (in r8) was also present then the first one would become: |
3349 | // r0, r2, r4 and r8 as valid integer arguments with argMax as 9 |
3350 | // |
3351 | |
3352 | argMax = regState->rsCalleeRegArgCount; |
3353 | fixedRetBufIndex = (unsigned)-1; // Invalid value |
3354 | |
3355 | // If necessary we will select a correct xtraReg for circular floating point args later. |
3356 | if (doingFloat) |
3357 | { |
3358 | xtraReg = REG_NA; |
3359 | noway_assert(argMax <= MAX_FLOAT_REG_ARG); |
3360 | } |
3361 | else // we are doing the integer registers |
3362 | { |
3363 | noway_assert(argMax <= MAX_REG_ARG); |
3364 | if (hasFixedRetBuffReg()) |
3365 | { |
3366 | fixedRetBufIndex = theFixedRetBuffArgNum(); |
3367 | // We have an additional integer register argument when hasFixedRetBuffReg() is true |
3368 | argMax = fixedRetBufIndex + 1; |
3369 | assert(argMax == (MAX_REG_ARG + 1)); |
3370 | } |
3371 | } |
3372 | |
3373 | // |
3374 | // Construct a table with the register arguments, for detecting circular and |
3375 | // non-circular dependencies between the register arguments. A dependency is when |
3376 | // an argument register Rn needs to be moved to register Rm that is also an argument |
3377 | // register. The table is constructed in the order the arguments are passed in |
3378 | // registers: the first register argument is in regArgTab[0], the second in |
3379 | // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting |
3380 | // at an even index. The regArgTab is indexed from 0 to argMax - 1. |
3381 | // Note that due to an extra argument register for ARM64 (i.e theFixedRetBuffReg()) |
3382 | // we have increased the allocated size of the regArgTab[] by one. |
3383 | // |
3384 | struct regArgElem |
3385 | { |
3386 | unsigned varNum; // index into compiler->lvaTable[] for this register argument |
3387 | #if defined(UNIX_AMD64_ABI) |
3388 | var_types type; // the Jit type of this regArgTab entry |
3389 | #endif // defined(UNIX_AMD64_ABI) |
3390 | unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. |
3391 | // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to |
3392 | // argument register number 'x'. Only used when circular = true. |
3393 | char slot; // 0 means the register is not used for a register argument |
3394 | // 1 means the first part of a register argument |
3395 | // 2, 3 or 4 means the second,third or fourth part of a multireg argument |
3396 | bool stackArg; // true if the argument gets homed to the stack |
3397 | bool processed; // true after we've processed the argument (and it is in its final location) |
3398 | bool circular; // true if this register participates in a circular dependency loop. |
3399 | |
3400 | #ifdef UNIX_AMD64_ABI |
3401 | |
3402 | // For UNIX AMD64 struct passing, the type of the register argument slot can differ from |
3403 | // the type of the lclVar in ways that are not ascertainable from lvType. |
3404 | // So, for that case we retain the type of the register in the regArgTab. |
3405 | |
3406 | var_types getRegType(Compiler* compiler) |
3407 | { |
3408 | return type; // UNIX_AMD64 implementation |
3409 | } |
3410 | |
3411 | #else // !UNIX_AMD64_ABI |
3412 | |
3413 | // In other cases, we simply use the type of the lclVar to determine the type of the register. |
3414 | var_types getRegType(Compiler* compiler) |
3415 | { |
3416 | const LclVarDsc& varDsc = compiler->lvaTable[varNum]; |
3417 | // Check if this is an HFA register arg and return the HFA type |
3418 | if (varDsc.lvIsHfaRegArg()) |
3419 | { |
3420 | #if defined(_TARGET_WINDOWS_) |
3421 | // Cannot have hfa types on windows arm targets |
3422 | // in vararg methods. |
3423 | assert(!compiler->info.compIsVarArgs); |
3424 | #endif // defined(_TARGET_WINDOWS_) |
3425 | return varDsc.GetHfaType(); |
3426 | } |
3427 | return compiler->mangleVarArgsType(varDsc.lvType); |
3428 | } |
3429 | |
3430 | #endif // !UNIX_AMD64_ABI |
3431 | } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {}; |
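
    // (Illustrative mapping, for orientation only: on Windows x64, for the integer pass,
    // regArgTab[0..3] correspond to the arguments arriving in RCX, RDX, R8 and R9 respectively;
    // genMapRegArgNumToRegNum / genMapRegNumToRegArgNum perform the actual translation between
    // regArgTab indices and registers on each target.)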
3432 | |
3433 | unsigned varNum; |
3434 | LclVarDsc* varDsc; |
3435 | |
3436 | for (varNum = 0; varNum < compiler->lvaCount; ++varNum) |
3437 | { |
3438 | varDsc = compiler->lvaTable + varNum; |
3439 | |
3440 | // Is this variable a register arg? |
3441 | if (!varDsc->lvIsParam) |
3442 | { |
3443 | continue; |
3444 | } |
3445 | |
3446 | if (!varDsc->lvIsRegArg) |
3447 | { |
3448 | continue; |
3449 | } |
3450 | |
3451 | // When we have a promoted struct we have two possible LclVars that can represent the incoming argument |
3452 | // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField. |
        // We will use the lvStructField if we have an independently promoted (PROMOTION_TYPE_INDEPENDENT)
        // struct field; otherwise we use the original TYP_STRUCT argument.
3455 | // |
3456 | if (varDsc->lvPromoted || varDsc->lvIsStructField) |
3457 | { |
3458 | LclVarDsc* parentVarDsc = varDsc; |
3459 | if (varDsc->lvIsStructField) |
3460 | { |
3461 | assert(!varDsc->lvPromoted); |
3462 | parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl]; |
3463 | } |
3464 | |
3465 | Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc); |
3466 | |
3467 | if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) |
3468 | { |
3469 | noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here |
3470 | |
3471 | // For register arguments that are independent promoted structs we put the promoted field varNum in the |
3472 | // regArgTab[] |
3473 | if (varDsc->lvPromoted) |
3474 | { |
3475 | continue; |
3476 | } |
3477 | } |
3478 | else |
3479 | { |
3480 | // For register arguments that are not independent promoted structs we put the parent struct varNum in |
3481 | // the regArgTab[] |
3482 | if (varDsc->lvIsStructField) |
3483 | { |
3484 | continue; |
3485 | } |
3486 | } |
3487 | } |
3488 | |
3489 | var_types regType = compiler->mangleVarArgsType(varDsc->TypeGet()); |
        // Change regType to the HFA type when we have an HFA argument
3491 | if (varDsc->lvIsHfaRegArg()) |
3492 | { |
3493 | #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) |
3494 | if (compiler->info.compIsVarArgs) |
3495 | { |
3496 | assert(!"Illegal incoming HFA arg encountered in Vararg method." ); |
3497 | } |
3498 | #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) |
3499 | regType = varDsc->GetHfaType(); |
3500 | } |
3501 | |
3502 | #if defined(UNIX_AMD64_ABI) |
3503 | if (!varTypeIsStruct(regType)) |
3504 | #endif // defined(UNIX_AMD64_ABI) |
3505 | { |
            // A struct might be passed partially in XMM registers for System V calls.
            // So a single arg might use both register files.
3508 | if (isFloatRegType(regType) != doingFloat) |
3509 | { |
3510 | continue; |
3511 | } |
3512 | } |
3513 | |
3514 | int slots = 0; |
3515 | |
3516 | #if defined(UNIX_AMD64_ABI) |
3517 | if (varTypeIsStruct(varDsc)) |
3518 | { |
3519 | CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); |
3520 | assert(typeHnd != nullptr); |
3521 | SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; |
3522 | compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); |
3523 | if (!structDesc.passedInRegisters) |
3524 | { |
3525 | // The var is not passed in registers. |
3526 | continue; |
3527 | } |
3528 | |
3529 | unsigned firstRegSlot = 0; |
3530 | for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++) |
3531 | { |
3532 | regNumber regNum = varDsc->lvRegNumForSlot(slotCounter); |
3533 | var_types regType; |
3534 | |
3535 | #ifdef FEATURE_SIMD |
3536 | // Assumption 1: |
                // RyuJit backend depends on the assumption that on 64-bit targets the size of Vector3 is
                // rounded up to a multiple of TARGET_POINTER_SIZE (16 bytes) and hence Vector3 locals on the
                // stack can be treated as TYP_SIMD16 for reading and writing purposes. Hence while homing a
                // Vector3 type arg on the stack we should home the entire 16 bytes so that the upper-most
                // 4 bytes will be zeroed when written to the stack.
                //
                // Assumption 2:
                // RyuJit backend makes another implicit assumption: for Vector3 type args, whether passed in
                // registers or on the stack, the upper-most 4 bytes will be zero.
                //
                // For P/Invoke return and Reverse P/Invoke argument passing, the native compiler doesn't
                // guarantee that the upper 4 bytes of a Vector3 struct are zero initialized, and hence
                // assumption 2 is invalid.
                //
                // RyuJIT x64 Windows: arguments are treated as passed by ref and hence only 12 bytes are
                // read/written. In the case of Vector3 returns, the caller allocates a zero-initialized
                // Vector3 local and passes it as the retBuf arg, and the callee writes only 12 bytes to
                // retBuf. For this reason, there is no need to clear the upper 4 bytes of Vector3 type args.
                //
                // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
                // Vector3 return values are returned in two return registers and the caller assembles them
                // into a single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes
                // of Vector3 type args in the prolog and of the Vector3 type return value of a call.
3559 | |
3560 | if (varDsc->lvType == TYP_SIMD12) |
3561 | { |
3562 | regType = TYP_DOUBLE; |
3563 | } |
3564 | else |
3565 | #endif |
3566 | { |
3567 | regType = compiler->GetEightByteType(structDesc, slotCounter); |
3568 | } |
3569 | |
3570 | regArgNum = genMapRegNumToRegArgNum(regNum, regType); |
3571 | |
3572 | if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) || |
3573 | (doingFloat && (structDesc.IsSseSlot(slotCounter)))) |
3574 | { |
3575 | // Store the reg for the first slot. |
3576 | if (slots == 0) |
3577 | { |
3578 | firstRegSlot = regArgNum; |
3579 | } |
3580 | |
3581 | // Bingo - add it to our table |
3582 | noway_assert(regArgNum < argMax); |
3583 | noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better |
3584 | // not be multiple vars representing this argument |
3585 | // register) |
3586 | regArgTab[regArgNum].varNum = varNum; |
3587 | regArgTab[regArgNum].slot = (char)(slotCounter + 1); |
3588 | regArgTab[regArgNum].type = regType; |
3589 | slots++; |
3590 | } |
3591 | } |
3592 | |
3593 | if (slots == 0) |
3594 | { |
3595 | continue; // Nothing to do for this regState set. |
3596 | } |
3597 | |
3598 | regArgNum = firstRegSlot; |
3599 | } |
3600 | else |
3601 | #endif // defined(UNIX_AMD64_ABI) |
3602 | { |
3603 | // Bingo - add it to our table |
3604 | regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType); |
3605 | |
3606 | noway_assert(regArgNum < argMax); |
3607 | // We better not have added it already (there better not be multiple vars representing this argument |
3608 | // register) |
3609 | noway_assert(regArgTab[regArgNum].slot == 0); |
3610 | |
3611 | #if defined(UNIX_AMD64_ABI) |
3612 | // Set the register type. |
3613 | regArgTab[regArgNum].type = regType; |
3614 | #endif // defined(UNIX_AMD64_ABI) |
3615 | |
3616 | regArgTab[regArgNum].varNum = varNum; |
3617 | regArgTab[regArgNum].slot = 1; |
3618 | |
3619 | slots = 1; |
3620 | |
3621 | #if FEATURE_MULTIREG_ARGS |
3622 | if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) |
3623 | { |
3624 | if (varDsc->lvIsHfaRegArg()) |
3625 | { |
3626 | // We have an HFA argument, set slots to the number of registers used |
3627 | slots = varDsc->lvHfaSlots(); |
3628 | } |
3629 | else |
3630 | { |
3631 | // Currently all non-HFA multireg structs are two registers in size (i.e. two slots) |
3632 | assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE)); |
3633 | // We have a non-HFA multireg argument, set slots to two |
3634 | slots = 2; |
3635 | } |
3636 | |
3637 | // Note that regArgNum+1 represents an argument index not an actual argument register. |
3638 | // see genMapRegArgNumToRegNum(unsigned argNum, var_types type) |
3639 | |
3640 | // This is the setup for the rest of a multireg struct arg |
3641 | |
3642 | for (int i = 1; i < slots; i++) |
3643 | { |
3644 | noway_assert((regArgNum + i) < argMax); |
3645 | |
3646 | // We better not have added it already (there better not be multiple vars representing this argument |
3647 | // register) |
3648 | noway_assert(regArgTab[regArgNum + i].slot == 0); |
3649 | |
3650 | regArgTab[regArgNum + i].varNum = varNum; |
3651 | regArgTab[regArgNum + i].slot = (char)(i + 1); |
3652 | } |
3653 | } |
3654 | #endif // FEATURE_MULTIREG_ARGS |
3655 | } |
3656 | |
3657 | #ifdef _TARGET_ARM_ |
3658 | int lclSize = compiler->lvaLclSize(varNum); |
3659 | |
3660 | if (lclSize > REGSIZE_BYTES) |
3661 | { |
3662 | unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG; |
3663 | slots = lclSize / REGSIZE_BYTES; |
3664 | if (regArgNum + slots > maxRegArgNum) |
3665 | { |
3666 | slots = maxRegArgNum - regArgNum; |
3667 | } |
3668 | } |
3669 | C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG); |
3670 | assert(slots < INT8_MAX); |
3671 | for (char i = 1; i < slots; i++) |
3672 | { |
3673 | regArgTab[regArgNum + i].varNum = varNum; |
3674 | regArgTab[regArgNum + i].slot = i + 1; |
3675 | } |
3676 | #endif // _TARGET_ARM_ |
3677 | |
3678 | for (int i = 0; i < slots; i++) |
3679 | { |
3680 | regType = regArgTab[regArgNum + i].getRegType(compiler); |
3681 | regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType); |
3682 | |
3683 | #if !defined(UNIX_AMD64_ABI) |
3684 | assert((i > 0) || (regNum == varDsc->lvArgReg)); |
#endif // !defined(UNIX_AMD64_ABI)
3686 | |
            // Is the arg dead on entry to the method?
3688 | |
3689 | if ((regArgMaskLive & genRegMask(regNum)) == 0) |
3690 | { |
3691 | if (varDsc->lvTrackedNonStruct()) |
3692 | { |
3693 | // We may now see some tracked locals with zero refs. |
3694 | // See Lowering::DoPhase. Tolerate these. |
3695 | if (varDsc->lvRefCnt() > 0) |
3696 | { |
3697 | noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)); |
3698 | } |
3699 | } |
3700 | else |
3701 | { |
3702 | #ifdef _TARGET_X86_ |
3703 | noway_assert(varDsc->lvType == TYP_STRUCT); |
3704 | #else // !_TARGET_X86_ |
3705 | // For LSRA, it may not be in regArgMaskLive if it has a zero |
3706 | // refcnt. This is in contrast with the non-LSRA case in which all |
3707 | // non-tracked args are assumed live on entry. |
3708 | noway_assert((varDsc->lvRefCnt() == 0) || (varDsc->lvType == TYP_STRUCT) || |
3709 | (varDsc->lvAddrExposed && compiler->info.compIsVarArgs) || |
3710 | (varDsc->lvAddrExposed && compiler->opts.compUseSoftFP)); |
3711 | #endif // !_TARGET_X86_ |
3712 | } |
3713 | // Mark it as processed and be done with it |
3714 | regArgTab[regArgNum + i].processed = true; |
3715 | goto NON_DEP; |
3716 | } |
3717 | |
3718 | #ifdef _TARGET_ARM_ |
            // On ARM, when the varDsc is a struct arg (or pre-spilled due to varargs), the initReg/xtraReg
            // could be equal to lvArgReg. The pre-spilled registers are not considered live either, since
            // they've already been spilled.
3722 | // |
3723 | if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0) |
3724 | #endif // _TARGET_ARM_ |
3725 | { |
3726 | #if !defined(UNIX_AMD64_ABI) |
3727 | noway_assert(xtraReg != (varDsc->lvArgReg + i)); |
3728 | #endif |
3729 | noway_assert(regArgMaskLive & genRegMask(regNum)); |
3730 | } |
3731 | |
3732 | regArgTab[regArgNum + i].processed = false; |
3733 | |
3734 | /* mark stack arguments since we will take care of those first */ |
3735 | regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true; |
3736 | |
3737 | /* If it goes on the stack or in a register that doesn't hold |
3738 | * an argument anymore -> CANNOT form a circular dependency */ |
3739 | |
3740 | if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive)) |
3741 | { |
3742 | /* will trash another argument -> possible dependency |
3743 | * We may need several passes after the table is constructed |
3744 | * to decide on that */ |
3745 | |
3746 | /* Maybe the argument stays in the register (IDEAL) */ |
3747 | |
3748 | if ((i == 0) && (varDsc->lvRegNum == regNum)) |
3749 | { |
3750 | goto NON_DEP; |
3751 | } |
3752 | |
3753 | #if !defined(_TARGET_64BIT_) |
3754 | if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum)) |
3755 | { |
3756 | goto NON_DEP; |
3757 | } |
3758 | if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum)) |
3759 | { |
3760 | goto NON_DEP; |
3761 | } |
3762 | |
3763 | if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) && |
3764 | (REG_NEXT(varDsc->lvRegNum) == regNum)) |
3765 | { |
3766 | goto NON_DEP; |
3767 | } |
3768 | #endif // !defined(_TARGET_64BIT_) |
3769 | regArgTab[regArgNum + i].circular = true; |
3770 | } |
3771 | else |
3772 | { |
3773 | NON_DEP: |
3774 | regArgTab[regArgNum + i].circular = false; |
3775 | |
3776 | /* mark the argument register as free */ |
3777 | regArgMaskLive &= ~genRegMask(regNum); |
3778 | } |
3779 | } |
3780 | } |
3781 | |
3782 | /* Find the circular dependencies for the argument registers, if any. |
3783 | * A circular dependency is a set of registers R1, R2, ..., Rn |
3784 | * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */ |
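    //
    // A minimal example (register names purely illustrative): the argument arriving in R1 must be
    // homed in R2 while the argument arriving in R2 must be homed in R1. Neither move can be done
    // first without clobbering the other, so {R1, R2} form a two-element cycle. Later in this
    // function, two-register cycles are resolved with 'xchg' on xarch, and other cycles are broken
    // by staging one value in xtraReg.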
3785 | |
3786 | bool change = true; |
3787 | if (regArgMaskLive) |
3788 | { |
3789 | /* Possible circular dependencies still exist; the previous pass was not enough |
3790 | * to filter them out. Use a "sieve" strategy to find all circular dependencies. */ |
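        //
        // Each pass of the sieve clears the circular flag on any argument whose destination register
        // is no longer live as an incoming argument; that argument's own incoming register then
        // becomes free, which may unblock other arguments on the next pass. Whatever still has
        // circular == true once a pass makes no change is part of a genuine cycle.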
3791 | |
3792 | while (change) |
3793 | { |
3794 | change = false; |
3795 | |
3796 | for (argNum = 0; argNum < argMax; argNum++) |
3797 | { |
3798 | // If we already marked the argument as non-circular then continue |
3799 | |
3800 | if (!regArgTab[argNum].circular) |
3801 | { |
3802 | continue; |
3803 | } |
3804 | |
3805 | if (regArgTab[argNum].slot == 0) // Not a register argument |
3806 | { |
3807 | continue; |
3808 | } |
3809 | |
3810 | varNum = regArgTab[argNum].varNum; |
3811 | noway_assert(varNum < compiler->lvaCount); |
3812 | varDsc = compiler->lvaTable + varNum; |
3813 | noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); |
3814 | |
3815 | /* cannot possibly have stack arguments */ |
3816 | noway_assert(varDsc->lvIsInReg()); |
3817 | noway_assert(!regArgTab[argNum].stackArg); |
3818 | |
3819 | var_types regType = regArgTab[argNum].getRegType(compiler); |
3820 | regNumber regNum = genMapRegArgNumToRegNum(argNum, regType); |
3821 | |
3822 | regNumber destRegNum = REG_NA; |
3823 | if (regArgTab[argNum].slot == 1) |
3824 | { |
3825 | destRegNum = varDsc->lvRegNum; |
3826 | } |
3827 | #if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_64BIT_) |
3828 | else |
3829 | { |
3830 | assert(regArgTab[argNum].slot == 2); |
3831 | assert(argNum > 0); |
3832 | assert(regArgTab[argNum - 1].slot == 1); |
3833 | assert(regArgTab[argNum - 1].varNum == varNum); |
3834 | assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16)); |
3835 | regArgMaskLive &= ~genRegMask(regNum); |
3836 | regArgTab[argNum].circular = false; |
3837 | change = true; |
3838 | continue; |
3839 | } |
3840 | #elif !defined(_TARGET_64BIT_) |
3841 | else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG) |
3842 | { |
3843 | destRegNum = varDsc->lvOtherReg; |
3844 | } |
3845 | else |
3846 | { |
3847 | assert(regArgTab[argNum].slot == 2); |
3848 | assert(varDsc->TypeGet() == TYP_DOUBLE); |
3849 | destRegNum = REG_NEXT(varDsc->lvRegNum); |
3850 | } |
3851 | #endif // !defined(_TARGET_64BIT_) |
3852 | noway_assert(destRegNum != REG_NA); |
3853 | if (genRegMask(destRegNum) & regArgMaskLive) |
3854 | { |
3855 | /* we are trashing a live argument register - record it */ |
3856 | unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType); |
3857 | noway_assert(destRegArgNum < argMax); |
3858 | regArgTab[destRegArgNum].trashBy = argNum; |
3859 | } |
3860 | else |
3861 | { |
3862 | /* argument goes to a free register */ |
3863 | regArgTab[argNum].circular = false; |
3864 | change = true; |
3865 | |
3866 | /* mark the argument register as free */ |
3867 | regArgMaskLive &= ~genRegMask(regNum); |
3868 | } |
3869 | } |
3870 | } |
3871 | } |
3872 | |
3873 | /* At this point, everything that has the "circular" flag |
3874 | * set to "true" forms a circular dependency */ |
3875 | CLANG_FORMAT_COMMENT_ANCHOR; |
3876 | |
3877 | #ifdef DEBUG |
3878 | if (regArgMaskLive) |
3879 | { |
3880 | if (verbose) |
3881 | { |
3882 | printf("Circular dependencies found while home-ing the incoming arguments.\n" ); |
3883 | } |
3884 | } |
3885 | #endif |
3886 | |
3887 | // LSRA allocates registers to incoming parameters in order and will not overwrite |
3888 | // a register still holding a live parameter. |
3889 | |
3890 | noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && |
3891 | "Homing of float argument registers with circular dependencies not implemented." ); |
3892 | |
3893 | /* Now move the arguments to their locations. |
3894 | * First consider ones that go on the stack since they may |
3895 | * free some registers. */ |
3896 | |
3897 | regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start |
3898 | for (argNum = 0; argNum < argMax; argNum++) |
3899 | { |
3900 | emitAttr size; |
3901 | |
3902 | #if defined(UNIX_AMD64_ABI) |
3903 | // If this is the wrong register file, just continue. |
3904 | if (regArgTab[argNum].type == TYP_UNDEF) |
3905 | { |
3906 | // This could happen if the reg in regArgTab[argNum] is of the other register file - |
3907 | // for System V register passed structs where the first reg is GPR and the second an XMM reg. |
3908 | // The next register file processing will process it. |
3909 | continue; |
3910 | } |
3911 | #endif // defined(UNIX_AMD64_ABI) |
3912 | |
3913 | // If the arg is dead on entry to the method, skip it |
3914 | |
3915 | if (regArgTab[argNum].processed) |
3916 | { |
3917 | continue; |
3918 | } |
3919 | |
3920 | if (regArgTab[argNum].slot == 0) // Not a register argument |
3921 | { |
3922 | continue; |
3923 | } |
3924 | |
3925 | varNum = regArgTab[argNum].varNum; |
3926 | noway_assert(varNum < compiler->lvaCount); |
3927 | varDsc = compiler->lvaTable + varNum; |
3928 | |
3929 | #ifndef _TARGET_64BIT_ |
3930 | // If not a stack arg go to the next one |
3931 | if (varDsc->lvType == TYP_LONG) |
3932 | { |
3933 | if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg) |
3934 | { |
3935 | continue; |
3936 | } |
3937 | else if (varDsc->lvOtherReg != REG_STK) |
3938 | { |
3939 | continue; |
3940 | } |
3941 | } |
3942 | else |
3943 | #endif // !_TARGET_64BIT_ |
3944 | { |
3945 | // If not a stack arg go to the next one |
3946 | if (!regArgTab[argNum].stackArg) |
3947 | { |
3948 | continue; |
3949 | } |
3950 | } |
3951 | |
3952 | #if defined(_TARGET_ARM_) |
3953 | if (varDsc->lvType == TYP_DOUBLE) |
3954 | { |
3955 | if (regArgTab[argNum].slot == 2) |
3956 | { |
3957 | // We handled the entire double when processing the first half (slot == 1) |
3958 | continue; |
3959 | } |
3960 | } |
3961 | #endif |
3962 | |
3963 | noway_assert(regArgTab[argNum].circular == false); |
3964 | |
3965 | noway_assert(varDsc->lvIsParam); |
3966 | noway_assert(varDsc->lvIsRegArg); |
3967 | noway_assert(varDsc->lvIsInReg() == false || |
3968 | (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2)); |
3969 | |
3970 | var_types storeType = TYP_UNDEF; |
3971 | unsigned slotSize = TARGET_POINTER_SIZE; |
3972 | |
3973 | if (varTypeIsStruct(varDsc)) |
3974 | { |
3975 | storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer |
3976 | #if FEATURE_MULTIREG_ARGS |
3977 | // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers |
3978 | noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES); |
3979 | #endif // FEATURE_MULTIREG_ARGS |
3980 | #ifdef UNIX_AMD64_ABI |
3981 | storeType = regArgTab[argNum].type; |
#endif // UNIX_AMD64_ABI
3983 | if (varDsc->lvIsHfaRegArg()) |
3984 | { |
3985 | #ifdef _TARGET_ARM_ |
3986 | // On ARM32 the storeType for HFA args is always TYP_FLOAT |
3987 | storeType = TYP_FLOAT; |
3988 | slotSize = (unsigned)emitActualTypeSize(storeType); |
3989 | #else // _TARGET_ARM64_ |
3990 | storeType = genActualType(varDsc->GetHfaType()); |
3991 | slotSize = (unsigned)emitActualTypeSize(storeType); |
3992 | #endif // _TARGET_ARM64_ |
3993 | } |
3994 | } |
3995 | else // Not a struct type |
3996 | { |
3997 | storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet())); |
3998 | } |
3999 | size = emitActualTypeSize(storeType); |
4000 | #ifdef _TARGET_X86_ |
4001 | noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE); |
4002 | #endif //_TARGET_X86_ |
4003 | |
4004 | regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType); |
4005 | |
        // Stack argument - if the ref count is 0, we don't care about it
4007 | |
4008 | if (!varDsc->lvOnFrame) |
4009 | { |
4010 | noway_assert(varDsc->lvRefCnt() == 0); |
4011 | } |
4012 | else |
4013 | { |
4014 | // Since slot is typically 1, baseOffset is typically 0 |
4015 | int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; |
4016 | |
4017 | getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); |
4018 | |
4019 | #ifndef UNIX_AMD64_ABI |
4020 | // Check if we are writing past the end of the struct |
4021 | if (varTypeIsStruct(varDsc)) |
4022 | { |
4023 | assert(varDsc->lvSize() >= baseOffset + (unsigned)size); |
4024 | } |
4025 | #endif // !UNIX_AMD64_ABI |
4026 | |
4027 | if (regArgTab[argNum].slot == 1) |
4028 | { |
4029 | psiMoveToStack(varNum); |
4030 | } |
4031 | } |
4032 | |
4033 | /* mark the argument as processed */ |
4034 | |
4035 | regArgTab[argNum].processed = true; |
4036 | regArgMaskLive &= ~genRegMask(srcRegNum); |
4037 | |
4038 | #if defined(_TARGET_ARM_) |
4039 | if (storeType == TYP_DOUBLE) |
4040 | { |
4041 | regArgTab[argNum + 1].processed = true; |
4042 | regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum)); |
4043 | } |
4044 | #endif |
4045 | } |
4046 | |
4047 | /* Process any circular dependencies */ |
4048 | if (regArgMaskLive) |
4049 | { |
4050 | unsigned begReg, destReg, srcReg; |
4051 | unsigned varNumDest, varNumSrc; |
4052 | LclVarDsc* varDscDest; |
4053 | LclVarDsc* varDscSrc; |
4054 | instruction insCopy = INS_mov; |
4055 | |
4056 | if (doingFloat) |
4057 | { |
4058 | #if defined(FEATURE_HFA) || defined(UNIX_AMD64_ABI) |
4059 | insCopy = ins_Copy(TYP_DOUBLE); |
4060 | // Compute xtraReg here when we have a float argument |
4061 | assert(xtraReg == REG_NA); |
4062 | |
4063 | regMaskTP fpAvailMask; |
4064 | |
4065 | fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive; |
4066 | #if defined(FEATURE_HFA) |
4067 | fpAvailMask &= RBM_ALLDOUBLE; |
4068 | #else |
4069 | #if !defined(UNIX_AMD64_ABI) |
4070 | #error Error. Wrong architecture. |
4071 | #endif // !defined(UNIX_AMD64_ABI) |
4072 | #endif // defined(FEATURE_HFA) |
4073 | |
4074 | if (fpAvailMask == RBM_NONE) |
4075 | { |
4076 | fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive; |
4077 | #if defined(FEATURE_HFA) |
4078 | fpAvailMask &= RBM_ALLDOUBLE; |
4079 | #else |
4080 | #if !defined(UNIX_AMD64_ABI) |
4081 | #error Error. Wrong architecture. |
4082 | #endif // !defined(UNIX_AMD64_ABI) |
4083 | #endif // defined(FEATURE_HFA) |
4084 | } |
4085 | |
4086 | assert(fpAvailMask != RBM_NONE); |
4087 | |
4088 | // We pick the lowest avail register number |
4089 | regMaskTP tempMask = genFindLowestBit(fpAvailMask); |
4090 | xtraReg = genRegNumFromMask(tempMask); |
4091 | #elif defined(_TARGET_X86_) |
4092 | // This case shouldn't occur on x86 since NYI gets converted to an assert |
4093 | NYI("Homing circular FP registers via xtraReg" ); |
4094 | #endif |
4095 | } |
4096 | |
4097 | for (argNum = 0; argNum < argMax; argNum++) |
4098 | { |
4099 | // If not a circular dependency then continue |
4100 | if (!regArgTab[argNum].circular) |
4101 | { |
4102 | continue; |
4103 | } |
4104 | |
4105 | // If already processed the dependency then continue |
4106 | |
4107 | if (regArgTab[argNum].processed) |
4108 | { |
4109 | continue; |
4110 | } |
4111 | |
4112 | if (regArgTab[argNum].slot == 0) // Not a register argument |
4113 | { |
4114 | continue; |
4115 | } |
4116 | |
4117 | destReg = begReg = argNum; |
4118 | srcReg = regArgTab[argNum].trashBy; |
4119 | |
4120 | varNumDest = regArgTab[destReg].varNum; |
4121 | noway_assert(varNumDest < compiler->lvaCount); |
4122 | varDscDest = compiler->lvaTable + varNumDest; |
4123 | noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg); |
4124 | |
4125 | noway_assert(srcReg < argMax); |
4126 | varNumSrc = regArgTab[srcReg].varNum; |
4127 | noway_assert(varNumSrc < compiler->lvaCount); |
4128 | varDscSrc = compiler->lvaTable + varNumSrc; |
4129 | noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg); |
4130 | |
4131 | emitAttr size = EA_PTRSIZE; |
4132 | |
4133 | #ifdef _TARGET_XARCH_ |
4134 | // |
4135 | // The following code relies upon the target architecture having an |
4136 | // 'xchg' instruction which directly swaps the values held in two registers. |
4137 | // On the ARM architecture we do not have such an instruction. |
4138 | // |
4139 | if (destReg == regArgTab[srcReg].trashBy) |
4140 | { |
4141 | /* only 2 registers form the circular dependency - use "xchg" */ |
4142 | |
4143 | varNum = regArgTab[argNum].varNum; |
4144 | noway_assert(varNum < compiler->lvaCount); |
4145 | varDsc = compiler->lvaTable + varNum; |
4146 | noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); |
4147 | |
4148 | noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES); |
4149 | |
4150 | /* Set "size" to indicate GC if one and only one of |
4151 | * the operands is a pointer |
4152 | * RATIONALE: If both are pointers, nothing changes in |
4153 | * the GC pointer tracking. If only one is a pointer we |
4154 | * have to "swap" the registers in the GC reg pointer mask |
4155 | */ |
4156 | |
4157 | if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet())) |
4158 | { |
4159 | size = EA_GCREF; |
4160 | } |
4161 | |
4162 | noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum); |
4163 | |
4164 | getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg); |
4165 | regSet.verifyRegUsed(varDscSrc->lvRegNum); |
4166 | regSet.verifyRegUsed(varDscSrc->lvArgReg); |
4167 | |
4168 | /* mark both arguments as processed */ |
4169 | regArgTab[destReg].processed = true; |
4170 | regArgTab[srcReg].processed = true; |
4171 | |
4172 | regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg); |
4173 | regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg); |
4174 | |
4175 | psiMoveToReg(varNumSrc); |
4176 | psiMoveToReg(varNumDest); |
4177 | } |
4178 | else |
4179 | #endif // _TARGET_XARCH_ |
4180 | { |
4181 | var_types destMemType = varDscDest->TypeGet(); |
4182 | |
4183 | #ifdef _TARGET_ARM_ |
4184 | bool cycleAllDouble = true; // assume the best |
4185 | |
4186 | unsigned iter = begReg; |
4187 | do |
4188 | { |
4189 | if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE) |
4190 | { |
4191 | cycleAllDouble = false; |
4192 | break; |
4193 | } |
4194 | iter = regArgTab[iter].trashBy; |
4195 | } while (iter != begReg); |
4196 | |
4197 | // We may treat doubles as floats for ARM because we could have partial circular |
4198 | // dependencies of a float with a lo/hi part of the double. We mark the |
4199 | // trashBy values for each slot of the double, so let the circular dependency |
4200 | // logic work its way out for floats rather than doubles. If a cycle has all |
4201 | // doubles, then optimize so that instead of two vmov.f32's to move a double, |
4202 | // we can use one vmov.f64. |
4203 | // |
4204 | if (!cycleAllDouble && destMemType == TYP_DOUBLE) |
4205 | { |
4206 | destMemType = TYP_FLOAT; |
4207 | } |
4208 | #endif // _TARGET_ARM_ |
4209 | |
4210 | if (destMemType == TYP_REF) |
4211 | { |
4212 | size = EA_GCREF; |
4213 | } |
4214 | else if (destMemType == TYP_BYREF) |
4215 | { |
4216 | size = EA_BYREF; |
4217 | } |
4218 | else if (destMemType == TYP_DOUBLE) |
4219 | { |
4220 | size = EA_8BYTE; |
4221 | } |
4222 | else if (destMemType == TYP_FLOAT) |
4223 | { |
4224 | size = EA_4BYTE; |
4225 | } |
4226 | |
                /* save the beginning register's (begReg's) incoming value in the extra reg */
4228 | |
4229 | assert(xtraReg != REG_NA); |
4230 | |
4231 | regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType); |
4232 | |
4233 | getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum); |
4234 | |
4235 | regSet.verifyRegUsed(xtraReg); |
4236 | |
4237 | *pXtraRegClobbered = true; |
4238 | |
4239 | psiMoveToReg(varNumDest, xtraReg); |
4240 | |
4241 | /* start moving everything to its right place */ |
4242 | |
4243 | while (srcReg != begReg) |
4244 | { |
4245 | /* mov dest, src */ |
4246 | |
4247 | regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType); |
4248 | regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType); |
4249 | |
4250 | getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum); |
4251 | |
4252 | regSet.verifyRegUsed(destRegNum); |
4253 | |
4254 | /* mark 'src' as processed */ |
4255 | noway_assert(srcReg < argMax); |
4256 | regArgTab[srcReg].processed = true; |
4257 | #ifdef _TARGET_ARM_ |
4258 | if (size == EA_8BYTE) |
4259 | regArgTab[srcReg + 1].processed = true; |
4260 | #endif |
4261 | regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType); |
4262 | |
4263 | /* move to the next pair */ |
4264 | destReg = srcReg; |
4265 | srcReg = regArgTab[srcReg].trashBy; |
4266 | |
4267 | varDscDest = varDscSrc; |
4268 | destMemType = varDscDest->TypeGet(); |
4269 | #ifdef _TARGET_ARM_ |
4270 | if (!cycleAllDouble && destMemType == TYP_DOUBLE) |
4271 | { |
4272 | destMemType = TYP_FLOAT; |
4273 | } |
4274 | #endif |
4275 | varNumSrc = regArgTab[srcReg].varNum; |
4276 | noway_assert(varNumSrc < compiler->lvaCount); |
4277 | varDscSrc = compiler->lvaTable + varNumSrc; |
4278 | noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg); |
4279 | |
4280 | if (destMemType == TYP_REF) |
4281 | { |
4282 | size = EA_GCREF; |
4283 | } |
4284 | else if (destMemType == TYP_DOUBLE) |
4285 | { |
4286 | size = EA_8BYTE; |
4287 | } |
4288 | else |
4289 | { |
4290 | size = EA_4BYTE; |
4291 | } |
4292 | } |
4293 | |
4294 | /* take care of the beginning register */ |
4295 | |
4296 | noway_assert(srcReg == begReg); |
4297 | |
                /* move the saved value of the beginning register (held in the extra reg) into its destination */
4299 | |
4300 | regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType); |
4301 | |
4302 | getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg); |
4303 | |
4304 | regSet.verifyRegUsed(destRegNum); |
4305 | |
4306 | psiMoveToReg(varNumSrc); |
4307 | |
4308 | /* mark the beginning register as processed */ |
4309 | |
4310 | regArgTab[srcReg].processed = true; |
4311 | #ifdef _TARGET_ARM_ |
4312 | if (size == EA_8BYTE) |
4313 | regArgTab[srcReg + 1].processed = true; |
4314 | #endif |
4315 | regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType); |
4316 | } |
4317 | } |
4318 | } |
4319 | |
4320 | /* Finally take care of the remaining arguments that must be enregistered */ |
4321 | while (regArgMaskLive) |
4322 | { |
4323 | regMaskTP regArgMaskLiveSave = regArgMaskLive; |
4324 | |
4325 | for (argNum = 0; argNum < argMax; argNum++) |
4326 | { |
4327 | /* If already processed go to the next one */ |
4328 | if (regArgTab[argNum].processed) |
4329 | { |
4330 | continue; |
4331 | } |
4332 | |
4333 | if (regArgTab[argNum].slot == 0) |
4334 | { // Not a register argument |
4335 | continue; |
4336 | } |
4337 | |
4338 | varNum = regArgTab[argNum].varNum; |
4339 | noway_assert(varNum < compiler->lvaCount); |
4340 | varDsc = compiler->lvaTable + varNum; |
4341 | var_types regType = regArgTab[argNum].getRegType(compiler); |
4342 | regNumber regNum = genMapRegArgNumToRegNum(argNum, regType); |
4343 | |
4344 | #if defined(UNIX_AMD64_ABI) |
4345 | if (regType == TYP_UNDEF) |
4346 | { |
4347 | // This could happen if the reg in regArgTab[argNum] is of the other register file - |
4348 | // for System V register passed structs where the first reg is GPR and the second an XMM reg. |
4349 | // The next register file processing will process it. |
4350 | regArgMaskLive &= ~genRegMask(regNum); |
4351 | continue; |
4352 | } |
4353 | #endif // defined(UNIX_AMD64_ABI) |
4354 | |
4355 | noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg); |
4356 | #ifndef _TARGET_64BIT_ |
4357 | #ifndef _TARGET_ARM_ |
4358 | // Right now we think that incoming arguments are not pointer sized. When we eventually |
4359 | // understand the calling convention, this still won't be true. But maybe we'll have a better |
4360 | // idea of how to ignore it. |
4361 | |
4362 | // On Arm, a long can be passed in register |
4363 | noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == TARGET_POINTER_SIZE); |
4364 | #endif |
4365 | #endif //_TARGET_64BIT_ |
4366 | |
4367 | noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular); |
4368 | |
4369 | /* Register argument - hopefully it stays in the same register */ |
4370 | regNumber destRegNum = REG_NA; |
4371 | var_types destMemType = varDsc->TypeGet(); |
4372 | |
4373 | if (regArgTab[argNum].slot == 1) |
4374 | { |
4375 | destRegNum = varDsc->lvRegNum; |
4376 | |
4377 | #ifdef _TARGET_ARM_ |
4378 | if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed) |
4379 | { |
4380 | // The second half of the double has already been processed! Treat this as a single. |
4381 | destMemType = TYP_FLOAT; |
4382 | } |
4383 | #endif // _TARGET_ARM_ |
4384 | } |
4385 | #ifndef _TARGET_64BIT_ |
4386 | else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG) |
4387 | { |
4388 | assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE); |
4389 | if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) |
4390 | { |
4391 | destRegNum = regNum; |
4392 | } |
4393 | else |
4394 | { |
4395 | destRegNum = varDsc->lvOtherReg; |
4396 | } |
4397 | |
4398 | assert(destRegNum != REG_STK); |
4399 | } |
4400 | else |
4401 | { |
4402 | assert(regArgTab[argNum].slot == 2); |
4403 | assert(destMemType == TYP_DOUBLE); |
4404 | |
4405 | // For doubles, we move the entire double using the argNum representing |
4406 | // the first half of the double. There are two things we won't do: |
4407 | // (1) move the double when the 1st half of the destination is free but the |
4408 | // 2nd half is occupied, and (2) move the double when the 2nd half of the |
4409 | // destination is free but the 1st half is occupied. Here we consider the |
4410 | // case where the first half can't be moved initially because its target is |
4411 | // still busy, but the second half can be moved. We wait until the entire |
4412 | // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3, |
4413 | // and F2 single moving to F16. When we process F0, its target F2 is busy, |
4414 | // so we skip it on the first pass. When we process F1, its target F3 is |
4415 | // available. However, we want to move F0/F1 all at once, so we skip it here. |
4416 | // We process F2, which frees up F2. The next pass through, we process F0 and |
4417 | // F2/F3 are empty, so we move it. Note that if half of a double is involved |
4418 | // in a circularity with a single, then we will have already moved that half |
4419 | // above, so we go ahead and move the remaining half as a single. |
4420 | // Because there are no circularities left, we are guaranteed to terminate. |
4421 | |
4422 | assert(argNum > 0); |
4423 | assert(regArgTab[argNum - 1].slot == 1); |
4424 | |
4425 | if (!regArgTab[argNum - 1].processed) |
4426 | { |
                    // The first half of the double hasn't been processed yet; wait so that both halves can be
                    // processed together
4428 | continue; |
4429 | } |
4430 | |
4431 | // The first half of the double has been processed but the second half hasn't! |
4432 | // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2. |
4433 | // In that case, there is a F0/F2 loop that is not a double-only loop. The circular |
4434 | // dependency logic above will move them as singles, leaving just F3 to move. Treat |
4435 | // it as a single to finish the shuffling. |
4436 | |
4437 | destMemType = TYP_FLOAT; |
4438 | destRegNum = REG_NEXT(varDsc->lvRegNum); |
4439 | } |
4440 | #endif // !_TARGET_64BIT_ |
4441 | #if (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD) |
4442 | else |
4443 | { |
4444 | assert(regArgTab[argNum].slot == 2); |
4445 | assert(argNum > 0); |
4446 | assert(regArgTab[argNum - 1].slot == 1); |
4447 | assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16)); |
4448 | destRegNum = varDsc->lvRegNum; |
4449 | noway_assert(regNum != destRegNum); |
4450 | continue; |
4451 | } |
4452 | #endif // (defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD) |
4453 | noway_assert(destRegNum != REG_NA); |
4454 | if (destRegNum != regNum) |
4455 | { |
4456 | /* Cannot trash a currently live register argument. |
4457 | * Skip this one until its target will be free |
4458 | * which is guaranteed to happen since we have no circular dependencies. */ |
4459 | |
4460 | regMaskTP destMask = genRegMask(destRegNum); |
4461 | #ifdef _TARGET_ARM_ |
4462 | // Don't process the double until both halves of the destination are clear. |
4463 | if (genActualType(destMemType) == TYP_DOUBLE) |
4464 | { |
4465 | assert((destMask & RBM_DBL_REGS) != 0); |
4466 | destMask |= genRegMask(REG_NEXT(destRegNum)); |
4467 | } |
4468 | #endif |
4469 | |
4470 | if (destMask & regArgMaskLive) |
4471 | { |
4472 | continue; |
4473 | } |
4474 | |
4475 | /* Move it to the new register */ |
4476 | |
4477 | emitAttr size = emitActualTypeSize(destMemType); |
4478 | |
4479 | #if defined(_TARGET_ARM64_) |
4480 | if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2) |
4481 | { |
4482 | // For a SIMD type that is passed in two integer registers, |
                    // limit the copy below to the first 8 bytes from the first integer register.
                    // The remaining 8 bytes from the second slot are handled in the code further below.
4485 | assert(EA_SIZE(size) >= 8); |
4486 | size = EA_8BYTE; |
4487 | } |
4488 | #endif |
4489 | |
4490 | getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum); |
4491 | |
4492 | psiMoveToReg(varNum); |
4493 | } |
4494 | |
4495 | /* mark the argument as processed */ |
4496 | |
4497 | assert(!regArgTab[argNum].processed); |
4498 | regArgTab[argNum].processed = true; |
4499 | regArgMaskLive &= ~genRegMask(regNum); |
4500 | #if FEATURE_MULTIREG_ARGS |
4501 | int argRegCount = 1; |
4502 | #ifdef _TARGET_ARM_ |
4503 | if (genActualType(destMemType) == TYP_DOUBLE) |
4504 | { |
4505 | argRegCount = 2; |
4506 | } |
4507 | #endif |
4508 | #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) |
4509 | if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2) |
4510 | { |
4511 | argRegCount = 2; |
4512 | int nextArgNum = argNum + 1; |
4513 | regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler)); |
4514 | noway_assert(regArgTab[nextArgNum].varNum == varNum); |
4515 | // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg |
4516 | // and moves the 0th element of the src reg into the 1st element of the dest reg. |
4517 | getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0); |
4518 | // Set destRegNum to regNum so that we skip the setting of the register below, |
4519 | // but mark argNum as processed and clear regNum from the live mask. |
4520 | destRegNum = regNum; |
4521 | } |
4522 | #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) |
4523 | #if defined(_TARGET_ARM64_) && defined(FEATURE_SIMD) |
4524 | if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2) |
4525 | { |
4526 | // For a SIMD type that is passed in two integer registers, |
                // the code above copies the first integer argument register into the lower 8 bytes
4528 | // of the target register. Here we must handle the second 8 bytes of the slot pair by |
4529 | // inserting the second integer register into the upper 8 bytes of the target |
4530 | // SIMD floating point register. |
4531 | argRegCount = 2; |
4532 | int nextArgNum = argNum + 1; |
4533 | regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler)); |
4534 | noway_assert(regArgTab[nextArgNum].varNum == varNum); |
4535 | noway_assert(genIsValidIntReg(nextRegNum)); |
4536 | noway_assert(genIsValidFloatReg(destRegNum)); |
4537 | getEmitter()->emitIns_R_R_I(INS_mov, EA_8BYTE, destRegNum, nextRegNum, 1); |
4538 | } |
4539 | #endif // defined(_TARGET_ARM64_) && defined(FEATURE_SIMD) |
4540 | |
4541 | // Mark the rest of the argument registers corresponding to this multi-reg type as |
4542 | // being processed and no longer live. |
4543 | for (int regSlot = 1; regSlot < argRegCount; regSlot++) |
4544 | { |
4545 | int nextArgNum = argNum + regSlot; |
4546 | assert(!regArgTab[nextArgNum].processed); |
4547 | regArgTab[nextArgNum].processed = true; |
4548 | regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler)); |
4549 | regArgMaskLive &= ~genRegMask(nextRegNum); |
4550 | } |
4551 | #endif // FEATURE_MULTIREG_ARGS |
4552 | } |
4553 | |
4554 | noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop |
4555 | } |
4556 | } |
4557 | #ifdef _PREFAST_ |
4558 | #pragma warning(pop) |
4559 | #endif |
4560 | |
4561 | /***************************************************************************** |
4562 | * If any incoming stack arguments live in registers, load them. |
4563 | */ |
4564 | void CodeGen::genEnregisterIncomingStackArgs() |
4565 | { |
4566 | #ifdef DEBUG |
4567 | if (verbose) |
4568 | { |
4569 | printf("*************** In genEnregisterIncomingStackArgs()\n" ); |
4570 | } |
4571 | #endif |
4572 | |
4573 | assert(compiler->compGeneratingProlog); |
4574 | |
4575 | unsigned varNum = 0; |
4576 | |
4577 | for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) |
4578 | { |
4579 | /* Is this variable a parameter? */ |
4580 | |
4581 | if (!varDsc->lvIsParam) |
4582 | { |
4583 | continue; |
4584 | } |
4585 | |
4586 | /* If it's a register argument then it's already been taken care of. |
4587 | But, on Arm when under a profiler, we would have prespilled a register argument |
4588 | and hence here we need to load it from its prespilled location. |
4589 | */ |
4590 | bool isPrespilledForProfiling = false; |
4591 | #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED) |
4592 | isPrespilledForProfiling = |
4593 | compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false)); |
4594 | #endif |
4595 | |
4596 | if (varDsc->lvIsRegArg && !isPrespilledForProfiling) |
4597 | { |
4598 | continue; |
4599 | } |
4600 | |
4601 | /* Has the parameter been assigned to a register? */ |
4602 | |
4603 | if (!varDsc->lvIsInReg()) |
4604 | { |
4605 | continue; |
4606 | } |
4607 | |
4608 | var_types type = genActualType(varDsc->TypeGet()); |
4609 | |
4610 | /* Is the variable dead on entry */ |
4611 | |
4612 | if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) |
4613 | { |
4614 | continue; |
4615 | } |
4616 | |
4617 | /* Load the incoming parameter into the register */ |
4618 | |
4619 | /* Figure out the home offset of the incoming argument */ |
4620 | |
4621 | regNumber regNum = varDsc->lvArgInitReg; |
4622 | assert(regNum != REG_STK); |
4623 | |
4624 | getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0); |
4625 | regSet.verifyRegUsed(regNum); |
4626 | |
4627 | psiMoveToReg(varNum); |
4628 | } |
4629 | } |
4630 | |
4631 | /*------------------------------------------------------------------------- |
4632 | * |
4633 | * We have to decide whether we're going to use block initialization |
4634 | * in the prolog before we assign final stack offsets. This is because |
4635 | * when using block initialization we may need additional callee-saved |
4636 | * registers which need to be saved on the frame, thus increasing the |
4637 | * frame size. |
4638 | * |
4639 | * We'll count the number of locals we have to initialize, |
4640 | * and if there are lots of them we'll use block initialization. |
4641 | * Thus, the local variable table must have accurate register location |
4642 | * information for enregistered locals for their register state on entry |
4643 | * to the function. |
4644 | * |
4645 | * At the same time we set lvMustInit for locals (enregistered or on stack) |
 *  that must be initialized (e.g. when compInitMem is set, for untracked pointers,
 *  or when DFA is disabled).
4648 | */ |
4649 | void CodeGen::genCheckUseBlockInit() |
4650 | { |
4651 | assert(!compiler->compGeneratingProlog); |
4652 | |
4653 | unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables |
4654 | // larger than int count for more than 1). |
4655 | unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to |
4656 | // determine whether to use block init. |
4657 | |
4658 | unsigned varNum; |
4659 | LclVarDsc* varDsc; |
4660 | |
4661 | for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) |
4662 | { |
4663 | if (varDsc->lvIsParam) |
4664 | { |
4665 | continue; |
4666 | } |
4667 | |
4668 | if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame) |
4669 | { |
4670 | noway_assert(varDsc->lvRefCnt() == 0); |
4671 | continue; |
4672 | } |
4673 | |
4674 | if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar) |
4675 | { |
4676 | continue; |
4677 | } |
4678 | |
4679 | #if FEATURE_FIXED_OUT_ARGS |
4680 | if (varNum == compiler->lvaPInvokeFrameRegSaveVar) |
4681 | { |
4682 | continue; |
4683 | } |
4684 | if (varNum == compiler->lvaOutgoingArgSpaceVar) |
4685 | { |
4686 | continue; |
4687 | } |
4688 | #endif |
4689 | |
4690 | #if FEATURE_EH_FUNCLETS |
4691 | // There's no need to force 0-initialization of the PSPSym, it will be |
4692 | // initialized with a real value in the prolog |
4693 | if (varNum == compiler->lvaPSPSym) |
4694 | { |
4695 | continue; |
4696 | } |
4697 | #endif |
4698 | |
4699 | if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc)) |
4700 | { |
4701 | // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been |
4702 | // initialized by the parent struct. No need to set the lvMustInit bit in the |
4703 | // field locals. |
4704 | continue; |
4705 | } |
4706 | |
4707 | if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) || |
4708 | varDsc->lvMustInit) |
4709 | { |
4710 | if (varDsc->lvTracked) |
4711 | { |
4712 | /* For uninitialized use of tracked variables, the liveness |
4713 | * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness() |
4714 | */ |
4715 | if (varDsc->lvMustInit || |
4716 | VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) |
4717 | { |
4718 | /* This var must be initialized */ |
4719 | |
4720 | varDsc->lvMustInit = 1; |
4721 | |
                    /* If the variable is on the stack, it will be initialized
                     * using rep stos - compute the total size to be zeroed */
4724 | |
4725 | if (varDsc->lvOnFrame) |
4726 | { |
4727 | if (!varDsc->lvRegister) |
4728 | { |
4729 | if (!varDsc->lvIsInReg()) |
4730 | { |
4731 | // Var is on the stack at entry. |
4732 | initStkLclCnt += |
4733 | roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int); |
4734 | } |
4735 | } |
4736 | else |
4737 | { |
4738 | // Var is partially enregistered |
4739 | noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK); |
4740 | initStkLclCnt += genTypeStSz(TYP_INT); |
4741 | } |
4742 | } |
4743 | } |
4744 | } |
4745 | |
4746 | /* With compInitMem, all untracked vars will have to be init'ed */ |
4747 | /* VSW 102460 - Do not force initialization of compiler generated temps, |
4748 | unless they are untracked GC type or structs that contain GC pointers */ |
4749 | CLANG_FORMAT_COMMENT_ANCHOR; |
4750 | |
4751 | #if FEATURE_SIMD |
4752 | // TODO-1stClassStructs |
4753 | // This is here to duplicate previous behavior, where TYP_SIMD8 locals |
4754 | // were not being re-typed correctly. |
4755 | if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) && |
4756 | #else // !FEATURE_SIMD |
4757 | if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) && |
4758 | #endif // !FEATURE_SIMD |
4759 | varDsc->lvOnFrame && |
4760 | (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0))) |
4761 | { |
4762 | varDsc->lvMustInit = true; |
4763 | |
4764 | initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int); |
4765 | } |
4766 | |
4767 | continue; |
4768 | } |
4769 | |
4770 | /* Ignore if not a pointer variable or value class with a GC field */ |
4771 | |
4772 | if (!compiler->lvaTypeIsGC(varNum)) |
4773 | { |
4774 | continue; |
4775 | } |
4776 | |
4777 | /* If we don't know lifetimes of variables, must be conservative */ |
4778 | if (!compiler->backendRequiresLocalVarLifetimes()) |
4779 | { |
4780 | varDsc->lvMustInit = true; |
4781 | noway_assert(!varDsc->lvRegister); |
4782 | } |
4783 | else |
4784 | { |
4785 | if (!varDsc->lvTracked) |
4786 | { |
4787 | varDsc->lvMustInit = true; |
4788 | } |
4789 | } |
4790 | |
4791 | /* Is this a 'must-init' stack pointer local? */ |
4792 | |
4793 | if (varDsc->lvMustInit && varDsc->lvOnFrame) |
4794 | { |
4795 | initStkLclCnt += varDsc->lvStructGcCount; |
4796 | } |
4797 | |
4798 | if ((compiler->lvaLclSize(varNum) > (3 * TARGET_POINTER_SIZE)) && (largeGcStructs <= 4)) |
4799 | { |
4800 | largeGcStructs++; |
4801 | } |
4802 | } |
4803 | |
4804 | /* Don't forget about spill temps that hold pointers */ |
4805 | |
4806 | if (!TRACK_GC_TEMP_LIFETIMES) |
4807 | { |
4808 | assert(regSet.tmpAllFree()); |
4809 | for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis)) |
4810 | { |
4811 | if (varTypeIsGC(tempThis->tdTempType())) |
4812 | { |
4813 | initStkLclCnt++; |
4814 | } |
4815 | } |
4816 | } |
4817 | |
    // After debugging this further it was found that this logic is incorrect:
    // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case)
    // and it also double-counts variables (we saw this in the debugger) around line 4829.
    // Even though this doesn't pose a correctness problem, it may improperly decide to
    // zero-init the stack using a block operation instead of on a case-by-case basis.
4823 | genInitStkLclCnt = initStkLclCnt; |
4824 | |
4825 | /* If we have more than 4 untracked locals, use block initialization */ |
    /* TODO-Review: If we have large structs, bias toward not using block initialization since
       we waste all the other slots. Really need to compute the correct cost
       and compare that against zeroing the slots individually */
4829 | |
4830 | genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4)); |
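    // For example (hypothetical counts): with genInitStkLclCnt == 6 and largeGcStructs == 1 we have
    // 6 > (1 + 4), so block initialization is chosen; with only 3 slots to zero we would instead
    // zero them individually in the prolog.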
4831 | |
4832 | if (genUseBlockInit) |
4833 | { |
4834 | regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn; |
4835 | |
4836 | // If there is a secret stub param, don't count it, as it will no longer |
4837 | // be live when we do block init. |
4838 | if (compiler->info.compPublishStubParam) |
4839 | { |
4840 | maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM; |
4841 | } |
4842 | |
4843 | #ifdef _TARGET_XARCH_ |
        // If we're going to use "REP STOS", remember that we will trash EDI.
        // For fastcall we will have to save ECX and EAX, so reserve two extra
        // callee-saved registers. This is better than pushing eax/ecx, because
        // pushing them would mess up already computed offsets on the stack (for ESP frames).
4849 | regSet.rsSetRegsModified(RBM_EDI); |
4850 | |
4851 | #ifdef UNIX_AMD64_ABI |
        // For register arguments we may have to save RCX and RDI (on Amd64 System V OSes).
        // In such cases use the R12 and R13 registers.
4854 | if (maskCalleeRegArgMask & RBM_RCX) |
4855 | { |
4856 | regSet.rsSetRegsModified(RBM_R12); |
4857 | } |
4858 | |
4859 | if (maskCalleeRegArgMask & RBM_RDI) |
4860 | { |
4861 | regSet.rsSetRegsModified(RBM_R13); |
4862 | } |
4863 | #else // !UNIX_AMD64_ABI |
4864 | if (maskCalleeRegArgMask & RBM_ECX) |
4865 | { |
4866 | regSet.rsSetRegsModified(RBM_ESI); |
4867 | } |
4868 | #endif // !UNIX_AMD64_ABI |
4869 | |
4870 | if (maskCalleeRegArgMask & RBM_EAX) |
4871 | { |
4872 | regSet.rsSetRegsModified(RBM_EBX); |
4873 | } |
4874 | |
4875 | #endif // _TARGET_XARCH_ |
4876 | #ifdef _TARGET_ARM_ |
4877 | // |
    // On ARM, if we are using a block init to initialize, then we
    // must force-spill R4/R5/R6 so that we can use them during the
    // zero-initialization process.
4881 | // |
4882 | int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1; |
4883 | if (forceSpillRegCount > 0) |
4884 | regSet.rsSetRegsModified(RBM_R4); |
4885 | if (forceSpillRegCount > 1) |
4886 | regSet.rsSetRegsModified(RBM_R5); |
4887 | if (forceSpillRegCount > 2) |
4888 | regSet.rsSetRegsModified(RBM_R6); |
4889 | #endif // _TARGET_ARM_ |
4890 | } |
4891 | } |
4892 | |
4893 | /*----------------------------------------------------------------------------- |
4894 | * |
4895 | * Push any callee-saved registers we have used |
4896 | */ |
4897 | |
4898 | #if defined(_TARGET_ARM64_) |
4899 | void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) |
4900 | #else |
4901 | void CodeGen::genPushCalleeSavedRegisters() |
4902 | #endif |
4903 | { |
4904 | assert(compiler->compGeneratingProlog); |
4905 | |
4906 | #if defined(_TARGET_XARCH_) |
    // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto the
    // stack here. Space for the float registers to be preserved is allocated on the stack, and they are saved as
    // part of the prolog sequence, not here.
4910 | regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED; |
4911 | #else // !defined(_TARGET_XARCH_) |
4912 | regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; |
4913 | #endif |
4914 | |
4915 | #if ETW_EBP_FRAMED |
4916 | if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) |
4917 | { |
4918 | noway_assert(!"Used register RBM_FPBASE as a scratch register!" ); |
4919 | } |
4920 | #endif |
4921 | |
4922 | #ifdef _TARGET_XARCH_ |
4923 | // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method |
4924 | if (isFramePointerUsed()) |
4925 | { |
4926 | rsPushRegs &= ~RBM_FPBASE; |
4927 | } |
4928 | #endif |
4929 | |
4930 | #ifdef _TARGET_ARMARCH_ |
4931 | // On ARM we push the FP (frame-pointer) here along with all other callee saved registers |
4932 | if (isFramePointerUsed()) |
4933 | rsPushRegs |= RBM_FPBASE; |
4934 | |
4935 | // |
4936 | // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require |
4937 | // changes in GC suspension architecture. |
4938 | // |
4939 | // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we |
4940 | // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf |
4941 | // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends |
4942 | // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never |
4943 | // be saved on the stack and the GC suspension would time out. |
4944 | // |
    // So if we wanted to skip pushing/popping lr for leaf frames, we would also need to do one of
4946 | // the following to make GC suspension work in the above scenario: |
4947 | // - Make return address hijacking work even when lr is not saved on the stack. |
4948 | // - Generate fully interruptible code for loops that contains calls |
4949 | // - Generate fully interruptible code for leaf methods |
4950 | // |
4951 | // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity |
4952 | // is not worth it. |
4953 | // |
4954 | rsPushRegs |= RBM_LR; // We must save the return address (in the LR register) |
4955 | |
4956 | regSet.rsMaskCalleeSaved = rsPushRegs; |
4957 | #endif // _TARGET_ARMARCH_ |
4958 | |
4959 | #ifdef DEBUG |
4960 | if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) |
4961 | { |
4962 | printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d " , |
4963 | compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); |
4964 | dspRegMask(rsPushRegs); |
4965 | printf("\n" ); |
4966 | assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); |
4967 | } |
4968 | #endif // DEBUG |
4969 | |
4970 | #if defined(_TARGET_ARM_) |
4971 | regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT; |
4972 | regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat; |
4973 | |
4974 | maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat); |
4975 | |
4976 | assert(FitsIn<int>(maskPushRegsInt)); |
4977 | inst_IV(INS_push, (int)maskPushRegsInt); |
4978 | compiler->unwindPushMaskInt(maskPushRegsInt); |
4979 | |
4980 | if (maskPushRegsFloat != 0) |
4981 | { |
4982 | genPushFltRegs(maskPushRegsFloat); |
4983 | compiler->unwindPushMaskFloat(maskPushRegsFloat); |
4984 | } |
4985 | #elif defined(_TARGET_ARM64_) |
4986 | // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and |
4987 | // options. Case numbers in comments here refer to this document. |
4988 | // |
4989 | // For most frames, generate, e.g.: |
4990 | // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair |
4991 | // // ensures stack stays aligned. |
4992 | // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area |
4993 | // // at top of frame (highest addresses). |
4994 | // stp r21, r22, [sp, 0x70] |
4995 | // |
4996 | // Notes: |
4997 | // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers |
4998 | // at the top of the frame. |
4999 | // 2. If we save FP, then the first store is FP, LR. |
5000 | // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only |
5001 | // preserve their lower 8 bytes, by calling convention. |
5002 | // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are |
5003 | // consecutive. |
5004 | // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). |
5005 | |
5006 | int totalFrameSize = genTotalFrameSize(); |
5007 | |
5008 | int offset; // This will be the starting place for saving the callee-saved registers, in increasing order. |
5009 | |
5010 | regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; |
5011 | regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; |
5012 | |
5013 | int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we |
5014 | // generate based on various sizes. |
5015 | int calleeSaveSPDelta = 0; |
5016 | int calleeSaveSPDeltaUnaligned = 0; |
5017 | |
5018 | if (isFramePointerUsed()) |
5019 | { |
5020 | // We need to save both FP and LR. |
5021 | |
5022 | assert((maskSaveRegsInt & RBM_FP) != 0); |
5023 | assert((maskSaveRegsInt & RBM_LR) != 0); |
5024 | |
5025 | if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512)) |
5026 | { |
5027 | // Case #1. |
5028 | // |
5029 | // Generate: |
5030 | // stp fp,lr,[sp,#-framesz]! |
5031 | // |
5032 | // The (totalFrameSize < 512) condition ensures that both the predecrement |
5033 | // and the postincrement of SP can occur with STP. |
5034 | // |
5035 | // After saving callee-saved registers, we establish the frame pointer with: |
5036 | // mov fp,sp |
5037 | // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. |
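            //
            // As a concrete example (hypothetical sizes): totalFrameSize == 0x40 with compLclFrameSize == 0x10
            // and callee-saved r19-r22 would produce:
            //      stp fp,lr,[sp,#-0x40]!
            //      stp r19,r20,[sp,#0x20]
            //      stp r21,r22,[sp,#0x30]
            //      mov fp,sp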
5038 | |
5039 | frameType = 1; |
5040 | |
5041 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, |
5042 | INS_OPTS_PRE_INDEX); |
5043 | compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); |
5044 | |
5045 | maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR |
5046 | offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR |
5047 | } |
5048 | else if (totalFrameSize <= 512) |
5049 | { |
5050 | // Case #2. |
5051 | // |
5052 | // Generate: |
5053 | // sub sp,sp,#framesz |
5054 | // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496. |
5055 | // |
5056 | // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with |
5057 | // signed offset encoding. |
5058 | // |
5059 | // After saving callee-saved registers, we establish the frame pointer with: |
5060 | // add fp,sp,#outsz |
5061 | // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. |
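            //
            // As a concrete example (hypothetical sizes): totalFrameSize == 0x110 with #outsz == 0x20 and
            // callee-saved fp, lr, r19, r20 would produce:
            //      sub sp,sp,#0x110
            //      stp fp,lr,[sp,#0x20]
            //      stp r19,r20,[sp,#0x100]
            //      add fp,sp,#0x20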
5062 | |
5063 | frameType = 2; |
5064 | |
5065 | assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize); |
5066 | |
5067 | getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); |
5068 | compiler->unwindAllocStack(totalFrameSize); |
5069 | |
5070 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, |
5071 | compiler->lvaOutgoingArgSpaceSize); |
5072 | compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize); |
5073 | |
5074 | maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR |
5075 | offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR |
5076 | } |
5077 | else |
5078 | { |
5079 | // Case 5 or 6. |
5080 | // |
5081 | // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index |
5082 | // to subtract from SP as the first instruction. It must also leave space for varargs registers to be |
5083 | // stored. For example: |
5084 | // stp r19,r20,[sp,#-96]! |
5085 | // stp d8,d9,[sp,#16] |
5086 | // ... save varargs incoming integer registers ... |
5087 | // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be |
5088 | // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment). |
5089 | // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved |
5090 | // register): |
5091 | // sub sp,sp,#16 |
5092 | // str r19,[sp,#8] |
5093 | // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be |
5094 | // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one |
5095 | // above them. If that is preferable, we could implement it. |
5096 | // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers. |
5097 | // |
5098 | // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment |
5099 | // padding from above). |
            // Note that #remainingFrameSz must not be zero, since we still need to save FP,LR.
5101 | // |
5102 | // Generate: |
5103 | // sub sp,sp,#remainingFrameSz |
5104 | // or, for large frames: |
5105 | // mov rX, #remainingFrameSz // maybe multiple instructions |
5106 | // sub sp,sp,rX |
5107 | // |
5108 | // followed by: |
5109 | // stp fp,lr,[sp,#outsz] |
5110 | // add fp,sp,#outsz |
5111 | // |
5112 | // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can |
5113 | // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e., |
5114 | // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the |
5115 | // following sequences: |
5116 | // |
5117 | // Define #remainingFrameSz2 = #remainingFrameSz - #outsz. |
5118 | // |
5119 | // sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned |
5120 | // stp fp,lr,[sp] |
5121 | // mov fp,sp |
5122 | // sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned |
5123 | // |
5124 | // Or: |
5125 | // |
5126 | // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is |
5127 | // // always guaranteed to be 8 byte aligned). |
5128 | // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case |
5129 | // add fp,sp,#8 |
5130 | // sub sp,sp,#outsz - #8 |
5131 | // |
5132 | // (As usual, for a large constant "#outsz - #8", we might need multiple instructions: |
5133 | // mov rX, #outsz - #8 // maybe multiple instructions |
5134 | // sub sp,sp,rX |
5135 | // ) |
5136 | |
5137 | frameType = 3; |
5138 | |
5139 | calleeSaveSPDeltaUnaligned = |
5140 | totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later. |
5141 | assert(calleeSaveSPDeltaUnaligned >= 0); |
5142 | assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned. |
5143 | calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN); |
5144 | |
5145 | offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned; |
5146 | assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we |
5147 | // store the callee-saved registers. |
5148 | |
5149 | // We'll take care of these later, but callee-saved regs code shouldn't see them. |
5150 | maskSaveRegsInt &= ~(RBM_FP | RBM_LR); |
5151 | } |
5152 | } |
5153 | else |
5154 | { |
5155 | // No frame pointer (no chaining). |
5156 | assert((maskSaveRegsInt & RBM_FP) == 0); |
5157 | assert((maskSaveRegsInt & RBM_LR) != 0); |
5158 | |
5159 | // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp' |
5160 | // if we only have one callee-saved register plus LR to save. |
5161 | |
5162 | NYI("Frame without frame pointer" ); |
5163 | offset = 0; |
5164 | } |
5165 | |
5166 | assert(frameType != 0); |
5167 | |
5168 | genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); |
5169 | |
5170 | offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES; |
5171 | |
5172 | // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, |
5173 | // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't |
5174 | // need to add codes at all. |
5175 | |
5176 | if (compiler->info.compIsVarArgs) |
5177 | { |
5178 | // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here. |
5179 | assert((offset % 16) == 0); |
5180 | for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1))) |
5181 | { |
5182 | regNumber reg2 = REG_NEXT(reg1); |
5183 | // stp REG, REG + 1, [SP, #offset] |
5184 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset); |
5185 | compiler->unwindNop(); |
5186 | offset += 2 * REGSIZE_BYTES; |
5187 | } |
5188 | } |
5189 | |
5190 | if (frameType == 1) |
5191 | { |
5192 | getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE); |
5193 | compiler->unwindSetFrameReg(REG_FPBASE, 0); |
5194 | } |
5195 | else if (frameType == 2) |
5196 | { |
5197 | getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize); |
5198 | compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize); |
5199 | } |
5200 | else if (frameType == 3) |
5201 | { |
5202 | int remainingFrameSz = totalFrameSize - calleeSaveSPDelta; |
5203 | assert(remainingFrameSz > 0); |
5204 | assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component -- |
5205 | // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned. |
5206 | |
5207 | if (compiler->lvaOutgoingArgSpaceSize >= 504) |
5208 | { |
5209 | // We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big. |
5210 | // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment. |
5211 | assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize); |
5212 | int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize; |
5213 | int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN); |
5214 | int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned; |
5215 | assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8)); |
5216 | |
5217 | genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed); |
5218 | offset += spAdjustment2; |
5219 | |
5220 | // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included |
5221 | // some of it) |
5222 | |
5223 | int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2; |
5224 | assert(spAdjustment3 > 0); |
5225 | assert((spAdjustment3 % 16) == 0); |
5226 | |
5227 | getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2); |
5228 | compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2); |
5229 | |
5230 | genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed); |
5231 | offset += spAdjustment3; |
5232 | } |
5233 | else |
5234 | { |
5235 | genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg, |
5236 | pInitRegZeroed); |
5237 | offset += remainingFrameSz; |
5238 | |
5239 | getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize); |
5240 | compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize); |
5241 | } |
5242 | } |
5243 | |
5244 | assert(offset == totalFrameSize); |
5245 | |
5246 | #elif defined(_TARGET_XARCH_) |
5247 | // Push backwards so we match the order we will pop them in the epilog |
5248 | // and all the other code that expects it to be in this order. |
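    // For example (hypothetical mask): if only RBX and R12 were modified, this loop emits "push r12"
    // followed by "push rbx", so genPopCalleeSavedRegisters can simply pop rbx first and r12 second.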
5249 | for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg)) |
5250 | { |
5251 | regMaskTP regBit = genRegMask(reg); |
5252 | |
5253 | if ((regBit & rsPushRegs) != 0) |
5254 | { |
5255 | inst_RV(INS_push, reg, TYP_REF); |
5256 | compiler->unwindPush(reg); |
5257 | |
5258 | if (!doubleAlignOrFramePointerUsed()) |
5259 | { |
5260 | psiAdjustStackLevel(REGSIZE_BYTES); |
5261 | } |
5262 | |
5263 | rsPushRegs &= ~regBit; |
5264 | } |
5265 | } |
5266 | |
5267 | #else |
5268 | assert(!"Unknown TARGET" ); |
5269 | #endif // _TARGET_* |
5270 | } |
5271 | |
5272 | #if defined(_TARGET_ARM_) |
5273 | |
5274 | void CodeGen::genPushFltRegs(regMaskTP regMask) |
5275 | { |
    assert(regMask != 0);                        // Don't call unless we have some registers to push
    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
5278 | |
5279 | regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask)); |
5280 | int slots = genCountBits(regMask); |
5281 | // regMask should be contiguously set |
5282 | regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set |
5283 | assert((tmpMask & (tmpMask - 1)) == 0); |
5284 | assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes |
5285 | |
5286 | // Our calling convention requires that we only use vpush for TYP_DOUBLE registers |
5287 | noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE)); |
5288 | noway_assert((slots % 2) == 0); |
5289 | |
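    // For example (hypothetical mask): saving d8-d11 arrives here as a mask of f16-f23, so lowReg is
    // REG_F16 and slots is 8, and we emit "vpush {d8-d11}" (slots / 2 == 4 double registers).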
5290 | getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2); |
5291 | } |
5292 | |
5293 | void CodeGen::genPopFltRegs(regMaskTP regMask) |
5294 | { |
    assert(regMask != 0);                        // Don't call unless we have some registers to pop
    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
5297 | |
5298 | regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask)); |
5299 | int slots = genCountBits(regMask); |
5300 | // regMask should be contiguously set |
5301 | regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set |
5302 | assert((tmpMask & (tmpMask - 1)) == 0); |
5303 | |
5304 | // Our calling convention requires that we only use vpop for TYP_DOUBLE registers |
5305 | noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE)); |
5306 | noway_assert((slots % 2) == 0); |
5307 | |
5308 | getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2); |
5309 | } |
5310 | |
5311 | /*----------------------------------------------------------------------------- |
5312 | * |
5313 | * If we have a jmp call, then the argument registers cannot be used in the |
 * epilog. So return the current function's argument registers as the argument
5315 | * registers for the jmp call. |
5316 | */ |
5317 | regMaskTP CodeGen::genJmpCallArgMask() |
5318 | { |
5319 | assert(compiler->compGeneratingEpilog); |
5320 | |
5321 | regMaskTP argMask = RBM_NONE; |
5322 | for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum) |
5323 | { |
5324 | const LclVarDsc& desc = compiler->lvaTable[varNum]; |
5325 | if (desc.lvIsRegArg) |
5326 | { |
5327 | argMask |= genRegMask(desc.lvArgReg); |
5328 | } |
5329 | } |
5330 | return argMask; |
5331 | } |
5332 | |
5333 | /*----------------------------------------------------------------------------- |
5334 | * |
5335 | * Free the local stack frame: add to SP. |
5336 | * If epilog unwind hasn't been started, and we generate code, we start unwind |
5337 | * and set *pUnwindStarted = true. |
5338 | */ |
5339 | |
5340 | void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog) |
5341 | { |
5342 | assert(compiler->compGeneratingEpilog); |
5343 | |
5344 | if (frameSize == 0) |
5345 | return; |
5346 | |
5347 | // Add 'frameSize' to SP. |
5348 | // |
5349 | // Unfortunately, we can't just use: |
5350 | // |
5351 | // inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE); |
5352 | // |
5353 | // because we need to generate proper unwind codes for each instruction generated, |
5354 | // and large frame sizes might generate a temp register load which might |
5355 | // need an unwind code. We don't want to generate a "NOP" code for this |
5356 | // temp register load; we want the unwind codes to start after that. |
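    //
    // As a sketch (hypothetical sizes): for frameSize == 0x28 we simply emit "add sp,sp,#0x28". For a
    // frame size too large to encode as an immediate, e.g. 0x12340, we first materialize the constant
    // (roughly "movw rX,#0x2340; movt rX,#0x1") and then emit "add sp,sp,rX", starting the epilog
    // unwind codes only at the final add.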
5357 | |
5358 | if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE)) |
5359 | { |
5360 | if (!*pUnwindStarted) |
5361 | { |
5362 | compiler->unwindBegEpilog(); |
5363 | *pUnwindStarted = true; |
5364 | } |
5365 | |
5366 | getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE); |
5367 | } |
5368 | else |
5369 | { |
5370 | regMaskTP grabMask = RBM_INT_CALLEE_TRASH; |
5371 | if (jmpEpilog) |
5372 | { |
5373 | // Do not use argument registers as scratch registers in the jmp epilog. |
5374 | grabMask &= ~genJmpCallArgMask(); |
5375 | } |
5376 | regNumber tmpReg = REG_TMP_0; |
5377 | instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize); |
5378 | if (*pUnwindStarted) |
5379 | { |
5380 | compiler->unwindPadding(); |
5381 | } |
5382 | |
5383 | // We're going to generate an unwindable instruction, so check again if |
5384 | // we need to start the unwind codes. |
5385 | |
5386 | if (!*pUnwindStarted) |
5387 | { |
5388 | compiler->unwindBegEpilog(); |
5389 | *pUnwindStarted = true; |
5390 | } |
5391 | |
5392 | getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE); |
5393 | } |
5394 | |
5395 | compiler->unwindAllocStack(frameSize); |
5396 | } |
5397 | |
5398 | /*----------------------------------------------------------------------------- |
5399 | * |
5400 | * Move of relocatable displacement value to register |
5401 | */ |
5402 | void CodeGen::genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg) |
5403 | { |
5404 | getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block, reg); |
5405 | getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block, reg); |
5406 | |
5407 | if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS)) |
5408 | { |
5409 | getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE_DSP_RELOC, reg, reg, REG_PC); |
5410 | } |
5411 | } |
5412 | |
5413 | /*----------------------------------------------------------------------------- |
5414 | * |
5415 | * Move of relocatable data-label to register |
5416 | */ |
5417 | void CodeGen::genMov32RelocatableDataLabel(unsigned value, regNumber reg) |
5418 | { |
5419 | getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, value, reg); |
5420 | getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, value, reg); |
5421 | |
5422 | if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS)) |
5423 | { |
5424 | getEmitter()->emitIns_R_R_R(INS_add, EA_HANDLE_CNS_RELOC, reg, reg, REG_PC); |
5425 | } |
5426 | } |
5427 | |
5428 | /*----------------------------------------------------------------------------- |
5429 | * |
5430 | * Move of relocatable immediate to register |
5431 | */ |
5432 | void CodeGen::genMov32RelocatableImmediate(emitAttr size, BYTE* addr, regNumber reg) |
5433 | { |
5434 | _ASSERTE(EA_IS_RELOC(size)); |
5435 | |
5436 | getEmitter()->emitIns_MovRelocatableImmediate(INS_movw, size, reg, addr); |
5437 | getEmitter()->emitIns_MovRelocatableImmediate(INS_movt, size, reg, addr); |
5438 | |
5439 | if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS)) |
5440 | { |
5441 | getEmitter()->emitIns_R_R_R(INS_add, size, reg, reg, REG_PC); |
5442 | } |
5443 | } |
5444 | |
5445 | /*----------------------------------------------------------------------------- |
5446 | * |
5447 | * Returns register mask to push/pop to allocate a small stack frame, |
 * instead of using "sub sp" / "add sp". Returns RBM_NONE if the frame size
 * is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
5450 | */ |
5451 | regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat) |
5452 | { |
5453 | assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog); |
5454 | |
    // We can't do this optimization with callee-saved floating point registers because
    // the stack would be allocated in the wrong spot.
5457 | if (maskCalleeSavedFloat != RBM_NONE) |
5458 | return RBM_NONE; |
5459 | |
    // Allocate space for small frames by pushing extra registers. It generates smaller and faster code
    // than an extra sub sp,XXX / add sp,XXX.
5462 | // R0 and R1 may be used by return value. Keep things simple and just skip the optimization |
5463 | // for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and they have more |
5464 | // significant negative side-effects (more memory bus traffic). |
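    // For example (hypothetical): an 8-byte local frame (2 * REGSIZE_BYTES) is allocated by adding
    // R2/R3 to the prolog's "push {...}" and freed by adding them to the epilog's "pop {...}",
    // instead of emitting a separate "sub sp,sp,#8" / "add sp,sp,#8" pair.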
5465 | switch (frameSize) |
5466 | { |
5467 | case REGSIZE_BYTES: |
5468 | return RBM_R3; |
5469 | case 2 * REGSIZE_BYTES: |
5470 | return RBM_R2 | RBM_R3; |
5471 | default: |
5472 | return RBM_NONE; |
5473 | } |
5474 | } |
5475 | |
5476 | #endif // _TARGET_ARM_ |
5477 | |
5478 | /***************************************************************************** |
5479 | * |
5480 | * initFltRegs -- The mask of float regs to be zeroed. |
5481 | * initDblRegs -- The mask of double regs to be zeroed. |
5482 | * initReg -- A zero initialized integer reg to copy from. |
5483 | * |
 * Makes a best effort to move between VFP/xmm regs if one is already
 * initialized to 0 (ARM only). Otherwise copies from the integer register,
 * which is slower.
5487 | */ |
5488 | void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg) |
5489 | { |
5490 | assert(compiler->compGeneratingProlog); |
5491 | |
5492 | // The first float/double reg that is initialized to 0. So they can be used to |
5493 | // initialize the remaining registers. |
5494 | regNumber fltInitReg = REG_NA; |
5495 | regNumber dblInitReg = REG_NA; |
5496 | |
5497 | // Iterate through float/double registers and initialize them to 0 or |
5498 | // copy from already initialized register of the same type. |
5499 | regMaskTP regMask = genRegMask(REG_FP_FIRST); |
5500 | for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1) |
5501 | { |
5502 | if (regMask & initFltRegs) |
5503 | { |
5504 | // Do we have a float register already set to 0? |
5505 | if (fltInitReg != REG_NA) |
5506 | { |
5507 | // Copy from float. |
5508 | inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT); |
5509 | } |
5510 | else |
5511 | { |
5512 | #ifdef _TARGET_ARM_ |
5513 | // Do we have a double register initialized to 0? |
5514 | if (dblInitReg != REG_NA) |
5515 | { |
5516 | // Copy from double. |
5517 | inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT); |
5518 | } |
5519 | else |
5520 | { |
5521 | // Copy from int. |
5522 | inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE); |
5523 | } |
5524 | #elif defined(_TARGET_XARCH_) |
5525 | // XORPS is the fastest and smallest way to initialize a XMM register to zero. |
5526 | inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE); |
5527 | dblInitReg = reg; |
5528 | #elif defined(_TARGET_ARM64_) |
5529 | // We will just zero out the entire vector register. This sets it to a double/float zero value |
5530 | getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); |
5531 | #else // _TARGET_* |
5532 | #error Unsupported or unset target architecture |
5533 | #endif |
5534 | fltInitReg = reg; |
5535 | } |
5536 | } |
5537 | else if (regMask & initDblRegs) |
5538 | { |
5539 | // Do we have a double register already set to 0? |
5540 | if (dblInitReg != REG_NA) |
5541 | { |
5542 | // Copy from double. |
5543 | inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE); |
5544 | } |
5545 | else |
5546 | { |
5547 | #ifdef _TARGET_ARM_ |
5548 | // Do we have a float register initialized to 0? |
5549 | if (fltInitReg != REG_NA) |
5550 | { |
5551 | // Copy from float. |
5552 | inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE); |
5553 | } |
5554 | else |
5555 | { |
5556 | // Copy from int. |
5557 | inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE); |
5558 | } |
5559 | #elif defined(_TARGET_XARCH_) |
5560 | // XORPS is the fastest and smallest way to initialize a XMM register to zero. |
5561 | inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE); |
5562 | fltInitReg = reg; |
5563 | #elif defined(_TARGET_ARM64_) |
5564 | // We will just zero out the entire vector register. This sets it to a double/float zero value |
5565 | getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); |
5566 | #else // _TARGET_* |
5567 | #error Unsupported or unset target architecture |
5568 | #endif |
5569 | dblInitReg = reg; |
5570 | } |
5571 | } |
5572 | } |
5573 | } |
5574 | |
5575 | /*----------------------------------------------------------------------------- |
5576 | * |
5577 | * Restore any callee-saved registers we have used |
5578 | */ |
5579 | |
5580 | #if defined(_TARGET_ARM_) |
5581 | |
5582 | bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog) |
5583 | { |
5584 | assert(compiler->compGeneratingEpilog); |
5585 | |
5586 | if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE) |
5587 | return true; |
5588 | else |
5589 | return false; |
5590 | } |
5591 | |
5592 | void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) |
5593 | { |
5594 | assert(compiler->compGeneratingEpilog); |
5595 | |
5596 | regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; |
5597 | regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT; |
5598 | regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat; |
5599 | |
5600 | // First, pop float registers |
5601 | |
5602 | if (maskPopRegsFloat != RBM_NONE) |
5603 | { |
5604 | genPopFltRegs(maskPopRegsFloat); |
5605 | compiler->unwindPopMaskFloat(maskPopRegsFloat); |
5606 | } |
5607 | |
5608 | // Next, pop integer registers |
5609 | |
5610 | if (!jmpEpilog) |
5611 | { |
5612 | regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat); |
5613 | maskPopRegsInt |= maskStackAlloc; |
5614 | } |
5615 | |
5616 | if (isFramePointerUsed()) |
5617 | { |
5618 | assert(!regSet.rsRegsModified(RBM_FPBASE)); |
5619 | maskPopRegsInt |= RBM_FPBASE; |
5620 | } |
5621 | |
5622 | if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog)) |
5623 | { |
5624 | maskPopRegsInt |= RBM_PC; |
5625 | // Record the fact that we use a pop to the PC to perform the return |
5626 | genUsedPopToReturn = true; |
5627 | } |
5628 | else |
5629 | { |
5630 | maskPopRegsInt |= RBM_LR; |
5631 | // Record the fact that we did not use a pop to the PC to perform the return |
5632 | genUsedPopToReturn = false; |
5633 | } |
5634 | |
5635 | assert(FitsIn<int>(maskPopRegsInt)); |
5636 | inst_IV(INS_pop, (int)maskPopRegsInt); |
5637 | compiler->unwindPopMaskInt(maskPopRegsInt); |
5638 | } |
5639 | |
5640 | #elif defined(_TARGET_ARM64_) |
5641 | |
5642 | void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) |
5643 | { |
5644 | assert(compiler->compGeneratingEpilog); |
5645 | |
5646 | regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; |
5647 | |
5648 | if (isFramePointerUsed()) |
5649 | { |
5650 | rsRestoreRegs |= RBM_FPBASE; |
5651 | } |
5652 | |
5653 | rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register) |
5654 | |
5655 | regMaskTP regsToRestoreMask = rsRestoreRegs; |
5656 | |
5657 | int totalFrameSize = genTotalFrameSize(); |
5658 | |
5659 | int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing |
5660 | // order. |
5661 | int frameType = 0; // An indicator of what type of frame we are popping. |
5662 | int calleeSaveSPDelta = 0; |
5663 | int calleeSaveSPDeltaUnaligned = 0; |
5664 | |
5665 | if (isFramePointerUsed()) |
5666 | { |
5667 | if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512)) |
5668 | { |
5669 | frameType = 1; |
5670 | if (compiler->compLocallocUsed) |
5671 | { |
5672 | // Restore sp from fp |
5673 | // mov sp, fp |
5674 | inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE); |
5675 | compiler->unwindSetFrameReg(REG_FPBASE, 0); |
5676 | } |
5677 | |
5678 | regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP. |
5679 | |
            // Compute the callee-save SP offset, which is at the top of the local frame,
            // while FP/LR is saved at the bottom of the stack.
5682 | calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; |
5683 | } |
5684 | else if (totalFrameSize <= 512) |
5685 | { |
5686 | frameType = 2; |
5687 | if (compiler->compLocallocUsed) |
5688 | { |
5689 | // Restore sp from fp |
5690 | // sub sp, fp, #outsz |
5691 | getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, |
5692 | compiler->lvaOutgoingArgSpaceSize); |
5693 | compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize); |
5694 | } |
5695 | |
5696 | regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP. |
5697 | |
            // Compute the callee-save SP offset, which is at the top of the local frame,
            // while FP/LR is saved at the bottom of the stack.
5700 | calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; |
5701 | } |
5702 | else |
5703 | { |
5704 | frameType = 3; |
5705 | |
5706 | calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize - |
5707 | 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later. |
5708 | assert(calleeSaveSPDeltaUnaligned >= 0); |
5709 | assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned. |
5710 | calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN); |
5711 | |
5712 | regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP. |
5713 | |
5714 | int remainingFrameSz = totalFrameSize - calleeSaveSPDelta; |
5715 | assert(remainingFrameSz > 0); |
5716 | |
5717 | if (compiler->lvaOutgoingArgSpaceSize >= 504) |
5718 | { |
5719 | // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big. |
5720 | // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment. |
5721 | assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize); |
5722 | int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize; |
5723 | int spAdjustment2 = (int)roundUp((unsigned)spAdjustment2Unaligned, STACK_ALIGN); |
5724 | int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned; |
5725 | assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES)); |
5726 | |
5727 | if (compiler->compLocallocUsed) |
5728 | { |
5729 | // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in |
5730 | // prolog. |
5731 | // sub sp, fp, #alignmentAdjustment2 |
5732 | getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2); |
5733 | compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2); |
5734 | } |
5735 | else |
5736 | { |
5737 | // Generate: |
5738 | // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more |
5739 | // ; careful |
5740 | int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2; |
5741 | assert(spAdjustment3 > 0); |
5742 | assert((spAdjustment3 % 16) == 0); |
5743 | genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr); |
5744 | } |
5745 | |
5746 | // Generate: |
5747 | // ldp fp,lr,[sp] |
5748 | // add sp,sp,#remainingFrameSz |
5749 | genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, REG_IP1, nullptr); |
5750 | } |
5751 | else |
5752 | { |
5753 | if (compiler->compLocallocUsed) |
5754 | { |
5755 | // Restore sp from fp |
5756 | // sub sp, fp, #outsz |
5757 | getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, |
5758 | compiler->lvaOutgoingArgSpaceSize); |
5759 | compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize); |
5760 | } |
5761 | |
5762 | // Generate: |
5763 | // ldp fp,lr,[sp,#outsz] |
5764 | // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if |
5765 | // ; it's large |
5766 | |
5767 | genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP1, |
5768 | nullptr); |
5769 | } |
5770 | |
5771 | // Unlike frameType=1 or frameType=2 that restore SP at the end, |
5772 | // frameType=3 already adjusted SP above to delete local frame. |
5773 | // There is at most one alignment slot between SP and where we store the callee-saved registers. |
5774 | calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned; |
5775 | assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES)); |
5776 | } |
5777 | } |
5778 | else |
5779 | { |
5780 | // No frame pointer (no chaining). |
5781 | NYI("Frame without frame pointer" ); |
5782 | calleeSaveSPOffset = 0; |
5783 | } |
5784 | |
5785 | genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); |
5786 | |
5787 | if (frameType == 1) |
5788 | { |
5789 | // Generate: |
5790 | // ldp fp,lr,[sp],#framesz |
5791 | |
5792 | getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize, |
5793 | INS_OPTS_POST_INDEX); |
5794 | compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); |
5795 | } |
5796 | else if (frameType == 2) |
5797 | { |
5798 | // Generate: |
        //      ldp fp,lr,[sp,#outsz]
5800 | // add sp,sp,#framesz |
5801 | |
5802 | getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, |
5803 | compiler->lvaOutgoingArgSpaceSize); |
5804 | compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize); |
5805 | |
5806 | getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); |
5807 | compiler->unwindAllocStack(totalFrameSize); |
5808 | } |
5809 | else if (frameType == 3) |
5810 | { |
5811 | // Nothing to do after restoring callee-saved registers. |
5812 | } |
5813 | else |
5814 | { |
5815 | unreached(); |
5816 | } |
5817 | } |
5818 | |
5819 | #elif defined(_TARGET_XARCH_) |
5820 | |
5821 | void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) |
5822 | { |
5823 | assert(compiler->compGeneratingEpilog); |
5824 | |
5825 | unsigned popCount = 0; |
5826 | if (regSet.rsRegsModified(RBM_EBX)) |
5827 | { |
5828 | popCount++; |
5829 | inst_RV(INS_pop, REG_EBX, TYP_I_IMPL); |
5830 | } |
5831 | if (regSet.rsRegsModified(RBM_FPBASE)) |
5832 | { |
5833 | // EBP cannot be directly modified for EBP frame and double-aligned frames |
5834 | assert(!doubleAlignOrFramePointerUsed()); |
5835 | |
5836 | popCount++; |
5837 | inst_RV(INS_pop, REG_EBP, TYP_I_IMPL); |
5838 | } |
5839 | |
5840 | #ifndef UNIX_AMD64_ABI |
    // ESI and EDI are volatile under the System V AMD64 calling convention,
    // so they are only saved/restored here for Windows.
5842 | if (regSet.rsRegsModified(RBM_ESI)) |
5843 | { |
5844 | popCount++; |
5845 | inst_RV(INS_pop, REG_ESI, TYP_I_IMPL); |
5846 | } |
5847 | if (regSet.rsRegsModified(RBM_EDI)) |
5848 | { |
5849 | popCount++; |
5850 | inst_RV(INS_pop, REG_EDI, TYP_I_IMPL); |
5851 | } |
5852 | #endif // !defined(UNIX_AMD64_ABI) |
5853 | |
5854 | #ifdef _TARGET_AMD64_ |
5855 | if (regSet.rsRegsModified(RBM_R12)) |
5856 | { |
5857 | popCount++; |
5858 | inst_RV(INS_pop, REG_R12, TYP_I_IMPL); |
5859 | } |
5860 | if (regSet.rsRegsModified(RBM_R13)) |
5861 | { |
5862 | popCount++; |
5863 | inst_RV(INS_pop, REG_R13, TYP_I_IMPL); |
5864 | } |
5865 | if (regSet.rsRegsModified(RBM_R14)) |
5866 | { |
5867 | popCount++; |
5868 | inst_RV(INS_pop, REG_R14, TYP_I_IMPL); |
5869 | } |
5870 | if (regSet.rsRegsModified(RBM_R15)) |
5871 | { |
5872 | popCount++; |
5873 | inst_RV(INS_pop, REG_R15, TYP_I_IMPL); |
5874 | } |
5875 | #endif // _TARGET_AMD64_ |
5876 | |
    // Amd64/x86 doesn't support push/pop of xmm registers.
    // These get saved to the stack separately, after space for them is
    // allocated in the prolog sequence. popCount is essentially
    // tracking the count of integer registers pushed.
5881 | |
5882 | noway_assert(compiler->compCalleeRegsPushed == popCount); |
5883 | } |
5884 | |
5885 | #elif defined(_TARGET_X86_) |
5886 | |
5887 | void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) |
5888 | { |
5889 | assert(compiler->compGeneratingEpilog); |
5890 | |
5891 | unsigned popCount = 0; |
5892 | |
5893 | /* NOTE: The EBP-less frame code below depends on the fact that |
5894 | all of the pops are generated right at the start and |
5895 | each takes one byte of machine code. |
5896 | */ |
5897 | |
5898 | if (regSet.rsRegsModified(RBM_FPBASE)) |
5899 | { |
5900 | // EBP cannot be directly modified for EBP frame and double-aligned frames |
5901 | noway_assert(!doubleAlignOrFramePointerUsed()); |
5902 | |
5903 | inst_RV(INS_pop, REG_EBP, TYP_I_IMPL); |
5904 | popCount++; |
5905 | } |
5906 | if (regSet.rsRegsModified(RBM_EBX)) |
5907 | { |
5908 | popCount++; |
5909 | inst_RV(INS_pop, REG_EBX, TYP_I_IMPL); |
5910 | } |
5911 | if (regSet.rsRegsModified(RBM_ESI)) |
5912 | { |
5913 | popCount++; |
5914 | inst_RV(INS_pop, REG_ESI, TYP_I_IMPL); |
5915 | } |
5916 | if (regSet.rsRegsModified(RBM_EDI)) |
5917 | { |
5918 | popCount++; |
5919 | inst_RV(INS_pop, REG_EDI, TYP_I_IMPL); |
5920 | } |
5921 | noway_assert(compiler->compCalleeRegsPushed == popCount); |
5922 | } |
5923 | |
5924 | #endif // _TARGET_* |
5925 | |
5926 | // We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so. |
5927 | // Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR. |
5928 | regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed) |
5929 | { |
5930 | #ifdef _TARGET_ARM64_ |
5931 | return REG_ZR; |
5932 | #else // !_TARGET_ARM64_ |
5933 | if (*pInitRegZeroed == false) |
5934 | { |
5935 | instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); |
5936 | *pInitRegZeroed = true; |
5937 | } |
5938 | return initReg; |
5939 | #endif // !_TARGET_ARM64_ |
5940 | } |
5941 | |
5942 | /*----------------------------------------------------------------------------- |
5943 | * |
5944 | * Do we have any untracked pointer locals at all, |
5945 | * or do we need to initialize memory for locspace? |
5946 | * |
5947 | * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end |
5948 | * initializing memory (not inclusive). |
5949 | * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero |
5950 | * initializing memory. |
5951 | * initReg - A scratch register (that gets set to zero on some platforms). |
5952 | * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed. |
5953 | */ |
5954 | void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed) |
5955 | { |
5956 | assert(compiler->compGeneratingProlog); |
5957 | |
5958 | if (genUseBlockInit) |
5959 | { |
5960 | assert(untrLclHi > untrLclLo); |
5961 | #ifdef _TARGET_ARMARCH_ |
5962 | /* |
5963 | Generate the following code: |
5964 | |
5965 | For cnt less than 10 |
5966 | |
5967 | mov rZero1, 0 |
5968 | mov rZero2, 0 |
5969 | mov rCnt, <cnt> |
5970 | stm <rZero1,rZero2>,[rAddr!] |
5971 | <optional> stm <rZero1,rZero2>,[rAddr!] |
5972 | <optional> stm <rZero1,rZero2>,[rAddr!] |
5973 | <optional> stm <rZero1,rZero2>,[rAddr!] |
5974 | <optional> str rZero1,[rAddr] |
5975 | |
5976 | For rCnt greater than or equal to 10 |
5977 | |
5978 | mov rZero1, 0 |
5979 | mov rZero2, 0 |
5980 | mov rCnt, <cnt/2> |
5981 | sub rAddr, sp, OFFS |
5982 | |
5983 | loop: |
5984 | stm <rZero1,rZero2>,[rAddr!] |
5985 | sub rCnt,rCnt,1 |
5986 | jnz loop |
5987 | |
5988 | <optional> str rZero1,[rAddr] // When cnt is odd |
5989 | |
5990 | NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers. |
5991 | */ |
5992 | |
5993 | regNumber rAddr; |
5994 | regNumber rCnt = REG_NA; // Invalid |
5995 | regMaskTP regMask; |
5996 | |
5997 | regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers |
5998 | availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are |
5999 | // currently live |
6000 | availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for |
6001 | // a large constant. |
6002 | |
6003 | #if defined(_TARGET_ARM_) |
6004 | |
6005 | if (compiler->compLocallocUsed) |
6006 | { |
6007 | availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame |
6008 | } |
6009 | |
6010 | regNumber rZero1; // We're going to use initReg for rZero1 |
6011 | regNumber rZero2; |
6012 | |
6013 | // We pick the next lowest register number for rZero2 |
6014 | noway_assert(availMask != RBM_NONE); |
6015 | regMask = genFindLowestBit(availMask); |
6016 | rZero2 = genRegNumFromMask(regMask); |
6017 | availMask &= ~regMask; |
6018 | assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) == |
6019 | 0); // rZero2 is not a live incoming argument reg |
6020 | |
6021 | // We pick the next lowest register number for rAddr |
6022 | noway_assert(availMask != RBM_NONE); |
6023 | regMask = genFindLowestBit(availMask); |
6024 | rAddr = genRegNumFromMask(regMask); |
6025 | availMask &= ~regMask; |
6026 | |
#else // !defined(_TARGET_ARM_)
6028 | |
6029 | regNumber rZero1 = REG_ZR; |
6030 | rAddr = initReg; |
6031 | *pInitRegZeroed = false; |
6032 | |
6033 | #endif // !defined(_TARGET_ARM_) |
6034 | |
6035 | bool useLoop = false; |
6036 | unsigned uCntBytes = untrLclHi - untrLclLo; |
6037 | assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes. |
6038 | unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use. |
6039 | |
6040 | // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline. |
6041 | // When it is 10 or greater, we will emit a loop containing a stm/stp instruction. |
6042 | // In both of these cases the stm/stp instruction will write two zeros to memory |
6043 | // and we will use a single str instruction at the end whenever we have an odd count. |
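        // For example (hypothetical span) on ARM64: zeroing 40 bytes gives uCntSlots == 5, so we emit
        // two "stp xzr,xzr,[rAddr],#16" instructions inline followed by one final "str xzr,[rAddr]".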
6044 | if (uCntSlots >= 10) |
6045 | useLoop = true; |
6046 | |
6047 | if (useLoop) |
6048 | { |
6049 | // We pick the next lowest register number for rCnt |
6050 | noway_assert(availMask != RBM_NONE); |
6051 | regMask = genFindLowestBit(availMask); |
6052 | rCnt = genRegNumFromMask(regMask); |
6053 | availMask &= ~regMask; |
6054 | } |
6055 | |
6056 | assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == |
6057 | 0); // rAddr is not a live incoming argument reg |
6058 | #if defined(_TARGET_ARM_) |
6059 | if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE)) |
6060 | #else // !_TARGET_ARM_ |
6061 | if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE)) |
6062 | #endif // !_TARGET_ARM_ |
6063 | { |
6064 | getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); |
6065 | } |
6066 | else |
6067 | { |
6068 | // Load immediate into the InitReg register |
6069 | instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo); |
6070 | getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg); |
6071 | *pInitRegZeroed = false; |
6072 | } |
6073 | |
6074 | if (useLoop) |
6075 | { |
6076 | noway_assert(uCntSlots >= 2); |
6077 | assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == |
6078 | 0); // rCnt is not a live incoming argument reg |
6079 | instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); |
6080 | } |
6081 | |
6082 | #if defined(_TARGET_ARM_) |
6083 | rZero1 = genGetZeroReg(initReg, pInitRegZeroed); |
6084 | instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2); |
6085 | target_ssize_t stmImm = (target_ssize_t)(genRegMask(rZero1) | genRegMask(rZero2)); |
6086 | #endif // _TARGET_ARM_ |
6087 | |
6088 | if (!useLoop) |
6089 | { |
6090 | while (uCntBytes >= REGSIZE_BYTES * 2) |
6091 | { |
6092 | #ifdef _TARGET_ARM_ |
6093 | getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); |
6094 | #else // !_TARGET_ARM_ |
6095 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES, |
6096 | INS_OPTS_POST_INDEX); |
6097 | #endif // !_TARGET_ARM_ |
6098 | uCntBytes -= REGSIZE_BYTES * 2; |
6099 | } |
6100 | } |
6101 | else // useLoop is true |
6102 | { |
6103 | #ifdef _TARGET_ARM_ |
6104 | getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots |
6105 | getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET); |
6106 | #else // !_TARGET_ARM_ |
6107 | getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES, |
6108 | INS_OPTS_POST_INDEX); // zero stack slots |
6109 | getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1); |
6110 | #endif // !_TARGET_ARM_ |
6111 | getEmitter()->emitIns_J(INS_bhi, NULL, -3); |
6112 | uCntBytes %= REGSIZE_BYTES * 2; |
6113 | } |
6114 | |
6115 | if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number) |
6116 | { |
6117 | #ifdef _TARGET_ARM_ |
6118 | getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0); |
#else // !_TARGET_ARM_
6120 | if ((uCntBytes - REGSIZE_BYTES) == 0) |
6121 | { |
6122 | getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0); |
6123 | } |
6124 | else |
6125 | { |
6126 | getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX); |
6127 | } |
6128 | #endif // !_TARGET_ARM_ |
6129 | uCntBytes -= REGSIZE_BYTES; |
6130 | } |
6131 | #ifdef _TARGET_ARM64_ |
6132 | if (uCntBytes > 0) |
6133 | { |
6134 | assert(uCntBytes == sizeof(int)); |
6135 | getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0); |
6136 | uCntBytes -= sizeof(int); |
6137 | } |
6138 | #endif // _TARGET_ARM64_ |
6139 | noway_assert(uCntBytes == 0); |
6140 | |
6141 | #elif defined(_TARGET_XARCH_) |
6142 | /* |
6143 | Generate the following code: |
6144 | |
6145 | lea edi, [ebp/esp-OFFS] |
6146 | mov ecx, <size> |
6147 | xor eax, eax |
6148 | rep stosd |
6149 | */ |
6150 | |
6151 | noway_assert(regSet.rsRegsModified(RBM_EDI)); |
6152 | |
6153 | #ifdef UNIX_AMD64_ABI |
    // For register arguments we may have to save RCX and RDI on AMD64 System V OSes,
    // since both are used by the 'rep stosd' sequence below.
6155 | if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX) |
6156 | { |
6157 | noway_assert(regSet.rsRegsModified(RBM_R12)); |
6158 | inst_RV_RV(INS_mov, REG_R12, REG_RCX); |
6159 | regSet.verifyRegUsed(REG_R12); |
6160 | } |
6161 | |
6162 | if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI) |
6163 | { |
6164 | noway_assert(regSet.rsRegsModified(RBM_R13)); |
6165 | inst_RV_RV(INS_mov, REG_R13, REG_RDI); |
6166 | regSet.verifyRegUsed(REG_R13); |
6167 | } |
6168 | #else // !UNIX_AMD64_ABI |
6169 | // For register arguments we may have to save ECX |
6170 | if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX) |
6171 | { |
6172 | noway_assert(regSet.rsRegsModified(RBM_ESI)); |
6173 | inst_RV_RV(INS_mov, REG_ESI, REG_ECX); |
6174 | regSet.verifyRegUsed(REG_ESI); |
6175 | } |
6176 | #endif // !UNIX_AMD64_ABI |
6177 | |
6178 | noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0); |
6179 | |
6180 | getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo); |
6181 | regSet.verifyRegUsed(REG_EDI); |
6182 | |
6183 | inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE); |
6184 | instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX); |
6185 | instGen(INS_r_stosd); |
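    // 'rep stosd' stores EAX (zero) to [EDI] and advances EDI by 4 bytes, repeating ECX times;
    // ECX was loaded above with the number of 4-byte slots in [untrLclLo, untrLclHi).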
6186 | |
6187 | #ifdef UNIX_AMD64_ABI |
6188 | // Move back the argument registers |
6189 | if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX) |
6190 | { |
6191 | inst_RV_RV(INS_mov, REG_RCX, REG_R12); |
6192 | } |
6193 | |
6194 | if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI) |
6195 | { |
6196 | inst_RV_RV(INS_mov, REG_RDI, REG_R13); |
6197 | } |
6198 | #else // !UNIX_AMD64_ABI |
6199 | // Move back the argument registers |
6200 | if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX) |
6201 | { |
6202 | inst_RV_RV(INS_mov, REG_ECX, REG_ESI); |
6203 | } |
6204 | #endif // !UNIX_AMD64_ABI |
6205 | |
6206 | #else // _TARGET_* |
6207 | #error Unsupported or unset target architecture |
6208 | #endif // _TARGET_* |
6209 | } |
6210 | else if (genInitStkLclCnt > 0) |
6211 | { |
6212 | assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) == |
6213 | 0); // initReg is not a live incoming argument reg |
6214 | |
6215 | /* Initialize any lvMustInit vars on the stack */ |
6216 | |
6217 | LclVarDsc* varDsc; |
6218 | unsigned varNum; |
6219 | |
6220 | for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) |
6221 | { |
6222 | if (!varDsc->lvMustInit) |
6223 | { |
6224 | continue; |
6225 | } |
6226 | |
6227 | // TODO-Review: I'm not sure that we're correctly handling the mustInit case for |
6228 | // partially-enregistered vars in the case where we don't use a block init. |
6229 | noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame); |
6230 | |
6231 | // lvMustInit can only be set for GC types or TYP_STRUCT types |
6232 | // or when compInitMem is true |
6233 | // or when in debug code |
6234 | |
6235 | noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) || |
6236 | compiler->info.compInitMem || compiler->opts.compDbgCode); |
6237 | |
6238 | if (!varDsc->lvOnFrame) |
6239 | { |
6240 | continue; |
6241 | } |
6242 | |
6243 | if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem && |
6244 | (varDsc->lvExactSize >= TARGET_POINTER_SIZE)) |
6245 | { |
6246 | // We only initialize the GC variables in the TYP_STRUCT |
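                // lvaGetGcLayout returns one byte per pointer-sized slot of the struct, indicating whether
                // that slot holds a GC reference or byref (TYPE_GC_NONE means it does not).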
6247 | const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES; |
6248 | const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum); |
6249 | |
6250 | for (unsigned i = 0; i < slots; i++) |
6251 | { |
6252 | if (gcPtrs[i] != TYPE_GC_NONE) |
6253 | { |
6254 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, |
6255 | genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES); |
6256 | } |
6257 | } |
6258 | } |
6259 | else |
6260 | { |
6261 | regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed); |
6262 | |
                // Zero out the whole variable, rounded up to a multiple of the smallest stack slot size (sizeof(int))
6264 | unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int)); |
6265 | unsigned i; |
6266 | for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES) |
6267 | { |
6268 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i); |
6269 | } |
6270 | |
6271 | #ifdef _TARGET_64BIT_ |
6272 | assert(i == lclSize || (i + sizeof(int) == lclSize)); |
6273 | if (i != lclSize) |
6274 | { |
6275 | getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i); |
6276 | i += sizeof(int); |
6277 | } |
6278 | #endif // _TARGET_64BIT_ |
6279 | assert(i == lclSize); |
6280 | } |
6281 | } |
6282 | |
6283 | if (!TRACK_GC_TEMP_LIFETIMES) |
6284 | { |
6285 | assert(regSet.tmpAllFree()); |
6286 | for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis)) |
6287 | { |
6288 | if (!varTypeIsGC(tempThis->tdTempType())) |
6289 | { |
6290 | continue; |
6291 | } |
6292 | |
6293 | // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs); |
6294 | |
6295 | inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL); |
6296 | } |
6297 | } |
6298 | } |
6299 | } |
6300 | |
6301 | /*----------------------------------------------------------------------------- |
6302 | * |
6303 | * Save the generic context argument. |
6304 | * |
6305 | * We need to do this within the "prolog" in case anyone tries to inspect |
6306 | * the param-type-arg/this (which can be done after the prolog) using |
6307 | * ICodeManager::GetParamTypeArg(). |
6308 | */ |
6309 | |
6310 | void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed) |
6311 | { |
6312 | assert(compiler->compGeneratingProlog); |
6313 | |
6314 | bool reportArg = compiler->lvaReportParamTypeArg(); |
6315 | |
    // We should report either the generic context arg or "this", whichever is used.
6317 | if (!reportArg) |
6318 | { |
6319 | #ifndef JIT32_GCENCODER |
6320 | if (!compiler->lvaKeepAliveAndReportThis()) |
6321 | #endif |
6322 | { |
6323 | return; |
6324 | } |
6325 | } |
6326 | |
6327 | // For JIT32_GCENCODER, we won't be here if reportArg is false. |
6328 | unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg; |
6329 | |
6330 | noway_assert(contextArg != BAD_VAR_NUM); |
6331 | LclVarDsc* varDsc = &compiler->lvaTable[contextArg]; |
6332 | |
6333 | // We are still in the prolog and compiler->info.compTypeCtxtArg has not been |
6334 | // moved to its final home location. So we need to use it from the |
6335 | // incoming location. |
6336 | |
6337 | regNumber reg; |
6338 | |
6339 | bool isPrespilledForProfiling = false; |
6340 | #if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED) |
6341 | isPrespilledForProfiling = |
6342 | compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false)); |
6343 | #endif |
6344 | |
6345 | // Load from the argument register only if it is not prespilled. |
6346 | if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling) |
6347 | { |
6348 | reg = varDsc->lvArgReg; |
6349 | } |
6350 | else |
6351 | { |
6352 | if (isFramePointerUsed()) |
6353 | { |
6354 | #if defined(_TARGET_ARM_) |
6355 | // lvStkOffs is always valid for incoming stack-arguments, even if the argument |
6356 | // will become enregistered. |
6357 | // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES |
6358 | noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) && |
6359 | (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES)); |
6360 | #else |
6361 | // lvStkOffs is always valid for incoming stack-arguments, even if the argument |
6362 | // will become enregistered. |
6363 | noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize)); |
6364 | #endif |
6365 | } |
6366 | |
6367 | // We will just use the initReg since it is an available register |
6368 | // and we are probably done using it anyway... |
6369 | reg = initReg; |
6370 | *pInitRegZeroed = false; |
6371 | |
6372 | // mov reg, [compiler->info.compTypeCtxtArg] |
6373 | getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs); |
6374 | regSet.verifyRegUsed(reg); |
6375 | } |
6376 | |
6377 | #if CPU_LOAD_STORE_ARCH |
6378 | getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), |
6379 | compiler->lvaCachedGenericContextArgOffset()); |
6380 | #else // CPU_LOAD_STORE_ARCH |
6381 | // mov [ebp-lvaCachedGenericContextArgOffset()], reg |
6382 | getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), |
6383 | compiler->lvaCachedGenericContextArgOffset()); |
6384 | #endif // !CPU_LOAD_STORE_ARCH |
6385 | } |
6386 | |
6387 | /*----------------------------------------------------------------------------- |
6388 | * |
6389 | * Set the "GS" security cookie in the prolog. |
6390 | */ |
6391 | |
6392 | void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) |
6393 | { |
6394 | assert(compiler->compGeneratingProlog); |
6395 | |
6396 | if (!compiler->getNeedsGSSecurityCookie()) |
6397 | { |
6398 | return; |
6399 | } |
6400 | |
6401 | noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); |
6402 | |
6403 | if (compiler->gsGlobalSecurityCookieAddr == nullptr) |
6404 | { |
6405 | #ifdef _TARGET_AMD64_ |
        // rax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = rax
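        // (x64 has no form of 'mov' that stores a 64-bit immediate directly to memory, so go through RAX)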
6407 | getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal); |
6408 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0); |
6409 | #else |
6410 | // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal |
6411 | instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal, |
6412 | compiler->lvaGSSecurityCookie, 0, initReg); |
6413 | #endif |
6414 | } |
6415 | else |
6416 | { |
6417 | regNumber reg; |
6418 | #ifdef _TARGET_XARCH_ |
6419 | // Always use EAX on x86 and x64 |
6420 | // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it. |
6421 | reg = REG_EAX; |
6422 | #else |
6423 | // We will just use the initReg since it is an available register |
6424 | reg = initReg; |
6425 | #endif |
6426 | |
6427 | *pInitRegZeroed = false; |
6428 | |
6429 | #if CPU_LOAD_STORE_ARCH |
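        // On a load/store architecture, materialize the cookie address and then load the cookie through it:
        //      mov reg, #gsGlobalSecurityCookieAddr
        //      ldr reg, [reg]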
6430 | instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); |
6431 | getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0); |
6432 | regSet.verifyRegUsed(reg); |
6433 | #else |
6434 | // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr] |
6435 | // mov dword ptr [frame.GSSecurityCookie], reg |
6436 | getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); |
6437 | regSet.verifyRegUsed(reg); |
6438 | #endif |
6439 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0); |
6440 | } |
6441 | } |
6442 | |
6443 | #ifdef PROFILING_SUPPORTED |
6444 | |
6445 | //----------------------------------------------------------------------------------- |
6446 | // genProfilingEnterCallback: Generate the profiling function enter callback. |
6447 | // |
6448 | // Arguments: |
6449 | // initReg - register to use as scratch register |
6450 | // pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is |
6451 | // not zero after this call. |
6452 | // |
6453 | // Return Value: |
6454 | // None |
6455 | // |
6456 | // Notes: |
6457 | // The x86 profile enter helper has the following requirements (see ProfileEnterNaked in |
6458 | // VM\i386\asmhelpers.asm for details): |
6459 | // 1. The calling sequence for calling the helper is: |
6460 | // push FunctionIDOrClientID |
6461 | // call ProfileEnterHelper |
6462 | // 2. The calling function has an EBP frame. |
6463 | // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, |
6464 | // the following prolog is assumed: |
6465 | // push ESP |
6466 | // mov EBP, ESP |
6467 | // 4. All registers are preserved. |
6468 | // 5. The helper pops the FunctionIDOrClientID argument from the stack. |
6469 | // |
6470 | void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) |
6471 | { |
6472 | assert(compiler->compGeneratingProlog); |
6473 | |
6474 | // Give profiler a chance to back out of hooking this method |
6475 | if (!compiler->compIsProfilerHookNeeded()) |
6476 | { |
6477 | return; |
6478 | } |
6479 | |
6480 | #if defined(_TARGET_AMD64_) |
6481 | #if !defined(UNIX_AMD64_ABI) |
6482 | |
6483 | unsigned varNum; |
6484 | LclVarDsc* varDsc; |
6485 | |
6486 | // Since the method needs to make a profiler callback, it should have out-going arg space allocated. |
6487 | noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); |
6488 | noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); |
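    // (4 * REGSIZE_BYTES corresponds to the 32-byte home area the Windows x64 calling convention requires
    // for the four register arguments of any call, including the helper call emitted below.)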
6489 | |
6490 | // Home all arguments passed in arg registers (RCX, RDX, R8 and R9). |
6491 | // In case of vararg methods, arg regs are already homed. |
6492 | // |
    // Note: Here we don't need to worry about updating GC info, since the enter
    // callback is generated as part of the prolog, which is not GC interruptible.
    // Moreover, a GC cannot kick in while executing inside the profiler callback; this is a
    // profiler requirement, so that it can examine arguments, which could be object refs.
6497 | if (!compiler->info.compIsVarArgs) |
6498 | { |
6499 | for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) |
6500 | { |
6501 | noway_assert(varDsc->lvIsParam); |
6502 | |
6503 | if (!varDsc->lvIsRegArg) |
6504 | { |
6505 | continue; |
6506 | } |
6507 | |
6508 | var_types storeType = varDsc->lvaArgType(); |
6509 | regNumber argReg = varDsc->lvArgReg; |
6510 | |
6511 | instruction store_ins = ins_Store(storeType); |
6512 | |
6513 | #ifdef FEATURE_SIMD |
6514 | if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg)) |
6515 | { |
6516 | store_ins = INS_mov; |
6517 | } |
6518 | #endif // FEATURE_SIMD |
6519 | |
6520 | getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0); |
6521 | } |
6522 | } |
6523 | |
6524 | // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) |
6525 | // RCX = ProfilerMethHnd |
6526 | if (compiler->compProfilerMethHndIndirected) |
6527 | { |
        // Profiler hooks were enabled at Ngen time.
        // The profiler handle needs to be accessed through an indirection.
6530 | getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); |
6531 | } |
6532 | else |
6533 | { |
6534 | // No need to record relocations, if we are generating ELT hooks under the influence |
6535 | // of COMPlus_JitELTHookEnabled=1 |
6536 | if (compiler->opts.compJitELTHookEnabled) |
6537 | { |
6538 | genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); |
6539 | } |
6540 | else |
6541 | { |
6542 | instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); |
6543 | } |
6544 | } |
6545 | |
6546 | // RDX = caller's SP |
6547 | // Notes |
6548 | // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. |
6549 | // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value |
6550 | // of that offset to FramePointer to obtain caller's SP value. |
6551 | assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); |
6552 | int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); |
6553 | getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); |
6554 | |
6555 | // Can't have a call until we have enough padding for rejit |
6556 | genPrologPadForReJit(); |
6557 | |
6558 | // This will emit either |
6559 | // "call ip-relative 32-bit offset" or |
6560 | // "mov rax, helper addr; call rax" |
6561 | genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN); |
6562 | |
6563 | // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog |
6564 | // generation logic that moves args around as required by first BB entry point conditions |
6565 | // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs() |
6566 | // and genEnregisterIncomingStackArgs(). |
6567 | // |
6568 | // Now reload arg registers from home locations. |
6569 | // Vararg methods: |
6570 | // - we need to reload only known (i.e. fixed) reg args. |
6571 | // - if floating point type, also reload it into corresponding integer reg |
6572 | for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) |
6573 | { |
6574 | noway_assert(varDsc->lvIsParam); |
6575 | |
6576 | if (!varDsc->lvIsRegArg) |
6577 | { |
6578 | continue; |
6579 | } |
6580 | |
6581 | var_types loadType = varDsc->lvaArgType(); |
6582 | regNumber argReg = varDsc->lvArgReg; |
6583 | |
6584 | instruction load_ins = ins_Load(loadType); |
6585 | |
6586 | #ifdef FEATURE_SIMD |
6587 | if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg)) |
6588 | { |
6589 | load_ins = INS_mov; |
6590 | } |
6591 | #endif // FEATURE_SIMD |
6592 | |
6593 | getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0); |
6594 | |
6595 | #if FEATURE_VARARG |
6596 | if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) |
6597 | { |
6598 | regNumber intArgReg = compiler->getCallArgIntRegister(argReg); |
6599 | instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG); |
6600 | inst_RV_RV(ins, argReg, intArgReg, loadType); |
6601 | } |
6602 | #endif // FEATURE_VARARG |
6603 | } |
6604 | |
    // If initReg is one of RBM_CALLEE_TRASH, the helper call above may have clobbered it, so it must be zeroed again
    // before it can be used as a zero register.
6606 | if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) |
6607 | { |
6608 | *pInitRegZeroed = false; |
6609 | } |
6610 | |
6611 | #else // !defined(UNIX_AMD64_ABI) |
6612 | |
6613 | // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) |
6614 | // R14 = ProfilerMethHnd |
6615 | if (compiler->compProfilerMethHndIndirected) |
6616 | { |
        // Profiler hooks were enabled at Ngen time.
        // The profiler handle needs to be accessed through an indirection.
6619 | getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0, |
6620 | (ssize_t)compiler->compProfilerMethHnd); |
6621 | } |
6622 | else |
6623 | { |
6624 | // No need to record relocations, if we are generating ELT hooks under the influence |
6625 | // of COMPlus_JitELTHookEnabled=1 |
6626 | if (compiler->opts.compJitELTHookEnabled) |
6627 | { |
6628 | genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); |
6629 | } |
6630 | else |
6631 | { |
6632 | instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd); |
6633 | } |
6634 | } |
6635 | |
6636 | // R15 = caller's SP |
6637 | // Notes |
6638 | // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. |
6639 | // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value |
6640 | // of that offset to FramePointer to obtain caller's SP value. |
6641 | assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); |
6642 | int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); |
6643 | getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset); |
6644 | |
6645 | // Can't have a call until we have enough padding for rejit |
6646 | genPrologPadForReJit(); |
6647 | |
6648 | // We can use any callee trash register (other than RAX, RDI, RSI) for call target. |
6649 | // We use R11 here. This will emit either |
6650 | // "call ip-relative 32-bit offset" or |
6651 | // "mov r11, helper addr; call r11" |
6652 | genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); |
6653 | |
    // If initReg is one of RBM_CALLEE_TRASH, the helper call above may have clobbered it, so it must be zeroed again
    // before it can be used as a zero register.
6655 | if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) |
6656 | { |
6657 | *pInitRegZeroed = false; |
6658 | } |
6659 | |
6660 | #endif // !defined(UNIX_AMD64_ABI) |
6661 | |
6662 | #elif defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
6663 | |
6664 | unsigned saveStackLvl2 = genStackLevel; |
6665 | |
6666 | #if defined(_TARGET_X86_) |
6667 | // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK() |
6668 | // for x86 stack unwinding |
6669 | |
6670 | #if defined(UNIX_X86_ABI) |
6671 | // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall() |
6672 | getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); |
6673 | #endif // UNIX_X86_ABI |
6674 | |
6675 | // Push the profilerHandle |
6676 | if (compiler->compProfilerMethHndIndirected) |
6677 | { |
6678 | getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); |
6679 | } |
6680 | else |
6681 | { |
6682 | inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); |
6683 | } |
6684 | |
6685 | #elif defined(_TARGET_ARM_) |
    // On ARM, arguments are prespilled on the stack, which frees r0-r3.
    // For generating the Enter callout we need two registers, and one of them has to be r0 to pass the profiler handle.
    // The call target register could be any free register.
6689 | regNumber argReg = REG_PROFILER_ENTER_ARG; |
6690 | regMaskTP argRegMask = genRegMask(argReg); |
6691 | assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0); |
6692 | |
6693 | if (compiler->compProfilerMethHndIndirected) |
6694 | { |
6695 | getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd); |
6696 | regSet.verifyRegUsed(argReg); |
6697 | } |
6698 | else |
6699 | { |
6700 | instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd); |
6701 | } |
6702 | #else // _TARGET_* |
6703 | NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers" ); |
6704 | #endif // _TARGET_* |
6705 | |
6706 | // |
6707 | // Can't have a call until we have enough padding for rejit |
6708 | // |
6709 | genPrologPadForReJit(); |
6710 | |
6711 | // This will emit either |
6712 | // "call ip-relative 32-bit offset" or |
6713 | // "mov rax, helper addr; call rax" |
6714 | genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, |
6715 | 0, // argSize. Again, we have to lie about it |
6716 | EA_UNKNOWN); // retSize |
6717 | |
6718 | #if defined(_TARGET_X86_) |
    // Check that we have room for the push.
6720 | assert(compiler->fgPtrArgCntMax >= 1); |
6721 | |
6722 | #if defined(UNIX_X86_ABI) |
6723 | // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall |
6724 | getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); |
6725 | #endif // UNIX_X86_ABI |
6726 | |
6727 | #elif defined(_TARGET_ARM_) |
6728 | if (initReg == argReg) |
6729 | { |
6730 | *pInitRegZeroed = false; |
6731 | } |
6732 | #else // _TARGET_* |
6733 | NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers" ); |
6734 | #endif // _TARGET_* |
6735 | |
6736 | /* Restore the stack level */ |
6737 | |
6738 | SetStackLevel(saveStackLvl2); |
6739 | |
6740 | #else // target |
6741 | NYI("Emit Profiler Enter callback" ); |
6742 | #endif // target |
6743 | } |
6744 | |
6745 | //----------------------------------------------------------------------------------- |
6746 | // genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. |
6747 | // Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. |
6748 | // |
6749 | // Arguments: |
6750 | // helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL |
6751 | // |
6752 | // Return Value: |
6753 | // None |
6754 | // |
6755 | // Notes: |
6756 | // The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and |
6757 | // ProfileTailcallNaked in VM\i386\asmhelpers.asm for details): |
6758 | // 1. The calling sequence for calling the helper is: |
6759 | // push FunctionIDOrClientID |
6760 | // call ProfileLeaveHelper or ProfileTailcallHelper |
6761 | // 2. The calling function has an EBP frame. |
6762 | // 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, |
6763 | // the following prolog is assumed: |
6764 | // push ESP |
6765 | // mov EBP, ESP |
6766 | // 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved. |
6767 | // helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved. |
6768 | // 5. The helper pops the FunctionIDOrClientID argument from the stack. |
6769 | // |
6770 | void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) |
6771 | { |
6772 | assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); |
6773 | |
6774 | // Only hook if profiler says it's okay. |
6775 | if (!compiler->compIsProfilerHookNeeded()) |
6776 | { |
6777 | return; |
6778 | } |
6779 | |
6780 | compiler->info.compProfilerCallback = true; |
6781 | |
    // Need to save the current stack level, since the helper call will pop the argument
6783 | unsigned saveStackLvl2 = genStackLevel; |
6784 | |
6785 | #if defined(_TARGET_AMD64_) |
6786 | #if !defined(UNIX_AMD64_ABI) |
6787 | |
6788 | // Since the method needs to make a profiler callback, it should have out-going arg space allocated. |
6789 | noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); |
6790 | noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); |
6791 | |
6792 | // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash |
6793 | // registers that profiler callback kills. |
6794 | if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg()) |
6795 | { |
6796 | regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum); |
6797 | noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0); |
6798 | } |
6799 | |
    // At this point the return value has been computed and is in RAX or XMM0.
    // On Amd64, the Leave callback preserves the return register. We keep
    // RAX alive by not reporting it as trashed by the helper call. Also note
    // that a GC cannot kick in while executing inside the profiler callback,
    // which is a profiler requirement as well, since it needs to examine the
    // return value, which could be an obj ref.
6806 | |
6807 | // RCX = ProfilerMethHnd |
6808 | if (compiler->compProfilerMethHndIndirected) |
6809 | { |
        // Profiler hooks were enabled at Ngen time.
        // The profiler handle needs to be accessed through an indirection.
6812 | getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); |
6813 | } |
6814 | else |
6815 | { |
6816 | // Don't record relocations, if we are generating ELT hooks under the influence |
6817 | // of COMPlus_JitELTHookEnabled=1 |
6818 | if (compiler->opts.compJitELTHookEnabled) |
6819 | { |
6820 | genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); |
6821 | } |
6822 | else |
6823 | { |
6824 | instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); |
6825 | } |
6826 | } |
6827 | |
6828 | // RDX = caller's SP |
    // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
    // of the statements to execute unconditionally and clean up the rest.
6831 | if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) |
6832 | { |
6833 | // Caller's SP relative offset to FramePointer will be negative. We need to add absolute |
6834 | // value of that offset to FramePointer to obtain caller's SP value. |
6835 | int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); |
6836 | getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); |
6837 | } |
6838 | else |
6839 | { |
        // If we are here, it means this is a tentative frame layout, during which we
        // cannot use the caller's SP offset since it is only an estimate. For now we require the
        // method to have at least a single arg so that we can use it to obtain the caller's
        // SP.
6844 | LclVarDsc* varDsc = compiler->lvaTable; |
6845 | NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params" ); |
6846 | |
6847 | // lea rdx, [FramePointer + Arg0's offset] |
6848 | getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); |
6849 | } |
6850 | |
6851 | // We can use any callee trash register (other than RAX, RCX, RDX) for call target. |
6852 | // We use R8 here. This will emit either |
6853 | // "call ip-relative 32-bit offset" or |
6854 | // "mov r8, helper addr; call r8" |
6855 | genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2); |
6856 | |
6857 | #else // !defined(UNIX_AMD64_ABI) |
6858 | |
6859 | // RDI = ProfilerMethHnd |
6860 | if (compiler->compProfilerMethHndIndirected) |
6861 | { |
6862 | getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); |
6863 | } |
6864 | else |
6865 | { |
6866 | if (compiler->opts.compJitELTHookEnabled) |
6867 | { |
6868 | genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); |
6869 | } |
6870 | else |
6871 | { |
6872 | instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); |
6873 | } |
6874 | } |
6875 | |
6876 | // RSI = caller's SP |
6877 | if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) |
6878 | { |
6879 | int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); |
6880 | getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); |
6881 | } |
6882 | else |
6883 | { |
6884 | LclVarDsc* varDsc = compiler->lvaTable; |
6885 | NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params" ); |
6886 | |
        // lea rsi, [FramePointer + Arg0's offset]
6888 | getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); |
6889 | } |
6890 | |
6891 | // We can use any callee trash register (other than RAX, RDI, RSI) for call target. |
6892 | // We use R11 here. This will emit either |
6893 | // "call ip-relative 32-bit offset" or |
6894 | // "mov r11, helper addr; call r11" |
6895 | genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); |
6896 | |
6897 | #endif // !defined(UNIX_AMD64_ABI) |
6898 | |
6899 | #elif defined(_TARGET_X86_) |
6900 | |
6901 | #if defined(UNIX_X86_ABI) |
6902 | // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall() |
6903 | getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); |
6904 | AddStackLevel(0xC); |
6905 | AddNestedAlignment(0xC); |
6906 | #endif // UNIX_X86_ABI |
6907 | |
6908 | // |
6909 | // Push the profilerHandle |
6910 | // |
6911 | |
6912 | if (compiler->compProfilerMethHndIndirected) |
6913 | { |
6914 | getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); |
6915 | } |
6916 | else |
6917 | { |
6918 | inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); |
6919 | } |
6920 | genSinglePush(); |
6921 | |
6922 | #if defined(UNIX_X86_ABI) |
6923 | int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl) |
6924 | #else |
6925 | int argSize = REGSIZE_BYTES; |
6926 | #endif |
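    // On Windows x86 the helper pops its own argument (see note 5 in the function header), so a positive
    // argSize is passed; under UNIX_X86_ABI the caller pops (cdecl), hence the negative size above.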
6927 | genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */); |
6928 | |
    // Check that we have room for the push.
6930 | assert(compiler->fgPtrArgCntMax >= 1); |
6931 | |
6932 | #if defined(UNIX_X86_ABI) |
6933 | // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall |
6934 | getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); |
6935 | SubtractStackLevel(0x10); |
6936 | SubtractNestedAlignment(0xC); |
6937 | #endif // UNIX_X86_ABI |
6938 | |
6939 | #elif defined(_TARGET_ARM_) |
6940 | // |
6941 | // Push the profilerHandle |
6942 | // |
6943 | |
    // Contract between the JIT and the profiler Leave callout on ARM:
    // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain the return value.
    // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH, r1> will contain the return value.
    // Floating point, double, or HFA return values will be in s0-s15 in the case of non-vararg methods.
    // It is assumed that the profiler Leave callback doesn't trash registers r1, REG_PROFILER_RET_SCRATCH and s0-s15.
    //
    // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting the
    // Leave callback.
6952 | bool r0Trashed; |
6953 | emitAttr attr = EA_UNKNOWN; |
6954 | |
6955 | if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP && |
6956 | (varTypeIsFloating(compiler->info.compRetType) || |
6957 | compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass)))) |
6958 | { |
6959 | r0Trashed = false; |
6960 | } |
6961 | else |
6962 | { |
        // The method has a return value and r0 is in use. For emitting the Leave profiler callout we need r0 to pass
        // the profiler handle. Therefore, r0 is moved to REG_PROFILER_RET_SCRATCH as per the contract above.
6965 | if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur) |
6966 | { |
6967 | attr = EA_GCREF; |
6968 | gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH); |
6969 | } |
6970 | else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur) |
6971 | { |
6972 | attr = EA_BYREF; |
6973 | gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH); |
6974 | } |
6975 | else |
6976 | { |
6977 | attr = EA_4BYTE; |
6978 | } |
6979 | |
6980 | getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0); |
6981 | regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH); |
6982 | gcInfo.gcMarkRegSetNpt(RBM_ARG_0); |
6983 | r0Trashed = true; |
6984 | } |
6985 | |
6986 | if (compiler->compProfilerMethHndIndirected) |
6987 | { |
6988 | getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); |
6989 | regSet.verifyRegUsed(REG_ARG_0); |
6990 | } |
6991 | else |
6992 | { |
6993 | instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); |
6994 | } |
6995 | |
6996 | genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE, |
6997 | 0, // argSize |
6998 | EA_UNKNOWN); // retSize |
6999 | |
7000 | // Restore state that existed before profiler callback |
7001 | if (r0Trashed) |
7002 | { |
7003 | getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH); |
7004 | regSet.verifyRegUsed(REG_ARG_0); |
7005 | gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH); |
7006 | } |
7007 | |
7008 | #else // target |
7009 | NYI("Emit Profiler Leave callback" ); |
7010 | #endif // target |
7011 | |
7012 | /* Restore the stack level */ |
7013 | SetStackLevel(saveStackLvl2); |
7014 | } |
7015 | |
7016 | #endif // PROFILING_SUPPORTED |
7017 | |
7018 | /***************************************************************************** |
7019 | |
7020 | Esp frames : |
7021 | ---------- |
7022 | |
7023 | These instructions are just a reordering of the instructions used today. |
7024 | |
7025 | push ebp |
7026 | push esi |
7027 | push edi |
7028 | push ebx |
7029 | sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*) |
7030 | ... |
7031 | add esp, LOCALS_SIZE / pop dummyReg |
7032 | pop ebx |
7033 | pop edi |
7034 | pop esi |
7035 | pop ebp |
7036 | ret |
7037 | |
7038 | Ebp frames : |
7039 | ---------- |
7040 | |
     The epilog does "add esp, LOCALS_SIZE" instead of "mov esp, ebp".
7042 | Everything else is similar, though in a different order. |
7043 | |
7044 | The security object will no longer be at a fixed offset. However, the |
7045 | offset can still be determined by looking up the GC-info and determining |
7046 | how many callee-saved registers are pushed. |
7047 | |
7048 | push ebp |
7049 | mov ebp, esp |
7050 | push esi |
7051 | push edi |
7052 | push ebx |
7053 | sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*) |
7054 | ... |
7055 | add esp, LOCALS_SIZE / pop dummyReg |
7056 | pop ebx |
7057 | pop edi |
7058 | pop esi |
7059 | (mov esp, ebp if there are no callee-saved registers) |
7060 | pop ebp |
7061 | ret |
7062 | |
7063 | Double-aligned frame : |
7064 | -------------------- |
7065 | |
7066 | LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number |
7067 | of callee-saved registers are pushed on the stack so that the locals |
7068 | themselves are qword-aligned. The instructions are the same as today, |
7069 | just in a different order. |
7070 | |
7071 | push ebp |
7072 | mov ebp, esp |
7073 | and esp, 0xFFFFFFFC |
7074 | push esi |
7075 | push edi |
7076 | push ebx |
7077 | sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*) |
7078 | ... |
7079 | add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg |
7080 | pop ebx |
7081 | pop edi |
7082 | pop esi |
7083 | pop ebp |
7084 | mov esp, ebp |
7085 | pop ebp |
7086 | ret |
7087 | |
7088 | localloc (with ebp) frames : |
7089 | -------------------------- |
7090 | |
7091 | The instructions are the same as today, just in a different order. |
7092 | Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]" |
7093 | which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]". |
7094 | |
7095 | push ebp |
7096 | mov ebp, esp |
7097 | push esi |
7098 | push edi |
7099 | push ebx |
7100 | sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*) |
7101 | ... |
7102 | lea esp, [ebp-calleeSavedRegsPushedSize] |
7103 | pop ebx |
7104 | pop edi |
7105 | pop esi |
7106 | (mov esp, ebp if there are no callee-saved registers) |
7107 | pop ebp |
7108 | ret |
7109 | |
7110 | *****************************************************************************/ |
7111 | |
7112 | /***************************************************************************** |
7113 | * |
7114 | * Generates appropriate NOP padding for a function prolog to support ReJIT. |
7115 | */ |
7116 | |
7117 | void CodeGen::genPrologPadForReJit() |
7118 | { |
7119 | assert(compiler->compGeneratingProlog); |
7120 | |
7121 | #ifdef _TARGET_XARCH_ |
7122 | if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS)) |
7123 | { |
7124 | return; |
7125 | } |
7126 | |
7127 | #if FEATURE_EH_FUNCLETS |
7128 | |
    // No need to generate padding (nops) for funclets.
    // When compiling the main function (and not a funclet),
    // the value of funCurrentFunc()->funKind is equal to FUNC_ROOT.
7132 | if (compiler->funCurrentFunc()->funKind != FUNC_ROOT) |
7133 | { |
7134 | return; |
7135 | } |
7136 | |
7137 | #endif // FEATURE_EH_FUNCLETS |
7138 | |
7139 | unsigned size = getEmitter()->emitGetPrologOffsetEstimate(); |
7140 | if (size < 5) |
7141 | { |
7142 | instNop(5 - size); |
7143 | } |
7144 | #endif |
7145 | } |
7146 | |
7147 | /***************************************************************************** |
7148 | * |
7149 | * Reserve space for a function prolog. |
7150 | */ |
7151 | |
7152 | void CodeGen::genReserveProlog(BasicBlock* block) |
7153 | { |
7154 | assert(block != nullptr); |
7155 | |
7156 | JITDUMP("Reserving prolog IG for block " FMT_BB "\n" , block->bbNum); |
7157 | |
7158 | /* Nothing is live on entry to the prolog */ |
7159 | |
7160 | getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false); |
7161 | } |
7162 | |
7163 | /***************************************************************************** |
7164 | * |
7165 | * Reserve space for a function epilog. |
7166 | */ |
7167 | |
7168 | void CodeGen::genReserveEpilog(BasicBlock* block) |
7169 | { |
7170 | regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur; |
7171 | regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur; |
7172 | |
7173 | /* The return value is special-cased: make sure it goes live for the epilog */ |
7174 | |
7175 | bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); |
7176 | |
7177 | if (genFullPtrRegMap && !jmpEpilog) |
7178 | { |
7179 | if (varTypeIsGC(compiler->info.compRetNativeType)) |
7180 | { |
7181 | noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL)); |
7182 | |
7183 | gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType); |
7184 | |
7185 | switch (compiler->info.compRetNativeType) |
7186 | { |
7187 | case TYP_REF: |
7188 | gcrefRegsArg |= RBM_INTRET; |
7189 | break; |
7190 | case TYP_BYREF: |
7191 | byrefRegsArg |= RBM_INTRET; |
7192 | break; |
7193 | default: |
7194 | break; |
7195 | } |
7196 | } |
7197 | } |
7198 | |
7199 | JITDUMP("Reserving epilog IG for block " FMT_BB "\n" , block->bbNum); |
7200 | |
7201 | assert(block != nullptr); |
7202 | const VARSET_TP& gcrefVarsArg(getEmitter()->emitThisGCrefVars); |
7203 | bool last = (block->bbNext == nullptr); |
7204 | getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last); |
7205 | } |
7206 | |
7207 | #if FEATURE_EH_FUNCLETS |
7208 | |
7209 | /***************************************************************************** |
7210 | * |
7211 | * Reserve space for a funclet prolog. |
7212 | */ |
7213 | |
7214 | void CodeGen::genReserveFuncletProlog(BasicBlock* block) |
7215 | { |
7216 | assert(block != nullptr); |
7217 | |
7218 | /* Currently, no registers are live on entry to the prolog, except maybe |
7219 | the exception object. There might be some live stack vars, but they |
7220 | cannot be accessed until after the frame pointer is re-established. |
7221 | In order to potentially prevent emitting a death before the prolog |
7222 | and a birth right after it, we just report it as live during the |
7223 | prolog, and rely on the prolog being non-interruptible. Trust |
7224 | genCodeForBBlist to correctly initialize all the sets. |
7225 | |
7226 | We might need to relax these asserts if the VM ever starts |
7227 | restoring any registers, then we could have live-in reg vars... |
7228 | */ |
7229 | |
7230 | noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur); |
7231 | noway_assert(gcInfo.gcRegByrefSetCur == 0); |
7232 | |
7233 | JITDUMP("Reserving funclet prolog IG for block " FMT_BB "\n" , block->bbNum); |
7234 | |
7235 | getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, |
7236 | gcInfo.gcRegByrefSetCur, false); |
7237 | } |
7238 | |
7239 | /***************************************************************************** |
7240 | * |
7241 | * Reserve space for a funclet epilog. |
7242 | */ |
7243 | |
7244 | void CodeGen::genReserveFuncletEpilog(BasicBlock* block) |
7245 | { |
7246 | assert(block != nullptr); |
7247 | |
7248 | JITDUMP("Reserving funclet epilog IG for block " FMT_BB "\n" , block->bbNum); |
7249 | |
7250 | bool last = (block->bbNext == nullptr); |
7251 | getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, |
7252 | gcInfo.gcRegByrefSetCur, last); |
7253 | } |
7254 | |
7255 | #endif // FEATURE_EH_FUNCLETS |
7256 | |
7257 | /***************************************************************************** |
7258 | * Finalize the frame size and offset assignments. |
7259 | * |
7260 | * No changes can be made to the modified register set after this, since that can affect how many |
7261 | * callee-saved registers get saved. |
7262 | */ |
7263 | void CodeGen::genFinalizeFrame() |
7264 | { |
7265 | JITDUMP("Finalizing stack frame\n" ); |
7266 | |
7267 | // Initializations need to happen based on the var locations at the start |
7268 | // of the first basic block, so load those up. In particular, the determination |
7269 | // of whether or not to use block init in the prolog is dependent on the variable |
7270 | // locations on entry to the function. |
7271 | compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB); |
7272 | |
7273 | genCheckUseBlockInit(); |
7274 | |
7275 | // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc. |
7276 | CLANG_FORMAT_COMMENT_ANCHOR; |
7277 | |
7278 | #if defined(_TARGET_X86_) |
7279 | |
7280 | if (compiler->compTailCallUsed) |
7281 | { |
7282 | // If we are generating a helper-based tailcall, we've set the tailcall helper "flags" |
7283 | // argument to "1", indicating to the tailcall helper that we've saved the callee-saved |
7284 | // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers |
7285 | // actually get saved. |
7286 | |
7287 | regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED); |
7288 | } |
7289 | #endif // _TARGET_X86_ |
7290 | |
7291 | #if defined(_TARGET_ARMARCH_) |
    // We need to determine whether we will change SP by more than a specific amount, to decide whether we want to use
    // a loop to touch stack pages, which will require multiple registers. See genAllocLclFrame() for details.
7294 | if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize()) |
7295 | { |
7296 | regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK); |
7297 | } |
7298 | #endif // defined(_TARGET_ARMARCH_) |
7299 | |
7300 | #if defined(_TARGET_ARM_) |
    // If there are any reserved registers, add them to the modified set.
7302 | if (regSet.rsMaskResvd != RBM_NONE) |
7303 | { |
7304 | regSet.rsSetRegsModified(regSet.rsMaskResvd); |
7305 | } |
7306 | #endif // _TARGET_ARM_ |
7307 | |
7308 | #ifdef DEBUG |
7309 | if (verbose) |
7310 | { |
7311 | printf("Modified regs: " ); |
7312 | dspRegMask(regSet.rsGetModifiedRegsMask()); |
7313 | printf("\n" ); |
7314 | } |
7315 | #endif // DEBUG |
7316 | |
7317 | // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc. |
7318 | if (compiler->opts.compDbgEnC) |
7319 | { |
7320 | // We always save FP. |
7321 | noway_assert(isFramePointerUsed()); |
7322 | #ifdef _TARGET_AMD64_ |
7323 | // On x64 we always save exactly RBP, RSI and RDI for EnC. |
7324 | regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI); |
7325 | regSet.rsSetRegsModified(RBM_RSI | RBM_RDI); |
7326 | noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0); |
7327 | #else // !_TARGET_AMD64_ |
7328 | // On x86 we save all callee saved regs so the saved reg area size is consistent |
7329 | regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); |
7330 | #endif // !_TARGET_AMD64_ |
7331 | } |
7332 | |
7333 | /* If we have any pinvoke calls, we might potentially trash everything */ |
7334 | if (compiler->info.compCallUnmanaged) |
7335 | { |
7336 | noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame |
7337 | regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); |
7338 | } |
7339 | |
7340 | #ifdef UNIX_AMD64_ABI |
7341 | // On Unix x64 we also save R14 and R15 for ELT profiler hook generation. |
7342 | if (compiler->compIsProfilerHookNeeded()) |
7343 | { |
7344 | regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1); |
7345 | } |
7346 | #endif |
7347 | |
7348 | /* Count how many callee-saved registers will actually be saved (pushed) */ |
7349 | |
    // EBP cannot be (directly) modified for EBP frames and double-aligned frames
7351 | noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE)); |
7352 | |
7353 | #if ETW_EBP_FRAMED |
7354 | // EBP cannot be (directly) modified |
7355 | noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); |
7356 | #endif |
7357 | |
7358 | regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; |
7359 | |
7360 | #ifdef _TARGET_ARMARCH_ |
7361 | if (isFramePointerUsed()) |
7362 | { |
7363 | // For a FP based frame we have to push/pop the FP register |
7364 | // |
7365 | maskCalleeRegsPushed |= RBM_FPBASE; |
7366 | |
        // This assert checks that we are not using REG_FP
        // both as the frame pointer and as a codegen register
7369 | // |
7370 | assert(!regSet.rsRegsModified(RBM_FPBASE)); |
7371 | } |
7372 | |
7373 | // we always push LR. See genPushCalleeSavedRegisters |
7374 | // |
7375 | maskCalleeRegsPushed |= RBM_LR; |
7376 | |
7377 | #if defined(_TARGET_ARM_) |
7378 | // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64? |
7379 | regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT; |
7380 | regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat; |
7381 | |
7382 | if ((maskPushRegsFloat != RBM_NONE) || |
7383 | (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD))) |
7384 | { |
        // Here we try to keep the stack double-aligned before the vpush
7386 | if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0) |
7387 | { |
7388 | regNumber extraPushedReg = REG_R4; |
7389 | while (maskPushRegsInt & genRegMask(extraPushedReg)) |
7390 | { |
7391 | extraPushedReg = REG_NEXT(extraPushedReg); |
7392 | } |
7393 | if (extraPushedReg < REG_R11) |
7394 | { |
7395 | maskPushRegsInt |= genRegMask(extraPushedReg); |
7396 | regSet.rsSetRegsModified(genRegMask(extraPushedReg)); |
7397 | } |
7398 | } |
7399 | maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat; |
7400 | } |
7401 | |
    // We currently only expect to push/pop consecutive FP registers,
    // and these have to be double-sized registers as well.
    // Here we will ensure that maskPushRegsFloat obeys these requirements.
7405 | // |
7406 | if (maskPushRegsFloat != RBM_NONE) |
7407 | { |
7408 | regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE); |
7409 | while (maskPushRegsFloat > contiguousMask) |
7410 | { |
7411 | contiguousMask <<= 2; |
7412 | contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE); |
7413 | } |
7414 | if (maskPushRegsFloat != contiguousMask) |
7415 | { |
7416 | regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat; |
7417 | maskPushRegsFloat |= maskExtraRegs; |
7418 | regSet.rsSetRegsModified(maskExtraRegs); |
7419 | maskCalleeRegsPushed |= maskExtraRegs; |
7420 | } |
7421 | } |
7422 | #endif // _TARGET_ARM_ |
7423 | #endif // _TARGET_ARMARCH_ |
7424 | |
7425 | #if defined(_TARGET_XARCH_) |
7426 | // Compute the count of callee saved float regs saved on stack. |
7427 | // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15) |
7428 | // regs are stack allocated and preserved in their stack locations. |
7429 | compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED; |
7430 | maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED; |
7431 | #endif // defined(_TARGET_XARCH_) |
7432 | |
7433 | compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); |
7434 | |
7435 | #ifdef DEBUG |
7436 | if (verbose) |
7437 | { |
7438 | printf("Callee-saved registers pushed: %d " , compiler->compCalleeRegsPushed); |
7439 | dspRegMask(maskCalleeRegsPushed); |
7440 | printf("\n" ); |
7441 | } |
7442 | #endif // DEBUG |
7443 | |
7444 | /* Assign the final offsets to things living on the stack frame */ |
7445 | |
7446 | compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT); |
7447 | |
    /* We want to make sure that the prolog size calculated here is accurate
       (that is, instructions will not shrink because of conservative stack
       frame approximations). We do this by filling in the correct size
       here (where we have committed to the final numbers for the frame offsets).
       This will ensure that the prolog size is always correct.
    */
7454 | getEmitter()->emitMaxTmpSize = regSet.tmpGetTotalSize(); |
7455 | |
7456 | #ifdef DEBUG |
7457 | if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose) |
7458 | { |
7459 | compiler->lvaTableDump(); |
7460 | } |
7461 | #endif |
7462 | } |
7463 | |
7464 | //------------------------------------------------------------------------ |
7465 | // genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer. |
7466 | // |
7467 | // Arguments: |
7468 | // delta - the offset to add to the current stack pointer to establish the frame pointer |
7469 | // reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data. |
7470 | |
7471 | void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) |
7472 | { |
7473 | assert(compiler->compGeneratingProlog); |
7474 | |
7475 | #if defined(_TARGET_XARCH_) |
7476 | |
7477 | if (delta == 0) |
7478 | { |
7479 | getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE); |
7480 | psiMoveESPtoEBP(); |
7481 | } |
7482 | else |
7483 | { |
7484 | getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); |
7485 | // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code |
7486 | // anyway. |
7487 | } |
7488 | |
7489 | if (reportUnwindData) |
7490 | { |
7491 | compiler->unwindSetFrameReg(REG_FPBASE, delta); |
7492 | } |
7493 | |
7494 | #elif defined(_TARGET_ARM_) |
7495 | |
7496 | assert(arm_Valid_Imm_For_Add_SP(delta)); |
7497 | getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); |
7498 | |
7499 | if (reportUnwindData) |
7500 | { |
7501 | compiler->unwindPadding(); |
7502 | } |
7503 | |
7504 | #else |
7505 | NYI("establish frame pointer" ); |
7506 | #endif |
7507 | } |
7508 | |
7509 | /***************************************************************************** |
7510 | * |
7511 | * Generates code for a function prolog. |
7512 | * |
7513 | * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER: |
7514 | * |
7515 | * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not |
7516 | * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and |
7517 | * only instructions which result in control not going to the next instruction. Basically, any time execution would |
7518 | * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to |
7519 | * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it |
7520 | * can ensure stepping isn't broken. This is also a requirement for x86 and amd64. |
7521 | * |
7522 | * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the |
7523 | * debugger team to ensure that stepping still works. |
7524 | * |
7525 | * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp. |
7526 | */ |
7527 | |
7528 | #ifdef _PREFAST_ |
7529 | #pragma warning(push) |
7530 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
7531 | #endif |
7532 | void CodeGen::genFnProlog() |
7533 | { |
7534 | ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true); |
7535 | |
7536 | compiler->funSetCurrentFunc(0); |
7537 | |
7538 | #ifdef DEBUG |
7539 | if (verbose) |
7540 | { |
7541 | printf("*************** In genFnProlog()\n" ); |
7542 | } |
7543 | #endif |
7544 | |
7545 | #ifdef DEBUG |
7546 | genInterruptibleUsed = true; |
7547 | #endif |
7548 | |
7549 | assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); |
7550 | |
7551 | /* Ready to start on the prolog proper */ |
7552 | |
7553 | getEmitter()->emitBegProlog(); |
7554 | compiler->unwindBegProlog(); |
7555 | |
7556 | // Do this so we can put the prolog instruction group ahead of |
7557 | // other instruction groups |
7558 | genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG); |
7559 | |
7560 | #ifdef DEBUG |
7561 | if (compiler->opts.dspCode) |
7562 | { |
7563 | printf("\n__prolog:\n" ); |
7564 | } |
7565 | #endif |
7566 | |
7567 | if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) |
7568 | { |
7569 | // Create new scopes for the method-parameters for the prolog-block. |
7570 | psiBegProlog(); |
7571 | } |
7572 | |
7573 | #ifdef DEBUG |
7574 | |
7575 | if (compiler->compJitHaltMethod()) |
7576 | { |
7577 | /* put a nop first because the debugger and other tools are likely to |
put an int3 at the beginning and we don't want to confuse them */
7579 | |
7580 | instGen(INS_nop); |
7581 | instGen(INS_BREAKPOINT); |
7582 | |
7583 | #ifdef _TARGET_ARMARCH_ |
7584 | // Avoid asserts in the unwind info because these instructions aren't accounted for. |
7585 | compiler->unwindPadding(); |
7586 | #endif // _TARGET_ARMARCH_ |
7587 | } |
7588 | #endif // DEBUG |
7589 | |
7590 | #if FEATURE_EH_FUNCLETS && defined(DEBUG) |
7591 | |
7592 | // We cannot force 0-initialization of the PSPSym |
7593 | // as it will overwrite the real value |
7594 | if (compiler->lvaPSPSym != BAD_VAR_NUM) |
7595 | { |
7596 | LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym]; |
7597 | assert(!varDsc->lvMustInit); |
7598 | } |
7599 | |
7600 | #endif // FEATURE_EH_FUNCLETS && DEBUG |
7601 | |
7602 | /*------------------------------------------------------------------------- |
7603 | * |
7604 | * Record the stack frame ranges that will cover all of the tracked |
7605 | * and untracked pointer variables. |
7606 | * Also find which registers will need to be zero-initialized. |
7607 | * |
7608 | * 'initRegs': - Generally, enregistered variables should not need to be |
7609 | * zero-inited. They only need to be zero-inited when they |
7610 | * have a possibly uninitialized read on some control |
7611 | * flow path. Apparently some of the IL_STUBs that we |
7612 | * generate have this property. |
7613 | */ |
7614 | |
7615 | int untrLclLo = +INT_MAX; |
7616 | int untrLclHi = -INT_MAX; |
7617 | // 'hasUntrLcl' is true if there are any stack locals which must be init'ed. |
7618 | // Note that they may be tracked, but simply not allocated to a register. |
7619 | bool hasUntrLcl = false; |
7620 | |
7621 | int GCrefLo = +INT_MAX; |
7622 | int GCrefHi = -INT_MAX; |
7623 | bool hasGCRef = false; |
7624 | |
7625 | regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed. |
7626 | regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed. |
7627 | regMaskTP initDblRegs = RBM_NONE; |
7628 | |
7629 | unsigned varNum; |
7630 | LclVarDsc* varDsc; |
7631 | |
7632 | for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) |
7633 | { |
7634 | if (varDsc->lvIsParam && !varDsc->lvIsRegArg) |
7635 | { |
7636 | continue; |
7637 | } |
7638 | |
7639 | if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame) |
7640 | { |
7641 | noway_assert(varDsc->lvRefCnt() == 0); |
7642 | continue; |
7643 | } |
7644 | |
7645 | signed int loOffs = varDsc->lvStkOffs; |
7646 | signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum); |
7647 | |
7648 | /* We need to know the offset range of tracked stack GC refs */ |
7649 | /* We assume that the GC reference can be anywhere in the TYP_STRUCT */ |
7650 | |
7651 | if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame) |
7652 | { |
// Fields with PROMOTION_TYPE_DEPENDENT promotion should have been
// taken care of by their parent struct.
7655 | if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc)) |
7656 | { |
7657 | hasGCRef = true; |
7658 | |
7659 | if (loOffs < GCrefLo) |
7660 | { |
7661 | GCrefLo = loOffs; |
7662 | } |
7663 | if (hiOffs > GCrefHi) |
7664 | { |
7665 | GCrefHi = hiOffs; |
7666 | } |
7667 | } |
7668 | } |
7669 | |
7670 | /* For lvMustInit vars, gather pertinent info */ |
7671 | |
7672 | if (!varDsc->lvMustInit) |
7673 | { |
7674 | continue; |
7675 | } |
7676 | |
7677 | if (varDsc->lvIsInReg()) |
7678 | { |
7679 | regMaskTP regMask = genRegMask(varDsc->lvRegNum); |
7680 | if (!varDsc->IsFloatRegType()) |
7681 | { |
7682 | initRegs |= regMask; |
7683 | |
7684 | if (varTypeIsMultiReg(varDsc)) |
7685 | { |
7686 | if (varDsc->lvOtherReg != REG_STK) |
7687 | { |
7688 | initRegs |= genRegMask(varDsc->lvOtherReg); |
7689 | } |
7690 | else |
7691 | { |
7692 | /* Upper DWORD is on the stack, and needs to be inited */ |
7693 | |
7694 | loOffs += sizeof(int); |
7695 | goto INIT_STK; |
7696 | } |
7697 | } |
7698 | } |
7699 | else if (varDsc->TypeGet() == TYP_DOUBLE) |
7700 | { |
7701 | initDblRegs |= regMask; |
7702 | } |
7703 | else |
7704 | { |
7705 | initFltRegs |= regMask; |
7706 | } |
7707 | } |
7708 | else |
7709 | { |
7710 | INIT_STK: |
7711 | |
7712 | hasUntrLcl = true; |
7713 | |
7714 | if (loOffs < untrLclLo) |
7715 | { |
7716 | untrLclLo = loOffs; |
7717 | } |
7718 | if (hiOffs > untrLclHi) |
7719 | { |
7720 | untrLclHi = hiOffs; |
7721 | } |
7722 | } |
7723 | } |
7724 | |
7725 | /* Don't forget about spill temps that hold pointers */ |
7726 | |
7727 | if (!TRACK_GC_TEMP_LIFETIMES) |
7728 | { |
7729 | assert(regSet.tmpAllFree()); |
7730 | for (TempDsc* tempThis = regSet.tmpListBeg(); tempThis != nullptr; tempThis = regSet.tmpListNxt(tempThis)) |
7731 | { |
7732 | if (!varTypeIsGC(tempThis->tdTempType())) |
7733 | { |
7734 | continue; |
7735 | } |
7736 | |
7737 | signed int loOffs = tempThis->tdTempOffs(); |
7738 | signed int hiOffs = loOffs + TARGET_POINTER_SIZE; |
7739 | |
7740 | // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the |
7741 | // previous frame pointer. Thus, stkOffs can't be zero. |
7742 | CLANG_FORMAT_COMMENT_ANCHOR; |
7743 | |
7744 | #if !defined(_TARGET_AMD64_) |
7745 | // However, on amd64 there is no requirement to chain frame pointers. |
7746 | |
7747 | noway_assert(!isFramePointerUsed() || loOffs != 0); |
7748 | #endif // !defined(_TARGET_AMD64_) |
7749 | |
7750 | // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs); |
7751 | |
7752 | hasUntrLcl = true; |
7753 | |
7754 | if (loOffs < untrLclLo) |
7755 | { |
7756 | untrLclLo = loOffs; |
7757 | } |
7758 | if (hiOffs > untrLclHi) |
7759 | { |
7760 | untrLclHi = hiOffs; |
7761 | } |
7762 | } |
7763 | } |
7764 | |
7765 | assert((genInitStkLclCnt > 0) == hasUntrLcl); |
7766 | |
7767 | #ifdef DEBUG |
7768 | if (verbose) |
7769 | { |
7770 | if (genInitStkLclCnt > 0) |
7771 | { |
7772 | printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n" , genInitStkLclCnt, -untrLclLo, |
7773 | -untrLclHi); |
7774 | } |
7775 | } |
7776 | #endif |
7777 | |
7778 | #ifdef _TARGET_ARM_ |
7779 | // On the ARM we will spill any incoming struct args in the first instruction in the prolog |
7780 | // Ditto for all enregistered user arguments in a varargs method. |
7781 | // These registers will be available to use for the initReg. We just remove |
7782 | // all of these registers from the rsCalleeRegArgMaskLiveIn. |
7783 | // |
7784 | intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false); |
7785 | #endif |
7786 | |
7787 | /* Choose the register to use for zero initialization */ |
7788 | |
7789 | regNumber initReg = REG_SCRATCH; // Unless we find a better register below |
7790 | bool initRegZeroed = false; |
7791 | regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn; |
7792 | regMaskTP tempMask; |
7793 | |
7794 | // We should not use the special PINVOKE registers as the initReg |
// since they are trashed by the jithelper call to set up the PINVOKE frame
7796 | if (compiler->info.compCallUnmanaged) |
7797 | { |
7798 | excludeMask |= RBM_PINVOKE_FRAME; |
7799 | |
7800 | assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM)); |
7801 | if (!compiler->opts.ShouldUsePInvokeHelpers()) |
7802 | { |
7803 | noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount); |
7804 | |
7805 | excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH); |
7806 | |
7807 | // We also must exclude the register used by compLvFrameListRoot when it is enregistered |
7808 | // |
7809 | LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot]; |
7810 | if (varDsc->lvRegister) |
7811 | { |
7812 | excludeMask |= genRegMask(varDsc->lvRegNum); |
7813 | } |
7814 | } |
7815 | } |
7816 | |
7817 | #ifdef _TARGET_ARM_ |
7818 | // If we have a variable sized frame (compLocallocUsed is true) |
7819 | // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed |
7820 | if (compiler->compLocallocUsed) |
7821 | { |
7822 | excludeMask |= RBM_SAVED_LOCALLOC_SP; |
7823 | } |
7824 | #endif // _TARGET_ARM_ |
7825 | |
7826 | #if defined(_TARGET_XARCH_) |
7827 | if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize()) |
7828 | { |
7829 | // We currently must use REG_EAX on x86 here |
7830 | // because the loop's backwards branch depends upon the size of EAX encodings |
7831 | assert(initReg == REG_EAX); |
7832 | } |
7833 | else |
7834 | #endif // _TARGET_XARCH_ |
7835 | { |
7836 | tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd; |
7837 | |
7838 | if (tempMask != RBM_NONE) |
7839 | { |
7840 | // We will use one of the registers that we were planning to zero init anyway. |
7841 | // We pick the lowest register number. |
7842 | tempMask = genFindLowestBit(tempMask); |
7843 | initReg = genRegNumFromMask(tempMask); |
7844 | } |
7845 | // Next we prefer to use one of the unused argument registers. |
7846 | // If they aren't available we use one of the caller-saved integer registers. |
7847 | else |
7848 | { |
7849 | tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd; |
7850 | if (tempMask != RBM_NONE) |
7851 | { |
7852 | // We pick the lowest register number |
7853 | tempMask = genFindLowestBit(tempMask); |
7854 | initReg = genRegNumFromMask(tempMask); |
7855 | } |
7856 | } |
7857 | } |
7858 | |
7859 | noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME)); |
7860 | |
7861 | #if defined(_TARGET_AMD64_) |
7862 | // If we are a varargs call, in order to set up the arguments correctly this |
7863 | // must be done in a 2 step process. As per the x64 ABI: |
7864 | // a) The caller sets up the argument shadow space (just before the return |
7865 | // address, 4 pointer sized slots). |
// b) The callee is responsible for homing the arguments in the shadow space
7867 | // provided by the caller. |
7868 | // This way, the varargs iterator will be able to retrieve the |
7869 | // call arguments properly since both the arg regs and the stack allocated |
7870 | // args will be contiguous. |
7871 | if (compiler->info.compIsVarArgs) |
7872 | { |
7873 | getEmitter()->spillIntArgRegsToShadowSlots(); |
7874 | } |
7875 | |
7876 | #endif // _TARGET_AMD64_ |
7877 | |
7878 | #ifdef _TARGET_ARM_ |
7879 | /*------------------------------------------------------------------------- |
7880 | * |
7881 | * Now start emitting the part of the prolog which sets up the frame |
7882 | */ |
7883 | |
7884 | if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE) |
7885 | { |
7886 | inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true)); |
7887 | compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true)); |
7888 | } |
7889 | #endif // _TARGET_ARM_ |
7890 | |
7891 | #ifdef _TARGET_XARCH_ |
7892 | if (doubleAlignOrFramePointerUsed()) |
7893 | { |
7894 | inst_RV(INS_push, REG_FPBASE, TYP_REF); |
7895 | compiler->unwindPush(REG_FPBASE); |
7896 | psiAdjustStackLevel(REGSIZE_BYTES); |
7897 | |
7898 | #ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp" |
7899 | genEstablishFramePointer(0, /*reportUnwindData*/ true); |
7900 | #endif // !_TARGET_AMD64_ |
7901 | |
7902 | #if DOUBLE_ALIGN |
7903 | if (compiler->genDoubleAlign()) |
7904 | { |
7905 | noway_assert(isFramePointerUsed() == false); |
7906 | noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */ |
7907 | |
7908 | inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE); |
7909 | } |
7910 | #endif // DOUBLE_ALIGN |
7911 | } |
7912 | #endif // _TARGET_XARCH_ |
7913 | |
7914 | #ifdef _TARGET_ARM64_ |
7915 | // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame. |
7916 | genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn); |
7917 | genPushCalleeSavedRegisters(initReg, &initRegZeroed); |
7918 | #else // !_TARGET_ARM64_ |
7919 | genPushCalleeSavedRegisters(); |
7920 | #endif // !_TARGET_ARM64_ |
7921 | |
7922 | #ifdef _TARGET_ARM_ |
7923 | bool needToEstablishFP = false; |
7924 | int afterLclFrameSPtoFPdelta = 0; |
7925 | if (doubleAlignOrFramePointerUsed()) |
7926 | { |
7927 | needToEstablishFP = true; |
7928 | |
7929 | // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog. |
7930 | // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is |
7931 | // too big, we go ahead and do it here. |
7932 | |
7933 | int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES; |
7934 | afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize; |
7935 | if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta)) |
7936 | { |
7937 | // Oh well, it looks too big. Go ahead and establish the frame pointer here. |
7938 | genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true); |
7939 | needToEstablishFP = false; |
7940 | } |
7941 | } |
7942 | #endif // _TARGET_ARM_ |
7943 | |
7944 | //------------------------------------------------------------------------- |
7945 | // |
7946 | // Subtract the local frame size from SP. |
7947 | // |
7948 | //------------------------------------------------------------------------- |
7949 | CLANG_FORMAT_COMMENT_ANCHOR; |
7950 | |
7951 | #ifndef _TARGET_ARM64_ |
7952 | regMaskTP maskStackAlloc = RBM_NONE; |
7953 | |
7954 | #ifdef _TARGET_ARM_ |
7955 | maskStackAlloc = |
7956 | genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED); |
7957 | #endif // _TARGET_ARM_ |
7958 | |
7959 | if (maskStackAlloc == RBM_NONE) |
7960 | { |
7961 | genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn); |
7962 | } |
7963 | #endif // !_TARGET_ARM64_ |
7964 | |
7965 | //------------------------------------------------------------------------- |
7966 | |
7967 | #ifdef _TARGET_ARM_ |
7968 | if (compiler->compLocallocUsed) |
7969 | { |
7970 | getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE); |
7971 | regSet.verifyRegUsed(REG_SAVED_LOCALLOC_SP); |
7972 | compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0); |
7973 | } |
#endif // _TARGET_ARM_
7975 | |
7976 | #if defined(_TARGET_XARCH_) |
7977 | // Preserve callee saved float regs to stack. |
7978 | genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize); |
7979 | #endif // defined(_TARGET_XARCH_) |
7980 | |
7981 | #ifdef _TARGET_AMD64_ |
7982 | // Establish the AMD64 frame pointer after the OS-reported prolog. |
7983 | if (doubleAlignOrFramePointerUsed()) |
7984 | { |
7985 | bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC; |
7986 | genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData); |
7987 | } |
7988 | #endif //_TARGET_AMD64_ |
7989 | |
7990 | //------------------------------------------------------------------------- |
7991 | // |
7992 | // This is the end of the OS-reported prolog for purposes of unwinding |
7993 | // |
7994 | //------------------------------------------------------------------------- |
7995 | |
7996 | #ifdef _TARGET_ARM_ |
7997 | if (needToEstablishFP) |
7998 | { |
7999 | genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false); |
8000 | needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit |
8001 | } |
8002 | #endif // _TARGET_ARM_ |
8003 | |
8004 | if (compiler->info.compPublishStubParam) |
8005 | { |
8006 | #if CPU_LOAD_STORE_ARCH |
8007 | getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(), |
8008 | compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs); |
8009 | #else |
8010 | // mov [lvaStubArgumentVar], EAX |
8011 | getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(), |
8012 | compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs); |
8013 | #endif |
8014 | assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM); |
8015 | |
8016 | // It's no longer live; clear it out so it can be used after this in the prolog |
8017 | intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM; |
8018 | } |
8019 | |
8020 | #if STACK_PROBES |
8021 | // We could probably fold this into the loop for the FrameSize >= 0x3000 probing |
8022 | // when creating the stack frame. Don't think it's worth it, though. |
8023 | if (genNeedPrologStackProbe) |
8024 | { |
8025 | // |
8026 | // Can't have a call until we have enough padding for rejit |
8027 | // |
8028 | genPrologPadForReJit(); |
8029 | noway_assert(compiler->opts.compNeedStackProbes); |
8030 | genGenerateStackProbe(); |
8031 | compiler->compStackProbePrologDone = true; |
8032 | } |
8033 | #endif // STACK_PROBES |
8034 | |
8035 | // |
8036 | // Zero out the frame as needed |
8037 | // |
8038 | |
8039 | genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed); |
8040 | |
8041 | #if FEATURE_EH_FUNCLETS |
8042 | |
8043 | genSetPSPSym(initReg, &initRegZeroed); |
8044 | |
8045 | #else // !FEATURE_EH_FUNCLETS |
8046 | |
// When compInitMem is true, genZeroInitFrame will zero out the shadow SP slots.
8048 | if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem) |
8049 | { |
8050 | // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) |
8051 | unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE; |
8052 | |
8053 | // Zero out the slot for nesting level 0 |
8054 | unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE; |
8055 | |
8056 | if (!initRegZeroed) |
8057 | { |
8058 | instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); |
8059 | initRegZeroed = true; |
8060 | } |
8061 | |
8062 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar, |
8063 | firstSlotOffs); |
8064 | } |
8065 | |
8066 | #endif // !FEATURE_EH_FUNCLETS |
8067 | |
8068 | genReportGenericContextArg(initReg, &initRegZeroed); |
8069 | |
8070 | // The local variable representing the security object must be on the stack frame |
8071 | // and must be 0 initialized. |
8072 | noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) || |
8073 | (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame && |
8074 | compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit)); |
8075 | |
8076 | #ifdef JIT32_GCENCODER |
8077 | // Initialize the LocalAllocSP slot if there is localloc in the function. |
8078 | if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM) |
8079 | { |
8080 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); |
8081 | } |
8082 | #endif // JIT32_GCENCODER |
8083 | |
8084 | // Set up the GS security cookie |
8085 | |
8086 | genSetGSSecurityCookie(initReg, &initRegZeroed); |
8087 | |
8088 | #ifdef PROFILING_SUPPORTED |
8089 | |
8090 | // Insert a function entry callback for profiling, if requested. |
8091 | genProfilingEnterCallback(initReg, &initRegZeroed); |
8092 | |
8093 | #endif // PROFILING_SUPPORTED |
8094 | |
8095 | if (!genInterruptible) |
8096 | { |
8097 | /*------------------------------------------------------------------------- |
8098 | * |
8099 | * The 'real' prolog ends here for non-interruptible methods. |
8100 | * For fully-interruptible methods, we extend the prolog so that |
* we do not need to track GC information while shuffling the
8102 | * arguments. |
8103 | * |
8104 | * Make sure there's enough padding for ReJIT. |
8105 | * |
8106 | */ |
8107 | genPrologPadForReJit(); |
8108 | getEmitter()->emitMarkPrologEnd(); |
8109 | } |
8110 | |
8111 | #if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) |
8112 | // The unused bits of Vector3 arguments must be cleared |
// since the native compiler doesn't initialize the upper bits to zero.
8114 | // |
8115 | // TODO-Cleanup: This logic can be implemented in |
8116 | // genFnPrologCalleeRegArgs() for argument registers and |
8117 | // genEnregisterIncomingStackArgs() for stack arguments. |
8118 | genClearStackVec3ArgUpperBits(); |
8119 | #endif // UNIX_AMD64_ABI && FEATURE_SIMD |
8120 | |
8121 | /*----------------------------------------------------------------------------- |
8122 | * Take care of register arguments first |
8123 | */ |
8124 | |
8125 | RegState* regState; |
8126 | |
8127 | // Update the arg initial register locations. |
8128 | compiler->lvaUpdateArgsWithInitialReg(); |
8129 | |
8130 | FOREACH_REGISTER_FILE(regState) |
8131 | { |
8132 | if (regState->rsCalleeRegArgMaskLiveIn) |
8133 | { |
// If we need an extra register to shuffle around the incoming registers,
// we will use xtraReg (initReg) and set the xtraRegClobbered flag;
// if we don't need to use the xtraReg, then this flag will stay false.
8137 | // |
8138 | regNumber xtraReg; |
8139 | bool xtraRegClobbered = false; |
8140 | |
8141 | if (genRegMask(initReg) & RBM_ARG_REGS) |
8142 | { |
8143 | xtraReg = initReg; |
8144 | } |
8145 | else |
8146 | { |
8147 | xtraReg = REG_SCRATCH; |
8148 | initRegZeroed = false; |
8149 | } |
8150 | |
8151 | genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState); |
8152 | |
8153 | if (xtraRegClobbered) |
8154 | { |
8155 | initRegZeroed = false; |
8156 | } |
8157 | } |
8158 | } |
8159 | |
8160 | // Home the incoming arguments |
8161 | genEnregisterIncomingStackArgs(); |
8162 | |
8163 | /* Initialize any must-init registers variables now */ |
8164 | |
8165 | if (initRegs) |
8166 | { |
8167 | regMaskTP regMask = 0x1; |
8168 | |
8169 | for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1) |
8170 | { |
8171 | if (regMask & initRegs) |
8172 | { |
8173 | // Check if we have already zeroed this register |
8174 | if ((reg == initReg) && initRegZeroed) |
8175 | { |
8176 | continue; |
8177 | } |
8178 | else |
8179 | { |
8180 | instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg); |
8181 | if (reg == initReg) |
8182 | { |
8183 | initRegZeroed = true; |
8184 | } |
8185 | } |
8186 | } |
8187 | } |
8188 | } |
8189 | |
8190 | if (initFltRegs | initDblRegs) |
8191 | { |
8192 | // If initReg is not in initRegs then we will use REG_SCRATCH |
8193 | if ((genRegMask(initReg) & initRegs) == 0) |
8194 | { |
8195 | initReg = REG_SCRATCH; |
8196 | initRegZeroed = false; |
8197 | } |
8198 | |
8199 | #ifdef _TARGET_ARM_ |
8200 | // This is needed only for Arm since it can use a zero initialized int register |
8201 | // to initialize vfp registers. |
8202 | if (!initRegZeroed) |
8203 | { |
8204 | instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); |
8205 | initRegZeroed = true; |
8206 | } |
8207 | #endif // _TARGET_ARM_ |
8208 | |
8209 | genZeroInitFltRegs(initFltRegs, initDblRegs, initReg); |
8210 | } |
8211 | |
8212 | //----------------------------------------------------------------------------- |
8213 | |
8214 | // |
8215 | // Increase the prolog size here only if fully interruptible. |
8216 | // And again make sure it's big enough for ReJIT |
8217 | // |
8218 | |
8219 | if (genInterruptible) |
8220 | { |
8221 | genPrologPadForReJit(); |
8222 | getEmitter()->emitMarkPrologEnd(); |
8223 | } |
8224 | |
8225 | if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) |
8226 | { |
8227 | psiEndProlog(); |
8228 | } |
8229 | |
8230 | if (hasGCRef) |
8231 | { |
8232 | getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi); |
8233 | } |
8234 | else |
8235 | { |
8236 | noway_assert(GCrefLo == +INT_MAX); |
8237 | noway_assert(GCrefHi == -INT_MAX); |
8238 | } |
8239 | |
8240 | #ifdef DEBUG |
8241 | if (compiler->opts.dspCode) |
8242 | { |
8243 | printf("\n" ); |
8244 | } |
8245 | #endif |
8246 | |
8247 | #ifdef _TARGET_X86_ |
8248 | // On non-x86 the VARARG cookie does not need any special treatment. |
8249 | |
8250 | // Load up the VARARG argument pointer register so it doesn't get clobbered. |
8251 | // only do this if we actually access any statically declared args |
8252 | // (our argument pointer register has a refcount > 0). |
8253 | unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs; |
8254 | |
8255 | if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt() > 0) |
8256 | { |
8257 | varDsc = &compiler->lvaTable[argsStartVar]; |
8258 | |
8259 | noway_assert(compiler->info.compArgsCount > 0); |
8260 | |
8261 | // MOV EAX, <VARARGS HANDLE> |
8262 | getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0); |
8263 | regSet.verifyRegUsed(REG_EAX); |
8264 | |
8265 | // MOV EAX, [EAX] |
8266 | getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0); |
8267 | |
8268 | // EDX might actually be holding something here. So make sure to only use EAX for this code |
8269 | // sequence. |
8270 | |
8271 | LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1]; |
8272 | noway_assert(!lastArg->lvRegister); |
8273 | signed offset = lastArg->lvStkOffs; |
8274 | assert(offset != BAD_STK_OFFS); |
8275 | noway_assert(lastArg->lvFramePointerBased); |
8276 | |
8277 | // LEA EAX, &<VARARGS HANDLE> + EAX |
8278 | getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset); |
8279 | |
8280 | if (varDsc->lvIsInReg()) |
8281 | { |
8282 | if (varDsc->lvRegNum != REG_EAX) |
8283 | { |
8284 | getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX); |
8285 | regSet.verifyRegUsed(varDsc->lvRegNum); |
8286 | } |
8287 | } |
8288 | else |
8289 | { |
8290 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0); |
8291 | } |
8292 | } |
8293 | |
8294 | #endif // _TARGET_X86_ |
8295 | |
8296 | #if defined(DEBUG) && defined(_TARGET_XARCH_) |
8297 | if (compiler->opts.compStackCheckOnRet) |
8298 | { |
8299 | noway_assert(compiler->lvaReturnSpCheck != 0xCCCCCCCC && |
8300 | compiler->lvaTable[compiler->lvaReturnSpCheck].lvDoNotEnregister && |
8301 | compiler->lvaTable[compiler->lvaReturnSpCheck].lvOnFrame); |
8302 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0); |
8303 | } |
8304 | #endif // defined(DEBUG) && defined(_TARGET_XARCH_) |
8305 | |
8306 | getEmitter()->emitEndProlog(); |
8307 | compiler->unwindEndProlog(); |
8308 | |
8309 | noway_assert(getEmitter()->emitMaxTmpSize == regSet.tmpGetTotalSize()); |
8310 | } |
8311 | #ifdef _PREFAST_ |
8312 | #pragma warning(pop) |
8313 | #endif |
8314 | |
8315 | /***************************************************************************** |
8316 | * |
8317 | * Generates code for a function epilog. |
8318 | * |
8319 | * Please consult the "debugger team notification" comment in genFnProlog(). |
8320 | */ |
8321 | |
8322 | #if defined(_TARGET_ARMARCH_) |
8323 | |
8324 | void CodeGen::genFnEpilog(BasicBlock* block) |
8325 | { |
8326 | #ifdef DEBUG |
8327 | if (verbose) |
8328 | printf("*************** In genFnEpilog()\n" ); |
8329 | #endif // DEBUG |
8330 | |
8331 | ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); |
8332 | |
8333 | VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars); |
8334 | gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs; |
8335 | gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs; |
8336 | |
8337 | #ifdef DEBUG |
8338 | if (compiler->opts.dspCode) |
8339 | printf("\n__epilog:\n" ); |
8340 | |
8341 | if (verbose) |
8342 | { |
8343 | printf("gcVarPtrSetCur=%s " , VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); |
8344 | dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); |
8345 | printf(", gcRegGCrefSetCur=" ); |
8346 | printRegMaskInt(gcInfo.gcRegGCrefSetCur); |
8347 | getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); |
8348 | printf(", gcRegByrefSetCur=" ); |
8349 | printRegMaskInt(gcInfo.gcRegByrefSetCur); |
8350 | getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); |
8351 | printf("\n" ); |
8352 | } |
8353 | #endif // DEBUG |
8354 | |
8355 | bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); |
8356 | |
8357 | GenTree* lastNode = block->lastNode(); |
8358 | |
8359 | // Method handle and address info used in case of jump epilog |
8360 | CORINFO_METHOD_HANDLE methHnd = nullptr; |
8361 | CORINFO_CONST_LOOKUP addrInfo; |
8362 | addrInfo.addr = nullptr; |
8363 | addrInfo.accessType = IAT_VALUE; |
8364 | |
8365 | if (jmpEpilog && lastNode->gtOper == GT_JMP) |
8366 | { |
8367 | methHnd = (CORINFO_METHOD_HANDLE)lastNode->gtVal.gtVal1; |
8368 | compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); |
8369 | } |
8370 | |
8371 | #ifdef _TARGET_ARM_ |
8372 | // We delay starting the unwind codes until we have an instruction which we know |
8373 | // needs an unwind code. In particular, for large stack frames in methods without |
8374 | // localloc, the sequence might look something like this: |
8375 | // movw r3, 0x38e0 |
8376 | // add sp, r3 |
8377 | // pop {r4,r5,r6,r10,r11,pc} |
8378 | // In this case, the "movw" should not be part of the unwind codes, since it will |
8379 | // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog() |
8380 | // also sets the current location as the beginning offset of the epilog, so every |
8381 | // instruction afterwards needs an unwind code. In the case above, if you call |
8382 | // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw". |
8383 | |
8384 | bool unwindStarted = false; |
8385 | |
8386 | // Tear down the stack frame |
8387 | |
8388 | if (compiler->compLocallocUsed) |
8389 | { |
8390 | if (!unwindStarted) |
8391 | { |
8392 | compiler->unwindBegEpilog(); |
8393 | unwindStarted = true; |
8394 | } |
8395 | |
8396 | // mov R9 into SP |
8397 | inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP); |
8398 | compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0); |
8399 | } |
8400 | |
8401 | if (jmpEpilog || |
8402 | genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) == |
8403 | RBM_NONE) |
8404 | { |
8405 | genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog); |
8406 | } |
8407 | |
8408 | if (!unwindStarted) |
8409 | { |
8410 | // If we haven't generated anything yet, we're certainly going to generate a "pop" next. |
8411 | compiler->unwindBegEpilog(); |
8412 | unwindStarted = true; |
8413 | } |
8414 | |
8415 | if (jmpEpilog && lastNode->gtOper == GT_JMP && addrInfo.accessType == IAT_RELPVALUE) |
8416 | { |
// The IAT_RELPVALUE jump at the end is done using relative indirection, so an
// additional helper register is required.
// We use LR just before it is going to be restored from the stack, i.e.
8420 | // |
8421 | // movw r12, laddr |
8422 | // movt r12, haddr |
8423 | // mov lr, r12 |
8424 | // ldr r12, [r12] |
8425 | // add r12, r12, lr |
8426 | // pop {lr} |
8427 | // ... |
8428 | // bx r12 |
8429 | |
8430 | regNumber indCallReg = REG_R12; |
8431 | regNumber vptrReg1 = REG_LR; |
8432 | |
8433 | instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); |
8434 | getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, vptrReg1, indCallReg); |
8435 | getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); |
8436 | getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, indCallReg, vptrReg1); |
8437 | } |
8438 | |
8439 | genPopCalleeSavedRegisters(jmpEpilog); |
8440 | |
8441 | if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE) |
8442 | { |
// We had better not have used a pop-to-PC return; otherwise this code would be unreachable
8444 | noway_assert(!genUsedPopToReturn); |
8445 | |
8446 | int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; |
8447 | inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE); |
8448 | compiler->unwindAllocStack(preSpillRegArgSize); |
8449 | } |
8450 | |
8451 | if (jmpEpilog) |
8452 | { |
// We had better not have used a pop-to-PC return; otherwise this code would be unreachable
8454 | noway_assert(!genUsedPopToReturn); |
8455 | } |
8456 | |
8457 | #else // _TARGET_ARM64_ |
8458 | compiler->unwindBegEpilog(); |
8459 | |
8460 | genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog); |
8461 | #endif // _TARGET_ARM64_ |
8462 | |
8463 | if (jmpEpilog) |
8464 | { |
8465 | hasTailCalls = true; |
8466 | |
8467 | noway_assert(block->bbJumpKind == BBJ_RETURN); |
8468 | noway_assert(block->bbTreeList != nullptr); |
8469 | |
8470 | /* figure out what jump we have */ |
8471 | GenTree* jmpNode = lastNode; |
8472 | #if !FEATURE_FASTTAILCALL |
8473 | noway_assert(jmpNode->gtOper == GT_JMP); |
8474 | #else // FEATURE_FASTTAILCALL |
8475 | // armarch |
8476 | // If jmpNode is GT_JMP then gtNext must be null. |
8477 | // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. |
8478 | noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); |
8479 | |
8480 | // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp |
8481 | noway_assert((jmpNode->gtOper == GT_JMP) || |
8482 | ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); |
8483 | |
8484 | // The next block is associated with this "if" stmt |
8485 | if (jmpNode->gtOper == GT_JMP) |
8486 | #endif // FEATURE_FASTTAILCALL |
8487 | { |
8488 | // Simply emit a jump to the methodHnd. This is similar to a call so we can use |
8489 | // the same descriptor with some minor adjustments. |
8490 | assert(methHnd != nullptr); |
8491 | assert(addrInfo.addr != nullptr); |
8492 | |
8493 | #ifdef _TARGET_ARMARCH_ |
8494 | emitter::EmitCallType callType; |
8495 | void* addr; |
8496 | regNumber indCallReg; |
8497 | switch (addrInfo.accessType) |
8498 | { |
8499 | case IAT_VALUE: |
8500 | if (validImmForBL((ssize_t)addrInfo.addr)) |
8501 | { |
8502 | // Simple direct call |
8503 | callType = emitter::EC_FUNC_TOKEN; |
8504 | addr = addrInfo.addr; |
8505 | indCallReg = REG_NA; |
8506 | break; |
8507 | } |
8508 | |
8509 | // otherwise the target address doesn't fit in an immediate |
8510 | // so we have to burn a register... |
8511 | __fallthrough; |
8512 | |
8513 | case IAT_PVALUE: |
8514 | // Load the address into a register, load indirect and call through a register |
8515 | // We have to use R12 since we assume the argument registers are in use |
8516 | callType = emitter::EC_INDIR_R; |
8517 | indCallReg = REG_INDIRECT_CALL_TARGET_REG; |
8518 | addr = NULL; |
8519 | instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); |
8520 | if (addrInfo.accessType == IAT_PVALUE) |
8521 | { |
8522 | getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); |
8523 | regSet.verifyRegUsed(indCallReg); |
8524 | } |
8525 | break; |
8526 | |
8527 | case IAT_RELPVALUE: |
8528 | { |
8529 | // Load the address into a register, load relative indirect and call through a register |
8530 | // We have to use R12 since we assume the argument registers are in use |
// LR is used as a helper register right before it is restored from the stack; thus,
// all relative address calculations are performed before LR is restored.
8533 | callType = emitter::EC_INDIR_R; |
8534 | indCallReg = REG_R12; |
8535 | addr = NULL; |
8536 | |
8537 | regSet.verifyRegUsed(indCallReg); |
8538 | break; |
8539 | } |
8540 | |
8541 | case IAT_PPVALUE: |
8542 | default: |
8543 | NO_WAY("Unsupported JMP indirection" ); |
8544 | } |
8545 | |
8546 | /* Simply emit a jump to the methodHnd. This is similar to a call so we can use |
8547 | * the same descriptor with some minor adjustments. |
8548 | */ |
8549 | |
8550 | // clang-format off |
8551 | getEmitter()->emitIns_Call(callType, |
8552 | methHnd, |
8553 | INDEBUG_LDISASM_COMMA(nullptr) |
8554 | addr, |
8555 | 0, // argSize |
8556 | EA_UNKNOWN, // retSize |
8557 | #if defined(_TARGET_ARM64_) |
8558 | EA_UNKNOWN, // secondRetSize |
8559 | #endif |
8560 | gcInfo.gcVarPtrSetCur, |
8561 | gcInfo.gcRegGCrefSetCur, |
8562 | gcInfo.gcRegByrefSetCur, |
8563 | BAD_IL_OFFSET, // IL offset |
8564 | indCallReg, // ireg |
8565 | REG_NA, // xreg |
8566 | 0, // xmul |
8567 | 0, // disp |
8568 | true); // isJump |
8569 | // clang-format on |
8570 | CLANG_FORMAT_COMMENT_ANCHOR; |
8571 | #endif //_TARGET_ARMARCH_ |
8572 | } |
8573 | #if FEATURE_FASTTAILCALL |
8574 | else |
8575 | { |
8576 | // Fast tail call. |
8577 | // Call target = REG_FASTTAILCALL_TARGET |
8578 | // https://github.com/dotnet/coreclr/issues/4827 |
8579 | // Do we need a special encoding for stack walker like rex.w prefix for x64? |
8580 | getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET); |
8581 | } |
8582 | #endif // FEATURE_FASTTAILCALL |
8583 | } |
8584 | else |
8585 | { |
8586 | #ifdef _TARGET_ARM_ |
8587 | if (!genUsedPopToReturn) |
8588 | { |
8589 | // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}", |
8590 | // so we need a "bx lr" instruction to return from the function. |
8591 | inst_RV(INS_bx, REG_LR, TYP_I_IMPL); |
8592 | compiler->unwindBranch16(); |
8593 | } |
8594 | #else // _TARGET_ARM64_ |
8595 | inst_RV(INS_ret, REG_LR, TYP_I_IMPL); |
8596 | compiler->unwindReturn(REG_LR); |
8597 | #endif // _TARGET_ARM64_ |
8598 | } |
8599 | |
8600 | compiler->unwindEndEpilog(); |
8601 | } |
8602 | |
8603 | #elif defined(_TARGET_XARCH_) |
8604 | |
8605 | void CodeGen::genFnEpilog(BasicBlock* block) |
8606 | { |
8607 | #ifdef DEBUG |
8608 | if (verbose) |
8609 | { |
8610 | printf("*************** In genFnEpilog()\n" ); |
8611 | } |
8612 | #endif |
8613 | |
8614 | ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); |
8615 | |
8616 | VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars); |
8617 | gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs; |
8618 | gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs; |
8619 | |
8620 | noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts |
8621 | |
8622 | #ifdef DEBUG |
8623 | genInterruptibleUsed = true; |
8624 | #endif |
8625 | |
8626 | bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); |
8627 | |
8628 | #ifdef DEBUG |
8629 | if (compiler->opts.dspCode) |
8630 | { |
8631 | printf("\n__epilog:\n" ); |
8632 | } |
8633 | |
8634 | if (verbose) |
8635 | { |
8636 | printf("gcVarPtrSetCur=%s " , VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); |
8637 | dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); |
8638 | printf(", gcRegGCrefSetCur=" ); |
8639 | printRegMaskInt(gcInfo.gcRegGCrefSetCur); |
8640 | getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); |
8641 | printf(", gcRegByrefSetCur=" ); |
8642 | printRegMaskInt(gcInfo.gcRegByrefSetCur); |
8643 | getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); |
8644 | printf("\n" ); |
8645 | } |
8646 | #endif |
8647 | |
8648 | // Restore float registers that were saved to stack before SP is modified. |
8649 | genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize); |
8650 | |
8651 | #ifdef JIT32_GCENCODER |
8652 | // When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after |
8653 | // the above call to `genRestoreCalleeSavedFltRegs` because that function |
8654 | // a) does not actually restore any registers: there are none when targeting the Windows x86 ABI, |
8655 | // which is the only target that uses the JIT32 GC encoder |
8656 | // b) may issue a `vzeroupper` instruction to eliminate AVX -> SSE transition penalties. |
8657 | // Because the `vzeroupper` instruction is not recognized by the VM's unwinder and there are no |
8658 | // callee-save FP restores that the unwinder would need to see, we can avoid the need to change the |
8659 | // unwinder (and break binary compat with older versions of the runtime) by starting the epilog |
8660 | // after any `vzeroupper` instruction has been emitted. If either of the above conditions changes, |
8661 | // we will need to rethink this. |
8662 | getEmitter()->emitStartEpilog(); |
8663 | #endif |
8664 | |
8665 | /* Compute the size in bytes we've pushed/popped */ |
8666 | |
8667 | if (!doubleAlignOrFramePointerUsed()) |
8668 | { |
// We have an ESP frame
8670 | |
8671 | noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer |
8672 | |
8673 | /* Get rid of our local variables */ |
8674 | |
8675 | if (compiler->compLclFrameSize) |
8676 | { |
8677 | #ifdef _TARGET_X86_ |
8678 | /* Add 'compiler->compLclFrameSize' to ESP */ |
8679 | /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */ |
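/* ("pop ecx" is a 1-byte encoding versus 3 bytes for "add esp, 4". It is avoided when
   compJmpOpUsed is set, presumably because ECX may still carry an outgoing argument
   for a CEE_JMP target.) */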
8680 | |
8681 | if ((compiler->compLclFrameSize == TARGET_POINTER_SIZE) && !compiler->compJmpOpUsed) |
8682 | { |
8683 | inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); |
8684 | regSet.verifyRegUsed(REG_ECX); |
8685 | } |
8686 | else |
#endif // _TARGET_X86_
8688 | { |
8689 | /* Add 'compiler->compLclFrameSize' to ESP */ |
8690 | /* Generate "add esp, <stack-size>" */ |
8691 | inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE); |
8692 | } |
8693 | } |
8694 | |
8695 | genPopCalleeSavedRegisters(); |
8696 | } |
8697 | else |
8698 | { |
8699 | noway_assert(doubleAlignOrFramePointerUsed()); |
8700 | |
8701 | /* Tear down the stack frame */ |
8702 | |
8703 | bool needMovEspEbp = false; |
8704 | |
8705 | #if DOUBLE_ALIGN |
8706 | if (compiler->genDoubleAlign()) |
8707 | { |
8708 | // |
8709 | // add esp, compLclFrameSize |
8710 | // |
8711 | // We need not do anything (except the "mov esp, ebp") if |
8712 | // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it |
8713 | // also complicates the code manager. Hence, we ignore that case. |
8714 | |
8715 | noway_assert(compiler->compLclFrameSize != 0); |
8716 | inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE); |
8717 | |
8718 | needMovEspEbp = true; |
8719 | } |
8720 | else |
8721 | #endif // DOUBLE_ALIGN |
8722 | { |
8723 | bool needLea = false; |
8724 | |
8725 | if (compiler->compLocallocUsed) |
8726 | { |
8727 | // ESP may be variable if a localloc was actually executed. Reset it. |
8728 | // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES] |
8729 | |
8730 | needLea = true; |
8731 | } |
8732 | else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED)) |
8733 | { |
8734 | if (compiler->compLclFrameSize != 0) |
8735 | { |
8736 | #ifdef _TARGET_AMD64_ |
8737 | // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So, |
8738 | // do an LEA to "pop off" the frame allocation. |
8739 | needLea = true; |
8740 | #else // !_TARGET_AMD64_ |
8741 | // We will just generate "mov esp, ebp" and be done with it. |
8742 | needMovEspEbp = true; |
8743 | #endif // !_TARGET_AMD64_ |
8744 | } |
8745 | } |
8746 | else if (compiler->compLclFrameSize == 0) |
8747 | { |
8748 | // do nothing before popping the callee-saved registers |
8749 | } |
8750 | #ifdef _TARGET_X86_ |
8751 | else if (compiler->compLclFrameSize == REGSIZE_BYTES) |
8752 | { |
8753 | // "pop ecx" will make ESP point to the callee-saved registers |
8754 | inst_RV(INS_pop, REG_ECX, TYP_I_IMPL); |
8755 | regSet.verifyRegUsed(REG_ECX); |
8756 | } |
#endif // _TARGET_X86_
8758 | else |
8759 | { |
8760 | // We need to make ESP point to the callee-saved registers |
8761 | needLea = true; |
8762 | } |
8763 | |
8764 | if (needLea) |
8765 | { |
8766 | int offset; |
8767 | |
8768 | #ifdef _TARGET_AMD64_ |
8769 | // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta] |
8770 | // |
8771 | // Case 1: localloc not used. |
8772 | // genSPToFPDelta = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize |
8773 | // offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES; |
8774 | // The amount to be subtracted from RBP to point at callee saved int regs. |
8775 | // |
8776 | // Case 2: localloc used |
8777 | // genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize) |
8778 | // Offset = Amount to be added to RBP to point at callee saved int regs. |
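//
// For example (illustrative only), in Case 1 with four callee-saved int registers pushed,
// offset = 4 * REGSIZE_BYTES = 32, and the "lea" below becomes "lea rsp, [rbp-32]", leaving
// RSP pointing at the lowest callee-saved int register slot before the pops.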
8779 | offset = genSPtoFPdelta() - compiler->compLclFrameSize; |
8780 | |
8781 | // Offset should fit within a byte if localloc is not used. |
8782 | if (!compiler->compLocallocUsed) |
8783 | { |
8784 | noway_assert(offset < UCHAR_MAX); |
8785 | } |
8786 | #else |
8787 | // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES] |
8788 | offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES; |
8789 | noway_assert(offset < UCHAR_MAX); // the offset fits in a byte |
8790 | #endif |
8791 | |
8792 | getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset); |
8793 | } |
8794 | } |
8795 | |
8796 | // |
8797 | // Pop the callee-saved registers (if any) |
8798 | // |
8799 | |
8800 | genPopCalleeSavedRegisters(); |
8801 | |
8802 | #ifdef _TARGET_AMD64_ |
8803 | assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs |
8804 | #else // !_TARGET_AMD64_ |
8805 | if (needMovEspEbp) |
8806 | { |
8807 | // mov esp, ebp |
8808 | inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE); |
8809 | } |
8810 | #endif // !_TARGET_AMD64_ |
8811 | |
8812 | // pop ebp |
8813 | inst_RV(INS_pop, REG_EBP, TYP_I_IMPL); |
8814 | } |
8815 | |
8816 | getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence |
8817 | |
/* Check if this is a special return block, i.e.
* a CEE_JMP instruction */
8820 | |
8821 | if (jmpEpilog) |
8822 | { |
8823 | noway_assert(block->bbJumpKind == BBJ_RETURN); |
8824 | noway_assert(block->bbTreeList); |
8825 | |
8826 | // figure out what jump we have |
8827 | GenTree* jmpNode = block->lastNode(); |
8828 | #if !FEATURE_FASTTAILCALL |
8829 | // x86 |
8830 | noway_assert(jmpNode->gtOper == GT_JMP); |
8831 | #else |
8832 | // amd64 |
8833 | // If jmpNode is GT_JMP then gtNext must be null. |
8834 | // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. |
8835 | noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); |
8836 | |
8837 | // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp |
8838 | noway_assert((jmpNode->gtOper == GT_JMP) || |
8839 | ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); |
8840 | |
8841 | // The next block is associated with this "if" stmt |
8842 | if (jmpNode->gtOper == GT_JMP) |
8843 | #endif |
8844 | { |
8845 | // Simply emit a jump to the methodHnd. This is similar to a call so we can use |
8846 | // the same descriptor with some minor adjustments. |
8847 | CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1; |
8848 | |
8849 | CORINFO_CONST_LOOKUP addrInfo; |
8850 | compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); |
8851 | if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE) |
8852 | { |
8853 | NO_WAY("Unsupported JMP indirection" ); |
8854 | } |
8855 | |
8856 | const emitter::EmitCallType callType = |
8857 | (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR; |
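
// That is (illustrative): the emitted instruction is a direct "jmp target" for IAT_VALUE,
// or an indirect "jmp [addrOfTarget]" for IAT_PVALUE.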
8858 | |
8859 | // Simply emit a jump to the methodHnd. This is similar to a call so we can use |
8860 | // the same descriptor with some minor adjustments. |
8861 | |
8862 | // clang-format off |
8863 | getEmitter()->emitIns_Call(callType, |
8864 | methHnd, |
8865 | INDEBUG_LDISASM_COMMA(nullptr) |
8866 | addrInfo.addr, |
8867 | 0, // argSize |
8868 | EA_UNKNOWN // retSize |
8869 | MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize |
8870 | gcInfo.gcVarPtrSetCur, |
8871 | gcInfo.gcRegGCrefSetCur, |
8872 | gcInfo.gcRegByrefSetCur, |
8873 | BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */ |
8874 | true /* isJump */ |
8875 | ); |
8876 | // clang-format on |
8877 | } |
8878 | #if FEATURE_FASTTAILCALL |
8879 | else |
8880 | { |
8881 | #ifdef _TARGET_AMD64_ |
8882 | // Fast tail call. |
8883 | // Call target = RAX. |
8884 | // Stack walker requires that a register indirect tail call be rex.w prefixed. |
8885 | getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX); |
8886 | #else |
8887 | assert(!"Fast tail call as epilog+jmp" ); |
8888 | unreached(); |
8889 | #endif //_TARGET_AMD64_ |
8890 | } |
8891 | #endif // FEATURE_FASTTAILCALL |
8892 | } |
8893 | else |
8894 | { |
8895 | unsigned stkArgSize = 0; // Zero on all platforms except x86 |
8896 | |
8897 | #if defined(_TARGET_X86_) |
8898 | bool fCalleePop = true; |
8899 | |
8900 | // varargs has caller pop |
8901 | if (compiler->info.compIsVarArgs) |
8902 | fCalleePop = false; |
8903 | |
8904 | #ifdef UNIX_X86_ABI |
8905 | if (IsCallerPop(compiler->info.compMethodInfo->args.callConv)) |
8906 | fCalleePop = false; |
8907 | #endif // UNIX_X86_ABI |
8908 | |
8909 | if (fCalleePop) |
8910 | { |
8911 | noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * REGSIZE_BYTES); |
8912 | stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES; |
8913 | |
8914 | noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand |
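
// For example (illustrative only): with 12 bytes of caller-pushed stack args and no
// register args, stkArgSize == 12 and instGen_Return below emits "ret 12", popping
// the arguments on behalf of the caller.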
8915 | } |
8916 | #endif // _TARGET_X86_ |
8917 | |
8918 | /* Return, popping our arguments (if any) */ |
8919 | instGen_Return(stkArgSize); |
8920 | } |
8921 | } |
8922 | |
8923 | #else // _TARGET_* |
8924 | #error Unsupported or unset target architecture |
8925 | #endif // _TARGET_* |
8926 | |
8927 | #if FEATURE_EH_FUNCLETS |
8928 | |
8929 | #ifdef _TARGET_ARM_ |
8930 | |
8931 | /***************************************************************************** |
8932 | * |
8933 | * Generates code for an EH funclet prolog. |
8934 | * |
8935 | * Funclets have the following incoming arguments: |
8936 | * |
8937 | * catch: r0 = the exception object that was caught (see GT_CATCH_ARG) |
8938 | * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function |
8939 | * finally/fault: none |
8940 | * |
8941 | * Funclets set the following registers on exit: |
8942 | * |
8943 | * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET) |
8944 | * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) |
8945 | * finally/fault: none |
8946 | * |
8947 | * The ARM funclet prolog sequence is: |
8948 | * |
8949 | * push {regs,lr} ; We push the callee-saved regs and 'lr'. |
8950 | * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we |
8951 | * ; actually use in the funclet. Currently, we save the same set of callee-saved regs |
8952 | * ; calculated for the entire function. |
8953 | * sub sp, XXX ; Establish the rest of the frame. |
8954 | * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned |
8955 | * ; up to preserve stack alignment. If we push an odd number of registers, we also |
8956 | * ; generate this, to keep the stack aligned. |
8957 | * |
8958 | * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested |
8959 | * ; filters. |
8960 | * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet |
8961 | * ; epilog. |
8962 | * |
8963 | * if (this is a filter funclet) |
8964 | * { |
8965 | * // r1 on entry to a filter funclet is CallerSP of the containing function: |
8966 | * // either the main function, or the funclet for a handler that this filter is dynamically nested within. |
8967 | * // Note that a filter can be dynamically nested within a funclet even if it is not statically within |
8968 | * // a funclet. Consider: |
8969 | * // |
8970 | * // try { |
8971 | * // try { |
8972 | * // throw new Exception(); |
8973 | * // } catch(Exception) { |
8974 | * // throw new Exception(); // The exception thrown here ... |
8975 | * // } |
8976 | * // } filter { // ... will be processed here, while the "catch" funclet frame is |
8977 | * // // still on the stack |
8978 | * // } filter-handler { |
8979 | * // } |
8980 | * // |
8981 | * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the |
8982 | * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting |
8983 | * // nested EH. To simplify, we just always create a main function PSP for any function with a filter. |
8984 | * |
8985 | * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of |
8986 | * ; the dynamically containing funclet or function) |
8987 | * str r1, [sp + PSP_slot_SP_offset] ; store the PSP |
8988 | * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer |
8989 | * } |
8990 | * else |
8991 | * { |
8992 | * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. |
8993 | * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction. |
8994 | * |
8995 | * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch. |
8996 | * str r3, [sp + PSP_slot_SP_offset] ; store the PSP |
8997 | * } |
8998 | * |
8999 | * The epilog sequence is then: |
9000 | * |
9001 | * add sp, XXX ; if necessary |
9002 | * pop {regs,pc} |
9003 | * |
9004 | * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction. |
9005 | * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow. |
9006 | * |
9007 | * The funclet frame is thus: |
9008 | * |
9009 | * | | |
9010 | * |-----------------------| |
9011 | * | incoming | |
9012 | * | arguments | |
9013 | * +=======================+ <---- Caller's SP |
9014 | * |Callee saved registers | |
9015 | * |-----------------------| |
9016 | * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset |
9017 | * | | // in function and funclet |
9018 | * |-----------------------| |
9019 | * | PSP slot | // Omitted in CoreRT ABI |
9020 | * |-----------------------| |
9021 | * ~ possible 4 byte pad ~ |
9022 | * ~ for alignment ~ |
9023 | * |-----------------------| |
9024 | * | Outgoing arg space | |
9025 | * |-----------------------| <---- Ambient SP |
9026 | * | | | |
9027 | * ~ | Stack grows ~ |
9028 | * | | downward | |
9029 | * V |
9030 | */ |
9031 | |
9032 | void CodeGen::genFuncletProlog(BasicBlock* block) |
9033 | { |
9034 | #ifdef DEBUG |
9035 | if (verbose) |
9036 | printf("*************** In genFuncletProlog()\n" ); |
9037 | #endif |
9038 | |
9039 | assert(block != NULL); |
9040 | assert(block->bbFlags & BBF_FUNCLET_BEG); |
9041 | |
9042 | ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true); |
9043 | |
9044 | gcInfo.gcResetForBB(); |
9045 | |
9046 | compiler->unwindBegProlog(); |
9047 | |
9048 | regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; |
9049 | regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat; |
9050 | |
9051 | regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat); |
9052 | maskPushRegsInt |= maskStackAlloc; |
9053 | |
9054 | assert(FitsIn<int>(maskPushRegsInt)); |
9055 | inst_IV(INS_push, (int)maskPushRegsInt); |
9056 | compiler->unwindPushMaskInt(maskPushRegsInt); |
9057 | |
9058 | if (maskPushRegsFloat != RBM_NONE) |
9059 | { |
9060 | genPushFltRegs(maskPushRegsFloat); |
9061 | compiler->unwindPushMaskFloat(maskPushRegsFloat); |
9062 | } |
9063 | |
9064 | bool isFilter = (block->bbCatchTyp == BBCT_FILTER); |
9065 | |
9066 | regMaskTP maskArgRegsLiveIn; |
9067 | if (isFilter) |
9068 | { |
9069 | maskArgRegsLiveIn = RBM_R0 | RBM_R1; |
9070 | } |
9071 | else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) |
9072 | { |
9073 | maskArgRegsLiveIn = RBM_NONE; |
9074 | } |
9075 | else |
9076 | { |
9077 | maskArgRegsLiveIn = RBM_R0; |
9078 | } |
9079 | |
9080 | regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed |
9081 | bool initRegZeroed = false; |
9082 | |
9083 | if (maskStackAlloc == RBM_NONE) |
9084 | { |
9085 | genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn); |
9086 | } |
9087 | |
9088 | // This is the end of the OS-reported prolog for purposes of unwinding |
9089 | compiler->unwindEndProlog(); |
9090 | |
9091 | if (isFilter) |
9092 | { |
9093 | // This is the first block of a filter |
9094 | |
9095 | getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1, |
9096 | genFuncletInfo.fiPSP_slot_CallerSP_offset); |
9097 | regSet.verifyRegUsed(REG_R1); |
9098 | getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE, |
9099 | genFuncletInfo.fiPSP_slot_SP_offset); |
9100 | getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1, |
9101 | genFuncletInfo.fiFunctionCallerSPtoFPdelta); |
9102 | } |
9103 | else |
9104 | { |
9105 | // This is a non-filter funclet |
9106 | getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, |
9107 | genFuncletInfo.fiFunctionCallerSPtoFPdelta); |
9108 | regSet.verifyRegUsed(REG_R3); |
9109 | getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE, |
9110 | genFuncletInfo.fiPSP_slot_SP_offset); |
9111 | } |
9112 | } |
9113 | |
9114 | /***************************************************************************** |
9115 | * |
9116 | * Generates code for an EH funclet epilog. |
9117 | */ |
9118 | |
9119 | void CodeGen::genFuncletEpilog() |
9120 | { |
9121 | #ifdef DEBUG |
9122 | if (verbose) |
        printf("*************** In genFuncletEpilog()\n");
9124 | #endif |
9125 | |
9126 | ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); |
9127 | |
9128 | // Just as for the main function, we delay starting the unwind codes until we have |
9129 | // an instruction which we know needs an unwind code. This is to support code like |
9130 | // this: |
9131 | // movw r3, 0x38e0 |
9132 | // add sp, r3 |
9133 | // pop {r4,r5,r6,r10,r11,pc} |
9134 | // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details. |
9135 | |
9136 | bool unwindStarted = false; |
9137 | |
9138 | /* The saved regs info saves the LR register. We need to pop the PC register to return */ |
9139 | assert(genFuncletInfo.fiSaveRegs & RBM_LR); |
9140 | |
9141 | regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; |
9142 | regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat; |
9143 | |
9144 | regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat); |
9145 | maskPopRegsInt |= maskStackAlloc; |
9146 | |
9147 | if (maskStackAlloc == RBM_NONE) |
9148 | { |
9149 | genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false); |
9150 | } |
9151 | |
9152 | if (!unwindStarted) |
9153 | { |
9154 | // We'll definitely generate an unwindable instruction next |
9155 | compiler->unwindBegEpilog(); |
9156 | unwindStarted = true; |
9157 | } |
9158 | |
9159 | maskPopRegsInt &= ~RBM_LR; |
9160 | maskPopRegsInt |= RBM_PC; |
9161 | |
9162 | if (maskPopRegsFloat != RBM_NONE) |
9163 | { |
9164 | genPopFltRegs(maskPopRegsFloat); |
9165 | compiler->unwindPopMaskFloat(maskPopRegsFloat); |
9166 | } |
9167 | |
9168 | assert(FitsIn<int>(maskPopRegsInt)); |
9169 | inst_IV(INS_pop, (int)maskPopRegsInt); |
9170 | compiler->unwindPopMaskInt(maskPopRegsInt); |
9171 | |
9172 | compiler->unwindEndEpilog(); |
9173 | } |
9174 | |
9175 | /***************************************************************************** |
9176 | * |
9177 | * Capture the information used to generate the funclet prologs and epilogs. |
9178 | * Note that all funclet prologs are identical, and all funclet epilogs are |
9179 | * identical (per type: filters are identical, and non-filters are identical). |
9180 | * Thus, we compute the data used for these just once. |
9181 | * |
9182 | * See genFuncletProlog() for more information about the prolog/epilog sequences. |
9183 | */ |
9184 | |
9185 | void CodeGen::genCaptureFuncletPrologEpilogInfo() |
9186 | { |
9187 | if (compiler->ehAnyFunclets()) |
9188 | { |
9189 | assert(isFramePointerUsed()); |
9190 | assert(compiler->lvaDoneFrameLayout == |
9191 | Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized |
9192 | |
9193 | // Frame pointer doesn't point at the end, it points at the pushed r11. So, instead |
9194 | // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11 |
9195 | // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved |
9196 | // (also assumed in genFnProlog()). |
9197 | assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0); |
9198 | unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; |
9199 | genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES; |
9200 | |
9201 | regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; |
9202 | unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); |
9203 | unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving |
9204 | assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); |
9205 | unsigned funcletFrameSize = |
9206 | preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize; |
9207 | |
9208 | unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); |
9209 | unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; |
9210 | unsigned spDelta = funcletFrameSizeAligned - saveRegsSize; |
9211 | |
9212 | unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad; |
9213 | int PSP_slot_CallerSP_offset = |
9214 | -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative! |
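
        // Worked example (illustrative numbers only, assuming 8-byte stack alignment): with a 16 byte
        // pre-spill area (r0-r3), 16 bytes of saved regs (say r4, r5, r11, lr) and 8 bytes of outgoing
        // arg space, funcletFrameSize = 16 + 16 + 4 + 8 = 44, which rounds up to 48; the alignment pad
        // is then 4, spDelta = 48 - 16 = 32, PSP_slot_SP_offset = 8 + 4 = 12, and
        // PSP_slot_CallerSP_offset = -(44 - 8) = -36.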
9215 | |
9216 | /* Now save it for future use */ |
9217 | |
9218 | genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; |
9219 | genFuncletInfo.fiSpDelta = spDelta; |
9220 | genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset; |
9221 | genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset; |
9222 | |
9223 | #ifdef DEBUG |
9224 | if (verbose) |
9225 | { |
            printf("\n");
            printf("Funclet prolog / epilog info\n");
            printf("    Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
            printf("                        Save regs: ");
            dspRegMask(rsMaskSaveRegs);
            printf("\n");
            printf("                         SP delta: %d\n", genFuncletInfo.fiSpDelta);
            printf("               PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
            printf("        PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);

            if (PSP_slot_CallerSP_offset !=
                compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
                printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
                       compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
9240 | } |
9241 | #endif // DEBUG |
9242 | |
9243 | assert(PSP_slot_CallerSP_offset < 0); |
9244 | if (compiler->lvaPSPSym != BAD_VAR_NUM) |
9245 | { |
9246 | assert(PSP_slot_CallerSP_offset == |
9247 | compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main |
9248 | // function and funclet! |
9249 | } |
9250 | } |
9251 | } |
9252 | |
9253 | #elif defined(_TARGET_AMD64_) |
9254 | |
9255 | /***************************************************************************** |
9256 | * |
9257 | * Generates code for an EH funclet prolog. |
9258 | * |
9259 | * Funclets have the following incoming arguments: |
9260 | * |
9261 | * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG) |
9262 | * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG) |
9263 | * finally/fault: rcx = InitialSP |
9264 | * |
9265 | * Funclets set the following registers on exit: |
9266 | * |
9267 | * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET) |
9268 | * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) |
9269 | * finally/fault: none |
9270 | * |
9271 | * The AMD64 funclet prolog sequence is: |
9272 | * |
9273 | * push ebp |
9274 | * push callee-saved regs |
9275 | * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use |
9276 | * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for |
9277 | * ; the entire function. |
9278 | * sub sp, XXX ; Establish the rest of the frame. |
9279 | * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned |
9280 | * ; up to preserve stack alignment. If we push an odd number of registers, we also |
9281 | * ; generate this, to keep the stack aligned. |
9282 | * |
9283 | * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested |
9284 | * ; filters. |
9285 | * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet |
9286 | * ; epilog. |
9287 | * ; Also, re-establish the frame pointer from the PSP. |
9288 | * |
9289 | * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the |
9290 | * ; PSP of the dynamically containing funclet or function) |
9291 | * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame |
9292 | * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If |
9293 | * ; Function_InitialSP_to_FP_delta==0, we don't need this |
9294 | * ; instruction. |
9295 | * |
9296 | * The epilog sequence is then: |
9297 | * |
9298 | * add rsp, XXX |
9299 | * pop callee-saved regs ; if necessary |
9300 | * pop rbp |
9301 | * ret |
9302 | * |
9303 | * The funclet frame is thus: |
9304 | * |
9305 | * | | |
9306 | * |-----------------------| |
9307 | * | incoming | |
9308 | * | arguments | |
9309 | * +=======================+ <---- Caller's SP |
9310 | * | Return address | |
9311 | * |-----------------------| |
9312 | * | Saved EBP | |
9313 | * |-----------------------| |
9314 | * |Callee saved registers | |
9315 | * |-----------------------| |
9316 | * ~ possible 8 byte pad ~ |
9317 | * ~ for alignment ~ |
9318 | * |-----------------------| |
9319 | * | PSP slot | // Omitted in CoreRT ABI |
9320 | * |-----------------------| |
9321 | * | Outgoing arg space | // this only exists if the function makes a call |
9322 | * |-----------------------| <---- Initial SP |
9323 | * | | | |
9324 | * ~ | Stack grows ~ |
9325 | * | | downward | |
9326 | * V |
9327 | * |
9328 | * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this |
9329 | * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64 |
9330 | * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h |
9331 | * "FRAMEPTR OFFSETS" for details. |
9332 | */ |
9333 | |
9334 | void CodeGen::genFuncletProlog(BasicBlock* block) |
9335 | { |
9336 | #ifdef DEBUG |
9337 | if (verbose) |
9338 | { |
        printf("*************** In genFuncletProlog()\n");
9340 | } |
9341 | #endif |
9342 | |
9343 | assert(!regSet.rsRegsModified(RBM_FPBASE)); |
9344 | assert(block != nullptr); |
9345 | assert(block->bbFlags & BBF_FUNCLET_BEG); |
9346 | assert(isFramePointerUsed()); |
9347 | |
9348 | ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true); |
9349 | |
9350 | gcInfo.gcResetForBB(); |
9351 | |
9352 | compiler->unwindBegProlog(); |
9353 | |
9354 | // We need to push ebp, since it's callee-saved. |
9355 | // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't |
9356 | // keep track of that on a per-funclet basis, so we push the same set as in the main function. |
9357 | // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else |
9358 | // is stored here (all temps are allocated in the parent frame). |
9359 | // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same |
9360 | // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same. |
9361 | |
9362 | inst_RV(INS_push, REG_FPBASE, TYP_REF); |
9363 | compiler->unwindPush(REG_FPBASE); |
9364 | |
9365 | // Callee saved int registers are pushed to stack. |
9366 | genPushCalleeSavedRegisters(); |
9367 | |
9368 | regMaskTP maskArgRegsLiveIn; |
9369 | if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) |
9370 | { |
9371 | maskArgRegsLiveIn = RBM_ARG_0; |
9372 | } |
9373 | else |
9374 | { |
9375 | maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2; |
9376 | } |
9377 | |
9378 | regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed |
9379 | bool initRegZeroed = false; |
9380 | |
9381 | genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn); |
9382 | |
9383 | // Callee saved float registers are copied to stack in their assigned stack slots |
9384 | // after allocating space for them as part of funclet frame. |
9385 | genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta); |
9386 | |
9387 | // This is the end of the OS-reported prolog for purposes of unwinding |
9388 | compiler->unwindEndProlog(); |
9389 | |
9390 | // If there is no PSPSym (CoreRT ABI), we are done. |
9391 | if (compiler->lvaPSPSym == BAD_VAR_NUM) |
9392 | { |
9393 | return; |
9394 | } |
9395 | |
9396 | getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset); |
9397 | |
9398 | regSet.verifyRegUsed(REG_FPBASE); |
9399 | |
9400 | getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset); |
9401 | |
9402 | if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0) |
9403 | { |
9404 | getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE, |
9405 | genFuncletInfo.fiFunction_InitialSP_to_FP_delta); |
9406 | } |
9407 | |
9408 | // We've modified EBP, but not really. Say that we haven't... |
9409 | regSet.rsRemoveRegsModified(RBM_FPBASE); |
9410 | } |
9411 | |
9412 | /***************************************************************************** |
9413 | * |
9414 | * Generates code for an EH funclet epilog. |
9415 | * |
9416 | * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog. |
9417 | */ |
9418 | |
9419 | void CodeGen::genFuncletEpilog() |
9420 | { |
9421 | #ifdef DEBUG |
9422 | if (verbose) |
9423 | { |
        printf("*************** In genFuncletEpilog()\n");
9425 | } |
9426 | #endif |
9427 | |
9428 | ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); |
9429 | |
9430 | // Restore callee saved XMM regs from their stack slots before modifying SP |
9431 | // to position at callee saved int regs. |
9432 | genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta); |
9433 | inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE); |
9434 | genPopCalleeSavedRegisters(); |
9435 | inst_RV(INS_pop, REG_EBP, TYP_I_IMPL); |
9436 | instGen_Return(0); |
9437 | } |
9438 | |
9439 | /***************************************************************************** |
9440 | * |
9441 | * Capture the information used to generate the funclet prologs and epilogs. |
9442 | */ |
9443 | |
9444 | void CodeGen::genCaptureFuncletPrologEpilogInfo() |
9445 | { |
9446 | if (!compiler->ehAnyFunclets()) |
9447 | { |
9448 | return; |
9449 | } |
9450 | |
    // Note that compLclFrameSize can't be used (nor can we call functions that depend on it),
    // because we're not going to allocate the same size frame as the parent.
9453 | |
9454 | assert(isFramePointerUsed()); |
9455 | assert(compiler->lvaDoneFrameLayout == |
9456 | Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized |
    assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The set of float registers to preserve has been finalized
9458 | |
9459 | // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize, |
9460 | // that's ok, because we're figuring out an offset in the parent frame. |
9461 | genFuncletInfo.fiFunction_InitialSP_to_FP_delta = |
9462 | compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame |
9463 | // pointer. |
9464 | |
9465 | assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); |
9466 | #ifndef UNIX_AMD64_ABI |
9467 | // No 4 slots for outgoing params on the stack for System V systems. |
9468 | assert((compiler->lvaOutgoingArgSpaceSize == 0) || |
9469 | (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES))); // On AMD64, we always have 4 outgoing argument |
9470 | // slots if there are any calls in the function. |
9471 | #endif // UNIX_AMD64_ABI |
9472 | unsigned offset = compiler->lvaOutgoingArgSpaceSize; |
9473 | |
9474 | genFuncletInfo.fiPSP_slot_InitialSP_offset = offset; |
9475 | |
9476 | // How much stack do we allocate in the funclet? |
9477 | // We need to 16-byte align the stack. |
9478 | |
9479 | unsigned totalFrameSize = |
9480 | REGSIZE_BYTES // return address |
9481 | + REGSIZE_BYTES // pushed EBP |
9482 | + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP |
9483 | |
    // The entire 128 bits of each callee-saved XMM register are saved to the stack, as the ABI encoding requires.
    // Copying an entire XMM register to/from memory is fastest when SP is aligned on an XMM_REGSIZE_BYTES boundary.
9486 | unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES; |
9487 | unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0; |
9488 | |
9489 | unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0; |
9490 | |
9491 | totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs |
9492 | + calleeFPRegsSavedSize // pushed callee-saved float regs |
                      // the 'pad' calculated below will go here
9494 | + PSPSymSize // PSPSym |
9495 | + compiler->lvaOutgoingArgSpaceSize // outgoing arg space |
9496 | ; |
9497 | |
9498 | unsigned pad = AlignmentPad(totalFrameSize, 16); |
9499 | |
    genFuncletInfo.fiSpDelta = FPRegsPad                             // Padding to align SP on XMM_REGSIZE_BYTES boundary
                               + calleeFPRegsSavedSize               // pushed callee-saved float (xmm) regs
                               + pad                                 // alignment pad for the whole funclet frame
                               + PSPSymSize                          // PSPSym
                               + compiler->lvaOutgoingArgSpaceSize   // outgoing arg space
        ;
9505 | |
9506 | #ifdef DEBUG |
9507 | if (verbose) |
9508 | { |
        printf("\n");
        printf("Funclet prolog / epilog info\n");
        printf("   Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
        printf("                         SP delta: %d\n", genFuncletInfo.fiSpDelta);
        printf("       PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
9514 | } |
9515 | |
9516 | if (compiler->lvaPSPSym != BAD_VAR_NUM) |
9517 | { |
9518 | assert(genFuncletInfo.fiPSP_slot_InitialSP_offset == |
9519 | compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and |
9520 | // funclet! |
9521 | } |
9522 | #endif // DEBUG |
9523 | } |
9524 | |
9525 | #elif defined(_TARGET_ARM64_) |
9526 | |
9527 | // Look in CodeGenArm64.cpp |
9528 | |
9529 | #elif defined(_TARGET_X86_) |
9530 | |
9531 | /***************************************************************************** |
9532 | * |
9533 | * Generates code for an EH funclet prolog. |
9534 | * |
9535 | * |
9536 | * Funclets have the following incoming arguments: |
9537 | * |
9538 | * catch/filter-handler: eax = the exception object that was caught (see GT_CATCH_ARG) |
9539 | * filter: eax = the exception object that was caught (see GT_CATCH_ARG) |
9540 | * finally/fault: none |
9541 | * |
9542 | * Funclets set the following registers on exit: |
9543 | * |
9544 | * catch/filter-handler: eax = the address at which execution should resume (see BBJ_EHCATCHRET) |
9545 | * filter: eax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) |
9546 | * finally/fault: none |
9547 | * |
9548 | * Funclet prolog/epilog sequence and funclet frame layout are TBD. |
9549 | * |
9550 | */ |
9551 | |
9552 | void CodeGen::genFuncletProlog(BasicBlock* block) |
9553 | { |
9554 | #ifdef DEBUG |
9555 | if (verbose) |
9556 | { |
        printf("*************** In genFuncletProlog()\n");
9558 | } |
9559 | #endif |
9560 | |
9561 | ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true); |
9562 | |
9563 | gcInfo.gcResetForBB(); |
9564 | |
9565 | compiler->unwindBegProlog(); |
9566 | |
9567 | // This is the end of the OS-reported prolog for purposes of unwinding |
9568 | compiler->unwindEndProlog(); |
9569 | |
    // TODO: We may need an EBP restore sequence here if we introduce a PSPSym
9571 | |
    // Add padding for 16-byte alignment
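    // (The call into the funclet pushes a 4-byte return address, so 12 more bytes plausibly brings SP back to
    // 16-byte alignment; this is an illustrative note, not a statement of the exact VM alignment contract.)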
9573 | inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE); |
9574 | } |
9575 | |
9576 | /***************************************************************************** |
9577 | * |
9578 | * Generates code for an EH funclet epilog. |
9579 | */ |
9580 | |
9581 | void CodeGen::genFuncletEpilog() |
9582 | { |
9583 | #ifdef DEBUG |
9584 | if (verbose) |
9585 | { |
        printf("*************** In genFuncletEpilog()\n");
9587 | } |
9588 | #endif |
9589 | |
9590 | ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); |
9591 | |
    // Revert the padding that was added for 16-byte alignment
9593 | inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE); |
9594 | |
9595 | instGen_Return(0); |
9596 | } |
9597 | |
9598 | /***************************************************************************** |
9599 | * |
9600 | * Capture the information used to generate the funclet prologs and epilogs. |
9601 | */ |
9602 | |
9603 | void CodeGen::genCaptureFuncletPrologEpilogInfo() |
9604 | { |
9605 | if (!compiler->ehAnyFunclets()) |
9606 | { |
9607 | return; |
9608 | } |
9609 | } |
9610 | |
9611 | #else // _TARGET_* |
9612 | |
9613 | /***************************************************************************** |
9614 | * |
9615 | * Generates code for an EH funclet prolog. |
9616 | */ |
9617 | |
9618 | void CodeGen::genFuncletProlog(BasicBlock* block) |
9619 | { |
    NYI("Funclet prolog");
9621 | } |
9622 | |
9623 | /***************************************************************************** |
9624 | * |
9625 | * Generates code for an EH funclet epilog. |
9626 | */ |
9627 | |
9628 | void CodeGen::genFuncletEpilog() |
9629 | { |
    NYI("Funclet epilog");
9631 | } |
9632 | |
9633 | /***************************************************************************** |
9634 | * |
9635 | * Capture the information used to generate the funclet prologs and epilogs. |
9636 | */ |
9637 | |
9638 | void CodeGen::genCaptureFuncletPrologEpilogInfo() |
9639 | { |
9640 | if (compiler->ehAnyFunclets()) |
9641 | { |
        NYI("genCaptureFuncletPrologEpilogInfo()");
9643 | } |
9644 | } |
9645 | |
9646 | #endif // _TARGET_* |
9647 | |
9648 | /*----------------------------------------------------------------------------- |
9649 | * |
9650 | * Set the main function PSPSym value in the frame. |
9651 | * Funclets use different code to load the PSP sym and save it in their frame. |
9652 | * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym. |
9653 | * The PSPSym section of that document is copied here. |
9654 | * |
9655 | *********************************** |
9656 | * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet |
9657 | * accesses locals from the main function body. |
9658 | * |
9659 | * First, two definitions. |
9660 | * |
9661 | * Caller-SP is the value of the stack pointer in a function's caller before the call |
9662 | * instruction is executed. That is, when function A calls function B, Caller-SP for B |
9663 | * is the value of the stack pointer immediately before the call instruction in A |
9664 | * (calling B) was executed. Note that this definition holds for both AMD64, which |
9665 | * pushes the return value when a call instruction is executed, and for ARM, which |
9666 | * doesn't. For AMD64, Caller-SP is the address above the call return address. |
9667 | * |
9668 | * Initial-SP is the initial value of the stack pointer after the fixed-size portion of |
9669 | * the frame has been allocated. That is, before any "alloca"-type allocations. |
9670 | * |
9671 | * The PSPSym is a pointer-sized local variable in the frame of the main function and |
9672 | * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP |
9673 | * for the main function. The stack offset of the PSPSym is reported to the VM in the |
9674 | * GC information header. The value reported in the GC information is the offset of the |
9675 | * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the |
9676 | * value is reported to the VM, differs between architectures. In particular, note that |
9677 | * most things in the GC information header are reported as offsets relative to Caller-SP, |
9678 | * but PSPSym on AMD64 is one (maybe the only) exception.) |
9679 | * |
9680 | * The VM uses the PSPSym to find other locals it cares about (such as the generics context |
9681 | * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that |
9682 | * the frame pointer is the same value in a funclet as it is in the main function body. |
9683 | * |
9684 | * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is |
9685 | * true for all funclets and it is passed as the first argument in RCX, but for ARM this is |
9686 | * only true for first pass funclets (currently just filters) and it is passed as the second |
9687 | * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent" |
9688 | * frame in the exception processing system. For the CLR, it points either to the main function |
9689 | * frame or a dynamically enclosing funclet frame from the same function, for the funclet being |
9690 | * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM. |
9691 | * |
9692 | * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we |
9693 | * don't know if the Establisher Frame is from the main function or a funclet, we design the |
9694 | * main function and funclet frame layouts to place the PSPSym at an identical, small, constant |
9695 | * offset from the Establisher Frame in each case. (This is also required because we only report |
9696 | * a single offset to the PSPSym in the GC information, and that offset must be valid for the main |
9697 | * function and all of its funclets). Then, the funclet uses this known offset to compute the |
9698 | * PSPSym address and read its value. From this, it can compute the value of the frame pointer |
9699 | * (which is a constant offset from the PSPSym value) and set the frame register to be the same |
9700 | * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's |
9701 | * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular, |
9702 | * for every nested funclet invocation. |
9703 | * |
9704 | * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM |
9705 | * restores all non-volatile registers to their values within the parent frame. This includes |
9706 | * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register |
9707 | * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets. |
9708 | * |
9709 | * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument |
9710 | * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On |
9711 | * ARM this is the first argument and passed in R0. |
9712 | * |
9713 | * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always |
9714 | * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of |
9715 | * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym |
9716 | * is required in all funclets as well as the main function, whereas if the establisher frame was |
9717 | * correctly reported, the PSPSym could be omitted in some cases.) |
9718 | *********************************** |
9719 | */ |
9720 | void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) |
9721 | { |
9722 | assert(compiler->compGeneratingProlog); |
9723 | |
9724 | if (compiler->lvaPSPSym == BAD_VAR_NUM) |
9725 | { |
9726 | return; |
9727 | } |
9728 | |
9729 | noway_assert(isFramePointerUsed()); // We need an explicit frame pointer |
9730 | |
9731 | #if defined(_TARGET_ARM_) |
9732 | |
9733 | // We either generate: |
9734 | // add r1, r11, 8 |
9735 | // str r1, [reg + PSPSymOffset] |
9736 | // or: |
9737 | // add r1, sp, 76 |
9738 | // str r1, [reg + PSPSymOffset] |
9739 | // depending on the smallest encoding |
9740 | |
9741 | int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); |
9742 | |
9743 | int callerSPOffs; |
9744 | regNumber regBase; |
9745 | |
9746 | if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta)) |
9747 | { |
9748 | // use the "add <reg>, sp, imm" form |
9749 | |
9750 | callerSPOffs = SPtoCallerSPdelta; |
9751 | regBase = REG_SPBASE; |
9752 | } |
9753 | else |
9754 | { |
9755 | // use the "add <reg>, r11, imm" form |
9756 | |
9757 | int FPtoCallerSPdelta = -genCallerSPtoFPdelta(); |
9758 | noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE)); |
9759 | |
9760 | callerSPOffs = FPtoCallerSPdelta; |
9761 | regBase = REG_FPBASE; |
9762 | } |
9763 | |
9764 | // We will just use the initReg since it is an available register |
9765 | // and we are probably done using it anyway... |
9766 | regNumber regTmp = initReg; |
9767 | *pInitRegZeroed = false; |
9768 | |
9769 | getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs); |
9770 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); |
9771 | |
9772 | #elif defined(_TARGET_ARM64_) |
9773 | |
9774 | int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); |
9775 | |
9776 | // We will just use the initReg since it is an available register |
9777 | // and we are probably done using it anyway... |
9778 | regNumber regTmp = initReg; |
9779 | *pInitRegZeroed = false; |
9780 | |
9781 | getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta); |
9782 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); |
9783 | |
9784 | #elif defined(_TARGET_AMD64_) |
9785 | |
9786 | // The PSP sym value is Initial-SP, not Caller-SP! |
9787 | // We assume that RSP is Initial-SP when this function is called. That is, the stack frame |
9788 | // has been established. |
9789 | // |
9790 | // We generate: |
9791 | // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym |
9792 | |
9793 | getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0); |
9794 | |
9795 | #else // _TARGET_* |
9796 | |
9797 | NYI("Set function PSP sym" ); |
9798 | |
9799 | #endif // _TARGET_* |
9800 | } |
9801 | |
9802 | #endif // FEATURE_EH_FUNCLETS |
9803 | |
9804 | /***************************************************************************** |
9805 | * |
9806 | * Generates code for all the function and funclet prologs and epilogs. |
9807 | */ |
9808 | |
9809 | void CodeGen::genGeneratePrologsAndEpilogs() |
9810 | { |
9811 | #ifdef DEBUG |
9812 | if (verbose) |
9813 | { |
        printf("*************** Before prolog / epilog generation\n");
9815 | getEmitter()->emitDispIGlist(false); |
9816 | } |
9817 | #endif |
9818 | |
9819 | // Before generating the prolog, we need to reset the variable locations to what they will be on entry. |
9820 | // This affects our code that determines which untracked locals need to be zero initialized. |
9821 | compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB); |
9822 | |
9823 | // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation. |
9824 | |
9825 | getEmitter()->emitStartPrologEpilogGeneration(); |
9826 | |
9827 | gcInfo.gcResetForBB(); |
9828 | genFnProlog(); |
9829 | |
9830 | // Generate all the prologs and epilogs. |
9831 | CLANG_FORMAT_COMMENT_ANCHOR; |
9832 | |
9833 | #if FEATURE_EH_FUNCLETS |
9834 | |
9835 | // Capture the data we're going to use in the funclet prolog and epilog generation. This is |
9836 | // information computed during codegen, or during function prolog generation, like |
9837 | // frame offsets. It must run after main function prolog generation. |
9838 | |
9839 | genCaptureFuncletPrologEpilogInfo(); |
9840 | |
9841 | #endif // FEATURE_EH_FUNCLETS |
9842 | |
9843 | // Walk the list of prologs and epilogs and generate them. |
9844 | // We maintain a list of prolog and epilog basic blocks in |
9845 | // the insGroup structure in the emitter. This list was created |
9846 | // during code generation by the genReserve*() functions. |
9847 | // |
9848 | // TODO: it seems like better design would be to create a list of prologs/epilogs |
9849 | // in the code generator (not the emitter), and then walk that list. But we already |
9850 | // have the insGroup list, which serves well, so we don't need the extra allocations |
9851 | // for a prolog/epilog list in the code generator. |
9852 | |
9853 | getEmitter()->emitGeneratePrologEpilog(); |
9854 | |
9855 | // Tell the emitter we're done with all prolog and epilog generation. |
9856 | |
9857 | getEmitter()->emitFinishPrologEpilogGeneration(); |
9858 | |
9859 | #ifdef DEBUG |
9860 | if (verbose) |
9861 | { |
        printf("*************** After prolog / epilog generation\n");
9863 | getEmitter()->emitDispIGlist(false); |
9864 | } |
9865 | #endif |
9866 | } |
9867 | |
9868 | /* |
9869 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
9870 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
9871 | XX XX |
9872 | XX End Prolog / Epilog XX |
9873 | XX XX |
9874 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
9875 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
9876 | */ |
9877 | |
9878 | #if STACK_PROBES |
9879 | void CodeGen::genGenerateStackProbe() |
9880 | { |
9881 | noway_assert(compiler->opts.compNeedStackProbes); |
9882 | |
    // If this assert fires, it means somebody has changed the value of
    // CORINFO_STACKPROBE_DEPTH.
    // Why does the EE need such a deep probe? It should just need a couple
    // of bytes, to set up a frame in the unmanaged code.
9887 | |
9888 | static_assert_no_msg(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize()); |
9889 | |
    JITDUMP("Emitting stack probe:\n");
9891 | getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, |
9892 | -(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK)); |
9893 | } |
9894 | #endif // STACK_PROBES |
9895 | |
9896 | #if defined(_TARGET_XARCH_) |
9897 | // Save compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working |
9898 | // down the stack to the largest register number stored at [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE] |
9899 | // Here offset = 16-byte aligned offset after pushing integer registers. |
9900 | // |
9901 | // Params |
9902 | // lclFrameSize - Fixed frame size excluding callee pushed int regs. |
9903 | // non-funclet: this will be compLclFrameSize. |
9904 | // funclet frames: this will be FuncletInfo.fiSpDelta. |
9905 | void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize) |
9906 | { |
9907 | genVzeroupperIfNeeded(false); |
9908 | regMaskTP regMask = compiler->compCalleeFPRegsSavedMask; |
9909 | |
9910 | // Only callee saved floating point registers should be in regMask |
9911 | assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask); |
9912 | |
9913 | // fast path return |
9914 | if (regMask == RBM_NONE) |
9915 | { |
9916 | return; |
9917 | } |
9918 | |
9919 | #ifdef _TARGET_AMD64_ |
9920 | unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0; |
9921 | unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES; |
9922 | |
9923 | // Offset is 16-byte aligned since we use movaps for preserving xmm regs. |
9924 | assert((offset % 16) == 0); |
9925 | instruction copyIns = ins_Copy(TYP_FLOAT); |
9926 | #else // !_TARGET_AMD64_ |
9927 | unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES; |
9928 | instruction copyIns = INS_movupd; |
9929 | #endif // !_TARGET_AMD64_ |
9930 | |
9931 | for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg)) |
9932 | { |
9933 | regMaskTP regBit = genRegMask(reg); |
9934 | if ((regBit & regMask) != 0) |
9935 | { |
9936 | // ABI requires us to preserve lower 128-bits of YMM register. |
9937 | getEmitter()->emitIns_AR_R(copyIns, |
9938 | EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be |
9939 | // EA_16BYTE |
9940 | reg, REG_SPBASE, offset); |
9941 | compiler->unwindSaveReg(reg, offset); |
9942 | regMask &= ~regBit; |
9943 | offset -= XMM_REGSIZE_BYTES; |
9944 | } |
9945 | } |
9946 | } |
9947 | |
// Restore compCalleeFPRegsPushed, with the smallest register number restored from [RSP+offset], working
// down the stack to the largest register number restored from [RSP+offset-(genCountBits(regMask)-1)*XMM_REG_SIZE]
9950 | // Here offset = 16-byte aligned offset after pushing integer registers. |
9951 | // |
9952 | // Params |
9953 | // lclFrameSize - Fixed frame size excluding callee pushed int regs. |
9954 | // non-funclet: this will be compLclFrameSize. |
9955 | // funclet frames: this will be FuncletInfo.fiSpDelta. |
9956 | void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) |
9957 | { |
9958 | regMaskTP regMask = compiler->compCalleeFPRegsSavedMask; |
9959 | |
9960 | // Only callee saved floating point registers should be in regMask |
9961 | assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask); |
9962 | |
9963 | // fast path return |
9964 | if (regMask == RBM_NONE) |
9965 | { |
9966 | genVzeroupperIfNeeded(); |
9967 | return; |
9968 | } |
9969 | |
9970 | #ifdef _TARGET_AMD64_ |
9971 | unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0; |
9972 | instruction copyIns = ins_Copy(TYP_FLOAT); |
9973 | #else // !_TARGET_AMD64_ |
9974 | unsigned firstFPRegPadding = 0; |
9975 | instruction copyIns = INS_movupd; |
9976 | #endif // !_TARGET_AMD64_ |
9977 | |
9978 | unsigned offset; |
9979 | regNumber regBase; |
9980 | if (compiler->compLocallocUsed) |
9981 | { |
9982 | // localloc frame: use frame pointer relative offset |
9983 | assert(isFramePointerUsed()); |
9984 | regBase = REG_FPBASE; |
9985 | offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES; |
9986 | } |
9987 | else |
9988 | { |
9989 | regBase = REG_SPBASE; |
9990 | offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES; |
9991 | } |
9992 | |
9993 | #ifdef _TARGET_AMD64_ |
9994 | // Offset is 16-byte aligned since we use movaps for restoring xmm regs |
9995 | assert((offset % 16) == 0); |
9996 | #endif // _TARGET_AMD64_ |
9997 | |
9998 | for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg)) |
9999 | { |
10000 | regMaskTP regBit = genRegMask(reg); |
10001 | if ((regBit & regMask) != 0) |
10002 | { |
10003 | // ABI requires us to restore lower 128-bits of YMM register. |
10004 | getEmitter()->emitIns_R_AR(copyIns, |
10005 | EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be |
10006 | // EA_16BYTE |
10007 | reg, regBase, offset); |
10008 | regMask &= ~regBit; |
10009 | offset -= XMM_REGSIZE_BYTES; |
10010 | } |
10011 | } |
10012 | genVzeroupperIfNeeded(); |
10013 | } |
10014 | |
// Generate the vzeroupper instruction as needed to zero out the upper 128 bits of all YMM registers, so that
// AVX/legacy SSE transition penalties can be avoided. This function is used in genPreserveCalleeSavedFltRegs
// (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in the prolog if the method contains
// 128-bit or 256-bit AVX code, to avoid the legacy SSE to AVX transition penalty, which could happen when
// native code containing legacy SSE calls into JIT-generated AVX code (e.g. reverse pinvoke). Issue VZEROUPPER
// in the epilog if the method contains 256-bit AVX code, to avoid the AVX to legacy SSE transition penalty.
10021 | // |
10022 | // Params |
10023 | // check256bitOnly - true to check if the function contains 256-bit AVX instruction and generate Vzeroupper |
10024 | // instruction, false to check if the function contains AVX instruciton (either 128-bit or 256-bit). |
10025 | // |
10026 | void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/) |
10027 | { |
10028 | bool emitVzeroUpper = false; |
10029 | if (check256bitOnly) |
10030 | { |
10031 | emitVzeroUpper = getEmitter()->Contains256bitAVX(); |
10032 | } |
10033 | else |
10034 | { |
10035 | emitVzeroUpper = getEmitter()->ContainsAVX(); |
10036 | } |
10037 | |
10038 | if (emitVzeroUpper) |
10039 | { |
10040 | assert(compiler->canUseVexEncoding()); |
10041 | instGen(INS_vzeroupper); |
10042 | } |
10043 | } |
10044 | |
10045 | #endif // defined(_TARGET_XARCH_) |
10046 | |
10047 | //----------------------------------------------------------------------------------- |
10048 | // IsMultiRegReturnedType: Returns true if the type is returned in multiple registers |
10049 | // |
10050 | // Arguments: |
10051 | // hClass - type handle |
10052 | // |
10053 | // Return Value: |
10054 | // true if type is returned in multiple registers, false otherwise. |
10055 | // |
10056 | bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass) |
10057 | { |
10058 | if (hClass == NO_CLASS_HANDLE) |
10059 | { |
10060 | return false; |
10061 | } |
10062 | |
10063 | structPassingKind howToReturnStruct; |
10064 | var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct); |
10065 | |
10066 | return (varTypeIsStruct(returnType)); |
10067 | } |
10068 | |
10069 | //---------------------------------------------- |
10070 | // Methods that support HFA's for ARM32/ARM64 |
10071 | //---------------------------------------------- |
10072 | |
10073 | bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass) |
10074 | { |
10075 | #ifdef FEATURE_HFA |
10076 | return varTypeIsFloating(GetHfaType(hClass)); |
10077 | #else |
10078 | return false; |
10079 | #endif |
10080 | } |
10081 | |
10082 | bool Compiler::IsHfa(GenTree* tree) |
10083 | { |
10084 | #ifdef FEATURE_HFA |
10085 | return IsHfa(gtGetStructHandleIfPresent(tree)); |
10086 | #else |
10087 | return false; |
10088 | #endif |
10089 | } |
10090 | |
10091 | var_types Compiler::GetHfaType(GenTree* tree) |
10092 | { |
10093 | #ifdef FEATURE_HFA |
10094 | return GetHfaType(gtGetStructHandleIfPresent(tree)); |
10095 | #else |
10096 | return TYP_UNDEF; |
10097 | #endif |
10098 | } |
10099 | |
10100 | unsigned Compiler::GetHfaCount(GenTree* tree) |
10101 | { |
10102 | return GetHfaCount(gtGetStructHandleIfPresent(tree)); |
10103 | } |
10104 | |
10105 | var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass) |
10106 | { |
10107 | var_types result = TYP_UNDEF; |
10108 | if (hClass != NO_CLASS_HANDLE) |
10109 | { |
10110 | #ifdef FEATURE_HFA |
10111 | CorInfoType corType = info.compCompHnd->getHFAType(hClass); |
10112 | if (corType != CORINFO_TYPE_UNDEF) |
10113 | { |
10114 | result = JITtype2varType(corType); |
10115 | } |
10116 | #endif // FEATURE_HFA |
10117 | } |
10118 | return result; |
10119 | } |
10120 | |
10121 | //------------------------------------------------------------------------ |
10122 | // GetHfaCount: Given a class handle for an HFA struct |
10123 | // return the number of registers needed to hold the HFA |
10124 | // |
10125 | // Note that on ARM32 the single precision registers overlap with |
10126 | // the double precision registers and for that reason each |
10127 | // double register is considered to be two single registers. |
10128 | // Thus for ARM32 an HFA of 4 doubles this function will return 8. |
10129 | // On ARM64 given an HFA of 4 singles or 4 doubles this function will |
10130 | // will return 4 for both. |
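//    For example (illustrative): an HFA of 2 doubles yields 4 on ARM32 (four single-precision
//    registers) and 2 on ARM64.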
10131 | // Arguments: |
10132 | // hClass: the class handle of a HFA struct |
10133 | // |
10134 | unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass) |
10135 | { |
10136 | assert(IsHfa(hClass)); |
10137 | #ifdef _TARGET_ARM_ |
    // An HFA of doubles is twice as large as an HFA of singles for ARM32
    // (i.e. uses twice the number of single precision registers)
10140 | return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES; |
10141 | #else // _TARGET_ARM64_ |
10142 | var_types hfaType = GetHfaType(hClass); |
10143 | unsigned classSize = info.compCompHnd->getClassSize(hClass); |
    // Note that the retail build issues a warning about a potential division by zero without the Max function
10145 | unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType))); |
10146 | return classSize / elemSize; |
10147 | #endif // _TARGET_ARM64_ |
10148 | } |
10149 | |
10150 | #ifdef _TARGET_XARCH_ |
10151 | |
10152 | //------------------------------------------------------------------------ |
10153 | // genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction, |
10154 | // map it to the specific x86/x64 shift opcode for a shift/rotate by a constant. |
10155 | // X86/x64 has a special encoding for shift/rotate-by-constant-1. |
10156 | // |
10157 | // Arguments: |
10158 | // ins: the base shift/rotate instruction |
10159 | // shiftByValue: the constant value by which we are shifting/rotating |
10160 | // |
10161 | instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue) |
10162 | { |
10163 | assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr || |
10164 | ins == INS_sar); |
10165 | |
10166 | // Which format should we use? |
10167 | |
10168 | instruction shiftByConstantIns; |
10169 | |
10170 | if (shiftByValue == 1) |
10171 | { |
10172 | // Use the shift-by-one format. |
10173 | |
10174 | assert(INS_rcl + 1 == INS_rcl_1); |
10175 | assert(INS_rcr + 1 == INS_rcr_1); |
10176 | assert(INS_rol + 1 == INS_rol_1); |
10177 | assert(INS_ror + 1 == INS_ror_1); |
10178 | assert(INS_shl + 1 == INS_shl_1); |
10179 | assert(INS_shr + 1 == INS_shr_1); |
10180 | assert(INS_sar + 1 == INS_sar_1); |
10181 | |
10182 | shiftByConstantIns = (instruction)(ins + 1); |
10183 | } |
10184 | else |
10185 | { |
10186 | // Use the shift-by-NNN format. |
10187 | |
10188 | assert(INS_rcl + 2 == INS_rcl_N); |
10189 | assert(INS_rcr + 2 == INS_rcr_N); |
10190 | assert(INS_rol + 2 == INS_rol_N); |
10191 | assert(INS_ror + 2 == INS_ror_N); |
10192 | assert(INS_shl + 2 == INS_shl_N); |
10193 | assert(INS_shr + 2 == INS_shr_N); |
10194 | assert(INS_sar + 2 == INS_sar_N); |
10195 | |
10196 | shiftByConstantIns = (instruction)(ins + 2); |
10197 | } |
10198 | |
10199 | return shiftByConstantIns; |
10200 | } |
10201 | |
10202 | #endif // _TARGET_XARCH_ |
10203 | |
10204 | //------------------------------------------------------------------------------------------------ // |
// getFirstArgWithStackSlot - returns the first argument with a stack slot on the caller's frame.
//
// Return value:
//    The number of the first argument with a stack slot on the caller's frame.
10209 | // |
10210 | // Note: |
10211 | // On x64 Windows the caller always creates slots (homing space) in its frame for the |
10212 | // first 4 arguments of a callee (register passed args). So, the the variable number |
10213 | // (lclNum) for the first argument with a stack slot is always 0. |
10214 | // For System V systems or armarch, there is no such calling convention requirement, and the code |
10215 | // needs to find the first stack passed argument from the caller. This is done by iterating over |
10216 | // all the lvParam variables and finding the first with lvArgReg equals to REG_STK. |
10217 | // |
10218 | unsigned CodeGen::getFirstArgWithStackSlot() |
10219 | { |
10220 | #if defined(UNIX_AMD64_ABI) || defined(_TARGET_ARMARCH_) |
10221 | unsigned baseVarNum = 0; |
10222 | // Iterate over all the lvParam variables in the Lcl var table until we find the first one |
10223 | // that's passed on the stack. |
10224 | LclVarDsc* varDsc = nullptr; |
10225 | for (unsigned i = 0; i < compiler->info.compArgsCount; i++) |
10226 | { |
10227 | varDsc = &(compiler->lvaTable[i]); |
10228 | |
10229 | // We should have found a stack parameter (and broken out of this loop) before |
10230 | // we find any non-parameters. |
10231 | assert(varDsc->lvIsParam); |
10232 | |
10233 | if (varDsc->lvArgReg == REG_STK) |
10234 | { |
10235 | baseVarNum = i; |
10236 | break; |
10237 | } |
10238 | } |
10239 | assert(varDsc != nullptr); |
10240 | |
10241 | return baseVarNum; |
10242 | #elif defined(_TARGET_AMD64_) |
10243 | return 0; |
10244 | #else // _TARGET_X86 |
10245 | // Not implemented for x86. |
    NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
10247 | return BAD_VAR_NUM; |
10248 | #endif // _TARGET_X86_ |
10249 | } |
10250 | |
10251 | //------------------------------------------------------------------------ |
10252 | // genSinglePush: Report a change in stack level caused by a single word-sized push instruction |
10253 | // |
10254 | void CodeGen::genSinglePush() |
10255 | { |
10256 | AddStackLevel(REGSIZE_BYTES); |
10257 | } |
10258 | |
10259 | //------------------------------------------------------------------------ |
10260 | // genSinglePop: Report a change in stack level caused by a single word-sized pop instruction |
10261 | // |
10262 | void CodeGen::genSinglePop() |
10263 | { |
10264 | SubtractStackLevel(REGSIZE_BYTES); |
10265 | } |
10266 | |
10267 | //------------------------------------------------------------------------ |
10268 | // genPushRegs: Push the given registers. |
10269 | // |
10270 | // Arguments: |
10271 | // regs - mask or registers to push |
10272 | // byrefRegs - OUT arg. Set to byref registers that were pushed. |
10273 | // noRefRegs - OUT arg. Set to non-GC ref registers that were pushed. |
10274 | // |
10275 | // Return Value: |
10276 | // Mask of registers pushed. |
10277 | // |
10278 | // Notes: |
10279 | // This function does not check if the register is marked as used, etc. |
10280 | // |
10281 | regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs) |
10282 | { |
10283 | *byrefRegs = RBM_NONE; |
10284 | *noRefRegs = RBM_NONE; |
10285 | |
10286 | if (regs == RBM_NONE) |
10287 | { |
10288 | return RBM_NONE; |
10289 | } |
10290 | |
10291 | #if FEATURE_FIXED_OUT_ARGS |
10292 | |
    NYI("Don't call genPushRegs with real regs!");
10294 | return RBM_NONE; |
10295 | |
10296 | #else // FEATURE_FIXED_OUT_ARGS |
10297 | |
10298 | noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL)); |
10299 | noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL)); |
10300 | |
10301 | regMaskTP pushedRegs = regs; |
10302 | |
10303 | for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg)) |
10304 | { |
10305 | regMaskTP regBit = regMaskTP(1) << reg; |
10306 | |
10307 | if ((regBit & regs) == RBM_NONE) |
10308 | continue; |
10309 | |
10310 | var_types type; |
10311 | if (regBit & gcInfo.gcRegGCrefSetCur) |
10312 | { |
10313 | type = TYP_REF; |
10314 | } |
10315 | else if (regBit & gcInfo.gcRegByrefSetCur) |
10316 | { |
10317 | *byrefRegs |= regBit; |
10318 | type = TYP_BYREF; |
10319 | } |
        else if (noRefRegs != nullptr)
10321 | { |
10322 | *noRefRegs |= regBit; |
10323 | type = TYP_I_IMPL; |
10324 | } |
10325 | else |
10326 | { |
10327 | continue; |
10328 | } |
10329 | |
10330 | inst_RV(INS_push, reg, type); |
10331 | |
10332 | genSinglePush(); |
10333 | gcInfo.gcMarkRegSetNpt(regBit); |
10334 | |
10335 | regs &= ~regBit; |
10336 | } |
10337 | |
10338 | return pushedRegs; |
10339 | |
10340 | #endif // FEATURE_FIXED_OUT_ARGS |
10341 | } |
10342 | |
10343 | //------------------------------------------------------------------------ |
10344 | // genPopRegs: Pop the registers that were pushed by genPushRegs(). |
10345 | // |
10346 | // Arguments: |
10347 | // regs - mask of registers to pop |
10348 | // byrefRegs - The byref registers that were pushed by genPushRegs(). |
10349 | // noRefRegs - The non-GC ref registers that were pushed by genPushRegs(). |
10350 | // |
10351 | // Return Value: |
10352 | // None |
10353 | // |
10354 | void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs) |
10355 | { |
10356 | if (regs == RBM_NONE) |
10357 | { |
10358 | return; |
10359 | } |
10360 | |
10361 | #if FEATURE_FIXED_OUT_ARGS |
10362 | |
    NYI("Don't call genPopRegs with real regs!");
10364 | |
10365 | #else // FEATURE_FIXED_OUT_ARGS |
10366 | |
10367 | noway_assert((regs & byrefRegs) == byrefRegs); |
10368 | noway_assert((regs & noRefRegs) == noRefRegs); |
10369 | noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE); |
10370 | |
10371 | noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT)); |
10372 | noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT)); |
10373 | |
10374 | // Walk the registers in the reverse order as genPushRegs() |
10375 | for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg)) |
10376 | { |
10377 | regMaskTP regBit = regMaskTP(1) << reg; |
10378 | |
10379 | if ((regBit & regs) == RBM_NONE) |
10380 | continue; |
10381 | |
10382 | var_types type; |
10383 | if (regBit & byrefRegs) |
10384 | { |
10385 | type = TYP_BYREF; |
10386 | } |
10387 | else if (regBit & noRefRegs) |
10388 | { |
10389 | type = TYP_INT; |
10390 | } |
10391 | else |
10392 | { |
10393 | type = TYP_REF; |
10394 | } |
10395 | |
10396 | inst_RV(INS_pop, reg, type); |
10397 | genSinglePop(); |
10398 | |
10399 | if (type != TYP_INT) |
10400 | gcInfo.gcMarkRegPtrVal(reg, type); |
10401 | |
10402 | regs &= ~regBit; |
10403 | } |
10404 | |
10405 | #endif // FEATURE_FIXED_OUT_ARGS |
10406 | } |
10407 | |
10408 | /***************************************************************************** |
10409 | * genSetScopeInfo |
10410 | * |
10411 | * This function should be called only after the sizes of the emitter blocks |
10412 | * have been finalized. |
10413 | */ |
10414 | |
10415 | void CodeGen::genSetScopeInfo() |
10416 | { |
10417 | if (!compiler->opts.compScopeInfo) |
10418 | { |
10419 | return; |
10420 | } |
10421 | |
10422 | #ifdef DEBUG |
10423 | if (verbose) |
10424 | { |
10425 | printf("*************** In genSetScopeInfo()\n" ); |
10426 | } |
10427 | #endif |
10428 | |
10429 | if (compiler->info.compVarScopesCount == 0) |
10430 | { |
10431 | compiler->eeSetLVcount(0); |
10432 | compiler->eeSetLVdone(); |
10433 | return; |
10434 | } |
10435 | |
10436 | noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)); |
10437 | noway_assert(psiOpenScopeList.scNext == nullptr); |
10438 | |
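    // The count reported to the EE includes both the prolog (psi) scopes and the method-body (si) scopes.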
10439 | unsigned i; |
10440 | unsigned scopeCnt = siScopeCnt + psiScopeCnt; |
10441 | |
10442 | compiler->eeSetLVcount(scopeCnt); |
10443 | |
10444 | #ifdef DEBUG |
10445 | genTrnslLocalVarCount = scopeCnt; |
10446 | if (scopeCnt) |
10447 | { |
10448 | genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt]; |
10449 | } |
10450 | #endif |
10451 | |
    // Record the scopes found for the parameters over the prolog.
    // The prolog needs to be treated differently because a variable may not
    // have the same location in the prolog block as is given by compiler->lvaTable;
    // e.g., a register parameter is actually on the stack before it is loaded into its register.
10456 | |
10457 | CodeGen::psiScope* scopeP; |
10458 | |
10459 | for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext) |
10460 | { |
10461 | noway_assert(scopeP != nullptr); |
10462 | noway_assert(scopeP->scStartLoc.Valid()); |
10463 | noway_assert(scopeP->scEndLoc.Valid()); |
10464 | |
10465 | UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter()); |
10466 | UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter()); |
10467 | |
10468 | unsigned varNum = scopeP->scSlotNum; |
10469 | noway_assert(startOffs <= endOffs); |
10470 | |
10471 | // The range may be 0 if the prolog is empty. For such a case, |
10472 | // report the liveness of arguments to span at least the first |
10473 | // instruction in the method. This will be incorrect (except on |
10474 | // entry to the method) if the very first instruction of the method |
10475 | // is part of a loop. However, this should happen |
10476 | // very rarely, and the incorrectness is worth being able to look |
10477 | // at the argument on entry to the method. |
10478 | if (startOffs == endOffs) |
10479 | { |
10480 | noway_assert(startOffs == 0); |
10481 | endOffs++; |
10482 | } |
10483 | |
10484 | Compiler::siVarLoc varLoc; |
10485 | |
10486 | if (scopeP->scRegister) |
10487 | { |
10488 | varLoc.vlType = Compiler::VLT_REG; |
10489 | varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum; |
10490 | } |
10491 | else |
10492 | { |
10493 | varLoc.vlType = Compiler::VLT_STK; |
10494 | varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg; |
10495 | varLoc.vlStk.vlsOffset = scopeP->u2.scOffset; |
10496 | } |
10497 | |
10498 | genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc); |
10499 | } |
10500 | |
10501 | // Record the scopes for the rest of the method. |
10502 | // Check that the LocalVarInfo scopes look OK |
10503 | noway_assert(siOpenScopeList.scNext == nullptr); |
10504 | |
10505 | CodeGen::siScope* scopeL; |
10506 | |
10507 | for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext) |
10508 | { |
10509 | noway_assert(scopeL != nullptr); |
10510 | noway_assert(scopeL->scStartLoc.Valid()); |
10511 | noway_assert(scopeL->scEndLoc.Valid()); |
10512 | |
10513 | // Find the start and end IP |
10514 | |
10515 | UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter()); |
10516 | UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter()); |
10517 | |
10518 | noway_assert(scopeL->scStartLoc != scopeL->scEndLoc); |
10519 | |
10520 | // For stack vars, find the base register, and offset |
10521 | |
10522 | regNumber baseReg; |
10523 | signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs; |
10524 | |
10525 | if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased) |
10526 | { |
10527 | baseReg = REG_SPBASE; |
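            // SP-relative offsets must also account for the stack depth at this scope.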
10528 | offset += scopeL->scStackLevel; |
10529 | } |
10530 | else |
10531 | { |
10532 | baseReg = REG_FPBASE; |
10533 | } |
10534 | |
10535 | // Now fill in the varLoc |
10536 | |
10537 | Compiler::siVarLoc varLoc; |
10538 | |
10539 | // TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register |
10540 | // for part of its lifetime, or in different registers for different parts of its lifetime. |
10541 | // This should only matter for non-debug code, where we do variable enregistration. |
10542 | // We should store the ranges of variable enregistration in the scope table. |
10543 | if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg()) |
10544 | { |
10545 | var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet()); |
10546 | switch (type) |
10547 | { |
10548 | case TYP_INT: |
10549 | case TYP_REF: |
10550 | case TYP_BYREF: |
10551 | #ifdef _TARGET_64BIT_ |
10552 | case TYP_LONG: |
10553 | #endif // _TARGET_64BIT_ |
10554 | |
10555 | varLoc.vlType = Compiler::VLT_REG; |
10556 | varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum; |
10557 | break; |
10558 | |
10559 | #ifndef _TARGET_64BIT_ |
10560 | case TYP_LONG: |
10561 | #if !CPU_HAS_FP_SUPPORT |
10562 | case TYP_DOUBLE: |
10563 | #endif |
10564 | |
10565 | if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK) |
10566 | { |
10567 | varLoc.vlType = Compiler::VLT_REG_REG; |
10568 | varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum; |
10569 | varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg; |
10570 | } |
10571 | else |
10572 | { |
10573 | varLoc.vlType = Compiler::VLT_REG_STK; |
10574 | varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum; |
10575 | varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg; |
10576 | if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE) |
10577 | { |
10578 | varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP; |
10579 | } |
10580 | varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int); |
10581 | } |
10582 | break; |
10583 | #endif // !_TARGET_64BIT_ |
10584 | |
10585 | #ifdef _TARGET_64BIT_ |
10586 | |
10587 | case TYP_FLOAT: |
10588 | case TYP_DOUBLE: |
10589 | // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15, |
10590 | // so no XMM registers can get debug information. |
10591 | varLoc.vlType = Compiler::VLT_REG_FP; |
10592 | varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum; |
10593 | break; |
10594 | |
10595 | #else // !_TARGET_64BIT_ |
10596 | |
10597 | #if CPU_HAS_FP_SUPPORT |
10598 | case TYP_FLOAT: |
10599 | case TYP_DOUBLE: |
10600 | if (isFloatRegType(type)) |
10601 | { |
10602 | varLoc.vlType = Compiler::VLT_FPSTK; |
10603 | varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum; |
10604 | } |
10605 | break; |
10606 | #endif // CPU_HAS_FP_SUPPORT |
10607 | |
10608 | #endif // !_TARGET_64BIT_ |
10609 | |
10610 | #ifdef FEATURE_SIMD |
10611 | case TYP_SIMD8: |
10612 | case TYP_SIMD12: |
10613 | case TYP_SIMD16: |
10614 | case TYP_SIMD32: |
10615 | varLoc.vlType = Compiler::VLT_REG_FP; |
10616 | |
10617 | // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15, |
10618 | // so no XMM registers can get debug information. |
10619 | // |
                    // Note: we need to initialize the vlrReg field, otherwise the jit dump hits an assert
                    // in eeDispVar() --> getRegName() that the regNumber is valid.
10622 | varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum; |
10623 | break; |
10624 | #endif // FEATURE_SIMD |
10625 | |
10626 | default: |
10627 | noway_assert(!"Invalid type" ); |
10628 | } |
10629 | } |
10630 | else |
10631 | { |
10632 | assert(offset != BAD_STK_OFFS); |
10633 | LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum; |
10634 | switch (genActualType(varDsc->TypeGet())) |
10635 | { |
10636 | case TYP_INT: |
10637 | case TYP_REF: |
10638 | case TYP_BYREF: |
10639 | case TYP_FLOAT: |
10640 | case TYP_STRUCT: |
10641 | case TYP_BLK: // Needed because of the TYP_BLK stress mode |
10642 | #ifdef FEATURE_SIMD |
10643 | case TYP_SIMD8: |
10644 | case TYP_SIMD12: |
10645 | case TYP_SIMD16: |
10646 | case TYP_SIMD32: |
10647 | #endif |
10648 | #ifdef _TARGET_64BIT_ |
10649 | case TYP_LONG: |
10650 | case TYP_DOUBLE: |
10651 | #endif // _TARGET_64BIT_ |
10652 | #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) |
                    // In the AMD64 ABI we are supposed to pass a struct by reference when its
                    // size is not 1, 2, 4 or 8 bytes. During fgMorph, the compiler modifies
                    // the IR to comply with the ABI and therefore changes the type of the lclVar
                    // that holds the struct from TYP_STRUCT to TYP_BYREF, but it gives us a hint that
                    // this is still a struct by setting the lvIsTemp flag.
                    // The same is true for ARM64 and structs > 16 bytes.
                    // (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail.)
                    // The VM expects a special enum for these kinds of local vars, VLT_STK_BYREF,
                    // to accommodate this situation.
10662 | if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp) |
10663 | { |
10664 | assert(varDsc->lvIsParam); |
10665 | varLoc.vlType = Compiler::VLT_STK_BYREF; |
10666 | } |
10667 | else |
10668 | #endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) |
10669 | { |
10670 | varLoc.vlType = Compiler::VLT_STK; |
10671 | } |
10672 | varLoc.vlStk.vlsBaseReg = baseReg; |
10673 | varLoc.vlStk.vlsOffset = offset; |
10674 | if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE) |
10675 | { |
10676 | varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP; |
10677 | } |
10678 | break; |
10679 | |
10680 | #ifndef _TARGET_64BIT_ |
10681 | case TYP_LONG: |
10682 | case TYP_DOUBLE: |
10683 | varLoc.vlType = Compiler::VLT_STK2; |
10684 | varLoc.vlStk2.vls2BaseReg = baseReg; |
10685 | varLoc.vlStk2.vls2Offset = offset; |
10686 | if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE) |
10687 | { |
10688 | varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP; |
10689 | } |
10690 | break; |
10691 | #endif // !_TARGET_64BIT_ |
10692 | |
10693 | default: |
10694 | noway_assert(!"Invalid type" ); |
10695 | } |
10696 | } |
10697 | |
10698 | genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum, |
10699 | scopeL->scAvailable, varLoc); |
10700 | } |
10701 | |
10702 | compiler->eeSetLVdone(); |
10703 | } |
10704 | |
10705 | //------------------------------------------------------------------------ |
10706 | // genSetScopeInfo: Record scope information for debug info |
10707 | // |
10708 | // Arguments: |
10709 | // which |
10710 | // startOffs - the starting offset for this scope |
10711 | // length - the length of this scope |
10712 | // varNum - the lclVar for this scope info |
10713 | // LVnum |
10714 | // avail |
10715 | // varLoc |
10716 | // |
10717 | // Notes: |
10718 | // Called for every scope info piece to record by the main genSetScopeInfo() |
10719 | |
10720 | void CodeGen::genSetScopeInfo(unsigned which, |
10721 | UNATIVE_OFFSET startOffs, |
10722 | UNATIVE_OFFSET length, |
10723 | unsigned varNum, |
10724 | unsigned LVnum, |
10725 | bool avail, |
10726 | Compiler::siVarLoc& varLoc) |
10727 | { |
10728 | // We need to do some mapping while reporting back these variables. |
10729 | |
10730 | unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); |
10731 | noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); |
10732 | |
10733 | #ifdef _TARGET_X86_ |
10734 | // Non-x86 platforms are allowed to access all arguments directly |
10735 | // so we don't need this code. |
10736 | |
10737 | // Is this a varargs function? |
10738 | |
10739 | if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg && |
10740 | varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg) |
10741 | { |
10742 | noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2); |
10743 | |
10744 | // All stack arguments (except the varargs handle) have to be |
10745 | // accessed via the varargs cookie. Discard generated info, |
10746 | // and just find its position relative to the varargs handle |
10747 | |
10748 | PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount); |
10749 | if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame) |
10750 | { |
10751 | noway_assert(!compiler->opts.compDbgCode); |
10752 | return; |
10753 | } |
10754 | |
10755 | // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for |
10756 | // arguments of vararg functions to avoid reporting them to GC. |
10757 | noway_assert(!compiler->lvaTable[varNum].lvRegister); |
10758 | unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs; |
10759 | unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs; |
10760 | |
10761 | noway_assert(cookieOffset < varOffset); |
10762 | unsigned offset = varOffset - cookieOffset; |
10763 | unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES; |
10764 | noway_assert(offset < stkArgSize); |
10765 | offset = stkArgSize - offset; |
10766 | |
10767 | varLoc.vlType = Compiler::VLT_FIXED_VA; |
10768 | varLoc.vlFixedVarArg.vlfvOffset = offset; |
10769 | } |
10770 | |
10771 | #endif // _TARGET_X86_ |
10772 | |
10773 | VarName name = nullptr; |
10774 | |
10775 | #ifdef DEBUG |
10776 | |
10777 | for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) |
10778 | { |
10779 | if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) |
10780 | { |
10781 | name = compiler->info.compVarScopes[scopeNum].vsdName; |
10782 | } |
10783 | } |
10784 | |
    // Hang on to this info.
10786 | |
10787 | TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which]; |
10788 | |
10789 | tlvi.tlviVarNum = ilVarNum; |
10790 | tlvi.tlviLVnum = LVnum; |
10791 | tlvi.tlviName = name; |
10792 | tlvi.tlviStartPC = startOffs; |
10793 | tlvi.tlviLength = length; |
10794 | tlvi.tlviAvailable = avail; |
10795 | tlvi.tlviVarLoc = varLoc; |
10796 | |
10797 | #endif // DEBUG |
10798 | |
10799 | compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); |
10800 | } |
10801 | |
10802 | /*****************************************************************************/ |
10803 | #ifdef LATE_DISASM |
10804 | #if defined(DEBUG) |
10805 | /***************************************************************************** |
10806 | * CompilerRegName |
10807 | * |
10808 | * Can be called only after lviSetLocalVarInfo() has been called |
10809 | */ |
10810 | |
10811 | /* virtual */ |
10812 | const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg) |
10813 | { |
10814 | if (!compiler->opts.compScopeInfo) |
10815 | return nullptr; |
10816 | |
10817 | if (compiler->info.compVarScopesCount == 0) |
10818 | return nullptr; |
10819 | |
10820 | noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo); |
10821 | |
10822 | for (unsigned i = 0; i < genTrnslLocalVarCount; i++) |
10823 | { |
10824 | if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) && |
10825 | (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) && |
10826 | (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs)) |
10827 | { |
10828 | return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL; |
10829 | } |
10830 | } |
10831 | |
10832 | return NULL; |
10833 | } |
10834 | |
10835 | /***************************************************************************** |
10836 | * CompilerStkName |
10837 | * |
10838 | * Can be called only after lviSetLocalVarInfo() has been called |
10839 | */ |
10840 | |
10841 | /* virtual */ |
10842 | const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs) |
10843 | { |
10844 | if (!compiler->opts.compScopeInfo) |
10845 | return nullptr; |
10846 | |
10847 | if (compiler->info.compVarScopesCount == 0) |
10848 | return nullptr; |
10849 | |
10850 | noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo); |
10851 | |
10852 | for (unsigned i = 0; i < genTrnslLocalVarCount; i++) |
10853 | { |
10854 | if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) && |
10855 | (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) && |
10856 | (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs)) |
10857 | { |
10858 | return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL; |
10859 | } |
10860 | } |
10861 | |
10862 | return NULL; |
10863 | } |
10864 | |
10865 | /*****************************************************************************/ |
10866 | #endif // defined(DEBUG) |
10867 | #endif // LATE_DISASM |
10868 | |
10869 | #ifdef DEBUG |
10870 | |
10871 | /***************************************************************************** |
10872 | * Display a IPmappingDsc. Pass -1 as mappingNum to not display a mapping number. |
10873 | */ |
10874 | |
10875 | void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping) |
10876 | { |
10877 | if (mappingNum != unsigned(-1)) |
10878 | { |
10879 | printf("%d: " , mappingNum); |
10880 | } |
10881 | |
10882 | IL_OFFSETX offsx = ipMapping->ipmdILoffsx; |
10883 | |
10884 | if (offsx == BAD_IL_OFFSET) |
10885 | { |
10886 | printf("???" ); |
10887 | } |
10888 | else |
10889 | { |
10890 | Compiler::eeDispILOffs(jitGetILoffsAny(offsx)); |
10891 | |
10892 | if (jitIsStackEmpty(offsx)) |
10893 | { |
10894 | printf(" STACK_EMPTY" ); |
10895 | } |
10896 | |
10897 | if (jitIsCallInstruction(offsx)) |
10898 | { |
10899 | printf(" CALL_INSTRUCTION" ); |
10900 | } |
10901 | } |
10902 | |
10903 | printf(" " ); |
10904 | ipMapping->ipmdNativeLoc.Print(); |
10905 | // We can only call this after code generation. Is there any way to tell when it's legal to call? |
10906 | // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter())); |
10907 | |
10908 | if (ipMapping->ipmdIsLabel) |
10909 | { |
10910 | printf(" label" ); |
10911 | } |
10912 | |
10913 | printf("\n" ); |
10914 | } |
10915 | |
10916 | void CodeGen::genIPmappingListDisp() |
10917 | { |
10918 | unsigned mappingNum = 0; |
10919 | Compiler::IPmappingDsc* ipMapping; |
10920 | |
10921 | for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext) |
10922 | { |
10923 | genIPmappingDisp(mappingNum, ipMapping); |
10924 | ++mappingNum; |
10925 | } |
10926 | } |
10927 | |
10928 | #endif // DEBUG |
10929 | |
10930 | /***************************************************************************** |
10931 | * |
10932 | * Append an IPmappingDsc struct to the list that we're maintaining |
10933 | * for the debugger. |
10934 | * Record the instr offset as being at the current code gen position. |
10935 | */ |
10936 | |
10937 | void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel) |
10938 | { |
10939 | if (!compiler->opts.compDbgInfo) |
10940 | { |
10941 | return; |
10942 | } |
10943 | |
10944 | assert(offsx != BAD_IL_OFFSET); |
10945 | |
10946 | switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed. |
10947 | { |
10948 | case ICorDebugInfo::PROLOG: |
10949 | case ICorDebugInfo::EPILOG: |
10950 | break; |
10951 | |
10952 | default: |
10953 | |
10954 | if (offsx != ICorDebugInfo::NO_MAPPING) |
10955 | { |
10956 | noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize); |
10957 | } |
10958 | |
10959 | // Ignore this one if it's the same IL offset as the last one we saw. |
10960 | // Note that we'll let through two identical IL offsets if the flag bits |
10961 | // differ, or two identical "special" mappings (e.g., PROLOG). |
10962 | if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx)) |
10963 | { |
                JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
10965 | return; |
10966 | } |
10967 | break; |
10968 | } |
10969 | |
10970 | /* Create a mapping entry and append it to the list */ |
10971 | |
10972 | Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1); |
10973 | addMapping->ipmdNativeLoc.CaptureLocation(getEmitter()); |
10974 | addMapping->ipmdILoffsx = offsx; |
10975 | addMapping->ipmdIsLabel = isLabel; |
10976 | addMapping->ipmdNext = nullptr; |
10977 | |
10978 | if (compiler->genIPmappingList != nullptr) |
10979 | { |
10980 | assert(compiler->genIPmappingLast != nullptr); |
10981 | assert(compiler->genIPmappingLast->ipmdNext == nullptr); |
10982 | compiler->genIPmappingLast->ipmdNext = addMapping; |
10983 | } |
10984 | else |
10985 | { |
10986 | assert(compiler->genIPmappingLast == nullptr); |
10987 | compiler->genIPmappingList = addMapping; |
10988 | } |
10989 | |
10990 | compiler->genIPmappingLast = addMapping; |
10991 | |
10992 | #ifdef DEBUG |
10993 | if (verbose) |
10994 | { |
10995 | printf("Added IP mapping: " ); |
10996 | genIPmappingDisp(unsigned(-1), addMapping); |
10997 | } |
10998 | #endif // DEBUG |
10999 | } |
11000 | |
11001 | /***************************************************************************** |
11002 | * |
11003 | * Prepend an IPmappingDsc struct to the list that we're maintaining |
11004 | * for the debugger. |
11005 | * Record the instr offset as being at the current code gen position. |
11006 | */ |
11007 | void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx) |
11008 | { |
11009 | if (!compiler->opts.compDbgInfo) |
11010 | { |
11011 | return; |
11012 | } |
11013 | |
11014 | assert(offsx != BAD_IL_OFFSET); |
11015 | assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation. |
11016 | |
11017 | switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed. |
11018 | { |
11019 | case ICorDebugInfo::NO_MAPPING: |
11020 | case ICorDebugInfo::PROLOG: |
11021 | case ICorDebugInfo::EPILOG: |
11022 | break; |
11023 | |
11024 | default: |
11025 | noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize); |
11026 | break; |
11027 | } |
11028 | |
11029 | /* Create a mapping entry and prepend it to the list */ |
11030 | |
11031 | Compiler::IPmappingDsc* addMapping = compiler->getAllocator(CMK_DebugInfo).allocate<Compiler::IPmappingDsc>(1); |
11032 | addMapping->ipmdNativeLoc.CaptureLocation(getEmitter()); |
11033 | addMapping->ipmdILoffsx = offsx; |
11034 | addMapping->ipmdIsLabel = true; |
11035 | addMapping->ipmdNext = nullptr; |
11036 | |
11037 | addMapping->ipmdNext = compiler->genIPmappingList; |
11038 | compiler->genIPmappingList = addMapping; |
11039 | |
11040 | if (compiler->genIPmappingLast == nullptr) |
11041 | { |
11042 | compiler->genIPmappingLast = addMapping; |
11043 | } |
11044 | |
11045 | #ifdef DEBUG |
11046 | if (verbose) |
11047 | { |
11048 | printf("Added IP mapping to front: " ); |
11049 | genIPmappingDisp(unsigned(-1), addMapping); |
11050 | } |
11051 | #endif // DEBUG |
11052 | } |
11053 | |
11054 | /*****************************************************************************/ |
11055 | |
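// Compile-time sanity checks: the distinguished ICorDebugInfo values are distinct from
// BAD_IL_OFFSET, and all of these special values lie above MAX_IL_OFFSET so they can never
// collide with a real IL offset.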
11056 | C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET)); |
11057 | C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET)); |
11058 | C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET)); |
11059 | |
11060 | C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET); |
11061 | C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET); |
11062 | C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET); |
11063 | C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET); |
11064 | |
11065 | //------------------------------------------------------------------------ |
11066 | // jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type. |
11067 | // Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING) |
11068 | // is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET. |
11069 | // |
11070 | // Arguments: |
11071 | // offsx - the IL_OFFSETX value with the IL offset to extract. |
11072 | // |
11073 | // Return Value: |
11074 | // The IL offset. |
11075 | |
11076 | IL_OFFSET jitGetILoffs(IL_OFFSETX offsx) |
11077 | { |
11078 | assert(offsx != BAD_IL_OFFSET); |
11079 | |
11080 | switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed. |
11081 | { |
11082 | case ICorDebugInfo::NO_MAPPING: |
11083 | case ICorDebugInfo::PROLOG: |
11084 | case ICorDebugInfo::EPILOG: |
11085 | unreached(); |
11086 | |
11087 | default: |
11088 | return IL_OFFSET(offsx & ~IL_OFFSETX_BITS); |
11089 | } |
11090 | } |
11091 | |
11092 | //------------------------------------------------------------------------ |
11093 | // jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo |
11094 | // distinguished values. Asserts if passed BAD_IL_OFFSET. |
11095 | // |
11096 | // Arguments: |
11097 | // offsx - the IL_OFFSETX value with the IL offset to extract. |
11098 | // |
11099 | // Return Value: |
11100 | // The IL offset. |
11101 | |
11102 | IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx) |
11103 | { |
11104 | assert(offsx != BAD_IL_OFFSET); |
11105 | |
11106 | switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed. |
11107 | { |
11108 | case ICorDebugInfo::NO_MAPPING: |
11109 | case ICorDebugInfo::PROLOG: |
11110 | case ICorDebugInfo::EPILOG: |
11111 | return IL_OFFSET(offsx); |
11112 | |
11113 | default: |
11114 | return IL_OFFSET(offsx & ~IL_OFFSETX_BITS); |
11115 | } |
11116 | } |
11117 | |
11118 | //------------------------------------------------------------------------ |
11119 | // jitIsStackEmpty: Does the IL offset have the stack empty bit set? |
11120 | // Asserts if passed BAD_IL_OFFSET. |
11121 | // |
11122 | // Arguments: |
11123 | // offsx - the IL_OFFSETX value to check |
11124 | // |
11125 | // Return Value: |
11126 | // 'true' if the stack empty bit is set; 'false' otherwise. |
11127 | |
11128 | bool jitIsStackEmpty(IL_OFFSETX offsx) |
11129 | { |
11130 | assert(offsx != BAD_IL_OFFSET); |
11131 | |
11132 | switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed. |
11133 | { |
11134 | case ICorDebugInfo::NO_MAPPING: |
11135 | case ICorDebugInfo::PROLOG: |
11136 | case ICorDebugInfo::EPILOG: |
11137 | return true; |
11138 | |
11139 | default: |
11140 | return (offsx & IL_OFFSETX_STKBIT) == 0; |
11141 | } |
11142 | } |
11143 | |
11144 | //------------------------------------------------------------------------ |
11145 | // jitIsCallInstruction: Does the IL offset have the call instruction bit set? |
11146 | // Asserts if passed BAD_IL_OFFSET. |
11147 | // |
11148 | // Arguments: |
11149 | // offsx - the IL_OFFSETX value to check |
11150 | // |
11151 | // Return Value: |
11152 | // 'true' if the call instruction bit is set; 'false' otherwise. |
11153 | |
11154 | bool jitIsCallInstruction(IL_OFFSETX offsx) |
11155 | { |
11156 | assert(offsx != BAD_IL_OFFSET); |
11157 | |
11158 | switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed. |
11159 | { |
11160 | case ICorDebugInfo::NO_MAPPING: |
11161 | case ICorDebugInfo::PROLOG: |
11162 | case ICorDebugInfo::EPILOG: |
11163 | return false; |
11164 | |
11165 | default: |
11166 | return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0; |
11167 | } |
11168 | } |
11169 | |
11170 | /*****************************************************************************/ |
11171 | |
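//------------------------------------------------------------------------
// genEnsureCodeEmitted: When generating debuggable code, make sure that native code was
//    emitted for the last reported IL offset: if 'offsx' is the most recently reported
//    offset and no code has been emitted since it was recorded, emit a nop so the mapping
//    has an actual instruction to map to.
//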
11172 | void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx) |
11173 | { |
11174 | if (!compiler->opts.compDbgCode) |
11175 | { |
11176 | return; |
11177 | } |
11178 | |
11179 | if (offsx == BAD_IL_OFFSET) |
11180 | { |
11181 | return; |
11182 | } |
11183 | |
    /* If other IL offsets were reported, skip */
11185 | |
11186 | if (compiler->genIPmappingLast == nullptr) |
11187 | { |
11188 | return; |
11189 | } |
11190 | |
11191 | if (compiler->genIPmappingLast->ipmdILoffsx != offsx) |
11192 | { |
11193 | return; |
11194 | } |
11195 | |
11196 | /* offsx was the last reported offset. Make sure that we generated native code */ |
11197 | |
11198 | if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter())) |
11199 | { |
11200 | instGen(INS_nop); |
11201 | } |
11202 | } |
11203 | |
11204 | /***************************************************************************** |
11205 | * |
11206 | * Shut down the IP-mapping logic, report the info to the EE. |
11207 | */ |
11208 | |
11209 | void CodeGen::genIPmappingGen() |
11210 | { |
11211 | if (!compiler->opts.compDbgInfo) |
11212 | { |
11213 | return; |
11214 | } |
11215 | |
11216 | #ifdef DEBUG |
11217 | if (verbose) |
11218 | { |
11219 | printf("*************** In genIPmappingGen()\n" ); |
11220 | } |
11221 | #endif |
11222 | |
11223 | if (compiler->genIPmappingList == nullptr) |
11224 | { |
11225 | compiler->eeSetLIcount(0); |
11226 | compiler->eeSetLIdone(); |
11227 | return; |
11228 | } |
11229 | |
11230 | Compiler::IPmappingDsc* tmpMapping; |
11231 | Compiler::IPmappingDsc* prevMapping; |
11232 | unsigned mappingCnt; |
11233 | UNATIVE_OFFSET lastNativeOfs; |
11234 | |
11235 | /* First count the number of distinct mapping records */ |
11236 | |
11237 | mappingCnt = 0; |
11238 | lastNativeOfs = UNATIVE_OFFSET(~0); |
11239 | |
11240 | for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; |
11241 | tmpMapping = tmpMapping->ipmdNext) |
11242 | { |
11243 | IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx; |
11244 | |
11245 | // Managed RetVal - since new sequence points are emitted to identify IL calls, |
11246 | // make sure that those are not filtered and do not interfere with filtering of |
11247 | // other sequence points. |
11248 | if (jitIsCallInstruction(srcIP)) |
11249 | { |
11250 | mappingCnt++; |
11251 | continue; |
11252 | } |
11253 | |
11254 | UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter()); |
11255 | |
11256 | if (nextNativeOfs != lastNativeOfs) |
11257 | { |
11258 | mappingCnt++; |
11259 | lastNativeOfs = nextNativeOfs; |
11260 | prevMapping = tmpMapping; |
11261 | continue; |
11262 | } |
11263 | |
11264 | /* If there are mappings with the same native offset, then: |
11265 | o If one of them is NO_MAPPING, ignore it |
11266 | o If one of them is a label, report that and ignore the other one |
11267 | o Else report the higher IL offset |
11268 | */ |
11269 | |
        PREFIX_ASSUME(prevMapping != nullptr); // The first iteration always takes the 'continue' above, so prevMapping cannot be nullptr here
11271 | if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING) |
11272 | { |
11273 | // If the previous entry was NO_MAPPING, ignore it |
11274 | prevMapping->ipmdNativeLoc.Init(); |
11275 | prevMapping = tmpMapping; |
11276 | } |
11277 | else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING) |
11278 | { |
11279 | // If the current entry is NO_MAPPING, ignore it |
11280 | // Leave prevMapping unchanged as tmpMapping is no longer valid |
11281 | tmpMapping->ipmdNativeLoc.Init(); |
11282 | } |
11283 | else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0) |
11284 | { |
11285 | // counting for special cases: see below |
11286 | mappingCnt++; |
11287 | prevMapping = tmpMapping; |
11288 | } |
11289 | else |
11290 | { |
11291 | noway_assert(prevMapping != nullptr); |
11292 | noway_assert(!prevMapping->ipmdNativeLoc.Valid() || |
11293 | lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter())); |
11294 | |
11295 | /* The previous block had the same native offset. We have to |
11296 | discard one of the mappings. Simply reinitialize ipmdNativeLoc |
11297 | and prevMapping will be ignored later. */ |
11298 | |
11299 | if (prevMapping->ipmdIsLabel) |
11300 | { |
11301 | // Leave prevMapping unchanged as tmpMapping is no longer valid |
11302 | tmpMapping->ipmdNativeLoc.Init(); |
11303 | } |
11304 | else |
11305 | { |
11306 | prevMapping->ipmdNativeLoc.Init(); |
11307 | prevMapping = tmpMapping; |
11308 | } |
11309 | } |
11310 | } |
11311 | |
11312 | /* Tell them how many mapping records we've got */ |
11313 | |
11314 | compiler->eeSetLIcount(mappingCnt); |
11315 | |
11316 | /* Now tell them about the mappings */ |
11317 | |
11318 | mappingCnt = 0; |
11319 | lastNativeOfs = UNATIVE_OFFSET(~0); |
11320 | |
11321 | for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext) |
11322 | { |
11323 | // Do we have to skip this record ? |
11324 | if (!tmpMapping->ipmdNativeLoc.Valid()) |
11325 | { |
11326 | continue; |
11327 | } |
11328 | |
11329 | UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter()); |
11330 | IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx; |
11331 | |
11332 | if (jitIsCallInstruction(srcIP)) |
11333 | { |
11334 | compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true); |
11335 | } |
11336 | else if (nextNativeOfs != lastNativeOfs) |
11337 | { |
11338 | compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false); |
11339 | lastNativeOfs = nextNativeOfs; |
11340 | } |
11341 | else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0) |
11342 | { |
11343 | // For the special case of an IL instruction with no body |
11344 | // followed by the epilog (say ret void immediately preceding |
11345 | // the method end), we put two entries in, so that we'll stop |
11346 | // at the (empty) ret statement if the user tries to put a |
11347 | // breakpoint there, and then have the option of seeing the |
11348 | // epilog or not based on SetUnmappedStopMask for the stepper. |
11349 | compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false); |
11350 | } |
11351 | } |
11352 | |
11353 | #if 0 |
11354 | // TODO-Review: |
11355 | //This check is disabled. It is always true that any time this check asserts, the debugger would have a |
11356 | //problem with IL source level debugging. However, for a C# file, it only matters if things are on |
11357 | //different source lines. As a result, we have all sorts of latent problems with how we emit debug |
11358 | //info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this |
11359 | //assert back on. |
11360 | if (compiler->opts.compDbgCode) |
11361 | { |
11362 | //Assert that the first instruction of every basic block with more than one incoming edge has a |
11363 | //different sequence point from each incoming block. |
11364 | // |
11365 | //It turns out that the only thing we really have to assert is that the first statement in each basic |
11366 | //block has an IL offset and appears in eeBoundaries. |
11367 | for (BasicBlock * block = compiler->fgFirstBB; block != nullptr; block = block->bbNext) |
11368 | { |
11369 | if ((block->bbRefs > 1) && (block->bbTreeList != nullptr)) |
11370 | { |
11371 | noway_assert(block->bbTreeList->gtOper == GT_STMT); |
11372 | bool found = false; |
11373 | if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET) |
11374 | { |
11375 | IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx); |
11376 | for (unsigned i = 0; i < eeBoundariesCount; ++i) |
11377 | { |
11378 | if (eeBoundaries[i].ilOffset == ilOffs) |
11379 | { |
11380 | found = true; |
11381 | break; |
11382 | } |
11383 | } |
11384 | } |
                noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
11386 | } |
11387 | } |
11388 | } |
11389 | #endif // 0 |
11390 | |
11391 | compiler->eeSetLIdone(); |
11392 | } |
11393 | |
11394 | /*============================================================================ |
11395 | * |
11396 | * These are empty stubs to help the late dis-assembler to compile |
11397 | * if the late disassembler is being built into a non-DEBUG build. |
11398 | * |
11399 | *============================================================================ |
11400 | */ |
11401 | |
11402 | #if defined(LATE_DISASM) |
11403 | #if !defined(DEBUG) |
11404 | |
11405 | /* virtual */ |
11406 | const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg) |
11407 | { |
11408 | return NULL; |
11409 | } |
11410 | |
11411 | /* virtual */ |
11412 | const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs) |
11413 | { |
11414 | return NULL; |
11415 | } |
11416 | |
11417 | /*****************************************************************************/ |
11418 | #endif // !defined(DEBUG) |
11419 | #endif // defined(LATE_DISASM) |
11420 | /*****************************************************************************/ |
11421 | |
11422 | //------------------------------------------------------------------------ |
11423 | // indirForm: Make a temporary indir we can feed to pattern matching routines |
11424 | // in cases where we don't want to instantiate all the indirs that happen. |
11425 | // |
11426 | GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base) |
11427 | { |
11428 | GenTreeIndir i(GT_IND, type, base, nullptr); |
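    // The node gets no register and is marked contained, so it appears to callers like a
    // contained indirection that is used only for pattern matching.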
11429 | i.gtRegNum = REG_NA; |
11430 | i.SetContained(); |
11431 | return i; |
11432 | } |
11433 | |
11434 | //------------------------------------------------------------------------ |
11435 | // intForm: Make a temporary int we can feed to pattern matching routines |
11436 | // in cases where we don't want to instantiate. |
11437 | // |
11438 | GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value) |
11439 | { |
11440 | GenTreeIntCon i(type, value); |
11441 | i.gtRegNum = REG_NA; |
11442 | return i; |
11443 | } |
11444 | |
11445 | #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
11446 | //------------------------------------------------------------------------ |
11447 | // genLongReturn: Generates code for long return statement for x86 and arm. |
11448 | // |
11449 | // Note: treeNode's and op1's registers are already consumed. |
11450 | // |
11451 | // Arguments: |
11452 | // treeNode - The GT_RETURN or GT_RETFILT tree node with LONG return type. |
11453 | // |
11454 | // Return Value: |
11455 | // None |
11456 | // |
11457 | void CodeGen::genLongReturn(GenTree* treeNode) |
11458 | { |
11459 | assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); |
11460 | assert(treeNode->TypeGet() == TYP_LONG); |
11461 | GenTree* op1 = treeNode->gtGetOp1(); |
11462 | var_types targetType = treeNode->TypeGet(); |
11463 | |
11464 | assert(op1 != nullptr); |
11465 | assert(op1->OperGet() == GT_LONG); |
11466 | GenTree* loRetVal = op1->gtGetOp1(); |
11467 | GenTree* hiRetVal = op1->gtGetOp2(); |
11468 | assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA)); |
11469 | |
11470 | genConsumeReg(loRetVal); |
11471 | genConsumeReg(hiRetVal); |
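    // Move the lo/hi halves into the long-return register pair if they are not already there.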
11472 | if (loRetVal->gtRegNum != REG_LNGRET_LO) |
11473 | { |
11474 | inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT); |
11475 | } |
11476 | if (hiRetVal->gtRegNum != REG_LNGRET_HI) |
11477 | { |
11478 | inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT); |
11479 | } |
11480 | } |
11481 | #endif // _TARGET_X86_ || _TARGET_ARM_ |
11482 | |
11483 | //------------------------------------------------------------------------ |
11484 | // genReturn: Generates code for return statement. |
11485 | // In case of struct return, delegates to the genStructReturn method. |
11486 | // |
11487 | // Arguments: |
11488 | // treeNode - The GT_RETURN or GT_RETFILT tree node. |
11489 | // |
11490 | // Return Value: |
11491 | // None |
11492 | // |
11493 | void CodeGen::genReturn(GenTree* treeNode) |
11494 | { |
11495 | assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); |
11496 | GenTree* op1 = treeNode->gtGetOp1(); |
11497 | var_types targetType = treeNode->TypeGet(); |
11498 | |
11499 | // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return |
11500 | // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the |
11501 | // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined". |
11502 | assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT)); |
11503 | |
11504 | #ifdef DEBUG |
11505 | if (targetType == TYP_VOID) |
11506 | { |
11507 | assert(op1 == nullptr); |
11508 | } |
11509 | #endif // DEBUG |
11510 | |
11511 | #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
11512 | if (targetType == TYP_LONG) |
11513 | { |
11514 | genLongReturn(treeNode); |
11515 | } |
11516 | else |
11517 | #endif // _TARGET_X86_ || _TARGET_ARM_ |
11518 | { |
11519 | if (isStructReturn(treeNode)) |
11520 | { |
11521 | genStructReturn(treeNode); |
11522 | } |
11523 | else if (targetType != TYP_VOID) |
11524 | { |
11525 | assert(op1 != nullptr); |
11526 | noway_assert(op1->gtRegNum != REG_NA); |
11527 | |
11528 | // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has |
11529 | // consumed a reg for the operand. This is because the variable |
11530 | // is dead after return. But we are issuing more instructions |
11531 | // like "profiler leave callback" after this consumption. So |
11532 | // if you are issuing more instructions after this point, |
11533 | // remember to keep the variable live up until the new method |
11534 | // exit point where it is actually dead. |
11535 | genConsumeReg(op1); |
11536 | |
11537 | #if defined(_TARGET_ARM64_) |
11538 | genSimpleReturn(treeNode); |
11539 | #else // !_TARGET_ARM64_ |
11540 | #if defined(_TARGET_X86_) |
11541 | if (varTypeIsFloating(treeNode)) |
11542 | { |
11543 | genFloatReturn(treeNode); |
11544 | } |
11545 | else |
11546 | #elif defined(_TARGET_ARM_) |
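            // With soft-FP or varargs on ARM, a floating-point result is returned in the integer
            // return register(s), so move the value out of the FP register.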
11547 | if (varTypeIsFloating(treeNode) && (compiler->opts.compUseSoftFP || compiler->info.compIsVarArgs)) |
11548 | { |
11549 | if (targetType == TYP_FLOAT) |
11550 | { |
11551 | getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum); |
11552 | } |
11553 | else |
11554 | { |
11555 | assert(targetType == TYP_DOUBLE); |
11556 | getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, REG_INTRET, REG_NEXT(REG_INTRET), |
11557 | op1->gtRegNum); |
11558 | } |
11559 | } |
11560 | else |
11561 | #endif // _TARGET_ARM_ |
11562 | { |
11563 | regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET; |
11564 | if (op1->gtRegNum != retReg) |
11565 | { |
11566 | inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType); |
11567 | } |
11568 | } |
11569 | #endif // !_TARGET_ARM64_ |
11570 | } |
11571 | } |
11572 | |
11573 | #ifdef PROFILING_SUPPORTED |
11574 | // !! Note !! |
    // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure that for structs returned in
    //                  two registers, RAX and RDX are kept alive. Make the necessary changes in lowerxarch.cpp
11577 | // in the handling of the GT_RETURN statement. |
11578 | // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt |
11579 | // for the return registers containing GC refs. |
11580 | |
11581 | // There will be a single return block while generating profiler ELT callbacks. |
11582 | // |
    // Reason for not materializing the Leave callback as a GT_PROF_HOOK node after GT_RETURN:
    // The flowgraph and other places assert that the last node of a block marked as
    // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
    // maintain such an invariant irrespective of whether a profiler hook is needed or not.
11587 | // Also, there is not much to be gained by materializing it as an explicit node. |
11588 | if (compiler->compCurBB == compiler->genReturnBB) |
11589 | { |
11590 | // !! NOTE !! |
11591 | // Since we are invalidating the assumption that we would slip into the epilog |
11592 | // right after the "return", we need to preserve the return reg's GC state |
11593 | // across the call until actual method return. |
11594 | ReturnTypeDesc retTypeDesc; |
11595 | unsigned regCount = 0; |
11596 | if (compiler->compMethodReturnsMultiRegRetType()) |
11597 | { |
11598 | if (varTypeIsLong(compiler->info.compRetNativeType)) |
11599 | { |
11600 | retTypeDesc.InitializeLongReturnType(compiler); |
11601 | } |
11602 | else // we must have a struct return type |
11603 | { |
11604 | retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass); |
11605 | } |
11606 | regCount = retTypeDesc.GetReturnRegCount(); |
11607 | } |
11608 | |
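        // Mark the return register(s) that hold GC references as live GC pointers for the
        // duration of the profiler leave callback.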
11609 | if (varTypeIsGC(compiler->info.compRetType)) |
11610 | { |
11611 | gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType); |
11612 | } |
11613 | else if (compiler->compMethodReturnsMultiRegRetType()) |
11614 | { |
11615 | for (unsigned i = 0; i < regCount; ++i) |
11616 | { |
11617 | if (varTypeIsGC(retTypeDesc.GetReturnRegType(i))) |
11618 | { |
11619 | gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i)); |
11620 | } |
11621 | } |
11622 | } |
11623 | |
11624 | genProfilingLeaveCallback(); |
11625 | |
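        // The callback is done; the return registers no longer need to be reported as GC pointers.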
11626 | if (varTypeIsGC(compiler->info.compRetType)) |
11627 | { |
11628 | gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET)); |
11629 | } |
11630 | else if (compiler->compMethodReturnsMultiRegRetType()) |
11631 | { |
11632 | for (unsigned i = 0; i < regCount; ++i) |
11633 | { |
11634 | if (varTypeIsGC(retTypeDesc.GetReturnRegType(i))) |
11635 | { |
11636 | gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i))); |
11637 | } |
11638 | } |
11639 | } |
11640 | } |
11641 | #endif // PROFILING_SUPPORTED |
11642 | |
11643 | #if defined(DEBUG) && defined(_TARGET_XARCH_) |
11644 | bool doStackPointerCheck = compiler->opts.compStackCheckOnRet; |
11645 | |
11646 | #if FEATURE_EH_FUNCLETS |
11647 | // Don't do stack pointer check at the return from a funclet; only for the main function. |
11648 | if (compiler->funCurrentFunc()->funKind != FUNC_ROOT) |
11649 | { |
11650 | doStackPointerCheck = false; |
11651 | } |
11652 | #else // !FEATURE_EH_FUNCLETS |
11653 | // Don't generate stack checks for x86 finally/filter EH returns: these are not invoked |
11654 | // with the same SP as the main function. See also CodeGen::genEHFinallyOrFilterRet(). |
11655 | if ((compiler->compCurBB->bbJumpKind == BBJ_EHFINALLYRET) || (compiler->compCurBB->bbJumpKind == BBJ_EHFILTERRET)) |
11656 | { |
11657 | doStackPointerCheck = false; |
11658 | } |
11659 | #endif // !FEATURE_EH_FUNCLETS |
11660 | |
11661 | genStackPointerCheck(doStackPointerCheck, compiler->lvaReturnSpCheck); |
11662 | #endif // defined(DEBUG) && defined(_TARGET_XARCH_) |
11663 | } |
11664 | |
11665 | #if defined(DEBUG) && defined(_TARGET_XARCH_) |
11666 | |
11667 | //------------------------------------------------------------------------ |
11668 | // genStackPointerCheck: Generate code to check the stack pointer against a saved value. |
11669 | // This is a debug check. |
11670 | // |
11671 | // Arguments: |
11672 | // doStackPointerCheck - If true, do the stack pointer check, otherwise do nothing. |
11673 | // lvaStackPointerVar - The local variable number that holds the value of the stack pointer |
11674 | // we are comparing against. |
11675 | // |
11676 | // Return Value: |
11677 | // None |
11678 | // |
11679 | void CodeGen::genStackPointerCheck(bool doStackPointerCheck, unsigned lvaStackPointerVar) |
11680 | { |
11681 | if (doStackPointerCheck) |
11682 | { |
11683 | noway_assert(lvaStackPointerVar != 0xCCCCCCCC && compiler->lvaTable[lvaStackPointerVar].lvDoNotEnregister && |
11684 | compiler->lvaTable[lvaStackPointerVar].lvOnFrame); |
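        // Compare the current stack pointer against the saved value; if they differ, stop at a breakpoint.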
11685 | getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, lvaStackPointerVar, 0); |
11686 | |
11687 | BasicBlock* sp_check = genCreateTempLabel(); |
11688 | getEmitter()->emitIns_J(INS_je, sp_check); |
11689 | instGen(INS_BREAKPOINT); |
11690 | genDefineTempLabel(sp_check); |
11691 | } |
11692 | } |
11693 | |
11694 | #endif // defined(DEBUG) && defined(_TARGET_XARCH_) |
11695 | |