1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
XX                                                                           XX
XX                                   Lower                                   XX
XX                                                                           XX
XX  Preconditions:                                                           XX
XX                                                                           XX
XX  Postconditions (for the nodes currently handled):                        XX
XX    - All operands requiring a register are explicit in the graph          XX
XX                                                                           XX
15 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
16 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
17 | */ |
18 | |
19 | #include "jitpch.h" |
20 | #ifdef _MSC_VER |
21 | #pragma hdrstop |
22 | #endif |
23 | |
24 | #include "lower.h" |
25 | |
26 | #if !defined(_TARGET_64BIT_) |
27 | #include "decomposelongs.h" |
28 | #endif // !defined(_TARGET_64BIT_) |
29 | |
30 | //------------------------------------------------------------------------ |
31 | // MakeSrcContained: Make "childNode" a contained node |
32 | // |
33 | // Arguments: |
34 | // parentNode - is a non-leaf node that can contain its 'childNode' |
35 | // childNode - is an op that will now be contained by its parent. |
36 | // |
// Notes:
//    If 'childNode' has any existing sources, they will now be sources for the parent.
//
40 | void Lowering::MakeSrcContained(GenTree* parentNode, GenTree* childNode) |
41 | { |
42 | assert(!parentNode->OperIsLeaf()); |
43 | assert(childNode->canBeContained()); |
44 | childNode->SetContained(); |
45 | assert(childNode->isContained()); |
46 | } |
47 | |
48 | //------------------------------------------------------------------------ |
49 | // CheckImmedAndMakeContained: Checks if the 'childNode' is a containable immediate |
50 | // and, if so, makes it contained. |
51 | // |
52 | // Arguments: |
53 | // parentNode - is any non-leaf node |
//    childNode  - is a child op of 'parentNode'
55 | // |
56 | // Return value: |
57 | // true if we are able to make childNode a contained immediate |
58 | // |
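// Notes:
//    For example (illustrative only), on xarch an ADD whose second operand is a small integer
//    constant can typically encode that constant directly in the instruction, so the constant
//    node is marked contained and no register is allocated for it.
//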
59 | bool Lowering::CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode) |
60 | { |
61 | assert(!parentNode->OperIsLeaf()); |
62 | // If childNode is a containable immediate |
63 | if (IsContainableImmed(parentNode, childNode)) |
64 | { |
65 | // then make it contained within the parentNode |
66 | MakeSrcContained(parentNode, childNode); |
67 | return true; |
68 | } |
69 | return false; |
70 | } |
71 | |
72 | //------------------------------------------------------------------------ |
73 | // IsSafeToContainMem: Checks for conflicts between childNode and parentNode, |
74 | // and returns 'true' iff memory operand childNode can be contained in parentNode. |
75 | // |
76 | // Arguments: |
77 | // parentNode - any non-leaf node |
78 | // childNode - some node that is an input to `parentNode` |
79 | // |
80 | // Return value: |
81 | // true if it is safe to make childNode a contained memory operand. |
82 | // |
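// Notes:
//    For example (illustrative only), in the sequence "t1 = IND(x); STOREIND(x, y); ADD(t1, z)"
//    the IND cannot be contained by the ADD: the intervening STOREIND may alias the load, and
//    containment would delay the load until the ADD is evaluated.
//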
83 | bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode) |
84 | { |
85 | m_scratchSideEffects.Clear(); |
86 | m_scratchSideEffects.AddNode(comp, childNode); |
87 | |
88 | for (GenTree* node = childNode->gtNext; node != parentNode; node = node->gtNext) |
89 | { |
90 | if (m_scratchSideEffects.InterferesWith(comp, node, false)) |
91 | { |
92 | return false; |
93 | } |
94 | } |
95 | |
96 | return true; |
97 | } |
98 | |
//------------------------------------------------------------------------
// LowerNode: this is the main entry point for Lowering.
102 | GenTree* Lowering::LowerNode(GenTree* node) |
103 | { |
104 | assert(node != nullptr); |
105 | switch (node->gtOper) |
106 | { |
107 | case GT_IND: |
108 | TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true); |
109 | ContainCheckIndir(node->AsIndir()); |
110 | break; |
111 | |
112 | case GT_STOREIND: |
113 | TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true); |
114 | if (!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(node)) |
115 | { |
116 | LowerStoreIndir(node->AsIndir()); |
117 | } |
118 | break; |
119 | |
120 | case GT_ADD: |
121 | { |
122 | GenTree* afterTransform = LowerAdd(node); |
123 | if (afterTransform != nullptr) |
124 | { |
125 | return afterTransform; |
126 | } |
127 | __fallthrough; |
128 | } |
129 | |
130 | #if !defined(_TARGET_64BIT_) |
131 | case GT_ADD_LO: |
132 | case GT_ADD_HI: |
133 | case GT_SUB_LO: |
134 | case GT_SUB_HI: |
135 | #endif |
136 | case GT_SUB: |
137 | case GT_AND: |
138 | case GT_OR: |
139 | case GT_XOR: |
140 | ContainCheckBinary(node->AsOp()); |
141 | break; |
142 | |
143 | case GT_MUL: |
144 | case GT_MULHI: |
145 | #if defined(_TARGET_X86_) |
146 | case GT_MUL_LONG: |
147 | #endif |
148 | ContainCheckMul(node->AsOp()); |
149 | break; |
150 | |
151 | case GT_UDIV: |
152 | case GT_UMOD: |
153 | if (!LowerUnsignedDivOrMod(node->AsOp())) |
154 | { |
155 | ContainCheckDivOrMod(node->AsOp()); |
156 | } |
157 | break; |
158 | |
159 | case GT_DIV: |
160 | case GT_MOD: |
161 | return LowerSignedDivOrMod(node); |
162 | |
163 | case GT_SWITCH: |
164 | return LowerSwitch(node); |
165 | |
166 | case GT_CALL: |
167 | LowerCall(node); |
168 | break; |
169 | |
170 | case GT_LT: |
171 | case GT_LE: |
172 | case GT_GT: |
173 | case GT_GE: |
174 | case GT_EQ: |
175 | case GT_NE: |
176 | case GT_TEST_EQ: |
177 | case GT_TEST_NE: |
178 | case GT_CMP: |
179 | return LowerCompare(node); |
180 | |
181 | case GT_JTRUE: |
182 | return LowerJTrue(node->AsOp()); |
183 | |
184 | case GT_JMP: |
185 | LowerJmpMethod(node); |
186 | break; |
187 | |
188 | case GT_RETURN: |
189 | LowerRet(node); |
190 | break; |
191 | |
192 | case GT_RETURNTRAP: |
193 | ContainCheckReturnTrap(node->AsOp()); |
194 | break; |
195 | |
196 | case GT_CAST: |
197 | LowerCast(node); |
198 | break; |
199 | |
200 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
201 | case GT_ARR_BOUNDS_CHECK: |
202 | #ifdef FEATURE_SIMD |
203 | case GT_SIMD_CHK: |
204 | #endif // FEATURE_SIMD |
205 | #ifdef FEATURE_HW_INTRINSICS |
206 | case GT_HW_INTRINSIC_CHK: |
207 | #endif // FEATURE_HW_INTRINSICS |
208 | ContainCheckBoundsChk(node->AsBoundsChk()); |
209 | break; |
#endif // _TARGET_XARCH_ || _TARGET_ARM64_
211 | case GT_ARR_ELEM: |
212 | return LowerArrElem(node); |
213 | |
214 | case GT_ARR_OFFSET: |
215 | ContainCheckArrOffset(node->AsArrOffs()); |
216 | break; |
217 | |
218 | case GT_ROL: |
219 | case GT_ROR: |
220 | LowerRotate(node); |
221 | break; |
222 | |
223 | #ifndef _TARGET_64BIT_ |
224 | case GT_LSH_HI: |
225 | case GT_RSH_LO: |
226 | ContainCheckShiftRotate(node->AsOp()); |
227 | break; |
228 | #endif // !_TARGET_64BIT_ |
229 | |
230 | case GT_LSH: |
231 | case GT_RSH: |
232 | case GT_RSZ: |
233 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
234 | LowerShift(node->AsOp()); |
235 | #else |
236 | ContainCheckShiftRotate(node->AsOp()); |
237 | #endif |
238 | break; |
239 | |
240 | case GT_STORE_BLK: |
241 | case GT_STORE_OBJ: |
242 | case GT_STORE_DYN_BLK: |
243 | { |
244 | GenTreeBlk* blkNode = node->AsBlk(); |
245 | TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false); |
246 | LowerBlockStore(blkNode); |
247 | } |
248 | break; |
249 | |
250 | case GT_LCLHEAP: |
251 | ContainCheckLclHeap(node->AsOp()); |
252 | break; |
253 | |
254 | #ifdef _TARGET_XARCH_ |
255 | case GT_INTRINSIC: |
256 | ContainCheckIntrinsic(node->AsOp()); |
257 | break; |
258 | #endif // _TARGET_XARCH_ |
259 | |
260 | #ifdef FEATURE_SIMD |
261 | case GT_SIMD: |
262 | LowerSIMD(node->AsSIMD()); |
263 | break; |
264 | #endif // FEATURE_SIMD |
265 | |
266 | #ifdef FEATURE_HW_INTRINSICS |
267 | case GT_HWIntrinsic: |
268 | LowerHWIntrinsic(node->AsHWIntrinsic()); |
269 | break; |
270 | #endif // FEATURE_HW_INTRINSICS |
271 | |
272 | case GT_LCL_FLD: |
273 | { |
274 | // We should only encounter this for lclVars that are lvDoNotEnregister. |
275 | verifyLclFldDoNotEnregister(node->AsLclVarCommon()->gtLclNum); |
276 | break; |
277 | } |
278 | |
279 | case GT_LCL_VAR: |
280 | WidenSIMD12IfNecessary(node->AsLclVarCommon()); |
281 | break; |
282 | |
283 | case GT_STORE_LCL_VAR: |
284 | WidenSIMD12IfNecessary(node->AsLclVarCommon()); |
285 | __fallthrough; |
286 | |
287 | case GT_STORE_LCL_FLD: |
288 | { |
289 | #if defined(_TARGET_AMD64_) && defined(FEATURE_SIMD) |
290 | GenTreeLclVarCommon* const store = node->AsLclVarCommon(); |
291 | if ((store->TypeGet() == TYP_SIMD8) != (store->gtOp1->TypeGet() == TYP_SIMD8)) |
292 | { |
293 | GenTreeUnOp* bitcast = |
294 | new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, store->TypeGet(), store->gtOp1, nullptr); |
295 | store->gtOp1 = bitcast; |
296 | BlockRange().InsertBefore(store, bitcast); |
297 | } |
#endif // _TARGET_AMD64_ && FEATURE_SIMD
299 | // TODO-1stClassStructs: Once we remove the requirement that all struct stores |
300 | // are block stores (GT_STORE_BLK or GT_STORE_OBJ), here is where we would put the local |
301 | // store under a block store if codegen will require it. |
302 | if ((node->TypeGet() == TYP_STRUCT) && (node->gtGetOp1()->OperGet() != GT_PHI)) |
303 | { |
304 | #if FEATURE_MULTIREG_RET |
305 | GenTree* src = node->gtGetOp1(); |
306 | assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal()); |
307 | #else // !FEATURE_MULTIREG_RET |
                assert(!"Unexpected struct local store in Lowering");
309 | #endif // !FEATURE_MULTIREG_RET |
310 | } |
311 | LowerStoreLoc(node->AsLclVarCommon()); |
312 | break; |
313 | } |
314 | |
315 | #if defined(_TARGET_ARM64_) |
316 | case GT_CMPXCHG: |
317 | CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand); |
318 | break; |
319 | |
320 | case GT_XADD: |
321 | CheckImmedAndMakeContained(node, node->gtOp.gtOp2); |
322 | break; |
323 | #elif defined(_TARGET_XARCH_) |
324 | case GT_XADD: |
325 | if (node->IsUnusedValue()) |
326 | { |
327 | node->ClearUnusedValue(); |
328 | // Make sure the types are identical, since the node type is changed to VOID |
329 | // CodeGen relies on op2's type to determine the instruction size. |
330 | // Note that the node type cannot be a small int but the data operand can. |
331 | assert(genActualType(node->gtGetOp2()->TypeGet()) == node->TypeGet()); |
332 | node->SetOper(GT_LOCKADD); |
333 | node->gtType = TYP_VOID; |
334 | CheckImmedAndMakeContained(node, node->gtGetOp2()); |
335 | } |
336 | break; |
337 | #endif |
338 | |
339 | #ifndef _TARGET_ARMARCH_ |
340 | // TODO-ARMARCH-CQ: We should contain this as long as the offset fits. |
341 | case GT_OBJ: |
342 | if (node->AsObj()->Addr()->OperIsLocalAddr()) |
343 | { |
344 | node->AsObj()->Addr()->SetContained(); |
345 | } |
346 | break; |
347 | #endif // !_TARGET_ARMARCH_ |
348 | |
349 | default: |
350 | break; |
351 | } |
352 | |
353 | return node->gtNext; |
354 | } |
355 | |
356 | /** -- Switch Lowering -- |
 * The main idea of switch lowering is to make the register requirements of this node transparent to
 * LSRA downstream. Although the switch is represented in the JIT as a simple tree node, it is
 * inherently a control-flow statement: when we actually generate code for it we end up emitting
 * instructions that modify the flow of execution, and those instructions impose complicated register
 * requirements and lifetimes.
 *
 * So, for the purpose of LSRA, we want a more detailed specification of what a switch node actually
 * means and, more importantly, which instructions need a register and when, so that they can be
 * allocated correctly downstream.
366 | * |
367 | * For this purpose, this procedure performs switch lowering in two different ways: |
368 | * |
369 | * a) Represent the switch statement as a zero-index jump table construct. This means that for every destination |
370 | * of the switch, we will store this destination in an array of addresses and the code generator will issue |
371 | * a data section where this array will live and will emit code that based on the switch index, will indirect and |
372 | * jump to the destination specified in the jump table. |
373 | * |
374 | * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch |
375 | * node for jump table based switches. |
376 | * The overall structure of a GT_SWITCH_TABLE is: |
377 | * |
 * GT_SWITCH_TABLE
 *    |_________ localVar  (a temporary local that holds the switch index)
 *    |_________ jumpTable (this is a special node that holds the address of the jump table array)
381 | * |
382 | * Now, the way we morph a GT_SWITCH node into this lowered switch table node form is the following: |
383 | * |
 * Input:  GT_SWITCH (inside a basic block whose Branch Type is BBJ_SWITCH)
 *    |_____ expr (an arbitrarily complex GT_NODE that represents the switch index)
 *
 * This gets transformed into the following statements inside a BBJ_COND basic block (the target would be
 * the default case of the switch if the conditional evaluates to true).
 *
 * ----- original block, transformed
 * GT_STORE_LCL_VAR tempLocal (a new temporary local variable used to store the switch index)
 *    |_____ expr (the index expression)
 *
 * GT_JTRUE
 *    |_____ GT_COND
 *             |_____ GT_GE
 *                      |___ Int_Constant (This constant is the index of the default case
 *                                         that happens to be the highest index in the jump table).
 *                      |___ tempLocal    (The local variable where we stored the index expression).
 *
 * ----- new basic block
 * GT_SWITCH_TABLE
 *    |_____ tempLocal
 *    |_____ jumpTable (a new jump table node that now LSRA can allocate registers for explicitly
 *                      and LinearCodeGen will be responsible to generate downstream).
 *
 * This way there are no implicit temporaries.
408 | * |
 * b) For small-sized switches, we will actually morph them into a series of conditionals of the form:
 *     if (case falls into the default) { goto jumpTable[size]; } // the last entry in the jump table is the default case
 *     (For the default case conditional, we construct exactly the same code as in the jump table case.)
 *     else if (case == firstCase) { goto jumpTable[1]; }
 *     else if (case == secondCase) { goto jumpTable[2]; } and so on.
 *
 *     This transformation is of course made in JIT-IR, not downstream at the CodeGen level, so we no longer
 *     require internal temporaries to hold the index we're evaluating, and we reuse existing code from
 *     LinearCodeGen to implement this instead of implementing all the control flow constructs using InstrDscs
 *     and InstrGroups downstream.
419 | */ |
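
// For illustration only (hypothetical example, not tied to any particular source): lowering
//
//     switch (i) { case 0: ... case 1: ... case 2: ... default: ... }
//
// where none of the targets follow the switch block first produces the shared default check
// in the original block:
//
//     tmp = i
//     if ((unsigned)tmp > 2) goto BBdefault     // GT_JTRUE(GT_GT(tmp, jumpCnt - 2)), unsigned compare
//
// and then, in the new block, either
//     a) a GT_SWITCH_TABLE(tmp, jumpTable) jump, or
//     b) a chain "if (tmp == 0) goto BB0; if (tmp == 1) goto BB1; goto BB2;"
// depending on the number of cases and the target architecture.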
420 | |
421 | GenTree* Lowering::LowerSwitch(GenTree* node) |
422 | { |
423 | unsigned jumpCnt; |
424 | unsigned targetCnt; |
425 | BasicBlock** jumpTab; |
426 | |
427 | assert(node->gtOper == GT_SWITCH); |
428 | |
429 | // The first step is to build the default case conditional construct that is |
430 | // shared between both kinds of expansion of the switch node. |
431 | |
    // To avoid confusion, we'll alias m_block to originalSwitchBB; it is the block
    // that contains the GT_SWITCH node we're morphing.
434 | BasicBlock* originalSwitchBB = m_block; |
435 | LIR::Range& switchBBRange = LIR::AsRange(originalSwitchBB); |
436 | |
437 | // jumpCnt is the number of elements in the jump table array. |
438 | // jumpTab is the actual pointer to the jump table array. |
439 | // targetCnt is the number of unique targets in the jump table array. |
440 | jumpCnt = originalSwitchBB->bbJumpSwt->bbsCount; |
441 | jumpTab = originalSwitchBB->bbJumpSwt->bbsDstTab; |
442 | targetCnt = originalSwitchBB->NumSucc(comp); |
443 | |
444 | // GT_SWITCH must be a top-level node with no use. |
445 | #ifdef DEBUG |
446 | { |
447 | LIR::Use use; |
448 | assert(!switchBBRange.TryGetUse(node, &use)); |
449 | } |
450 | #endif |
451 | |
    JITDUMP("Lowering switch " FMT_BB ", %d cases\n", originalSwitchBB->bbNum, jumpCnt);
453 | |
454 | // Handle a degenerate case: if the switch has only a default case, just convert it |
455 | // to an unconditional branch. This should only happen in minopts or with debuggable |
456 | // code. |
457 | if (targetCnt == 1) |
458 | { |
        JITDUMP("Lowering switch " FMT_BB ": single target; converting to BBJ_ALWAYS\n", originalSwitchBB->bbNum);
460 | noway_assert(comp->opts.OptimizationDisabled()); |
461 | if (originalSwitchBB->bbNext == jumpTab[0]) |
462 | { |
463 | originalSwitchBB->bbJumpKind = BBJ_NONE; |
464 | originalSwitchBB->bbJumpDest = nullptr; |
465 | } |
466 | else |
467 | { |
468 | originalSwitchBB->bbJumpKind = BBJ_ALWAYS; |
469 | originalSwitchBB->bbJumpDest = jumpTab[0]; |
470 | } |
471 | // Remove extra predecessor links if there was more than one case. |
472 | for (unsigned i = 1; i < jumpCnt; ++i) |
473 | { |
474 | (void)comp->fgRemoveRefPred(jumpTab[i], originalSwitchBB); |
475 | } |
476 | |
477 | // We have to get rid of the GT_SWITCH node but a child might have side effects so just assign |
478 | // the result of the child subtree to a temp. |
479 | GenTree* rhs = node->gtOp.gtOp1; |
480 | |
        unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Lowering is creating a new local variable"));
482 | comp->lvaTable[lclNum].lvType = rhs->TypeGet(); |
483 | |
484 | GenTreeLclVar* store = |
485 | new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, rhs->TypeGet(), lclNum, BAD_IL_OFFSET); |
486 | store->gtOp1 = rhs; |
487 | store->gtFlags = (rhs->gtFlags & GTF_COMMON_MASK); |
488 | store->gtFlags |= GTF_VAR_DEF; |
489 | |
490 | switchBBRange.InsertAfter(node, store); |
491 | switchBBRange.Remove(node); |
492 | |
493 | return store; |
494 | } |
495 | |
496 | noway_assert(jumpCnt >= 2); |
497 | |
498 | // Spill the argument to the switch node into a local so that it can be used later. |
499 | unsigned blockWeight = originalSwitchBB->getBBWeight(comp); |
500 | |
501 | LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node); |
502 | ReplaceWithLclVar(use); |
503 | |
504 | // GT_SWITCH(indexExpression) is now two statements: |
505 | // 1. a statement containing 'asg' (for temp = indexExpression) |
506 | // 2. and a statement with GT_SWITCH(temp) |
507 | |
508 | assert(node->gtOper == GT_SWITCH); |
509 | GenTree* temp = node->gtOp.gtOp1; |
510 | assert(temp->gtOper == GT_LCL_VAR); |
511 | unsigned tempLclNum = temp->gtLclVarCommon.gtLclNum; |
512 | LclVarDsc* tempVarDsc = comp->lvaTable + tempLclNum; |
513 | var_types tempLclType = temp->TypeGet(); |
514 | |
515 | BasicBlock* defaultBB = jumpTab[jumpCnt - 1]; |
516 | BasicBlock* followingBB = originalSwitchBB->bbNext; |
517 | |
518 | /* Is the number of cases right for a test and jump switch? */ |
519 | const bool fFirstCaseFollows = (followingBB == jumpTab[0]); |
520 | const bool fDefaultFollows = (followingBB == defaultBB); |
521 | |
522 | unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc |
523 | |
524 | // This means really just a single cmp/jcc (aka a simple if/else) |
525 | if (fFirstCaseFollows || fDefaultFollows) |
526 | { |
527 | minSwitchTabJumpCnt++; |
528 | } |
529 | |
530 | #if defined(_TARGET_ARM_) |
531 | // On ARM for small switch tables we will |
532 | // generate a sequence of compare and branch instructions |
533 | // because the code to load the base of the switch |
534 | // table is huge and hideous due to the relocation... :( |
535 | minSwitchTabJumpCnt += 2; |
536 | #endif // _TARGET_ARM_ |
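
    // Illustrative arithmetic (hypothetical case, not special-cased in the code): on ARM with the
    // default case falling through, minSwitchTabJumpCnt is 2 + 1 + 2 = 5, so a switch whose jump
    // table has 4 entries (3 cases plus the default) is expanded as compare/branch sequences.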
537 | |
538 | // Once we have the temporary variable, we construct the conditional branch for |
539 | // the default case. As stated above, this conditional is being shared between |
540 | // both GT_SWITCH lowering code paths. |
541 | // This condition is of the form: if (temp > jumpTableLength - 2){ goto jumpTable[jumpTableLength - 1]; } |
542 | GenTree* gtDefaultCaseCond = comp->gtNewOperNode(GT_GT, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType), |
543 | comp->gtNewIconNode(jumpCnt - 2, genActualType(tempLclType))); |
544 | |
    // Make sure we perform an unsigned comparison, just in case the switch index in 'temp'
    // is now less than zero (that would also hit the default case).
547 | gtDefaultCaseCond->gtFlags |= GTF_UNSIGNED; |
548 | |
549 | GenTree* gtDefaultCaseJump = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtDefaultCaseCond); |
550 | gtDefaultCaseJump->gtFlags = node->gtFlags; |
551 | |
552 | LIR::Range condRange = LIR::SeqTree(comp, gtDefaultCaseJump); |
553 | switchBBRange.InsertAtEnd(std::move(condRange)); |
554 | |
555 | BasicBlock* afterDefaultCondBlock = comp->fgSplitBlockAfterNode(originalSwitchBB, condRange.LastNode()); |
556 | |
557 | // afterDefaultCondBlock is now the switch, and all the switch targets have it as a predecessor. |
558 | // originalSwitchBB is now a BBJ_NONE, and there is a predecessor edge in afterDefaultCondBlock |
559 | // representing the fall-through flow from originalSwitchBB. |
560 | assert(originalSwitchBB->bbJumpKind == BBJ_NONE); |
561 | assert(originalSwitchBB->bbNext == afterDefaultCondBlock); |
562 | assert(afterDefaultCondBlock->bbJumpKind == BBJ_SWITCH); |
563 | assert(afterDefaultCondBlock->bbJumpSwt->bbsHasDefault); |
564 | assert(afterDefaultCondBlock->isEmpty()); // Nothing here yet. |
565 | |
566 | // The GT_SWITCH code is still in originalSwitchBB (it will be removed later). |
567 | |
568 | // Turn originalSwitchBB into a BBJ_COND. |
569 | originalSwitchBB->bbJumpKind = BBJ_COND; |
570 | originalSwitchBB->bbJumpDest = jumpTab[jumpCnt - 1]; |
571 | |
    // Fix the pred for the default case: the default block target still has originalSwitchBB
    // as a predecessor, but the fgSplitBlockAfterNode() call above moved all predecessors to point
    // to afterDefaultCondBlock.
575 | flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[jumpCnt - 1], afterDefaultCondBlock); |
576 | comp->fgAddRefPred(jumpTab[jumpCnt - 1], originalSwitchBB, oldEdge); |
577 | |
578 | bool useJumpSequence = jumpCnt < minSwitchTabJumpCnt; |
579 | |
580 | #if defined(_TARGET_UNIX_) && defined(_TARGET_ARM_) |
    // Force using an inlined jump sequence instead of switch table generation.
    // The switch jump table is generated with incorrect values in the CoreRT case,
    // so any large switch will crash after loading any such value into PC.
    // This is likely because we use absolute addressing instead of relative,
    // whereas CoreRT as a rule uses relative addressing when generating an executable.
587 | // See also https://github.com/dotnet/coreclr/issues/13194 |
588 | // Also https://github.com/dotnet/coreclr/pull/13197 |
589 | useJumpSequence = useJumpSequence || comp->IsTargetAbi(CORINFO_CORERT_ABI); |
590 | #endif // defined(_TARGET_UNIX_) && defined(_TARGET_ARM_) |
591 | |
592 | // If we originally had 2 unique successors, check to see whether there is a unique |
593 | // non-default case, in which case we can eliminate the switch altogether. |
594 | // Note that the single unique successor case is handled above. |
595 | BasicBlock* uniqueSucc = nullptr; |
596 | if (targetCnt == 2) |
597 | { |
598 | uniqueSucc = jumpTab[0]; |
599 | noway_assert(jumpCnt >= 2); |
600 | for (unsigned i = 1; i < jumpCnt - 1; i++) |
601 | { |
602 | if (jumpTab[i] != uniqueSucc) |
603 | { |
604 | uniqueSucc = nullptr; |
605 | break; |
606 | } |
607 | } |
608 | } |
609 | if (uniqueSucc != nullptr) |
610 | { |
611 | // If the unique successor immediately follows this block, we have nothing to do - |
612 | // it will simply fall-through after we remove the switch, below. |
613 | // Otherwise, make this a BBJ_ALWAYS. |
        // Now, fixup the predecessor links to uniqueSucc. In the original jumpTab:
        //   jumpTab[jumpCnt - 1] was the default target, which we handled above,
        //   jumpTab[0] is the first target, and we'll leave that predecessor link.
617 | // Remove any additional predecessor links to uniqueSucc. |
618 | for (unsigned i = 1; i < jumpCnt - 1; ++i) |
619 | { |
620 | assert(jumpTab[i] == uniqueSucc); |
621 | (void)comp->fgRemoveRefPred(uniqueSucc, afterDefaultCondBlock); |
622 | } |
623 | if (afterDefaultCondBlock->bbNext == uniqueSucc) |
624 | { |
625 | afterDefaultCondBlock->bbJumpKind = BBJ_NONE; |
626 | afterDefaultCondBlock->bbJumpDest = nullptr; |
627 | } |
628 | else |
629 | { |
630 | afterDefaultCondBlock->bbJumpKind = BBJ_ALWAYS; |
631 | afterDefaultCondBlock->bbJumpDest = uniqueSucc; |
632 | } |
633 | } |
634 | // If the number of possible destinations is small enough, we proceed to expand the switch |
635 | // into a series of conditional branches, otherwise we follow the jump table based switch |
636 | // transformation. |
637 | else if (useJumpSequence || comp->compStressCompile(Compiler::STRESS_SWITCH_CMP_BR_EXPANSION, 50)) |
638 | { |
639 | // Lower the switch into a series of compare and branch IR trees. |
640 | // |
641 | // In this case we will morph the node in the following way: |
642 | // 1. Generate a JTRUE statement to evaluate the default case. (This happens above.) |
643 | // 2. Start splitting the switch basic block into subsequent basic blocks, each of which will contain |
644 | // a statement that is responsible for performing a comparison of the table index and conditional |
645 | // branch if equal. |
646 | |
        JITDUMP("Lowering switch " FMT_BB ": using compare/branch expansion\n", originalSwitchBB->bbNum);
648 | |
649 | // We'll use 'afterDefaultCondBlock' for the first conditional. After that, we'll add new |
650 | // blocks. If we end up not needing it at all (say, if all the non-default cases just fall through), |
651 | // we'll delete it. |
652 | bool fUsedAfterDefaultCondBlock = false; |
653 | BasicBlock* currentBlock = afterDefaultCondBlock; |
654 | LIR::Range* currentBBRange = &LIR::AsRange(currentBlock); |
655 | |
        // Walk the non-default entries, 0 through jumpCnt - 2. If a case target follows, ignore it and
        // let it fall through. If no case target follows, the last one doesn't need to be a
        // compare/branch: it can be an unconditional branch.
659 | bool fAnyTargetFollows = false; |
660 | for (unsigned i = 0; i < jumpCnt - 1; ++i) |
661 | { |
662 | assert(currentBlock != nullptr); |
663 | |
664 | // Remove the switch from the predecessor list of this case target's block. |
665 | // We'll add the proper new predecessor edge later. |
666 | flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[i], afterDefaultCondBlock); |
667 | |
668 | if (jumpTab[i] == followingBB) |
669 | { |
670 | // This case label follows the switch; let it fall through. |
671 | fAnyTargetFollows = true; |
672 | continue; |
673 | } |
674 | |
675 | // We need a block to put in the new compare and/or branch. |
676 | // If we haven't used the afterDefaultCondBlock yet, then use that. |
677 | if (fUsedAfterDefaultCondBlock) |
678 | { |
679 | BasicBlock* newBlock = comp->fgNewBBafter(BBJ_NONE, currentBlock, true); |
680 | comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor. |
681 | currentBlock = newBlock; |
682 | currentBBRange = &LIR::AsRange(currentBlock); |
683 | } |
684 | else |
685 | { |
686 | assert(currentBlock == afterDefaultCondBlock); |
687 | fUsedAfterDefaultCondBlock = true; |
688 | } |
689 | |
690 | // We're going to have a branch, either a conditional or unconditional, |
691 | // to the target. Set the target. |
692 | currentBlock->bbJumpDest = jumpTab[i]; |
693 | |
694 | // Wire up the predecessor list for the "branch" case. |
695 | comp->fgAddRefPred(jumpTab[i], currentBlock, oldEdge); |
696 | |
697 | if (!fAnyTargetFollows && (i == jumpCnt - 2)) |
698 | { |
699 | // We're processing the last one, and there is no fall through from any case |
700 | // to the following block, so we can use an unconditional branch to the final |
701 | // case: there is no need to compare against the case index, since it's |
702 | // guaranteed to be taken (since the default case was handled first, above). |
703 | |
704 | currentBlock->bbJumpKind = BBJ_ALWAYS; |
705 | } |
706 | else |
707 | { |
708 | // Otherwise, it's a conditional branch. Set the branch kind, then add the |
709 | // condition statement. |
710 | currentBlock->bbJumpKind = BBJ_COND; |
711 | |
712 | // Now, build the conditional statement for the current case that is |
713 | // being evaluated: |
714 | // GT_JTRUE |
715 | // |__ GT_COND |
716 | // |____GT_EQ |
717 | // |____ (switchIndex) (The temp variable) |
718 | // |____ (ICon) (The actual case constant) |
719 | GenTree* gtCaseCond = comp->gtNewOperNode(GT_EQ, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType), |
720 | comp->gtNewIconNode(i, tempLclType)); |
721 | GenTree* gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond); |
722 | LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch); |
723 | currentBBRange->InsertAtEnd(std::move(caseRange)); |
724 | } |
725 | } |
726 | |
727 | if (fAnyTargetFollows) |
728 | { |
729 | // There is a fall-through to the following block. In the loop |
730 | // above, we deleted all the predecessor edges from the switch. |
731 | // In this case, we need to add one back. |
732 | comp->fgAddRefPred(currentBlock->bbNext, currentBlock); |
733 | } |
734 | |
735 | if (!fUsedAfterDefaultCondBlock) |
736 | { |
737 | // All the cases were fall-through! We don't need this block. |
738 | // Convert it from BBJ_SWITCH to BBJ_NONE and unset the BBF_DONT_REMOVE flag |
739 | // so fgRemoveBlock() doesn't complain. |
            JITDUMP("Lowering switch " FMT_BB ": all switch cases were fall-through\n", originalSwitchBB->bbNum);
741 | assert(currentBlock == afterDefaultCondBlock); |
742 | assert(currentBlock->bbJumpKind == BBJ_SWITCH); |
743 | currentBlock->bbJumpKind = BBJ_NONE; |
744 | currentBlock->bbFlags &= ~BBF_DONT_REMOVE; |
745 | comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block. |
746 | } |
747 | } |
748 | else |
749 | { |
750 | // At this point the default case has already been handled and we need to generate a jump |
751 | // table based switch or a bit test based switch at the end of afterDefaultCondBlock. Both |
752 | // switch variants need the switch value so create the necessary LclVar node here. |
753 | GenTree* switchValue = comp->gtNewLclvNode(tempLclNum, tempLclType); |
754 | LIR::Range& switchBlockRange = LIR::AsRange(afterDefaultCondBlock); |
755 | switchBlockRange.InsertAtEnd(switchValue); |
756 | |
757 | // Try generating a bit test based switch first, |
758 | // if that's not possible a jump table based switch will be generated. |
759 | if (!TryLowerSwitchToBitTest(jumpTab, jumpCnt, targetCnt, afterDefaultCondBlock, switchValue)) |
760 | { |
            JITDUMP("Lowering switch " FMT_BB ": using jump table expansion\n", originalSwitchBB->bbNum);
762 | |
763 | #ifdef _TARGET_64BIT_ |
764 | if (tempLclType != TYP_I_IMPL) |
765 | { |
766 | // SWITCH_TABLE expects the switch value (the index into the jump table) to be TYP_I_IMPL. |
767 | // Note that the switch value is unsigned so the cast should be unsigned as well. |
768 | switchValue = comp->gtNewCastNode(TYP_I_IMPL, switchValue, true, TYP_U_IMPL); |
769 | switchBlockRange.InsertAtEnd(switchValue); |
770 | } |
771 | #endif |
772 | |
773 | GenTree* switchTable = comp->gtNewJmpTableNode(); |
774 | GenTree* switchJump = comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, switchValue, switchTable); |
775 | switchBlockRange.InsertAfter(switchValue, switchTable, switchJump); |
776 | |
777 | // this block no longer branches to the default block |
778 | afterDefaultCondBlock->bbJumpSwt->removeDefault(); |
779 | } |
780 | |
781 | comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock); |
782 | } |
783 | |
784 | GenTree* next = node->gtNext; |
785 | |
786 | // Get rid of the GT_SWITCH(temp). |
787 | switchBBRange.Remove(node->gtOp.gtOp1); |
788 | switchBBRange.Remove(node); |
789 | |
790 | return next; |
791 | } |
792 | |
793 | //------------------------------------------------------------------------ |
794 | // TryLowerSwitchToBitTest: Attempts to transform a jump table switch into a bit test. |
795 | // |
796 | // Arguments: |
797 | // jumpTable - The jump table |
798 | // jumpCount - The number of blocks in the jump table |
799 | // targetCount - The number of distinct blocks in the jump table |
800 | // bbSwitch - The switch block |
801 | // switchValue - A LclVar node that provides the switch value |
802 | // |
803 | // Return value: |
804 | // true if the switch has been lowered to a bit test |
805 | // |
806 | // Notes: |
807 | // If the jump table contains less than 32 (64 on 64 bit targets) entries and there |
808 | // are at most 2 distinct jump targets then the jump table can be converted to a word |
809 | // of bits where a 0 bit corresponds to one jump target and a 1 bit corresponds to the |
810 | // other jump target. Instead of the indirect jump a BT-JCC sequence is used to jump |
811 | // to the appropriate target: |
812 | // mov eax, 245 ; jump table converted to a "bit table" |
813 | // bt eax, ebx ; ebx is supposed to contain the switch value |
814 | // jc target1 |
815 | // target0: |
816 | // ... |
817 | // target1: |
//    Such code is both shorter and faster (in part due to the removal of a memory load)
//    than the traditional jump table based code. And of course, it also avoids the need
//    to emit the jump table itself, which can reach up to 256 bytes (for 64 entries).
821 | // |
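//    For illustration (hypothetical jump table): { T1, T0, T1, T1, T0, Default } has jumpCount = 6
//    and bitCount = 5; with bbCase1 = T1 and bbCase0 = T0 the bit table is 0b01101 = 13, so
//    "bt bitTable, switchValue" sets the carry flag exactly for the switch values that go to T1.
//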
822 | bool Lowering::TryLowerSwitchToBitTest( |
823 | BasicBlock* jumpTable[], unsigned jumpCount, unsigned targetCount, BasicBlock* bbSwitch, GenTree* switchValue) |
824 | { |
825 | #ifndef _TARGET_XARCH_ |
826 | // Other architectures may use this if they substitute GT_BT with equivalent code. |
827 | return false; |
828 | #else |
829 | assert(jumpCount >= 2); |
830 | assert(targetCount >= 2); |
831 | assert(bbSwitch->bbJumpKind == BBJ_SWITCH); |
832 | assert(switchValue->OperIs(GT_LCL_VAR)); |
833 | |
834 | // |
835 | // Quick check to see if it's worth going through the jump table. The bit test switch supports |
836 | // up to 2 targets but targetCount also includes the default block so we need to allow 3 targets. |
837 | // We'll ensure that there are only 2 targets when building the bit table. |
838 | // |
839 | |
840 | if (targetCount > 3) |
841 | { |
842 | return false; |
843 | } |
844 | |
845 | // |
846 | // The number of bits in the bit table is the same as the number of jump table entries. But the |
847 | // jump table also includes the default target (at the end) so we need to ignore it. The default |
848 | // has already been handled by a JTRUE(GT(switchValue, jumpCount - 2)) that LowerSwitch generates. |
849 | // |
850 | |
851 | const unsigned bitCount = jumpCount - 1; |
852 | |
853 | if (bitCount > (genTypeSize(TYP_I_IMPL) * 8)) |
854 | { |
855 | return false; |
856 | } |
857 | |
858 | // |
859 | // Build a bit table where a bit set to 0 corresponds to bbCase0 and a bit set to 1 corresponds to |
860 | // bbCase1. Simply use the first block in the jump table as bbCase1, later we can invert the bit |
861 | // table and/or swap the blocks if it's beneficial. |
862 | // |
863 | |
864 | BasicBlock* bbCase0 = nullptr; |
865 | BasicBlock* bbCase1 = jumpTable[0]; |
866 | size_t bitTable = 1; |
867 | |
868 | for (unsigned bitIndex = 1; bitIndex < bitCount; bitIndex++) |
869 | { |
870 | if (jumpTable[bitIndex] == bbCase1) |
871 | { |
872 | bitTable |= (size_t(1) << bitIndex); |
873 | } |
874 | else if (bbCase0 == nullptr) |
875 | { |
876 | bbCase0 = jumpTable[bitIndex]; |
877 | } |
878 | else if (jumpTable[bitIndex] != bbCase0) |
879 | { |
880 | // If it's neither bbCase0 nor bbCase1 then it means we have 3 targets. There can't be more |
881 | // than 3 because of the check at the start of the function. |
882 | assert(targetCount == 3); |
883 | return false; |
884 | } |
885 | } |
886 | |
887 | // |
888 | // One of the case blocks has to follow the switch block. This requirement could be avoided |
889 | // by adding a BBJ_ALWAYS block after the switch block but doing that sometimes negatively |
890 | // impacts register allocation. |
891 | // |
892 | |
893 | if ((bbSwitch->bbNext != bbCase0) && (bbSwitch->bbNext != bbCase1)) |
894 | { |
895 | return false; |
896 | } |
897 | |
898 | #ifdef _TARGET_64BIT_ |
899 | // |
    // See if we can avoid an 8 byte immediate on 64 bit targets. If all upper 32 bits are 1
901 | // then inverting the bit table will make them 0 so that the table now fits in 32 bits. |
902 | // Note that this does not change the number of bits in the bit table, it just takes |
903 | // advantage of the fact that loading a 32 bit immediate into a 64 bit register zero |
904 | // extends the immediate value to 64 bit. |
905 | // |
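    // For illustration (hypothetical value): bitTable = 0xFFFFFFFF00000009 inverts to
    // 0x00000000FFFFFFF6, which fits in a 32 bit immediate; swapping bbCase0 and bbCase1
    // below keeps the jump targets consistent with the inverted bits.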
906 | |
907 | if (~bitTable <= UINT32_MAX) |
908 | { |
909 | bitTable = ~bitTable; |
910 | std::swap(bbCase0, bbCase1); |
911 | } |
912 | #endif |
913 | |
914 | // |
915 | // Rewire the blocks as needed and figure out the condition to use for JCC. |
916 | // |
917 | |
918 | genTreeOps bbSwitchCondition = GT_NONE; |
919 | bbSwitch->bbJumpKind = BBJ_COND; |
920 | |
921 | comp->fgRemoveAllRefPreds(bbCase1, bbSwitch); |
922 | comp->fgRemoveAllRefPreds(bbCase0, bbSwitch); |
923 | |
924 | if (bbSwitch->bbNext == bbCase0) |
925 | { |
926 | // GT_LT + GTF_UNSIGNED generates JC so we jump to bbCase1 when the bit is set |
927 | bbSwitchCondition = GT_LT; |
928 | bbSwitch->bbJumpDest = bbCase1; |
929 | |
930 | comp->fgAddRefPred(bbCase0, bbSwitch); |
931 | comp->fgAddRefPred(bbCase1, bbSwitch); |
932 | } |
933 | else |
934 | { |
935 | assert(bbSwitch->bbNext == bbCase1); |
936 | |
937 | // GT_GE + GTF_UNSIGNED generates JNC so we jump to bbCase0 when the bit is not set |
938 | bbSwitchCondition = GT_GE; |
939 | bbSwitch->bbJumpDest = bbCase0; |
940 | |
941 | comp->fgAddRefPred(bbCase0, bbSwitch); |
942 | comp->fgAddRefPred(bbCase1, bbSwitch); |
943 | } |
944 | |
945 | // |
946 | // Append BT(bitTable, switchValue) and JCC(condition) to the switch block. |
947 | // |
948 | |
949 | var_types bitTableType = (bitCount <= (genTypeSize(TYP_INT) * 8)) ? TYP_INT : TYP_LONG; |
950 | GenTree* bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType); |
951 | GenTree* bitTest = comp->gtNewOperNode(GT_BT, TYP_VOID, bitTableIcon, switchValue); |
952 | bitTest->gtFlags |= GTF_SET_FLAGS; |
953 | GenTreeCC* jcc = new (comp, GT_JCC) GenTreeCC(GT_JCC, bbSwitchCondition); |
954 | jcc->gtFlags |= GTF_UNSIGNED | GTF_USE_FLAGS; |
955 | |
956 | LIR::AsRange(bbSwitch).InsertAfter(switchValue, bitTableIcon, bitTest, jcc); |
957 | |
958 | return true; |
959 | #endif // _TARGET_XARCH_ |
960 | } |
961 | |
962 | // NOTE: this method deliberately does not update the call arg table. It must only |
963 | // be used by NewPutArg and LowerArg; these functions are responsible for updating |
964 | // the call arg table as necessary. |
965 | void Lowering::ReplaceArgWithPutArgOrBitcast(GenTree** argSlot, GenTree* putArgOrBitcast) |
966 | { |
967 | assert(argSlot != nullptr); |
968 | assert(*argSlot != nullptr); |
969 | assert(putArgOrBitcast->OperIsPutArg() || putArgOrBitcast->OperIs(GT_BITCAST)); |
970 | |
971 | GenTree* arg = *argSlot; |
972 | |
973 | // Replace the argument with the putarg/copy |
974 | *argSlot = putArgOrBitcast; |
975 | putArgOrBitcast->gtOp.gtOp1 = arg; |
976 | |
977 | // Insert the putarg/copy into the block |
978 | BlockRange().InsertAfter(arg, putArgOrBitcast); |
979 | } |
980 | |
981 | //------------------------------------------------------------------------ |
982 | // NewPutArg: rewrites the tree to put an arg in a register or on the stack. |
983 | // |
984 | // Arguments: |
985 | // call - the call whose arg is being rewritten. |
986 | // arg - the arg being rewritten. |
987 | // info - the fgArgTabEntry information for the argument. |
988 | // type - the type of the argument. |
989 | // |
990 | // Return Value: |
991 | // The new tree that was created to put the arg in the right place |
992 | // or the incoming arg if the arg tree was not rewritten. |
993 | // |
994 | // Assumptions: |
995 | // call, arg, and info must be non-null. |
996 | // |
997 | // Notes: |
998 | // For System V systems with native struct passing (i.e. UNIX_AMD64_ABI defined) |
999 | // this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_FIELD_LIST of two GT_PUTARG_REGs |
1000 | // for two eightbyte structs. |
1001 | // |
1002 | // For STK passed structs the method generates GT_PUTARG_STK tree. For System V systems with native struct passing |
1003 | // (i.e. UNIX_AMD64_ABI defined) this method also sets the GC pointers count and the pointers |
1004 | // layout object, so the codegen of the GT_PUTARG_STK could use this for optimizing copying to the stack by value. |
1005 | // (using block copy primitives for non GC pointers and a single TARGET_POINTER_SIZE copy with recording GC info.) |
1006 | // |
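//    For illustration (UNIX_AMD64_ABI, hypothetical 16-byte struct passed in two registers), the
//    resulting shape is roughly:
//
//        GT_FIELD_LIST
//          |__ GT_PUTARG_REG (first eightbyte)
//          |__ GT_PUTARG_REG (second eightbyte)
//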
1007 | GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* info, var_types type) |
1008 | { |
1009 | assert(call != nullptr); |
1010 | assert(arg != nullptr); |
1011 | assert(info != nullptr); |
1012 | |
1013 | GenTree* putArg = nullptr; |
1014 | bool updateArgTable = true; |
1015 | |
1016 | bool isOnStack = true; |
1017 | isOnStack = info->regNum == REG_STK; |
1018 | |
1019 | #ifdef _TARGET_ARMARCH_ |
1020 | // Mark contained when we pass struct |
1021 | // GT_FIELD_LIST is always marked contained when it is generated |
1022 | if (type == TYP_STRUCT) |
1023 | { |
1024 | arg->SetContained(); |
1025 | if ((arg->OperGet() == GT_OBJ) && (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR)) |
1026 | { |
1027 | MakeSrcContained(arg, arg->AsObj()->Addr()); |
1028 | } |
1029 | } |
1030 | #endif |
1031 | |
1032 | #if FEATURE_ARG_SPLIT |
1033 | // Struct can be split into register(s) and stack on ARM |
1034 | if (info->isSplit) |
1035 | { |
1036 | assert(arg->OperGet() == GT_OBJ || arg->OperGet() == GT_FIELD_LIST); |
1037 | // TODO: Need to check correctness for FastTailCall |
1038 | if (call->IsFastTailCall()) |
1039 | { |
1040 | #ifdef _TARGET_ARM_ |
            NYI_ARM("lower: struct argument by fast tail call");
1042 | #endif // _TARGET_ARM_ |
1043 | } |
1044 | |
1045 | putArg = new (comp, GT_PUTARG_SPLIT) |
1046 | GenTreePutArgSplit(arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), info->numRegs, |
1047 | call->IsFastTailCall(), call); |
1048 | |
        // If the struct argument is morphed to GT_FIELD_LIST node(s),
        // we can get the GC info from the type of each GT_FIELD_LIST node,
        // so we skip setting the GC pointer info here.
        //
1053 | GenTreePutArgSplit* argSplit = putArg->AsPutArgSplit(); |
1054 | for (unsigned regIndex = 0; regIndex < info->numRegs; regIndex++) |
1055 | { |
1056 | argSplit->SetRegNumByIdx(info->getRegNum(regIndex), regIndex); |
1057 | } |
1058 | |
1059 | if (arg->OperGet() == GT_OBJ) |
1060 | { |
1061 | BYTE* gcLayout = nullptr; |
1062 | unsigned numRefs = 0; |
1063 | GenTreeObj* argObj = arg->AsObj(); |
1064 | |
1065 | if (argObj->IsGCInfoInitialized()) |
1066 | { |
1067 | gcLayout = argObj->gtGcPtrs; |
1068 | numRefs = argObj->GetGcPtrCount(); |
1069 | } |
1070 | else |
1071 | { |
1072 | // Set GC Pointer info |
1073 | gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots + info->numRegs]; |
1074 | numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); |
1075 | argSplit->setGcPointers(numRefs, gcLayout); |
1076 | } |
1077 | |
1078 | // Set type of registers |
1079 | for (unsigned index = 0; index < info->numRegs; index++) |
1080 | { |
1081 | var_types regType = comp->getJitGCType(gcLayout[index]); |
1082 | // Account for the possibility that float fields may be passed in integer registers. |
1083 | if (varTypeIsFloating(regType) && !genIsValidFloatReg(argSplit->GetRegNumByIdx(index))) |
1084 | { |
1085 | regType = (regType == TYP_FLOAT) ? TYP_INT : TYP_LONG; |
1086 | } |
1087 | argSplit->m_regType[index] = regType; |
1088 | } |
1089 | } |
1090 | else |
1091 | { |
1092 | GenTreeFieldList* fieldListPtr = arg->AsFieldList(); |
1093 | for (unsigned index = 0; index < info->numRegs; fieldListPtr = fieldListPtr->Rest(), index++) |
1094 | { |
1095 | var_types regType = fieldListPtr->gtGetOp1()->TypeGet(); |
1096 | // Account for the possibility that float fields may be passed in integer registers. |
1097 | if (varTypeIsFloating(regType) && !genIsValidFloatReg(argSplit->GetRegNumByIdx(index))) |
1098 | { |
1099 | regType = (regType == TYP_FLOAT) ? TYP_INT : TYP_LONG; |
1100 | } |
1101 | argSplit->m_regType[index] = regType; |
1102 | |
1103 | // Clear the register assignments on the fieldList nodes, as these are contained. |
1104 | fieldListPtr->gtRegNum = REG_NA; |
1105 | } |
1106 | } |
1107 | } |
1108 | else |
1109 | #endif // FEATURE_ARG_SPLIT |
1110 | { |
1111 | if (!isOnStack) |
1112 | { |
1113 | #if FEATURE_MULTIREG_ARGS |
1114 | if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST)) |
1115 | { |
1116 | assert(arg->OperGet() == GT_FIELD_LIST); |
1117 | |
1118 | assert(arg->AsFieldList()->IsFieldListHead()); |
1119 | unsigned int regIndex = 0; |
1120 | for (GenTreeFieldList* fieldListPtr = arg->AsFieldList(); fieldListPtr != nullptr; |
1121 | fieldListPtr = fieldListPtr->Rest()) |
1122 | { |
1123 | regNumber argReg = info->getRegNum(regIndex); |
1124 | GenTree* curOp = fieldListPtr->gtOp.gtOp1; |
1125 | var_types curTyp = curOp->TypeGet(); |
1126 | |
1127 | // Create a new GT_PUTARG_REG node with op1 |
1128 | GenTree* newOper = comp->gtNewPutArgReg(curTyp, curOp, argReg); |
1129 | |
1130 | // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST |
1131 | ReplaceArgWithPutArgOrBitcast(&fieldListPtr->gtOp.gtOp1, newOper); |
1132 | regIndex++; |
1133 | |
1134 | // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal. |
1135 | fieldListPtr->gtRegNum = REG_NA; |
1136 | } |
1137 | |
1138 | // Just return arg. The GT_FIELD_LIST is not replaced. |
1139 | // Nothing more to do. |
1140 | return arg; |
1141 | } |
1142 | else |
1143 | #endif // FEATURE_MULTIREG_ARGS |
1144 | { |
1145 | putArg = comp->gtNewPutArgReg(type, arg, info->regNum); |
1146 | } |
1147 | } |
1148 | else |
1149 | { |
            // Mark this one as a tail call arg if it is a fast tail call.
            // This provides the info needed to put this argument in the in-coming arg area slot
            // instead of in the out-going arg area slot.
1153 | |
1154 | // Make sure state is correct. The PUTARG_STK has TYP_VOID, as it doesn't produce |
1155 | // a result. So the type of its operand must be the correct type to push on the stack. |
1156 | // For a FIELD_LIST, this will be the type of the field (not the type of the arg), |
1157 | // but otherwise it is generally the type of the operand. |
1158 | info->checkIsStruct(); |
1159 | if ((arg->OperGet() != GT_FIELD_LIST)) |
1160 | { |
1161 | #if defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK) |
1162 | if (type == TYP_SIMD12) |
1163 | { |
1164 | assert(info->numSlots == 3); |
1165 | } |
1166 | else |
1167 | #endif // defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK) |
1168 | { |
1169 | assert(genActualType(arg->TypeGet()) == type); |
1170 | } |
1171 | } |
1172 | |
1173 | putArg = |
1174 | new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg, |
1175 | info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), |
1176 | call->IsFastTailCall(), call); |
1177 | |
1178 | #ifdef FEATURE_PUT_STRUCT_ARG_STK |
            // If the ArgTabEntry indicates that this arg is a struct,
            // get and store the number of slots that are references.
            // This is later used in the codegen of the PUT_ARG_STK implementation
            // for structs, to decide whether, and how many, single eight-byte copies
            // need to be done (only for reference slots), so that GC info is emitted.
            // For non-reference slots faster/smaller-size instructions are used -
            // pair copying using XMM registers or rep mov instructions.
1186 | if (info->isStruct) |
1187 | { |
1188 | // We use GT_OBJ only for non-lclVar, non-SIMD, non-FIELD_LIST struct arguments. |
1189 | if (arg->OperIsLocal()) |
1190 | { |
1191 | // This must have a type with a known size (SIMD or has been morphed to a primitive type). |
1192 | assert(arg->TypeGet() != TYP_STRUCT); |
1193 | } |
1194 | else if (arg->OperIs(GT_OBJ)) |
1195 | { |
1196 | unsigned numRefs = 0; |
1197 | BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots]; |
1198 | assert(!varTypeIsSIMD(arg)); |
1199 | numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); |
1200 | putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout); |
1201 | |
1202 | #ifdef _TARGET_X86_ |
                    // On x86 the VM lies about the type of a struct containing a pointer-sized
                    // integer field by returning the type of its field as the type of the struct.
                    // Such a struct can be passed in a register depending on its position in the
                    // parameter list. The VM does this unwrapping only one level deep, and therefore
                    // a type like struct Foo { struct Bar { int f } } always needs to be
                    // passed on the stack. Also, the VM doesn't lie about the type of such a struct
                    // when it is a field of another struct. That is, the VM doesn't lie about
                    // the type of Foo.Bar.
1211 | // |
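                    // For illustration (hypothetical shapes, following the description above):
                    //
                    //    struct Bar { int f; }   // the VM reports the type of the single int field as the
                    //                            // type of Bar, so Bar may be passed in a register
                    //    struct Foo { Bar b; }   // the unwrapping is only one level deep, so Foo always
                    //                            // needs to be passed on the stack
                    //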
1212 | // We now support the promotion of fields that are of type struct. |
1213 | // However we only support a limited case where the struct field has a |
1214 | // single field and that single field must be a scalar type. Say Foo.Bar |
1215 | // field is getting passed as a parameter to a call, Since it is a TYP_STRUCT, |
1216 | // as per x86 ABI it should always be passed on stack. Therefore GenTree |
1217 | // node under a PUTARG_STK could be GT_OBJ(GT_LCL_VAR_ADDR(v1)), where |
1218 | // local v1 could be a promoted field standing for Foo.Bar. Note that |
1219 | // the type of v1 will be the type of field of Foo.Bar.f when Foo is |
1220 | // promoted. That is v1 will be a scalar type. In this case we need to |
1221 | // pass v1 on stack instead of in a register. |
1222 | // |
1223 | // TODO-PERF: replace GT_OBJ(GT_LCL_VAR_ADDR(v1)) with v1 if v1 is |
1224 | // a scalar type and the width of GT_OBJ matches the type size of v1. |
1225 | // Note that this cannot be done till call node arguments are morphed |
1226 | // because we should not lose the fact that the type of argument is |
1227 | // a struct so that the arg gets correctly marked to be passed on stack. |
1228 | GenTree* objOp1 = arg->gtGetOp1(); |
1229 | if (objOp1->OperGet() == GT_LCL_VAR_ADDR) |
1230 | { |
1231 | unsigned lclNum = objOp1->AsLclVarCommon()->GetLclNum(); |
1232 | if (comp->lvaTable[lclNum].lvType != TYP_STRUCT) |
1233 | { |
1234 | comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_VMNeedsStackAddr)); |
1235 | } |
1236 | } |
1237 | #endif // _TARGET_X86_ |
1238 | } |
1239 | else if (!arg->OperIs(GT_FIELD_LIST)) |
1240 | { |
1241 | assert(varTypeIsSIMD(arg) || (info->numSlots == 1)); |
1242 | } |
1243 | } |
1244 | #endif // FEATURE_PUT_STRUCT_ARG_STK |
1245 | } |
1246 | } |
1247 | |
    JITDUMP("new node is : ");
    DISPNODE(putArg);
    JITDUMP("\n");
1251 | |
1252 | if (arg->gtFlags & GTF_LATE_ARG) |
1253 | { |
1254 | putArg->gtFlags |= GTF_LATE_ARG; |
1255 | } |
1256 | else if (updateArgTable) |
1257 | { |
1258 | info->node = putArg; |
1259 | } |
1260 | return putArg; |
1261 | } |
1262 | |
1263 | //------------------------------------------------------------------------ |
1264 | // LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between |
1265 | // the argument evaluation and the call. This is the point at which the source is |
1266 | // consumed and the value transitions from control of the register allocator to the calling |
1267 | // convention. |
1268 | // |
1269 | // Arguments: |
1270 | // call - The call node |
1271 | // ppArg - Pointer to the call argument pointer. We might replace the call argument by |
1272 | // changing *ppArg. |
1273 | // |
1274 | // Return Value: |
1275 | // None. |
1276 | // |
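//    For illustration (hypothetical scalar argument passed in a register), the LIR goes from
//
//        t1 = LCL_VAR V01
//        CALL(..., t1, ...)
//
//    to
//
//        t1 = LCL_VAR V01
//        t2 = PUTARG_REG t1
//        CALL(..., t2, ...)
//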
1277 | void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) |
1278 | { |
1279 | GenTree* arg = *ppArg; |
1280 | |
    JITDUMP("lowering arg : ");
1282 | DISPNODE(arg); |
1283 | |
1284 | // No assignments should remain by Lowering. |
1285 | assert(!arg->OperIs(GT_ASG)); |
1286 | assert(!arg->OperIsPutArgStk()); |
1287 | |
1288 | // Assignments/stores at this level are not really placing an argument. |
1289 | // They are setting up temporary locals that will later be placed into |
1290 | // outgoing regs or stack. |
1291 | // Note that atomic ops may be stores and still produce a value. |
1292 | if (!arg->IsValue()) |
1293 | { |
1294 | assert((arg->OperIsStore() && !arg->IsValue()) || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || |
1295 | arg->OperIsCopyBlkOp()); |
1296 | return; |
1297 | } |
1298 | |
1299 | fgArgTabEntry* info = comp->gtArgEntryByNode(call, arg); |
1300 | assert(info->node == arg); |
1301 | var_types type = arg->TypeGet(); |
1302 | |
1303 | if (varTypeIsSmall(type)) |
1304 | { |
        // Normalize 'type'; it represents the item that we will be storing in the Outgoing Args.
        type = TYP_INT;
1307 | } |
1308 | |
1309 | #if defined(FEATURE_SIMD) |
1310 | #if defined(_TARGET_X86_) |
1311 | // Non-param TYP_SIMD12 local var nodes are massaged in Lower to TYP_SIMD16 to match their |
1312 | // allocated size (see lvSize()). However, when passing the variables as arguments, and |
1313 | // storing the variables to the outgoing argument area on the stack, we must use their |
1314 | // actual TYP_SIMD12 type, so exactly 12 bytes is allocated and written. |
1315 | if (type == TYP_SIMD16) |
1316 | { |
1317 | if ((arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_STORE_LCL_VAR)) |
1318 | { |
1319 | unsigned varNum = arg->AsLclVarCommon()->GetLclNum(); |
1320 | LclVarDsc* varDsc = &comp->lvaTable[varNum]; |
1321 | type = varDsc->lvType; |
1322 | } |
1323 | else if (arg->OperGet() == GT_SIMD) |
1324 | { |
1325 | assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12)); |
1326 | |
1327 | if (arg->AsSIMD()->gtSIMDSize == 12) |
1328 | { |
1329 | type = TYP_SIMD12; |
1330 | } |
1331 | } |
1332 | } |
1333 | #elif defined(_TARGET_AMD64_) |
1334 | // TYP_SIMD8 parameters that are passed as longs |
1335 | if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum)) |
1336 | { |
1337 | GenTreeUnOp* bitcast = new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, TYP_LONG, arg, nullptr); |
1338 | BlockRange().InsertAfter(arg, bitcast); |
1339 | |
1340 | info->node = *ppArg = arg = bitcast; |
1341 | type = TYP_LONG; |
1342 | } |
1343 | #endif // defined(_TARGET_X86_) |
1344 | #endif // defined(FEATURE_SIMD) |
1345 | |
1346 | // If we hit this we are probably double-lowering. |
1347 | assert(!arg->OperIsPutArg()); |
1348 | |
1349 | #if !defined(_TARGET_64BIT_) |
1350 | if (varTypeIsLong(type)) |
1351 | { |
1352 | bool isReg = (info->regNum != REG_STK); |
1353 | if (isReg) |
1354 | { |
1355 | noway_assert(arg->OperGet() == GT_LONG); |
1356 | assert(info->numRegs == 2); |
1357 | |
1358 | GenTree* argLo = arg->gtGetOp1(); |
1359 | GenTree* argHi = arg->gtGetOp2(); |
1360 | |
1361 | GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr); |
1362 | // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence. |
1363 | (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList); |
1364 | GenTree* putArg = NewPutArg(call, fieldList, info, type); |
1365 | |
1366 | BlockRange().InsertBefore(arg, putArg); |
1367 | BlockRange().Remove(arg); |
1368 | *ppArg = fieldList; |
1369 | info->node = fieldList; |
1370 | } |
1371 | else |
1372 | { |
1373 | assert(arg->OperGet() == GT_LONG); |
1374 | // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK. |
1375 | // Although the hi argument needs to be pushed first, that will be handled by the general case, |
1376 | // in which the fields will be reversed. |
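            //
            // For illustration (hypothetical stack-passed long), the resulting shape is roughly:
            //
            //     GT_PUTARG_STK (2 slots)
            //       |__ GT_FIELD_LIST
            //             |__ argLo @ offset 0 (TYP_INT)
            //             |__ argHi @ offset 4 (TYP_INT)
            //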
1377 | assert(info->numSlots == 2); |
1378 | GenTree* argLo = arg->gtGetOp1(); |
1379 | GenTree* argHi = arg->gtGetOp2(); |
1380 | GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr); |
1381 | // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence. |
1382 | (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList); |
1383 | GenTree* putArg = NewPutArg(call, fieldList, info, type); |
1384 | putArg->gtRegNum = info->regNum; |
1385 | |
1386 | // We can't call ReplaceArgWithPutArgOrBitcast here because it presumes that we are keeping the original |
1387 | // arg. |
1388 | BlockRange().InsertBefore(arg, fieldList, putArg); |
1389 | BlockRange().Remove(arg); |
1390 | *ppArg = putArg; |
1391 | } |
1392 | } |
1393 | else |
1394 | #endif // !defined(_TARGET_64BIT_) |
1395 | { |
1396 | |
1397 | #ifdef _TARGET_ARMARCH_ |
1398 | if (call->IsVarargs() || comp->opts.compUseSoftFP) |
1399 | { |
// For a vararg call or on armel (soft FP), register args must all be integer.
// Insert copies as needed to move float values into integer registers.
1402 | GenTree* newNode = LowerFloatArg(ppArg, info); |
1403 | if (newNode != nullptr) |
1404 | { |
1405 | type = newNode->TypeGet(); |
1406 | } |
1407 | } |
1408 | #endif // _TARGET_ARMARCH_ |
1409 | |
1410 | GenTree* putArg = NewPutArg(call, arg, info, type); |
1411 | |
// In the case of a struct passed in registers (one or two), NewPutArg returns a new node
// (a GT_PUTARG_REG, or a GT_FIELD_LIST with two GT_PUTARG_REGs).
// If a new node is returned, splice it into the right place in the tree.
1415 | if (arg != putArg) |
1416 | { |
1417 | ReplaceArgWithPutArgOrBitcast(ppArg, putArg); |
1418 | } |
1419 | } |
1420 | } |
1421 | |
1422 | #ifdef _TARGET_ARMARCH_ |
1423 | //------------------------------------------------------------------------ |
// LowerFloatArg: Lower float call arguments on the ARM platform.
1425 | // |
1426 | // Arguments: |
// pArg - Pointer to the arg node
1428 | // info - call argument info |
1429 | // |
1430 | // Return Value: |
// Return nullptr if no transformation was done;
// return arg if the transformation was done in place;
// return a new tree if the root was changed.
1434 | // |
1435 | // Notes: |
1436 | // This must handle scalar float arguments as well as GT_FIELD_LISTs |
1437 | // with floating point fields. |
1438 | // |
1439 | GenTree* Lowering::LowerFloatArg(GenTree** pArg, fgArgTabEntry* info) |
1440 | { |
1441 | GenTree* arg = *pArg; |
1442 | if (info->regNum != REG_STK) |
1443 | { |
1444 | if (arg->OperIsFieldList()) |
1445 | { |
1446 | GenTreeFieldList* currListNode = arg->AsFieldList(); |
1447 | regNumber currRegNumber = info->regNum; |
1448 | |
1449 | // Transform fields that are passed as registers in place. |
1450 | unsigned fieldRegCount; |
1451 | for (unsigned i = 0; i < info->numRegs; i += fieldRegCount) |
1452 | { |
1453 | assert(currListNode != nullptr); |
1454 | GenTree* node = currListNode->Current(); |
1455 | if (varTypeIsFloating(node)) |
1456 | { |
1457 | GenTree* intNode = LowerFloatArgReg(node, currRegNumber); |
1458 | assert(intNode != nullptr); |
1459 | |
1460 | ReplaceArgWithPutArgOrBitcast(currListNode->pCurrent(), intNode); |
1461 | currListNode->ChangeType(intNode->TypeGet()); |
1462 | } |
1463 | |
1464 | if (node->TypeGet() == TYP_DOUBLE) |
1465 | { |
1466 | currRegNumber = REG_NEXT(REG_NEXT(currRegNumber)); |
1467 | fieldRegCount = 2; |
1468 | } |
1469 | else |
1470 | { |
1471 | currRegNumber = REG_NEXT(currRegNumber); |
1472 | fieldRegCount = 1; |
1473 | } |
1474 | currListNode = currListNode->Rest(); |
1475 | } |
1476 | // List fields were replaced in place. |
1477 | return arg; |
1478 | } |
1479 | else if (varTypeIsFloating(arg)) |
1480 | { |
1481 | GenTree* intNode = LowerFloatArgReg(arg, info->regNum); |
1482 | assert(intNode != nullptr); |
1483 | ReplaceArgWithPutArgOrBitcast(pArg, intNode); |
1484 | return *pArg; |
1485 | } |
1486 | } |
1487 | return nullptr; |
1488 | } |
1489 | |
1490 | //------------------------------------------------------------------------ |
1491 | // LowerFloatArgReg: Lower the float call argument node that is passed via register. |
1492 | // |
1493 | // Arguments: |
1494 | // arg - The arg node |
1495 | // regNum - register number |
1496 | // |
1497 | // Return Value: |
// Return the new bitcast node that moves the float value to an integer register.
1499 | // |
1500 | GenTree* Lowering::LowerFloatArgReg(GenTree* arg, regNumber regNum) |
1501 | { |
1502 | var_types floatType = arg->TypeGet(); |
1503 | assert(varTypeIsFloating(floatType)); |
1504 | var_types intType = (floatType == TYP_DOUBLE) ? TYP_LONG : TYP_INT; |
1505 | GenTree* intArg = comp->gtNewBitCastNode(intType, arg); |
1506 | intArg->gtRegNum = regNum; |
1507 | #ifdef _TARGET_ARM_ |
1508 | if (floatType == TYP_DOUBLE) |
1509 | { |
1510 | regNumber nextReg = REG_NEXT(regNum); |
1511 | intArg->AsMultiRegOp()->gtOtherReg = nextReg; |
1512 | } |
1513 | #endif |
1514 | return intArg; |
1515 | } |
1516 | #endif |
1517 | |
1518 | // do lowering steps for each arg of a call |
1519 | void Lowering::LowerArgsForCall(GenTreeCall* call) |
1520 | { |
1521 | JITDUMP("objp:\n======\n" ); |
1522 | if (call->gtCallObjp) |
1523 | { |
1524 | LowerArg(call, &call->gtCallObjp); |
1525 | } |
1526 | |
1527 | GenTreeArgList* args = call->gtCallArgs; |
1528 | |
1529 | JITDUMP("\nargs:\n======\n" ); |
1530 | for (; args; args = args->Rest()) |
1531 | { |
1532 | LowerArg(call, &args->Current()); |
1533 | } |
1534 | |
1535 | JITDUMP("\nlate:\n======\n" ); |
1536 | for (args = call->gtCallLateArgs; args; args = args->Rest()) |
1537 | { |
1538 | LowerArg(call, &args->Current()); |
1539 | } |
1540 | } |
1541 | |
// helper that creates a node representing a relocatable physical address computation
1543 | GenTree* Lowering::AddrGen(ssize_t addr) |
1544 | { |
// this should end up in codegen as: instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, addr)
1546 | GenTree* result = comp->gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR); |
1547 | return result; |
1548 | } |
1549 | |
1550 | // variant that takes a void* |
1551 | GenTree* Lowering::AddrGen(void* addr) |
1552 | { |
1553 | return AddrGen((ssize_t)addr); |
1554 | } |
1555 | |
1556 | // do lowering steps for a call |
1557 | // this includes: |
1558 | // - adding the placement nodes (either stack or register variety) for arguments |
1559 | // - lowering the expression that calculates the target address |
1560 | // - adding nodes for other operations that occur after the call sequence starts and before |
1561 | // control transfer occurs (profiling and tail call helpers, pinvoke incantations) |
1562 | // |
1563 | void Lowering::LowerCall(GenTree* node) |
1564 | { |
1565 | GenTreeCall* call = node->AsCall(); |
1566 | |
1567 | JITDUMP("lowering call (before):\n" ); |
1568 | DISPTREERANGE(BlockRange(), call); |
1569 | JITDUMP("\n" ); |
1570 | |
1571 | call->ClearOtherRegs(); |
1572 | LowerArgsForCall(call); |
1573 | |
1574 | // note that everything generated from this point on runs AFTER the outgoing args are placed |
1575 | GenTree* controlExpr = nullptr; |
1576 | |
1577 | // for x86, this is where we record ESP for checking later to make sure stack is balanced |
1578 | |
1579 | // Check for Delegate.Invoke(). If so, we inline it. We get the |
1580 | // target-object and target-function from the delegate-object, and do |
1581 | // an indirect call. |
1582 | if (call->IsDelegateInvoke()) |
1583 | { |
1584 | controlExpr = LowerDelegateInvoke(call); |
1585 | } |
1586 | else |
1587 | { |
1588 | // Virtual and interface calls |
1589 | switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK) |
1590 | { |
1591 | case GTF_CALL_VIRT_STUB: |
1592 | controlExpr = LowerVirtualStubCall(call); |
1593 | break; |
1594 | |
1595 | case GTF_CALL_VIRT_VTABLE: |
1596 | // stub dispatching is off or this is not a virtual call (could be a tailcall) |
1597 | controlExpr = LowerVirtualVtableCall(call); |
1598 | break; |
1599 | |
1600 | case GTF_CALL_NONVIRT: |
1601 | if (call->IsUnmanaged()) |
1602 | { |
1603 | controlExpr = LowerNonvirtPinvokeCall(call); |
1604 | } |
1605 | else if (call->gtCallType == CT_INDIRECT) |
1606 | { |
1607 | controlExpr = LowerIndirectNonvirtCall(call); |
1608 | } |
1609 | else |
1610 | { |
1611 | controlExpr = LowerDirectCall(call); |
1612 | } |
1613 | break; |
1614 | |
1615 | default: |
1616 | noway_assert(!"strange call type" ); |
1617 | break; |
1618 | } |
1619 | } |
1620 | |
1621 | if (call->IsTailCallViaHelper()) |
1622 | { |
1623 | // Either controlExpr or gtCallAddr must contain real call target. |
1624 | if (controlExpr == nullptr) |
1625 | { |
1626 | assert(call->gtCallType == CT_INDIRECT); |
1627 | assert(call->gtCallAddr != nullptr); |
1628 | controlExpr = call->gtCallAddr; |
1629 | } |
1630 | |
1631 | controlExpr = LowerTailCallViaHelper(call, controlExpr); |
1632 | } |
1633 | |
1634 | if (controlExpr != nullptr) |
1635 | { |
1636 | LIR::Range controlExprRange = LIR::SeqTree(comp, controlExpr); |
1637 | |
1638 | JITDUMP("results of lowering call:\n" ); |
1639 | DISPRANGE(controlExprRange); |
1640 | |
1641 | GenTree* insertionPoint = call; |
1642 | if (!call->IsTailCallViaHelper()) |
1643 | { |
1644 | // The controlExpr should go before the gtCallCookie and the gtCallAddr, if they exist |
1645 | // |
1646 | // TODO-LIR: find out what's really required here, as this is currently a tree order |
1647 | // dependency. |
1648 | if (call->gtCallType == CT_INDIRECT) |
1649 | { |
1650 | bool isClosed = false; |
1651 | if (call->gtCallCookie != nullptr) |
1652 | { |
1653 | #ifdef DEBUG |
1654 | GenTree* firstCallAddrNode = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode(); |
1655 | assert(isClosed); |
1656 | assert(call->gtCallCookie->Precedes(firstCallAddrNode)); |
1657 | #endif // DEBUG |
1658 | |
1659 | insertionPoint = BlockRange().GetTreeRange(call->gtCallCookie, &isClosed).FirstNode(); |
1660 | assert(isClosed); |
1661 | } |
1662 | else if (call->gtCallAddr != nullptr) |
1663 | { |
1664 | insertionPoint = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode(); |
1665 | assert(isClosed); |
1666 | } |
1667 | } |
1668 | } |
1669 | |
1670 | ContainCheckRange(controlExprRange); |
1671 | BlockRange().InsertBefore(insertionPoint, std::move(controlExprRange)); |
1672 | |
1673 | call->gtControlExpr = controlExpr; |
1674 | } |
1675 | if (call->IsFastTailCall()) |
1676 | { |
// Lowering a fast tail call can introduce new temps to set up args correctly for the Callee.
// This involves patching LCL_VAR and LCL_VAR_ADDR nodes holding Caller stack args
// and replacing them with a new temp. The control expression can also contain nodes that
// need to be patched.
// Therefore fast tail call lowering must be done after controlExpr is inserted into LIR.
// One side effect is that the order of the PInvoke method epilog (PME) and the control
// expression is flipped, since LowerFastTailCall calls InsertPInvokeMethodEpilog.
1684 | LowerFastTailCall(call); |
1685 | } |
1686 | |
1687 | if (comp->opts.IsJit64Compat()) |
1688 | { |
1689 | CheckVSQuirkStackPaddingNeeded(call); |
1690 | } |
1691 | |
1692 | ContainCheckCallOperands(call); |
1693 | JITDUMP("lowering call (after):\n" ); |
1694 | DISPTREERANGE(BlockRange(), call); |
1695 | JITDUMP("\n" ); |
1696 | } |
1697 | |
// Though the issue described below is fixed in the IntelliTrace dll of VS2015 (a.k.a. Dev14),
// we still need this quirk for desktop so that older versions of VS (e.g. VS2010/2012)
// continue to work.
1701 | // This quirk is excluded from other targets that have no back compat burden. |
1702 | // |
1703 | // Quirk for VS debug-launch scenario to work: |
1704 | // See if this is a PInvoke call with exactly one param that is the address of a struct local. |
1705 | // In such a case indicate to frame-layout logic to add 16-bytes of padding |
1706 | // between save-reg area and locals. This is to protect against the buffer |
1707 | // overrun bug in microsoft.intellitrace.11.0.0.dll!ProfilerInterop.InitInterop(). |
1708 | // |
1709 | // A work-around to this bug is to disable IntelliTrace debugging |
1710 | // (VS->Tools->Options->IntelliTrace->Enable IntelliTrace - uncheck this option). |
1711 | // The reason why this works on Jit64 is that at the point of AV the call stack is |
1712 | // |
1713 | // GetSystemInfo() Native call |
1714 | // IL_Stub generated for PInvoke declaration. |
1715 | // ProfilerInterface::InitInterop() |
1716 | // ProfilerInterface.Cctor() |
1717 | // VM asm worker |
1718 | // |
// The cctor body has just the call to InitInterop(). The VM asm worker is holding
// something in rbx that is used immediately after the Cctor call. The Jit64-generated
// InitInterop() method pushes the registers in the following order
1722 | // |
1723 | // rbx |
1724 | // rbp |
1725 | // rsi |
1726 | // rdi |
1727 | // r12 |
1728 | // r13 |
1729 | // Struct local |
1730 | // |
// Due to the buffer overrun, rbx doesn't get impacted, whereas RyuJIT-jitted code of
// the same method pushes regs in the following order
1733 | // |
1734 | // rbp |
1735 | // rdi |
1736 | // rsi |
1737 | // rbx |
1738 | // struct local |
1739 | // |
1740 | // Therefore as a fix, we add padding between save-reg area and locals to |
1741 | // make this scenario work against JB. |
1742 | // |
// Note: If this quirk gets broken due to other JIT optimizations, we should consider a
// more tolerant fix. One such fix is to pad the struct.
1745 | void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call) |
1746 | { |
1747 | assert(comp->opts.IsJit64Compat()); |
1748 | |
1749 | #ifdef _TARGET_AMD64_ |
1750 | // Confine this to IL stub calls which aren't marked as unmanaged. |
1751 | if (call->IsPInvoke() && !call->IsUnmanaged()) |
1752 | { |
1753 | bool paddingNeeded = false; |
1754 | GenTree* firstPutArgReg = nullptr; |
1755 | for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest()) |
1756 | { |
1757 | GenTree* tmp = args->Current(); |
1758 | if (tmp->OperGet() == GT_PUTARG_REG) |
1759 | { |
1760 | if (firstPutArgReg == nullptr) |
1761 | { |
1762 | firstPutArgReg = tmp; |
1763 | GenTree* op1 = firstPutArgReg->gtOp.gtOp1; |
1764 | |
1765 | if (op1->OperGet() == GT_LCL_VAR_ADDR) |
1766 | { |
1767 | unsigned lclNum = op1->AsLclVarCommon()->GetLclNum(); |
1768 | // TODO-1stClassStructs: This is here to duplicate previous behavior, |
1769 | // but is not needed because the scenario being quirked did not involve |
1770 | // a SIMD or enregisterable struct. |
1771 | // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT) |
1772 | if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet())) |
1773 | { |
1774 | // First arg is addr of a struct local. |
1775 | paddingNeeded = true; |
1776 | } |
1777 | else |
1778 | { |
1779 | // Not a struct local. |
1780 | assert(paddingNeeded == false); |
1781 | break; |
1782 | } |
1783 | } |
1784 | else |
1785 | { |
1786 | // First arg is not a local var addr. |
1787 | assert(paddingNeeded == false); |
1788 | break; |
1789 | } |
1790 | } |
1791 | else |
1792 | { |
1793 | // Has more than one arg. |
1794 | paddingNeeded = false; |
1795 | break; |
1796 | } |
1797 | } |
1798 | } |
1799 | |
1800 | if (paddingNeeded) |
1801 | { |
1802 | comp->compVSQuirkStackPaddingNeeded = VSQUIRK_STACK_PAD; |
1803 | } |
1804 | } |
1805 | #endif // _TARGET_AMD64_ |
1806 | } |
1807 | |
1808 | // Inserts profiler hook, GT_PROF_HOOK for a tail call node. |
1809 | // |
1810 | // AMD64: |
1811 | // We need to insert this after all nested calls, but before all the arguments to this call have been set up. |
1812 | // To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before |
1813 | // that. If there are no args, then it should be inserted before the call node. |
1814 | // |
1815 | // For example: |
1816 | // * stmtExpr void (top level) (IL 0x000...0x010) |
1817 | // arg0 SETUP | /--* argPlace ref REG NA $c5 |
1818 | // this in rcx | | /--* argPlace ref REG NA $c1 |
1819 | // | | | /--* call ref System.Globalization.CultureInfo.get_InvariantCulture $c2 |
1820 | // arg1 SETUP | | +--* st.lclVar ref V02 tmp1 REG NA $c2 |
1821 | // | | | /--* lclVar ref V02 tmp1 u : 2 (last use) REG NA $c2 |
1822 | // arg1 in rdx | | +--* putarg_reg ref REG NA |
1823 | // | | | /--* lclVar ref V00 arg0 u : 2 (last use) REG NA $80 |
1824 | // this in rcx | | +--* putarg_reg ref REG NA |
1825 | // | | /--* call nullcheck ref System.String.ToLower $c5 |
1826 | // | | { * stmtExpr void (embedded)(IL 0x000... ? ? ? ) |
1827 | // | | { \--* prof_hook void REG NA |
1828 | // arg0 in rcx | +--* putarg_reg ref REG NA |
1829 | // control expr | +--* const(h) long 0x7ffe8e910e98 ftn REG NA |
1830 | // \--* call void System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void |
1831 | // |
1832 | // In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call |
1833 | // (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call. |
1834 | // |
1835 | // X86: |
1836 | // Insert the profiler hook immediately before the call. The profiler hook will preserve |
1837 | // all argument registers (ECX, EDX), but nothing else. |
1838 | // |
1839 | // Params: |
1840 | // callNode - tail call node |
1841 | // insertionPoint - if non-null, insert the profiler hook before this point. |
1842 | // If null, insert the profiler hook before args are setup |
1843 | // but after all arg side effects are computed. |
1844 | // |
1845 | void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint) |
1846 | { |
1847 | assert(call->IsTailCall()); |
1848 | assert(comp->compIsProfilerHookNeeded()); |
1849 | |
1850 | #if defined(_TARGET_X86_) |
1851 | |
1852 | if (insertionPoint == nullptr) |
1853 | { |
1854 | insertionPoint = call; |
1855 | } |
1856 | |
1857 | #else // !defined(_TARGET_X86_) |
1858 | |
1859 | if (insertionPoint == nullptr) |
1860 | { |
1861 | GenTree* tmp = nullptr; |
1862 | for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest()) |
1863 | { |
1864 | tmp = args->Current(); |
1865 | assert(tmp->OperGet() != GT_PUTARG_REG); // We don't expect to see these in gtCallArgs |
1866 | if (tmp->OperGet() == GT_PUTARG_STK) |
1867 | { |
1868 | // found it |
1869 | insertionPoint = tmp; |
1870 | break; |
1871 | } |
1872 | } |
1873 | |
1874 | if (insertionPoint == nullptr) |
1875 | { |
1876 | for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest()) |
1877 | { |
1878 | tmp = args->Current(); |
1879 | if ((tmp->OperGet() == GT_PUTARG_REG) || (tmp->OperGet() == GT_PUTARG_STK)) |
1880 | { |
1881 | // found it |
1882 | insertionPoint = tmp; |
1883 | break; |
1884 | } |
1885 | } |
1886 | |
1887 | // If there are no args, insert before the call node |
1888 | if (insertionPoint == nullptr) |
1889 | { |
1890 | insertionPoint = call; |
1891 | } |
1892 | } |
1893 | } |
1894 | |
1895 | #endif // !defined(_TARGET_X86_) |
1896 | |
1897 | assert(insertionPoint != nullptr); |
1898 | GenTree* profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID); |
1899 | BlockRange().InsertBefore(insertionPoint, profHookNode); |
1900 | } |
1901 | |
1902 | // Lower fast tail call implemented as epilog+jmp. |
1903 | // Also inserts PInvoke method epilog if required. |
1904 | void Lowering::LowerFastTailCall(GenTreeCall* call) |
1905 | { |
1906 | #if FEATURE_FASTTAILCALL |
// Tail call restrictions, i.e. conditions under which the tail prefix is ignored.
// Most of these checks are already done by the importer or fgMorphTailCall().
// This serves as a double sanity check.
1910 | assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods |
1911 | assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check |
assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
1913 | assert(!comp->compLocallocUsed); // tail call from methods that also do localloc |
1914 | |
1915 | #ifdef _TARGET_AMD64_ |
1916 | assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check |
1917 | #endif // _TARGET_AMD64_ |
1918 | |
1919 | // We expect to see a call that meets the following conditions |
1920 | assert(call->IsFastTailCall()); |
1921 | |
// The VM cannot use return address hijacking when A() and B() tail call each
// other in mutual recursion. Therefore, either this block is reachable through
// a GC-safe point or the whole method is marked as fully interruptible.
1925 | // |
1926 | // TODO-Cleanup: |
// optReachWithoutCall() depends on the fact that loop header blocks
// will have a block number > fgLastBB. These loop headers get added
// after dominator computation and get skipped by optReachWithoutCall().
1930 | // The below condition cannot be asserted in lower because fgSimpleLowering() |
1931 | // can add a new basic block for range check failure which becomes |
1932 | // fgLastBB with block number > loop header block number. |
1933 | // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || |
1934 | // !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible); |
1935 | |
1936 | // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that |
1937 | // a method returns. This is a case of caller method has both PInvokes and tail calls. |
1938 | if (comp->info.compCallUnmanaged) |
1939 | { |
1940 | InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call)); |
1941 | } |
1942 | |
// Args for a tail call are set up in the incoming arg area. The gc-ness of the args of the
// caller and the callee (which is being tail called) may not match. Therefore, everything
// from arg setup until the epilog needs to be non-interruptible by GC. This is
// achieved by inserting GT_START_NONGC before the very first GT_PUTARG_STK node
// of the call is set up. Note that once a stack arg is set up, it cannot have nested
// calls subsequently in execution order to set up other args, because a nested
// call could overwrite the stack arg that was set up earlier.
1950 | GenTree* firstPutArgStk = nullptr; |
1951 | GenTreeArgList* args; |
1952 | ArrayStack<GenTree*> putargs(comp->getAllocator(CMK_ArrayStack)); |
1953 | |
1954 | for (args = call->gtCallArgs; args; args = args->Rest()) |
1955 | { |
1956 | GenTree* tmp = args->Current(); |
1957 | if (tmp->OperGet() == GT_PUTARG_STK) |
1958 | { |
1959 | putargs.Push(tmp); |
1960 | } |
1961 | } |
1962 | |
1963 | for (args = call->gtCallLateArgs; args; args = args->Rest()) |
1964 | { |
1965 | GenTree* tmp = args->Current(); |
1966 | if (tmp->OperGet() == GT_PUTARG_STK) |
1967 | { |
1968 | putargs.Push(tmp); |
1969 | } |
1970 | } |
1971 | |
1972 | if (!putargs.Empty()) |
1973 | { |
1974 | firstPutArgStk = putargs.Bottom(); |
1975 | } |
1976 | |
// If we have a putarg_stk node, also count the number of non-standard args the
// call node has. Note that while determining whether a tail call can be fast
// tail called, we don't count non-standard args (passed in R10 or R11) since they
// don't contribute to outgoing arg space. These non-standard args are not
// accounted for in the caller's arg count but are accounted for in the callee's arg count
// after fgMorphArgs(). Therefore, exclude the callee's non-standard args while mapping
// the callee's stack arg num to the corresponding caller's stack arg num.
1984 | unsigned calleeNonStandardArgCount = call->GetNonStandardAddedArgCount(comp); |
1985 | |
1986 | // Say Caller(a, b, c, d, e) fast tail calls Callee(e, d, c, b, a) |
1987 | // i.e. passes its arguments in reverse to Callee. During call site |
1988 | // setup, after computing argument side effects, stack args are setup |
1989 | // first and reg args next. In the above example, both Callers and |
1990 | // Callee stack args (e and a respectively) share the same stack slot |
1991 | // and are alive at the same time. The act of setting up Callee's |
1992 | // stack arg will over-write the stack arg of Caller and if there are |
1993 | // further uses of Caller stack arg we have to make sure that we move |
1994 | // it to a temp before over-writing its slot and use temp in place of |
1995 | // the corresponding Caller stack arg. |
1996 | // |
1997 | // For the above example, conceptually this is what is done |
1998 | // tmp = e; |
1999 | // Stack slot of e = a |
2000 | // R9 = b, R8 = c, RDx = d |
2001 | // RCX = tmp |
2002 | // |
2003 | // The below logic is meant to detect cases like this and introduce |
2004 | // temps to set up args correctly for Callee. |
2005 | |
2006 | for (int i = 0; i < putargs.Height(); i++) |
2007 | { |
2008 | GenTree* putArgStkNode = putargs.Bottom(i); |
2009 | |
2010 | assert(putArgStkNode->OperGet() == GT_PUTARG_STK); |
2011 | |
2012 | // Get the caller arg num corresponding to this callee arg. |
2013 | // Note that these two args share the same stack slot. Therefore, |
2014 | // if there are further uses of corresponding caller arg, we need |
2015 | // to move it to a temp and use the temp in this call tree. |
2016 | // |
// Note that the Caller is guaranteed to have a param corresponding to
// this Callee's arg since the fast tail call mechanism counts the
// stack slots required for both the Caller and the Callee for passing params,
// and allows a fast tail call only if the stack slots required by the Caller >=
// those required by the Callee.
2022 | fgArgTabEntry* argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode); |
2023 | assert(argTabEntry); |
2024 | unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount; |
2025 | noway_assert(callerArgNum < comp->info.compArgsCount); |
2026 | |
2027 | unsigned callerArgLclNum = callerArgNum; |
2028 | LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum; |
2029 | if (callerArgDsc->lvPromoted) |
2030 | { |
2031 | callerArgLclNum = |
2032 | callerArgDsc->lvFieldLclStart; // update the callerArgNum to the promoted struct field's lclNum |
2033 | callerArgDsc = comp->lvaTable + callerArgLclNum; |
2034 | } |
2035 | noway_assert(callerArgDsc->lvIsParam); |
2036 | |
// Start searching the execution order list until we encounter the call node
2038 | unsigned tmpLclNum = BAD_VAR_NUM; |
2039 | var_types tmpType = TYP_UNDEF; |
2040 | for (GenTree* treeNode = putArgStkNode->gtNext; treeNode != call; treeNode = treeNode->gtNext) |
2041 | { |
2042 | if (treeNode->OperIsLocal() || treeNode->OperIsLocalAddr()) |
2043 | { |
2044 | // This should not be a GT_PHI_ARG. |
2045 | assert(treeNode->OperGet() != GT_PHI_ARG); |
2046 | |
2047 | GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon(); |
2048 | LclVarDsc* lclVar = &comp->lvaTable[lcl->gtLclNum]; |
2049 | |
// The fast tail calling criteria permit passing structs of size 1, 2, 4 and 8 as args.
2051 | // It is possible that the callerArgLclNum corresponds to such a struct whose stack slot |
2052 | // is getting over-written by setting up of a stack arg and there are further uses of |
2053 | // any of its fields if such a struct is type-dependently promoted. In this case too |
2054 | // we need to introduce a temp. |
2055 | if ((lcl->gtLclNum == callerArgNum) || (lcl->gtLclNum == callerArgLclNum)) |
2056 | { |
2057 | // Create tmp and use it in place of callerArgDsc |
2058 | if (tmpLclNum == BAD_VAR_NUM) |
2059 | { |
2060 | // Set tmpType first before calling lvaGrabTemp, as that call invalidates callerArgDsc |
2061 | tmpType = genActualType(callerArgDsc->lvaArgType()); |
2062 | tmpLclNum = comp->lvaGrabTemp( |
2063 | true DEBUGARG("Fast tail call lowering is creating a new local variable" )); |
2064 | |
2065 | comp->lvaTable[tmpLclNum].lvType = tmpType; |
2066 | comp->lvaTable[tmpLclNum].lvDoNotEnregister = comp->lvaTable[lcl->gtLclNum].lvDoNotEnregister; |
2067 | } |
2068 | |
2069 | lcl->SetLclNum(tmpLclNum); |
2070 | } |
2071 | } |
2072 | } |
2073 | |
// If we have created a temp, insert an assignment of the caller arg to the temp before
// the first putArgStk node, i.e.
//     tmpLcl = CallerArg
2077 | if (tmpLclNum != BAD_VAR_NUM) |
2078 | { |
2079 | assert(tmpType != TYP_UNDEF); |
2080 | GenTreeLclVar* local = |
2081 | new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET); |
2082 | GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local); |
2083 | ContainCheckRange(local, assignExpr); |
2084 | BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr)); |
2085 | } |
2086 | } |
2087 | |
2088 | // Insert GT_START_NONGC node before the first GT_PUTARG_STK node. |
2089 | // Note that if there are no args to be setup on stack, no need to |
2090 | // insert GT_START_NONGC node. |
2091 | GenTree* startNonGCNode = nullptr; |
2092 | if (firstPutArgStk != nullptr) |
2093 | { |
2094 | startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID); |
2095 | BlockRange().InsertBefore(firstPutArgStk, startNonGCNode); |
2096 | |
2097 | // Gc-interruptability in the following case: |
2098 | // foo(a, b, c, d, e) { bar(a, b, c, d, e); } |
2099 | // bar(a, b, c, d, e) { foo(a, b, d, d, e); } |
2100 | // |
// Since the instruction group starting from the instruction that sets up the first
// stack arg to the end of the tail call is marked as non-GC-interruptible,
// this will form a non-interruptible tight loop causing GC starvation. To fix
// this we insert a GT_NO_OP before GT_START_NONGC, if the method
// has a single basic block and is not a GC-safe point. The presence of a single
// nop outside the non-GC-interruptible region will prevent GC starvation.
2107 | if ((comp->fgBBcount == 1) && !(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT)) |
2108 | { |
2109 | assert(comp->fgFirstBB == comp->compCurBB); |
2110 | GenTree* noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID); |
2111 | BlockRange().InsertBefore(startNonGCNode, noOp); |
2112 | } |
2113 | } |
2114 | |
2115 | // Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be |
2116 | // inserted before the args are setup but after the side effects of args are |
2117 | // computed. That is, GT_PROF_HOOK node needs to be inserted before GT_START_NONGC |
2118 | // node if one exists. |
2119 | if (comp->compIsProfilerHookNeeded()) |
2120 | { |
2121 | InsertProfTailCallHook(call, startNonGCNode); |
2122 | } |
2123 | |
2124 | #else // !FEATURE_FASTTAILCALL |
2125 | |
// The platform chose not to implement the fast tail call mechanism.
// In that case we should never reach this method, as
// the expectation is that IsTailCallViaHelper() will always
// be true on such a platform.
2130 | unreached(); |
2131 | #endif |
2132 | } |
2133 | |
2134 | //------------------------------------------------------------------------ |
2135 | // LowerTailCallViaHelper: lower a call via the tailcall helper. Morph |
2136 | // has already inserted tailcall helper special arguments. This function |
2137 | // inserts actual data for some placeholders. |
2138 | // |
2139 | // For ARM32, AMD64, lower |
2140 | // tail.call(void* copyRoutine, void* dummyArg, ...) |
2141 | // as |
2142 | // Jit_TailCall(void* copyRoutine, void* callTarget, ...) |
2143 | // |
2144 | // For x86, lower |
2145 | // tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg) |
2146 | // as |
2147 | // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* |
2148 | // callTarget) |
2149 | // Note that the special arguments are on the stack, whereas the function arguments follow the normal convention. |
2150 | // |
2151 | // Also inserts PInvoke method epilog if required. |
2152 | // |
2153 | // Arguments: |
2154 | // call - The call node |
2155 | // callTarget - The real call target. This is used to replace the dummyArg during lowering. |
2156 | // |
2157 | // Return Value: |
2158 | // Returns control expression tree for making a call to helper Jit_TailCall. |
2159 | // |
2160 | GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget) |
2161 | { |
// Tail call restrictions, i.e. conditions under which the tail prefix is ignored.
// Most of these checks are already done by the importer or fgMorphTailCall().
// This serves as a double sanity check.
2165 | assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods |
2166 | assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check |
assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
2168 | assert(!comp->compLocallocUsed); // tail call from methods that also do localloc |
2169 | |
2170 | #ifdef _TARGET_AMD64_ |
2171 | assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check |
2172 | #endif // _TARGET_AMD64_ |
2173 | |
2174 | // We expect to see a call that meets the following conditions |
2175 | assert(call->IsTailCallViaHelper()); |
2176 | assert(callTarget != nullptr); |
2177 | |
2178 | // The TailCall helper call never returns to the caller and is not GC interruptible. |
2179 | // Therefore the block containing the tail call should be a GC safe point to avoid |
2180 | // GC starvation. It is legal for the block to be unmarked iff the entry block is a |
2181 | // GC safe point, as the entry block trivially dominates every reachable block. |
2182 | assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || (comp->fgFirstBB->bbFlags & BBF_GC_SAFE_POINT)); |
2183 | |
2184 | // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that |
2185 | // a method returns. This is a case of caller method has both PInvokes and tail calls. |
2186 | if (comp->info.compCallUnmanaged) |
2187 | { |
2188 | InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call)); |
2189 | } |
2190 | |
2191 | // Remove gtCallAddr from execution order if present. |
2192 | if (call->gtCallType == CT_INDIRECT) |
2193 | { |
2194 | assert(call->gtCallAddr != nullptr); |
2195 | |
2196 | bool isClosed; |
2197 | LIR::ReadOnlyRange callAddrRange = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed); |
2198 | assert(isClosed); |
2199 | |
2200 | BlockRange().Remove(std::move(callAddrRange)); |
2201 | } |
2202 | |
2203 | // The callTarget tree needs to be sequenced. |
2204 | LIR::Range callTargetRange = LIR::SeqTree(comp, callTarget); |
2205 | |
2206 | #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM_) |
2207 | |
// For ARM32 and AMD64, the first argument is the CopyRoutine and the second argument is a placeholder node.
2209 | fgArgTabEntry* argEntry; |
2210 | |
2211 | #ifdef DEBUG |
2212 | argEntry = comp->gtArgEntryByArgNum(call, 0); |
2213 | assert(argEntry != nullptr); |
2214 | assert(argEntry->node->gtOper == GT_PUTARG_REG); |
2215 | GenTree* firstArg = argEntry->node->gtOp.gtOp1; |
2216 | assert(firstArg->gtOper == GT_CNS_INT); |
2217 | #endif |
2218 | |
2219 | // Replace second arg by callTarget. |
2220 | argEntry = comp->gtArgEntryByArgNum(call, 1); |
2221 | assert(argEntry != nullptr); |
2222 | assert(argEntry->node->gtOper == GT_PUTARG_REG); |
2223 | GenTree* secondArg = argEntry->node->gtOp.gtOp1; |
2224 | |
2225 | ContainCheckRange(callTargetRange); |
2226 | BlockRange().InsertAfter(secondArg, std::move(callTargetRange)); |
2227 | |
2228 | bool isClosed; |
2229 | LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(secondArg, &isClosed); |
2230 | assert(isClosed); |
2231 | |
2232 | BlockRange().Remove(std::move(secondArgRange)); |
2233 | |
2234 | argEntry->node->gtOp.gtOp1 = callTarget; |
2235 | |
2236 | #elif defined(_TARGET_X86_) |
2237 | |
2238 | // Verify the special args are what we expect, and replace the dummy args with real values. |
2239 | // We need to figure out the size of the outgoing stack arguments, not including the special args. |
2240 | // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes. |
2241 | // This number is exactly the next slot number in the call's argument info struct. |
2242 | unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum(); |
2243 | assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args. |
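// Exclude the four special args; the remainder is the number of outgoing stack argument
// words passed to the callee.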
2244 | nNewStkArgsWords -= 4; |
2245 | |
2246 | unsigned numArgs = call->fgArgInfo->ArgCount(); |
2247 | |
2248 | fgArgTabEntry* argEntry; |
2249 | |
2250 | // arg 0 == callTarget. |
2251 | argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1); |
2252 | assert(argEntry != nullptr); |
2253 | assert(argEntry->node->gtOper == GT_PUTARG_STK); |
2254 | GenTree* arg0 = argEntry->node->gtOp.gtOp1; |
2255 | |
2256 | ContainCheckRange(callTargetRange); |
2257 | BlockRange().InsertAfter(arg0, std::move(callTargetRange)); |
2258 | |
2259 | bool isClosed; |
2260 | LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed); |
2261 | assert(isClosed); |
2262 | BlockRange().Remove(std::move(secondArgRange)); |
2263 | |
2264 | argEntry->node->gtOp.gtOp1 = callTarget; |
2265 | |
2266 | // arg 1 == flags |
2267 | argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2); |
2268 | assert(argEntry != nullptr); |
2269 | assert(argEntry->node->gtOper == GT_PUTARG_STK); |
2270 | GenTree* arg1 = argEntry->node->gtOp.gtOp1; |
2271 | assert(arg1->gtOper == GT_CNS_INT); |
2272 | |
2273 | ssize_t tailCallHelperFlags = 1 | // always restore EDI,ESI,EBX |
2274 | (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag |
2275 | arg1->gtIntCon.gtIconVal = tailCallHelperFlags; |
2276 | |
2277 | // arg 2 == numberOfNewStackArgsWords |
2278 | argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3); |
2279 | assert(argEntry != nullptr); |
2280 | assert(argEntry->node->gtOper == GT_PUTARG_STK); |
2281 | GenTree* arg2 = argEntry->node->gtOp.gtOp1; |
2282 | assert(arg2->gtOper == GT_CNS_INT); |
2283 | |
2284 | arg2->gtIntCon.gtIconVal = nNewStkArgsWords; |
2285 | |
2286 | #ifdef DEBUG |
2287 | // arg 3 == numberOfOldStackArgsWords |
2288 | argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4); |
2289 | assert(argEntry != nullptr); |
2290 | assert(argEntry->node->gtOper == GT_PUTARG_STK); |
2291 | GenTree* arg3 = argEntry->node->gtOp.gtOp1; |
2292 | assert(arg3->gtOper == GT_CNS_INT); |
2293 | #endif // DEBUG |
2294 | |
2295 | #else |
2296 | NYI("LowerTailCallViaHelper" ); |
2297 | #endif // _TARGET_* |
2298 | |
2299 | // Transform this call node into a call to Jit tail call helper. |
2300 | call->gtCallType = CT_HELPER; |
2301 | call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL); |
2302 | call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; |
2303 | |
2304 | // Lower this as if it were a pure helper call. |
2305 | call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER); |
2306 | GenTree* result = LowerDirectCall(call); |
2307 | |
2308 | // Now add back tail call flags for identifying this node as tail call dispatched via helper. |
2309 | call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; |
2310 | |
2311 | #ifdef PROFILING_SUPPORTED |
2312 | // Insert profiler tail call hook if needed. |
2313 | // Since we don't know the insertion point, pass null for second param. |
2314 | if (comp->compIsProfilerHookNeeded()) |
2315 | { |
2316 | InsertProfTailCallHook(call, nullptr); |
2317 | } |
2318 | #endif // PROFILING_SUPPORTED |
2319 | |
2320 | assert(call->IsTailCallViaHelper()); |
2321 | |
2322 | return result; |
2323 | } |
2324 | |
2325 | #ifndef _TARGET_64BIT_ |
2326 | //------------------------------------------------------------------------ |
2327 | // Lowering::DecomposeLongCompare: Decomposes a TYP_LONG compare node. |
2328 | // |
2329 | // Arguments: |
2330 | // cmp - the compare node |
2331 | // |
2332 | // Return Value: |
2333 | // The next node to lower. |
2334 | // |
2335 | // Notes: |
2336 | // This is done during lowering because DecomposeLongs handles only nodes |
2337 | // that produce TYP_LONG values. Compare nodes may consume TYP_LONG values |
2338 | // but produce TYP_INT values. |
2339 | // |
2340 | GenTree* Lowering::DecomposeLongCompare(GenTree* cmp) |
2341 | { |
2342 | assert(cmp->gtGetOp1()->TypeGet() == TYP_LONG); |
2343 | |
2344 | GenTree* src1 = cmp->gtGetOp1(); |
2345 | GenTree* src2 = cmp->gtGetOp2(); |
2346 | assert(src1->OperIs(GT_LONG)); |
2347 | assert(src2->OperIs(GT_LONG)); |
2348 | GenTree* loSrc1 = src1->gtGetOp1(); |
2349 | GenTree* hiSrc1 = src1->gtGetOp2(); |
2350 | GenTree* loSrc2 = src2->gtGetOp1(); |
2351 | GenTree* hiSrc2 = src2->gtGetOp2(); |
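// The GT_LONG nodes merely pair up the lo/hi halves; they are no longer needed once we
// operate on the halves directly.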
2352 | BlockRange().Remove(src1); |
2353 | BlockRange().Remove(src2); |
2354 | |
2355 | genTreeOps condition = cmp->OperGet(); |
2356 | GenTree* loCmp; |
2357 | GenTree* hiCmp; |
2358 | |
2359 | if (cmp->OperIs(GT_EQ, GT_NE)) |
2360 | { |
2361 | // |
2362 | // Transform (x EQ|NE y) into (((x.lo XOR y.lo) OR (x.hi XOR y.hi)) EQ|NE 0). If y is 0 then this can |
2363 | // be reduced to just ((x.lo OR x.hi) EQ|NE 0). The OR is expected to set the condition flags so we |
// don't need to generate a redundant compare against 0; we only generate a SETCC|JCC instruction.
2365 | // |
2366 | // XOR is used rather than SUB because it is commutative and thus allows swapping the operands when |
2367 | // the first happens to be a constant. Usually only the second compare operand is a constant but it's |
2368 | // still possible to have a constant on the left side. For example, when src1 is a uint->ulong cast |
2369 | // then hiSrc1 would be 0. |
2370 | // |
2371 | |
2372 | if (loSrc1->OperIs(GT_CNS_INT)) |
2373 | { |
2374 | std::swap(loSrc1, loSrc2); |
2375 | } |
2376 | |
2377 | if (loSrc2->IsIntegralConst(0)) |
2378 | { |
2379 | BlockRange().Remove(loSrc2); |
2380 | loCmp = loSrc1; |
2381 | } |
2382 | else |
2383 | { |
2384 | loCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, loSrc1, loSrc2); |
2385 | BlockRange().InsertBefore(cmp, loCmp); |
2386 | ContainCheckBinary(loCmp->AsOp()); |
2387 | } |
2388 | |
2389 | if (hiSrc1->OperIs(GT_CNS_INT)) |
2390 | { |
2391 | std::swap(hiSrc1, hiSrc2); |
2392 | } |
2393 | |
2394 | if (hiSrc2->IsIntegralConst(0)) |
2395 | { |
2396 | BlockRange().Remove(hiSrc2); |
2397 | hiCmp = hiSrc1; |
2398 | } |
2399 | else |
2400 | { |
2401 | hiCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, hiSrc1, hiSrc2); |
2402 | BlockRange().InsertBefore(cmp, hiCmp); |
2403 | ContainCheckBinary(hiCmp->AsOp()); |
2404 | } |
2405 | |
2406 | hiCmp = comp->gtNewOperNode(GT_OR, TYP_INT, loCmp, hiCmp); |
2407 | BlockRange().InsertBefore(cmp, hiCmp); |
2408 | ContainCheckBinary(hiCmp->AsOp()); |
2409 | } |
2410 | else |
2411 | { |
2412 | assert(cmp->OperIs(GT_LT, GT_LE, GT_GE, GT_GT)); |
2413 | |
2414 | // |
2415 | // If the compare is signed then (x LT|GE y) can be transformed into ((x SUB y) LT|GE 0). |
2416 | // If the compare is unsigned we can still use SUB but we need to check the Carry flag, |
// not the actual result. In both cases we can simply check the appropriate condition flags
2418 | // and ignore the actual result: |
2419 | // SUB_LO loSrc1, loSrc2 |
2420 | // SUB_HI hiSrc1, hiSrc2 |
2421 | // SETCC|JCC (signed|unsigned LT|GE) |
2422 | // If loSrc2 happens to be 0 then the first SUB can be eliminated and the second one can |
2423 | // be turned into a CMP because the first SUB would have set carry to 0. This effectively |
2424 | // transforms a long compare against 0 into an int compare of the high part against 0. |
2425 | // |
// (x LE|GT y) can be transformed into ((x SUB y) LE|GT 0) but checking that a long value
// is greater than 0 is not so easy. We need to turn this into a positive/negative check
// like the one we get for LT|GE compares; this can be achieved by swapping the compare:
// (x LE|GT y) becomes (y GE|LT x)
2430 | // |
2431 | // Having to swap operands is problematic when the second operand is a constant. The constant |
2432 | // moves to the first operand where it cannot be contained and thus needs a register. This can |
2433 | // be avoided by changing the constant such that LE|GT becomes LT|GE: |
2434 | // (x LE|GT 41) becomes (x LT|GE 42) |
2435 | // |
2436 | |
2437 | if (cmp->OperIs(GT_LE, GT_GT)) |
2438 | { |
2439 | bool mustSwap = true; |
2440 | |
2441 | if (loSrc2->OperIs(GT_CNS_INT) && hiSrc2->OperIs(GT_CNS_INT)) |
2442 | { |
2443 | uint32_t loValue = static_cast<uint32_t>(loSrc2->AsIntCon()->IconValue()); |
2444 | uint32_t hiValue = static_cast<uint32_t>(hiSrc2->AsIntCon()->IconValue()); |
2445 | uint64_t value = static_cast<uint64_t>(loValue) | (static_cast<uint64_t>(hiValue) << 32); |
2446 | uint64_t maxValue = cmp->IsUnsigned() ? UINT64_MAX : INT64_MAX; |
2447 | |
2448 | if (value != maxValue) |
2449 | { |
2450 | value++; |
2451 | loValue = value & UINT32_MAX; |
2452 | hiValue = (value >> 32) & UINT32_MAX; |
2453 | loSrc2->AsIntCon()->SetIconValue(loValue); |
2454 | hiSrc2->AsIntCon()->SetIconValue(hiValue); |
2455 | |
2456 | condition = cmp->OperIs(GT_LE) ? GT_LT : GT_GE; |
2457 | mustSwap = false; |
2458 | } |
2459 | } |
2460 | |
2461 | if (mustSwap) |
2462 | { |
2463 | std::swap(loSrc1, loSrc2); |
2464 | std::swap(hiSrc1, hiSrc2); |
2465 | condition = GenTree::SwapRelop(condition); |
2466 | } |
2467 | } |
2468 | |
2469 | assert((condition == GT_LT) || (condition == GT_GE)); |
2470 | |
2471 | if (loSrc2->IsIntegralConst(0)) |
2472 | { |
2473 | BlockRange().Remove(loSrc2); |
2474 | |
2475 | // Very conservative dead code removal... but it helps. |
2476 | |
2477 | if (loSrc1->OperIs(GT_CNS_INT, GT_LCL_VAR, GT_LCL_FLD)) |
2478 | { |
2479 | BlockRange().Remove(loSrc1); |
2480 | } |
2481 | else |
2482 | { |
2483 | loSrc1->SetUnusedValue(); |
2484 | } |
2485 | |
2486 | hiCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, hiSrc1, hiSrc2); |
2487 | BlockRange().InsertBefore(cmp, hiCmp); |
2488 | ContainCheckCompare(hiCmp->AsOp()); |
2489 | } |
2490 | else |
2491 | { |
2492 | loCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, loSrc1, loSrc2); |
2493 | hiCmp = comp->gtNewOperNode(GT_SUB_HI, TYP_INT, hiSrc1, hiSrc2); |
2494 | BlockRange().InsertBefore(cmp, loCmp, hiCmp); |
2495 | ContainCheckCompare(loCmp->AsOp()); |
2496 | ContainCheckBinary(hiCmp->AsOp()); |
2497 | |
2498 | // |
// Try to move the first SUB_HI operand right in front of it; this allows using
// a single temporary register instead of 2 (one for CMP and one for SUB_HI). Do
// this only for locals as they won't change condition flags. Note that we could
// move constants (except 0, which generates XOR reg, reg) but it's extremely rare
// to have a constant as the first operand.
2504 | // |
2505 | |
2506 | if (hiSrc1->OperIs(GT_LCL_VAR, GT_LCL_FLD)) |
2507 | { |
2508 | BlockRange().Remove(hiSrc1); |
2509 | BlockRange().InsertBefore(hiCmp, hiSrc1); |
2510 | } |
2511 | } |
2512 | } |
2513 | |
2514 | hiCmp->gtFlags |= GTF_SET_FLAGS; |
2515 | if (hiCmp->IsValue()) |
2516 | { |
2517 | hiCmp->SetUnusedValue(); |
2518 | } |
2519 | |
2520 | LIR::Use cmpUse; |
2521 | if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE)) |
2522 | { |
2523 | BlockRange().Remove(cmp); |
2524 | |
2525 | GenTree* jcc = cmpUse.User(); |
2526 | jcc->gtOp.gtOp1 = nullptr; |
2527 | jcc->ChangeOper(GT_JCC); |
2528 | jcc->gtFlags |= (cmp->gtFlags & GTF_UNSIGNED) | GTF_USE_FLAGS; |
2529 | jcc->AsCC()->gtCondition = condition; |
2530 | } |
2531 | else |
2532 | { |
2533 | cmp->gtOp.gtOp1 = nullptr; |
2534 | cmp->gtOp.gtOp2 = nullptr; |
2535 | cmp->ChangeOper(GT_SETCC); |
2536 | cmp->gtFlags |= GTF_USE_FLAGS; |
2537 | cmp->AsCC()->gtCondition = condition; |
2538 | } |
2539 | |
2540 | return cmp->gtNext; |
2541 | } |
2542 | #endif // !_TARGET_64BIT_ |
2543 | |
2544 | //------------------------------------------------------------------------ |
2545 | // Lowering::OptimizeConstCompare: Performs various "compare with const" optimizations. |
2546 | // |
2547 | // Arguments: |
2548 | // cmp - the compare node |
2549 | // |
2550 | // Return Value: |
2551 | // The original compare node if lowering should proceed as usual or the next node |
2552 | // to lower if the compare node was changed in such a way that lowering is no |
2553 | // longer needed. |
2554 | // |
2555 | // Notes: |
2556 | // - Narrow operands to enable memory operand containment (XARCH specific). |
2557 | // - Transform cmp(and(x, y), 0) into test(x, y) (XARCH/Arm64 specific but could |
2558 | // be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added). |
2559 | // - Transform TEST(x, LSH(1, y)) into BT(x, y) (XARCH specific) |
2560 | // - Transform RELOP(OP, 0) into SETCC(OP) or JCC(OP) if OP can set the |
2561 | // condition flags appropriately (XARCH/ARM64 specific but could be extended |
2562 | // to ARM32 as well if ARM32 codegen supports GTF_SET_FLAGS). |
2563 | // |
2564 | GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) |
2565 | { |
2566 | assert(cmp->gtGetOp2()->IsIntegralConst()); |
2567 | |
2568 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
2569 | GenTree* op1 = cmp->gtGetOp1(); |
2570 | var_types op1Type = op1->TypeGet(); |
2571 | GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); |
2572 | ssize_t op2Value = op2->IconValue(); |
2573 | |
2574 | #ifdef _TARGET_XARCH_ |
2575 | if (IsContainableMemoryOp(op1) && varTypeIsSmall(op1Type) && genSmallTypeCanRepresentValue(op1Type, op2Value)) |
2576 | { |
2577 | // |
2578 | // If op1's type is small then try to narrow op2 so it has the same type as op1. |
2579 | // Small types are usually used by memory loads and if both compare operands have |
2580 | // the same type then the memory load can be contained. In certain situations |
2581 | // (e.g "cmp ubyte, 200") we also get a smaller instruction encoding. |
2582 | // |
2583 | |
2584 | op2->gtType = op1Type; |
2585 | } |
2586 | else |
2587 | #endif |
2588 | if (op1->OperIs(GT_CAST) && !op1->gtOverflow()) |
2589 | { |
2590 | GenTreeCast* cast = op1->AsCast(); |
2591 | var_types castToType = cast->CastToType(); |
2592 | GenTree* castOp = cast->gtGetOp1(); |
2593 | |
2594 | if (((castToType == TYP_BOOL) || (castToType == TYP_UBYTE)) && FitsIn<UINT8>(op2Value)) |
2595 | { |
2596 | // |
// Since we're going to remove the cast we need to be able to narrow the cast operand
// to the cast type. This can be done safely only for certain opers (e.g. AND, OR, XOR).
// Some opers just can't be narrowed (e.g. DIV, MUL) while others could be narrowed but
// doing so would produce incorrect results (e.g. RSZ, RSH).
//
// The below list of handled opers is conservative but enough to handle the most common
// situations. In particular this includes CALL; sometimes the JIT unnecessarily widens
// the result of bool-returning calls.
2605 | // |
2606 | bool removeCast = |
2607 | #ifdef _TARGET_ARM64_ |
2608 | (op2Value == 0) && cmp->OperIs(GT_EQ, GT_NE, GT_GT) && |
2609 | #endif |
2610 | (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() |
2611 | #ifdef _TARGET_XARCH_ |
2612 | || IsContainableMemoryOp(castOp) |
2613 | #endif |
2614 | ); |
2615 | |
2616 | if (removeCast) |
2617 | { |
2618 | assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation |
2619 | |
2620 | #ifdef _TARGET_ARM64_ |
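// On ARM64 the compare of the cast result against 0 becomes a test of the low 8 bits of
// the uncast operand: (x TEST_EQ|TEST_NE 0xff).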
2621 | bool cmpEq = cmp->OperIs(GT_EQ); |
2622 | |
2623 | cmp->SetOperRaw(cmpEq ? GT_TEST_EQ : GT_TEST_NE); |
2624 | op2->SetIconValue(0xff); |
2625 | op2->gtType = castOp->gtType; |
2626 | #else |
2627 | castOp->gtType = castToType; |
2628 | op2->gtType = castToType; |
2629 | #endif |
2630 | // If we have any contained memory ops on castOp, they must now not be contained. |
2631 | if (castOp->OperIsLogical()) |
2632 | { |
2633 | GenTree* op1 = castOp->gtGetOp1(); |
2634 | if ((op1 != nullptr) && !op1->IsCnsIntOrI()) |
2635 | { |
2636 | op1->ClearContained(); |
2637 | } |
2638 | GenTree* op2 = castOp->gtGetOp2(); |
2639 | if ((op2 != nullptr) && !op2->IsCnsIntOrI()) |
2640 | { |
2641 | op2->ClearContained(); |
2642 | } |
2643 | } |
2644 | cmp->gtOp.gtOp1 = castOp; |
2645 | |
2646 | BlockRange().Remove(cast); |
2647 | } |
2648 | } |
2649 | } |
2650 | else if (op1->OperIs(GT_AND) && cmp->OperIs(GT_EQ, GT_NE)) |
2651 | { |
2652 | // |
2653 | // Transform ((x AND y) EQ|NE 0) into (x TEST_EQ|TEST_NE y) when possible. |
2654 | // |
2655 | |
2656 | GenTree* andOp1 = op1->gtGetOp1(); |
2657 | GenTree* andOp2 = op1->gtGetOp2(); |
2658 | |
2659 | if (op2Value != 0) |
2660 | { |
2661 | // |
2662 | // If we don't have a 0 compare we can get one by transforming ((x AND mask) EQ|NE mask) |
2663 | // into ((x AND mask) NE|EQ 0) when mask is a single bit. |
2664 | // |
2665 | |
2666 | if (isPow2(static_cast<size_t>(op2Value)) && andOp2->IsIntegralConst(op2Value)) |
2667 | { |
2668 | op2Value = 0; |
2669 | op2->SetIconValue(0); |
2670 | cmp->SetOperRaw(GenTree::ReverseRelop(cmp->OperGet())); |
2671 | } |
2672 | } |
2673 | |
2674 | if (op2Value == 0) |
2675 | { |
2676 | BlockRange().Remove(op1); |
2677 | BlockRange().Remove(op2); |
2678 | |
2679 | cmp->SetOperRaw(cmp->OperIs(GT_EQ) ? GT_TEST_EQ : GT_TEST_NE); |
2680 | cmp->gtOp.gtOp1 = andOp1; |
2681 | cmp->gtOp.gtOp2 = andOp2; |
2682 | // We will re-evaluate containment below |
2683 | andOp1->ClearContained(); |
2684 | andOp2->ClearContained(); |
2685 | |
2686 | #ifdef _TARGET_XARCH_ |
2687 | if (IsContainableMemoryOp(andOp1) && andOp2->IsIntegralConst()) |
2688 | { |
2689 | // |
2690 | // For "test" we only care about the bits that are set in the second operand (mask). |
2691 | // If the mask fits in a small type then we can narrow both operands to generate a "test" |
2692 | // instruction with a smaller encoding ("test" does not have a r/m32, imm8 form) and avoid |
2693 | // a widening load in some cases. |
2694 | // |
2695 | // For 16 bit operands we narrow only if the memory operand is already 16 bit. This matches |
2696 | // the behavior of a previous implementation and avoids adding more cases where we generate |
2697 | // 16 bit instructions that require a length changing prefix (0x66). These suffer from |
2698 | // significant decoder stalls on Intel CPUs. |
2699 | // |
2700 | // We could also do this for 64 bit masks that fit into 32 bit but it doesn't help. |
2701 | // In such cases morph narrows down the existing GT_AND by inserting a cast between it and |
2702 | // the memory operand so we'd need to add more code to recognize and eliminate that cast. |
2703 | // |
2704 | |
2705 | size_t mask = static_cast<size_t>(andOp2->AsIntCon()->IconValue()); |
2706 | |
2707 | if (FitsIn<UINT8>(mask)) |
2708 | { |
2709 | andOp1->gtType = TYP_UBYTE; |
2710 | andOp2->gtType = TYP_UBYTE; |
2711 | } |
2712 | else if (FitsIn<UINT16>(mask) && genTypeSize(andOp1) == 2) |
2713 | { |
2714 | andOp1->gtType = TYP_USHORT; |
2715 | andOp2->gtType = TYP_USHORT; |
2716 | } |
2717 | } |
2718 | #endif |
2719 | } |
2720 | } |
2721 | |
2722 | if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE)) |
2723 | { |
2724 | #ifdef _TARGET_XARCH_ |
2725 | // |
2726 | // Transform TEST_EQ|NE(x, LSH(1, y)) into BT(x, y) when possible. Using BT |
2727 | // results in smaller and faster code. It also doesn't have special register |
2728 | // requirements, unlike LSH that requires the shift count to be in ECX. |
2729 | // Note that BT has the same behavior as LSH when the bit index exceeds the |
2730 | // operand bit size - it uses (bit_index MOD bit_size). |
2731 | // |
2732 | |
2733 | GenTree* lsh = cmp->gtGetOp2(); |
2734 | LIR::Use cmpUse; |
2735 | |
2736 | if (lsh->OperIs(GT_LSH) && varTypeIsIntOrI(lsh->TypeGet()) && lsh->gtGetOp1()->IsIntegralConst(1) && |
2737 | BlockRange().TryGetUse(cmp, &cmpUse)) |
2738 | { |
2739 | genTreeOps condition = cmp->OperIs(GT_TEST_NE) ? GT_LT : GT_GE; |
2740 | |
2741 | cmp->SetOper(GT_BT); |
2742 | cmp->gtType = TYP_VOID; |
2743 | cmp->gtFlags |= GTF_SET_FLAGS; |
2744 | cmp->gtOp.gtOp2 = lsh->gtGetOp2(); |
2745 | cmp->gtGetOp2()->ClearContained(); |
2746 | |
2747 | BlockRange().Remove(lsh->gtGetOp1()); |
2748 | BlockRange().Remove(lsh); |
2749 | |
2750 | GenTreeCC* cc; |
2751 | |
2752 | if (cmpUse.User()->OperIs(GT_JTRUE)) |
2753 | { |
2754 | cmpUse.User()->ChangeOper(GT_JCC); |
2755 | cc = cmpUse.User()->AsCC(); |
2756 | cc->gtCondition = condition; |
2757 | } |
2758 | else |
2759 | { |
2760 | cc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, TYP_INT); |
2761 | BlockRange().InsertAfter(cmp, cc); |
2762 | cmpUse.ReplaceWith(comp, cc); |
2763 | } |
2764 | |
2765 | cc->gtFlags |= GTF_USE_FLAGS | GTF_UNSIGNED; |
2766 | |
2767 | return cmp->gtNext; |
2768 | } |
2769 | #endif // _TARGET_XARCH_ |
2770 | } |
2771 | else if (cmp->OperIs(GT_EQ, GT_NE)) |
2772 | { |
2773 | GenTree* op1 = cmp->gtGetOp1(); |
2774 | GenTree* op2 = cmp->gtGetOp2(); |
2775 | |
2776 | // TODO-CQ: right now the below peep is inexpensive and gets the benefit in most |
// cases because in the majority of cases op1, op2 and cmp would be in that order in
2778 | // execution. In general we should be able to check that all the nodes that come |
2779 | // after op1 do not modify the flags so that it is safe to avoid generating a |
2780 | // test instruction. |
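//
// For example (illustrative), with AND(x, y) compared against 0 and feeding a JTRUE:
//     t1 = AND(x, y)                        AND(x, y) with GTF_SET_FLAGS, value unused
//     NE(t1, 0)                 =>          JCC(GT_NE)            ; consumes the flags set by AND
//     JTRUE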
2781 | |
2782 | if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) && |
2783 | #ifdef _TARGET_XARCH_ |
2784 | op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG)) |
2785 | #else // _TARGET_ARM64_ |
2786 | op1->OperIs(GT_AND, GT_ADD, GT_SUB)) |
2787 | #endif |
2788 | { |
2789 | op1->gtFlags |= GTF_SET_FLAGS; |
2790 | op1->SetUnusedValue(); |
2791 | |
2792 | BlockRange().Remove(op2); |
2793 | |
2794 | GenTree* next = cmp->gtNext; |
2795 | GenTree* cc; |
2796 | genTreeOps ccOp; |
2797 | LIR::Use cmpUse; |
2798 | |
2799 | // Fast check for the common case - relop used by a JTRUE that immediately follows it. |
2800 | if ((next != nullptr) && next->OperIs(GT_JTRUE) && (next->gtGetOp1() == cmp)) |
2801 | { |
2802 | cc = next; |
2803 | ccOp = GT_JCC; |
2804 | next = nullptr; |
2805 | BlockRange().Remove(cmp); |
2806 | } |
2807 | else if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE)) |
2808 | { |
2809 | cc = cmpUse.User(); |
2810 | ccOp = GT_JCC; |
2811 | next = nullptr; |
2812 | BlockRange().Remove(cmp); |
2813 | } |
2814 | else // The relop is not used by a JTRUE or it is not used at all. |
2815 | { |
// Transform the relop node into a SETCC. If it's not used we could remove
// it completely but that means doing more work to handle a rare case.
2818 | cc = cmp; |
2819 | ccOp = GT_SETCC; |
2820 | } |
2821 | |
2822 | genTreeOps condition = cmp->OperGet(); |
2823 | cc->ChangeOper(ccOp); |
2824 | cc->AsCC()->gtCondition = condition; |
2825 | cc->gtFlags |= GTF_USE_FLAGS | (cmp->gtFlags & GTF_UNSIGNED); |
2826 | |
2827 | return next; |
2828 | } |
2829 | } |
2830 | #endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
2831 | |
2832 | return cmp; |
2833 | } |
2834 | |
2835 | //------------------------------------------------------------------------ |
2836 | // Lowering::LowerCompare: Lowers a compare node. |
2837 | // |
2838 | // Arguments: |
2839 | // cmp - the compare node |
2840 | // |
2841 | // Return Value: |
2842 | // The next node to lower. |
2843 | // |
2844 | GenTree* Lowering::LowerCompare(GenTree* cmp) |
2845 | { |
2846 | #ifndef _TARGET_64BIT_ |
2847 | if (cmp->gtGetOp1()->TypeGet() == TYP_LONG) |
2848 | { |
2849 | return DecomposeLongCompare(cmp); |
2850 | } |
2851 | #endif |
2852 | |
2853 | if (cmp->gtGetOp2()->IsIntegralConst() && !comp->opts.MinOpts()) |
2854 | { |
2855 | GenTree* next = OptimizeConstCompare(cmp); |
2856 | |
// If OptimizeConstCompare returns the compare node as "next" then we need to continue lowering it here.
2858 | if (next != cmp) |
2859 | { |
2860 | return next; |
2861 | } |
2862 | } |
2863 | |
2864 | #ifdef _TARGET_XARCH_ |
2865 | if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet()) |
2866 | { |
2867 | if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet())) |
2868 | { |
2869 | // |
2870 | // If both operands have the same type then codegen will use the common operand type to |
2871 | // determine the instruction type. For small types this would result in performing a |
2872 | // signed comparison of two small unsigned values without zero extending them to TYP_INT |
// which is incorrect. Note that making the comparison unsigned doesn't imply that codegen
// has to generate a small comparison; it can still correctly generate a TYP_INT comparison.
2875 | // |
2876 | |
2877 | cmp->gtFlags |= GTF_UNSIGNED; |
2878 | } |
2879 | } |
2880 | #endif // _TARGET_XARCH_ |
2881 | ContainCheckCompare(cmp->AsOp()); |
2882 | return cmp->gtNext; |
2883 | } |
2884 | |
2885 | //------------------------------------------------------------------------ |
2886 | // Lowering::LowerJTrue: Lowers a JTRUE node. |
2887 | // |
2888 | // Arguments: |
2889 | // jtrue - the JTRUE node |
2890 | // |
2891 | // Return Value: |
2892 | // The next node to lower (usually nullptr). |
2893 | // |
2894 | // Notes: |
2895 | // On ARM64 this may remove the JTRUE node and transform its associated |
2896 | // relop into a JCMP node. |
2897 | // |
2898 | GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) |
2899 | { |
2900 | #ifdef _TARGET_ARM64_ |
2901 | GenTree* relop = jtrue->gtGetOp1(); |
2902 | GenTree* relopOp2 = relop->gtOp.gtGetOp2(); |
2903 | |
2904 | if ((relop->gtNext == jtrue) && relopOp2->IsCnsIntOrI()) |
2905 | { |
2906 | bool useJCMP = false; |
2907 | unsigned flags = 0; |
2908 | |
2909 | if (relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0)) |
2910 | { |
// Codegen will use cbz or cbnz, which do not affect the flags register
2912 | flags = relop->OperIs(GT_EQ) ? GTF_JCMP_EQ : 0; |
2913 | useJCMP = true; |
2914 | } |
2915 | else if (relop->OperIs(GT_TEST_EQ, GT_TEST_NE) && isPow2(relopOp2->AsIntCon()->IconValue())) |
2916 | { |
// Codegen will use tbz or tbnz, which do not affect the flags register
2918 | flags = GTF_JCMP_TST | (relop->OperIs(GT_TEST_EQ) ? GTF_JCMP_EQ : 0); |
2919 | useJCMP = true; |
2920 | } |
2921 | |
2922 | if (useJCMP) |
2923 | { |
2924 | relop->SetOper(GT_JCMP); |
2925 | relop->gtFlags &= ~(GTF_JCMP_TST | GTF_JCMP_EQ); |
2926 | relop->gtFlags |= flags; |
2927 | relop->gtType = TYP_VOID; |
2928 | |
2929 | relopOp2->SetContained(); |
2930 | |
2931 | BlockRange().Remove(jtrue); |
2932 | |
2933 | assert(relop->gtNext == nullptr); |
2934 | return nullptr; |
2935 | } |
2936 | } |
2937 | #endif // _TARGET_ARM64_ |
2938 | |
2939 | ContainCheckJTrue(jtrue); |
2940 | |
2941 | assert(jtrue->gtNext == nullptr); |
2942 | return nullptr; |
2943 | } |
2944 | |
2945 | // Lower "jmp <method>" tail call to insert PInvoke method epilog if required. |
2946 | void Lowering::LowerJmpMethod(GenTree* jmp) |
2947 | { |
2948 | assert(jmp->OperGet() == GT_JMP); |
2949 | |
2950 | JITDUMP("lowering GT_JMP\n" ); |
2951 | DISPNODE(jmp); |
2952 | JITDUMP("============" ); |
2953 | |
2954 | // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that |
2955 | // a method returns. |
2956 | if (comp->info.compCallUnmanaged) |
2957 | { |
2958 | InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(jmp)); |
2959 | } |
2960 | } |
2961 | |
2962 | // Lower GT_RETURN node to insert PInvoke method epilog if required. |
2963 | void Lowering::LowerRet(GenTree* ret) |
2964 | { |
2965 | assert(ret->OperGet() == GT_RETURN); |
2966 | |
2967 | JITDUMP("lowering GT_RETURN\n" ); |
2968 | DISPNODE(ret); |
2969 | JITDUMP("============" ); |
2970 | |
2971 | #if defined(_TARGET_AMD64_) && defined(FEATURE_SIMD) |
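// If a TYP_SIMD8 value is being returned as TYP_LONG, insert a BITCAST so the bits are
// explicitly reinterpreted (and moved out of the SIMD register) rather than implicitly retyped.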
2972 | GenTreeUnOp* const unOp = ret->AsUnOp(); |
2973 | if ((unOp->TypeGet() == TYP_LONG) && (unOp->gtOp1->TypeGet() == TYP_SIMD8)) |
2974 | { |
2975 | GenTreeUnOp* bitcast = new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, TYP_LONG, unOp->gtOp1, nullptr); |
2976 | unOp->gtOp1 = bitcast; |
2977 | BlockRange().InsertBefore(unOp, bitcast); |
2978 | } |
2979 | #endif // _TARGET_AMD64_ |
2980 | |
2981 | // Method doing PInvokes has exactly one return block unless it has tail calls. |
2982 | if (comp->info.compCallUnmanaged && (comp->compCurBB == comp->genReturnBB)) |
2983 | { |
2984 | InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret)); |
2985 | } |
2986 | ContainCheckRet(ret->AsOp()); |
2987 | } |
2988 | |
2989 | GenTree* Lowering::LowerDirectCall(GenTreeCall* call) |
2990 | { |
2991 | noway_assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_HELPER); |
2992 | |
2993 | // Don't support tail calling helper methods. |
// However, tail calls dispatched via the JIT tail call helper do appear here as tail calls to a helper.
2995 | noway_assert(!call->IsTailCall() || call->IsTailCallViaHelper() || call->gtCallType == CT_USER_FUNC); |
2996 | |
// Non-virtual direct/indirect calls: Work out if the address of the
// call is known at JIT time. If not, it is either an indirect call
// or the address must be accessed via a single/double indirection.
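//
// Illustrative summary of the call target expression produced for each access type:
//     IAT_VALUE:      call addr                (direct, possibly via a materialized address)
//     IAT_PVALUE:     call [addr]              (one indirection)
//     IAT_PPVALUE:    call [[addr]]            (two indirections)
//     IAT_RELPVALUE:  call (addr + [addr])     (relative indirection)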
3000 | |
3001 | void* addr; |
3002 | InfoAccessType accessType; |
3003 | CorInfoHelpFunc helperNum = comp->eeGetHelperNum(call->gtCallMethHnd); |
3004 | |
3005 | #ifdef FEATURE_READYTORUN_COMPILER |
3006 | if (call->gtEntryPoint.addr != nullptr) |
3007 | { |
3008 | accessType = call->gtEntryPoint.accessType; |
3009 | addr = call->gtEntryPoint.addr; |
3010 | } |
3011 | else |
3012 | #endif |
3013 | if (call->gtCallType == CT_HELPER) |
3014 | { |
3015 | noway_assert(helperNum != CORINFO_HELP_UNDEF); |
3016 | |
// The convention for getHelperFtn seems to be (it's not documented) that it either
// returns the helper's address directly, or returns null and sets pAddr to the address
// of a cell holding the target, which then requires an indirection.
3020 | void* pAddr; |
3021 | addr = comp->info.compCompHnd->getHelperFtn(helperNum, (void**)&pAddr); |
3022 | |
3023 | if (addr != nullptr) |
3024 | { |
3025 | assert(pAddr == nullptr); |
3026 | accessType = IAT_VALUE; |
3027 | } |
3028 | else |
3029 | { |
3030 | accessType = IAT_PVALUE; |
3031 | addr = pAddr; |
3032 | } |
3033 | } |
3034 | else |
3035 | { |
3036 | noway_assert(helperNum == CORINFO_HELP_UNDEF); |
3037 | |
3038 | CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY; |
3039 | |
3040 | if (call->IsSameThis()) |
3041 | { |
3042 | aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); |
3043 | } |
3044 | |
3045 | if (!call->NeedsNullCheck()) |
3046 | { |
3047 | aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); |
3048 | } |
3049 | |
3050 | CORINFO_CONST_LOOKUP addrInfo; |
3051 | comp->info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo, aflags); |
3052 | |
3053 | accessType = addrInfo.accessType; |
3054 | addr = addrInfo.addr; |
3055 | } |
3056 | |
3057 | GenTree* result = nullptr; |
3058 | switch (accessType) |
3059 | { |
3060 | case IAT_VALUE: |
3061 | // Non-virtual direct call to known address |
3062 | if (!IsCallTargetInRange(addr) || call->IsTailCall()) |
3063 | { |
3064 | result = AddrGen(addr); |
3065 | } |
3066 | else |
3067 | { |
3068 | // a direct call within range of hardware relative call instruction |
3069 | // stash the address for codegen |
3070 | call->gtDirectCallAddress = addr; |
3071 | } |
3072 | break; |
3073 | |
3074 | case IAT_PVALUE: |
3075 | { |
3076 | // Non-virtual direct calls to addresses accessed by |
3077 | // a single indirection. |
3078 | GenTree* cellAddr = AddrGen(addr); |
3079 | GenTree* indir = Ind(cellAddr); |
3080 | result = indir; |
3081 | break; |
3082 | } |
3083 | |
3084 | case IAT_PPVALUE: |
3085 | // Non-virtual direct calls to addresses accessed by |
3086 | // a double indirection. |
3087 | // |
3088 | // Double-indirection. Load the address into a register |
3089 | // and call indirectly through the register |
3090 | noway_assert(helperNum == CORINFO_HELP_UNDEF); |
3091 | result = AddrGen(addr); |
3092 | result = Ind(Ind(result)); |
3093 | break; |
3094 | |
3095 | case IAT_RELPVALUE: |
3096 | { |
3097 | // Non-virtual direct calls to addresses accessed by |
3098 | // a single relative indirection. |
3099 | GenTree* cellAddr = AddrGen(addr); |
3100 | GenTree* indir = Ind(cellAddr); |
3101 | result = comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, indir, AddrGen(addr)); |
3102 | break; |
3103 | } |
3104 | |
3105 | default: |
3106 | noway_assert(!"Bad accessType" ); |
3107 | break; |
3108 | } |
3109 | |
3110 | return result; |
3111 | } |
3112 | |
3113 | GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call) |
3114 | { |
3115 | noway_assert(call->gtCallType == CT_USER_FUNC); |
3116 | |
3117 | assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) & |
3118 | (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)); |
3119 | |
3120 | GenTree* thisArgNode; |
3121 | if (call->IsTailCallViaHelper()) |
3122 | { |
3123 | #ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args. |
3124 | const unsigned argNum = 0; |
3125 | #else // !_TARGET_X86_ |
// In the case of helper-dispatched tail calls, "thisptr" will be the third arg.
// The first two args are: the real call target and the address of the args copy routine.
3128 | const unsigned argNum = 2; |
3129 | #endif // !_TARGET_X86_ |
3130 | |
3131 | fgArgTabEntry* thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum); |
3132 | thisArgNode = thisArgTabEntry->node; |
3133 | } |
3134 | else |
3135 | { |
3136 | thisArgNode = comp->gtGetThisArg(call); |
3137 | } |
3138 | |
3139 | assert(thisArgNode->gtOper == GT_PUTARG_REG); |
3140 | GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1; |
3141 | GenTree* thisExpr = originalThisExpr; |
3142 | |
// We're going to use the 'this' expression multiple times, so make a local to hold a copy of it.
3144 | |
3145 | unsigned lclNum; |
3146 | |
3147 | #ifdef _TARGET_X86_ |
3148 | if (call->IsTailCallViaHelper() && originalThisExpr->IsLocal()) |
3149 | { |
3150 | // For ordering purposes for the special tailcall arguments on x86, we forced the |
3151 | // 'this' pointer in this case to a local in Compiler::fgMorphTailCall(). |
3152 | // We could possibly use this case to remove copies for all architectures and non-tailcall |
3153 | // calls by creating a new lcl var or lcl field reference, as is done in the |
3154 | // LowerVirtualVtableCall() code. |
3155 | assert(originalThisExpr->OperGet() == GT_LCL_VAR); |
3156 | lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum(); |
3157 | } |
3158 | else |
3159 | #endif // _TARGET_X86_ |
3160 | { |
3161 | unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call" )); |
3162 | |
3163 | LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode); |
3164 | ReplaceWithLclVar(thisExprUse, delegateInvokeTmp); |
3165 | |
3166 | thisExpr = thisExprUse.Def(); // it's changed; reload it. |
3167 | lclNum = delegateInvokeTmp; |
3168 | } |
3169 | |
3170 | // replace original expression feeding into thisPtr with |
3171 | // [originalThis + offsetOfDelegateInstance] |
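//
// Illustrative shape of the LIR after this transform:
//     t0 = <thisExpr>                          ; the delegate object (possibly copied to a local)
//     t1 = LEA(t0 + offsetOfDelegateInstance)
//     t2 = IND<ref>(t1)                        ; the delegate's target object
//     PUTARG_REG(t2)                           ; becomes the call's 'this' argument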
3172 | |
3173 | GenTree* newThisAddr = new (comp, GT_LEA) |
3174 | GenTreeAddrMode(TYP_BYREF, thisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance); |
3175 | |
3176 | GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr); |
3177 | |
3178 | BlockRange().InsertAfter(thisExpr, newThisAddr, newThis); |
3179 | |
3180 | thisArgNode->gtOp.gtOp1 = newThis; |
3181 | ContainCheckIndir(newThis->AsIndir()); |
3182 | |
3183 | // the control target is |
3184 | // [originalThis + firstTgtOffs] |
3185 | |
3186 | GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET); |
3187 | |
3188 | unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget; |
3189 | GenTree* result = new (comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs); |
3190 | GenTree* callTarget = Ind(result); |
3191 | |
3192 | // don't need to sequence and insert this tree, caller will do it |
3193 | |
3194 | return callTarget; |
3195 | } |
3196 | |
3197 | GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call) |
3198 | { |
3199 | #ifdef _TARGET_X86_ |
3200 | if (call->gtCallCookie != nullptr) |
3201 | { |
3202 | NYI_X86("Morphing indirect non-virtual call with non-standard args" ); |
3203 | } |
3204 | #endif |
3205 | |
// Indirect cookie calls get transformed by fgMorphArgs into indirect calls with non-standard args.
// Hence we should never see this type of call in lower.
3208 | |
3209 | noway_assert(call->gtCallCookie == nullptr); |
3210 | |
3211 | return nullptr; |
3212 | } |
3213 | |
3214 | //------------------------------------------------------------------------ |
3215 | // CreateReturnTrapSeq: Create a tree to perform a "return trap", used in PInvoke |
3216 | // epilogs to invoke a GC under a condition. The return trap checks some global |
3217 | // location (the runtime tells us where that is and how many indirections to make), |
3218 | // then, based on the result, conditionally calls a GC helper. We use a special node |
3219 | // for this because at this time (late in the compilation phases), introducing flow |
3220 | // is tedious/difficult. |
3221 | // |
3222 | // This is used for PInvoke inlining. |
3223 | // |
3224 | // Return Value: |
3225 | // Code tree to perform the action. |
3226 | // |
3227 | GenTree* Lowering::CreateReturnTrapSeq() |
3228 | { |
3229 | // The GT_RETURNTRAP node expands to this: |
3230 | // if (g_TrapReturningThreads) |
3231 | // { |
3232 | // RareDisablePreemptiveGC(); |
3233 | // } |
3234 | |
3235 | // The only thing to do here is build up the expression that evaluates 'g_TrapReturningThreads'. |
3236 | |
3237 | void* pAddrOfCaptureThreadGlobal = nullptr; |
3238 | LONG* addrOfCaptureThreadGlobal = comp->info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal); |
3239 | |
3240 | GenTree* testTree; |
3241 | if (addrOfCaptureThreadGlobal != nullptr) |
3242 | { |
3243 | testTree = Ind(AddrGen(addrOfCaptureThreadGlobal)); |
3244 | } |
3245 | else |
3246 | { |
3247 | testTree = Ind(Ind(AddrGen(pAddrOfCaptureThreadGlobal))); |
3248 | } |
3249 | return comp->gtNewOperNode(GT_RETURNTRAP, TYP_INT, testTree); |
3250 | } |
3251 | |
3252 | //------------------------------------------------------------------------ |
3253 | // SetGCState: Create a tree that stores the given constant (0 or 1) into the |
3254 | // thread's GC state field. |
3255 | // |
3256 | // This is used for PInvoke inlining. |
3257 | // |
3258 | // Arguments: |
3259 | // state - constant (0 or 1) to store into the thread's GC state field. |
3260 | // |
3261 | // Return Value: |
3262 | // Code tree to perform the action. |
3263 | // |
3264 | GenTree* Lowering::SetGCState(int state) |
3265 | { |
3266 | // Thread.offsetOfGcState = 0/1 |
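//
// Roughly, the tree built below is:
//     STOREIND<byte>( LEA(LCL_VAR<compLvFrameListRoot> + offsetOfGCState), CNS_INT(state) )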
3267 | |
3268 | assert(state == 0 || state == 1); |
3269 | |
3270 | const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo(); |
3271 | |
3272 | GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1); |
3273 | |
3274 | GenTree* stateNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state); |
3275 | GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState); |
3276 | GenTree* storeGcState = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_BYTE, addr, stateNode); |
3277 | return storeGcState; |
3278 | } |
3279 | |
3280 | //------------------------------------------------------------------------ |
3281 | // CreateFrameLinkUpdate: Create a tree that either links or unlinks the |
3282 | // locally-allocated InlinedCallFrame from the Frame list. |
3283 | // |
3284 | // This is used for PInvoke inlining. |
3285 | // |
3286 | // Arguments: |
3287 | // action - whether to link (push) or unlink (pop) the Frame |
3288 | // |
3289 | // Return Value: |
3290 | // Code tree to perform the action. |
3291 | // |
3292 | GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action) |
3293 | { |
3294 | const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo(); |
3295 | const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo; |
3296 | |
3297 | GenTree* TCB = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot, |
3298 | (IL_OFFSET)-1); // cast to resolve ambiguity. |
3299 | |
3300 | // Thread->m_pFrame |
3301 | GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, TCB, nullptr, 1, pInfo->offsetOfThreadFrame); |
3302 | |
3303 | GenTree* data = nullptr; |
3304 | |
3305 | if (action == PushFrame) |
3306 | { |
3307 | // Thread->m_pFrame = &inlinedCallFrame; |
3308 | data = new (comp, GT_LCL_FLD_ADDR) |
3309 | GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr); |
3310 | } |
3311 | else |
3312 | { |
3313 | assert(action == PopFrame); |
3314 | // Thread->m_pFrame = inlinedCallFrame.m_pNext; |
3315 | |
3316 | data = new (comp, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, |
3317 | pInfo->inlinedCallFrameInfo.offsetOfFrameLink); |
3318 | } |
3319 | GenTree* storeInd = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_I_IMPL, addr, data); |
3320 | return storeInd; |
3321 | } |
3322 | |
3323 | //------------------------------------------------------------------------ |
3324 | // InsertPInvokeMethodProlog: Create the code that runs at the start of |
3325 | // every method that has PInvoke calls. |
3326 | // |
3327 | // Initialize the TCB local and the InlinedCallFrame object. Then link ("push") |
3328 | // the InlinedCallFrame object on the Frame chain. The layout of InlinedCallFrame |
3329 | // is defined in vm/frames.h. See also vm/jitinterface.cpp for more information. |
// The offsets of these fields are returned by the VM in a call to ICorStaticInfo::getEEInfo().
3331 | // |
3332 | // The (current) layout is as follows: |
3333 | // |
3334 | // 64-bit 32-bit CORINFO_EE_INFO |
3335 | // offset offset field name offset when set |
3336 | // ----------------------------------------------------------------------------------------- |
3337 | // +00h +00h GS cookie offsetOfGSCookie |
3338 | // +08h +04h vptr for class InlinedCallFrame offsetOfFrameVptr method prolog |
3339 | // +10h +08h m_Next offsetOfFrameLink method prolog |
3340 | // +18h +0Ch m_Datum offsetOfCallTarget call site |
3341 | // +20h n/a m_StubSecretArg not set by JIT |
3342 | // +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method |
3343 | // prolog; |
3344 | // non-x86: method prolog (SP remains |
3345 | // constant in function, after prolog: no |
3346 | // localloc and PInvoke in same function) |
3347 | // +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site |
3348 | // +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT |
3349 | // +1Ch JIT retval spill area (int) before call_gc ??? |
3350 | // +20h JIT retval spill area (long) before call_gc ??? |
3351 | // +24h Saved value of EBP method prolog ??? |
3352 | // |
3353 | // Note that in the VM, InlinedCallFrame is a C++ class whose objects have a 'this' pointer that points |
3354 | // to the InlinedCallFrame vptr (the 2nd field listed above), and the GS cookie is stored *before* |
3355 | // the object. When we link the InlinedCallFrame onto the Frame chain, we must point at this location, |
3356 | // and not at the beginning of the InlinedCallFrame local, which is actually the GS cookie. |
3357 | // |
3358 | // Return Value: |
3359 | // none |
3360 | // |
3361 | void Lowering::InsertPInvokeMethodProlog() |
3362 | { |
3363 | noway_assert(comp->info.compCallUnmanaged); |
3364 | noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); |
3365 | |
3366 | if (comp->opts.ShouldUsePInvokeHelpers()) |
3367 | { |
3368 | return; |
3369 | } |
3370 | |
3371 | JITDUMP("======= Inserting PInvoke method prolog\n" ); |
3372 | |
3373 | // The first BB must be a scratch BB in order for us to be able to safely insert the P/Invoke prolog. |
3374 | assert(comp->fgFirstBBisScratch()); |
3375 | |
3376 | LIR::Range& firstBlockRange = LIR::AsRange(comp->fgFirstBB); |
3377 | |
3378 | const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo(); |
3379 | const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo; |
3380 | |
3381 | // First arg: &compiler->lvaInlinedPInvokeFrameVar + callFrameInfo.offsetOfFrameVptr |
3382 | |
3383 | GenTree* frameAddr = new (comp, GT_LCL_FLD_ADDR) |
3384 | GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr); |
3385 | |
3386 | // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list: |
3387 | // TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg); |
// on x86 and arm32, don't pass the secretArg.
3389 | CLANG_FORMAT_COMMENT_ANCHOR; |
3390 | |
3391 | #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) |
3392 | GenTreeArgList* argList = comp->gtNewArgList(frameAddr); |
3393 | #else |
3394 | GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM)); |
3395 | #endif |
3396 | |
3397 | GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, argList); |
3398 | |
3399 | // some sanity checks on the frame list root vardsc |
3400 | LclVarDsc* varDsc = &comp->lvaTable[comp->info.compLvFrameListRoot]; |
3401 | noway_assert(!varDsc->lvIsParam); |
3402 | noway_assert(varDsc->lvType == TYP_I_IMPL); |
3403 | |
3404 | GenTree* store = |
3405 | new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot, |
3406 | (IL_OFFSET)-1); // cast to resolve ambiguity. |
3407 | store->gtOp.gtOp1 = call; |
3408 | store->gtFlags |= GTF_VAR_DEF; |
3409 | |
3410 | GenTree* const insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode(); |
3411 | |
3412 | comp->fgMorphTree(store); |
3413 | firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store)); |
3414 | DISPTREERANGE(firstBlockRange, store); |
3415 | |
3416 | #if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_) |
3417 | // For x86, this step is done at the call site (due to stack pointer not being static in the function). |
3418 | // For arm32, CallSiteSP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME. |
3419 | |
3420 | // -------------------------------------------------------- |
3421 | // InlinedCallFrame.m_pCallSiteSP = @RSP; |
3422 | |
3423 | GenTreeLclFld* storeSP = new (comp, GT_STORE_LCL_FLD) |
3424 | GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP); |
3425 | storeSP->gtOp1 = PhysReg(REG_SPBASE); |
3426 | storeSP->gtFlags |= GTF_VAR_DEF; |
3427 | |
3428 | firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP)); |
3429 | DISPTREERANGE(firstBlockRange, storeSP); |
3430 | |
3431 | #endif // !defined(_TARGET_X86_) && !defined(_TARGET_ARM_) |
3432 | |
3433 | #if !defined(_TARGET_ARM_) |
3434 | // For arm32, CalleeSavedFP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME. |
3435 | |
3436 | // -------------------------------------------------------- |
3437 | // InlinedCallFrame.m_pCalleeSavedEBP = @RBP; |
3438 | |
3439 | GenTreeLclFld* storeFP = |
3440 | new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, |
3441 | callFrameInfo.offsetOfCalleeSavedFP); |
3442 | storeFP->gtOp1 = PhysReg(REG_FPBASE); |
3443 | storeFP->gtFlags |= GTF_VAR_DEF; |
3444 | |
3445 | firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP)); |
3446 | DISPTREERANGE(firstBlockRange, storeFP); |
3447 | #endif // !defined(_TARGET_ARM_) |
3448 | |
3449 | // -------------------------------------------------------- |
3450 | // On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto |
3451 | // the current thread's Frame stack. On 64-bit targets, it only initializes the PInvoke frame. |
3452 | CLANG_FORMAT_COMMENT_ANCHOR; |
3453 | |
3454 | #ifdef _TARGET_64BIT_ |
3455 | if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) |
3456 | { |
// Push a frame. If we are NOT in an IL stub, this is done right before the call instead.
// The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack
3459 | GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame); |
3460 | firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); |
3461 | ContainCheckStoreIndir(frameUpd->AsIndir()); |
3462 | DISPTREERANGE(firstBlockRange, frameUpd); |
3463 | } |
3464 | #endif // _TARGET_64BIT_ |
3465 | } |
3466 | |
3467 | //------------------------------------------------------------------------ |
3468 | // InsertPInvokeMethodEpilog: Code that needs to be run when exiting any method |
3469 | // that has PInvoke inlines. This needs to be inserted any place you can exit the |
3470 | // function: returns, tailcalls and jmps. |
3471 | // |
3472 | // Arguments: |
3473 | // returnBB - basic block from which a method can return |
// lastExpr - GenTree of the last top level statement of returnBB (debug only arg)
3475 | // |
3476 | // Return Value: |
3477 | // Code tree to perform the action. |
3478 | // |
3479 | void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTree* lastExpr)) |
3480 | { |
3481 | assert(returnBB != nullptr); |
3482 | assert(comp->info.compCallUnmanaged); |
3483 | |
3484 | if (comp->opts.ShouldUsePInvokeHelpers()) |
3485 | { |
3486 | return; |
3487 | } |
3488 | |
3489 | JITDUMP("======= Inserting PInvoke method epilog\n" ); |
3490 | |
3491 | // Method doing PInvoke calls has exactly one return block unless it has "jmp" or tail calls. |
3492 | assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) || |
3493 | returnBB->endsWithTailCallOrJmp(comp)); |
3494 | |
3495 | LIR::Range& returnBlockRange = LIR::AsRange(returnBB); |
3496 | |
3497 | GenTree* insertionPoint = returnBlockRange.LastNode(); |
3498 | assert(insertionPoint == lastExpr); |
3499 | |
3500 | // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution |
3501 | // order so that it is guaranteed that there will be no further PInvokes after that point in the method. |
3502 | // |
3503 | // Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be |
3504 | // Op1, PME, GT_RETURN |
3505 | // |
3506 | // Example2: GT_CALL(arg side effect computing nodes, Stk Args Setup, Reg Args setup). The execution order would be |
3507 | // arg side effect computing nodes, Stk Args setup, Reg Args setup, GT_CALL |
3508 | // After inserting PME execution order would be: |
3509 | // arg side effect computing nodes, Stk Args setup, Reg Args setup, PME, GT_CALL |
3510 | // |
3511 | // Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP |
// That is, after the PME, the args for the GT_JMP call will be set up.
3513 | |
3514 | // TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a |
3515 | // PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant, |
// it is harmless.
3517 | // Note that liveness is artificially extending the life of compLvFrameListRoot var if the method being compiled has |
// PInvokes. Deleting the below statement would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot
// will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for the x64 case to
3520 | // properly extend the life of compLvFrameListRoot var. |
3521 | // |
3522 | // Thread.offsetOfGcState = 0/1 |
3523 | // That is [tcb + offsetOfGcState] = 1 |
3524 | GenTree* storeGCState = SetGCState(1); |
3525 | returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState)); |
3526 | ContainCheckStoreIndir(storeGCState->AsIndir()); |
3527 | |
3528 | // Pop the frame if necessary. This always happens in the epilog on 32-bit targets. For 64-bit targets, we only do |
3529 | // this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call. |
3530 | CLANG_FORMAT_COMMENT_ANCHOR; |
3531 | |
3532 | #ifdef _TARGET_64BIT_ |
3533 | if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) |
3534 | #endif // _TARGET_64BIT_ |
3535 | { |
3536 | GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame); |
3537 | returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); |
3538 | ContainCheckStoreIndir(frameUpd->AsIndir()); |
3539 | } |
3540 | } |
3541 | |
3542 | //------------------------------------------------------------------------ |
3543 | // InsertPInvokeCallProlog: Emit the call-site prolog for direct calls to unmanaged code. |
3544 | // It does all the necessary call-site setup of the InlinedCallFrame. |
3545 | // |
3546 | // Arguments: |
3547 | // call - the call for which we are inserting the PInvoke prolog. |
3548 | // |
3549 | // Return Value: |
3550 | // None. |
3551 | // |
3552 | void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) |
3553 | { |
3554 | JITDUMP("======= Inserting PInvoke call prolog\n" ); |
3555 | |
3556 | GenTree* insertBefore = call; |
3557 | if (call->gtCallType == CT_INDIRECT) |
3558 | { |
3559 | bool isClosed; |
3560 | insertBefore = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode(); |
3561 | assert(isClosed); |
3562 | } |
3563 | |
3564 | const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo; |
3565 | |
3566 | gtCallTypes callType = (gtCallTypes)call->gtCallType; |
3567 | |
3568 | noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); |
3569 | |
3570 | if (comp->opts.ShouldUsePInvokeHelpers()) |
3571 | { |
3572 | // First argument is the address of the frame variable. |
3573 | GenTree* frameAddr = new (comp, GT_LCL_VAR_ADDR) |
3574 | GenTreeLclVar(GT_LCL_VAR_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET); |
3575 | |
3576 | // Insert call to CORINFO_HELP_JIT_PINVOKE_BEGIN |
3577 | GenTree* helperCall = |
3578 | comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID, comp->gtNewArgList(frameAddr)); |
3579 | |
3580 | comp->fgMorphTree(helperCall); |
3581 | BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall)); |
3582 | LowerNode(helperCall); // helper call is inserted before current node and should be lowered here. |
3583 | return; |
3584 | } |
3585 | |
3586 | // Emit the following sequence: |
3587 | // |
3588 | // InlinedCallFrame.callTarget = methodHandle // stored in m_Datum |
3589 | // InlinedCallFrame.m_pCallSiteSP = SP // x86 only |
3590 | // InlinedCallFrame.m_pCallerReturnAddress = return address |
3591 | // Thread.gcState = 0 |
3592 | // (non-stub) - update top Frame on TCB // 64-bit targets only |
3593 | |
3594 | // ---------------------------------------------------------------------------------- |
3595 | // Setup InlinedCallFrame.callSiteTarget (which is how the JIT refers to it). |
3596 | // The actual field is InlinedCallFrame.m_Datum which has many different uses and meanings. |
3597 | |
3598 | GenTree* src = nullptr; |
3599 | |
3600 | if (callType == CT_INDIRECT) |
3601 | { |
3602 | #if !defined(_TARGET_64BIT_) |
3603 | // On 32-bit targets, indirect calls need the size of the stack args in InlinedCallFrame.m_Datum. |
3604 | const unsigned numStkArgBytes = call->fgArgInfo->GetNextSlotNum() * TARGET_POINTER_SIZE; |
3605 | |
3606 | src = comp->gtNewIconNode(numStkArgBytes, TYP_INT); |
3607 | #else |
3608 | // On 64-bit targets, indirect calls may need the stub parameter value in InlinedCallFrame.m_Datum. |
3609 | // If the stub parameter value is not needed, m_Datum will be initialized by the VM. |
3610 | if (comp->info.compPublishStubParam) |
3611 | { |
3612 | src = comp->gtNewLclvNode(comp->lvaStubArgumentVar, TYP_I_IMPL); |
3613 | } |
3614 | #endif // !defined(_TARGET_64BIT_) |
3615 | } |
3616 | else |
3617 | { |
3618 | assert(callType == CT_USER_FUNC); |
3619 | |
3620 | void* pEmbedMethodHandle = nullptr; |
3621 | CORINFO_METHOD_HANDLE embedMethodHandle = |
3622 | comp->info.compCompHnd->embedMethodHandle(call->gtCallMethHnd, &pEmbedMethodHandle); |
3623 | |
3624 | noway_assert((!embedMethodHandle) != (!pEmbedMethodHandle)); |
3625 | |
3626 | if (embedMethodHandle != nullptr) |
3627 | { |
3628 | // InlinedCallFrame.callSiteTarget = methodHandle |
3629 | src = AddrGen(embedMethodHandle); |
3630 | } |
3631 | else |
3632 | { |
3633 | // InlinedCallFrame.callSiteTarget = *pEmbedMethodHandle |
3634 | src = Ind(AddrGen(pEmbedMethodHandle)); |
3635 | } |
3636 | } |
3637 | |
3638 | if (src != nullptr) |
3639 | { |
3640 | // Store into InlinedCallFrame.m_Datum, the offset of which is given by offsetOfCallTarget. |
3641 | GenTreeLclFld* store = |
3642 | new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, |
3643 | callFrameInfo.offsetOfCallTarget); |
3644 | store->gtOp1 = src; |
3645 | store->gtFlags |= GTF_VAR_DEF; |
3646 | |
3647 | InsertTreeBeforeAndContainCheck(insertBefore, store); |
3648 | } |
3649 | |
3650 | #ifdef _TARGET_X86_ |
3651 | |
3652 | // ---------------------------------------------------------------------------------- |
3653 | // InlinedCallFrame.m_pCallSiteSP = SP |
3654 | |
3655 | GenTreeLclFld* storeCallSiteSP = new (comp, GT_STORE_LCL_FLD) |
3656 | GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP); |
3657 | |
3658 | storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE); |
3659 | storeCallSiteSP->gtFlags |= GTF_VAR_DEF; |
3660 | |
3661 | InsertTreeBeforeAndContainCheck(insertBefore, storeCallSiteSP); |
3662 | |
3663 | #endif |
3664 | |
3665 | // ---------------------------------------------------------------------------------- |
3666 | // InlinedCallFrame.m_pCallerReturnAddress = &label (the address of the instruction immediately following the call) |
3667 | |
3668 | GenTreeLclFld* storeLab = |
3669 | new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, |
3670 | callFrameInfo.offsetOfReturnAddress); |
3671 | |
3672 | // We don't have a real label, and inserting one is hard (even if we made a special node), |
3673 | // so for now we will just 'know' what this means in codegen. |
3674 | GenTreeLabel* labelRef = new (comp, GT_LABEL) GenTreeLabel(nullptr); |
3675 | labelRef->gtType = TYP_I_IMPL; |
3676 | storeLab->gtOp1 = labelRef; |
3677 | storeLab->gtFlags |= GTF_VAR_DEF; |
3678 | |
3679 | InsertTreeBeforeAndContainCheck(insertBefore, storeLab); |
3680 | |
3681 | // Push the PInvoke frame if necessary. On 32-bit targets this only happens in the method prolog if a method |
3682 | // contains PInvokes; on 64-bit targets this is necessary in non-stubs. |
3683 | CLANG_FORMAT_COMMENT_ANCHOR; |
3684 | |
3685 | #ifdef _TARGET_64BIT_ |
3686 | if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) |
3687 | { |
3688 | // Set the TCB's frame to be the one we just created. |
3689 | // Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME) |
3690 | // has prepended it to the linked list to maintain the stack of Frames. |
3691 | // |
3692 | // Stubs do this once per stub, not once per call. |
3693 | GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame); |
3694 | BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd)); |
3695 | ContainCheckStoreIndir(frameUpd->AsIndir()); |
3696 | } |
3697 | #endif // _TARGET_64BIT_ |
3698 | |
3699 | // IMPORTANT **** This instruction must come last!!! **** |
3700 | // It changes the thread's state to Preemptive mode |
3701 | // ---------------------------------------------------------------------------------- |
3702 | // [tcb + offsetOfGcState] = 0 |
3703 | |
3704 | GenTree* storeGCState = SetGCState(0); |
3705 | BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState)); |
3706 | ContainCheckStoreIndir(storeGCState->AsIndir()); |
3707 | } |
3708 | |
3709 | //------------------------------------------------------------------------ |
3710 | // InsertPInvokeCallEpilog: Insert the code that goes after every inlined pinvoke call. |
3711 | // |
3712 | // Arguments: |
3713 | // call - the call for which we are inserting the PInvoke epilog. |
3714 | // |
3715 | // Return Value: |
3716 | // None. |
3717 | // |
3718 | void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) |
3719 | { |
3720 | JITDUMP("======= Inserting PInvoke call epilog\n" ); |
3721 | |
3722 | if (comp->opts.ShouldUsePInvokeHelpers()) |
3723 | { |
3724 | noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); |
3725 | |
3726 | // First argument is the address of the frame variable. |
3727 | GenTree* frameAddr = |
3728 | new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET); |
3729 | frameAddr->SetOperRaw(GT_LCL_VAR_ADDR); |
3730 | |
3731 | // Insert call to CORINFO_HELP_JIT_PINVOKE_END |
3732 | GenTreeCall* helperCall = |
3733 | comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, comp->gtNewArgList(frameAddr)); |
3734 | |
3735 | comp->fgMorphTree(helperCall); |
3736 | BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall)); |
3737 | ContainCheckCallOperands(helperCall); |
3738 | return; |
3739 | } |
3740 | |
3741 | // gcstate = 1 |
3742 | GenTree* insertionPoint = call->gtNext; |
3743 | |
3744 | GenTree* tree = SetGCState(1); |
3745 | BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); |
3746 | ContainCheckStoreIndir(tree->AsIndir()); |
3747 | |
3748 | tree = CreateReturnTrapSeq(); |
3749 | BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); |
3750 | ContainCheckReturnTrap(tree->AsOp()); |
3751 | |
// Pop the frame if necessary. On 32-bit targets this only happens in the method epilog; on 64-bit targets this
3753 | // happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive. |
3754 | CLANG_FORMAT_COMMENT_ANCHOR; |
3755 | |
3756 | #ifdef _TARGET_64BIT_ |
3757 | if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) |
3758 | { |
3759 | tree = CreateFrameLinkUpdate(PopFrame); |
3760 | BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); |
3761 | ContainCheckStoreIndir(tree->AsIndir()); |
3762 | } |
3763 | #else |
3764 | const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo; |
3765 | |
3766 | // ---------------------------------------------------------------------------------- |
3767 | // InlinedCallFrame.m_pCallerReturnAddress = nullptr |
3768 | |
3769 | GenTreeLclFld* const storeCallSiteTracker = |
3770 | new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, |
3771 | callFrameInfo.offsetOfReturnAddress); |
3772 | |
3773 | GenTreeIntCon* const constantZero = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0); |
3774 | |
3775 | storeCallSiteTracker->gtOp1 = constantZero; |
3776 | storeCallSiteTracker->gtFlags |= GTF_VAR_DEF; |
3777 | |
3778 | BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker); |
3779 | ContainCheckStoreLoc(storeCallSiteTracker); |
3780 | #endif // _TARGET_64BIT_ |
3781 | } |
3782 | |
3783 | //------------------------------------------------------------------------ |
3784 | // LowerNonvirtPinvokeCall: Lower a non-virtual / indirect PInvoke call |
3785 | // |
3786 | // Arguments: |
3787 | // call - The call to lower. |
3788 | // |
3789 | // Return Value: |
3790 | // The lowered call tree. |
3791 | // |
3792 | GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call) |
3793 | { |
3794 | // PInvoke lowering varies depending on the flags passed in by the EE. By default, |
3795 | // GC transitions are generated inline; if CORJIT_FLAG_USE_PINVOKE_HELPERS is specified, |
3796 | // GC transitions are instead performed using helper calls. Examples of each case are given |
3797 | // below. Note that the data structure that is used to store information about a call frame |
3798 | // containing any P/Invoke calls is initialized in the method prolog (see |
3799 | // InsertPInvokeMethod{Prolog,Epilog} for details). |
3800 | // |
3801 | // Inline transitions: |
3802 | // InlinedCallFrame inlinedCallFrame; |
3803 | // |
3804 | // ... |
3805 | // |
3806 | // // Set up frame information |
3807 | // inlinedCallFrame.callTarget = methodHandle; // stored in m_Datum |
3808 | // inlinedCallFrame.m_pCallSiteSP = SP; // x86 only |
3809 | // inlinedCallFrame.m_pCallerReturnAddress = &label; (the address of the instruction immediately following the |
3810 | // call) |
3811 | // Thread.m_pFrame = &inlinedCallFrame; (non-IL-stub only) |
3812 | // |
3813 | // // Switch the thread's GC mode to preemptive mode |
3814 | // thread->m_fPreemptiveGCDisabled = 0; |
3815 | // |
3816 | // // Call the unmanaged method |
3817 | // target(); |
3818 | // |
3819 | // // Switch the thread's GC mode back to cooperative mode |
3820 | // thread->m_fPreemptiveGCDisabled = 1; |
3821 | // |
3822 | // // Rendezvous with a running collection if necessary |
3823 | // if (g_TrapReturningThreads) |
3824 | // RareDisablePreemptiveGC(); |
3825 | // |
// Transitions using helpers:
3827 | // |
3828 | // OpaqueFrame opaqueFrame; |
3829 | // |
3830 | // ... |
3831 | // |
3832 | // // Call the JIT_PINVOKE_BEGIN helper |
3833 | // JIT_PINVOKE_BEGIN(&opaqueFrame); |
3834 | // |
3835 | // // Call the unmanaged method |
3836 | // target(); |
3837 | // |
3838 | // // Call the JIT_PINVOKE_END helper |
3839 | // JIT_PINVOKE_END(&opaqueFrame); |
3840 | // |
// Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target
3842 | // platform. They may be changed in the future such that they preserve all register values. |
3843 | |
3844 | GenTree* result = nullptr; |
3845 | void* addr = nullptr; |
3846 | |
3847 | // assert we have seen one of these |
3848 | noway_assert(comp->info.compCallUnmanaged != 0); |
3849 | |
3850 | // All code generated by this function must not contain the randomly-inserted NOPs |
3851 | // that we insert to inhibit JIT spraying in partial trust scenarios. |
3852 | // The PINVOKE_PROLOG op signals this to the code generator/emitter. |
3853 | |
3854 | GenTree* prolog = new (comp, GT_NOP) GenTree(GT_PINVOKE_PROLOG, TYP_VOID); |
3855 | BlockRange().InsertBefore(call, prolog); |
3856 | |
3857 | InsertPInvokeCallProlog(call); |
3858 | |
3859 | if (call->gtCallType != CT_INDIRECT) |
3860 | { |
3861 | noway_assert(call->gtCallType == CT_USER_FUNC); |
3862 | CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd; |
3863 | |
3864 | CORINFO_CONST_LOOKUP lookup; |
3865 | comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup); |
3866 | |
3867 | void* addr = lookup.addr; |
3868 | switch (lookup.accessType) |
3869 | { |
3870 | case IAT_VALUE: |
3871 | if (!IsCallTargetInRange(addr)) |
3872 | { |
3873 | result = AddrGen(addr); |
3874 | } |
3875 | else |
3876 | { |
3877 | // a direct call within range of hardware relative call instruction |
3878 | // stash the address for codegen |
3879 | call->gtDirectCallAddress = addr; |
3880 | #ifdef FEATURE_READYTORUN_COMPILER |
3881 | call->gtEntryPoint.addr = nullptr; |
3882 | call->gtEntryPoint.accessType = IAT_VALUE; |
3883 | #endif |
3884 | } |
3885 | break; |
3886 | |
3887 | case IAT_PVALUE: |
3888 | result = Ind(AddrGen(addr)); |
3889 | break; |
3890 | |
3891 | case IAT_PPVALUE: |
3892 | result = Ind(Ind(AddrGen(addr))); |
3893 | break; |
3894 | |
3895 | case IAT_RELPVALUE: |
3896 | unreached(); |
3897 | } |
3898 | } |
3899 | |
3900 | InsertPInvokeCallEpilog(call); |
3901 | |
3902 | return result; |
3903 | } |
3904 | |
3905 | // Expand the code necessary to calculate the control target. |
3906 | // Returns: the expression needed to calculate the control target |
3907 | // May insert embedded statements |
3908 | GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call) |
3909 | { |
3910 | noway_assert(call->gtCallType == CT_USER_FUNC); |
3911 | |
3912 | // If this is a tail call via helper, thisPtr will be the third argument. |
3913 | int thisPtrArgNum; |
3914 | regNumber thisPtrArgReg; |
3915 | |
3916 | #ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args. |
3917 | if (call->IsTailCallViaHelper()) |
3918 | { |
3919 | thisPtrArgNum = 2; |
3920 | thisPtrArgReg = REG_ARG_2; |
3921 | } |
3922 | else |
3923 | #endif // !_TARGET_X86_ |
3924 | { |
3925 | thisPtrArgNum = 0; |
3926 | thisPtrArgReg = comp->codeGen->genGetThisArgReg(call); |
3927 | } |
3928 | |
3929 | // get a reference to the thisPtr being passed |
3930 | fgArgTabEntry* argEntry = comp->gtArgEntryByArgNum(call, thisPtrArgNum); |
3931 | assert(argEntry->regNum == thisPtrArgReg); |
3932 | assert(argEntry->node->gtOper == GT_PUTARG_REG); |
3933 | GenTree* thisPtr = argEntry->node->gtOp.gtOp1; |
3934 | |
3935 | // If what we are passing as the thisptr is not already a local, make a new local to place it in |
3936 | // because we will be creating expressions based on it. |
3937 | unsigned lclNum; |
3938 | if (thisPtr->IsLocal()) |
3939 | { |
3940 | lclNum = thisPtr->gtLclVarCommon.gtLclNum; |
3941 | } |
3942 | else |
3943 | { |
3944 | // Split off the thisPtr and store to a temporary variable. |
3945 | if (vtableCallTemp == BAD_VAR_NUM) |
3946 | { |
3947 | vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call" )); |
3948 | } |
3949 | |
3950 | LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node); |
3951 | ReplaceWithLclVar(thisPtrUse, vtableCallTemp); |
3952 | |
3953 | lclNum = vtableCallTemp; |
3954 | } |
3955 | |
3956 | // Get hold of the vtable offset (note: this might be expensive) |
3957 | unsigned vtabOffsOfIndirection; |
3958 | unsigned vtabOffsAfterIndirection; |
3959 | bool isRelative; |
3960 | comp->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, |
3961 | &vtabOffsAfterIndirection, &isRelative); |
3962 | |
3963 | // If the thisPtr is a local field, then construct a local field type node |
3964 | GenTree* local; |
3965 | if (thisPtr->isLclField()) |
3966 | { |
3967 | local = new (comp, GT_LCL_FLD) |
3968 | GenTreeLclFld(GT_LCL_FLD, thisPtr->TypeGet(), lclNum, thisPtr->AsLclFld()->gtLclOffs); |
3969 | } |
3970 | else |
3971 | { |
3972 | local = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, thisPtr->TypeGet(), lclNum, BAD_IL_OFFSET); |
3973 | } |
3974 | |
3975 | // pointer to virtual table = [REG_CALL_THIS + offs] |
3976 | GenTree* result = Ind(Offset(local, VPTR_OFFS)); |
3977 | |
3978 | // Get the appropriate vtable chunk |
3979 | if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK) |
3980 | { |
3981 | if (isRelative) |
3982 | { |
3983 | // MethodTable offset is a relative pointer. |
3984 | // |
// An additional temporary variable is used to store the virtual table pointer.
// The address of the method is obtained by the following computation:
3987 | // |
3988 | // Save relative offset to tmp (vtab is virtual table pointer, vtabOffsOfIndirection is offset of |
3989 | // vtable-1st-level-indirection): |
3990 | // tmp = vtab |
3991 | // |
3992 | // Save address of method to result (vtabOffsAfterIndirection is offset of vtable-2nd-level-indirection): |
3993 | // result = [tmp + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp + vtabOffsOfIndirection]] |
3994 | // |
3995 | // |
// If relative pointers are also used in the second level indirection, an additional temporary is used:
3997 | // tmp1 = vtab |
3998 | // tmp2 = tmp1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp1 + vtabOffsOfIndirection] |
3999 | // result = tmp2 + [tmp2] |
4000 | // |
4001 | unsigned lclNumTmp = comp->lvaGrabTemp(true DEBUGARG("lclNumTmp" )); |
4002 | unsigned lclNumTmp2 = comp->lvaGrabTemp(true DEBUGARG("lclNumTmp2" )); |
4003 | |
4004 | GenTree* lclvNodeStore = comp->gtNewTempAssign(lclNumTmp, result); |
4005 | |
4006 | GenTree* tmpTree = comp->gtNewLclvNode(lclNumTmp, result->TypeGet()); |
4007 | tmpTree = Offset(tmpTree, vtabOffsOfIndirection); |
4008 | |
4009 | tmpTree = comp->gtNewOperNode(GT_IND, TYP_I_IMPL, tmpTree, false); |
4010 | GenTree* offs = comp->gtNewIconNode(vtabOffsOfIndirection + vtabOffsAfterIndirection, TYP_INT); |
4011 | result = comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, comp->gtNewLclvNode(lclNumTmp, result->TypeGet()), offs); |
4012 | |
4013 | GenTree* base = OffsetByIndexWithScale(result, tmpTree, 1); |
4014 | GenTree* lclvNodeStore2 = comp->gtNewTempAssign(lclNumTmp2, base); |
4015 | |
4016 | LIR::Range range = LIR::SeqTree(comp, lclvNodeStore); |
4017 | JITDUMP("result of obtaining pointer to virtual table:\n" ); |
4018 | DISPRANGE(range); |
4019 | BlockRange().InsertBefore(call, std::move(range)); |
4020 | |
4021 | LIR::Range range2 = LIR::SeqTree(comp, lclvNodeStore2); |
4022 | JITDUMP("result of obtaining pointer to virtual table 2nd level indirection:\n" ); |
4023 | DISPRANGE(range2); |
4024 | BlockRange().InsertAfter(lclvNodeStore, std::move(range2)); |
4025 | |
4026 | result = Ind(comp->gtNewLclvNode(lclNumTmp2, result->TypeGet())); |
4027 | result = |
4028 | comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, result, comp->gtNewLclvNode(lclNumTmp2, result->TypeGet())); |
4029 | } |
4030 | else |
4031 | { |
4032 | // result = [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection] |
4033 | result = Ind(Offset(result, vtabOffsOfIndirection)); |
4034 | } |
4035 | } |
4036 | else |
4037 | { |
4038 | assert(!isRelative); |
4039 | } |
4040 | |
4041 | // Load the function address |
4042 | // result = [reg+vtabOffs] |
4043 | if (!isRelative) |
4044 | { |
4045 | result = Ind(Offset(result, vtabOffsAfterIndirection)); |
4046 | } |
4047 | |
4048 | return result; |
4049 | } |
4050 | |
4051 | // Lower stub dispatched virtual calls. |
4052 | GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) |
4053 | { |
4054 | assert(call->IsVirtualStub()); |
4055 | |
4056 | // An x86 JIT which uses full stub dispatch must generate only |
4057 | // the following stub dispatch calls: |
4058 | // |
4059 | // (1) isCallRelativeIndirect: |
4060 | // call dword ptr [rel32] ; FF 15 ---rel32---- |
4061 | // (2) isCallRelative: |
4062 | // call abc ; E8 ---rel32---- |
4063 | // (3) isCallRegisterIndirect: |
4064 | // 3-byte nop ; |
4065 | // call dword ptr [eax] ; FF 10 |
4066 | // |
4067 | // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN |
4068 | // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect. |
4069 | |
4070 | GenTree* result = nullptr; |
4071 | |
4072 | #ifdef _TARGET_64BIT_ |
4073 | // Non-tail calls: Jump Stubs are not taken into account by VM for mapping an AV into a NullRef |
4074 | // exception. Therefore, JIT needs to emit an explicit null check. Note that Jit64 too generates |
4075 | // an explicit null check. |
4076 | // |
4077 | // Tail calls: fgMorphTailCall() materializes null check explicitly and hence no need to emit |
4078 | // null check. |
4079 | |
4080 | // Non-64-bit: No need to null check the this pointer - the dispatch code will deal with this. |
// The VM considers exceptions that occur in stubs on 64-bit not to be managed exceptions and
// it would be difficult to change this in a way that affects only the right stubs.
4083 | |
4084 | if (!call->IsTailCallViaHelper()) |
4085 | { |
4086 | call->gtFlags |= GTF_CALL_NULLCHECK; |
4087 | } |
4088 | #endif |
4089 | |
4090 | // This is code to set up an indirect call to a stub address computed |
4091 | // via dictionary lookup. |
4092 | if (call->gtCallType == CT_INDIRECT) |
4093 | { |
4094 | // The importer decided we needed a stub call via a computed |
4095 | // stub dispatch address, i.e. an address which came from a dictionary lookup. |
4096 | // - The dictionary lookup produces an indirected address, suitable for call |
4097 | // via "call [VirtualStubParam.reg]" |
4098 | // |
4099 | // This combination will only be generated for shared generic code and when |
4100 | // stub dispatch is active. |
4101 | |
4102 | // fgMorphArgs will have created trees to pass the address in VirtualStubParam.reg. |
4103 | // All we have to do here is add an indirection to generate the actual call target. |
4104 | |
4105 | GenTree* ind = Ind(call->gtCallAddr); |
4106 | BlockRange().InsertAfter(call->gtCallAddr, ind); |
4107 | call->gtCallAddr = ind; |
4108 | |
4109 | ind->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; |
4110 | |
4111 | ContainCheckIndir(ind->AsIndir()); |
4112 | } |
4113 | else |
4114 | { |
4115 | // Direct stub call. |
4116 | // Get stub addr. This will return NULL if virtual call stubs are not active |
4117 | void* stubAddr = call->gtStubCallStubAddr; |
4118 | noway_assert(stubAddr != nullptr); |
4119 | |
// If not CT_INDIRECT, then it should always be a relative indirect call.
// This is ensured by the VM.
4122 | noway_assert(call->IsVirtualStubRelativeIndir()); |
4123 | |
// This is a direct stub call, though the stubAddr itself may still need to be
// accessed via an indirection.
4126 | GenTree* addr = AddrGen(stubAddr); |
4127 | |
4128 | #ifdef _TARGET_X86_ |
4129 | // On x86, for tailcall via helper, the JIT_TailCall helper takes the stubAddr as |
4130 | // the target address, and we set a flag that it's a VSD call. The helper then |
4131 | // handles any necessary indirection. |
4132 | if (call->IsTailCallViaHelper()) |
4133 | { |
4134 | result = addr; |
4135 | } |
4136 | #endif // _TARGET_X86_ |
4137 | |
4138 | if (result == nullptr) |
4139 | { |
4140 | result = Ind(addr); |
4141 | } |
4142 | } |
4143 | |
4144 | // TODO-Cleanup: start emitting random NOPS |
4145 | return result; |
4146 | } |
4147 | |
4148 | //------------------------------------------------------------------------ |
4149 | // AddrModeCleanupHelper: Remove the nodes that are no longer used after an |
4150 | // addressing mode is constructed |
4151 | // |
4152 | // Arguments: |
4153 | // addrMode - A pointer to a new GenTreeAddrMode |
4154 | // node - The node currently being considered for removal |
4155 | // |
4156 | // Return Value: |
4157 | // None. |
4158 | // |
4159 | // Assumptions: |
4160 | // 'addrMode' and 'node' must be contained in the current block |
4161 | // |
4162 | void Lowering::AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node) |
4163 | { |
4164 | if (node == addrMode->Base() || node == addrMode->Index()) |
4165 | { |
4166 | return; |
4167 | } |
4168 | |
4169 | // TODO-LIR: change this to use the LIR mark bit and iterate instead of recursing |
4170 | node->VisitOperands([this, addrMode](GenTree* operand) -> GenTree::VisitResult { |
4171 | AddrModeCleanupHelper(addrMode, operand); |
4172 | return GenTree::VisitResult::Continue; |
4173 | }); |
4174 | |
4175 | BlockRange().Remove(node); |
4176 | } |
4177 | |
4178 | //------------------------------------------------------------------------ |
// Lowering::AreSourcesPossiblyModifiedLocals:
4180 | // Given two nodes which will be used in an addressing mode (base, |
4181 | // index), check to see if they are lclVar reads, and if so, walk |
4182 | // backwards from the use until both reads have been visited to |
4183 | // determine if they are potentially modified in that range. |
4184 | // |
4185 | // Arguments: |
4186 | // addr - the node that uses the base and index nodes |
4187 | // base - the base node |
4188 | // index - the index node |
4189 | // |
// Returns: true if either the base or index may be modified between its
// read and addr.
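//
// For example, if 'base' reads local V01 and some node between that read and 'addr'
// stores to V01, folding the read into an address mode evaluated at 'addr' would
// incorrectly observe the new value.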
4192 | // |
4193 | bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index) |
4194 | { |
4195 | assert(addr != nullptr); |
4196 | |
4197 | unsigned markCount = 0; |
4198 | |
4199 | SideEffectSet baseSideEffects; |
4200 | if (base != nullptr) |
4201 | { |
4202 | if (base->OperIsLocalRead()) |
4203 | { |
4204 | baseSideEffects.AddNode(comp, base); |
4205 | } |
4206 | else |
4207 | { |
4208 | base = nullptr; |
4209 | } |
4210 | } |
4211 | |
4212 | SideEffectSet indexSideEffects; |
4213 | if (index != nullptr) |
4214 | { |
4215 | if (index->OperIsLocalRead()) |
4216 | { |
4217 | indexSideEffects.AddNode(comp, index); |
4218 | } |
4219 | else |
4220 | { |
4221 | index = nullptr; |
4222 | } |
4223 | } |
4224 | |
4225 | for (GenTree* cursor = addr;; cursor = cursor->gtPrev) |
4226 | { |
4227 | assert(cursor != nullptr); |
4228 | |
4229 | if (cursor == base) |
4230 | { |
4231 | base = nullptr; |
4232 | } |
4233 | |
4234 | if (cursor == index) |
4235 | { |
4236 | index = nullptr; |
4237 | } |
4238 | |
4239 | if ((base == nullptr) && (index == nullptr)) |
4240 | { |
4241 | return false; |
4242 | } |
4243 | |
4244 | m_scratchSideEffects.Clear(); |
4245 | m_scratchSideEffects.AddNode(comp, cursor); |
4246 | if ((base != nullptr) && m_scratchSideEffects.InterferesWith(baseSideEffects, false)) |
4247 | { |
4248 | return true; |
4249 | } |
4250 | |
4251 | if ((index != nullptr) && m_scratchSideEffects.InterferesWith(indexSideEffects, false)) |
4252 | { |
4253 | return true; |
4254 | } |
4255 | } |
4256 | } |
4257 | |
4258 | //------------------------------------------------------------------------ |
4259 | // TryCreateAddrMode: recognize trees which can be implemented using an |
4260 | // addressing mode and transform them to a GT_LEA |
4261 | // |
4262 | // Arguments: |
4263 | // use: the use of the address we want to transform |
4264 | // isIndir: true if this addressing mode is the child of an indir |
4265 | // |
4266 | // Returns: |
4267 | // The created LEA node or the original address node if an LEA could |
4268 | // not be formed. |
4269 | // |
4270 | GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir) |
4271 | { |
4272 | GenTree* addr = use.Def(); |
4273 | GenTree* base = nullptr; |
4274 | GenTree* index = nullptr; |
4275 | unsigned scale = 0; |
4276 | ssize_t offset = 0; |
4277 | bool rev = false; |
4278 | |
4279 | // TODO-1stClassStructs: This logic is here to preserve prior behavior. Note that previously |
4280 | // block ops were not considered for addressing modes, but an add under it may have been. |
4281 | // This should be replaced with logic that more carefully determines when an addressing mode |
4282 | // would be beneficial for a block op. |
4283 | if (isIndir) |
4284 | { |
4285 | GenTree* indir = use.User(); |
4286 | if (indir->TypeGet() == TYP_STRUCT) |
4287 | { |
4288 | isIndir = false; |
4289 | } |
4290 | else if (varTypeIsStruct(indir)) |
4291 | { |
4292 | // We can have an indirection on the rhs of a block copy (it is the source |
4293 | // object). This is not a "regular" indirection. |
4294 | // (Note that the user check could be costly.) |
4295 | LIR::Use indirUse; |
4296 | if (BlockRange().TryGetUse(indir, &indirUse) && indirUse.User()->OperIsIndir()) |
4297 | { |
4298 | isIndir = false; |
4299 | } |
4300 | else |
4301 | { |
4302 | isIndir = !indir->OperIsBlk(); |
4303 | } |
4304 | } |
4305 | } |
4306 | |
4307 | // Find out if an addressing mode can be constructed |
4308 | bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address |
4309 | true, // fold |
4310 | &rev, // reverse ops |
4311 | &base, // base addr |
4312 | &index, // index val |
4313 | #if SCALED_ADDR_MODES |
4314 | &scale, // scaling |
4315 | #endif // SCALED_ADDR_MODES |
4316 | &offset); // displacement |
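
// genCreateAddrMode folds constant offsets and a scaled index into the components above;
// e.g. ADD(ADD(lclVar, LSH(i, 2)), 16) is reported as base = lclVar, index = i, scale = 4,
// offset = 16. A scale of 0 means no scaled index was found and is normalized to 1 below.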
4317 | |
4318 | if (scale == 0) |
4319 | { |
4320 | scale = 1; |
4321 | } |
4322 | |
4323 | if (!isIndir) |
4324 | { |
4325 | // this is just a reg-const add |
4326 | if (index == nullptr) |
4327 | { |
4328 | return addr; |
4329 | } |
4330 | |
4331 | // this is just a reg-reg add |
4332 | if (scale == 1 && offset == 0) |
4333 | { |
4334 | return addr; |
4335 | } |
4336 | } |
4337 | |
4338 | // make sure there are not any side effects between def of leaves and use |
4339 | if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index)) |
4340 | { |
JITDUMP("No addressing mode:\n ");
4342 | DISPNODE(addr); |
4343 | return addr; |
4344 | } |
4345 | |
4346 | GenTree* arrLength = nullptr; |
4347 | |
JITDUMP("Addressing mode:\n");
JITDUMP(" Base\n ");
DISPNODE(base);
if (index != nullptr)
{
JITDUMP(" + Index * %u + %d\n ", scale, offset);
DISPNODE(index);
}
else
{
JITDUMP(" + %d\n", offset);
4359 | } |
4360 | |
4361 | var_types addrModeType = addr->TypeGet(); |
4362 | if (addrModeType == TYP_REF) |
4363 | { |
4364 | addrModeType = TYP_BYREF; |
4365 | } |
4366 | |
4367 | GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset); |
4368 | |
4369 | // Neither the base nor the index should now be contained. |
4370 | if (base != nullptr) |
4371 | { |
4372 | base->ClearContained(); |
4373 | } |
4374 | if (index != nullptr) |
4375 | { |
4376 | index->ClearContained(); |
4377 | } |
4378 | addrMode->gtFlags |= (addr->gtFlags & GTF_IND_FLAGS); |
4379 | addrMode->gtFlags &= ~GTF_ALL_EFFECT; // LEAs are side-effect-free. |
4380 | |
JITDUMP("New addressing mode node:\n");
DISPNODE(addrMode);
JITDUMP("\n");
4384 | |
4385 | BlockRange().InsertAfter(addr, addrMode); |
4386 | |
4387 | // Now we need to remove all the nodes subsumed by the addrMode |
4388 | AddrModeCleanupHelper(addrMode, addr); |
4389 | |
4390 | // Replace the original address node with the addrMode. |
4391 | use.ReplaceWith(comp, addrMode); |
4392 | |
4393 | return addrMode; |
4394 | } |
4395 | |
4396 | //------------------------------------------------------------------------ |
4397 | // LowerAdd: turn this add into a GT_LEA if that would be profitable |
4398 | // |
4399 | // Arguments: |
4400 | // node - the node we care about |
4401 | // |
4402 | // Returns: |
4403 | // The next node to lower if we have transformed the ADD; nullptr otherwise. |
4404 | // |
4405 | GenTree* Lowering::LowerAdd(GenTree* node) |
4406 | { |
4407 | GenTree* next = node->gtNext; |
4408 | |
4409 | #ifndef _TARGET_ARMARCH_ |
4410 | if (varTypeIsIntegralOrI(node)) |
4411 | { |
4412 | LIR::Use use; |
4413 | if (BlockRange().TryGetUse(node, &use)) |
4414 | { |
4415 | // If this is a child of an indir, let the parent handle it. |
4416 | // If there is a chain of adds, only look at the topmost one. |
4417 | GenTree* parent = use.User(); |
4418 | if (!parent->OperIsIndir() && (parent->gtOper != GT_ADD)) |
4419 | { |
4420 | GenTree* addr = TryCreateAddrMode(std::move(use), false); |
4421 | if (addr != node) |
4422 | { |
4423 | return addr->gtNext; |
4424 | } |
4425 | } |
4426 | } |
4427 | } |
4428 | #endif // !_TARGET_ARMARCH_ |
4429 | |
4430 | return nullptr; |
4431 | } |
4432 | |
4433 | //------------------------------------------------------------------------ |
4434 | // LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node. |
4435 | // |
4436 | // Arguments: |
4437 | // divMod - pointer to the GT_UDIV/GT_UMOD node to be lowered |
4438 | // |
4439 | // Return Value: |
4440 | // Returns a boolean indicating whether the node was transformed. |
4441 | // |
4442 | // Notes: |
4443 | // - Transform UDIV/UMOD by power of 2 into RSZ/AND |
4444 | // - Transform UDIV by constant >= 2^(N-1) into GE |
4445 | // - Transform UDIV/UMOD by constant >= 3 into "magic division" |
4446 | // |
4447 | |
4448 | bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) |
4449 | { |
4450 | assert(divMod->OperIs(GT_UDIV, GT_UMOD)); |
4451 | |
#if defined(USE_HELPERS_FOR_INT_DIV)
// On targets that use helpers for integer division, integral GT_UDIV/GT_UMOD should
// already have been morphed into helper calls, so we should never get here.
assert(!"unreachable: integral GT_UDIV/GT_UMOD should get morphed into helper calls");
#endif // USE_HELPERS_FOR_INT_DIV
4459 | #if defined(_TARGET_ARM64_) |
4460 | assert(divMod->OperGet() != GT_UMOD); |
4461 | #endif // _TARGET_ARM64_ |
4462 | |
4463 | GenTree* next = divMod->gtNext; |
4464 | GenTree* dividend = divMod->gtGetOp1(); |
4465 | GenTree* divisor = divMod->gtGetOp2(); |
4466 | |
4467 | #if !defined(_TARGET_64BIT_) |
4468 | if (dividend->OperIs(GT_LONG)) |
4469 | { |
4470 | return false; |
4471 | } |
4472 | #endif |
4473 | |
4474 | if (!divisor->IsCnsIntOrI()) |
4475 | { |
4476 | return false; |
4477 | } |
4478 | |
4479 | if (dividend->IsCnsIntOrI()) |
4480 | { |
4481 | // We shouldn't see a divmod with constant operands here but if we do then it's likely |
4482 | // because optimizations are disabled or it's a case that's supposed to throw an exception. |
4483 | // Don't optimize this. |
4484 | return false; |
4485 | } |
4486 | |
4487 | const var_types type = divMod->TypeGet(); |
4488 | assert((type == TYP_INT) || (type == TYP_I_IMPL)); |
4489 | |
4490 | size_t divisorValue = static_cast<size_t>(divisor->AsIntCon()->IconValue()); |
4491 | |
4492 | if (type == TYP_INT) |
4493 | { |
4494 | // Clear up the upper 32 bits of the value, they may be set to 1 because constants |
4495 | // are treated as signed and stored in ssize_t which is 64 bit in size on 64 bit targets. |
4496 | divisorValue &= UINT32_MAX; |
4497 | } |
4498 | |
4499 | if (divisorValue == 0) |
4500 | { |
4501 | return false; |
4502 | } |
4503 | |
4504 | const bool isDiv = divMod->OperIs(GT_UDIV); |
4505 | |
4506 | if (isPow2(divisorValue)) |
4507 | { |
4508 | genTreeOps newOper; |
4509 | |
4510 | if (isDiv) |
4511 | { |
4512 | newOper = GT_RSZ; |
4513 | divisorValue = genLog2(divisorValue); |
4514 | } |
4515 | else |
4516 | { |
4517 | newOper = GT_AND; |
4518 | divisorValue -= 1; |
4519 | } |
4520 | |
4521 | divMod->SetOper(newOper); |
4522 | divisor->gtIntCon.SetIconValue(divisorValue); |
4523 | ContainCheckNode(divMod); |
4524 | return true; |
4525 | } |
4526 | if (isDiv) |
4527 | { |
// If the divisor is greater than or equal to 2^(N - 1) then the result is 1
// iff the dividend is greater than or equal to the divisor.
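// For example, x UDIV 0x80000000 (TYP_INT) is 1 exactly when x >= 0x80000000, so the
// node becomes an unsigned GE that produces 0 or 1.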
4530 | if (((type == TYP_INT) && (divisorValue > (UINT32_MAX / 2))) || |
4531 | ((type == TYP_LONG) && (divisorValue > (UINT64_MAX / 2)))) |
4532 | { |
4533 | divMod->SetOper(GT_GE); |
4534 | divMod->gtFlags |= GTF_UNSIGNED; |
4535 | ContainCheckNode(divMod); |
4536 | return true; |
4537 | } |
4538 | } |
4539 | |
4540 | // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 |
4541 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
4542 | if (!comp->opts.MinOpts() && (divisorValue >= 3)) |
4543 | { |
4544 | size_t magic; |
4545 | bool add; |
4546 | int shift; |
4547 | |
4548 | if (type == TYP_INT) |
4549 | { |
4550 | magic = MagicDivide::GetUnsigned32Magic(static_cast<uint32_t>(divisorValue), &add, &shift); |
4551 | } |
4552 | else |
4553 | { |
4554 | #ifdef _TARGET_64BIT_ |
4555 | magic = MagicDivide::GetUnsigned64Magic(static_cast<uint64_t>(divisorValue), &add, &shift); |
4556 | #else |
4557 | unreached(); |
4558 | #endif |
4559 | } |
4560 | |
// Depending on the "add" flag returned by GetUnsigned32Magic/GetUnsigned64Magic we need to generate:
4562 | // add == false (when divisor == 3 for example): |
4563 | // div = (dividend MULHI magic) RSZ shift |
4564 | // add == true (when divisor == 7 for example): |
4565 | // mulhi = dividend MULHI magic |
// div = (((dividend SUB mulhi) RSZ 1) ADD mulhi) RSZ (shift - 1)
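// For example, for a TYP_INT divisor of 7 the magic computation is expected to yield
// magic = 0x24924925 with add == true and shift == 3 (the standard Hacker's Delight
// values), so x UDIV 7 becomes:
//   mulhi = x MULHI 0x24924925
//   div   = (((x SUB mulhi) RSZ 1) ADD mulhi) RSZ 2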
4567 | const bool requiresAdjustment = add; |
4568 | const bool requiresDividendMultiuse = requiresAdjustment || !isDiv; |
4569 | const unsigned curBBWeight = m_block->getBBWeight(comp); |
4570 | |
4571 | if (requiresDividendMultiuse) |
4572 | { |
4573 | LIR::Use dividendUse(BlockRange(), &divMod->gtOp1, divMod); |
4574 | dividend = ReplaceWithLclVar(dividendUse); |
4575 | } |
4576 | |
4577 | // Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node. |
4578 | // The existing node will later be transformed into a GT_RSZ/GT_SUB that |
// computes the final result. This way we don't need to find and change the use
4580 | // of the existing node. |
4581 | GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, dividend, divisor); |
4582 | mulhi->gtFlags |= GTF_UNSIGNED; |
4583 | divisor->AsIntCon()->SetIconValue(magic); |
4584 | BlockRange().InsertBefore(divMod, mulhi); |
4585 | GenTree* firstNode = mulhi; |
4586 | |
4587 | if (requiresAdjustment) |
4588 | { |
4589 | dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()); |
4590 | GenTree* sub = comp->gtNewOperNode(GT_SUB, type, dividend, mulhi); |
4591 | BlockRange().InsertBefore(divMod, dividend, sub); |
4592 | |
4593 | GenTree* one = comp->gtNewIconNode(1, TYP_INT); |
4594 | GenTree* rsz = comp->gtNewOperNode(GT_RSZ, type, sub, one); |
4595 | BlockRange().InsertBefore(divMod, one, rsz); |
4596 | |
4597 | LIR::Use mulhiUse(BlockRange(), &sub->gtOp.gtOp2, sub); |
4598 | mulhi = ReplaceWithLclVar(mulhiUse); |
4599 | |
4600 | mulhi = comp->gtNewLclvNode(mulhi->AsLclVar()->GetLclNum(), mulhi->TypeGet()); |
4601 | GenTree* add = comp->gtNewOperNode(GT_ADD, type, rsz, mulhi); |
4602 | BlockRange().InsertBefore(divMod, mulhi, add); |
4603 | |
4604 | mulhi = add; |
4605 | shift -= 1; |
4606 | } |
4607 | |
4608 | GenTree* shiftBy = comp->gtNewIconNode(shift, TYP_INT); |
4609 | BlockRange().InsertBefore(divMod, shiftBy); |
4610 | |
4611 | if (isDiv) |
4612 | { |
4613 | divMod->SetOper(GT_RSZ); |
4614 | divMod->gtOp1 = mulhi; |
4615 | divMod->gtOp2 = shiftBy; |
4616 | } |
4617 | else |
4618 | { |
4619 | GenTree* div = comp->gtNewOperNode(GT_RSZ, type, mulhi, shiftBy); |
4620 | |
// dividend UMOD divisor = dividend SUB (div MUL divisor)
4622 | GenTree* divisor = comp->gtNewIconNode(divisorValue, type); |
4623 | GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor); |
4624 | dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()); |
4625 | |
4626 | divMod->SetOper(GT_SUB); |
4627 | divMod->gtOp1 = dividend; |
4628 | divMod->gtOp2 = mul; |
4629 | |
4630 | BlockRange().InsertBefore(divMod, div, divisor, mul, dividend); |
4631 | } |
4632 | ContainCheckRange(firstNode, divMod); |
4633 | |
4634 | return true; |
4635 | } |
4636 | #endif |
4637 | return false; |
4638 | } |
4639 | |
//------------------------------------------------------------------------
// LowerConstIntDivOrMod: Transform integer GT_DIV/GT_MOD nodes with a constant
// divisor into equivalent but faster sequences.
4642 | // |
4643 | // Arguments: |
4644 | // node - pointer to the DIV or MOD node |
4645 | // |
4646 | // Returns: |
4647 | // nullptr if no transformation is done, or the next node in the transformed node sequence that |
4648 | // needs to be lowered. |
4649 | // |
4650 | GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) |
4651 | { |
4652 | assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD)); |
4653 | GenTree* divMod = node; |
4654 | GenTree* dividend = divMod->gtGetOp1(); |
4655 | GenTree* divisor = divMod->gtGetOp2(); |
4656 | |
4657 | const var_types type = divMod->TypeGet(); |
4658 | assert((type == TYP_INT) || (type == TYP_LONG)); |
4659 | |
4660 | #if defined(USE_HELPERS_FOR_INT_DIV) |
assert(!"unreachable: integral GT_DIV/GT_MOD should get morphed into helper calls");
4662 | #endif // USE_HELPERS_FOR_INT_DIV |
4663 | #if defined(_TARGET_ARM64_) |
4664 | assert(node->OperGet() != GT_MOD); |
4665 | #endif // _TARGET_ARM64_ |
4666 | |
4667 | if (!divisor->IsCnsIntOrI()) |
4668 | { |
4669 | return nullptr; // no transformations to make |
4670 | } |
4671 | |
4672 | if (dividend->IsCnsIntOrI()) |
4673 | { |
4674 | // We shouldn't see a divmod with constant operands here but if we do then it's likely |
4675 | // because optimizations are disabled or it's a case that's supposed to throw an exception. |
4676 | // Don't optimize this. |
4677 | return nullptr; |
4678 | } |
4679 | |
4680 | ssize_t divisorValue = divisor->gtIntCon.IconValue(); |
4681 | |
4682 | if (divisorValue == -1 || divisorValue == 0) |
4683 | { |
4684 | // x / 0 and x % 0 can't be optimized because they are required to throw an exception. |
4685 | |
4686 | // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception. |
4687 | |
4688 | // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is |
4689 | // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this |
4690 | // case so optimizing this case would break C# code. |
4691 | |
4692 | // A runtime check could be used to handle this case but it's probably too rare to matter. |
4693 | return nullptr; |
4694 | } |
4695 | |
4696 | bool isDiv = divMod->OperGet() == GT_DIV; |
4697 | |
4698 | if (isDiv) |
4699 | { |
4700 | if ((type == TYP_INT && divisorValue == INT_MIN) || (type == TYP_LONG && divisorValue == INT64_MIN)) |
4701 | { |
4702 | // If the divisor is the minimum representable integer value then we can use a compare, |
4703 | // the result is 1 iff the dividend equals divisor. |
4704 | divMod->SetOper(GT_EQ); |
4705 | return node; |
4706 | } |
4707 | } |
4708 | |
4709 | size_t absDivisorValue = |
4710 | (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue)); |
4711 | |
4712 | if (!isPow2(absDivisorValue)) |
4713 | { |
4714 | if (comp->opts.MinOpts()) |
4715 | { |
4716 | return nullptr; |
4717 | } |
4718 | |
4719 | #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) |
4720 | ssize_t magic; |
4721 | int shift; |
4722 | |
4723 | if (type == TYP_INT) |
4724 | { |
4725 | magic = MagicDivide::GetSigned32Magic(static_cast<int32_t>(divisorValue), &shift); |
4726 | } |
4727 | else |
4728 | { |
4729 | #ifdef _TARGET_64BIT_ |
4730 | magic = MagicDivide::GetSigned64Magic(static_cast<int64_t>(divisorValue), &shift); |
4731 | #else // !_TARGET_64BIT_ |
4732 | unreached(); |
4733 | #endif // !_TARGET_64BIT_ |
4734 | } |
4735 | |
4736 | divisor->gtIntConCommon.SetIconValue(magic); |
4737 | |
4738 | // Insert a new GT_MULHI node in front of the existing GT_DIV/GT_MOD node. |
4739 | // The existing node will later be transformed into a GT_ADD/GT_SUB that |
// computes the final result. This way we don't need to find and change the
4741 | // use of the existing node. |
4742 | GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, divisor, dividend); |
4743 | BlockRange().InsertBefore(divMod, mulhi); |
4744 | |
4745 | // mulhi was the easy part. Now we need to generate different code depending |
4746 | // on the divisor value: |
4747 | // For 3 we need: |
4748 | // div = signbit(mulhi) + mulhi |
4749 | // For 5 we need: |
4750 | // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust |
4751 | // For 7 we need: |
4752 | // mulhi += dividend ; requires add adjust |
4753 | // div = signbit(mulhi) + sar(mulhi, 2) ; requires shift adjust |
4754 | // For -3 we need: |
4755 | // mulhi -= dividend ; requires sub adjust |
4756 | // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust |
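// For example, for a TYP_INT divisor of 3 the magic computation is expected to yield
// magic = 0x55555556 with shift == 0 and no add/sub adjust (the standard Hacker's
// Delight values), so x DIV 3 becomes:
//   mulhi = x MULHI 0x55555556
//   div   = mulhi ADD (mulhi RSZ 31)   ; adding the sign bit rounds toward zero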
4757 | bool requiresAddSubAdjust = signum(divisorValue) != signum(magic); |
4758 | bool requiresShiftAdjust = shift != 0; |
4759 | bool requiresDividendMultiuse = requiresAddSubAdjust || !isDiv; |
4760 | unsigned curBBWeight = comp->compCurBB->getBBWeight(comp); |
4761 | |
4762 | if (requiresDividendMultiuse) |
4763 | { |
4764 | LIR::Use dividendUse(BlockRange(), &mulhi->gtOp.gtOp2, mulhi); |
4765 | dividend = ReplaceWithLclVar(dividendUse); |
4766 | } |
4767 | |
4768 | GenTree* adjusted; |
4769 | |
4770 | if (requiresAddSubAdjust) |
4771 | { |
4772 | dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()); |
4773 | adjusted = comp->gtNewOperNode(divisorValue > 0 ? GT_ADD : GT_SUB, type, mulhi, dividend); |
4774 | BlockRange().InsertBefore(divMod, dividend, adjusted); |
4775 | } |
4776 | else |
4777 | { |
4778 | adjusted = mulhi; |
4779 | } |
4780 | |
4781 | GenTree* shiftBy = comp->gtNewIconNode(genTypeSize(type) * 8 - 1, type); |
4782 | GenTree* signBit = comp->gtNewOperNode(GT_RSZ, type, adjusted, shiftBy); |
4783 | BlockRange().InsertBefore(divMod, shiftBy, signBit); |
4784 | |
4785 | LIR::Use adjustedUse(BlockRange(), &signBit->gtOp.gtOp1, signBit); |
4786 | adjusted = ReplaceWithLclVar(adjustedUse); |
4787 | adjusted = comp->gtNewLclvNode(adjusted->AsLclVar()->GetLclNum(), adjusted->TypeGet()); |
4788 | BlockRange().InsertBefore(divMod, adjusted); |
4789 | |
4790 | if (requiresShiftAdjust) |
4791 | { |
4792 | shiftBy = comp->gtNewIconNode(shift, TYP_INT); |
4793 | adjusted = comp->gtNewOperNode(GT_RSH, type, adjusted, shiftBy); |
4794 | BlockRange().InsertBefore(divMod, shiftBy, adjusted); |
4795 | } |
4796 | |
4797 | if (isDiv) |
4798 | { |
4799 | divMod->SetOperRaw(GT_ADD); |
4800 | divMod->gtOp.gtOp1 = adjusted; |
4801 | divMod->gtOp.gtOp2 = signBit; |
4802 | } |
4803 | else |
4804 | { |
4805 | GenTree* div = comp->gtNewOperNode(GT_ADD, type, adjusted, signBit); |
4806 | |
4807 | dividend = comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()); |
4808 | |
// dividend % divisor = dividend - divisor * div
4810 | GenTree* divisor = comp->gtNewIconNode(divisorValue, type); |
4811 | GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor); |
4812 | BlockRange().InsertBefore(divMod, dividend, div, divisor, mul); |
4813 | |
4814 | divMod->SetOperRaw(GT_SUB); |
4815 | divMod->gtOp.gtOp1 = dividend; |
4816 | divMod->gtOp.gtOp2 = mul; |
4817 | } |
4818 | |
4819 | return mulhi; |
4820 | #elif defined(_TARGET_ARM_) |
4821 | // Currently there's no GT_MULHI for ARM32 |
4822 | return nullptr; |
4823 | #else |
4824 | #error Unsupported or unset target architecture |
4825 | #endif |
4826 | } |
4827 | |
4828 | // We're committed to the conversion now. Go find the use if any. |
4829 | LIR::Use use; |
4830 | if (!BlockRange().TryGetUse(node, &use)) |
4831 | { |
4832 | return nullptr; |
4833 | } |
4834 | |
4835 | // We need to use the dividend node multiple times so its value needs to be |
4836 | // computed once and stored in a temp variable. |
4837 | |
4838 | unsigned curBBWeight = comp->compCurBB->getBBWeight(comp); |
4839 | |
4840 | LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod); |
4841 | dividend = ReplaceWithLclVar(opDividend); |
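
// For signed division by +/-2^N the dividend is rounded toward zero before shifting by
// adding (dividend >> 31) & (absDivisorValue - 1) (>> 63 for TYP_LONG): that's divisor - 1
// for a negative dividend and 0 otherwise. Example (TYP_INT, divisor 4):
//   q = (x + ((x >> 31) & 3)) >> 2, so x == -7 gives (-7 + 3) >> 2 == -1.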
4842 | |
4843 | GenTree* adjustment = comp->gtNewOperNode(GT_RSH, type, dividend, comp->gtNewIconNode(type == TYP_INT ? 31 : 63)); |
4844 | |
4845 | if (absDivisorValue == 2) |
4846 | { |
4847 | // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1. |
4848 | // We can get the same result by using GT_RSZ instead of GT_RSH. |
4849 | adjustment->SetOper(GT_RSZ); |
4850 | } |
4851 | else |
4852 | { |
4853 | adjustment = comp->gtNewOperNode(GT_AND, type, adjustment, comp->gtNewIconNode(absDivisorValue - 1, type)); |
4854 | } |
4855 | |
4856 | GenTree* adjustedDividend = |
4857 | comp->gtNewOperNode(GT_ADD, type, adjustment, |
4858 | comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet())); |
4859 | |
4860 | GenTree* newDivMod; |
4861 | |
4862 | if (isDiv) |
4863 | { |
4864 | // perform the division by right shifting the adjusted dividend |
4865 | divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue)); |
4866 | |
4867 | newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor); |
4868 | ContainCheckShiftRotate(newDivMod->AsOp()); |
4869 | |
4870 | if (divisorValue < 0) |
4871 | { |
4872 | // negate the result if the divisor is negative |
4873 | newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod); |
4874 | ContainCheckNode(newDivMod); |
4875 | } |
4876 | } |
4877 | else |
4878 | { |
// dividend % divisor = dividend - divisor * (dividend / divisor)
// divisor * (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor)
4881 | // which simply discards the low log2(divisor) bits, that's just dividend & ~(divisor - 1) |
4882 | divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1)); |
4883 | |
4884 | newDivMod = comp->gtNewOperNode(GT_SUB, type, |
4885 | comp->gtNewLclvNode(dividend->AsLclVar()->GetLclNum(), dividend->TypeGet()), |
4886 | comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor)); |
4887 | } |
4888 | |
4889 | // Remove the divisor and dividend nodes from the linear order, |
4890 | // since we have reused them and will resequence the tree |
4891 | BlockRange().Remove(divisor); |
4892 | BlockRange().Remove(dividend); |
4893 | |
4894 | // linearize and insert the new tree before the original divMod node |
4895 | InsertTreeBeforeAndContainCheck(divMod, newDivMod); |
4896 | BlockRange().Remove(divMod); |
4897 | |
4898 | // replace the original divmod node with the new divmod tree |
4899 | use.ReplaceWith(comp, newDivMod); |
4900 | |
4901 | return newDivMod->gtNext; |
4902 | } |
4903 | //------------------------------------------------------------------------ |
4904 | // LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2 |
4905 | // const divisor into equivalent but faster sequences. |
4906 | // |
4907 | // Arguments: |
4908 | // node - the DIV or MOD node |
4909 | // |
4910 | // Returns: |
4911 | // The next node to lower. |
4912 | // |
4913 | GenTree* Lowering::LowerSignedDivOrMod(GenTree* node) |
4914 | { |
4915 | assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD)); |
4916 | GenTree* next = node->gtNext; |
4917 | GenTree* divMod = node; |
4918 | GenTree* dividend = divMod->gtGetOp1(); |
4919 | GenTree* divisor = divMod->gtGetOp2(); |
4920 | |
4921 | if (varTypeIsIntegral(node->TypeGet())) |
4922 | { |
4923 | // LowerConstIntDivOrMod will return nullptr if it doesn't transform the node. |
4924 | GenTree* newNode = LowerConstIntDivOrMod(node); |
4925 | if (newNode != nullptr) |
4926 | { |
4927 | return newNode; |
4928 | } |
4929 | } |
4930 | ContainCheckDivOrMod(node->AsOp()); |
4931 | |
4932 | return next; |
4933 | } |
4934 | |
4935 | //------------------------------------------------------------------------ |
4936 | // LowerShift: Lower shift nodes |
4937 | // |
4938 | // Arguments: |
4939 | // shift - the shift node (GT_LSH, GT_RSH or GT_RSZ) |
4940 | // |
4941 | // Notes: |
// Remove unnecessary shift count masking; xarch shift instructions already
// mask the shift count to 5 bits (or 6 bits for 64-bit operations).
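// For example, (x LSH (c AND 31)) for a TYP_INT shift can be lowered to (x LSH c),
// since the shift instruction masks the count to the same bits anyway.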
4944 | |
4945 | void Lowering::LowerShift(GenTreeOp* shift) |
4946 | { |
4947 | assert(shift->OperIs(GT_LSH, GT_RSH, GT_RSZ)); |
4948 | |
4949 | size_t mask = 0x1f; |
4950 | #ifdef _TARGET_64BIT_ |
4951 | if (varTypeIsLong(shift->TypeGet())) |
4952 | { |
4953 | mask = 0x3f; |
4954 | } |
4955 | #else |
4956 | assert(!varTypeIsLong(shift->TypeGet())); |
4957 | #endif |
4958 | |
4959 | for (GenTree* andOp = shift->gtGetOp2(); andOp->OperIs(GT_AND); andOp = andOp->gtGetOp1()) |
4960 | { |
4961 | GenTree* maskOp = andOp->gtGetOp2(); |
4962 | |
4963 | if (!maskOp->IsCnsIntOrI()) |
4964 | { |
4965 | break; |
4966 | } |
4967 | |
4968 | if ((static_cast<size_t>(maskOp->AsIntCon()->IconValue()) & mask) != mask) |
4969 | { |
4970 | break; |
4971 | } |
4972 | |
4973 | shift->gtOp2 = andOp->gtGetOp1(); |
4974 | BlockRange().Remove(andOp); |
4975 | BlockRange().Remove(maskOp); |
4976 | // The parent was replaced, clear contain and regOpt flag. |
4977 | shift->gtOp2->ClearContained(); |
4978 | } |
4979 | ContainCheckShiftRotate(shift); |
4980 | } |
4981 | |
4982 | void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node) |
4983 | { |
4984 | #ifdef FEATURE_SIMD |
4985 | if (node->TypeGet() == TYP_SIMD12) |
4986 | { |
4987 | // Assumption 1: |
4988 | // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off |
4989 | // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for |
4990 | // reading and writing purposes. |
4991 | // |
4992 | // Assumption 2: |
// RyuJit backend makes another implicit assumption: when Vector3 type args are passed in
// registers or on the stack, the uppermost 4 bytes will be zero.
//
// For P/Invoke returns and Reverse P/Invoke argument passing, the native compiler doesn't
// guarantee that the upper 4 bytes of a Vector3 struct are zero initialized, and hence
// assumption 2 is invalid.
4999 | // |
// RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
// bytes. In case of Vector3 returns, the caller allocates a zero-initialized Vector3 local and
// passes it as the retBuf arg, and the callee writes only 12 bytes to retBuf. For this reason,
// there is no need to clear the upper 4 bytes of Vector3 type args.
//
// RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
// Vector3 return values are returned in two return registers and the caller assembles them into
// a single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
// type args in the prolog and of the Vector3 return value of a call.
5009 | // |
5010 | // RyuJIT x86 Windows: all non-param Vector3 local vars are allocated as 16 bytes. Vector3 arguments |
5011 | // are pushed as 12 bytes. For return values, a 16-byte local is allocated and the address passed |
5012 | // as a return buffer pointer. The callee doesn't write the high 4 bytes, and we don't need to clear |
5013 | // it either. |
5014 | |
5015 | unsigned varNum = node->AsLclVarCommon()->GetLclNum(); |
5016 | LclVarDsc* varDsc = &comp->lvaTable[varNum]; |
5017 | |
5018 | if (comp->lvaMapSimd12ToSimd16(varDsc)) |
5019 | { |
JITDUMP("Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n");
DISPNODE(node);
JITDUMP("============");
5023 | |
5024 | node->gtType = TYP_SIMD16; |
5025 | } |
5026 | } |
5027 | #endif // FEATURE_SIMD |
5028 | } |
5029 | |
5030 | //------------------------------------------------------------------------ |
5031 | // LowerArrElem: Lower a GT_ARR_ELEM node |
5032 | // |
5033 | // Arguments: |
5034 | // node - the GT_ARR_ELEM node to lower. |
5035 | // |
5036 | // Return Value: |
5037 | // The next node to lower. |
5038 | // |
5039 | // Assumptions: |
// 'node' is a GT_ARR_ELEM node.
5041 | // |
5042 | // Notes: |
5043 | // This performs the following lowering. We start with a node of the form: |
5044 | // /--* <arrObj> |
5045 | // +--* <index0> |
5046 | // +--* <index1> |
5047 | // /--* arrMD&[,] |
5048 | // |
5049 | // First, we create temps for arrObj if it is not already a lclVar, and for any of the index |
5050 | // expressions that have side-effects. |
5051 | // We then transform the tree into: |
5052 | // <offset is null - no accumulated offset for the first index> |
5053 | // /--* <arrObj> |
5054 | // +--* <index0> |
5055 | // /--* ArrIndex[i, ] |
5056 | // +--* <arrObj> |
5057 | // /--| arrOffs[i, ] |
5058 | // | +--* <arrObj> |
5059 | // | +--* <index1> |
5060 | // +--* ArrIndex[*,j] |
5061 | // +--* <arrObj> |
5062 | // /--| arrOffs[*,j] |
5063 | // +--* lclVar NewTemp |
5064 | // /--* lea (scale = element size, offset = offset of first element) |
5065 | // |
5066 | // The new stmtExpr may be omitted if the <arrObj> is a lclVar. |
5067 | // The new stmtExpr may be embedded if the <arrObj> is not the first tree in linear order for |
5068 | // the statement containing the original arrMD. |
5069 | // Note that the arrMDOffs is the INDEX of the lea, but is evaluated before the BASE (which is the second |
5070 | // reference to NewTemp), because that provides more accurate lifetimes. |
5071 | // There may be 1, 2 or 3 dimensions, with 1, 2 or 3 arrMDIdx nodes, respectively. |
5072 | // |
5073 | GenTree* Lowering::LowerArrElem(GenTree* node) |
5074 | { |
5075 | // This will assert if we don't have an ArrElem node |
5076 | GenTreeArrElem* arrElem = node->AsArrElem(); |
5077 | const unsigned char rank = arrElem->gtArrElem.gtArrRank; |
5078 | const unsigned blockWeight = m_block->getBBWeight(comp); |
5079 | |
JITDUMP("Lowering ArrElem\n");
JITDUMP("============\n");
DISPTREERANGE(BlockRange(), arrElem);
JITDUMP("\n");
5084 | |
5085 | assert(arrElem->gtArrObj->TypeGet() == TYP_REF); |
5086 | |
5087 | // We need to have the array object in a lclVar. |
5088 | if (!arrElem->gtArrObj->IsLocal()) |
5089 | { |
5090 | LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem); |
5091 | ReplaceWithLclVar(arrObjUse); |
5092 | } |
5093 | |
5094 | GenTree* arrObjNode = arrElem->gtArrObj; |
5095 | assert(arrObjNode->IsLocal()); |
5096 | |
5097 | LclVarDsc* const varDsc = &comp->lvaTable[arrElem->gtArrObj->AsLclVarCommon()->gtLclNum]; |
5098 | |
5099 | GenTree* insertionPoint = arrElem; |
5100 | |
5101 | // The first ArrOffs node will have 0 for the offset of the previous dimension. |
5102 | GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0); |
5103 | BlockRange().InsertBefore(insertionPoint, prevArrOffs); |
5104 | GenTree* nextToLower = prevArrOffs; |
5105 | |
5106 | for (unsigned char dim = 0; dim < rank; dim++) |
5107 | { |
5108 | GenTree* indexNode = arrElem->gtArrElem.gtArrInds[dim]; |
5109 | |
5110 | // Use the original arrObjNode on the 0th ArrIndex node, and clone it for subsequent ones. |
5111 | GenTree* idxArrObjNode; |
5112 | if (dim == 0) |
5113 | { |
5114 | idxArrObjNode = arrObjNode; |
5115 | } |
5116 | else |
5117 | { |
5118 | idxArrObjNode = comp->gtClone(arrObjNode); |
5119 | BlockRange().InsertBefore(insertionPoint, idxArrObjNode); |
5120 | } |
5121 | |
5122 | // Next comes the GT_ARR_INDEX node. |
5123 | GenTreeArrIndex* arrMDIdx = new (comp, GT_ARR_INDEX) |
5124 | GenTreeArrIndex(TYP_INT, idxArrObjNode, indexNode, dim, rank, arrElem->gtArrElem.gtArrElemType); |
5125 | arrMDIdx->gtFlags |= ((idxArrObjNode->gtFlags | indexNode->gtFlags) & GTF_ALL_EFFECT); |
5126 | BlockRange().InsertBefore(insertionPoint, arrMDIdx); |
5127 | |
5128 | GenTree* offsArrObjNode = comp->gtClone(arrObjNode); |
5129 | BlockRange().InsertBefore(insertionPoint, offsArrObjNode); |
5130 | |
5131 | GenTreeArrOffs* arrOffs = |
5132 | new (comp, GT_ARR_OFFSET) GenTreeArrOffs(TYP_I_IMPL, prevArrOffs, arrMDIdx, offsArrObjNode, dim, rank, |
5133 | arrElem->gtArrElem.gtArrElemType); |
5134 | arrOffs->gtFlags |= ((prevArrOffs->gtFlags | arrMDIdx->gtFlags | offsArrObjNode->gtFlags) & GTF_ALL_EFFECT); |
5135 | BlockRange().InsertBefore(insertionPoint, arrOffs); |
5136 | |
5137 | prevArrOffs = arrOffs; |
5138 | } |
5139 | |
5140 | // Generate the LEA and make it reverse evaluation, because we want to evaluate the index expression before the |
5141 | // base. |
5142 | unsigned scale = arrElem->gtArrElem.gtArrElemSize; |
5143 | unsigned offset = comp->eeGetMDArrayDataOffset(arrElem->gtArrElem.gtArrElemType, arrElem->gtArrElem.gtArrRank); |
5144 | |
5145 | GenTree* leaIndexNode = prevArrOffs; |
5146 | if (!jitIsScaleIndexMul(scale)) |
5147 | { |
5148 | // We do the address arithmetic in TYP_I_IMPL, though note that the lower bounds and lengths in memory are |
5149 | // TYP_INT |
5150 | GenTree* scaleNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, scale); |
5151 | GenTree* mulNode = new (comp, GT_MUL) GenTreeOp(GT_MUL, TYP_I_IMPL, leaIndexNode, scaleNode); |
5152 | BlockRange().InsertBefore(insertionPoint, scaleNode, mulNode); |
5153 | leaIndexNode = mulNode; |
5154 | scale = 1; |
5155 | } |
5156 | |
5157 | GenTree* leaBase = comp->gtClone(arrObjNode); |
5158 | BlockRange().InsertBefore(insertionPoint, leaBase); |
5159 | |
5160 | GenTree* leaNode = new (comp, GT_LEA) GenTreeAddrMode(arrElem->TypeGet(), leaBase, leaIndexNode, scale, offset); |
5161 | |
5162 | BlockRange().InsertBefore(insertionPoint, leaNode); |
5163 | |
5164 | LIR::Use arrElemUse; |
5165 | if (BlockRange().TryGetUse(arrElem, &arrElemUse)) |
5166 | { |
5167 | arrElemUse.ReplaceWith(comp, leaNode); |
5168 | } |
5169 | else |
5170 | { |
5171 | leaNode->SetUnusedValue(); |
5172 | } |
5173 | |
5174 | BlockRange().Remove(arrElem); |
5175 | |
JITDUMP("Results of lowering ArrElem:\n");
DISPTREERANGE(BlockRange(), leaNode);
JITDUMP("\n\n");
5179 | |
5180 | return nextToLower; |
5181 | } |
5182 | |
5183 | void Lowering::DoPhase() |
5184 | { |
// If we have any PInvoke calls, insert the one-time prolog code. We'll insert the epilog code in the
5186 | // appropriate spots later. NOTE: there is a minor optimization opportunity here, as we still create p/invoke |
5187 | // data structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination. |
5188 | if (comp->info.compCallUnmanaged) |
5189 | { |
5190 | InsertPInvokeMethodProlog(); |
5191 | } |
5192 | |
5193 | #if !defined(_TARGET_64BIT_) |
5194 | DecomposeLongs decomp(comp); // Initialize the long decomposition class. |
5195 | if (comp->compLongUsed) |
5196 | { |
5197 | decomp.PrepareForDecomposition(); |
5198 | } |
5199 | #endif // !defined(_TARGET_64BIT_) |
5200 | |
5201 | for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext) |
5202 | { |
5203 | /* Make the block publicly available */ |
5204 | comp->compCurBB = block; |
5205 | |
5206 | #if !defined(_TARGET_64BIT_) |
5207 | if (comp->compLongUsed) |
5208 | { |
5209 | decomp.DecomposeBlock(block); |
5210 | } |
5211 | #endif //!_TARGET_64BIT_ |
5212 | |
5213 | LowerBlock(block); |
5214 | } |
5215 | |
5216 | #ifdef DEBUG |
JITDUMP("Lower has completed modifying nodes.\n");
5218 | if (VERBOSE) |
5219 | { |
5220 | comp->fgDispBasicBlocks(true); |
5221 | } |
5222 | #endif |
5223 | |
5224 | // Recompute local var ref counts before potentially sorting for liveness. |
5225 | // Note this does minimal work in cases where we are not going to sort. |
5226 | const bool isRecompute = true; |
5227 | const bool setSlotNumbers = false; |
5228 | comp->lvaComputeRefCounts(isRecompute, setSlotNumbers); |
5229 | |
5230 | comp->fgLocalVarLiveness(); |
5231 | // local var liveness can delete code, which may create empty blocks |
5232 | if (comp->opts.OptimizationEnabled()) |
5233 | { |
5234 | comp->optLoopsMarked = false; |
5235 | bool modified = comp->fgUpdateFlowGraph(); |
5236 | if (modified) |
5237 | { |
JITDUMP("had to run another liveness pass:\n");
5239 | comp->fgLocalVarLiveness(); |
5240 | } |
5241 | } |
5242 | |
5243 | // Recompute local var ref counts again after liveness to reflect |
5244 | // impact of any dead code removal. Note this may leave us with |
5245 | // tracked vars that have zero refs. |
5246 | comp->lvaComputeRefCounts(isRecompute, setSlotNumbers); |
5247 | |
5248 | #ifdef DEBUG |
JITDUMP("Liveness pass finished after lowering, IR:\n");
5250 | if (VERBOSE) |
5251 | { |
5252 | comp->fgDispBasicBlocks(true); |
5253 | } |
5254 | |
5255 | for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext) |
5256 | { |
5257 | assert(LIR::AsRange(block).CheckLIR(comp, true)); |
5258 | } |
5259 | #endif |
5260 | } |
5261 | |
5262 | #ifdef DEBUG |
5263 | |
5264 | //------------------------------------------------------------------------ |
5265 | // Lowering::CheckCallArg: check that a call argument is in an expected |
5266 | // form after lowering. |
5267 | // |
5268 | // Arguments: |
5269 | // arg - the argument to check. |
5270 | // |
5271 | void Lowering::CheckCallArg(GenTree* arg) |
5272 | { |
5273 | if (!arg->IsValue() && !arg->OperIsPutArgStk()) |
5274 | { |
5275 | assert((arg->OperIsStore() && !arg->IsValue()) || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || |
5276 | arg->OperIsCopyBlkOp()); |
5277 | return; |
5278 | } |
5279 | |
5280 | switch (arg->OperGet()) |
5281 | { |
5282 | case GT_FIELD_LIST: |
5283 | { |
5284 | GenTreeFieldList* list = arg->AsFieldList(); |
5285 | assert(list->isContained()); |
5286 | assert(list->IsFieldListHead()); |
5287 | |
5288 | for (; list != nullptr; list = list->Rest()) |
5289 | { |
5290 | assert(list->Current()->OperIsPutArg()); |
5291 | } |
5292 | } |
5293 | break; |
5294 | |
5295 | default: |
5296 | assert(arg->OperIsPutArg()); |
5297 | break; |
5298 | } |
5299 | } |
5300 | |
5301 | //------------------------------------------------------------------------ |
5302 | // Lowering::CheckCall: check that a call is in an expected form after |
5303 | // lowering. Currently this amounts to checking its |
5304 | // arguments, but could be expanded to verify more |
5305 | // properties in the future. |
5306 | // |
5307 | // Arguments: |
5308 | // call - the call to check. |
5309 | // |
5310 | void Lowering::CheckCall(GenTreeCall* call) |
5311 | { |
5312 | if (call->gtCallObjp != nullptr) |
5313 | { |
5314 | CheckCallArg(call->gtCallObjp); |
5315 | } |
5316 | |
5317 | for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest()) |
5318 | { |
5319 | CheckCallArg(args->Current()); |
5320 | } |
5321 | |
5322 | for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest()) |
5323 | { |
5324 | CheckCallArg(args->Current()); |
5325 | } |
5326 | } |
5327 | |
5328 | //------------------------------------------------------------------------ |
5329 | // Lowering::CheckNode: check that an LIR node is in an expected form |
5330 | // after lowering. |
5331 | // |
5332 | // Arguments: |
5333 | // compiler - the compiler context. |
5334 | // node - the node to check. |
5335 | // |
5336 | void Lowering::CheckNode(Compiler* compiler, GenTree* node) |
5337 | { |
5338 | switch (node->OperGet()) |
5339 | { |
5340 | case GT_CALL: |
5341 | CheckCall(node->AsCall()); |
5342 | break; |
5343 | |
5344 | #ifdef FEATURE_SIMD |
5345 | case GT_SIMD: |
5346 | assert(node->TypeGet() != TYP_SIMD12); |
5347 | break; |
5348 | #ifdef _TARGET_64BIT_ |
5349 | case GT_LCL_VAR: |
5350 | case GT_STORE_LCL_VAR: |
5351 | { |
5352 | unsigned lclNum = node->AsLclVarCommon()->GetLclNum(); |
5353 | LclVarDsc* lclVar = &compiler->lvaTable[lclNum]; |
5354 | assert(node->TypeGet() != TYP_SIMD12 || compiler->lvaIsFieldOfDependentlyPromotedStruct(lclVar)); |
5355 | } |
5356 | break; |
5357 | #endif // _TARGET_64BIT_ |
#endif // FEATURE_SIMD
5359 | |
5360 | default: |
5361 | break; |
5362 | } |
5363 | } |
5364 | |
5365 | //------------------------------------------------------------------------ |
5366 | // Lowering::CheckBlock: check that the contents of an LIR block are in an |
5367 | // expected form after lowering. |
5368 | // |
5369 | // Arguments: |
5370 | // compiler - the compiler context. |
5371 | // block - the block to check. |
5372 | // |
5373 | bool Lowering::CheckBlock(Compiler* compiler, BasicBlock* block) |
5374 | { |
5375 | assert(block->isEmpty() || block->IsLIR()); |
5376 | |
5377 | LIR::Range& blockRange = LIR::AsRange(block); |
5378 | for (GenTree* node : blockRange) |
5379 | { |
5380 | CheckNode(compiler, node); |
5381 | } |
5382 | |
5383 | assert(blockRange.CheckLIR(compiler, true)); |
5384 | return true; |
5385 | } |
5386 | #endif |
5387 | |
5388 | void Lowering::LowerBlock(BasicBlock* block) |
5389 | { |
5390 | assert(block == comp->compCurBB); // compCurBB must already be set. |
5391 | assert(block->isEmpty() || block->IsLIR()); |
5392 | |
5393 | m_block = block; |
5394 | |
5395 | // NOTE: some of the lowering methods insert calls before the node being |
5396 | // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In |
5397 | // general, any code that is inserted before the current node should be |
5398 | // "pre-lowered" as they won't be subject to further processing. |
5399 | // Lowering::CheckBlock() runs some extra checks on call arguments in |
5400 | // order to help catch unlowered nodes. |
5401 | |
5402 | GenTree* node = BlockRange().FirstNode(); |
5403 | while (node != nullptr) |
5404 | { |
5405 | node = LowerNode(node); |
5406 | } |
5407 | |
5408 | assert(CheckBlock(comp, block)); |
5409 | } |
5410 | |
5411 | /** Verifies if both of these trees represent the same indirection. |
* Used by Lower to annotate whether CodeGen can generate an instruction of the
* form *addrMode BinOp= expr
5414 | * |
5415 | * Preconditions: both trees are children of GT_INDs and their underlying children |
5416 | * have the same gtOper. |
5417 | * |
5418 | * This is a first iteration to actually recognize trees that can be code-generated |
5419 | * as a single read-modify-write instruction on AMD64/x86. For now |
5420 | * this method only supports the recognition of simple addressing modes (through GT_LEA) |
5421 | * or local var indirections. Local fields, array access and other more complex nodes are |
5422 | * not yet supported. |
5423 | * |
5424 | * TODO-CQ: Perform tree recognition by using the Value Numbering Package, that way we can recognize |
5425 | * arbitrary complex trees and support much more addressing patterns. |
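*
* For example, a STOREIND(LEA(x, 8), ADD(IND(LEA(x, 8)), y)) pattern where the two
* indirections are equivalent can be emitted as a single read-modify-write instruction
* such as "add [x+8], y".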
5426 | */ |
5427 | bool Lowering::IndirsAreEquivalent(GenTree* candidate, GenTree* storeInd) |
5428 | { |
5429 | assert(candidate->OperGet() == GT_IND); |
5430 | assert(storeInd->OperGet() == GT_STOREIND); |
5431 | |
5432 | // We should check the size of the indirections. If they are |
5433 | // different, say because of a cast, then we can't call them equivalent. Doing so could cause us |
5434 | // to drop a cast. |
5435 | // Signed-ness difference is okay and expected since a store indirection must always |
5436 | // be signed based on the CIL spec, but a load could be unsigned. |
5437 | if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType)) |
5438 | { |
5439 | return false; |
5440 | } |
5441 | |
5442 | GenTree* pTreeA = candidate->gtGetOp1(); |
5443 | GenTree* pTreeB = storeInd->gtGetOp1(); |
5444 | |
5445 | // This method will be called by codegen (as well as during lowering). |
5446 | // After register allocation, the sources may have been spilled and reloaded |
5447 | // to a different register, indicated by an inserted GT_RELOAD node. |
5448 | pTreeA = pTreeA->gtSkipReloadOrCopy(); |
5449 | pTreeB = pTreeB->gtSkipReloadOrCopy(); |
5450 | |
5451 | genTreeOps oper; |
5452 | |
5453 | if (pTreeA->OperGet() != pTreeB->OperGet()) |
5454 | { |
5455 | return false; |
5456 | } |
5457 | |
5458 | oper = pTreeA->OperGet(); |
5459 | switch (oper) |
5460 | { |
5461 | case GT_LCL_VAR: |
5462 | case GT_LCL_VAR_ADDR: |
5463 | case GT_CLS_VAR_ADDR: |
5464 | case GT_CNS_INT: |
5465 | return NodesAreEquivalentLeaves(pTreeA, pTreeB); |
5466 | |
5467 | case GT_LEA: |
5468 | { |
5469 | GenTreeAddrMode* gtAddr1 = pTreeA->AsAddrMode(); |
5470 | GenTreeAddrMode* gtAddr2 = pTreeB->AsAddrMode(); |
5471 | return NodesAreEquivalentLeaves(gtAddr1->Base(), gtAddr2->Base()) && |
5472 | NodesAreEquivalentLeaves(gtAddr1->Index(), gtAddr2->Index()) && |
5473 | (gtAddr1->gtScale == gtAddr2->gtScale) && (gtAddr1->Offset() == gtAddr2->Offset()); |
5474 | } |
5475 | default: |
5476 | // We don't handle anything that is not either a constant, |
5477 | // a local var or LEA. |
5478 | return false; |
5479 | } |
5480 | } |
5481 | |
5482 | /** Test whether the two given nodes are the same leaves. |
5483 | * Right now, only constant integers and local variables are supported |
5484 | */ |
5485 | bool Lowering::NodesAreEquivalentLeaves(GenTree* tree1, GenTree* tree2) |
5486 | { |
5487 | if (tree1 == nullptr && tree2 == nullptr) |
5488 | { |
5489 | return true; |
5490 | } |
5491 | |
// If only one of them is null, the trees are not equivalent (the both-null case was handled above).
5493 | if (tree1 == nullptr || tree2 == nullptr) |
5494 | { |
5495 | return false; |
5496 | } |
5497 | |
5498 | tree1 = tree1->gtSkipReloadOrCopy(); |
5499 | tree2 = tree2->gtSkipReloadOrCopy(); |
5500 | |
5501 | if (tree1->TypeGet() != tree2->TypeGet()) |
5502 | { |
5503 | return false; |
5504 | } |
5505 | |
5506 | if (tree1->OperGet() != tree2->OperGet()) |
5507 | { |
5508 | return false; |
5509 | } |
5510 | |
5511 | if (!tree1->OperIsLeaf() || !tree2->OperIsLeaf()) |
5512 | { |
5513 | return false; |
5514 | } |
5515 | |
5516 | switch (tree1->OperGet()) |
5517 | { |
5518 | case GT_CNS_INT: |
5519 | return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal && |
5520 | tree1->IsIconHandle() == tree2->IsIconHandle(); |
5521 | case GT_LCL_VAR: |
5522 | case GT_LCL_VAR_ADDR: |
5523 | return tree1->gtLclVarCommon.gtLclNum == tree2->gtLclVarCommon.gtLclNum; |
5524 | case GT_CLS_VAR_ADDR: |
5525 | return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd; |
5526 | default: |
5527 | return false; |
5528 | } |
5529 | } |
5530 | |
5531 | //------------------------------------------------------------------------ |
5532 | // Containment Analysis |
5533 | //------------------------------------------------------------------------ |
5534 | void Lowering::ContainCheckNode(GenTree* node) |
5535 | { |
5536 | switch (node->gtOper) |
5537 | { |
5538 | case GT_STORE_LCL_VAR: |
5539 | case GT_STORE_LCL_FLD: |
5540 | ContainCheckStoreLoc(node->AsLclVarCommon()); |
5541 | break; |
5542 | |
5543 | case GT_EQ: |
5544 | case GT_NE: |
5545 | case GT_LT: |
5546 | case GT_LE: |
5547 | case GT_GE: |
5548 | case GT_GT: |
5549 | case GT_TEST_EQ: |
5550 | case GT_TEST_NE: |
5551 | case GT_CMP: |
5552 | case GT_JCMP: |
5553 | ContainCheckCompare(node->AsOp()); |
5554 | break; |
5555 | |
5556 | case GT_JTRUE: |
5557 | ContainCheckJTrue(node->AsOp()); |
5558 | break; |
5559 | |
5560 | case GT_ADD: |
5561 | case GT_SUB: |
5562 | #if !defined(_TARGET_64BIT_) |
5563 | case GT_ADD_LO: |
5564 | case GT_ADD_HI: |
5565 | case GT_SUB_LO: |
5566 | case GT_SUB_HI: |
5567 | #endif |
5568 | case GT_AND: |
5569 | case GT_OR: |
5570 | case GT_XOR: |
5571 | ContainCheckBinary(node->AsOp()); |
5572 | break; |
5573 | |
5574 | #if defined(_TARGET_X86_) |
5575 | case GT_MUL_LONG: |
5576 | #endif |
5577 | case GT_MUL: |
5578 | case GT_MULHI: |
5579 | ContainCheckMul(node->AsOp()); |
5580 | break; |
5581 | case GT_DIV: |
5582 | case GT_MOD: |
5583 | case GT_UDIV: |
5584 | case GT_UMOD: |
5585 | ContainCheckDivOrMod(node->AsOp()); |
5586 | break; |
5587 | case GT_LSH: |
5588 | case GT_RSH: |
5589 | case GT_RSZ: |
5590 | case GT_ROL: |
5591 | case GT_ROR: |
5592 | #ifndef _TARGET_64BIT_ |
5593 | case GT_LSH_HI: |
5594 | case GT_RSH_LO: |
5595 | #endif |
5596 | ContainCheckShiftRotate(node->AsOp()); |
5597 | break; |
5598 | case GT_ARR_OFFSET: |
5599 | ContainCheckArrOffset(node->AsArrOffs()); |
5600 | break; |
5601 | case GT_LCLHEAP: |
5602 | ContainCheckLclHeap(node->AsOp()); |
5603 | break; |
5604 | case GT_RETURN: |
5605 | ContainCheckRet(node->AsOp()); |
5606 | break; |
5607 | case GT_RETURNTRAP: |
5608 | ContainCheckReturnTrap(node->AsOp()); |
5609 | break; |
case GT_STOREIND:
ContainCheckStoreIndir(node->AsIndir());
break;
5612 | case GT_IND: |
5613 | ContainCheckIndir(node->AsIndir()); |
5614 | break; |
5615 | case GT_PUTARG_REG: |
5616 | case GT_PUTARG_STK: |
5617 | #if FEATURE_ARG_SPLIT |
5618 | case GT_PUTARG_SPLIT: |
5619 | #endif // FEATURE_ARG_SPLIT |
5620 | // The regNum must have been set by the lowering of the call. |
5621 | assert(node->gtRegNum != REG_NA); |
5622 | break; |
5623 | #ifdef _TARGET_XARCH_ |
5624 | case GT_INTRINSIC: |
5625 | ContainCheckIntrinsic(node->AsOp()); |
5626 | break; |
5627 | #endif // _TARGET_XARCH_ |
5628 | #ifdef FEATURE_SIMD |
5629 | case GT_SIMD: |
5630 | ContainCheckSIMD(node->AsSIMD()); |
5631 | break; |
5632 | #endif // FEATURE_SIMD |
5633 | #ifdef FEATURE_HW_INTRINSICS |
5634 | case GT_HWIntrinsic: |
5635 | ContainCheckHWIntrinsic(node->AsHWIntrinsic()); |
5636 | break; |
5637 | #endif // FEATURE_HW_INTRINSICS |
5638 | default: |
5639 | break; |
5640 | } |
5641 | } |
5642 | |
5643 | //------------------------------------------------------------------------ |
5644 | // ContainCheckReturnTrap: determine whether the source of a RETURNTRAP should be contained. |
5645 | // |
5646 | // Arguments: |
5647 | // node - pointer to the GT_RETURNTRAP node |
5648 | // |
5649 | void Lowering::ContainCheckReturnTrap(GenTreeOp* node) |
5650 | { |
5651 | #ifdef _TARGET_XARCH_ |
5652 | assert(node->OperIs(GT_RETURNTRAP)); |
5653 | // This just turns into a compare of its child with an int + a conditional call |
5654 | if (node->gtOp1->isIndir()) |
5655 | { |
5656 | MakeSrcContained(node, node->gtOp1); |
5657 | } |
5658 | #endif // _TARGET_XARCH_ |
5659 | } |
5660 | |
5661 | //------------------------------------------------------------------------ |
5662 | // ContainCheckArrOffset: determine whether the source of an ARR_OFFSET should be contained. |
5663 | // |
5664 | // Arguments: |
5665 | // node - pointer to the GT_ARR_OFFSET node |
5666 | // |
5667 | void Lowering::ContainCheckArrOffset(GenTreeArrOffs* node) |
5668 | { |
5669 | assert(node->OperIs(GT_ARR_OFFSET)); |
5670 | // we don't want to generate code for this |
5671 | if (node->gtOffset->IsIntegralConst(0)) |
5672 | { |
5673 | MakeSrcContained(node, node->gtArrOffs.gtOffset); |
5674 | } |
5675 | } |
5676 | |
5677 | //------------------------------------------------------------------------ |
5678 | // ContainCheckLclHeap: determine whether the source of a GT_LCLHEAP node should be contained. |
5679 | // |
5680 | // Arguments: |
5681 | // node - pointer to the node |
5682 | // |
5683 | void Lowering::ContainCheckLclHeap(GenTreeOp* node) |
5684 | { |
5685 | assert(node->OperIs(GT_LCLHEAP)); |
5686 | GenTree* size = node->gtOp.gtOp1; |
5687 | if (size->IsCnsIntOrI()) |
5688 | { |
5689 | MakeSrcContained(node, size); |
5690 | } |
5691 | } |
5692 | |
5693 | //------------------------------------------------------------------------ |
5694 | // ContainCheckRet: determine whether the source of a node should be contained. |
5695 | // |
5696 | // Arguments: |
// ret - pointer to the GT_RETURN node
5698 | // |
5699 | void Lowering::ContainCheckRet(GenTreeOp* ret) |
5700 | { |
5701 | assert(ret->OperIs(GT_RETURN)); |
5702 | |
5703 | #if !defined(_TARGET_64BIT_) |
5704 | if (ret->TypeGet() == TYP_LONG) |
5705 | { |
5706 | GenTree* op1 = ret->gtGetOp1(); |
5707 | noway_assert(op1->OperGet() == GT_LONG); |
5708 | MakeSrcContained(ret, op1); |
5709 | } |
5710 | #endif // !defined(_TARGET_64BIT_) |
5711 | #if FEATURE_MULTIREG_RET |
5712 | if (varTypeIsStruct(ret)) |
5713 | { |
5714 | GenTree* op1 = ret->gtGetOp1(); |
5715 | // op1 must be either a lclvar or a multi-reg returning call |
5716 | if (op1->OperGet() == GT_LCL_VAR) |
5717 | { |
5718 | GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon(); |
5719 | LclVarDsc* varDsc = &(comp->lvaTable[lclVarCommon->gtLclNum]); |
5720 | assert(varDsc->lvIsMultiRegRet); |
5721 | |
5722 | // Mark var as contained if not enregistrable. |
5723 | if (!varTypeIsEnregisterableStruct(op1)) |
5724 | { |
5725 | MakeSrcContained(ret, op1); |
5726 | } |
5727 | } |
5728 | } |
5729 | #endif // FEATURE_MULTIREG_RET |
5730 | } |
5731 | |
5732 | //------------------------------------------------------------------------ |
5733 | // ContainCheckJTrue: determine whether the source of a JTRUE should be contained. |
5734 | // |
5735 | // Arguments: |
5736 | // node - pointer to the node |
5737 | // |
5738 | void Lowering::ContainCheckJTrue(GenTreeOp* node) |
5739 | { |
5740 | // The compare does not need to be generated into a register. |
5741 | GenTree* cmp = node->gtGetOp1(); |
5742 | cmp->gtType = TYP_VOID; |
5743 | cmp->gtFlags |= GTF_SET_FLAGS; |
5744 | } |
5745 | |