// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                                 Optimizer                                 XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#pragma warning(disable : 4701)
#endif

/*****************************************************************************/

void Compiler::optInit()
{
    optLoopsMarked = false;
    fgHasLoops     = false;

    /* Initialize the # of tracked loops to 0 */
    optLoopCount = 0;
    optLoopTable = nullptr;

    /* Keep track of the number of calls and indirect calls made by this method */
    optCallCount         = 0;
    optIndirectCallCount = 0;
    optNativeCallCount   = 0;
    optAssertionCount    = 0;
    optAssertionDep      = nullptr;
#if FEATURE_ANYCSE
    optCSECandidateTotal = 0;
    optCSEstart          = UINT_MAX;
    optCSEcount          = 0;
#endif // FEATURE_ANYCSE
}

DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
{
}

/*****************************************************************************
 *
 *  Set the block weights: blocks that cannot be reached from the first block
 *  are marked as run rarely, and (when not using profile weights) blocks that
 *  do not dominate all return blocks have their weight halved.
 */

void Compiler::optSetBlockWeights()
{
    noway_assert(opts.OptimizationEnabled());
    assert(fgDomsComputed);

#ifdef DEBUG
    bool changed = false;
#endif

    bool firstBBdomsRets = true;

    BasicBlock* block;

    for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
    {
        /* Blocks that can't be reached via the first block are rarely executed */
        if (!fgReachable(fgFirstBB, block))
        {
            block->bbSetRunRarely();
        }

        if (block->bbWeight != BB_ZERO_WEIGHT)
        {
            // Calculate our bbWeight:
            //
            //  o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
            //  o otherwise BB_UNITY_WEIGHT / 2
            //
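            // For illustration (assuming BB_UNITY_WEIGHT is 100): a conditionally
            // executed block, i.e. one that does not dominate every BBJ_RETURN
            // block, has its weight halved below, from 100 to 50.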
            bool domsRets = true; // Assume that we will dominate

            for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
            {
                if (!fgDominate(block, retBlocks->block))
                {
                    domsRets = false;
                    break;
                }
            }

            if (block == fgFirstBB)
            {
                firstBBdomsRets = domsRets;
            }

            // If we are not using profile weight then we lower the weight
            // of blocks that do not dominate a return block
            //
            if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
            {
#ifdef DEBUG
                changed = true;
#endif
                block->modifyBBWeight(block->bbWeight / 2);
                noway_assert(block->bbWeight);
            }
        }
    }

#ifdef DEBUG
    if (changed && verbose)
    {
        printf("\nAfter optSetBlockWeights:\n");
        fgDispBasicBlocks();
        printf("\n");
    }

    /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
    fgDebugCheckBBlist();
#endif
}

/*****************************************************************************
 *
 *  Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
 */

void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
{
    /* Calculate the 'loopWeight':
       this is the amount by which to increase each block in the loop.
       Our heuristic is that loops are weighted eight times more
       than straight-line code.
       Thus we increase each block's weight by 7 times the weight of
       the loop header block; if the loops are all properly formed
       this gives us (assuming that BB_LOOP_WEIGHT is 8):

          1  -- non-loop basic block
          8  -- single loop nesting
         64  -- double loop nesting
        512  -- triple loop nesting

    */
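
    /* For illustration (with BB_LOOP_WEIGHT == 8): in the sweep below, a block
       of weight 1 that dominates one of this loop's backedges is scaled to
       1 * 8 = 8, while a block that merely reaches a backedge without
       dominating it is scaled by BB_LOOP_WEIGHT / 2, to 1 * 4 = 4. Blocks
       carrying real profile weights are left unchanged. */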

    noway_assert(begBlk->bbNum <= endBlk->bbNum);
    noway_assert(begBlk->isLoopHead());
    noway_assert(fgReachable(begBlk, endBlk));

#ifdef DEBUG
    if (verbose)
    {
        printf("\nMarking loop L%02u", begBlk->bbLoopNum);
    }
#endif

    noway_assert(!opts.MinOpts());

    /* Build list of backedges for block begBlk */
    flowList* backedgeList = nullptr;

    for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
    {
        /* Is this a backedge? */
        if (pred->flBlock->bbNum >= begBlk->bbNum)
        {
            flowList* flow = new (this, CMK_FlowList) flowList();

#if MEASURE_BLOCK_SIZE
            genFlowNodeCnt += 1;
            genFlowNodeSize += sizeof(flowList);
#endif // MEASURE_BLOCK_SIZE

            flow->flNext  = backedgeList;
            flow->flBlock = pred->flBlock;
            backedgeList  = flow;
        }
    }

    /* At least one backedge must have been found (the one from endBlk) */
    noway_assert(backedgeList);

    BasicBlock* curBlk = begBlk;

    while (true)
    {
        noway_assert(curBlk);

        // For curBlk to be part of a loop that starts at begBlk
        // curBlk must be reachable from begBlk and (since this is a loop)
        // likewise begBlk must be reachable from curBlk.
        //

        if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
        {
            /* If this block reaches any of the backedge blocks we set reachable   */
            /* If this block dominates any of the backedge blocks we set dominates */
            bool reachable = false;
            bool dominates = false;

            for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
            {
                BasicBlock* backedge = tmp->flBlock;

                if (!curBlk->isRunRarely())
                {
                    reachable |= fgReachable(curBlk, backedge);
                    dominates |= fgDominate(curBlk, backedge);

                    if (dominates && reachable)
                    {
                        break;
                    }
                }
            }

            if (reachable)
            {
                noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);

                unsigned weight;

                if (curBlk->hasProfileWeight())
                {
                    // We have real profile weights, so we aren't going to change this block's weight
                    weight = curBlk->bbWeight;
                }
                else
                {
                    if (dominates)
                    {
                        weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
                    }
                    else
                    {
                        weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
                    }

                    //
                    // The multiplication may have caused us to overflow
                    //
                    if (weight < curBlk->bbWeight)
                    {
                        // The multiplication caused us to overflow
                        weight = BB_MAX_WEIGHT;
                    }
                    //
                    // Set the new weight
                    //
                    curBlk->modifyBBWeight(weight);
                }
#ifdef DEBUG
                if (verbose)
                {
                    printf("\n    " FMT_BB "(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
                }
#endif
            }
        }

        /* Stop if we've reached the last block in the loop */

        if (curBlk == endBlk)
        {
            break;
        }

        curBlk = curBlk->bbNext;

        /* If we are excluding the endBlk then stop if we've reached endBlk */

        if (excludeEndBlk && (curBlk == endBlk))
        {
            break;
        }
    }
}

/*****************************************************************************
 *
 *  Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
 */

void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
{
    /* A set of blocks that were previously marked as a loop are now
       to be unmarked, since we have decided that for some reason this
       loop no longer exists.
       Basically we are just resetting each block's bbWeight to its
       previous value.
    */

    noway_assert(begBlk->bbNum <= endBlk->bbNum);
    noway_assert(begBlk->isLoopHead());

    noway_assert(!opts.MinOpts());

    BasicBlock* curBlk;
    unsigned    backEdgeCount = 0;

    for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
    {
        curBlk = pred->flBlock;

        /* is this a backward edge? (from curBlk to begBlk) */

        if (begBlk->bbNum > curBlk->bbNum)
        {
            continue;
        }

        /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */

        if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
        {
            continue;
        }

        backEdgeCount++;
    }

    /* Only unmark the loop blocks if we have exactly one loop back edge */
    if (backEdgeCount != 1)
    {
#ifdef DEBUG
        if (verbose)
        {
            if (backEdgeCount > 0)
            {
                printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
            }
            else if (backEdgeCount == 0)
            {
                printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
            }
        }
#endif
        return;
    }
    noway_assert(backEdgeCount == 1);
    noway_assert(fgReachable(begBlk, endBlk));

#ifdef DEBUG
    if (verbose)
    {
        printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
    }
#endif

    curBlk = begBlk;
    while (true)
    {
        noway_assert(curBlk);

        // For curBlk to be part of a loop that starts at begBlk
        // curBlk must be reachable from begBlk and (since this is a loop)
        // likewise begBlk must be reachable from curBlk.
        //
        if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
        {
            unsigned weight = curBlk->bbWeight;

            // Don't unmark blocks that are set to BB_MAX_WEIGHT
            // Don't unmark blocks when we are using profile weights
            //
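            // For illustration (BB_LOOP_WEIGHT == 8): a block of weight 64 that
            // dominates endBlk is restored to 64 / 8 = 8; one that does not
            // dominate endBlk is doubled first, giving 128 / 8 = 16, a net
            // divide by 4 that mirrors the halved scale it received in
            // optMarkLoopBlocks.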
            if (!curBlk->isMaxBBWeight() && !curBlk->hasProfileWeight())
            {
                if (!fgDominate(curBlk, endBlk))
                {
                    weight *= 2;
                }
                else
                {
                    /* Merging of blocks can disturb the Dominates
                       information (see RAID #46649) */
                    if (weight < BB_LOOP_WEIGHT)
                    {
                        weight *= 2;
                    }
                }

                // We can overflow here so check for it
                if (weight < curBlk->bbWeight)
                {
                    weight = BB_MAX_WEIGHT;
                }

                assert(weight >= BB_LOOP_WEIGHT);

                curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
            }

#ifdef DEBUG
            if (verbose)
            {
                printf("\n    " FMT_BB "(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
            }
#endif
        }
        /* Stop if we've reached the last block in the loop */

        if (curBlk == endBlk)
        {
            break;
        }

        curBlk = curBlk->bbNext;

        /* Stop if we go past the last block in the loop, as it may have been deleted */
        if (curBlk->bbNum > endBlk->bbNum)
        {
            break;
        }
    }
}

/*****************************************************************************************************
 *
 *  Function called to update the loop table and bbWeight before removing a block
 */

void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
{
    if (!optLoopsMarked)
    {
        return;
    }

    noway_assert(!opts.MinOpts());

    bool removeLoop = false;

    /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
    /* Special case: the block was the head of a loop - or pointing to a loop entry */

    for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
    {
        /* Some loops may have been already removed by
         * loop unrolling or conditional folding */

        if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
        {
            continue;
        }

        if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
        {
            optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
            continue;
        }

#ifdef DEBUG
        if (verbose)
        {
            printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
            optPrintLoopInfo(loopNum);
        }
#endif

        /* If the loop is still in the table
         * any block in the loop must be reachable !!! */

        noway_assert(optLoopTable[loopNum].lpEntry != block);
        noway_assert(optLoopTable[loopNum].lpBottom != block);

        if (optLoopTable[loopNum].lpExit == block)
        {
            optLoopTable[loopNum].lpExit = nullptr;
            optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
        }

        /* If this points to the actual entry in the loop
         * then the whole loop may become unreachable */

        switch (block->bbJumpKind)
        {
            unsigned     jumpCnt;
            BasicBlock** jumpTab;

            case BBJ_NONE:
            case BBJ_COND:
                if (block->bbNext == optLoopTable[loopNum].lpEntry)
                {
                    removeLoop = true;
                    break;
                }
                if (block->bbJumpKind == BBJ_NONE)
                {
                    break;
                }

                __fallthrough;

            case BBJ_ALWAYS:
                noway_assert(block->bbJumpDest);
                if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
                {
                    removeLoop = true;
                }
                break;

            case BBJ_SWITCH:
                jumpCnt = block->bbJumpSwt->bbsCount;
                jumpTab = block->bbJumpSwt->bbsDstTab;

                do
                {
                    noway_assert(*jumpTab);
                    if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
                    {
                        removeLoop = true;
                    }
                } while (++jumpTab, --jumpCnt);
                break;

            default:
                break;
        }

        if (removeLoop)
        {
            /* Check if the entry has other predecessors outside the loop
             * TODO: Replace this when predecessors are available */

            BasicBlock* auxBlock;
            for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
            {
                /* Ignore blocks in the loop */

                if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
                    auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
                {
                    continue;
                }

                switch (auxBlock->bbJumpKind)
                {
                    unsigned     jumpCnt;
                    BasicBlock** jumpTab;

                    case BBJ_NONE:
                    case BBJ_COND:
                        if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
                        {
                            removeLoop = false;
                            break;
                        }
                        if (auxBlock->bbJumpKind == BBJ_NONE)
                        {
                            break;
                        }

                        __fallthrough;

                    case BBJ_ALWAYS:
                        noway_assert(auxBlock->bbJumpDest);
                        if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
                        {
                            removeLoop = false;
                        }
                        break;

                    case BBJ_SWITCH:
                        jumpCnt = auxBlock->bbJumpSwt->bbsCount;
                        jumpTab = auxBlock->bbJumpSwt->bbsDstTab;

                        do
                        {
                            noway_assert(*jumpTab);
                            if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
                            {
                                removeLoop = false;
                            }
                        } while (++jumpTab, --jumpCnt);
                        break;

                    default:
                        break;
                }
            }

            if (removeLoop)
            {
                optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
            }
        }
        else if (optLoopTable[loopNum].lpHead == block)
        {
            /* The loop has a new head - Just update the loop table */
            optLoopTable[loopNum].lpHead = block->bbPrev;
        }

#ifdef DEBUG
        if (verbose)
        {
            printf("\nUpdateLoopsBeforeRemoveBlock After: ");
            optPrintLoopInfo(loopNum);
        }
#endif
    }

    if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
        (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
        (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
    {
        optUnmarkLoopBlocks(block->bbJumpDest, block);
    }
}

#ifdef DEBUG

/*****************************************************************************
 *
 *  Given the beginBlock of the loop, return the index of this loop
 *  in the loop table.
 */

unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
{
    unsigned lnum = 0;

    for (lnum = 0; lnum < optLoopCount; lnum++)
    {
        if (optLoopTable[lnum].lpHead->bbNext == begBlk)
        {
            // Found the loop.
            return lnum;
        }
    }

    noway_assert(!"Loop number not found.");

    return optLoopCount;
}

/*****************************************************************************
 *
 *  Print loop info in a uniform way.
 */

void Compiler::optPrintLoopInfo(unsigned      loopInd,
                                BasicBlock*   lpHead,
                                BasicBlock*   lpFirst,
                                BasicBlock*   lpTop,
                                BasicBlock*   lpEntry,
                                BasicBlock*   lpBottom,
                                unsigned char lpExitCnt,
                                BasicBlock*   lpExit,
                                unsigned      parentLoop)
{
    noway_assert(lpHead);

    //
    // NOTE: we take "loopInd" as an argument instead of using the one
    // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
    // has not been set correctly. For example, in optRecordLoop().
    // However, in most cases loops should have been recorded.
    // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
    // version of this method.
    //
662 printf("L%02u, from " FMT_BB, loopInd, lpFirst->bbNum);
663 if (lpTop != lpFirst)
664 {
665 printf(" (loop top is " FMT_BB ")", lpTop->bbNum);
666 }
667
668 printf(" to " FMT_BB " (Head=" FMT_BB ", Entry=" FMT_BB ", ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum,
669 lpEntry->bbNum, lpExitCnt);
670
671 if (lpExitCnt == 1)
672 {
673 printf(" at " FMT_BB, lpExit->bbNum);
674 }
675
676 if (parentLoop != BasicBlock::NOT_IN_LOOP)
677 {
678 printf(", parent loop = L%02u", parentLoop);
679 }
680 printf(")");
681}
682
683/*****************************************************************************
684 *
685 * Print loop information given the index of the loop in the loop table.
686 */
687
688void Compiler::optPrintLoopInfo(unsigned lnum)
689{
690 noway_assert(lnum < optLoopCount);
691
692 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
693
694 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
695 ldsc->lpExit, ldsc->lpParent);
696}
697
698#endif

//------------------------------------------------------------------------
// optPopulateInitInfo: Populate loop init info in the loop table.
//
// Arguments:
//     loopInd - loop index.
//     init    - the tree that is supposed to initialize the loop iterator.
//     iterVar - loop iteration variable.
//
// Return Value:
//     "false" if the loop table could not be populated with the loop iterVar init info.
//
// Operation:
//     The 'init' tree is checked to see whether its LHS is the local "iterVar"
//     and whether its RHS is either a constant or a local.
//
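// Example (illustrative init forms this accepts):
//     i = 0;  // constant RHS -> LPFLG_CONST_INIT, lpConstInit = 0
//     i = j;  // local RHS    -> LPFLG_VAR_INIT,   lpVarInit = j's lclNum
// Anything else (e.g. "i = a.Length") is rejected and false is returned.
//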
bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTree* init, unsigned iterVar)
{
    // Operator should be =
    if (init->gtOper != GT_ASG)
    {
        return false;
    }

    GenTree* lhs = init->gtOp.gtOp1;
    GenTree* rhs = init->gtOp.gtOp2;
    // LHS has to be local and should equal iterVar.
    if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
    {
        return false;
    }

    // RHS can be constant or local var.
    // TODO-CQ: CLONE: Add arr length for descending loops.
    if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
        optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
    }
    else if (rhs->gtOper == GT_LCL_VAR)
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
        optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
    }
    else
    {
        return false;
    }
    return true;
}

//----------------------------------------------------------------------------------
// optCheckIterInLoopTest: Check if iter var is used in loop test.
//
// Arguments:
//     loopInd  - loop index.
//     test     - "jtrue" tree or an asg of the loop iter termination condition.
//     from, to - blocks (beg, end) which are part of the loop.
//     iterVar  - loop iteration variable.
//
// Operation:
//     The test tree is parsed to check if "iterVar" matches the lhs of the condition
//     and the rhs limit is extracted from the "test" tree. The limit information is
//     added to the loop table.
//
// Return Value:
//     "false" if the loop table could not be populated with the loop test info or
//     if the test condition doesn't involve iterVar.
//
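// Example (illustrative limit forms this recognizes):
//     i < 10        // GT_CNS_INT                             -> LPFLG_CONST_LIMIT
//     i < n         // GT_LCL_VAR, n not assigned in the loop -> LPFLG_VAR_LIMIT
//     i < a.Length  // GT_ARR_LENGTH                          -> LPFLG_ARRLEN_LIMIT
//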
bool Compiler::optCheckIterInLoopTest(
    unsigned loopInd, GenTree* test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
{
    // Obtain the relop from the "test" tree.
    GenTree* relop;
    if (test->gtOper == GT_JTRUE)
    {
        relop = test->gtGetOp1();
    }
    else
    {
        assert(test->gtOper == GT_ASG);
        relop = test->gtGetOp2();
    }

    noway_assert(relop->OperKind() & GTK_RELOP);

    GenTree* opr1 = relop->gtOp.gtOp1;
    GenTree* opr2 = relop->gtOp.gtOp2;

    GenTree* iterOp;
    GenTree* limitOp;

    // Make sure op1 or op2 is the iterVar.
    if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
    {
        iterOp  = opr1;
        limitOp = opr2;
    }
    else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
    {
        iterOp  = opr2;
        limitOp = opr1;
    }
    else
    {
        return false;
    }

    if (iterOp->gtType != TYP_INT)
    {
        return false;
    }

    // Mark the iterator node.
    iterOp->gtFlags |= GTF_VAR_ITERATOR;

    // Check what type of limit we have - constant, variable or arr-len.
    if (limitOp->gtOper == GT_CNS_INT)
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
        if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
        {
            optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
        }
    }
    else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
    }
    else if (limitOp->gtOper == GT_ARR_LENGTH)
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
    }
    else
    {
        return false;
    }
    // Save the type of the comparison between the iterator and the limit.
    optLoopTable[loopInd].lpTestTree = relop;
    return true;
}

//----------------------------------------------------------------------------------
// optIsLoopIncrTree: Check if the tree is a loop increment of the form v += 1 or v = v + 1.
//
// Arguments:
//     incr - the incr tree to be checked. The tree may be an oper-equal
//            (+=, -=, ...) node or a "v = v op const" ASG node.
//
// Operation:
//     The incr tree is parsed to check that it updates a local variable by a
//     constant value, using one of the supported operators (add, sub, mul,
//     shift).
//
// Return Value:
//     iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
//
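// Example (illustrative increments, accepted and rejected):
//     i += 2;      // GT_ADD by a constant int -> returns i's lclNum
//     i = i - 1;   // GT_SUB form of the same  -> returns i's lclNum
//     i += j;      // non-constant increment   -> returns BAD_VAR_NUM
//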
unsigned Compiler::optIsLoopIncrTree(GenTree* incr)
{
    GenTree*   incrVal;
    genTreeOps updateOper;
    unsigned   iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
    if (iterVar != BAD_VAR_NUM)
    {
        // We have v = v op y type asg node.
        switch (updateOper)
        {
            case GT_ADD:
            case GT_SUB:
            case GT_MUL:
            case GT_RSH:
            case GT_LSH:
                break;
            default:
                return BAD_VAR_NUM;
        }

        // Increment should be by a const int.
        // TODO-CQ: CLONE: allow variable increments.
        if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
        {
            return BAD_VAR_NUM;
        }
    }

    return iterVar;
}

//----------------------------------------------------------------------------------
// optComputeIterInfo: Check that a tree is the loop increment of a local
//     that is not otherwise assigned in the loop.
//
// Arguments:
//     incr     - tree that increments the loop iterator. v+=1 or v=v+1.
//     from, to - blocks (beg, end) which are part of the loop.
//     pIterVar - see return value.
//
// Return Value:
//     Returns true if the iterVar "v" can be returned in "pIterVar", otherwise
//     returns false.
//
// Operation:
//     Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
//     assigned in the loop.
//
bool Compiler::optComputeIterInfo(GenTree* incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
{
    unsigned iterVar = optIsLoopIncrTree(incr);
    if (iterVar == BAD_VAR_NUM)
    {
        return false;
    }
    if (optIsVarAssigned(from, to, incr, iterVar))
    {
        JITDUMP("iterVar is assigned in loop\n");
        return false;
    }

    *pIterVar = iterVar;
    return true;
}

//----------------------------------------------------------------------------------
// optIsLoopTestEvalIntoTemp:
//     Pattern match if the test tree is computed into a tmp
//     and the "tmp" is used as the jump condition for loop termination.
//
// Arguments:
//     testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
//                where Vtmp contains the actual loop test result.
//     newTest  - contains the statement that is the actual test stmt involving
//                the loop iterator.
//
// Return Value:
//     Returns true if a new test tree can be obtained.
//
// Operation:
//     Scan if the current stmt is a jtrue with (Vtmp != 0) as condition.
//     Then returns the rhs for def of Vtmp as the "test" node.
//
// Note:
//     This method just retrieves what it thinks is the "test" node,
//     the callers are expected to verify that "iterVar" is used in the test.
//
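// Example (illustrative two-statement pattern this matches):
//     tmp = (i < n);       // prev stmt: the relop stored into the temp
//     jmpTrue (tmp != 0);  // testStmt: jump on the temp
// The assignment statement is returned through "newTest".
//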
bool Compiler::optIsLoopTestEvalIntoTemp(GenTree* testStmt, GenTree** newTest)
{
    GenTree* test = testStmt->gtStmt.gtStmtExpr;

    if (test->gtOper != GT_JTRUE)
    {
        return false;
    }

    GenTree* relop = test->gtGetOp1();
    noway_assert(relop->OperIsCompare());

    GenTree* opr1 = relop->gtOp.gtOp1;
    GenTree* opr2 = relop->gtOp.gtOp2;

    // Make sure we have jtrue (vtmp != 0)
    if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
        opr2->IsIntegralConst(0))
    {
        // Get the previous statement to get the def (rhs) of Vtmp to see
        // if the "test" is evaluated into Vtmp.
        GenTree* prevStmt = testStmt->gtPrev;
        if (prevStmt == nullptr)
        {
            return false;
        }

        GenTree* tree = prevStmt->gtStmt.gtStmtExpr;
        if (tree->OperGet() == GT_ASG)
        {
            GenTree* lhs = tree->gtOp.gtOp1;
            GenTree* rhs = tree->gtOp.gtOp2;

            // Return as the new test node.
            if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
            {
                if (rhs->OperIsCompare())
                {
                    *newTest = prevStmt;
                    return true;
                }
            }
        }
    }
    return false;
}

//----------------------------------------------------------------------------------
// optExtractInitTestIncr:
//     Extract the "init", "test" and "incr" nodes of the loop.
//
// Arguments:
//     head   - Loop head block
//     bottom - Loop bottom block
//     top    - Loop top block
//     ppInit - The init stmt of the loop if found.
//     ppTest - The test stmt of the loop if found.
//     ppIncr - The incr stmt of the loop if found.
//
// Return Value:
//     The results are put in "ppInit", "ppTest" and "ppIncr" if the method
//     returns true. Returns false if the information can't be extracted.
//
// Operation:
//     Check if the "test" stmt is the last stmt in the loop "bottom"; if so, the
//     "test" stmt has been found. Then try to find the "incr" stmt: check the stmt
//     preceding "test". If it is not found there, the loop could have the form
//     below, in which case the last stmt of the loop "top" is checked for the
//     "incr" tree.
//
//     +-------<-----------------<-----------+
//     |                                     |
//     v                                     |
//     BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
//
//     Also check if the "test" tree is assigned to a tmp node and the tmp is used
//     in the jtrue condition.
//
// Note:
//     This method just retrieves what it thinks is the "test" node,
//     the callers are expected to verify that "iterVar" is used in the test.
//
bool Compiler::optExtractInitTestIncr(
    BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr)
{
    assert(ppInit != nullptr);
    assert(ppTest != nullptr);
    assert(ppIncr != nullptr);

    // Check if last two statements in the loop body are the increment of the iterator
    // and the loop termination test.
    noway_assert(bottom->bbTreeList != nullptr);
    GenTree* test = bottom->bbTreeList->gtPrev;
    noway_assert(test != nullptr && test->gtNext == nullptr);

    GenTree* newTest;
    if (optIsLoopTestEvalIntoTemp(test, &newTest))
    {
        test = newTest;
    }

    // Check if we have the incr tree before the test tree, if we don't,
    // check if incr is part of the loop "top".
    GenTree* incr = test->gtPrev;
    if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
    {
        if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
        {
            return false;
        }

        // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
        GenTree* topLast = top->bbTreeList->gtPrev;
        if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
        {
            incr = topLast;
        }
        else
        {
            return false;
        }
    }

    assert(test != incr);

    // Find the last statement in the loop pre-header which we expect to be the initialization of
    // the loop iterator.
    GenTree* phdr = head->bbTreeList;
    if (phdr == nullptr)
    {
        return false;
    }

    GenTree* init = phdr->gtPrev;
    noway_assert(init != nullptr && (init->gtNext == nullptr));

    // If it is a duplicated loop condition, skip it.
    if (init->gtFlags & GTF_STMT_CMPADD)
    {
        bool doGetPrev = true;
#ifdef DEBUG
        if (opts.optRepeat)
        {
            // Previous optimization passes may have inserted compiler-generated
            // statements other than duplicated loop conditions.
            doGetPrev = (init->gtPrev != nullptr);
        }
        else
        {
            // Must be a duplicated loop condition.
            noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
        }
#endif // DEBUG
        if (doGetPrev)
        {
            init = init->gtPrev;
        }
        noway_assert(init != nullptr);
    }

    noway_assert(init->gtOper == GT_STMT);
    noway_assert(test->gtOper == GT_STMT);
    noway_assert(incr->gtOper == GT_STMT);

    *ppInit = init->gtStmt.gtStmtExpr;
    *ppTest = test->gtStmt.gtStmtExpr;
    *ppIncr = incr->gtStmt.gtStmtExpr;

    return true;
}

/*****************************************************************************
 *
 *  Record the loop in the loop table. Return true if successful, false if
 *  out of entries in loop table.
 */

bool Compiler::optRecordLoop(BasicBlock*   head,
                             BasicBlock*   first,
                             BasicBlock*   top,
                             BasicBlock*   entry,
                             BasicBlock*   bottom,
                             BasicBlock*   exit,
                             unsigned char exitCnt)
{
    // Record this loop in the table, if there's room.

    assert(optLoopCount <= MAX_LOOP_NUM);
    if (optLoopCount == MAX_LOOP_NUM)
    {
#if COUNT_LOOPS
        loopOverflowThisMethod = true;
#endif
        return false;
    }

    // Assumed preconditions on the loop we're adding.
    assert(first->bbNum <= top->bbNum);
    assert(top->bbNum <= entry->bbNum);
    assert(entry->bbNum <= bottom->bbNum);
    assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);

    unsigned char loopInd = optLoopCount;

    if (optLoopTable == nullptr)
    {
        assert(loopInd == 0);
        optLoopTable = getAllocator(CMK_LoopOpt).allocate<LoopDsc>(MAX_LOOP_NUM);
    }
    else
    {
        // If the new loop contains any existing ones, add it in the right place.
        for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
        {
            unsigned char prev = prevPlus1 - 1;
            if (optLoopTable[prev].lpContainedBy(first, bottom))
            {
                loopInd = prev;
            }
        }
        // Move up any loops if necessary.
        for (unsigned j = optLoopCount; j > loopInd; j--)
        {
            optLoopTable[j] = optLoopTable[j - 1];
        }
    }

#ifdef DEBUG
    for (unsigned i = loopInd + 1; i < optLoopCount; i++)
    {
        // The loop is well-formed.
        assert(optLoopTable[i].lpWellFormed());
        // Check for disjoint.
        if (optLoopTable[i].lpDisjoint(first, bottom))
        {
            continue;
        }
        // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
        assert(optLoopTable[i].lpContainedBy(first, bottom));
    }
#endif // DEBUG

    optLoopTable[loopInd].lpHead    = head;
    optLoopTable[loopInd].lpFirst   = first;
    optLoopTable[loopInd].lpTop     = top;
    optLoopTable[loopInd].lpBottom  = bottom;
    optLoopTable[loopInd].lpEntry   = entry;
    optLoopTable[loopInd].lpExit    = exit;
    optLoopTable[loopInd].lpExitCnt = exitCnt;

    optLoopTable[loopInd].lpParent  = BasicBlock::NOT_IN_LOOP;
    optLoopTable[loopInd].lpChild   = BasicBlock::NOT_IN_LOOP;
    optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;

    optLoopTable[loopInd].lpAsgVars = AllVarSetOps::UninitVal();

    optLoopTable[loopInd].lpFlags = 0;

    // We haven't yet recorded any side effects.
    for (MemoryKind memoryKind : allMemoryKinds())
    {
        optLoopTable[loopInd].lpLoopHasMemoryHavoc[memoryKind] = false;
    }
    optLoopTable[loopInd].lpFieldsModified         = nullptr;
    optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;

    // If DO-WHILE loop mark it as such.
    if (head->bbNext == entry)
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
    }

    // If single exit loop mark it as such.
    if (exitCnt == 1)
    {
        noway_assert(exit);
        optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
    }

    //
    // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
    // We have the following restrictions:
    //     1. The loop condition must be a simple one i.e. only one JTRUE node
    //     2. There must be a loop iterator (a local var) that is
    //        incremented (decremented or lsh, rsh, mul) with a constant value
    //     3. The iterator is incremented exactly once
    //     4. The loop condition must use the iterator.
    //
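    // For illustration, the canonical shape being matched is (source-level view):
    //
    //     for (int i = 0; i < n; i++) { ... }
    //
    // where "i = 0" is the init stmt, "i < n" the test, and "i++" the incr.
    //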
    if (bottom->bbJumpKind == BBJ_COND)
    {
        GenTree* init;
        GenTree* test;
        GenTree* incr;
        if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
        {
            goto DONE_LOOP;
        }

        unsigned iterVar = BAD_VAR_NUM;
        if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
        {
            goto DONE_LOOP;
        }

        // Make sure the "iterVar" initialization is never skipped,
        // i.e. every pred of ENTRY other than HEAD is in the loop.
        for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
        {
            BasicBlock* predBlock = predEdge->flBlock;
            if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
            {
                goto DONE_LOOP;
            }
        }

        if (!optPopulateInitInfo(loopInd, init, iterVar))
        {
            goto DONE_LOOP;
        }

        // Check that the iterator is used in the loop condition.
        if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
        {
            goto DONE_LOOP;
        }

        // We know the loop has an iterator at this point -> flag it as LPFLG_ITER.
        // Record the iterator, the pointer to the test node
        // and the initial value of the iterator (constant or local var)
        optLoopTable[loopInd].lpFlags |= LPFLG_ITER;

        // Record iterator.
        optLoopTable[loopInd].lpIterTree = incr;

#if COUNT_LOOPS
        // Save the initial value of the iterator - can be lclVar or constant
        // Flag the loop accordingly.

        iterLoopCount++;
#endif

#if COUNT_LOOPS
        simpleTestLoopCount++;
#endif

        // Check if a constant iteration loop.
        if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
        {
            // This is a constant loop.
            optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
#if COUNT_LOOPS
            constIterLoopCount++;
#endif
        }

#ifdef DEBUG
        if (verbose && 0)
        {
            printf("\nConstant loop initializer:\n");
            gtDispTree(init);

            printf("\nConstant loop body:\n");

            BasicBlock* block = head;
            do
            {
                block = block->bbNext;
                for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
                {
                    if (stmt->gtStmt.gtStmtExpr == incr)
                    {
                        break;
                    }
                    printf("\n");
                    gtDispTree(stmt->gtStmt.gtStmtExpr);
                }
            } while (block != bottom);
        }
#endif // DEBUG
    }

DONE_LOOP:
    DBEXEC(verbose, optPrintLoopRecording(loopInd));
    optLoopCount++;
    return true;
}

#ifdef DEBUG
//------------------------------------------------------------------------
// optPrintLoopRecording: Print a recording of the loop.
//
// Arguments:
//     loopInd - loop index.
//
void Compiler::optPrintLoopRecording(unsigned loopInd)
{
    printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
    optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
                     optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
                     optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
                     optLoopTable[loopInd].lpExit);

    // If an iterator loop print the iterator and the initialization.
    if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
    {
        printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
        printf(" (");
        printf(GenTree::OpName(optLoopTable[loopInd].lpIterOper()));
        printf(" ");
        printf("%d )", optLoopTable[loopInd].lpIterConst());

        if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
        {
            printf(" from %d", optLoopTable[loopInd].lpConstInit);
        }
        if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
        {
            printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
        }

        // If a simple test condition print operator and the limits
        printf(GenTree::OpName(optLoopTable[loopInd].lpTestOper()));

        if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
        {
            printf("%d ", optLoopTable[loopInd].lpConstLimit());
        }

        if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
        {
            printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
        }

        printf("]");
    }

    printf("\n");
}

void Compiler::optCheckPreds()
{
    BasicBlock* block;
    BasicBlock* blockPred;
    flowList*   pred;

    for (block = fgFirstBB; block; block = block->bbNext)
    {
        for (pred = block->bbPreds; pred; pred = pred->flNext)
        {
            // make sure this pred is part of the BB list
            for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
            {
                if (blockPred == pred->flBlock)
                {
                    break;
                }
            }
            noway_assert(blockPred);
            switch (blockPred->bbJumpKind)
            {
                case BBJ_COND:
                    if (blockPred->bbJumpDest == block)
                    {
                        break;
                    }
                    __fallthrough;
                case BBJ_NONE:
                    noway_assert(blockPred->bbNext == block);
                    break;
                case BBJ_EHFILTERRET:
                case BBJ_ALWAYS:
                case BBJ_EHCATCHRET:
                    noway_assert(blockPred->bbJumpDest == block);
                    break;
                default:
                    break;
            }
        }
    }
}

#endif // DEBUG

namespace
{
//------------------------------------------------------------------------
// LoopSearch: Class that handles scanning a range of blocks to detect a loop,
//             moving blocks to make the loop body contiguous, and recording
//             the loop.
//
// We will use the following terminology:
//     HEAD   - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
//              Not part of the loop itself.
//     FIRST  - the lexically first basic block (in bbNext order) within this loop.
//     TOP    - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
//     BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
//     EXIT   - the predecessor of loop's unique exit edge, if it has a unique exit edge; else nullptr
//     ENTRY  - the entry in the loop (not necessarily the TOP), but there must be only one entry
//
// We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
// When the loop is identified, blocks will be moved out to make it a compact contiguous region if possible,
// and in cases where compaction is not possible, we'll subsequently treat all blocks in the lexical range
// between TOP and BOTTOM as part of the loop even if they aren't part of the SCC.
// Regarding nesting: Since a given block can only have one back-edge (we only detect loops with back-edges
// from BBJ_COND or BBJ_ALWAYS blocks), no two loops will share the same BOTTOM. Two loops may share the
// same FIRST/TOP/ENTRY as reported by LoopSearch, and optCanonicalizeLoopNest will subsequently re-write
// the CFG so that no two loops share the same FIRST/TOP/ENTRY anymore.
//
//        |
//        v
//      head
//        |
//        |    top/first <--+
//        |       |         |
//        |      ...        |
//        |       |         |
//        |       v         |
//        +---> entry       |
//                |         |
//               ...        |
//                |         |
//                v         |
//         +-- exit/tail    |
//         |      |         |
//         |     ...        |
//         |      |         |
//         |      v         |
//         |    bottom -----+
//         |
//         +------+
//                |
//                v
//
class LoopSearch
{

    // Keeping track of which blocks are in the loop requires two block sets since we may add blocks
    // as we go but the BlockSet type's max ID doesn't increase to accommodate them. Define a helper
    // struct to make the ensuing code more readable.
    struct LoopBlockSet
    {
    private:
        // Keep track of blocks with bbNum <= oldBlockMaxNum in a regular BlockSet, since
        // it can hold all of them.
        BlockSet oldBlocksInLoop; // Blocks with bbNum <= oldBlockMaxNum

        // Keep track of blocks with bbNum > oldBlockMaxNum in a separate BlockSet, but
        // indexing them by (blockNum - oldBlockMaxNum); since we won't generate more than
        // one new block per old block, this must be sufficient to track any new blocks.
        BlockSet newBlocksInLoop; // Blocks with bbNum > oldBlockMaxNum

        Compiler*    comp;
        unsigned int oldBlockMaxNum;

    public:
        LoopBlockSet(Compiler* comp)
            : oldBlocksInLoop(BlockSetOps::UninitVal())
            , newBlocksInLoop(BlockSetOps::UninitVal())
            , comp(comp)
            , oldBlockMaxNum(comp->fgBBNumMax)
        {
        }

        void Reset(unsigned int seedBlockNum)
        {
            if (BlockSetOps::MayBeUninit(oldBlocksInLoop))
            {
                // Either the block sets are uninitialized (and long), so we need to initialize
                // them (and allocate their backing storage), or they are short and empty, so
                // assigning MakeEmpty to them is as cheap as ClearD.
                oldBlocksInLoop = BlockSetOps::MakeEmpty(comp);
                newBlocksInLoop = BlockSetOps::MakeEmpty(comp);
            }
            else
            {
                // We know the backing storage is already allocated, so just clear it.
                BlockSetOps::ClearD(comp, oldBlocksInLoop);
                BlockSetOps::ClearD(comp, newBlocksInLoop);
            }
            assert(seedBlockNum <= oldBlockMaxNum);
            BlockSetOps::AddElemD(comp, oldBlocksInLoop, seedBlockNum);
        }

        bool CanRepresent(unsigned int blockNum)
        {
            // We can represent old blocks up to oldBlockMaxNum, and
            // new blocks up to 2 * oldBlockMaxNum.
            return (blockNum <= 2 * oldBlockMaxNum);
        }

        bool IsMember(unsigned int blockNum)
        {
            if (blockNum > oldBlockMaxNum)
            {
                return BlockSetOps::IsMember(comp, newBlocksInLoop, blockNum - oldBlockMaxNum);
            }
            return BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum);
        }

        void Insert(unsigned int blockNum)
        {
            if (blockNum > oldBlockMaxNum)
            {
                BlockSetOps::AddElemD(comp, newBlocksInLoop, blockNum - oldBlockMaxNum);
            }
            else
            {
                BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum);
            }
        }

        bool TestAndInsert(unsigned int blockNum)
        {
            if (blockNum > oldBlockMaxNum)
            {
                unsigned int shiftedNum = blockNum - oldBlockMaxNum;
                if (!BlockSetOps::IsMember(comp, newBlocksInLoop, shiftedNum))
                {
                    BlockSetOps::AddElemD(comp, newBlocksInLoop, shiftedNum);
                    return false;
                }
            }
            else
            {
                if (!BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum))
                {
                    BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum);
                    return false;
                }
            }
            return true;
        }
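
        // For illustration: if oldBlockMaxNum is 100, block 37 is tracked as
        // bit 37 of oldBlocksInLoop, while a newly created block numbered 105
        // is tracked as bit 105 - 100 = 5 of newBlocksInLoop.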
    };

    LoopBlockSet loopBlocks; // Set of blocks identified as part of the loop
    Compiler*    comp;

    // See LoopSearch class comment header for a diagram relating these fields:
    BasicBlock* head;   // Predecessor of unique entry edge
    BasicBlock* first;  // Lexically first in-loop block
    BasicBlock* top;    // Successor of back-edge from BOTTOM
    BasicBlock* bottom; // Predecessor of back-edge to TOP, also lexically last in-loop block
    BasicBlock* entry;  // Successor of unique entry edge

    BasicBlock*   lastExit;       // Most recently discovered exit block
    unsigned char exitCount;      // Number of discovered exit edges
    unsigned int  oldBlockMaxNum; // Used to identify new blocks created during compaction
    BlockSet      bottomBlocks;   // BOTTOM blocks of already-recorded loops
#ifdef DEBUG
    bool forgotExit = false; // Flags a rare case where lastExit gets nulled out, for assertions
#endif
    bool changedFlowGraph = false; // Signals that loop compaction has modified the flow graph

public:
    LoopSearch(Compiler* comp)
        : loopBlocks(comp), comp(comp), oldBlockMaxNum(comp->fgBBNumMax), bottomBlocks(BlockSetOps::MakeEmpty(comp))
    {
        // Make sure we've renumbered such that the bitsets can hold all the bits
        assert(comp->fgBBNumMax <= comp->fgCurBBEpochSize);
    }

    //------------------------------------------------------------------------
    // RecordLoop: Notify the Compiler that a loop has been found.
    //
    // Return Value:
    //     true  - Loop successfully recorded.
    //     false - Compiler has run out of loop descriptors; loop not recorded.
    //
    bool RecordLoop()
    {
        /* At this point we have a compact loop - record it in the loop table.
         * If we found only one exit, record it in the table too
         * (otherwise an exit = nullptr in the loop table means multiple exits). */

        BasicBlock* onlyExit = (exitCount == 1 ? lastExit : nullptr);
        if (comp->optRecordLoop(head, first, top, entry, bottom, onlyExit, exitCount))
        {
            // Record the BOTTOM block for future reference before returning.
            assert(bottom->bbNum <= oldBlockMaxNum);
            BlockSetOps::AddElemD(comp, bottomBlocks, bottom->bbNum);
            return true;
        }

        // Unable to record this loop because the loop descriptor table overflowed.
        return false;
    }

    //------------------------------------------------------------------------
    // ChangedFlowGraph: Determine whether loop compaction has modified the flow graph.
    //
    // Return Value:
    //     true  - The flow graph has been modified; fgUpdateChangedFlowGraph should
    //             be called (which is the caller's responsibility).
    //     false - The flow graph has not been modified by this LoopSearch.
    //
    bool ChangedFlowGraph()
    {
        return changedFlowGraph;
    }

    //------------------------------------------------------------------------
    // FindLoop: Search for a loop with the given HEAD block and back-edge.
    //
    // Arguments:
    //     head   - Block to be the HEAD of any loop identified
    //     top    - Block to be the TOP of any loop identified
    //     bottom - Block to be the BOTTOM of any loop identified
    //
    // Return Value:
    //     true  - Found a valid loop.
    //     false - Did not find a valid loop.
    //
    // Notes:
    //     May modify flow graph to make loop compact before returning.
    //     Will set instance fields to track loop's extent and exits if a valid
    //     loop is found, and potentially trash them otherwise.
    //
    bool FindLoop(BasicBlock* head, BasicBlock* top, BasicBlock* bottom)
    {
        /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
         * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
         * as the definition says, but merely an indication that we have a loop there).
         * Thus, we have to be very careful and after entry discovery check that it is indeed
         * the only place we enter the loop (especially for non-reducible flow graphs).
         */

        if (top->bbNum > bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
        {
            // Edge from BOTTOM to TOP is not a backward edge
            return false;
        }

        if (bottom->bbNum > oldBlockMaxNum)
        {
            // Not a true back-edge; bottom is a block added to reconnect fall-through during
            // loop processing, so its block number does not reflect its position.
            return false;
        }

        if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
            (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
            (bottom->bbJumpKind == BBJ_SWITCH))
        {
            /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
             * BBJ_SWITCH that has a backward jump appears only for labeled break. */
            return false;
        }

        /* The presence of a "back edge" is an indication that a loop might be present here
         *
         * LOOP:
         *   1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
         *      node in the loop to any other node in the loop (wholly within the loop)
         *   2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
         *      in the loop from outside the loop, and that is through the ENTRY
         */

        /* Let's find the loop ENTRY */
        BasicBlock* entry = FindEntry(head, top, bottom);

        if (entry == nullptr)
        {
            // For now, we only recognize loops where HEAD has some successor ENTRY in the loop.
            return false;
        }

        // Passed the basic checks; initialize instance state for this back-edge.
        this->head      = head;
        this->top       = top;
        this->entry     = entry;
        this->bottom    = bottom;
        this->lastExit  = nullptr;
        this->exitCount = 0;

        // Now we find the "first" block -- the earliest block reachable within the loop.
        // With our current algorithm, this is always the same as "top".
        this->first = top;

        if (!HasSingleEntryCycle())
        {
            // There isn't actually a loop between TOP and BOTTOM
            return false;
        }

        if (!loopBlocks.IsMember(top->bbNum))
        {
            // The "back-edge" we identified isn't actually part of the flow cycle containing ENTRY
            return false;
        }

        // Disqualify loops where the first block of the loop is less nested in EH than
        // the bottom block. That is, we don't want to handle loops where the back edge
        // goes from within an EH region to a first block that is outside that same EH
        // region. Note that we *do* handle loops where the first block is the *first*
        // block of a more nested EH region (since it is legal to branch to the first
        // block of an immediately more nested EH region). So, for example, disqualify
        // this:
        //
        // BB02
        // ...
        // try {
        //     ...
        //     BB10 BBJ_COND => BB02
        //     ...
        // }
        //
        // Here, BB10 is more nested than BB02.

        if (bottom->hasTryIndex() && !comp->bbInTryRegions(bottom->getTryIndex(), first))
        {
            JITDUMP("Loop 'first' " FMT_BB " is in an outer EH region compared to loop 'bottom' " FMT_BB ". Rejecting "
                    "loop.\n",
                    first->bbNum, bottom->bbNum);
            return false;
        }

#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
        // Disqualify loops where the first block of the loop is a finally target.
        // The main problem is when multiple loops share a 'first' block that is a finally
        // target and we canonicalize the loops by adding a new loop head. In that case, we
        // need to update the blocks so the finally target bit is moved to the newly created
        // block, and removed from the old 'first' block. This is 'hard', so at this point
        // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
        // long-term), it's easier to disallow the loop than to update the flow graph to
        // support this case.

        if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
        {
            JITDUMP("Loop 'first' " FMT_BB " is a finally target. Rejecting loop.\n", first->bbNum);
            return false;
        }
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)

        // Compact the loop (sweep through it and move out any blocks that aren't part of the
        // flow cycle), and find the exits.
        if (!MakeCompactAndFindExits())
        {
            // Unable to preserve well-formed loop during compaction.
            return false;
        }

        // We have a valid loop.
        return true;
    }

private:
    //------------------------------------------------------------------------
    // FindEntry: See if given HEAD flows to valid ENTRY between given TOP and BOTTOM
    //
    // Arguments:
    //     head   - Block to be the HEAD of any loop identified
    //     top    - Block to be the TOP of any loop identified
    //     bottom - Block to be the BOTTOM of any loop identified
    //
    // Return Value:
    //     Block to be the ENTRY of any loop identified, or nullptr if no
    //     such entry meeting our criteria can be found.
    //
    // Notes:
    //     Returns main entry if one is found, does not check for side-entries.
    //
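    // Example (illustrative): if HEAD ends in "BBJ_ALWAYS -> BB05" and BB05 lies
    // between TOP and BOTTOM, BB05 is returned as the ENTRY; if HEAD falls
    // through (BBJ_NONE or BBJ_COND), TOP itself is the ENTRY (a do-while shape).
    //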
    BasicBlock* FindEntry(BasicBlock* head, BasicBlock* top, BasicBlock* bottom)
    {
        if (head->bbJumpKind == BBJ_ALWAYS)
        {
            if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
            {
                /* OK - we enter somewhere within the loop */

                /* some useful asserts
                 * Cannot enter at the top - should have been caught by redundant jumps */

                assert((head->bbJumpDest != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));

                return head->bbJumpDest;
            }
            else
            {
                /* special case - don't consider now */
                // assert (!"Loop entered in weird way!");
                return nullptr;
            }
        }
        // Can we fall through into the loop?
        else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
        {
            /* The ENTRY is at the TOP (a do-while loop) */
            return top;
        }
        else
        {
            return nullptr; // head does not flow into the loop; bail for now
        }
    }

    //------------------------------------------------------------------------
    // HasSingleEntryCycle: Perform a reverse flow walk from ENTRY, visiting
    //     only blocks between TOP and BOTTOM, to determine if such a cycle
    //     exists and if it has a single entry.
    //
    // Return Value:
    //     true  - Found a single-entry cycle.
    //     false - Did not find a single-entry cycle.
    //
    // Notes:
    //     Will mark (in `loopBlocks`) all blocks found to participate in the
    //     cycle.
    //
    bool HasSingleEntryCycle()
    {
        // Now do a backwards flow walk from entry to see if we have a single-entry loop
        bool foundCycle = false;

        // Seed the loop block set and worklist with the entry block.
        loopBlocks.Reset(entry->bbNum);
        jitstd::list<BasicBlock*> worklist(comp->getAllocator());
        worklist.push_back(entry);

        while (!worklist.empty())
        {
            BasicBlock* block = worklist.back();
            worklist.pop_back();

            /* Make sure ENTRY dominates all blocks in the loop
             * This is necessary to ensure condition 2. above
             */
            if (block->bbNum > oldBlockMaxNum)
            {
                // This is a new block we added to connect fall-through, so the
                // recorded dominator information doesn't cover it. Just continue,
                // and when we process its unique predecessor we'll abort if ENTRY
                // doesn't dominate that.
            }
            else if (!comp->fgDominate(entry, block))
            {
                return false;
            }

            // Add preds to the worklist, checking for side-entries.
            for (flowList* predIter = block->bbPreds; predIter != nullptr; predIter = predIter->flNext)
            {
                BasicBlock* pred = predIter->flBlock;

                unsigned int testNum = PositionNum(pred);

                if ((testNum < top->bbNum) || (testNum > bottom->bbNum))
                {
                    // Pred is out of loop range
                    if (block == entry)
                    {
                        if (pred == head)
                        {
                            // This is the single entry we expect.
                            continue;
                        }
                        // ENTRY has some pred other than head outside the loop. If ENTRY does not
                        // dominate this pred, we'll consider this a side-entry and skip this loop;
                        // otherwise the loop is still valid and this may be a (flow-wise) back-edge
                        // of an outer loop. For the dominance test, if `pred` is a new block, use
                        // its unique predecessor since the dominator tree has info for that.
                        BasicBlock* effectivePred = (pred->bbNum > oldBlockMaxNum ? pred->bbPrev : pred);
                        if (comp->fgDominate(entry, effectivePred))
                        {
                            // Outer loop back-edge
                            continue;
                        }
                    }

                    // There are multiple entries to this loop, don't consider it.
                    return false;
                }

                bool isFirstVisit;
                if (pred == entry)
                {
                    // We have indeed found a cycle in the flow graph.
                    isFirstVisit = !foundCycle;
                    foundCycle   = true;
                    assert(loopBlocks.IsMember(pred->bbNum));
                }
                else if (loopBlocks.TestAndInsert(pred->bbNum))
                {
                    // Already visited this pred
                    isFirstVisit = false;
                }
                else
                {
                    // Add this pred to the worklist
                    worklist.push_back(pred);
                    isFirstVisit = true;
                }

1934 if (isFirstVisit && (pred->bbNext != nullptr) && (PositionNum(pred->bbNext) == pred->bbNum))
1935 {
1936 // We've created a new block immediately after `pred` to
1937 // reconnect what was fall-through. Mark it as in-loop also;
                    // it needs to stay with `pred`, and if it exits the loop we'd
                    // just need to re-create it if we tried to move it out.
1940 loopBlocks.Insert(pred->bbNext->bbNum);
1941 }
1942 }
1943 }
1944
1945 return foundCycle;
1946 }
1947
1948 //------------------------------------------------------------------------
1949 // PositionNum: Get the number identifying a block's position per the
1950 // lexical ordering that existed before searching for (and compacting)
1951 // loops.
1952 //
1953 // Arguments:
1954 // block - Block whose position is desired.
1955 //
1956 // Return Value:
1957 // A number indicating that block's position relative to others.
1958 //
1959 // Notes:
1960 // When the given block is a new one created during loop compaction,
1961 // the number of its unique predecessor is returned.
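    //    For example (illustrative): if oldBlockMaxNum was 24 and a new fall-through
    //    block BB25 was inserted immediately after BB07, PositionNum(BB25) returns 7,
    //    letting range tests treat the new block as sharing BB07's lexical position.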
1962 //
1963 unsigned int PositionNum(BasicBlock* block)
1964 {
1965 if (block->bbNum > oldBlockMaxNum)
1966 {
1967 // This must be a block we inserted to connect fall-through after moving blocks.
1968 // To determine if it's in the loop or not, use the number of its unique predecessor
1969 // block.
1970 assert(block->bbPreds->flBlock == block->bbPrev);
1971 assert(block->bbPreds->flNext == nullptr);
1972 return block->bbPrev->bbNum;
1973 }
1974 return block->bbNum;
1975 }
1976
1977 //------------------------------------------------------------------------
1978 // MakeCompactAndFindExits: Compact the loop (sweep through it and move out
1979 // any blocks that aren't part of the flow cycle), and find the exits (set
1980 // lastExit and exitCount).
1981 //
1982 // Return Value:
1983 // true - Loop successfully compacted (or `loopBlocks` expanded to
1984 // include all blocks in the lexical range), exits enumerated.
1985 // false - Loop cannot be made compact and remain well-formed.
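    //
    // Notes:
    //    Compaction sketch (illustrative): given the lexical layout
    //        BB02 (loop), BB03 (non-loop), BB04 (loop), BB05 (loop, BOTTOM),
    //    the non-loop run BB03 is unlinked and reinserted after the insertion point
    //    below BB05, yielding BB02, BB04, BB05, BB03, with FixupFallThrough patching
    //    any fall-through the move disconnected.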
1986 //
1987 bool MakeCompactAndFindExits()
1988 {
1989 // Compaction (if it needs to happen) will require an insertion point.
1990 BasicBlock* moveAfter = nullptr;
1991
1992 for (BasicBlock* previous = top->bbPrev; previous != bottom;)
1993 {
1994 BasicBlock* block = previous->bbNext;
1995
1996 if (loopBlocks.IsMember(block->bbNum))
1997 {
1998 // This block is a member of the loop. Check to see if it may exit the loop.
1999 CheckForExit(block);
2000
2001 // Done processing this block; move on to the next.
2002 previous = block;
2003 continue;
2004 }
2005
            // This block is lexically between TOP and BOTTOM, but it does not
2007 // participate in the flow cycle. Check for a run of consecutive
2008 // such blocks.
2009 BasicBlock* lastNonLoopBlock = block;
2010 BasicBlock* nextLoopBlock = block->bbNext;
2011 while (!loopBlocks.IsMember(nextLoopBlock->bbNum))
2012 {
2013 lastNonLoopBlock = nextLoopBlock;
2014 nextLoopBlock = nextLoopBlock->bbNext;
2015 // This loop must terminate because we know BOTTOM is in loopBlocks.
2016 }
2017
2018 // Choose an insertion point for non-loop blocks if we haven't yet done so.
2019 if (moveAfter == nullptr)
2020 {
2021 moveAfter = FindInsertionPoint();
2022 }
2023
2024 if (!BasicBlock::sameEHRegion(previous, nextLoopBlock) || !BasicBlock::sameEHRegion(previous, moveAfter))
2025 {
2026 // EH regions would be ill-formed if we moved these blocks out.
2027 // See if we can consider them loop blocks without introducing
2028 // a side-entry.
2029 if (CanTreatAsLoopBlocks(block, lastNonLoopBlock))
2030 {
                    // The call to `CanTreatAsLoopBlocks` marked these blocks as part of the loop;
2032 // iterate without updating `previous` so that we'll analyze them as part
2033 // of the loop.
2034 continue;
2035 }
2036 else
2037 {
2038 // We can't move these out of the loop or leave them in, so just give
2039 // up on this loop.
2040 return false;
2041 }
2042 }
2043
2044 // Now physically move the blocks.
2045 BasicBlock* moveBefore = moveAfter->bbNext;
2046
2047 comp->fgUnlinkRange(block, lastNonLoopBlock);
2048 comp->fgMoveBlocksAfter(block, lastNonLoopBlock, moveAfter);
2049 comp->ehUpdateLastBlocks(moveAfter, lastNonLoopBlock);
2050
2051 // Apply any adjustments needed for fallthrough at the boundaries of the moved region.
2052 FixupFallThrough(moveAfter, moveBefore, block);
2053 FixupFallThrough(lastNonLoopBlock, nextLoopBlock, moveBefore);
2054 // Also apply any adjustments needed where the blocks were snipped out of the loop.
2055 BasicBlock* newBlock = FixupFallThrough(previous, block, nextLoopBlock);
2056 if (newBlock != nullptr)
2057 {
2058 // This new block is in the loop and is a loop exit.
2059 loopBlocks.Insert(newBlock->bbNum);
2060 lastExit = newBlock;
2061 ++exitCount;
2062 }
2063
2064 // Update moveAfter for the next insertion.
2065 moveAfter = lastNonLoopBlock;
2066
2067 // Note that we've changed the flow graph, and continue without updating
2068 // `previous` so that we'll process nextLoopBlock.
2069 changedFlowGraph = true;
2070 }
2071
2072 if ((exitCount == 1) && (lastExit == nullptr))
2073 {
2074 // If we happen to have a loop with two exits, one of which goes to an
2075 // infinite loop that's lexically nested inside it, where the inner loop
2076 // can't be moved out, we can end up in this situation (because
2077 // CanTreatAsLoopBlocks will have decremented the count expecting to find
2078 // another exit later). Bump the exit count to 2, since downstream code
2079 // will not be prepared for null lastExit with exitCount of 1.
2080 assert(forgotExit);
2081 exitCount = 2;
2082 }
2083
2084 // Loop compaction was successful
2085 return true;
2086 }
2087
2088 //------------------------------------------------------------------------
2089 // FindInsertionPoint: Find an appropriate spot to which blocks that are
2090 // lexically between TOP and BOTTOM but not part of the flow cycle
2091 // can be moved.
2092 //
2093 // Return Value:
2094 // Block after which to insert moved blocks.
2095 //
2096 BasicBlock* FindInsertionPoint()
2097 {
2098 // Find an insertion point for blocks we're going to move. Move them down
2099 // out of the loop, and if possible find a spot that won't break up fall-through.
2100 BasicBlock* moveAfter = bottom;
2101 while (moveAfter->bbFallsThrough())
2102 {
2103 // Keep looking for a better insertion point if we can.
2104 BasicBlock* newMoveAfter = TryAdvanceInsertionPoint(moveAfter);
2105
2106 if (newMoveAfter == nullptr)
2107 {
2108 // Ran out of candidate insertion points, so just split up the fall-through.
2109 return moveAfter;
2110 }
2111
2112 moveAfter = newMoveAfter;
2113 }
2114
2115 return moveAfter;
2116 }
2117
2118 //------------------------------------------------------------------------
2119 // TryAdvanceInsertionPoint: Find the next legal insertion point after
2120 // the given one, if one exists.
2121 //
2122 // Arguments:
2123 // oldMoveAfter - Prior insertion point; find the next after this.
2124 //
2125 // Return Value:
2126 // The next block after `oldMoveAfter` that is a legal insertion point
2127 // (i.e. blocks being swept out of the loop can be moved immediately
2128 // after it), if one exists, else nullptr.
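    //
    // Notes:
    //    For instance (illustrative): if `newMoveAfter` ends in BBJ_COND to BB03,
    //    where BB03 lies in [top, bottom] but is not in `loopBlocks` (i.e. it is
    //    about to be swept below BOTTOM), advancing is rejected: the numerically
    //    backward edge to BB03 would become lexically forward after the move.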
2129 //
2130 BasicBlock* TryAdvanceInsertionPoint(BasicBlock* oldMoveAfter)
2131 {
2132 BasicBlock* newMoveAfter = oldMoveAfter->bbNext;
2133
2134 if (!BasicBlock::sameEHRegion(oldMoveAfter, newMoveAfter))
2135 {
2136 // Don't cross an EH region boundary.
2137 return nullptr;
2138 }
2139
2140 if ((newMoveAfter->bbJumpKind == BBJ_ALWAYS) || (newMoveAfter->bbJumpKind == BBJ_COND))
2141 {
2142 unsigned int destNum = newMoveAfter->bbJumpDest->bbNum;
2143 if ((destNum >= top->bbNum) && (destNum <= bottom->bbNum) && !loopBlocks.IsMember(destNum))
2144 {
2145 // Reversing this branch out of block `newMoveAfter` could confuse this algorithm
2146 // (in particular, the edge would still be numerically backwards but no longer be
2147 // lexically backwards, so a lexical forward walk from TOP would not find BOTTOM),
2148 // so don't do that.
2149 // We're checking for BBJ_ALWAYS and BBJ_COND only here -- we don't need to
2150 // check for BBJ_SWITCH because we'd never consider it a loop back-edge.
2151 return nullptr;
2152 }
2153 }
2154
2155 // Similarly check to see if advancing to `newMoveAfter` would reverse the lexical order
2156 // of an edge from the run of blocks being moved to `newMoveAfter` -- doing so would
2157 // introduce a new lexical back-edge, which could (maybe?) confuse the loop search
2158 // algorithm, and isn't desirable layout anyway.
2159 for (flowList* predIter = newMoveAfter->bbPreds; predIter != nullptr; predIter = predIter->flNext)
2160 {
2161 unsigned int predNum = predIter->flBlock->bbNum;
2162
2163 if ((predNum >= top->bbNum) && (predNum <= bottom->bbNum) && !loopBlocks.IsMember(predNum))
2164 {
2165 // Don't make this forward edge a backwards edge.
2166 return nullptr;
2167 }
2168 }
2169
2170 if (IsRecordedBottom(newMoveAfter))
2171 {
2172 // This is the BOTTOM of another loop; don't move any blocks past it, to avoid moving them
2173 // out of that loop (we should have already done so when processing that loop if it were legal).
2174 return nullptr;
2175 }
2176
2177 // Advancing the insertion point is ok, except that we can't split up any CallFinally/BBJ_ALWAYS
2178 // pair, so if we've got such a pair recurse to see if we can move past the whole thing.
2179 return (newMoveAfter->isBBCallAlwaysPair() ? TryAdvanceInsertionPoint(newMoveAfter) : newMoveAfter);
2180 }
2181
2182 //------------------------------------------------------------------------
    // IsRecordedBottom: Determine if the given block is the BOTTOM of a previously
2184 // recorded loop.
2185 //
2186 // Arguments:
2187 // block - Block to check for BOTTOM-ness.
2188 //
2189 // Return Value:
    //    true - The block was recorded as `bottom` of some earlier-processed loop.
2191 // false - No loops yet recorded have this block as their `bottom`.
2192 //
2193 bool IsRecordedBottom(BasicBlock* block)
2194 {
2195 if (block->bbNum > oldBlockMaxNum)
2196 {
2197 // This is a new block, which can't be an outer bottom block because we only allow old blocks
2198 // as BOTTOM.
2199 return false;
2200 }
2201 return BlockSetOps::IsMember(comp, bottomBlocks, block->bbNum);
2202 }
2203
2204 //------------------------------------------------------------------------
2205 // CanTreatAsLoopBlocks: If the given range of blocks can be treated as
    //    loop blocks, add them to `loopBlocks` and return true. Otherwise,
2207 // return false.
2208 //
2209 // Arguments:
2210 // firstNonLoopBlock - First block in the run to be subsumed.
2211 // lastNonLoopBlock - Last block in the run to be subsumed.
2212 //
2213 // Return Value:
    //    true - The blocks from `firstNonLoopBlock` to `lastNonLoopBlock` were
    //       successfully added to `loopBlocks`.
    //    false - Treating the blocks from `firstNonLoopBlock` to `lastNonLoopBlock`
    //       as loop blocks would not be legal (it would induce a side-entry).
2218 //
2219 // Notes:
2220 // `loopBlocks` may be modified even if `false` is returned.
2221 // `exitCount` and `lastExit` may be modified if this process identifies
2222 // in-loop edges that were previously counted as exits.
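    //    Worked example (illustrative): if in-loop block BB05 branches to BB07, a
    //    non-loop block now being subsumed, the BB05->BB07 edge was counted as an
    //    exit earlier; once BB07 joins the loop that edge is internal, so exitCount
    //    is decremented and a matching `lastExit` record is erased.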
2223 //
2224 bool CanTreatAsLoopBlocks(BasicBlock* firstNonLoopBlock, BasicBlock* lastNonLoopBlock)
2225 {
2226 BasicBlock* nextLoopBlock = lastNonLoopBlock->bbNext;
2227 for (BasicBlock* testBlock = firstNonLoopBlock; testBlock != nextLoopBlock; testBlock = testBlock->bbNext)
2228 {
2229 for (flowList* predIter = testBlock->bbPreds; predIter != nullptr; predIter = predIter->flNext)
2230 {
2231 BasicBlock* testPred = predIter->flBlock;
2232 unsigned int predPosNum = PositionNum(testPred);
2233 unsigned int firstNonLoopPosNum = PositionNum(firstNonLoopBlock);
2234 unsigned int lastNonLoopPosNum = PositionNum(lastNonLoopBlock);
2235
2236 if (loopBlocks.IsMember(predPosNum) ||
2237 ((predPosNum >= firstNonLoopPosNum) && (predPosNum <= lastNonLoopPosNum)))
2238 {
2239 // This pred is in the loop (or what will be the loop if we determine this
2240 // run of exit blocks doesn't include a side-entry).
2241
2242 if (predPosNum < firstNonLoopPosNum)
2243 {
2244 // We've already counted this block as an exit, so decrement the count.
2245 --exitCount;
2246 if (lastExit == testPred)
2247 {
2248 // Erase this now-bogus `lastExit` entry.
2249 lastExit = nullptr;
2250 INDEBUG(forgotExit = true);
2251 }
2252 }
2253 }
2254 else
2255 {
2256 // This pred is not in the loop, so this constitutes a side-entry.
2257 return false;
2258 }
2259 }
2260
2261 // Either we're going to abort the loop on a subsequent testBlock, or this
2262 // testBlock is part of the loop.
2263 loopBlocks.Insert(testBlock->bbNum);
2264 }
2265
2266 // All blocks were ok to leave in the loop.
2267 return true;
2268 }
2269
2270 //------------------------------------------------------------------------
2271 // FixupFallThrough: Re-establish any broken control flow connectivity
2272 // and eliminate any "goto-next"s that were created by changing the
2273 // given block's lexical follower.
2274 //
2275 // Arguments:
2276 // block - Block whose `bbNext` has changed.
2277 // oldNext - Previous value of `block->bbNext`.
2278 // newNext - New value of `block->bbNext`.
2279 //
2280 // Return Value:
2281 // If a new block is created to reconnect flow, the new block is
2282 // returned; otherwise, nullptr.
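    //
    // Notes:
    //    Example (illustrative): if `block` ends in BBJ_COND jumping to `newNext`
    //    and previously fell through to `oldNext`, the condition is reversed in
    //    place (e.g. GT_LT becomes GT_GE) and the branch is retargeted to `oldNext`,
    //    preserving both successors without inserting a new block.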
2283 //
2284 BasicBlock* FixupFallThrough(BasicBlock* block, BasicBlock* oldNext, BasicBlock* newNext)
2285 {
2286 // If we create a new block, that will be our return value.
2287 BasicBlock* newBlock = nullptr;
2288
2289 if (block->bbFallsThrough())
2290 {
2291 // Need to reconnect the flow from `block` to `oldNext`.
2292
2293 if ((block->bbJumpKind == BBJ_COND) && (block->bbJumpDest == newNext))
2294 {
2295 /* Reverse the jump condition */
2296 GenTree* test = block->lastNode();
2297 noway_assert(test->OperIsConditionalJump());
2298
2299 if (test->OperGet() == GT_JTRUE)
2300 {
2301 GenTree* cond = comp->gtReverseCond(test->gtOp.gtOp1);
2302 assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node.
2303 test->gtOp.gtOp1 = cond;
2304 }
2305 else
2306 {
2307 comp->gtReverseCond(test);
2308 }
2309
2310 // Redirect the Conditional JUMP to go to `oldNext`
2311 block->bbJumpDest = oldNext;
2312 }
2313 else
2314 {
2315 // Insert an unconditional jump to `oldNext` just after `block`.
2316 newBlock = comp->fgConnectFallThrough(block, oldNext);
2317 noway_assert((newBlock == nullptr) || loopBlocks.CanRepresent(newBlock->bbNum));
2318 }
2319 }
2320 else if ((block->bbJumpKind == BBJ_ALWAYS) && (block->bbJumpDest == newNext))
2321 {
2322 // We've made `block`'s jump target its bbNext, so remove the jump.
2323 if (!comp->fgOptimizeBranchToNext(block, newNext, block->bbPrev))
2324 {
2325 // If optimizing away the goto-next failed for some reason, mark it KEEP_BBJ_ALWAYS to
2326 // prevent assertions from complaining about it.
2327 block->bbFlags |= BBF_KEEP_BBJ_ALWAYS;
2328 }
2329 }
2330
2331 // Make sure we don't leave around a goto-next unless it's marked KEEP_BBJ_ALWAYS.
        assert(((block->bbJumpKind != BBJ_COND) && (block->bbJumpKind != BBJ_ALWAYS)) ||
               (block->bbJumpDest != newNext) || ((block->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0));
2334 return newBlock;
2335 }
2336
2337 //------------------------------------------------------------------------
2338 // CheckForExit: Check if the given block has any successor edges that are
2339 // loop exits, and update `lastExit` and `exitCount` if so.
2340 //
2341 // Arguments:
2342 // block - Block whose successor edges are to be checked.
2343 //
2344 // Notes:
2345 // If one block has multiple exiting successor edges, those are counted
2346 // as multiple exits in `exitCount`.
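    //    For instance (illustrative), a BBJ_SWITCH whose jump table targets two
    //    blocks outside `loopBlocks` contributes two to `exitCount`.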
2347 //
2348 void CheckForExit(BasicBlock* block)
2349 {
2350 BasicBlock* exitPoint;
2351
2352 switch (block->bbJumpKind)
2353 {
2354 case BBJ_COND:
2355 case BBJ_CALLFINALLY:
2356 case BBJ_ALWAYS:
2357 case BBJ_EHCATCHRET:
2358 assert(block->bbJumpDest);
2359 exitPoint = block->bbJumpDest;
2360
2361 if (!loopBlocks.IsMember(exitPoint->bbNum))
2362 {
2363 /* exit from a block other than BOTTOM */
2364 lastExit = block;
2365 exitCount++;
2366 }
2367 break;
2368
2369 case BBJ_NONE:
2370 break;
2371
2372 case BBJ_EHFINALLYRET:
2373 case BBJ_EHFILTERRET:
2374 /* The "try" associated with this "finally" must be in the
2375 * same loop, so the finally block will return control inside the loop */
2376 break;
2377
2378 case BBJ_THROW:
2379 case BBJ_RETURN:
2380 /* those are exits from the loop */
2381 lastExit = block;
2382 exitCount++;
2383 break;
2384
2385 case BBJ_SWITCH:
2386
2387 unsigned jumpCnt;
2388 jumpCnt = block->bbJumpSwt->bbsCount;
2389 BasicBlock** jumpTab;
2390 jumpTab = block->bbJumpSwt->bbsDstTab;
2391
2392 do
2393 {
2394 noway_assert(*jumpTab);
2395 exitPoint = *jumpTab;
2396
2397 if (!loopBlocks.IsMember(exitPoint->bbNum))
2398 {
2399 lastExit = block;
2400 exitCount++;
2401 }
2402 } while (++jumpTab, --jumpCnt);
2403 break;
2404
2405 default:
2406 noway_assert(!"Unexpected bbJumpKind");
2407 break;
2408 }
2409
2410 if (block->bbFallsThrough() && !loopBlocks.IsMember(block->bbNext->bbNum))
2411 {
2412 // Found a fall-through exit.
2413 lastExit = block;
2414 exitCount++;
2415 }
2416 }
2417};
2418}
2419
2420/*****************************************************************************
2421 * Find the natural loops, using dominators. Note that the test for
2422 * a loop is slightly different from the standard one, because we have
2423 * not done a depth first reordering of the basic blocks.
2424 */
2425
2426void Compiler::optFindNaturalLoops()
2427{
2428#ifdef DEBUG
2429 if (verbose)
2430 {
2431 printf("*************** In optFindNaturalLoops()\n");
2432 }
2433#endif // DEBUG
2434
2435 noway_assert(fgDomsComputed);
2436 assert(fgHasLoops);
2437
2438#if COUNT_LOOPS
2439 hasMethodLoops = false;
2440 loopsThisMethod = 0;
2441 loopOverflowThisMethod = false;
2442#endif
2443
2444 LoopSearch search(this);
2445
2446 for (BasicBlock* head = fgFirstBB; head->bbNext; head = head->bbNext)
2447 {
2448 BasicBlock* top = head->bbNext;
2449
2450 // Blocks that are rarely run have a zero bbWeight and should
2451 // never be optimized here
2452
2453 if (top->bbWeight == BB_ZERO_WEIGHT)
2454 {
2455 continue;
2456 }
2457
2458 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
2459 {
2460 if (search.FindLoop(head, top, pred->flBlock))
2461 {
2462 // Found a loop; record it and see if we've hit the limit.
2463 bool recordedLoop = search.RecordLoop();
2464
                (void)recordedLoop; // avoid unused variable warnings in COUNT_LOOPS and !DEBUG
2466
2467#if COUNT_LOOPS
2468 if (!hasMethodLoops)
2469 {
2470 /* mark the method as containing natural loops */
2471 totalLoopMethods++;
2472 hasMethodLoops = true;
2473 }
2474
2475 /* increment total number of loops found */
2476 totalLoopCount++;
2477 loopsThisMethod++;
2478
2479 /* keep track of the number of exits */
2480 loopExitCountTable.record(static_cast<unsigned>(exitCount));
2481#else // COUNT_LOOPS
2482 assert(recordedLoop);
2483 if (optLoopCount == MAX_LOOP_NUM)
2484 {
2485 // We won't be able to record any more loops, so stop looking.
2486 goto NO_MORE_LOOPS;
2487 }
2488#endif // COUNT_LOOPS
2489
2490 // Continue searching preds of `top` to see if any other are
2491 // back-edges (this can happen for nested loops). The iteration
2492 // is safe because the compaction we do only modifies predecessor
2493 // lists of blocks that gain or lose fall-through from their
2494 // `bbPrev`, but since the motion is from within the loop to below
2495 // it, we know we're not altering the relationship between `top`
2496 // and its `bbPrev`.
2497 }
2498 }
2499 }
2500NO_MORE_LOOPS:
2501
2502#if COUNT_LOOPS
2503 loopCountTable.record(loopsThisMethod);
2504 if (maxLoopsPerMethod < loopsThisMethod)
2505 {
2506 maxLoopsPerMethod = loopsThisMethod;
2507 }
2508 if (loopOverflowThisMethod)
2509 {
2510 totalLoopOverflows++;
2511 }
2512#endif // COUNT_LOOPS
2513
2514 bool mod = search.ChangedFlowGraph();
2515
2516 if (mod)
2517 {
2518 // Need to renumber blocks now since loop canonicalization
2519 // depends on it; can defer the rest of fgUpdateChangedFlowGraph()
2520 // until after canonicalizing loops. Dominator information is
2521 // recorded in terms of block numbers, so flag it invalid.
2522 fgDomsComputed = false;
2523 fgRenumberBlocks();
2524 }
2525
2526 // Now the loop indices are stable. We can figure out parent/child relationships
2527 // (using table indices to name loops), and label blocks.
2528 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
2529 {
2530 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
2531 {
2532 possibleParent--;
2533 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
2534 {
2535 optLoopTable[loopInd].lpParent = possibleParent;
2536 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
2537 optLoopTable[possibleParent].lpChild = loopInd;
2538 break;
2539 }
2540 }
2541 }
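
    // For example (illustrative): with outer loop L00 recorded before a lexically
    // nested L01, the scan for L01 tries possibleParent = 0, finds that L00 contains
    // L01, and sets lpParent = 0 while pushing L01 onto L00's child list.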
2542
2543 // Now label the blocks with the innermost loop to which they belong. Since parents
2544 // precede children in the table, doing the labeling for each loop in order will achieve
2545 // this -- the innermost loop labeling will be done last.
2546 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
2547 {
2548 BasicBlock* first = optLoopTable[loopInd].lpFirst;
2549 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
2550 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
2551 {
2552 blk->bbNatLoopNum = loopInd;
2553 if (blk == bottom)
2554 {
2555 break;
2556 }
2557 assert(blk->bbNext != nullptr); // We should never reach nullptr.
2558 }
2559 }
2560
2561 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
2562 // one, if necessary, for loops containing others that share a "top."
2563 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
2564 {
2565 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
2566 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
2567 {
2568 continue;
2569 }
2570
2571 // Otherwise...
2572 if (optCanonicalizeLoopNest(loopInd))
2573 {
2574 mod = true;
2575 }
2576 }
2577 if (mod)
2578 {
2579 fgUpdateChangedFlowGraph();
2580 }
2581
2582#ifdef DEBUG
2583 if (verbose && optLoopCount > 0)
2584 {
2585 printf("\nFinal natural loop table:\n");
2586 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
2587 {
2588 optPrintLoopInfo(loopInd);
2589 printf("\n");
2590 }
2591 }
2592#endif // DEBUG
2593}
2594
2595void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
2596{
2597 BasicBlock* newJumpDest = nullptr;
2598 switch (blk->bbJumpKind)
2599 {
2600 case BBJ_THROW:
2601 case BBJ_RETURN:
2602 case BBJ_NONE:
2603 case BBJ_EHFILTERRET:
2604 case BBJ_EHFINALLYRET:
2605 case BBJ_EHCATCHRET:
2606 // These have no jump destination to update.
2607 break;
2608
2609 case BBJ_ALWAYS:
2610 case BBJ_LEAVE:
2611 case BBJ_CALLFINALLY:
2612 case BBJ_COND:
2613 // All of these have a single jump destination to update.
2614 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
2615 {
2616 blk->bbJumpDest = newJumpDest;
2617 }
2618 break;
2619
2620 case BBJ_SWITCH:
2621 {
2622 bool redirected = false;
2623 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
2624 {
2625 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
2626 {
2627 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
2628 redirected = true;
2629 }
2630 }
            // If any redirections happened, invalidate the switch table map for the switch.
2632 if (redirected)
2633 {
2634 // Don't create a new map just to try to remove an entry.
2635 BlockToSwitchDescMap* switchMap = GetSwitchDescMap(/* createIfNull */ false);
2636 if (switchMap != nullptr)
2637 {
2638 switchMap->Remove(blk);
2639 }
2640 }
2641 }
2642 break;
2643
2644 default:
2645 unreached();
2646 }
2647}
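
// Usage sketch (illustrative): callers typically build a BlockToBlockMap such as
// { oldTop -> newT } and call optRedirectBlock on each predecessor; only branches
// whose current target appears as a key in the map are rewritten, so repeated
// calls on the same block are harmless.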
2648
2649// TODO-Cleanup: This should be a static member of the BasicBlock class.
2650void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
2651{
2652 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
2653
2654 // copy the jump destination(s) from "from" to "to".
2655 switch (to->bbJumpKind)
2656 {
2657 case BBJ_ALWAYS:
2658 case BBJ_LEAVE:
2659 case BBJ_CALLFINALLY:
2660 case BBJ_COND:
2661 // All of these have a single jump destination to update.
2662 to->bbJumpDest = from->bbJumpDest;
2663 break;
2664
2665 case BBJ_SWITCH:
2666 {
2667 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
2668 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
2669 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
2670
2671 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
2672 {
2673 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2674 }
2675 }
2676 break;
2677
2678 default:
2679 break;
2680 }
2681}
2682
2683// Canonicalize the loop nest rooted at parent loop 'loopInd'.
2684// Returns 'true' if the flow graph is modified.
2685bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2686{
2687 bool modified = false;
2688
    // Is the top of the current loop in a nested loop? If so, canonicalize to give it a unique top.
2690 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2691 {
2692 if (optCanonicalizeLoop(loopInd))
2693 {
2694 modified = true;
2695 }
2696 }
2697
2698 for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2699 child = optLoopTable[child].lpSibling)
2700 {
2701 if (optCanonicalizeLoopNest(child))
2702 {
2703 modified = true;
2704 }
2705 }
2706
2707 return modified;
2708}
2709
2710bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2711{
2712 // Is the top uniquely part of the current loop?
2713 BasicBlock* t = optLoopTable[loopInd].lpTop;
2714
2715 if (t->bbNatLoopNum == loopInd)
2716 {
2717 return false;
2718 }
2719
2720 JITDUMP("in optCanonicalizeLoop: L%02u has top " FMT_BB " (bottom " FMT_BB
2721 ") with natural loop number L%02u: need to "
2722 "canonicalize\n",
2723 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2724
2725 // Otherwise, the top of this loop is also part of a nested loop.
2726 //
2727 // Insert a new unique top for this loop. We must be careful to put this new
2728 // block in the correct EH region. Note that f->bbPrev might be in a different
2729 // EH region. For example:
2730 //
2731 // try {
2732 // ...
2733 // BB07
2734 // }
2735 // BB08 // "first"
2736 //
2737 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2738 // On the other hand, the first block of multiple loops might be the first
2739 // block of a 'try' region that is completely contained in the multiple loops.
2740 // for example:
2741 //
2742 // BB08 try { }
2743 // ...
2744 // BB10 BBJ_ALWAYS => BB08
2745 // ...
2746 // BB12 BBJ_ALWAYS => BB08
2747 //
2748 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2749 // is a single-block "try" region. Neither loop "bottom" block is in the same
2750 // "try" region as BB08. This is legal because you can jump to the first block
2751 // of a try region. With EH normalization, no two "try" regions will share
2752 // this block. In this case, we need to insert a new block for the outer loop
2753 // in the same EH region as the branch from the "bottom":
2754 //
2755 // BB30 BBJ_NONE
2756 // BB08 try { }
2757 // ...
2758 // BB10 BBJ_ALWAYS => BB08
2759 // ...
2760 // BB12 BBJ_ALWAYS => BB30
2761 //
2762 // Another possibility is that the "first" block of the loop nest can be the first block
2763 // of a "try" region that also has other predecessors than those in the loop, or even in
2764 // the "try" region (since blocks can target the first block of a "try" region). For example:
2765 //
2766 // BB08 try {
2767 // ...
2768 // BB10 BBJ_ALWAYS => BB08
2769 // ...
2770 // BB12 BBJ_ALWAYS => BB08
2771 // BB13 }
2772 // ...
2773 // BB20 BBJ_ALWAYS => BB08
2774 // ...
2775 // BB25 BBJ_ALWAYS => BB08
2776 //
2777 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2778 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2779 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2780 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2781 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2782 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2783 // situation of branching to a non-first block of a 'try' region.
2784 //
2785 // We can also have a loop nest where the "first" block is outside of a "try" region
2786 // and the back edges are inside a "try" region, for example:
2787 //
2788 // BB02 // "first"
2789 // ...
2790 // BB09 try { BBJ_COND => BB02
2791 // ...
2792 // BB15 BBJ_COND => BB02
2793 // ...
2794 // BB21 } // end of "try"
2795 //
2796 // In this case, both loop back edges were formed by "leave" instructions that were
2797 // imported into branches that were later made conditional. In this case, we don't
2798 // want to copy the EH region of the back edge, since that would create a block
2799 // outside of and disjoint with the "try" region of the back edge. However, to
2800 // simplify things, we disqualify this type of loop, so we should never see this here.
2801
2802 BasicBlock* h = optLoopTable[loopInd].lpHead;
2803 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2804 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2805
2806 // The loop must be entirely contained within a single handler region.
2807 assert(BasicBlock::sameHndRegion(f, b));
2808
2809 // If the bottom block is in the same "try" region, then we extend the EH
2810 // region. Otherwise, we add the new block outside the "try" region.
2811 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2812 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2813 if (!extendRegion)
2814 {
2815 // We need to set the EH region manually. Set it to be the same
2816 // as the bottom block.
2817 newT->copyEHRegion(b);
2818 }
2819
2820 // The new block can reach the same set of blocks as the old one, but don't try to reflect
2821 // that in its reachability set here -- creating the new block may have changed the BlockSet
2822 // representation from short to long, and canonicalizing loops is immediately followed by
2823 // a call to fgUpdateChangedFlowGraph which will recompute the reachability sets anyway.
2824
2825 // Redirect the "bottom" of the current loop to "newT".
2826 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2827 blockMap->Set(t, newT);
2828 optRedirectBlock(b, blockMap);
2829
2830 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
    // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2832 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2833 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2834 // other reason.
2835 //
2836 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2837 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2838 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2839 // edge of the blockMap, so nothing will happen.
2840 bool firstPred = true;
2841 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2842 {
2843 BasicBlock* topPredBlock = topPred->flBlock;
2844
2845 // Skip if topPredBlock is in the loop.
2846 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2847 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2848 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2849 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2850 {
2851 JITDUMP("in optCanonicalizeLoop: 'top' predecessor " FMT_BB " is in the range of L%02u (" FMT_BB ".." FMT_BB
2852 "); not "
2853 "redirecting its bottom edge\n",
2854 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2855 continue;
2856 }
2857
2858 JITDUMP("in optCanonicalizeLoop: redirect top predecessor " FMT_BB " to " FMT_BB "\n", topPredBlock->bbNum,
2859 newT->bbNum);
2860 optRedirectBlock(topPredBlock, blockMap);
2861
2862 // When we have profile data then the 'newT' block will inherit topPredBlock profile weight
2863 if (topPredBlock->hasProfileWeight())
2864 {
2865 // This corrects an issue when the topPredBlock has a profile based weight
2866 //
2867 if (firstPred)
2868 {
2869 JITDUMP("in optCanonicalizeLoop: block " FMT_BB " will inheritWeight from " FMT_BB "\n", newT->bbNum,
2870 topPredBlock->bbNum);
2871
2872 newT->inheritWeight(topPredBlock);
2873 firstPred = false;
2874 }
2875 else
2876 {
2877 JITDUMP("in optCanonicalizeLoop: block " FMT_BB " will also contribute to the weight of " FMT_BB "\n",
2878 newT->bbNum, topPredBlock->bbNum);
2879
2880 BasicBlock::weight_t newWeight = newT->getBBWeight(this) + topPredBlock->getBBWeight(this);
2881 newT->setBBWeight(newWeight);
2882 }
2883 }
2884 }
2885
2886 assert(newT->bbNext == f);
2887 if (f != t)
2888 {
2889 newT->bbJumpKind = BBJ_ALWAYS;
2890 newT->bbJumpDest = t;
2891 newT->bbTreeList = nullptr;
2892 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2893 }
2894
2895 // If it had been a do-while loop (top == entry), update entry, as well.
2896 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2897 if (optLoopTable[loopInd].lpTop == origE)
2898 {
2899 optLoopTable[loopInd].lpEntry = newT;
2900 }
2901 optLoopTable[loopInd].lpTop = newT;
2902 optLoopTable[loopInd].lpFirst = newT;
2903
2904 newT->bbNatLoopNum = loopInd;
2905
2906 JITDUMP("in optCanonicalizeLoop: made new block " FMT_BB " [%p] the new unique top of loop %d.\n", newT->bbNum,
2907 dspPtr(newT), loopInd);
2908
2909 // Make sure the head block still goes to the entry...
2910 if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2911 {
2912 h->bbJumpKind = BBJ_ALWAYS;
2913 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2914 }
2915 else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
2916 {
2917 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2918 optLoopTable[loopInd].lpHead = h2;
2919 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2920 h2->bbTreeList = nullptr;
2921 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2922 }
2923
2924 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2925 // it must be the case that they were do-while's (since "h" fell through to the entry).
2926 // The new node "newT" becomes the head of such loops.
2927 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2928 childLoop = optLoopTable[childLoop].lpSibling)
2929 {
2930 if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2931 newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2932 {
2933 optUpdateLoopHead(childLoop, h, newT);
2934 }
2935 }
2936 return true;
2937}
2938
2939bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2940{
2941 assert(l1 != BasicBlock::NOT_IN_LOOP);
2942 if (l1 == l2)
2943 {
2944 return true;
2945 }
2946 else if (l2 == BasicBlock::NOT_IN_LOOP)
2947 {
2948 return false;
2949 }
2950 else
2951 {
2952 return optLoopContains(l1, optLoopTable[l2].lpParent);
2953 }
2954}
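
// For example (illustrative): if optLoopTable[2].lpParent == 1 and
// optLoopTable[1].lpParent == 0, optLoopContains(0, 2) walks 2 -> 1 -> 0 and
// returns true, while optLoopContains(1, 0) reaches NOT_IN_LOOP and returns false.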
2955
2956void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2957{
2958 assert(optLoopTable[loopInd].lpHead == from);
2959 optLoopTable[loopInd].lpHead = to;
2960 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2961 childLoop = optLoopTable[childLoop].lpSibling)
2962 {
2963 if (optLoopTable[childLoop].lpHead == from)
2964 {
2965 optUpdateLoopHead(childLoop, from, to);
2966 }
2967 }
2968}
2969
2970/*****************************************************************************
 * If the "i += const" will cause an overflow exception for the small types.
2972 */
2973
2974bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2975{
2976 int type_MAX;
2977
2978 switch (incrType)
2979 {
2980 case TYP_BYTE:
2981 type_MAX = SCHAR_MAX;
2982 break;
2983 case TYP_UBYTE:
2984 type_MAX = UCHAR_MAX;
2985 break;
2986 case TYP_SHORT:
2987 type_MAX = SHRT_MAX;
2988 break;
2989 case TYP_USHORT:
2990 type_MAX = USHRT_MAX;
2991 break;
2992
2993 case TYP_UINT: // Detected by checking for 32bit ....
2994 case TYP_INT:
2995 return false; // ... overflow same as done for TYP_INT
2996
2997 default:
2998 NO_WAY("Bad type");
2999 }
3000
3001 if (iterAtExit > type_MAX)
3002 {
3003 return true;
3004 }
3005 else
3006 {
3007 return false;
3008 }
3009}
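
// Worked example (illustrative): for a TYP_BYTE iterator, type_MAX is SCHAR_MAX
// (127), so an iterator value of 130 at loop exit reports overflow and the caller
// abandons its constant-trip-count analysis for that loop.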
3010
3011/*****************************************************************************
3012 * If the "i -= const" will cause an underflow exception for the small types
3013 */
3014
3015bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
3016{
3017 int type_MIN;
3018
3019 switch (decrType)
3020 {
3021 case TYP_BYTE:
3022 type_MIN = SCHAR_MIN;
3023 break;
3024 case TYP_SHORT:
3025 type_MIN = SHRT_MIN;
3026 break;
3027 case TYP_UBYTE:
3028 type_MIN = 0;
3029 break;
3030 case TYP_USHORT:
3031 type_MIN = 0;
3032 break;
3033
3034 case TYP_UINT: // Detected by checking for 32bit ....
3035 case TYP_INT:
3036 return false; // ... underflow same as done for TYP_INT
3037
3038 default:
3039 NO_WAY("Bad type");
3040 }
3041
3042 if (iterAtExit < type_MIN)
3043 {
3044 return true;
3045 }
3046 else
3047 {
3048 return false;
3049 }
3050}
3051
3052/*****************************************************************************
3053 *
3054 * Helper for unroll loops - Computes the number of repetitions
3055 * in a constant loop. If it cannot prove the number is constant returns false
3056 */
3057
3058bool Compiler::optComputeLoopRep(int constInit,
3059 int constLimit,
3060 int iterInc,
3061 genTreeOps iterOper,
3062 var_types iterOperType,
3063 genTreeOps testOper,
3064 bool unsTest,
3065 bool dupCond,
3066 unsigned* iterCount)
3067{
3068 noway_assert(genActualType(iterOperType) == TYP_INT);
3069
3070 __int64 constInitX;
3071 __int64 constLimitX;
3072
3073 unsigned loopCount;
3074 int iterSign;
3075
3076 // Using this, we can just do a signed comparison with other 32 bit values.
3077 if (unsTest)
3078 {
3079 constLimitX = (unsigned int)constLimit;
3080 }
3081 else
3082 {
3083 constLimitX = (signed int)constLimit;
3084 }
3085
3086 switch (iterOperType)
3087 {
// For small types, the iteration operator will narrow these values if they are too big
3089
3090#define INIT_ITER_BY_TYPE(type) \
3091 constInitX = (type)constInit; \
3092 iterInc = (type)iterInc;
3093
3094 case TYP_BYTE:
3095 INIT_ITER_BY_TYPE(signed char);
3096 break;
3097 case TYP_UBYTE:
3098 INIT_ITER_BY_TYPE(unsigned char);
3099 break;
3100 case TYP_SHORT:
3101 INIT_ITER_BY_TYPE(signed short);
3102 break;
3103 case TYP_USHORT:
3104 INIT_ITER_BY_TYPE(unsigned short);
3105 break;
3106
3107 // For the big types, 32 bit arithmetic is performed
3108
3109 case TYP_INT:
3110 case TYP_UINT:
3111 if (unsTest)
3112 {
3113 constInitX = (unsigned int)constInit;
3114 }
3115 else
3116 {
3117 constInitX = (signed int)constInit;
3118 }
3119 break;
3120
3121 default:
3122 noway_assert(!"Bad type");
3123 NO_WAY("Bad type");
3124 }
3125
3126 /* If iterInc is zero we have an infinite loop */
3127 if (iterInc == 0)
3128 {
3129 return false;
3130 }
3131
3132 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
3133 iterSign = (iterInc > 0) ? +1 : -1;
3134
3135 /* Initialize loopCount to zero */
3136 loopCount = 0;
3137
3138 // If dupCond is true then the loop head contains a test which skips
3139 // this loop, if the constInit does not pass the loop test
3140 // Such a loop can execute zero times.
3141 // If dupCond is false then we have a true do-while loop which we
3142 // always execute the loop once before performing the loop test
3143 if (!dupCond)
3144 {
3145 loopCount += 1;
3146 constInitX += iterInc;
3147 }
3148
3149 // bail if count is based on wrap-around math
3150 if (iterInc > 0)
3151 {
3152 if (constLimitX < constInitX)
3153 {
3154 return false;
3155 }
3156 }
3157 else if (constLimitX > constInitX)
3158 {
3159 return false;
3160 }
3161
3162 /* Compute the number of repetitions */
3163
3164 switch (testOper)
3165 {
3166 __int64 iterAtExitX;
3167
3168 case GT_EQ:
3169 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
3170 return false;
3171
3172 case GT_NE:
3173 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
3174 * have a constant number of iterations or loop forever -
3175 * we have to compute (lim-init) mod iterInc to see if it is zero.
             * If (lim - init) mod iterInc is not zero then the limit test will miss and a wrap
             * will occur, which is probably not what the end user wanted, but it is legal.
3178 */
3179
3180 if (iterInc > 0)
3181 {
3182 /* Stepping by one, i.e. Mod with 1 is always zero */
3183 if (iterInc != 1)
3184 {
3185 if (((constLimitX - constInitX) % iterInc) != 0)
3186 {
3187 return false;
3188 }
3189 }
3190 }
3191 else
3192 {
3193 noway_assert(iterInc < 0);
3194 /* Stepping by -1, i.e. Mod with 1 is always zero */
3195 if (iterInc != -1)
3196 {
3197 if (((constInitX - constLimitX) % (-iterInc)) != 0)
3198 {
3199 return false;
3200 }
3201 }
3202 }
3203
3204 switch (iterOper)
3205 {
3206 case GT_SUB:
3207 iterInc = -iterInc;
3208 __fallthrough;
3209
3210 case GT_ADD:
3211 if (constInitX != constLimitX)
3212 {
3213 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
3214 }
3215
3216 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3217
3218 if (unsTest)
3219 {
3220 iterAtExitX = (unsigned)iterAtExitX;
3221 }
3222
3223 // Check if iteration incr will cause overflow for small types
3224 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
3225 {
3226 return false;
3227 }
3228
3229 // iterator with 32bit overflow. Bad for TYP_(U)INT
3230 if (iterAtExitX < constLimitX)
3231 {
3232 return false;
3233 }
3234
3235 *iterCount = loopCount;
3236 return true;
3237
3238 case GT_MUL:
3239 case GT_DIV:
3240 case GT_RSH:
3241 case GT_LSH:
3242 case GT_UDIV:
3243 return false;
3244
3245 default:
3246 noway_assert(!"Unknown operator for loop iterator");
3247 return false;
3248 }
3249
3250 case GT_LT:
3251 switch (iterOper)
3252 {
3253 case GT_SUB:
3254 iterInc = -iterInc;
3255 __fallthrough;
3256
3257 case GT_ADD:
3258 if (constInitX < constLimitX)
3259 {
3260 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
3261 }
3262
3263 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3264
3265 if (unsTest)
3266 {
3267 iterAtExitX = (unsigned)iterAtExitX;
3268 }
3269
3270 // Check if iteration incr will cause overflow for small types
3271 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
3272 {
3273 return false;
3274 }
3275
3276 // iterator with 32bit overflow. Bad for TYP_(U)INT
3277 if (iterAtExitX < constLimitX)
3278 {
3279 return false;
3280 }
3281
3282 *iterCount = loopCount;
3283 return true;
3284
3285 case GT_MUL:
3286 case GT_DIV:
3287 case GT_RSH:
3288 case GT_LSH:
3289 case GT_UDIV:
3290 return false;
3291
3292 default:
3293 noway_assert(!"Unknown operator for loop iterator");
3294 return false;
3295 }
3296
3297 case GT_LE:
3298 switch (iterOper)
3299 {
3300 case GT_SUB:
3301 iterInc = -iterInc;
3302 __fallthrough;
3303
3304 case GT_ADD:
3305 if (constInitX <= constLimitX)
3306 {
3307 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
3308 }
3309
3310 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3311
3312 if (unsTest)
3313 {
3314 iterAtExitX = (unsigned)iterAtExitX;
3315 }
3316
3317 // Check if iteration incr will cause overflow for small types
3318 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
3319 {
3320 return false;
3321 }
3322
3323 // iterator with 32bit overflow. Bad for TYP_(U)INT
3324 if (iterAtExitX <= constLimitX)
3325 {
3326 return false;
3327 }
3328
3329 *iterCount = loopCount;
3330 return true;
3331
3332 case GT_MUL:
3333 case GT_DIV:
3334 case GT_RSH:
3335 case GT_LSH:
3336 case GT_UDIV:
3337 return false;
3338
3339 default:
3340 noway_assert(!"Unknown operator for loop iterator");
3341 return false;
3342 }
3343
3344 case GT_GT:
3345 switch (iterOper)
3346 {
3347 case GT_SUB:
3348 iterInc = -iterInc;
3349 __fallthrough;
3350
3351 case GT_ADD:
3352 if (constInitX > constLimitX)
3353 {
3354 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
3355 }
3356
3357 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3358
3359 if (unsTest)
3360 {
3361 iterAtExitX = (unsigned)iterAtExitX;
3362 }
3363
3364 // Check if small types will underflow
3365 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
3366 {
3367 return false;
3368 }
3369
3370 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
3371 if (iterAtExitX > constLimitX)
3372 {
3373 return false;
3374 }
3375
3376 *iterCount = loopCount;
3377 return true;
3378
3379 case GT_MUL:
3380 case GT_DIV:
3381 case GT_RSH:
3382 case GT_LSH:
3383 case GT_UDIV:
3384 return false;
3385
3386 default:
3387 noway_assert(!"Unknown operator for loop iterator");
3388 return false;
3389 }
3390
3391 case GT_GE:
3392 switch (iterOper)
3393 {
3394 case GT_SUB:
3395 iterInc = -iterInc;
3396 __fallthrough;
3397
3398 case GT_ADD:
3399 if (constInitX >= constLimitX)
3400 {
3401 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
3402 }
3403
3404 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3405
3406 if (unsTest)
3407 {
3408 iterAtExitX = (unsigned)iterAtExitX;
3409 }
3410
3411 // Check if small types will underflow
3412 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
3413 {
3414 return false;
3415 }
3416
3417 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
3418 if (iterAtExitX >= constLimitX)
3419 {
3420 return false;
3421 }
3422
3423 *iterCount = loopCount;
3424 return true;
3425
3426 case GT_MUL:
3427 case GT_DIV:
3428 case GT_RSH:
3429 case GT_LSH:
3430 case GT_UDIV:
3431 return false;
3432
3433 default:
3434 noway_assert(!"Unknown operator for loop iterator");
3435 return false;
3436 }
3437
3438 default:
3439 noway_assert(!"Unknown operator for loop condition");
3440 }
3441
3442 return false;
3443}
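
// Worked example (illustrative): for "for (i = 0; i < 10; i += 3)" with dupCond
// true, the GT_LT/GT_ADD case computes loopCount = ((10 - 0 - 1) / 3) + 1 = 4 and
// iterAtExitX = 0 + 3 * 4 = 12; since 12 >= 10 and small-type overflow does not
// apply to TYP_INT, the function reports a trip count of 4 (i = 0, 3, 6, 9).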
3444
3445/*****************************************************************************
3446 *
3447 * Look for loop unrolling candidates and unroll them
3448 */
3449
3450#ifdef _PREFAST_
3451#pragma warning(push)
3452#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3453#endif
3454void Compiler::optUnrollLoops()
3455{
3456 if (compCodeOpt() == SMALL_CODE)
3457 {
3458 return;
3459 }
3460
3461 if (optLoopCount == 0)
3462 {
3463 return;
3464 }
3465
3466#ifdef DEBUG
3467 if (JitConfig.JitNoUnroll())
3468 {
3469 return;
3470 }
3471#endif
3472
3473#ifdef DEBUG
3474 if (verbose)
3475 {
3476 printf("*************** In optUnrollLoops()\n");
3477 }
3478#endif
3479 /* Look for loop unrolling candidates */
3480
3481 bool change = false;
3482
    // Visit loops from highest to lowest number to visit them in innermost
    // to outermost order.
3485 for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum)
3486 {
3487 // This is necessary due to an apparent analysis limitation since
3488 // optLoopCount must be strictly greater than 0 upon entry and lnum
3489 // cannot wrap due to the loop termination condition.
3490 PREFAST_ASSUME(lnum != 0U - 1);
3491
3492 BasicBlock* block;
3493 BasicBlock* head;
3494 BasicBlock* bottom;
3495
3496 GenTree* loop;
3497 GenTree* test;
3498 GenTree* incr;
3499 GenTree* phdr;
3500 GenTree* init;
3501
3502 bool dupCond;
3503 int lval;
3504 int lbeg; // initial value for iterator
3505 int llim; // limit value for iterator
3506 unsigned lvar; // iterator lclVar #
3507 int iterInc; // value to increment the iterator
3508 genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.)
3509 var_types iterOperType; // type result of the oper (for overflow instrs)
3510 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
3511 bool unsTest; // Is the comparison u/int
3512
3513 unsigned loopRetCount; // number of BBJ_RETURN blocks in loop
3514 unsigned totalIter; // total number of iterations in the constant loop
3515 unsigned loopFlags; // actual lpFlags
3516 unsigned requiredFlags; // required lpFlags
3517
3518 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
3519 10, // BLENDED_CODE
3520 0, // SMALL_CODE
3521 20, // FAST_CODE
3522 0 // COUNT_OPT_CODE
3523 };
3524
3525 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
3526 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
3527
3528 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
3529
3530#ifdef DEBUG
3531 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
3532 {
3533 iterLimit *= 10;
3534 }
3535#endif
3536
3537 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
3538 300, // BLENDED_CODE
3539 0, // SMALL_CODE
3540 600, // FAST_CODE
3541 0 // COUNT_OPT_CODE
3542 };
3543
3544 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
3545 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
3546
3547 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
3548
3549 loopFlags = optLoopTable[lnum].lpFlags;
3550 // Check for required flags:
3551 // LPFLG_DO_WHILE - required because this transform only handles loops of this form
3552 // LPFLG_CONST - required because this transform only handles full unrolls
3553 // LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons
3554 requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT;
3555
3556#ifdef DEBUG
3557 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
3558 {
3559 // In stress mode, quadruple the size limit, and drop
3560 // the restriction that loop limit must be Vector<T>.Count.
3561
3562 unrollLimitSz *= 4;
3563 requiredFlags &= ~LPFLG_SIMD_LIMIT;
3564 }
3565#endif
3566
3567 /* Ignore the loop if we don't have a do-while
3568 that has a constant number of iterations */
3569
3570 if ((loopFlags & requiredFlags) != requiredFlags)
3571 {
3572 continue;
3573 }
3574
3575 /* ignore if removed or marked as not unrollable */
3576
3577 if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
3578 {
3579 continue;
3580 }
3581
3582 head = optLoopTable[lnum].lpHead;
3583 noway_assert(head);
3584 bottom = optLoopTable[lnum].lpBottom;
3585 noway_assert(bottom);
3586
3587 /* Get the loop data:
3588 - initial constant
3589 - limit constant
3590 - iterator
3591 - iterator increment
3592 - increment operation type (i.e. ADD, SUB, etc...)
3593 - loop test type (i.e. GT_GE, GT_LT, etc...)
3594 */
3595
3596 lbeg = optLoopTable[lnum].lpConstInit;
3597 llim = optLoopTable[lnum].lpConstLimit();
3598 testOper = optLoopTable[lnum].lpTestOper();
3599
3600 lvar = optLoopTable[lnum].lpIterVar();
3601 iterInc = optLoopTable[lnum].lpIterConst();
3602 iterOper = optLoopTable[lnum].lpIterOper();
3603
3604 iterOperType = optLoopTable[lnum].lpIterOperType();
3605 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
3606
3607 if (lvaTable[lvar].lvAddrExposed)
3608 { // If the loop iteration variable is address-exposed then bail
3609 continue;
3610 }
3611 if (lvaTable[lvar].lvIsStructField)
3612 { // If the loop iteration variable is a promoted field from a struct then
3613 // bail
3614 continue;
3615 }
3616
3617 /* Locate the pre-header and initialization and increment/test statements */
3618
3619 phdr = head->bbTreeList;
3620 noway_assert(phdr);
3621 loop = bottom->bbTreeList;
3622 noway_assert(loop);
3623
3624 init = head->lastStmt();
3625 noway_assert(init && (init->gtNext == nullptr));
3626 test = bottom->lastStmt();
3627 noway_assert(test && (test->gtNext == nullptr));
3628 incr = test->gtPrev;
3629 noway_assert(incr);
3630
3631 if (init->gtFlags & GTF_STMT_CMPADD)
3632 {
3633 /* Must be a duplicated loop condition */
3634 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3635
3636 dupCond = true;
3637 init = init->gtPrev;
3638 noway_assert(init);
3639 }
3640 else
3641 {
3642 dupCond = false;
3643 }
3644
3645 /* Find the number of iterations - the function returns false if not a constant number */
3646
3647 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3648 {
3649 continue;
3650 }
3651
3652 /* Forget it if there are too many repetitions or not a constant loop */
3653
3654 if (totalIter > iterLimit)
3655 {
3656 continue;
3657 }
3658
3659 noway_assert(init->gtOper == GT_STMT);
3660 init = init->gtStmt.gtStmtExpr;
3661 noway_assert(test->gtOper == GT_STMT);
3662 test = test->gtStmt.gtStmtExpr;
3663 noway_assert(incr->gtOper == GT_STMT);
3664 incr = incr->gtStmt.gtStmtExpr;
3665
3666 // Don't unroll loops we don't understand.
3667 if (incr->gtOper != GT_ASG)
3668 {
3669 continue;
3670 }
3671 incr = incr->gtOp.gtOp2;
3672
3673 /* Make sure everything looks ok */
3674 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3675 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3676 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3677
3678 !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3679 (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3680 (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3681
3682 (test->gtOper != GT_JTRUE))
3683 {
3684 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3685 continue;
3686 }
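
        // Shape just matched (illustrative): init is ASG(lclVar, CNS_INT lbeg), the
        // increment's right-hand side is ADD(lclVar, CNS_INT iterInc) (or GT_SUB), and
        // test is a GT_JTRUE over the loop condition; anything else was rejected above.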
3687
3688 /* heuristic - Estimated cost in code size of the unrolled loop */
3689
3690 {
3691 ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration
3692
3693 block = head->bbNext;
3694 auto tryIndex = block->bbTryIndex;
3695
3696 loopRetCount = 0;
3697 for (;; block = block->bbNext)
3698 {
3699 if (block->bbTryIndex != tryIndex)
3700 {
3701 // Unrolling would require cloning EH regions
3702 goto DONE_LOOP;
3703 }
3704
3705 if (block->bbJumpKind == BBJ_RETURN)
3706 {
3707 ++loopRetCount;
3708 }
3709
3710 /* Visit all the statements in the block */
3711
3712 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3713 {
3714 /* Calculate gtCostSz */
3715 gtSetStmtInfo(stmt);
3716
3717 /* Update loopCostSz */
3718 loopCostSz += stmt->gtCostSz;
3719 }
3720
3721 if (block == bottom)
3722 {
3723 break;
3724 }
3725 }
3726
3727#ifdef JIT32_GCENCODER
3728 if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX)
3729 {
3730 // Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs.
3731 goto DONE_LOOP;
3732 }
#endif // JIT32_GCENCODER
3734
3735 /* Compute the estimated increase in code size for the unrolled loop */
3736
3737 ClrSafeInt<unsigned> fixedLoopCostSz(8);
3738
3739 ClrSafeInt<int> unrollCostSz = ClrSafeInt<int>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) -
3740 ClrSafeInt<int>(loopCostSz + fixedLoopCostSz);
3741
3742 /* Don't unroll if too much code duplication would result. */
3743
3744 if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz))
3745 {
3746 goto DONE_LOOP;
3747 }
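
            // Worked example (hypothetical numbers): with loopCostSz = 12,
            // totalIter = 4 and fixedLoopCostSz = 8, the estimated growth is
            // unrollCostSz = (12 * 4) - (12 + 8) = 28, which is acceptable
            // only if it does not exceed unrollLimitSz.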
3748
3749 /* Looks like a good idea to unroll this loop, let's do it! */
3750 CLANG_FORMAT_COMMENT_ANCHOR;
3751
3752#ifdef DEBUG
3753 if (verbose)
3754 {
3755 printf("\nUnrolling loop " FMT_BB, head->bbNext->bbNum);
3756 if (head->bbNext->bbNum != bottom->bbNum)
3757 {
3758 printf(".." FMT_BB, bottom->bbNum);
3759 }
3760 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
                printf(" unrollCostSz = %d\n", unrollCostSz.Value());
3762 printf("\n");
3763 }
3764#endif
3765 }
3766
3767 /* Create the unrolled loop statement list */
3768 {
3769 BlockToBlockMap blockMap(getAllocator());
3770 BasicBlock* insertAfter = bottom;
3771
            // Preserve 'totalIter' for the fgReturnCount update below; count down on a copy.
            unsigned iterToClone = totalIter;

            for (lval = lbeg; iterToClone != 0; iterToClone--)
3773 {
3774 for (block = head->bbNext;; block = block->bbNext)
3775 {
3776 BasicBlock* newBlock = insertAfter =
3777 fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true);
3778 blockMap.Set(block, newBlock);
3779
3780 if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval))
3781 {
3782 // cloneExpr doesn't handle everything
3783 BasicBlock* oldBottomNext = insertAfter->bbNext;
3784 bottom->bbNext = oldBottomNext;
3785 oldBottomNext->bbPrev = bottom;
3786 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3787 goto DONE_LOOP;
3788 }
3789 // Block weight should no longer have the loop multiplier
3790 newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT);
3791 // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
3792 assert(newBlock->bbJumpDest == nullptr);
3793
3794 if (block == bottom)
3795 {
3796 // Remove the test; we're doing a full unroll.
3797
3798 GenTreeStmt* testCopyStmt = newBlock->lastStmt();
3799 GenTree* testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr;
3800 assert(testCopyExpr->gtOper == GT_JTRUE);
3801 GenTree* sideEffList = nullptr;
3802 gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF);
3803 if (sideEffList == nullptr)
3804 {
3805 fgRemoveStmt(newBlock, testCopyStmt);
3806 }
3807 else
3808 {
3809 testCopyStmt->gtStmt.gtStmtExpr = sideEffList;
3810 }
3811 newBlock->bbJumpKind = BBJ_NONE;
3812
3813 // Exit this loop; we've walked all the blocks.
3814 break;
3815 }
3816 }
3817
3818 // Now redirect any branches within the newly-cloned iteration
3819 for (block = head->bbNext; block != bottom; block = block->bbNext)
3820 {
3821 BasicBlock* newBlock = blockMap[block];
3822 optCopyBlkDest(block, newBlock);
3823 optRedirectBlock(newBlock, &blockMap);
3824 }
3825
3826 /* update the new value for the unrolled iterator */
3827
3828 switch (iterOper)
3829 {
3830 case GT_ADD:
3831 lval += iterInc;
3832 break;
3833
3834 case GT_SUB:
3835 lval -= iterInc;
3836 break;
3837
3838 case GT_RSH:
3839 case GT_LSH:
3840 noway_assert(!"Unrolling not implemented for this loop iterator");
3841 goto DONE_LOOP;
3842
3843 default:
3844 noway_assert(!"Unknown operator for constant loop iterator");
3845 goto DONE_LOOP;
3846 }
3847 }
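            // For example (illustrative): with lbeg = 0, iterInc = 1 and
            // totalIter = 3, three copies of the body are emitted with
            // lval = 0, 1 and 2, each constant-substituted for 'lvar' by
            // CloneBlockState above.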
3848
3849 // Gut the old loop body
3850 for (block = head->bbNext;; block = block->bbNext)
3851 {
3852 block->bbTreeList = nullptr;
3853 block->bbJumpKind = BBJ_NONE;
3854 block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD);
                block->bbJumpDest = nullptr;
3859
3860 if (block == bottom)
3861 {
3862 break;
3863 }
3864 }
3865
3866 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3867
3868 if (head->bbJumpKind == BBJ_COND)
3869 {
3870 phdr = head->bbTreeList;
3871 noway_assert(phdr);
3872 test = phdr->gtPrev;
3873
3874 noway_assert(test && (test->gtNext == nullptr));
3875 noway_assert(test->gtOper == GT_STMT);
3876 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3877
3878 init = test->gtPrev;
3879 noway_assert(init && (init->gtNext == test));
3880 noway_assert(init->gtOper == GT_STMT);
3881
3882 init->gtNext = nullptr;
3883 phdr->gtPrev = init;
3884 head->bbJumpKind = BBJ_NONE;
3885 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3886 }
3887 else
3888 {
3889 /* the loop must execute */
3890 noway_assert(head->bbJumpKind == BBJ_NONE);
3891 }
3892
3893#ifdef DEBUG
3894 if (verbose)
3895 {
3896 printf("Whole unrolled loop:\n");
3897
3898 gtDispTree(init);
3899 printf("\n");
3900 fgDumpTrees(head->bbNext, insertAfter);
3901 }
3902#endif
3903
3904 /* Remember that something has changed */
3905
3906 change = true;
3907
3908 /* Make sure to update loop table */
3909
            /* Mark the loop with LPFLG_REMOVED, and null out its head and bottom
             * pointers so that any stale use hits an assert or access violation */
3912
3913 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3914 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3915
3916 // Note if we created new BBJ_RETURNs
3917 fgReturnCount += loopRetCount * (totalIter - 1);
3918 }
3919
3920 DONE_LOOP:;
3921 }
3922
3923 if (change)
3924 {
3925 fgUpdateChangedFlowGraph();
3926 }
3927
3928#ifdef DEBUG
3929 fgDebugCheckBBlist(true);
3930#endif
3931}
3932#ifdef _PREFAST_
3933#pragma warning(pop)
3934#endif
3935
3936/*****************************************************************************
3937 *
 *  Return true if there is a code path from 'topBB' to 'botBB' that will
 *  not execute a method call.
3940 */
3941
3942bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
3943{
3944 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3945 // as some helper calls are neither interruptible nor hijackable.
3946 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3947 // those helpers too.
3948
3949 noway_assert(topBB->bbNum <= botBB->bbNum);
3950
3951 // We can always check topBB and botBB for any gc safe points and early out
3952
3953 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3954 {
3955 return false;
3956 }
3957
3958 // Otherwise we will need to rely upon the dominator sets
3959
3960 if (!fgDomsComputed)
3961 {
3962 // return a conservative answer of true when we don't have the dominator sets
3963 return true;
3964 }
3965
3966 BasicBlock* curBB = topBB;
3967 for (;;)
3968 {
3969 noway_assert(curBB);
3970
3971 // If we added a loop pre-header block then we will
3972 // have a bbNum greater than fgLastBB, and we won't have
3973 // any dominator information about this block, so skip it.
3974 //
3975 if (curBB->bbNum <= fgLastBB->bbNum)
3976 {
3977 noway_assert(curBB->bbNum <= botBB->bbNum);
3978
3979 // Does this block contain a gc safe point?
3980
3981 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3982 {
3983 // Will this block always execute on the way to botBB ?
3984 //
                // Since we are checking every block in [topBB .. botBB] and we are
                // using only a lexical definition of a loop (all that we know is that
                // botBB has a back-edge to topBB), while walking blocks in this range
                // we may encounter some blocks that are not really part of the loop,
                // and so we need to perform some additional checks:
                //
                // We will check that the current 'curBB' is reachable from 'topBB'
                // and that it dominates the block containing the back-edge 'botBB'.
                // When both of these are true then we know that the gc safe point in
                // 'curBB' will be encountered in the loop and we can return false.
3996 //
3997 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3998 {
3999 return false;
4000 }
4001 }
4002 else
4003 {
4004 // If we've reached the destination block, then we're done
4005
4006 if (curBB == botBB)
4007 {
4008 break;
4009 }
4010 }
4011 }
4012
4013 curBB = curBB->bbNext;
4014 }
4015
4016 // If we didn't find any blocks that contained a gc safe point and
4017 // also met the fgDominate and fgReachable criteria then we must return true
4018 //
4019 return true;
4020}
4021
4022/*****************************************************************************
4023 *
4024 * Find the loop termination test at the bottom of the loop
4025 */
4026
4027static GenTree* optFindLoopTermTest(BasicBlock* bottom)
4028{
4029 GenTree* testt = bottom->bbTreeList;
4030
4031 assert(testt && testt->gtOper == GT_STMT);
4032
4033 GenTree* result = testt->gtPrev;
4034
4035#ifdef DEBUG
4036 while (testt->gtNext)
4037 {
4038 testt = testt->gtNext;
4039 }
4040
4041 assert(testt == result);
4042#endif
4043
4044 return result;
4045}
4046
4047/*****************************************************************************
 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond) { do{} while(cond); }"
4049 */
4050
4051void Compiler::fgOptWhileLoop(BasicBlock* block)
4052{
4053 noway_assert(opts.OptimizationEnabled());
4054 noway_assert(compCodeOpt() != SMALL_CODE);
4055
4056 /*
        Optimize while loops into do { } while loops.
4058 Our loop hoisting logic requires do { } while loops.
4059 Specifically, we're looking for the following case:
4060
4061 ...
4062 jmp test
4063 loop:
4064 ...
4065 ...
4066 test:
4067 cond
4068 jtrue loop
4069
4070 If we find this, and the condition is simple enough, we change
4071 the loop to the following:
4072
4073 ...
4074 cond
4075 jfalse done
4076 // else fall-through
4077 loop:
4078 ...
4079 ...
4080 test:
4081 cond
4082 jtrue loop
4083 done:
4084
4085 */
4086
4087 /* Does the BB end with an unconditional jump? */
4088
4089 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
4090 { // It can't be one of the ones we use for our exception magic
4091 return;
4092 }
4093
    // It has to be a forward jump.
    // TODO-CQ: Check if we can also optimize the backward jump.
4096 //
4097 if (fgIsForwardBranch(block) == false)
4098 {
4099 return;
4100 }
4101
4102 // Get hold of the jump target
4103 BasicBlock* bTest = block->bbJumpDest;
4104
    // Does bTest end with a conditional jump, i.e. 'jtrue(cond)'?
4106 if (bTest->bbJumpKind != BBJ_COND)
4107 {
4108 return;
4109 }
4110
4111 // bTest must be a backwards jump to block->bbNext
4112 if (bTest->bbJumpDest != block->bbNext)
4113 {
4114 return;
4115 }
4116
    // Since bTest is a BBJ_COND it will have a bbNext
4118 noway_assert(bTest->bbNext);
4119
4120 // 'block' must be in the same try region as the condition, since we're going to insert
4121 // a duplicated condition in 'block', and the condition might include exception throwing code.
4122 if (!BasicBlock::sameTryRegion(block, bTest))
4123 {
4124 return;
4125 }
4126
4127 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
4128 // same try region (or no try region) to avoid generating illegal flow.
4129 BasicBlock* bTestNext = bTest->bbNext;
4130 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
4131 {
4132 return;
4133 }
4134
4135 GenTree* condStmt = optFindLoopTermTest(bTest);
4136
    // bTest must contain only a jtrue with no other statements; we will clone
    // only the conditional, so any other statements would not get cloned.
    // TODO-CQ: consider cloning the whole bTest block and inserting it after block.
4140 //
4141 if (bTest->bbTreeList != condStmt)
4142 {
4143 return;
4144 }
4145
4146 /* Get to the condition node from the statement tree */
4147
4148 noway_assert(condStmt->gtOper == GT_STMT);
4149
4150 GenTree* condTree = condStmt->gtStmt.gtStmtExpr;
4151 noway_assert(condTree->gtOper == GT_JTRUE);
4152
4153 condTree = condTree->gtOp.gtOp1;
4154
4155 // The condTree has to be a RelOp comparison
    // TODO-CQ: Check if we can also optimize the backward jump.
4157 //
4158 if (condTree->OperIsCompare() == false)
4159 {
4160 return;
4161 }
4162
4163 /* We call gtPrepareCost to measure the cost of duplicating this tree */
4164
4165 gtPrepareCost(condTree);
4166 unsigned estDupCostSz = condTree->gtCostSz;
4167
4168 double loopIterations = (double)BB_LOOP_WEIGHT;
4169
4170 bool allProfileWeightsAreValid = false;
4171 BasicBlock::weight_t weightBlock = block->bbWeight;
4172 BasicBlock::weight_t weightTest = bTest->bbWeight;
4173 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
4174
    // If we have profile data then we calculate the number of times
    // the loop will iterate into loopIterations
4177 if (fgIsUsingProfileWeights())
4178 {
4179 // Only rely upon the profile weight when all three of these blocks
4180 // have good profile weights
4181 if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight())
4182 {
4183 allProfileWeightsAreValid = true;
4184
4185 // If this while loop never iterates then don't bother transforming
4186 if (weightNext == 0)
4187 {
4188 return;
4189 }
4190
            // With (weightNext > 0) we should also have (weightTest >= weightBlock)
            // if the profile weights are all valid.
            //
            // weightNext is the number of times this loop iterates
4195 // weightBlock is the number of times that we enter the while loop
4196 // loopIterations is the average number of times that this loop iterates
4197 //
4198 if (weightTest >= weightBlock)
4199 {
4200 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
4201 }
4202 }
4203 }
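
    // For example (hypothetical weights): if the loop is entered 100 times
    // (weightBlock == 100) and the loop body executes 1000 times in total
    // (weightNext == 1000), then loopIterations = 1000 / 100 = 10.0.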
4204
4205 unsigned maxDupCostSz = 32;
4206
4207 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
4208 // set loop weights yet
4209 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
4210 {
4211 maxDupCostSz *= 4;
4212 }
4213
    // If this loop iterates a lot then raise the maxDupCostSz
4215 if (loopIterations >= 12.0)
4216 {
4217 maxDupCostSz *= 2;
4218 }
4219 if (loopIterations >= 96.0)
4220 {
4221 maxDupCostSz *= 2;
4222 }
4223
4224 // If the loop condition has a shared static helper, we really want this loop converted
4225 // as not converting the loop will disable loop hoisting, meaning the shared helper will
4226 // be executed on every loop iteration.
4227 int countOfHelpers = 0;
4228 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
4229
4230 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
4231 {
4232 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
4233 }
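
    // Worked example (hypothetical inputs): under FAST_CODE the base budget of 32
    // becomes 128; a loop with loopIterations >= 96.0 doubles that twice more to
    // 512; and one shared static helper in the condition then adds 24, so even a
    // fairly large condition tree can be duplicated when the payoff is high.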
4234
4235 // If the compare has too high cost then we don't want to dup
4236
4237 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
4238
4239#ifdef DEBUG
4240 if (verbose)
4241 {
4242 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
4243 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
4244 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
4245 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
4246 allProfileWeightsAreValid ? "true" : "false");
4247 }
4248#endif
4249
4250 if (costIsTooHigh)
4251 {
4252 return;
4253 }
4254
4255 /* Looks good - duplicate the condition test */
4256
4257 condTree->gtFlags |= GTF_RELOP_ZTT;
4258
4259 condTree = gtCloneExpr(condTree);
4260 gtReverseCond(condTree);
4261
4262 // Make sure clone expr copied the flag
4263 assert(condTree->gtFlags & GTF_RELOP_ZTT);
4264
4265 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
4266
4267 /* Create a statement entry out of the condition and
4268 append the condition test at the end of 'block' */
4269
4270 GenTree* copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
4271
4272 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
4273
4274 if (opts.compDbgInfo)
4275 {
4276 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
4277 }
4278
4279 // Flag the block that received the copy as potentially having an array/vtable
4280 // reference if the block copied from did; this is a conservative guess.
4281 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
4282 {
4283 block->bbFlags |= copyFlags;
4284 }
4285
4286 // If we have profile data for all blocks and we know that we are cloning the
4287 // bTest block into block and thus changing the control flow from block so
4288 // that it no longer goes directly to bTest anymore, we have to adjust the
4289 // weight of bTest by subtracting out the weight of block.
4290 //
4291 if (allProfileWeightsAreValid)
4292 {
4293 //
4294 // Some additional sanity checks before adjusting the weight of bTest
4295 //
4296 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
4297 {
            // Get the two edges that flow out of bTest
4299 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
4300 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
4301
4302 // Calculate the new weight for block bTest
4303
4304 BasicBlock::weight_t newWeightTest =
4305 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
4306 bTest->bbWeight = newWeightTest;
4307
4308 if (newWeightTest == BB_ZERO_WEIGHT)
4309 {
4310 bTest->bbFlags |= BBF_RUN_RARELY;
4311 // All out edge weights are set to zero
4312 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
4313 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
4314 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
4315 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
4316 }
4317 else
4318 {
                // Update our edge weights
4320 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
4321 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
4322 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
4323 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
4324 }
4325 }
4326 }
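
    // For example (hypothetical weights): with weightTest == 300 and
    // weightBlock == 100, bTest's new weight is 300 - 100 = 200, and both of
    // its out edges have their max weight clamped to at most 200.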
4327
4328 /* Change the block to end with a conditional jump */
4329
4330 block->bbJumpKind = BBJ_COND;
4331 block->bbJumpDest = bTest->bbNext;
4332
4333 /* Mark the jump dest block as being a jump target */
4334 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
4335
4336 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
4337
4338 fgAddRefPred(block->bbNext, block);
4339
4340 fgRemoveRefPred(bTest, block);
4341 fgAddRefPred(bTest->bbNext, block);
4342
4343#ifdef DEBUG
4344 if (verbose)
4345 {
4346 printf("\nDuplicating loop condition in " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")", block->bbNum,
4347 block->bbNext->bbNum, bTest->bbNum);
4348 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
4349
4350 gtDispTree(copyOfCondStmt);
4351 }
4352
4353#endif
4354}
4355
4356/*****************************************************************************
4357 *
4358 * Optimize the BasicBlock layout of the method
4359 */
4360
4361void Compiler::optOptimizeLayout()
4362{
4363 noway_assert(opts.OptimizationEnabled());
4364
4365#ifdef DEBUG
4366 if (verbose)
4367 {
4368 printf("*************** In optOptimizeLayout()\n");
4369 fgDispHandlerTab();
4370 }
4371
4372 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
4373 fgDebugCheckBBlist();
4374#endif
4375
4376 noway_assert(fgModified == false);
4377
4378 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
4379 {
4380 /* Make sure the appropriate fields are initialized */
4381
4382 if (block->bbWeight == BB_ZERO_WEIGHT)
4383 {
4384 /* Zero weighted block can't have a LOOP_HEAD flag */
4385 noway_assert(block->isLoopHead() == false);
4386 continue;
4387 }
4388
4389 assert(block->bbLoopNum == 0);
4390
4391 if (compCodeOpt() != SMALL_CODE)
4392 {
4393 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
4394
4395 fgOptWhileLoop(block);
4396 }
4397 }
4398
4399 if (fgModified)
4400 {
4401 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
4402 fgComputeEdgeWeights();
4403 }
4404
4405 fgUpdateFlowGraph(true);
4406 fgReorderBlocks();
4407 fgUpdateFlowGraph();
4408}
4409
4410/*****************************************************************************
4411 *
4412 * Perform loop inversion, find and classify natural loops
4413 */
4414
4415void Compiler::optOptimizeLoops()
4416{
4417 noway_assert(opts.OptimizationEnabled());
4418
4419#ifdef DEBUG
4420 if (verbose)
4421 {
4422 printf("*************** In optOptimizeLoops()\n");
4423 }
4424#endif
4425
4426 optSetBlockWeights();
4427
4428 /* Were there any loops in the flow graph? */
4429
4430 if (fgHasLoops)
4431 {
4432 /* now that we have dominator information we can find loops */
4433
4434 optFindNaturalLoops();
4435
4436 unsigned loopNum = 0;
4437
4438 /* Iterate over the flow graph, marking all loops */
4439
4440 /* We will use the following terminology:
4441 * top - the first basic block in the loop (i.e. the head of the backward edge)
4442 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
4443 * lastBottom - used when we have multiple back-edges to the same top
4444 */
4445
4446 flowList* pred;
4447
4448 BasicBlock* top;
4449
4450 for (top = fgFirstBB; top; top = top->bbNext)
4451 {
4452 BasicBlock* foundBottom = nullptr;
4453
4454 for (pred = top->bbPreds; pred; pred = pred->flNext)
4455 {
4456 /* Is this a loop candidate? - We look for "back edges" */
4457
4458 BasicBlock* bottom = pred->flBlock;
4459
4460 /* is this a backward edge? (from BOTTOM to TOP) */
4461
4462 if (top->bbNum > bottom->bbNum)
4463 {
4464 continue;
4465 }
4466
4467 /* 'top' also must have the BBF_LOOP_HEAD flag set */
4468
4469 if (top->isLoopHead() == false)
4470 {
4471 continue;
4472 }
4473
4474 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
4475
4476 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
4477 {
4478 continue;
4479 }
4480
4481 /* the top block must be able to reach the bottom block */
4482 if (!fgReachable(top, bottom))
4483 {
4484 continue;
4485 }
4486
4487 /* Found a new loop, record the longest backedge in foundBottom */
4488
4489 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
4490 {
4491 foundBottom = bottom;
4492 }
4493 }
4494
4495 if (foundBottom)
4496 {
4497 loopNum++;
4498#ifdef DEBUG
4499 /* Mark the loop header as such */
4500 assert(FitsIn<unsigned char>(loopNum));
4501 top->bbLoopNum = (unsigned char)loopNum;
4502#endif
4503
4504 /* Mark all blocks between 'top' and 'bottom' */
4505
4506 optMarkLoopBlocks(top, foundBottom, false);
4507 }
4508
4509 // We track at most 255 loops
4510 if (loopNum == 255)
4511 {
4512#if COUNT_LOOPS
4513 totalUnnatLoopOverflows++;
4514#endif
4515 break;
4516 }
4517 }
4518
4519#if COUNT_LOOPS
4520 totalUnnatLoopCount += loopNum;
4521#endif
4522
4523#ifdef DEBUG
4524 if (verbose)
4525 {
4526 if (loopNum > 0)
4527 {
4528 printf("\nFound a total of %d loops.", loopNum);
4529 printf("\nAfter loop weight marking:\n");
4530 fgDispBasicBlocks();
4531 printf("\n");
4532 }
4533 }
4534#endif
4535 optLoopsMarked = true;
4536 }
4537}
4538
4539//------------------------------------------------------------------------
4540// optDeriveLoopCloningConditions: Derive loop cloning conditions.
4541//
4542// Arguments:
4543// loopNum - the current loop index for which conditions are derived.
4544// context - data structure where all loop cloning info is kept.
4545//
4546// Return Value:
4547// "false" if conditions cannot be obtained. "true" otherwise.
4548// The cloning conditions are updated in the "conditions"[loopNum] field
4549// of the "context" parameter.
4550//
4551// Operation:
4552// Inspect the loop cloning optimization candidates and populate the conditions necessary
4553// for each optimization candidate. Checks if the loop stride is "> 0" if the loop
//     condition is "less than". If the initializer is a "var" init, then the condition
//     "var >= 0" is added; and if the limit is a "var" limit, then "var >= 0" and "var <= a.len"
4556// are added to "context". These conditions are checked in the pre-header block
4557// and the cloning choice is made.
4558//
4559// Assumption:
4560// Callers should assume AND operation is used i.e., if all conditions are
4561// true, then take the fast path.
4562//
4563bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
4564{
4565 JITDUMP("------------------------------------------------------------\n");
4566 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
4567
4568 LoopDsc* loop = &optLoopTable[loopNum];
4569 JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4570
4571 if (loop->lpTestOper() == GT_LT)
4572 {
4573 // Stride conditions
4574 if (loop->lpIterConst() <= 0)
4575 {
4576 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
4577 return false;
4578 }
4579
4580 // Init conditions
4581 if (loop->lpFlags & LPFLG_CONST_INIT)
4582 {
4583 // Only allowing const init at this time.
4584 if (loop->lpConstInit < 0)
4585 {
4586 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
4587 return false;
4588 }
4589 }
4590 else if (loop->lpFlags & LPFLG_VAR_INIT)
4591 {
            // initVar >= 0
4593 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
4594 LC_Expr(LC_Ident(0, LC_Ident::Const)));
4595 context->EnsureConditions(loopNum)->Push(geZero);
4596 }
4597 else
4598 {
            JITDUMP("> Not a constant or variable init\n");
4600 return false;
4601 }
4602
4603 // Limit Conditions
4604 LC_Ident ident;
4605 if (loop->lpFlags & LPFLG_CONST_LIMIT)
4606 {
4607 int limit = loop->lpConstLimit();
4608 if (limit < 0)
4609 {
4610 JITDUMP("> limit %d is invalid\n", limit);
4611 return false;
4612 }
4613 ident = LC_Ident(static_cast<unsigned>(limit), LC_Ident::Const);
4614 }
4615 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
4616 {
4617 unsigned limitLcl = loop->lpVarLimit();
4618 ident = LC_Ident(limitLcl, LC_Ident::Var);
4619
4620 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
4621
4622 context->EnsureConditions(loopNum)->Push(geZero);
4623 }
4624 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
4625 {
4626 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
4627 if (!loop->lpArrLenLimit(this, index))
4628 {
                JITDUMP("> ArrLen not matching\n");
4630 return false;
4631 }
4632 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4633
            // Ensure that this array is dereferenceable before executing the actual condition.
4635 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4636 context->EnsureDerefs(loopNum)->Push(array);
4637 }
4638 else
4639 {
4640 JITDUMP("> Undetected limit\n");
4641 return false;
4642 }
4643
4644 for (unsigned i = 0; i < optInfos->Size(); ++i)
4645 {
4646 LcOptInfo* optInfo = optInfos->GetRef(i);
4647 switch (optInfo->GetOptType())
4648 {
4649 case LcOptInfo::LcJaggedArray:
4650 {
4651 // limit <= arrLen
4652 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4653 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4654 LC_Ident arrLenIdent = LC_Ident(arrLen);
4655
4656 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4657 context->EnsureConditions(loopNum)->Push(cond);
4658
                // Ensure that this array is dereferenceable before executing the actual condition.
4660 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4661 context->EnsureDerefs(loopNum)->Push(array);
4662 }
4663 break;
4664 case LcOptInfo::LcMdArray:
4665 {
4666 // limit <= mdArrLen
4667 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4668 LC_Condition cond(GT_LE, LC_Expr(ident),
4669 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4670 mdArrInfo->GetArrIndexForDim(getAllocator()),
4671 mdArrInfo->dim, LC_Array::None))));
4672 context->EnsureConditions(loopNum)->Push(cond);
4673 }
4674 break;
4675
4676 default:
4677 JITDUMP("Unknown opt\n");
4678 return false;
4679 }
4680 }
4681 JITDUMP("Conditions: (");
4682 DBEXEC(verbose, context->PrintConditions(loopNum));
4683 JITDUMP(")\n");
4684 return true;
4685 }
4686 return false;
4687}
4688
4689//------------------------------------------------------------------------------------
4690// optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4691//
4692// Arguments:
4693// loopNum - the current loop index for which conditions are derived.
4694// context - data structure where all loop cloning info is kept.
4695//
4696// Return Value:
4697// "false" if conditions cannot be obtained. "true" otherwise.
4698// The deref conditions are updated in the "derefConditions"[loopNum] field
4699// of the "context" parameter.
4700//
4701// Definition of Deref Conditions:
4702// To be able to check for the loop cloning condition that (limitVar <= a.len)
4703// we should first be able to dereference "a". i.e., "a" is non-null.
4704//
4705// Example:
4706//
4707// for (i in 0..n)
4708// for (j in 0..n)
//          for (k in 0..n)             // Innermost loop is being cloned. Cloning needs to check if
//                                      // (n <= a[i][j].len) and other safety conditions to take the fast path
4711// a[i][j][k] = 0;
4712//
4713// Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4714// This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4715// be true to do the deref.
4716//
4717// (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4718//
4719// Note the short circuiting AND. Implication: these conditions should be performed in separate
4720// blocks each of which will branch to slow path if the condition evaluates to false.
4721//
4722// Now, imagine a situation where we have
4723// a[x][y][k] = 20 and a[i][j][k] = 0
4724// also in the inner most loop where x, y are parameters, then our conditions will have
4725// to include
4726// (x < a.len) &&
4727// (y < a[x].len)
4728// in addition to the above conditions (1) to get rid of bounds check on index 'k'
4729//
4730// But these conditions can be checked together with conditions
4731// (i < a.len) without a need for a separate block. In summary, the conditions will be:
4732//
4733// (a != null) &&
4734// ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4735// (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4736// (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4737// (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4738//
4739// This naturally yields a tree style pattern, where the nodes of the tree are
4740// the array and indices respectively.
4741//
4742// Example:
4743// a => {
4744// i => {
4745// j => {
4746// k => {}
4747// }
4748// },
4749// x => {
4750// y => {
4751// k => {}
4752// }
4753// }
4754// }
4755//
4756// Notice that the variables in the same levels can have their conditions combined in the
4757// same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4758// combined with a short-circuiting AND (i.e., different basic blocks).
4759//
4760// Operation:
4761// Construct a tree of array indices and the array which will generate the optimal
4762// conditions for loop cloning.
4763//
4764// a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4765//
4766// a => {
4767// i => {
4768// j => {
4769// k => {}
4770// },
4771// y => {
4772// k => {}
4773// },
4774// }
4775// },
4776// b => {
4777// i => {}
4778// }
4779// In this method, we will construct such a tree by descending depth first into the array
4780// index operation and forming a tree structure as we encounter the array or the index variables.
4781//
4782// This tree structure will then be used to generate conditions like below:
4783// (a != null) & (b != null) && // from the first level of the tree.
4784//
4785// (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4786// (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4787//
4788// (j < a[i].len) & (y < a[i].len) && // from the third level.
4789// (a[i][j] != null) & (a[i][y] != null) && // from the third level.
4790//
4791// and so on.
4792//
4793//
4794bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4795{
4796 JitExpandArrayStack<LC_Deref*> nodes(getAllocator());
4797 int maxRank = -1;
4798
4799 // Get the dereference-able arrays.
4800 JitExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4801
4802 // For each array in the dereference list, construct a tree,
4803 // where the nodes are array and index variables and an edge 'u-v'
4804 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4805 // 'u-v-w' transitively if u[v][w] occurs.
4806 for (unsigned i = 0; i < deref->Size(); ++i)
4807 {
4808 LC_Array& array = (*deref)[i];
4809
4810 // First populate the array base variable.
4811 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4812 if (node == nullptr)
4813 {
4814 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4815 nodes.Push(node);
4816 }
4817
4818 // For each dimension (level) for the array, populate the tree with the variable
4819 // from that dimension.
        // Use 'j' here so we don't shadow the outer loop variable 'i'.
        unsigned rank = (unsigned)array.GetDimRank();
        for (unsigned j = 0; j < rank; ++j)
        {
            node->EnsureChildren(getAllocator());
            LC_Deref* tmp = node->Find(array.arrIndex->indLcls[j]);
4825 if (tmp == nullptr)
4826 {
4827 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4828 node->children->Push(tmp);
4829 }
4830
4831 // Descend one level down.
4832 node = tmp;
4833 }
4834
4835 // Keep the maxRank of all array dereferences.
4836 maxRank = max((int)rank, maxRank);
4837 }
4838
4839#ifdef DEBUG
4840 if (verbose)
4841 {
4842 for (unsigned i = 0; i < nodes.Size(); ++i)
4843 {
4844 if (i != 0)
4845 {
4846 printf(",");
4847 }
4848 nodes[i]->Print();
4849 printf("\n");
4850 }
4851 }
4852#endif
4853
4854 if (maxRank == -1)
4855 {
4856 return false;
4857 }
4858
    // The first level will always yield the null-check, since it is made of the array base variables.
    // All other levels (dimensions) will yield two conditions, e.g.: (i < a.length && a[i] != null).
    // So add 1 after rank * 2.
4862 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4863
    // Heuristic to not create too many blocks.
4865 if (condBlocks > 4)
4866 {
4867 return false;
4868 }
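
    // For example (illustrative): a rank-2 access like a[i][j] gives maxRank = 2,
    // so condBlocks = 2 * 2 + 1 = 5 > 4 and cloning is abandoned, while a rank-1
    // access a[i] gives condBlocks = 3 and passes this check.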
4869
4870 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4871 JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond =
4872 context->EnsureBlockConditions(loopNum, condBlocks);
4873 for (unsigned i = 0; i < nodes.Size(); ++i)
4874 {
4875 nodes[i]->DeriveLevelConditions(levelCond);
4876 }
4877
4878 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4879 return true;
4880}
4881
4882#ifdef DEBUG
4883//----------------------------------------------------------------------------
4884// optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4885//
4886// Arguments:
4887// block - the block in which the helper call needs to be inserted.
4888// insertBefore - the tree before which the helper call will be inserted.
4889//
4890void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTree* insertBefore)
4891{
4892 if (JitConfig.JitDebugLogLoopCloning() == 0)
4893 {
4894 return;
4895 }
4896 GenTree* logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4897 GenTree* stmt = fgNewStmtFromTree(logCall);
4898 fgInsertStmtBefore(block, insertBefore, stmt);
4899 fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Debug log loop cloning"));
4900}
4901#endif
4902
4903//------------------------------------------------------------------------
4904// optPerformStaticOptimizations: Perform the optimizations for the optimization
4905// candidates gathered during the cloning phase.
4906//
4907// Arguments:
4908// loopNum - the current loop index for which the optimizations are performed.
4909// context - data structure where all loop cloning info is kept.
4910// dynamicPath - If true, the optimization is performed in the fast path among the
4911// cloned loops. If false, it means this is the only path (i.e.,
4912// there is no slow path.)
4913//
4914// Operation:
4915// Perform the optimizations on the fast path i.e., the path in which the
4916// optimization candidates were collected at the time of identifying them.
4917// The candidates store all the information necessary (the tree/stmt/block
4918// they are from) to perform the optimization.
4919//
4920// Assumption:
4921// The unoptimized path is either already cloned when this method is called or
4922// there is no unoptimized path (got eliminated statically.) So this method
4923// performs the optimizations assuming that the path in which the candidates
4924// were collected is the fast path in which the optimizations will be performed.
4925//
4926void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4927{
4928 JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4929 for (unsigned i = 0; i < optInfos->Size(); ++i)
4930 {
4931 LcOptInfo* optInfo = optInfos->GetRef(i);
4932 switch (optInfo->GetOptType())
4933 {
4934 case LcOptInfo::LcJaggedArray:
4935 {
4936 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4937 compCurBB = arrIndexInfo->arrIndex.useBlock;
4938 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt);
4939 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4940 }
4941 break;
4942 case LcOptInfo::LcMdArray:
4943 // TODO-CQ: CLONE: Implement.
4944 break;
4945 default:
4946 break;
4947 }
4948 }
4949}
4950
4951//----------------------------------------------------------------------------
4952// optCanCloneLoops: Use the environment flag to determine whether loop
4953// cloning is allowed to be performed.
4954//
4955// Return Value:
//  Returns true if loop cloning is enabled: always in retail builds, and
//  when the COMPlus_JitCloneLoops flag is non-zero in debug builds.
4958//
4959bool Compiler::optCanCloneLoops()
4960{
4961 // Enabled for retail builds now.
4962 unsigned cloneLoopsFlag = 1;
4963#ifdef DEBUG
4964 cloneLoopsFlag = JitConfig.JitCloneLoops();
4965#endif
4966 return (cloneLoopsFlag != 0);
4967}
4968
4969//----------------------------------------------------------------------------
4970// optIsLoopClonable: Determine whether this loop can be cloned.
4971//
4972// Arguments:
4973// loopInd loop index which needs to be checked if it can be cloned.
4974//
4975// Return Value:
4976// Returns true if the loop can be cloned. If it returns false
4977// prints a message in debug as why the loop can't be cloned.
4978//
4979bool Compiler::optIsLoopClonable(unsigned loopInd)
4980{
4981 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4982 // inserting new EH regions in the exception table yet.
4983 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4984 unsigned loopRetCount = 0;
4985 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
4986 {
4987 if (blk->bbJumpKind == BBJ_RETURN)
4988 {
4989 loopRetCount++;
4990 }
4991 if (bbIsTryBeg(blk))
4992 {
4993 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
4994 return false;
4995 }
4996 }
4997
4998 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4999 // into the middle of a handler (to go to the cloned copy.) Reject.
5000 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
5001 {
5002 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
5003 return false;
5004 }
5005
5006 // If the head and entry are in different EH regions, reject.
5007 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
5008 {
5009 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
5010 return false;
5011 }
5012
5013 // Is the first block after the last block of the loop a handler or filter start?
    // Usually, we create a dummy block after the original loop, to skip over the loop clone
5015 // and go to where the original loop did. That raises problems when we don't actually go to
5016 // that block; this is one of those cases. This could be fixed fairly easily; for example,
5017 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
5018 // loop. This is just a corner to cut to get this working faster.
5019 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
5020 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
5021 {
5022 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
5023 return false;
5024 }
5025
5026 // We've previously made a decision whether to have separate return epilogs, or branch to one.
5027 // There's a GCInfo limitation in the x86 case, so that there can be no more than SET_EPILOGCNT_MAX separate
5028 // epilogs. Other architectures have a limit of 4 here for "historical reasons", but this should be revisited
5029 // (or return blocks should not be considered part of the loop, rendering this issue moot).
5030 unsigned epilogLimit = 4;
5031#ifdef JIT32_GCENCODER
5032 epilogLimit = SET_EPILOGCNT_MAX;
5033#endif // JIT32_GCENCODER
5034 if (fgReturnCount + loopRetCount > epilogLimit)
5035 {
5036 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
5037 "would exceed the limit of %d.\n",
5038 loopRetCount, fgReturnCount, epilogLimit);
5039 return false;
5040 }
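
    // For example: if SET_EPILOGCNT_MAX were 4 (hypothetical value), a method
    // with 3 existing return epilogs could not clone a loop containing
    // 2 BBJ_RETURN blocks, since 3 + 2 > 4.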
5041
5042 // Otherwise, we're going to add those return blocks.
5043 fgReturnCount += loopRetCount;
5044
5045 return true;
5046}
5047
5048/*****************************************************************************
5049 *
5050 * Identify loop cloning opportunities, derive loop cloning conditions,
5051 * perform loop cloning, use the derived conditions to choose which
5052 * path to take.
5053 */
5054void Compiler::optCloneLoops()
5055{
5056 JITDUMP("\n*************** In optCloneLoops()\n");
5057 if (optLoopCount == 0 || !optCanCloneLoops())
5058 {
5059 return;
5060 }
5061
5062#ifdef DEBUG
5063 if (verbose)
5064 {
5065 printf("Blocks/Trees at start of phase\n");
5066 fgDispBasicBlocks(true);
5067 }
5068#endif
5069
5070 LoopCloneContext context(optLoopCount, getAllocator());
5071
5072 // Obtain array optimization candidates in the context.
5073 optObtainLoopCloningOpts(&context);
5074
5075 // For each loop, derive cloning conditions for the optimization candidates.
5076 for (unsigned i = 0; i < optLoopCount; ++i)
5077 {
5078 JitExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
5079 if (optInfos == nullptr)
5080 {
5081 continue;
5082 }
5083
5084 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
5085 {
5086 JITDUMP("> Conditions could not be obtained\n");
5087 context.CancelLoopOptInfo(i);
5088 }
5089 else
5090 {
5091 bool allTrue = false;
5092 bool anyFalse = false;
5093 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
5094 if (anyFalse)
5095 {
5096 context.CancelLoopOptInfo(i);
5097 }
5098 if (allTrue)
5099 {
5100 // Perform static optimizations on the fast path since we always
5101 // have to take the cloned path.
5102 optPerformStaticOptimizations(i, &context DEBUGARG(false));
5103
5104 // No need to clone.
5105 context.CancelLoopOptInfo(i);
5106 }
5107 }
5108 }
5109
5110#if 0
5111 // The code in this #if has been useful in debugging loop cloning issues, by
5112 // enabling selective enablement of the loop cloning optimization according to
5113 // method hash.
5114#ifdef DEBUG
5115 unsigned methHash = info.compMethodHash();
5116 char* lostr = getenv("loopclonehashlo");
5117 unsigned methHashLo = 0;
5118 if (lostr != NULL)
5119 {
5120 sscanf_s(lostr, "%x", &methHashLo);
5121 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5122 }
5123 char* histr = getenv("loopclonehashhi");
5124 unsigned methHashHi = UINT32_MAX;
5125 if (histr != NULL)
5126 {
5127 sscanf_s(histr, "%x", &methHashHi);
5128 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5129 }
5130 if (methHash < methHashLo || methHash > methHashHi)
5131 return;
5132#endif
5133#endif
5134
5135 for (unsigned i = 0; i < optLoopCount; ++i)
5136 {
5137 if (context.GetLoopOptInfo(i) != nullptr)
5138 {
5139 optLoopsCloned++;
5140 context.OptimizeConditions(i DEBUGARG(verbose));
5141 context.OptimizeBlockConditions(i DEBUGARG(verbose));
5142 optCloneLoop(i, &context);
5143 }
5144 }
5145
5146#ifdef DEBUG
5147 if (verbose)
5148 {
5149 printf("\nAfter loop cloning:\n");
5150 fgDispBasicBlocks(/*dumpTrees*/ true);
5151 }
5152#endif
5153}
5154
5155void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
5156{
5157 assert(loopInd < optLoopCount);
5158
5159 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
5160 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
5161 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
5162
5163 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
5164 unsigned depth = optLoopDepth(loopInd);
5165 unsigned ambientWeight = 1;
5166 for (unsigned j = 0; j < depth; j++)
5167 {
5168 unsigned lastWeight = ambientWeight;
5169 ambientWeight *= BB_LOOP_WEIGHT;
5170 // If the multiplication overflowed, stick at max.
5171 // (Strictly speaking, a multiplication could overflow and still have a result
5172 // that is >= lastWeight...but if so, the original weight must be pretty large,
5173 // and it got bigger, so that's OK.)
5174 if (ambientWeight < lastWeight)
5175 {
5176 ambientWeight = BB_MAX_WEIGHT;
5177 break;
5178 }
5179 }
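
    // For example (illustrative): a loop at depth 2 gets
    // ambientWeight = BB_LOOP_WEIGHT * BB_LOOP_WEIGHT (64 if BB_LOOP_WEIGHT is 8),
    // saturating at BB_MAX_WEIGHT if the multiplication overflows.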
5180
5181 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
5182 // Be safe by taking the max with the head block's weight.
5183 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
5184
5185 // This is the containing loop, if any -- to label any blocks we create that are outside
5186 // the loop being cloned.
5187 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
5188
5189 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
5190 optEnsureUniqueHead(loopInd, ambientWeight);
5191
5192 // We're going to make
5193
5194 // H --> E
5195 // F
5196 // T
5197 // E
5198 // B ?-> T
5199 // X
5200 //
5201 // become
5202 //
5203 // H ?-> E2
5204 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
5205 // F
5206 // T
5207 // E
5208 // B ?-> T
5209 // X2--> X
5210 // F2
5211 // T2
5212 // E2
5213 // B2 ?-> T2
5214 // X
5215
5216 BasicBlock* h = optLoopTable[loopInd].lpHead;
5217 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
5218 {
5219 // Make a new block to be the unique entry to the loop.
5220 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
5221 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
5222 /*extendRegion*/ true);
5223 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
5224 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
5225 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
5226 newH->bbNatLoopNum = ambientLoop;
5227 h = newH;
5228 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
5229 }
5230
    // Make X2 after B, if necessary. (Not necessary if B is a BBJ_ALWAYS.)
5232 // "newPred" will be the predecessor of the blocks of the cloned loop.
5233 BasicBlock* b = optLoopTable[loopInd].lpBottom;
5234 BasicBlock* newPred = b;
5235 if (b->bbJumpKind != BBJ_ALWAYS)
5236 {
5237 BasicBlock* x = b->bbNext;
5238 if (x != nullptr)
5239 {
5240 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
5241 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
5242
5243 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
5244 x2->bbNatLoopNum = ambientLoop;
5245
5246 x2->bbJumpDest = x;
5247 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
5248 newPred = x2;
5249 }
5250 }
5251
5252 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
5253 // so that "h" already falls through to "e" (e == t == f).
5254 BasicBlock* h2 = nullptr;
5255 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
5256 {
        // Assign to the outer 'h2' (do not re-declare it); shadowing it here would
        // leave the outer variable nullptr for the checks below.
        h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
                          /*extendRegion*/ true);
5259 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
5260
5261 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
5262 h2->bbNatLoopNum = ambientLoop;
5263
5264 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
5265 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
5266 }
5267
5268 // Now we'll clone the blocks of the loop body.
5269 BasicBlock* newFirst = nullptr;
5270 BasicBlock* newBot = nullptr;
5271
5272 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
5273 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
5274 blk = blk->bbNext)
5275 {
5276 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
5277 /*extendRegion*/ true);
5278
5279 // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it
5280 // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already
5281 // checked them to guarantee they are clonable.
5282 bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk);
5283 noway_assert(cloneOk);
5284 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
5285 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
5286 // loop, if one exists -- the parent of the loop we're cloning.
5287 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
5288
5289 if (newFirst == nullptr)
5290 {
5291 newFirst = newBlk;
5292 }
5293 newBot = newBlk; // Continually overwrite to make sure we get the last one.
5294 newPred = newBlk;
5295 blockMap->Set(blk, newBlk);
5296 }
5297
5298 // Perform the static optimizations on the fast path.
5299 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
5300
5301 // Now go through the new blocks, remapping their jump targets within the loop.
5302 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
5303 blk = blk->bbNext)
5304 {
5305
5306 BasicBlock* newblk = nullptr;
5307 bool b = blockMap->Lookup(blk, &newblk);
5308 assert(b && newblk != nullptr);
5309
5310 assert(blk->bbJumpKind == newblk->bbJumpKind);
5311
5312 // First copy the jump destination(s) from "blk".
5313 optCopyBlkDest(blk, newblk);
5314
5315 // Now redirect the new block according to "blockMap".
5316 optRedirectBlock(newblk, blockMap);
5317 }
5318
5319 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
5320 (h->bbJumpKind == BBJ_ALWAYS));
5321
5322 // If all the conditions are true, go to E2.
5323 BasicBlock* e2 = nullptr;
5324 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
5325
5326 h->bbJumpKind = BBJ_COND;
5327
5328 // We will create the following structure
5329 //
5330 // cond0 (in h) -?> cond1
5331 // slow --> e2 (slow) always
5332 // !cond1 -?> slow
5333 // !cond2 -?> slow
5334 // ...
5335 // !condn -?> slow
5336 // h2/entry (fast)
5337 //
    // We should always have block conditions; at a minimum, the array should be deref-able.
5339 assert(context->HasBlockConditions(loopInd));
5340
5341 // Create a unique header for the slow path.
5342 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
5343 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
5344 slowHead->bbNatLoopNum = ambientLoop;
5345 slowHead->bbJumpDest = e2;
5346
5347 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
5348 condLast->bbJumpDest = slowHead;
5349
    // If h2 is present it is already the loop head; otherwise, make 'condLast' the new head.
5351 if (h2 == nullptr)
5352 {
5353 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
5354 }
5355 assert(foundIt && e2 != nullptr);
5356
5357 // Don't unroll loops that we've cloned -- the unroller expects any loop it should unroll to
5358 // initialize the loop counter immediately before entering the loop, but we've left a shared
5359 // initialization of the loop counter up above the test that determines which version of the
5360 // loop to take.
5361 optLoopTable[loopInd].lpFlags |= LPFLG_DONT_UNROLL;
5362
5363 fgUpdateChangedFlowGraph();
5364}
5365
5366//--------------------------------------------------------------------------------------------------
5367// optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
5368//
5369// Arguments:
5370// context loop cloning context variable
5371// loopNum the loop index
5372// head loop head for "loopNum"
5373// slowHead the slow path loop head
5374//
//  Return Values:
//      The last condition block created; the caller sets its jump target to the slow path head.
5377//
5378// Operation:
5379// Create the following structure.
5380//
5381// Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
5382// condn cannot jtrue to loop head h2. It has to be from a direct pred block.
5383//
5384// cond0 (in h) -?> cond1
5385// slowHead --> e2 (slowHead) always
5386// !cond1 -?> slowHead
5387// !cond2 -?> slowHead
5388// ...
5389// !condn -?> slowHead
5390// h2/entry (fast)
5391//
5392// Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
5393//
5394BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
5395 unsigned loopNum,
5396 BasicBlock* head,
5397 BasicBlock* slowHead)
5398{
5399 JITDUMP("Inserting loop cloning conditions\n");
5400 assert(context->HasBlockConditions(loopNum));
5401
5402 BasicBlock* curCond = head;
5403 JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
5404 for (unsigned i = 0; i < levelCond->Size(); ++i)
5405 {
5406 bool isHeaderBlock = (curCond == head);
5407
5408 // Flip the condition if header block.
5409 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
5410
5411 // Create each condition block ensuring wiring between them.
5412 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
5413 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
5414 curCond = tmp;
5415
5416 curCond->inheritWeight(head);
5417 curCond->bbNatLoopNum = head->bbNatLoopNum;
5418 JITDUMP("Created new " FMT_BB " for new level\n", curCond->bbNum);
5419 }
5420
5421 // Finally insert cloning conditions after all deref conditions have been inserted.
5422 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
5423 return curCond;
5424}
5425
5426void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
5427{
5428 BasicBlock* h = optLoopTable[loopInd].lpHead;
5429 BasicBlock* t = optLoopTable[loopInd].lpTop;
5430 BasicBlock* e = optLoopTable[loopInd].lpEntry;
5431 BasicBlock* b = optLoopTable[loopInd].lpBottom;
5432
5433 // If "h" dominates the entry block, then it is the unique header.
5434 if (fgDominate(h, e))
5435 {
5436 return;
5437 }
5438
5439 // Otherwise, create a new empty header block, make it the pred of the entry block,
5440 // and redirect the preds of the entry block to go to this.
5441
5442 BasicBlock* beforeTop = t->bbPrev;
5443 // Make sure that the new block is in the same region as the loop.
5444 // (We will only create loops that are entirely within a region.)
5445 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
5446 // This is in the containing loop.
5447 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
5448 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
5449
5450 // We don't care where it was put; splice it between beforeTop and top.
5451 if (beforeTop->bbNext != h2)
5452 {
5453 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
5454 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
5455 h2->setNext(t);
5456 }
5457
5458 if (h2->bbNext != e)
5459 {
5460 h2->bbJumpKind = BBJ_ALWAYS;
5461 h2->bbJumpDest = e;
5462 }
5463 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
5464
5465 // Redirect paths from preds of "e" to go to "h2" instead of "e".
5466 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
5467 blockMap->Set(e, h2);
5468
5469 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
5470 {
5471 BasicBlock* predBlock = predEntry->flBlock;
5472
5473 // Skip if predBlock is in the loop.
5474 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
5475 {
5476 continue;
5477 }
5478 optRedirectBlock(predBlock, blockMap);
5479 }
5480
5481 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
5482}
5483
5484/*****************************************************************************
5485 *
5486 * Determine the kind of interference for the call.
5487 */
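// For example, a call to the CORINFO_HELP_SETFIELD32 helper is classified below as
// CALLINT_SCL_INDIRS: it can update a scalar (non-GC) field, so only scalar indirections
// need to be invalidated, whereas an arbitrary non-helper call conservatively returns
// CALLINT_ALL.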
5488
5489/* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreeCall* call)
5490{
5491 // if not a helper, kills everything
5492 if (call->gtCallType != CT_HELPER)
5493 {
5494 return CALLINT_ALL;
5495 }
5496
5497 // setfield and array address store kill all indirections
5498 switch (eeGetHelperNum(call->gtCallMethHnd))
5499 {
5500 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
5501 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
5502 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
5503 case CORINFO_HELP_SETFIELDOBJ:
5504 case CORINFO_HELP_ARRADDR_ST:
5505
5506 return CALLINT_REF_INDIRS;
5507
5508 case CORINFO_HELP_SETFIELDFLOAT:
5509 case CORINFO_HELP_SETFIELDDOUBLE:
5510 case CORINFO_HELP_SETFIELD8:
5511 case CORINFO_HELP_SETFIELD16:
5512 case CORINFO_HELP_SETFIELD32:
5513 case CORINFO_HELP_SETFIELD64:
5514
5515 return CALLINT_SCL_INDIRS;
5516
5517 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this
5518 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
5519 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
5520 case CORINFO_HELP_SETFIELDSTRUCT:
5521
5522 return CALLINT_ALL_INDIRS;
5523
5524 default:
5525 break;
5526 }
5527
5528 // other helpers kill nothing
5529 return CALLINT_NONE;
5530}
5531
5532/*****************************************************************************
5533 *
5534 * See if the given tree can be computed in the given precision (which must
5535 * be smaller than the type of the tree for this to make sense). If 'doit'
5536 * is false, we merely check to see whether narrowing is possible; if we
5537 * get called with 'doit' being true, we actually perform the narrowing.
5538 */
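// Illustrative example: when only the low 32 bits of a TYP_LONG tree such as '(x & 0xFF)'
// are needed, the GT_AND and its constant operand can be retyped to TYP_INT; the other
// operand is then either narrowed recursively or, when narrowing from a 64-bit source,
// truncated via an inserted cast (see the GT_AND case below).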
5539
5540bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
5541{
5542 genTreeOps oper;
5543 unsigned kind;
5544
5545 noway_assert(tree);
5546 noway_assert(genActualType(tree->gtType) == genActualType(srct));
5547
5548 /* Assume we're only handling integer types */
5549 noway_assert(varTypeIsIntegral(srct));
5550 noway_assert(varTypeIsIntegral(dstt));
5551
5552 unsigned srcSize = genTypeSize(srct);
5553 unsigned dstSize = genTypeSize(dstt);
5554
5555 /* dstt must be smaller than srct to narrow */
5556 if (dstSize >= srcSize)
5557 {
5558 return false;
5559 }
5560
5561 /* Figure out what kind of a node we have */
5562 oper = tree->OperGet();
5563 kind = tree->OperKind();
5564
5565 if (oper == GT_ASG)
5566 {
5567 noway_assert(doit == false);
5568 return false;
5569 }
5570
5571 ValueNumPair NoVNPair = ValueNumPair();
5572
5573 if (kind & GTK_LEAF)
5574 {
5575 switch (oper)
5576 {
5577 /* Constants can usually be narrowed by changing their value */
5578 CLANG_FORMAT_COMMENT_ANCHOR;
5579
5580#ifndef _TARGET_64BIT_
5581 __int64 lval;
5582 __int64 lmask;
5583
5584 case GT_CNS_LNG:
5585 lval = tree->gtIntConCommon.LngValue();
5586 lmask = 0;
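                // Note that the masks for signed target types exclude the sign bit
                // (e.g. 0x7F for TYP_BYTE), so only non-negative constants that fit
                // are narrowed; negative constants are conservatively rejected.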
5587
5588 switch (dstt)
5589 {
5590 case TYP_BYTE:
5591 lmask = 0x0000007F;
5592 break;
5593 case TYP_BOOL:
5594 case TYP_UBYTE:
5595 lmask = 0x000000FF;
5596 break;
5597 case TYP_SHORT:
5598 lmask = 0x00007FFF;
5599 break;
5600 case TYP_USHORT:
5601 lmask = 0x0000FFFF;
5602 break;
5603 case TYP_INT:
5604 lmask = 0x7FFFFFFF;
5605 break;
5606 case TYP_UINT:
5607 lmask = 0xFFFFFFFF;
5608 break;
5609
5610 default:
5611 return false;
5612 }
5613
5614 if ((lval & lmask) != lval)
5615 return false;
5616
5617 if (doit)
5618 {
5619 tree->ChangeOperConst(GT_CNS_INT);
5620 tree->gtType = TYP_INT;
5621 tree->gtIntCon.gtIconVal = (int)lval;
5622 if (vnStore != nullptr)
5623 {
5624 fgValueNumberTreeConst(tree);
5625 }
5626 }
5627
5628 return true;
5629#endif
5630
5631 case GT_CNS_INT:
5632
5633 ssize_t ival;
5634 ival = tree->gtIntCon.gtIconVal;
5635 ssize_t imask;
5636 imask = 0;
5637
5638 switch (dstt)
5639 {
5640 case TYP_BYTE:
5641 imask = 0x0000007F;
5642 break;
5643 case TYP_BOOL:
5644 case TYP_UBYTE:
5645 imask = 0x000000FF;
5646 break;
5647 case TYP_SHORT:
5648 imask = 0x00007FFF;
5649 break;
5650 case TYP_USHORT:
5651 imask = 0x0000FFFF;
5652 break;
5653#ifdef _TARGET_64BIT_
5654 case TYP_INT:
5655 imask = 0x7FFFFFFF;
5656 break;
5657 case TYP_UINT:
5658 imask = 0xFFFFFFFF;
5659 break;
5660#endif // _TARGET_64BIT_
5661 default:
5662 return false;
5663 }
5664
5665 if ((ival & imask) != ival)
5666 {
5667 return false;
5668 }
5669
5670#ifdef _TARGET_64BIT_
5671 if (doit)
5672 {
5673 tree->gtType = TYP_INT;
5674 tree->gtIntCon.gtIconVal = (int)ival;
5675 if (vnStore != nullptr)
5676 {
5677 fgValueNumberTreeConst(tree);
5678 }
5679 }
5680#endif // _TARGET_64BIT_
5681
5682 return true;
5683
5684 /* Operands that are in memory can usually be narrowed
5685 simply by changing their gtType */
5686
5687 case GT_LCL_VAR:
5688 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5689 if (dstSize == sizeof(int))
5690 {
5691 goto NARROW_IND;
5692 }
5693 break;
5694
5695 case GT_CLS_VAR:
5696 case GT_LCL_FLD:
5697 goto NARROW_IND;
5698 default:
5699 break;
5700 }
5701
5702 noway_assert(doit == false);
5703 return false;
5704 }
5705
5706 if (kind & (GTK_BINOP | GTK_UNOP))
5707 {
5708 GenTree* op1;
5709 op1 = tree->gtOp.gtOp1;
5710 GenTree* op2;
5711 op2 = tree->gtOp.gtOp2;
5712
5713 switch (tree->gtOper)
5714 {
5715 case GT_AND:
5716 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5717 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5718
5719 GenTree* opToNarrow;
5720 opToNarrow = nullptr;
5721 GenTree** otherOpPtr;
5722 otherOpPtr = nullptr;
5723 bool foundOperandThatBlocksNarrowing;
5724 foundOperandThatBlocksNarrowing = false;
5725
                // If 'dstt' is unsigned and one of the operands can be narrowed into 'dstt',
                // the result of the GT_AND will also fit into 'dstt' and can be narrowed.
                // The same is true if one of the operands is an int const and can be narrowed into 'dstt'.
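                // Illustrative example: for 'x & 0xFF' with dstt == TYP_UBYTE, the
                // constant 0xFF narrows into a byte, so the result of the GT_AND fits
                // in a byte regardless of the upper bits of 'x'.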
5729 if (!gtIsActiveCSE_Candidate(op2) && ((op2->gtOper == GT_CNS_INT) || varTypeIsUnsigned(dstt)))
5730 {
5731 if (optNarrowTree(op2, srct, dstt, NoVNPair, false))
5732 {
5733 opToNarrow = op2;
5734 otherOpPtr = &tree->gtOp.gtOp1;
5735 }
5736 else
5737 {
5738 foundOperandThatBlocksNarrowing = true;
5739 }
5740 }
5741
5742 if ((opToNarrow == nullptr) && !gtIsActiveCSE_Candidate(op1) &&
5743 ((op1->gtOper == GT_CNS_INT) || varTypeIsUnsigned(dstt)))
5744 {
5745 if (optNarrowTree(op1, srct, dstt, NoVNPair, false))
5746 {
5747 opToNarrow = op1;
5748 otherOpPtr = &tree->gtOp.gtOp2;
5749 }
5750 else
5751 {
5752 foundOperandThatBlocksNarrowing = true;
5753 }
5754 }
5755
5756 if (opToNarrow != nullptr)
5757 {
5758 // We will change the type of the tree and narrow opToNarrow
5759 //
5760 if (doit)
5761 {
5762 tree->gtType = genActualType(dstt);
5763 tree->SetVNs(vnpNarrow);
5764
5765 optNarrowTree(opToNarrow, srct, dstt, NoVNPair, true);
5766 // We may also need to cast away the upper bits of *otherOpPtr
5767 if (srcSize == 8)
5768 {
5769 assert(tree->gtType == TYP_INT);
5770 GenTree* castOp = gtNewCastNode(TYP_INT, *otherOpPtr, false, TYP_INT);
5771#ifdef DEBUG
5772 castOp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5773#endif
5774 *otherOpPtr = castOp;
5775 }
5776 }
5777 return true;
5778 }
5779
5780 if (foundOperandThatBlocksNarrowing)
5781 {
5782 noway_assert(doit == false);
5783 return false;
5784 }
5785
5786 goto COMMON_BINOP;
5787
5788 case GT_ADD:
5789 case GT_MUL:
5790
5791 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5792 {
5793 noway_assert(doit == false);
5794 return false;
5795 }
5796 __fallthrough;
5797
5798 case GT_OR:
5799 case GT_XOR:
5800 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5801 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5802 COMMON_BINOP:
5803 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5804 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5805 {
5806 noway_assert(doit == false);
5807 return false;
5808 }
5809
5810 /* Simply change the type of the tree */
5811
5812 if (doit)
5813 {
5814 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5815 {
5816 tree->gtFlags &= ~GTF_MUL_64RSLT;
5817 }
5818
5819 tree->gtType = genActualType(dstt);
5820 tree->SetVNs(vnpNarrow);
5821 }
5822
5823 return true;
5824
5825 case GT_IND:
5826
5827 NARROW_IND:
5828
5829 if ((dstSize > genTypeSize(tree->gtType)) &&
5830 (varTypeIsUnsigned(dstt) && !varTypeIsUnsigned(tree->gtType)))
5831 {
5832 return false;
5833 }
5834
5835 /* Simply change the type of the tree */
5836
5837 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5838 {
5839 tree->gtType = genSignedType(dstt);
5840 tree->SetVNs(vnpNarrow);
5841
5842 /* Make sure we don't mess up the variable type */
5843 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5844 {
5845 tree->gtFlags |= GTF_VAR_CAST;
5846 }
5847 }
5848
5849 return true;
5850
5851 case GT_EQ:
5852 case GT_NE:
5853 case GT_LT:
5854 case GT_LE:
5855 case GT_GT:
5856 case GT_GE:
5857
5858 /* These can always be narrowed since they only represent 0 or 1 */
5859 return true;
5860
5861 case GT_CAST:
5862 {
5863 var_types cast = tree->CastToType();
5864 var_types oprt = op1->TypeGet();
5865 unsigned oprSize = genTypeSize(oprt);
5866
5867 if (cast != srct)
5868 {
5869 return false;
5870 }
5871
5872 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5873 {
5874 return false;
5875 }
5876
5877 if (tree->gtOverflow())
5878 {
5879 return false;
5880 }
5881
5882 /* Is this a cast from the type we're narrowing to or a smaller one? */
5883
5884 if (oprSize <= dstSize)
5885 {
5886 /* Bash the target type of the cast */
5887
5888 if (doit)
5889 {
5890 dstt = genSignedType(dstt);
5891
5892 if ((oprSize == dstSize) &&
5893 ((varTypeIsUnsigned(dstt) == varTypeIsUnsigned(oprt)) || !varTypeIsSmall(dstt)))
5894 {
5895 // Same size and there is no signedness mismatch for small types: change the CAST
5896 // into a NOP
5897
5898 JITDUMP("Cast operation has no effect, bashing [%06d] GT_CAST into a GT_NOP.\n",
5899 dspTreeID(tree));
5900
5901 tree->ChangeOper(GT_NOP);
5902 tree->gtType = dstt;
5903 // Clear the GTF_UNSIGNED flag, as it may have been set on the cast node
5904 tree->gtFlags &= ~GTF_UNSIGNED;
5905 tree->gtOp.gtOp2 = nullptr;
5906 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5907 }
5908 else
5909 {
5910 // oprSize is smaller or there is a signedness mismatch for small types
5911
5912 // Change the CastToType in the GT_CAST node
5913 tree->CastToType() = dstt;
5914
5915 // The result type of a GT_CAST is never a small type.
                            // Use genActualType to widen dstt when it is a small type.
5917 tree->gtType = genActualType(dstt);
5918 tree->SetVNs(vnpNarrow);
5919 }
5920 }
5921
5922 return true;
5923 }
5924 }
5925 return false;
5926
5927 case GT_COMMA:
5928 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5929 {
5930 /* Simply change the type of the tree */
5931
5932 if (doit)
5933 {
5934 tree->gtType = genActualType(dstt);
5935 tree->SetVNs(vnpNarrow);
5936 }
5937 return true;
5938 }
5939 return false;
5940
5941 default:
5942 noway_assert(doit == false);
5943 return false;
5944 }
5945 }
5946
5947 return false;
5948}
5949
5950/*****************************************************************************
5951 *
5952 * The following logic figures out whether the given variable is assigned
5953 * somewhere in a list of basic blocks (or in an entire loop).
5954 */
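// A sketch of how the callback below is driven (see optIsVarAssigned and optIsSetAssgLoop):
//
//     isVarAssgDsc desc;
//     desc.ivaVar  = var;  // the local we are querying (or -1 to just gather masks)
//     desc.ivaSkip = skip; // one assignment tree to ignore, if any
//     ...
//     fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
//
// The walk returns WALK_ABORT as soon as it sees an assignment to 'ivaVar' other than
// 'ivaSkip'; otherwise it accumulates assigned-variable and indirection masks in 'desc'.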
5955
5956Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTree** pTree, fgWalkData* data)
5957{
5958 GenTree* tree = *pTree;
5959
5960 if (tree->OperIs(GT_ASG))
5961 {
5962 GenTree* dest = tree->gtOp.gtOp1;
5963 genTreeOps destOper = dest->OperGet();
5964
5965 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5966 assert(desc && desc->ivaSelf == desc);
5967
5968 if (destOper == GT_LCL_VAR)
5969 {
5970 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
5971 if (tvar < lclMAX_ALLSET_TRACKED)
5972 {
5973 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5974 }
5975 else
5976 {
5977 desc->ivaMaskIncomplete = true;
5978 }
5979
5980 if (tvar == desc->ivaVar)
5981 {
5982 if (tree != desc->ivaSkip)
5983 {
5984 return WALK_ABORT;
5985 }
5986 }
5987 }
5988 else if (destOper == GT_LCL_FLD)
5989 {
5990 /* We can't track every field of every var. Moreover, indirections
5991 may access different parts of the var as different (but
5992 overlapping) fields. So just treat them as indirect accesses */
5993
5994 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5995 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5996
5997 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5998 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5999 }
6000 else if (destOper == GT_CLS_VAR)
6001 {
6002 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
6003 }
6004 else if (destOper == GT_IND)
6005 {
6006 /* Set the proper indirection bits */
6007
6008 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
6009 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
6010 }
6011 }
6012 else if (tree->gtOper == GT_CALL)
6013 {
6014 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
6015 assert(desc && desc->ivaSelf == desc);
6016
6017 desc->ivaMaskCall = optCallInterf(tree->AsCall());
6018 }
6019
6020 return WALK_CONTINUE;
6021}
6022
6023/*****************************************************************************/
6024
6025bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTree* skip, unsigned var)
6026{
6027 bool result;
6028 isVarAssgDsc desc;
6029
6030 desc.ivaSkip = skip;
6031#ifdef DEBUG
6032 desc.ivaSelf = &desc;
6033#endif
6034 desc.ivaVar = var;
6035 desc.ivaMaskCall = CALLINT_NONE;
6036 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
6037
6038 for (;;)
6039 {
6040 noway_assert(beg);
6041
6042 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
6043 {
6044 noway_assert(stmt->gtOper == GT_STMT);
6045 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
6046 {
6047 result = true;
6048 goto DONE;
6049 }
6050 }
6051
6052 if (beg == end)
6053 {
6054 break;
6055 }
6056
6057 beg = beg->bbNext;
6058 }
6059
6060 result = false;
6061
6062DONE:
6063
6064 return result;
6065}
6066
6067/*****************************************************************************/
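// Returns nonzero if loop 'lnum' assigns to any variable in 'vars', or may perform (directly
// or via a call) any of the indirect/global stores described by 'inds'. The per-loop summary
// is computed once and cached under the LPFLG_ASGVARS_YES flag.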
6068int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
6069{
6070 LoopDsc* loop;
6071
6072 /* Get hold of the loop descriptor */
6073
6074 noway_assert(lnum < optLoopCount);
6075 loop = optLoopTable + lnum;
6076
6077 /* Do we already know what variables are assigned within this loop? */
6078
6079 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
6080 {
6081 isVarAssgDsc desc;
6082
6083 BasicBlock* beg;
6084 BasicBlock* end;
6085
6086 /* Prepare the descriptor used by the tree walker call-back */
6087
6088 desc.ivaVar = (unsigned)-1;
6089 desc.ivaSkip = nullptr;
6090#ifdef DEBUG
6091 desc.ivaSelf = &desc;
6092#endif
6093 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
6094 desc.ivaMaskInd = VR_NONE;
6095 desc.ivaMaskCall = CALLINT_NONE;
6096 desc.ivaMaskIncomplete = false;
6097
6098 /* Now walk all the statements of the loop */
6099
6100 beg = loop->lpHead->bbNext;
6101 end = loop->lpBottom;
6102
6103 for (/**/; /**/; beg = beg->bbNext)
6104 {
6105 noway_assert(beg);
6106
6107 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6108 {
6109 noway_assert(stmt->gtOper == GT_STMT);
6110 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
6111
6112 if (desc.ivaMaskIncomplete)
6113 {
6114 loop->lpFlags |= LPFLG_ASGVARS_INC;
6115 }
6116 }
6117
6118 if (beg == end)
6119 {
6120 break;
6121 }
6122 }
6123
6124 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
6125 loop->lpAsgInds = desc.ivaMaskInd;
6126 loop->lpAsgCall = desc.ivaMaskCall;
6127
6128 /* Now we know what variables are assigned in the loop */
6129
6130 loop->lpFlags |= LPFLG_ASGVARS_YES;
6131 }
6132
6133 /* Now we can finally test the caller's mask against the loop's */
6134 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
6135 {
6136 return 1;
6137 }
6138
6139 switch (loop->lpAsgCall)
6140 {
6141 case CALLINT_ALL:
6142
            /* Can't hoist if the call might have a side effect on an indirection. */
6144
6145 if (loop->lpAsgInds != VR_NONE)
6146 {
6147 return 1;
6148 }
6149
6150 break;
6151
6152 case CALLINT_REF_INDIRS:
6153
            /* Can't hoist if the call might have a side effect on a ref indirection. */
6155
6156 if (loop->lpAsgInds & VR_IND_REF)
6157 {
6158 return 1;
6159 }
6160
6161 break;
6162
6163 case CALLINT_SCL_INDIRS:
6164
            /* Can't hoist if the call might have a side effect on a non-ref indirection. */
6166
6167 if (loop->lpAsgInds & VR_IND_SCL)
6168 {
6169 return 1;
6170 }
6171
6172 break;
6173
6174 case CALLINT_ALL_INDIRS:
6175
            /* Can't hoist if the call might have a side effect on any indirection. */
6177
6178 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
6179 {
6180 return 1;
6181 }
6182
6183 break;
6184
6185 case CALLINT_NONE:
6186
6187 /* Other helpers kill nothing */
6188
6189 break;
6190
6191 default:
6192 noway_assert(!"Unexpected lpAsgCall value");
6193 }
6194
6195 return 0;
6196}
6197
6198void Compiler::optPerformHoistExpr(GenTree* origExpr, unsigned lnum)
6199{
6200#ifdef DEBUG
6201 if (verbose)
6202 {
6203 printf("\nHoisting a copy of ");
6204 printTreeID(origExpr);
6205 printf(" into PreHeader for loop L%02u <" FMT_BB ".." FMT_BB ">:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
6206 optLoopTable[lnum].lpBottom->bbNum);
6207 gtDispTree(origExpr);
6208 printf("\n");
6209 }
6210#endif
6211
6212 // This loop has to be in a form that is approved for hoisting.
6213 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
6214
6215 // Create a copy of the expression and mark it for CSE's.
6216 GenTree* hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
6217
6218 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
6219 assert(hoistExpr != origExpr);
6220 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
6221
6222 GenTree* hoist = hoistExpr;
6223 // The value of the expression isn't used (unless it's an assignment).
6224 if (hoistExpr->OperGet() != GT_ASG)
6225 {
6226 hoist = gtUnusedValNode(hoistExpr);
6227 }
6228
6229 /* Put the statement in the preheader */
6230
6231 fgCreateLoopPreHeader(lnum);
6232
6233 BasicBlock* preHead = optLoopTable[lnum].lpHead;
6234 assert(preHead->bbJumpKind == BBJ_NONE);
6235
6236 // fgMorphTree requires that compCurBB be the block that contains
6237 // (or in this case, will contain) the expression.
6238 compCurBB = preHead;
6239 hoist = fgMorphTree(hoist);
6240
6241 GenTree* hoistStmt = gtNewStmt(hoist);
6242 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
6243
6244 /* simply append the statement at the end of the preHead's list */
6245
6246 GenTree* treeList = preHead->bbTreeList;
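    // The statement list is threaded via gtNext (the last statement's gtNext is nullptr),
    // while the head statement's gtPrev points back at the last statement, making the
    // append below O(1):
    //
    //     treeList -> ... -> last -> hoistStmt -> nullptr
    //     (and treeList->gtPrev will point at hoistStmt)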
6247
6248 if (treeList)
6249 {
6250 /* append after last statement */
6251
6252 GenTree* last = treeList->gtPrev;
6253 assert(last->gtNext == nullptr);
6254
6255 last->gtNext = hoistStmt;
6256 hoistStmt->gtPrev = last;
6257 treeList->gtPrev = hoistStmt;
6258 }
6259 else
6260 {
6261 /* Empty pre-header - store the single statement in the block */
6262
6263 preHead->bbTreeList = hoistStmt;
6264 hoistStmt->gtPrev = hoistStmt;
6265 }
6266
6267 hoistStmt->gtNext = nullptr;
6268
6269#ifdef DEBUG
6270 if (verbose)
6271 {
6272 printf("This hoisted copy placed in PreHeader (" FMT_BB "):\n", preHead->bbNum);
6273 gtDispTree(hoist);
6274 }
6275#endif
6276
6277 if (fgStmtListThreaded)
6278 {
6279 gtSetStmtInfo(hoistStmt);
6280 fgSetStmtSeq(hoistStmt);
6281 }
6282
6283#ifdef DEBUG
6284 if (m_nodeTestData != nullptr)
6285 {
6286
6287 // What is the depth of the loop "lnum"?
6288 ssize_t depth = 0;
6289 unsigned lnumIter = lnum;
6290 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
6291 {
6292 depth++;
6293 lnumIter = optLoopTable[lnumIter].lpParent;
6294 }
6295
6296 NodeToTestDataMap* testData = GetNodeTestData();
6297
6298 TestLabelAndNum tlAndN;
6299 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
6300 {
6301 if (tlAndN.m_num == -1)
6302 {
6303 printf("Node ");
6304 printTreeID(origExpr);
6305 printf(" was declared 'do not hoist', but is being hoisted.\n");
6306 assert(false);
6307 }
6308 else if (tlAndN.m_num != depth)
6309 {
6310 printf("Node ");
6311 printTreeID(origExpr);
6312 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
6313 "%d.\n",
6314 tlAndN.m_num, depth);
6315 assert(false);
6316 }
6317 else
6318 {
6319 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
6320 // hoist" annotations.
6321 testData->Remove(origExpr);
6322 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
6323 tlAndN.m_tl = TL_CSE_Def;
6324 tlAndN.m_num = m_loopHoistCSEClass++;
6325 testData->Set(hoistExpr, tlAndN);
6326 }
6327 }
6328 }
6329#endif
6330
6331#if LOOP_HOIST_STATS
6332 if (!m_curLoopHasHoistedExpression)
6333 {
6334 m_loopsWithHoistedExpressions++;
6335 m_curLoopHasHoistedExpression = true;
6336 }
6337 m_totalHoistedExpressions++;
6338#endif // LOOP_HOIST_STATS
6339}
6340
6341void Compiler::optHoistLoopCode()
6342{
6343 // If we don't have any loops in the method then take an early out now.
6344 if (optLoopCount == 0)
6345 {
6346 return;
6347 }
6348
6349#ifdef DEBUG
6350 unsigned jitNoHoist = JitConfig.JitNoHoist();
6351 if (jitNoHoist > 0)
6352 {
6353 return;
6354 }
6355#endif
6356
6357#if 0
    // The code in this #if has been useful in debugging loop hoisting issues, by
    // enabling selective enablement of the loop hoisting optimization according to
    // method hash.
6361#ifdef DEBUG
6362 unsigned methHash = info.compMethodHash();
6363 char* lostr = getenv("loophoisthashlo");
6364 unsigned methHashLo = 0;
6365 if (lostr != NULL)
6366 {
6367 sscanf_s(lostr, "%x", &methHashLo);
6368 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
6369 }
6370 char* histr = getenv("loophoisthashhi");
6371 unsigned methHashHi = UINT32_MAX;
6372 if (histr != NULL)
6373 {
6374 sscanf_s(histr, "%x", &methHashHi);
6375 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
6376 }
6377 if (methHash < methHashLo || methHash > methHashHi)
6378 return;
6379 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
6380#endif // DEBUG
#endif // 0 -- debugging loop hoisting issues
6382
6383#ifdef DEBUG
6384 if (verbose)
6385 {
6386 printf("\n*************** In optHoistLoopCode()\n");
6387 printf("Blocks/Trees before phase\n");
6388 fgDispBasicBlocks(true);
6389 printf("");
6390 }
6391#endif
6392
    // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in
    // which they are invariant).
6395 LoopHoistContext hoistCtxt(this);
6396 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
6397 {
6398 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6399 {
6400 continue;
6401 }
6402
6403 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
6404 {
6405 optHoistLoopNest(lnum, &hoistCtxt);
6406 }
6407 }
6408
6409#if DEBUG
6410 if (fgModified)
6411 {
6412 if (verbose)
6413 {
6414 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
6415 fgDispBasicBlocks(true);
6416 printf("");
6417 }
6418
6419 // Make sure that the predecessor lists are accurate
6420 fgDebugCheckBBlist();
6421 }
6422#endif
6423
6424#ifdef DEBUG
    // Test data checks: if we have no test data, early out.
6427 if (m_nodeTestData == nullptr)
6428 {
6429 return;
6430 }
6431 NodeToTestDataMap* testData = GetNodeTestData();
6432 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
6433 {
6434 TestLabelAndNum tlAndN;
6435 GenTree* node = ki.Get();
6436 bool b = testData->Lookup(node, &tlAndN);
6437 assert(b);
6438 if (tlAndN.m_tl != TL_LoopHoist)
6439 {
6440 continue;
6441 }
6442 // Otherwise, it is a loop hoist annotation.
6443 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
6444 if (tlAndN.m_num >= 0)
6445 {
6446 printf("Node ");
6447 printTreeID(node);
6448 printf(" was declared 'must hoist', but has not been hoisted.\n");
6449 assert(false);
6450 }
6451 }
6452#endif // DEBUG
6453}
6454
6455void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
6456{
6457 // Do this loop, then recursively do all nested loops.
6458 CLANG_FORMAT_COMMENT_ANCHOR;
6459
6460#if LOOP_HOIST_STATS
6461 // Record stats
6462 m_curLoopHasHoistedExpression = false;
6463 m_loopsConsidered++;
6464#endif // LOOP_HOIST_STATS
6465
6466 optHoistThisLoop(lnum, hoistCtxt);
6467
6468 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
6469
6470 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
6471 {
6472 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
6473 // TODO-Cleanup: we should have a set abstraction for loops.
6474 if (hoistedInCurLoop != nullptr)
6475 {
6476 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
6477 {
6478#ifdef DEBUG
6479 bool b;
6480 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
6481#endif
6482 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
6483 }
6484 }
6485
6486 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
6487 child = optLoopTable[child].lpSibling)
6488 {
6489 optHoistLoopNest(child, hoistCtxt);
6490 }
6491
6492 // Now remove them.
6493 // TODO-Cleanup: we should have a set abstraction for loops.
6494 if (hoistedInCurLoop != nullptr)
6495 {
6496 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
6497 {
6498 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
6499 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
6500 }
6501 }
6502 }
6503}
6504
6505void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
6506{
6507 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6508
    /* If the loop was removed, return */
6510
6511 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
6512 {
6513 return;
6514 }
6515
6516 /* Get the head and tail of the loop */
6517
6518 BasicBlock* head = pLoopDsc->lpHead;
6519 BasicBlock* tail = pLoopDsc->lpBottom;
6520 BasicBlock* lbeg = pLoopDsc->lpEntry;
6521
6522 // We must have a do-while loop
6523 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
6524 {
6525 return;
6526 }
6527
6528 // The loop-head must dominate the loop-entry.
6529 // TODO-CQ: Couldn't we make this true if it's not?
6530 if (!fgDominate(head, lbeg))
6531 {
6532 return;
6533 }
6534
6535 // if lbeg is the start of a new try block then we won't be able to hoist
6536 if (!BasicBlock::sameTryRegion(head, lbeg))
6537 {
6538 return;
6539 }
6540
6541 // We don't bother hoisting when inside of a catch block
6542 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
6543 {
6544 return;
6545 }
6546
6547 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
6548
6549 unsigned begn = lbeg->bbNum;
6550 unsigned endn = tail->bbNum;
6551
6552 // Ensure the per-loop sets/tables are empty.
6553 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
6554
6555#ifdef DEBUG
6556 if (verbose)
6557 {
6558 printf("optHoistLoopCode for loop L%02u <" FMT_BB ".." FMT_BB ">:\n", lnum, begn, endn);
6559 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
6560 }
6561#endif
6562
6563 VARSET_TP loopVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
6564
6565 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
6566 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
6567 pLoopDsc->lpHoistedExprCount = 0;
6568
6569#ifndef _TARGET_64BIT_
6570 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
6571
6572 if (longVarsCount > 0)
6573 {
6574 // Since 64-bit variables take up two registers on 32-bit targets, we increase
6575 // the Counts such that each TYP_LONG variable counts twice.
6576 //
6577 VARSET_TP loopLongVars(VarSetOps::Intersection(this, loopVars, lvaLongVars));
6578 VARSET_TP inOutLongVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
6579
6580#ifdef DEBUG
6581 if (verbose)
6582 {
6583 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
6584 lvaDispVarSet(lvaLongVars);
6585 }
6586#endif
6587 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
6588 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
6589 }
6590#endif // !_TARGET_64BIT_
6591
6592#ifdef DEBUG
6593 if (verbose)
6594 {
6595 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
6596 lvaDispVarSet(pLoopDsc->lpVarUseDef);
6597
6598 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
6599 lvaDispVarSet(pLoopDsc->lpVarInOut);
6600
6601 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
6602 lvaDispVarSet(loopVars);
6603 printf("\n");
6604 }
6605#endif
6606
6607 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
6608
6609 if (floatVarsCount > 0)
6610 {
6611 VARSET_TP loopFPVars(VarSetOps::Intersection(this, loopVars, lvaFloatVars));
6612 VARSET_TP inOutFPVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
6613
6614 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
6615 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
6616 pLoopDsc->lpHoistedFPExprCount = 0;
6617
6618 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
6619 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
6620
6621#ifdef DEBUG
6622 if (verbose)
6623 {
6624 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
6625 lvaDispVarSet(inOutFPVars);
6626
6627 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
6628 lvaDispVarSet(loopFPVars);
6629 }
6630#endif
6631 }
6632 else // (floatVarsCount == 0)
6633 {
6634 pLoopDsc->lpLoopVarFPCount = 0;
6635 pLoopDsc->lpVarInOutFPCount = 0;
6636 pLoopDsc->lpHoistedFPExprCount = 0;
6637 }
6638
6639 // Find the set of definitely-executed blocks.
6640 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
6641 // Until we have post-dominators, we'll special-case for single-exit blocks.
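    // Illustrative example: for a single-exit loop 'entry -> B1 -> B2 (exit)', walking
    // bbIDom up from the exit pushes [B2, B1] and then 'entry', so popping below visits
    // entry, B1, B2 -- each dominator before the blocks it dominates.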
6642 JitExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
6643 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
6644 {
6645 assert(pLoopDsc->lpExit != nullptr);
6646 BasicBlock* cur = pLoopDsc->lpExit;
6647 // Push dominators, until we reach "entry" or exit the loop.
6648 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
6649 {
6650 defExec.Push(cur);
6651 cur = cur->bbIDom;
6652 }
6653 // If we didn't reach the entry block, give up and *just* push the entry block.
6654 if (cur != pLoopDsc->lpEntry)
6655 {
6656 defExec.Reset();
6657 }
6658 defExec.Push(pLoopDsc->lpEntry);
6659 }
6660 else // More than one exit
6661 {
6662 // We'll assume that only the entry block is definitely executed.
6663 // We could in the future do better.
6664 defExec.Push(pLoopDsc->lpEntry);
6665 }
6666
6667 while (defExec.Size() > 0)
6668 {
6669 // Consider in reverse order: dominator before dominatee.
6670 BasicBlock* blk = defExec.Pop();
6671 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
6672 }
6673}
6674
6675// Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
6676void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
6677{
6678 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6679 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
6680 unsigned blkWeight = blk->getBBWeight(this);
6681
6682#ifdef DEBUG
6683 if (verbose)
6684 {
6685 printf(" optHoistLoopExprsForBlock " FMT_BB " (weight=%6s) of loop L%02u <" FMT_BB ".." FMT_BB
6686 ">, firstBlock is %s\n",
6687 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
6688 firstBlockAndBeforeSideEffect ? "true" : "false");
6689 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6690 {
6691 printf(" block weight is too small to perform hoisting.\n");
6692 }
6693 }
6694#endif
6695
6696 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6697 {
6698 // Block weight is too small to perform hoisting.
6699 return;
6700 }
6701
6702 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6703 {
6704 GenTree* stmtTree = stmt->gtStmtExpr;
6705 bool hoistable;
6706 bool cctorDependent;
6707 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable,
6708 &cctorDependent);
6709 if (hoistable)
6710 {
6711 // we will try to hoist the top-level stmtTree
6712 optHoistCandidate(stmtTree, lnum, hoistCtxt);
6713 }
6714 }
6715}
6716
6717bool Compiler::optIsProfitableToHoistableTree(GenTree* tree, unsigned lnum)
6718{
6719 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6720
6721 bool loopContainsCall = pLoopDsc->lpContainsCall;
6722
6723 int availRegCount;
6724 int hoistedExprCount;
6725 int loopVarCount;
6726 int varInOutCount;
6727
6728 if (varTypeIsFloating(tree->TypeGet()))
6729 {
6730 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6731 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6732 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6733
6734 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6735 if (!loopContainsCall)
6736 {
6737 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6738 }
6739#ifdef _TARGET_ARM_
6740 // For ARM each double takes two FP registers
6741 // For now on ARM we won't track singles/doubles
6742 // and instead just assume that we always have doubles.
6743 //
6744 availRegCount /= 2;
6745#endif
6746 }
6747 else
6748 {
6749 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6750 loopVarCount = pLoopDsc->lpLoopVarCount;
6751 varInOutCount = pLoopDsc->lpVarInOutCount;
6752
6753 availRegCount = CNT_CALLEE_SAVED - 1;
6754 if (!loopContainsCall)
6755 {
6756 availRegCount += CNT_CALLEE_TRASH - 1;
6757 }
6758#ifndef _TARGET_64BIT_
6759 // For our 32-bit targets Long types take two registers.
6760 if (varTypeIsLong(tree->TypeGet()))
6761 {
6762 availRegCount = (availRegCount + 1) / 2;
6763 }
6764#endif
6765 }
6766
    // Decrement availRegCount by the count of expressions that we have already hoisted.
6768 availRegCount -= hoistedExprCount;
6769
6770 // the variables that are read/written inside the loop should
6771 // always be a subset of the InOut variables for the loop
6772 assert(loopVarCount <= varInOutCount);
6773
6774 // When loopVarCount >= availRegCount we believe that all of the
6775 // available registers will get used to hold LclVars inside the loop.
6776 // This pessimistically assumes that each loopVar has a conflicting
6777 // lifetime with every other loopVar.
    // For this case we will hoist the expression only if it is profitable
    // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX),
    // as we believe it will be placed in the stack or one of the other
    // loopVars will be spilled into the stack.
6782 //
6783 if (loopVarCount >= availRegCount)
6784 {
6785 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6786 if (tree->gtCostEx < (2 * IND_COST_EX))
6787 {
6788 return false;
6789 }
6790 }
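    // Illustrative numbers: with availRegCount == 8 and loopVarCount == 10, an expression
    // whose gtCostEx is below 2*IND_COST_EX is rejected above, on the assumption that it
    // would end up in a stack home anyway.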
6791
    // When varInOutCount < availRegCount we know that there are
    // some available register(s) when we enter the loop body.
    // When varInOutCount == availRegCount there often will be a register
    // available when we enter the loop body, since a loop often defines a
    // LclVar on exit or there is often at least one LclVar that is worth
    // spilling to the stack to make way for this hoisted expression.
    // So we are willing to hoist an expression with gtCostEx == MIN_CSE_COST.
6799 //
6800 if (varInOutCount > availRegCount)
6801 {
6802 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6803 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6804 {
6805 return false;
6806 }
6807 }
6808
6809 return true;
6810}
6811
6812//
6813// This function returns true if 'tree' is a loop invariant expression.
6814// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block,
6815// and sets '*pCctorDependent' if 'tree' is a function of a static field that must not be
6816// hoisted (even if '*pHoistable' is true) unless a preceding corresponding cctor init helper
6817// call is also hoisted.
6818//
6819bool Compiler::optHoistLoopExprsForTree(GenTree* tree,
6820 unsigned lnum,
6821 LoopHoistContext* hoistCtxt,
6822 bool* pFirstBlockAndBeforeSideEffect,
6823 bool* pHoistable,
6824 bool* pCctorDependent)
6825{
6826 // First do the children.
6827 // We must keep track of whether each child node was hoistable or not
6828 //
6829 unsigned nChildren = tree->NumChildren();
6830 bool childrenHoistable[GenTree::MAX_CHILDREN];
6831 bool childrenCctorDependent[GenTree::MAX_CHILDREN];
6832
    // Initialize the array elements of childrenHoistable[] and childrenCctorDependent[] to false.
6834 for (unsigned i = 0; i < nChildren; i++)
6835 {
6836 childrenHoistable[i] = false;
6837 childrenCctorDependent[i] = false;
6838 }
6839
6840 // Initclass CLS_VARs and IconHandles are the base cases of cctor dependent trees.
6841 // In the IconHandle case, it's of course the dereference, rather than the constant itself, that is
6842 // truly dependent on the cctor. So a more precise approach would be to separately propagate
6843 // isCctorDependent and isAddressWhoseDereferenceWouldBeCctorDependent, but we don't for simplicity/throughput;
6844 // the constant itself would be considered non-hoistable anyway, since optIsCSEcandidate returns
6845 // false for constants.
6846 bool treeIsCctorDependent = ((tree->OperIs(GT_CLS_VAR) && ((tree->gtFlags & GTF_CLS_VAR_INITCLASS) != 0)) ||
6847 (tree->OperIs(GT_CNS_INT) && ((tree->gtFlags & GTF_ICON_INITCLASS) != 0)));
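    // Illustrative example: a static field access expanded as
    // 'GT_COMMA(<class-init helper call>, GT_CLS_VAR)': the GT_CLS_VAR alone is
    // cctor-dependent, but the enclosing comma that also evaluates the init helper
    // is not; see the GT_COMMA special case below.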
6848 bool treeIsInvariant = true;
6849 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6850 {
6851 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6852 &childrenHoistable[childNum], &childrenCctorDependent[childNum]))
6853 {
6854 treeIsInvariant = false;
6855 }
6856
6857 if (childrenCctorDependent[childNum])
6858 {
6859 // Normally, a parent of a cctor-dependent tree is also cctor-dependent.
6860 treeIsCctorDependent = true;
6861
6862 // Check for the case where we can stop propagating cctor-dependent upwards.
6863 if (tree->OperIs(GT_COMMA) && (childNum == 1))
6864 {
6865 GenTree* op1 = tree->gtGetOp1();
6866 if (op1->OperIs(GT_CALL))
6867 {
6868 GenTreeCall* call = op1->AsCall();
6869 if ((call->gtCallType == CT_HELPER) &&
6870 s_helperCallProperties.MayRunCctor(eeGetHelperNum(call->gtCallMethHnd)))
6871 {
6872 // Hoisting the comma is ok because it would hoist the initialization along
6873 // with the static field reference.
6874 treeIsCctorDependent = false;
6875 // Hoisting the static field without hoisting the initialization would be
6876 // incorrect, make sure we consider the field (which we flagged as
6877 // cctor-dependent) non-hoistable.
6878 noway_assert(!childrenHoistable[childNum]);
6879 }
6880 }
6881 }
6882 }
6883 }
6884
6885 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted,
6886 // unless it has a static var reference that can't be hoisted past its cctor call.
6887 bool treeIsHoistable = treeIsInvariant && !treeIsCctorDependent;
6888
6889 // But we must see if anything else prevents "tree" from being hoisted.
6890 //
6891 if (treeIsInvariant)
6892 {
6893 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6894 treeIsHoistable &= optIsCSEcandidate(tree);
6895
6896 // If it's a call, it must be a helper call, and be pure.
6897 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6898 // (meaning it won't run a cctor because the class is not precise-init).
6899 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6900 {
6901 GenTreeCall* call = tree->AsCall();
6902 if (call->gtCallType != CT_HELPER)
6903 {
6904 treeIsHoistable = false;
6905 }
6906 else
6907 {
6908 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6909 if (!s_helperCallProperties.IsPure(helpFunc))
6910 {
6911 treeIsHoistable = false;
6912 }
6913 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6914 {
6915 treeIsHoistable = false;
6916 }
6917 }
6918 }
6919
6920 if (treeIsHoistable)
6921 {
6922 if (!(*pFirstBlockAndBeforeSideEffect))
6923 {
6924 // For now, we give up on an expression that might raise an exception if it is after the
6925 // first possible global side effect (and we assume we're after that if we're not in the first block).
6926 // TODO-CQ: this is when we might do loop cloning.
6927 //
6928 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6929 {
6930 treeIsHoistable = false;
6931 }
6932 }
6933 }
6934
6935 // Is the value of the whole tree loop invariant?
6936 treeIsInvariant =
6937 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6938
        // If the VN of the whole tree is not loop invariant, then the tree is not hoistable.
6940 if (!treeIsInvariant)
6941 {
6942 treeIsHoistable = false;
6943 }
6944 }
6945
6946 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6947 // If we encounter a tree with a call in it
6948 // or if we see an assignment to global we set it to false.
6949 //
6950 // If we are already set to false then we can skip these checks
6951 //
6952 if (*pFirstBlockAndBeforeSideEffect)
6953 {
6954 // For this purpose, we only care about memory side effects. We assume that expressions will
6955 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6956 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6957 // here, since that includes exceptions.)
6958 if (tree->IsCall())
6959 {
6960 // If it's a call, it must be a helper call that does not mutate the heap.
6961 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6962 // (meaning it won't run a cctor because the class is not precise-init).
6963 GenTreeCall* call = tree->AsCall();
6964 if (call->gtCallType != CT_HELPER)
6965 {
6966 *pFirstBlockAndBeforeSideEffect = false;
6967 }
6968 else
6969 {
6970 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6971 if (s_helperCallProperties.MutatesHeap(helpFunc))
6972 {
6973 *pFirstBlockAndBeforeSideEffect = false;
6974 }
6975 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6976 {
6977 *pFirstBlockAndBeforeSideEffect = false;
6978 }
6979 }
6980 }
6981 else if (tree->OperIs(GT_ASG))
6982 {
6983 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6984 GenTree* lhs = tree->gtOp.gtOp1;
6985 if (lhs->gtFlags & GTF_GLOB_REF)
6986 {
6987 *pFirstBlockAndBeforeSideEffect = false;
6988 }
6989 }
6990 else if (tree->OperIsCopyBlkOp())
6991 {
6992 GenTree* args = tree->gtOp.gtOp1;
6993 assert(args->OperGet() == GT_LIST);
6994 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6995 {
6996 *pFirstBlockAndBeforeSideEffect = false;
6997 }
6998 }
6999 }
7000
7001 // If this 'tree' is hoistable then we return and the caller will
7002 // decide to hoist it as part of larger hoistable expression.
7003 //
7004 if (!treeIsHoistable)
7005 {
7006 // We are not hoistable so we will now hoist any hoistable children.
7007 //
7008 for (unsigned childNum = 0; childNum < nChildren; childNum++)
7009 {
7010 if (childrenHoistable[childNum])
7011 {
                // We can't hoist the LHS of an assignment, as it isn't a real use.
7013 if ((childNum == 0) && tree->OperIs(GT_ASG))
7014 {
7015 continue;
7016 }
7017
7018 GenTree* child = tree->GetChild(childNum);
7019
7020 // We try to hoist this 'child' tree
7021 optHoistCandidate(child, lnum, hoistCtxt);
7022 }
7023 }
7024 }
7025
7026 *pHoistable = treeIsHoistable;
7027 *pCctorDependent = treeIsCctorDependent;
7028 return treeIsInvariant;
7029}
7030
7031void Compiler::optHoistCandidate(GenTree* tree, unsigned lnum, LoopHoistContext* hoistCtxt)
7032{
7033 if (lnum == BasicBlock::NOT_IN_LOOP)
7034 {
7035 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
7036 return;
7037 }
7038
7039 // The outer loop also must be suitable for hoisting...
7040 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
7041 {
7042 return;
7043 }
7044
7045 // If the hoisted expression isn't valid at this loop head then break
7046 if (!optTreeIsValidAtLoopHead(tree, lnum))
7047 {
7048 return;
7049 }
7050
    // It must pass the hoistable profitability tests for this loop level.
7052 if (!optIsProfitableToHoistableTree(tree, lnum))
7053 {
7054 return;
7055 }
7056
7057 bool b;
7058 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
7059 {
7060 // already hoisted in a parent loop, so don't hoist this expression.
7061 return;
7062 }
7063
7064 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
7065 {
7066 // already hoisted this expression in the current loop, so don't hoist this expression.
7067 return;
7068 }
7069
7070 // Expression can be hoisted
7071 optPerformHoistExpr(tree, lnum);
7072
7073 // Increment lpHoistedExprCount or lpHoistedFPExprCount
7074 if (!varTypeIsFloating(tree->TypeGet()))
7075 {
7076 optLoopTable[lnum].lpHoistedExprCount++;
7077#ifndef _TARGET_64BIT_
7078 // For our 32-bit targets Long types take two registers.
7079 if (varTypeIsLong(tree->TypeGet()))
7080 {
7081 optLoopTable[lnum].lpHoistedExprCount++;
7082 }
7083#endif
7084 }
7085 else // Floating point expr hoisted
7086 {
7087 optLoopTable[lnum].lpHoistedFPExprCount++;
7088 }
7089
7090 // Record the hoisted expression in hoistCtxt
7091 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
7092}
7093
7094bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
7095{
    // If it is not a VN, it is not loop-invariant.
7097 if (vn == ValueNumStore::NoVN)
7098 {
7099 return false;
7100 }
7101
7102 // We'll always short-circuit constants.
7103 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
7104 {
7105 return true;
7106 }
7107
7108 // If we've done this query previously, don't repeat.
7109 bool previousRes = false;
7110 if (loopVnInvariantCache->Lookup(vn, &previousRes))
7111 {
7112 return previousRes;
7113 }
7114
7115 bool res = true;
7116 VNFuncApp funcApp;
7117 if (vnStore->GetVNFunc(vn, &funcApp))
7118 {
7119 if (funcApp.m_func == VNF_PhiDef)
7120 {
7121 // First, make sure it's a "proper" phi -- the definition is a Phi application.
7122 VNFuncApp phiDefValFuncApp;
7123 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
7124 {
7125 // It's not *really* a definition, rather a pass-through of some other VN.
7126 // (This could occur, say if both sides of an if-then-else diamond made the
7127 // same assignment to a variable.)
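                // (E.g., assigning 'x = a' on both arms of an if/else yields a PhiDef
                // whose "definition" VN is simply a's VN rather than a VNF_Phi
                // application, so we recurse on the underlying VN.)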
7128 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
7129 }
7130 else
7131 {
                // Is the definition within the loop? If so, it is not loop-invariant.
7133 unsigned lclNum = funcApp.m_args[0];
7134 unsigned ssaNum = funcApp.m_args[1];
7135 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
7136 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
7137 }
7138 }
7139 else if (funcApp.m_func == VNF_PhiMemoryDef)
7140 {
7141 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
7142 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
7143 }
7144 else
7145 {
7146 for (unsigned i = 0; i < funcApp.m_arity; i++)
7147 {
7148 // TODO-CQ: We need to either make sure that *all* VN functions
7149 // always take VN args, or else have a list of arg positions to exempt, as implicitly
7150 // constant.
7151 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
7152 {
7153 res = false;
7154 break;
7155 }
7156 }
7157 }
7158 }
7159 else
7160 {
7161 // Non-function "new, unique" VN's may be annotated with the loop nest where
7162 // their definition occurs.
7163 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
7164
7165 if (vnLoopNum == MAX_LOOP_NUM)
7166 {
7167 res = false;
7168 }
7169 else
7170 {
7171 res = !optLoopContains(lnum, vnLoopNum);
7172 }
7173 }
7174
7175 loopVnInvariantCache->Set(vn, res);
7176 return res;
7177}
7178
7179bool Compiler::optTreeIsValidAtLoopHead(GenTree* tree, unsigned lnum)
7180{
7181 if (tree->OperIsLocal())
7182 {
7183 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
7184 unsigned lclNum = lclVar->gtLclNum;
7185
        // The lclVar must have an SSA-tracked lifetime.
7187 if (!lvaInSsa(lclNum))
7188 {
7189 return false;
7190 }
7191
        // If the loop does not contain the SSA def, we can hoist it.
7193 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
7194 {
7195 return true;
7196 }
7197 }
7198 else if (tree->OperIsConst())
7199 {
7200 return true;
7201 }
    else // Otherwise, the tree is valid here only if every one of its child nodes is.
7203 {
7204 unsigned nChildren = tree->NumChildren();
7205 for (unsigned childNum = 0; childNum < nChildren; childNum++)
7206 {
7207 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
7208 {
7209 return false;
7210 }
7211 }
7212 return true;
7213 }
7214 return false;
7215}
7216
7217/*****************************************************************************
7218 *
 * Creates a pre-header block for the given loop: a pre-header is a BBJ_NONE block
 * that falls through into the loop top and replaces the current lpHead in the
 * loop table. The loop has to be a do-while loop. Thus, all blocks dominated by
 * lpHead will also be dominated by the loop-top, lpHead->bbNext.
7223 *
7224 */
7225
7226void Compiler::fgCreateLoopPreHeader(unsigned lnum)
7227{
7228 LoopDsc* pLoopDsc = &optLoopTable[lnum];
7229
7230 /* This loop has to be a "do-while" loop */
7231
7232 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
7233
7234 /* Have we already created a loop-preheader block? */
7235
7236 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
7237 {
7238 return;
7239 }
7240
7241 BasicBlock* head = pLoopDsc->lpHead;
7242 BasicBlock* top = pLoopDsc->lpTop;
7243 BasicBlock* entry = pLoopDsc->lpEntry;
7244
7245 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
7246 if (!BasicBlock::sameTryRegion(head, entry))
7247 {
7248 return;
7249 }
7250
7251 // Ensure that lpHead always dominates lpEntry
7252
7253 noway_assert(fgDominate(head, entry));
7254
7255 /* Get hold of the first block of the loop body */
7256
7257 assert(top == entry);
7258
7259 /* Allocate a new basic block */
7260
7261 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
7262 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
7263
7264 // Must set IL code offset
7265 preHead->bbCodeOffs = top->bbCodeOffs;
7266
7267 // Set the default value of the preHead weight in case we don't have
7268 // valid profile data and since this blocks weight is just an estimate
7269 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
7270 //
7271 preHead->inheritWeight(head);
7272 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
7273
7274#ifdef DEBUG
7275 if (verbose)
7276 {
7277 printf("\nCreated PreHeader (" FMT_BB ") for loop L%02u (" FMT_BB " - " FMT_BB "), with weight = %s\n",
7278 preHead->bbNum, lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
7279 }
7280#endif
7281
7282 // The preheader block is part of the containing loop (if any).
7283 preHead->bbNatLoopNum = pLoopDsc->lpParent;
7284
7285 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
7286 {
7287 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
7288 {
7289 preHead->bbWeight = 0;
7290 preHead->bbFlags |= BBF_RUN_RARELY;
7291 }
7292 else
7293 {
7294 bool allValidProfileWeights =
7295 (head->hasProfileWeight() && head->bbJumpDest->hasProfileWeight() && head->bbNext->hasProfileWeight());
7296
7297 if (allValidProfileWeights)
7298 {
7299 double loopEnteredCount;
7300 double loopSkippedCount;
7301
7302 if (fgHaveValidEdgeWeights)
7303 {
7304 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
7305 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
7306 noway_assert(edgeToNext != nullptr);
7307 noway_assert(edgeToJump != nullptr);
7308
7309 loopEnteredCount =
7310 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
7311 loopSkippedCount =
7312 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
7313 }
7314 else
7315 {
7316 loopEnteredCount = (double)head->bbNext->bbWeight;
7317 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
7318 }
7319
7320 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
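                // Illustrative numbers: with head->bbWeight == 100, the loop entered 75
                // times and skipped 25 times, loopTakenRatio is 0.75 and the preheader
                // weight computed below is (unsigned)(100 * 0.75 + 0.5) = 75.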
7321
7322 // Calculate a good approximation of the preHead's block weight
7323 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
7324 preHead->setBBWeight(max(preHeadWeight, 1));
7325 noway_assert(!preHead->isRunRarely());
7326 }
7327 }
7328 }
7329
7330 // Link in the preHead block.
7331 fgInsertBBbefore(top, preHead);
7332
7333 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
7334 // However, that is too expensive at this point. Instead, we update the phi
7335 // node block references, if we created pre-header block due to hoisting.
7336 // This is sufficient because any definition participating in SSA that flowed
7337 // into the phi via the loop header block will now flow through the preheader
7338 // block from the header block.
7339
7340 for (GenTree* stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
7341 {
7342 GenTree* tree = stmt->gtStmt.gtStmtExpr;
7343 if (tree->OperGet() != GT_ASG)
7344 {
7345 break;
7346 }
7347 GenTree* op2 = tree->gtGetOp2();
7348 if (op2->OperGet() != GT_PHI)
7349 {
7350 break;
7351 }
7352 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
7353 while (args != nullptr)
7354 {
7355 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
7356 if (phiArg->gtPredBB == head)
7357 {
7358 phiArg->gtPredBB = preHead;
7359 }
7360 args = args->Rest();
7361 }
7362 }
7363
7364 // The handler can't begin at the top of the loop. If it did, it would be incorrect
7365 // to set the handler index on the pre header without updating the exception table.
7366 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
7367
7368 // Update the EH table to make the hoisted block part of the loop's EH block.
7369 fgExtendEHRegionBefore(top);
7370
7371 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
7372 // (e.g: hoisting expression in a loop with the same 'head' as this one)
7373
7374 /* Update the loop entry */
7375
7376 pLoopDsc->lpHead = preHead;
7377 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
7378
    /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds.
       All predecessors of 'top' (which is the entry to the loop)
       now have to jump to 'preHead', unless they are dominated by 'head' */
7382
7383 preHead->bbRefs = 0;
7384 fgAddRefPred(preHead, head);
7385 bool checkNestedLoops = false;
7386
7387 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
7388 {
7389 BasicBlock* predBlock = pred->flBlock;
7390
7391 if (fgDominate(top, predBlock))
7392 {
7393 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
7394 // (we know that 'head' dominates 'top'), but using 'top' instead of
7395 // 'head' in the test allows us to not enter here if 'predBlock == head'
7396
7397 if (predBlock != pLoopDsc->lpBottom)
7398 {
7399 noway_assert(predBlock != head);
7400 checkNestedLoops = true;
7401 }
7402 continue;
7403 }
7404
7405 switch (predBlock->bbJumpKind)
7406 {
7407 case BBJ_NONE:
7408 noway_assert(predBlock == head);
7409 break;
7410
7411 case BBJ_COND:
7412 if (predBlock == head)
7413 {
7414 noway_assert(predBlock->bbJumpDest != top);
7415 break;
7416 }
7417 __fallthrough;
7418
7419 case BBJ_ALWAYS:
7420 case BBJ_EHCATCHRET:
7421 noway_assert(predBlock->bbJumpDest == top);
7422 predBlock->bbJumpDest = preHead;
7423 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
7424
7425 if (predBlock == head)
7426 {
                    // This is essentially the same case as predBlock being a BBJ_NONE. We may not be
                    // able to make it a BBJ_NONE if it's an internal block (for example, a leave).
                    // Just break; the pred will be removed after the switch.
7430 }
7431 else
7432 {
7433 fgRemoveRefPred(top, predBlock);
7434 fgAddRefPred(preHead, predBlock);
7435 }
7436 break;
7437
7438 case BBJ_SWITCH:
7439 unsigned jumpCnt;
7440 jumpCnt = predBlock->bbJumpSwt->bbsCount;
7441 BasicBlock** jumpTab;
7442 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
7443
7444 do
7445 {
7446 assert(*jumpTab);
7447 if ((*jumpTab) == top)
7448 {
7449 (*jumpTab) = preHead;
7450
7451 fgRemoveRefPred(top, predBlock);
7452 fgAddRefPred(preHead, predBlock);
7453 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
7454 }
                } while (++jumpTab, --jumpCnt);
                break;

            default:
7458 noway_assert(!"Unexpected bbJumpKind");
7459 break;
7460 }
7461 }
7462
7463 noway_assert(!fgGetPredForBlock(top, preHead));
7464 fgRemoveRefPred(top, head);
7465 fgAddRefPred(top, preHead);
7466
7467 /*
7468 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
7469 (other than the back-edge of the loop we are considering) then we likely have nested
7470 do-while loops with the same entry block and inserting the preheader block changes the head
7471 of all the nested loops. Now we will update this piece of information in the loop table, and
7472 mark all nested loops as having a preheader (the preheader block can be shared among all nested
7473 do-while loops with the same entry block).
7474 */
7475 if (checkNestedLoops)
7476 {
7477 for (unsigned l = 0; l < optLoopCount; l++)
7478 {
7479 if (optLoopTable[l].lpHead == head)
7480 {
7481 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
7482 noway_assert(optLoopTable[l].lpEntry == top);
7483 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
7484 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
7485#ifdef DEBUG
7486 if (verbose)
7487 {
7488 printf("Same PreHeader (" FMT_BB ") can be used for loop L%02u (" FMT_BB " - " FMT_BB ")\n\n",
7489 preHead->bbNum, l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
7490 }
7491#endif
7492 }
7493 }
7494 }
7495}
7496
7497bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
7498{
7499 for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent)
7500 {
7501 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
7502 {
7503 continue;
7504 }
7505 if (optLoopTable[lnum].lpEntry == blk)
7506 {
7507 *pLnum = lnum;
7508 return true;
7509 }
7510 }
7511 return false;
7512}
7513
7514void Compiler::optComputeLoopSideEffects()
7515{
7516 unsigned lnum;
7517 for (lnum = 0; lnum < optLoopCount; lnum++)
7518 {
7519 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
7520 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
7521 optLoopTable[lnum].lpContainsCall = false;
7522 }
7523
7524 for (lnum = 0; lnum < optLoopCount; lnum++)
7525 {
7526 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
7527 {
7528 continue;
7529 }
7530
7531 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
7532 { // Is outermost...
7533 optComputeLoopNestSideEffects(lnum);
7534 }
7535 }
7536
7537 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
7538#ifndef _TARGET_64BIT_
7539 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
7540#endif
7541
7542 for (unsigned i = 0; i < lvaCount; i++)
7543 {
7544 LclVarDsc* varDsc = &lvaTable[i];
7545 if (varDsc->lvTracked)
7546 {
7547 if (varTypeIsFloating(varDsc->lvType))
7548 {
7549 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
7550 }
7551#ifndef _TARGET_64BIT_
7552 else if (varTypeIsLong(varDsc->lvType))
7553 {
7554 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
7555 }
7556#endif
7557 }
7558 }
7559}
7560
7561void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
7562{
7563 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
7564 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
7565 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
7566 {
7567 optComputeLoopSideEffectsOfBlock(bbInLoop);
7568 }
7569}
7570
7571void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
7572{
7573 unsigned mostNestedLoop = blk->bbNatLoopNum;
7574 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
7575
7576 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
7577
7578 // MemoryKinds for which an in-loop call or store has arbitrary effects.
7579 MemoryKindSet memoryHavoc = emptyMemoryKindSet;
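    // Note: MemoryKindSet is a small bit set indexed by MemoryKind; the calls to
    // memoryKindSet(GcHeap, ByrefExposed) below produce a mask with both bits set.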
7580
7581 // Now iterate over the remaining statements, and their trees.
7582 for (GenTree* stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
7583 {
7584 for (GenTree* tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
7585 {
7586 genTreeOps oper = tree->OperGet();
7587
7588 // Even after we set memoryHavoc we still may want to know if a loop contains calls
7589 if (memoryHavoc == fullMemoryKindSet)
7590 {
7591 if (oper == GT_CALL)
7592 {
7593 // Record that this loop contains a call
7594 AddContainsCallAllContainingLoops(mostNestedLoop);
7595 }
7596
7597 // If we just set lpContainsCall or it was previously set
7598 if (optLoopTable[mostNestedLoop].lpContainsCall)
7599 {
                    // We can exit early once both memoryHavoc and lpContainsCall are set.
7601 break;
7602 }
7603
7604 // We are just looking for GT_CALL nodes after memoryHavoc was set.
7605 continue;
7606 }
7607
            // Otherwise, memoryHavoc is not yet set for at least one memory kind.
7609 assert(memoryHavoc != fullMemoryKindSet);
7610
            // This body is a distillation of the memory side-effect code of value numbering.
            // We also do a very limited analysis of byref PtrTo values, to cover some cases
            // that the compiler creates.
7614
7615 if (oper == GT_ASG)
7616 {
7617 GenTree* lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
7618
7619 if (lhs->OperGet() == GT_IND)
7620 {
7621 GenTree* arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
7622 FieldSeqNode* fldSeqArrElem = nullptr;
7623
7624 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
7625 {
7626 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7627 continue;
7628 }
7629
7630 ArrayInfo arrInfo;
7631
7632 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
7633 {
7634 // If it's a local byref for which we recorded a value number, use that...
7635 GenTreeLclVar* argLcl = arg->AsLclVar();
7636 if (lvaInSsa(argLcl->GetLclNum()))
7637 {
7638 ValueNum argVN =
7639 lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
7640 VNFuncApp funcApp;
7641 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
7642 funcApp.m_func == VNF_PtrToArrElem)
7643 {
7644 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
7645 CORINFO_CLASS_HANDLE elemType =
7646 CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
7647 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
7648 // Don't set memoryHavoc for GcHeap below. Do set memoryHavoc for ByrefExposed
7649 // (conservatively assuming that a byref may alias the array element)
7650 memoryHavoc |= memoryKindSet(ByrefExposed);
7651 continue;
7652 }
7653 }
7654 // Otherwise...
7655 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7656 }
7657 // Is the LHS an array index expression?
7658 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
7659 {
7660 // We actually ignore "fldSeq" -- any modification to an S[], at any
7661 // field of "S", will lose all information about the array type.
7662 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
7663 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
7664 // Conservatively assume byrefs may alias this array element
7665 memoryHavoc |= memoryKindSet(ByrefExposed);
7666 }
7667 else
7668 {
7669 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
7670 //
7671 GenTree* obj = nullptr; // unused
7672 GenTree* staticOffset = nullptr; // unused
7673 FieldSeqNode* fldSeq = nullptr;
7674
7675 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
7676 (fldSeq != FieldSeqStore::NotAField()))
7677 {
7678 // Get the first (object) field from field seq. GcHeap[field] will yield the "field map".
7679 assert(fldSeq != nullptr);
7680 if (fldSeq->IsFirstElemFieldSeq())
7681 {
7682 fldSeq = fldSeq->m_next;
7683 assert(fldSeq != nullptr);
7684 }
7685
7686 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
7687 // Conservatively assume byrefs may alias this object.
7688 memoryHavoc |= memoryKindSet(ByrefExposed);
7689 }
7690 else
7691 {
7692 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7693 }
7694 }
7695 }
7696 else if (lhs->OperIsBlk())
7697 {
7698 GenTreeLclVarCommon* lclVarTree;
7699 bool isEntire;
7700 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
7701 {
7702 // For now, assume arbitrary side effects on GcHeap/ByrefExposed...
7703 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7704 }
7705 else if (lvaVarAddrExposed(lclVarTree->gtLclNum))
7706 {
7707 memoryHavoc |= memoryKindSet(ByrefExposed);
7708 }
7709 }
7710 else if (lhs->OperGet() == GT_CLS_VAR)
7711 {
7712 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
7713 // Conservatively assume byrefs may alias this static field
7714 memoryHavoc |= memoryKindSet(ByrefExposed);
7715 }
                // Otherwise, this must be the local lhs form. We should assert that.
7717 else if (lhs->OperGet() == GT_LCL_VAR)
7718 {
7719 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
7720 GenTree* rhs = tree->gtOp.gtOp2;
7721 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
7722 // If we gave the RHS a value number, propagate it.
7723 if (rhsVN != ValueNumStore::NoVN)
7724 {
7725 rhsVN = vnStore->VNNormalValue(rhsVN);
7726 if (lvaInSsa(lhsLcl->GetLclNum()))
7727 {
7728 lvaTable[lhsLcl->GetLclNum()]
7729 .GetPerSsaData(lhsLcl->GetSsaNum())
7730 ->m_vnPair.SetLiberal(rhsVN);
7731 }
7732 }
7733 // If the local is address-exposed, count this as ByrefExposed havoc
7734 if (lvaVarAddrExposed(lhsLcl->gtLclNum))
7735 {
7736 memoryHavoc |= memoryKindSet(ByrefExposed);
7737 }
7738 }
7739 }
7740 else // if (oper != GT_ASG)
7741 {
7742 switch (oper)
7743 {
7744 case GT_COMMA:
7745 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
7746 break;
7747
7748 case GT_ADDR:
                    // Is it the address of an array index expression?
7750 {
7751 GenTree* addrArg = tree->gtOp.gtOp1;
7752 if (addrArg->OperGet() == GT_IND)
7753 {
7754 // Is the LHS an array index expression?
7755 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
7756 {
7757 ArrayInfo arrInfo;
7758 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
7759 assert(b);
7760 CORINFO_CLASS_HANDLE elemTypeEq =
7761 EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
7762 ValueNum elemTypeEqVN =
7763 vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL);
7764 ValueNum ptrToArrElemVN =
7765 vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem, elemTypeEqVN,
7766 // The rest are dummy arguments.
7767 vnStore->VNForNull(), vnStore->VNForNull(),
7768 vnStore->VNForNull());
7769 tree->gtVNPair.SetBoth(ptrToArrElemVN);
7770 }
7771 }
7772 }
7773 break;
7774
7775 case GT_LOCKADD: // Binop
7776 case GT_XADD: // Binop
7777 case GT_XCHG: // Binop
7778 case GT_CMPXCHG: // Specialop
7779 {
7780 assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering");
7781 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7782 }
7783 break;
7784
7785 case GT_CALL:
7786 {
7787 GenTreeCall* call = tree->AsCall();
7788
7789 // Record that this loop contains a call
7790 AddContainsCallAllContainingLoops(mostNestedLoop);
7791
7792 if (call->gtCallType == CT_HELPER)
7793 {
7794 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
7795 if (s_helperCallProperties.MutatesHeap(helpFunc))
7796 {
7797 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7798 }
7799 else if (s_helperCallProperties.MayRunCctor(helpFunc))
7800 {
7801 // If the call is labeled as "Hoistable", then we've checked the
7802 // class that would be constructed, and it is not precise-init, so
7803 // the cctor will not be run by this call. Otherwise, it might be,
7804 // and might have arbitrary side effects.
7805 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
7806 {
7807 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7808 }
7809 }
7810 }
7811 else
7812 {
7813 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7814 }
7815 break;
7816 }
7817
7818 default:
                        // For all other gtOper node kinds, leave 'memoryHavoc' unchanged (i.e. no bits set)
7820 break;
7821 }
7822 }
7823 }
7824 }
7825
7826 if (memoryHavoc != emptyMemoryKindSet)
7827 {
7828 // Record that all loops containing this block have memory havoc effects.
7829 unsigned lnum = mostNestedLoop;
7830 while (lnum != BasicBlock::NOT_IN_LOOP)
7831 {
7832 for (MemoryKind memoryKind : allMemoryKinds())
7833 {
7834 if ((memoryHavoc & memoryKindSet(memoryKind)) != 0)
7835 {
7836 optLoopTable[lnum].lpLoopHasMemoryHavoc[memoryKind] = true;
7837 }
7838 }
7839 lnum = optLoopTable[lnum].lpParent;
7840 }
7841 }
7842}
7843
// Records that "lnum" and any parent loops contain a call.
7845void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7846{
7847 assert(0 <= lnum && lnum < optLoopCount);
7848 while (lnum != BasicBlock::NOT_IN_LOOP)
7849 {
7850 optLoopTable[lnum].lpContainsCall = true;
7851 lnum = optLoopTable[lnum].lpParent;
7852 }
7853}
7854
7855// Adds the variable liveness information for 'blk' to 'this' LoopDsc
7856void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7857{
7858 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7859 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7860
7861 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7862 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7863}
7864
7865// Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7866void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7867{
7868 assert(0 <= lnum && lnum < optLoopCount);
7869 while (lnum != BasicBlock::NOT_IN_LOOP)
7870 {
7871 optLoopTable[lnum].AddVariableLiveness(this, blk);
7872 lnum = optLoopTable[lnum].lpParent;
7873 }
7874}
7875
7876// Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7877void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7878{
7879 assert(0 <= lnum && lnum < optLoopCount);
7880 while (lnum != BasicBlock::NOT_IN_LOOP)
7881 {
7882 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7883 lnum = optLoopTable[lnum].lpParent;
7884 }
7885}
7886
// Adds "elemClsHnd" to the set of modified array element types of "lnum" and any parent loops.
7888void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7889{
7890 assert(0 <= lnum && lnum < optLoopCount);
7891 while (lnum != BasicBlock::NOT_IN_LOOP)
7892 {
7893 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7894 lnum = optLoopTable[lnum].lpParent;
7895 }
7896}
7897
7898/*****************************************************************************
7899 *
7900 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
 * The 'keepList' is either a single tree or a list of trees that are formed by
 * one or more GT_COMMA nodes. It is the set of kept side effects as returned by
 * the gtExtractSideEffList method.
7904 */
7905
7906/* static */
7907Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTree** pTree, fgWalkData* data)
7908{
7909 GenTree* tree = *pTree;
7910 Compiler* comp = data->compiler;
7911 GenTree* keepList = (GenTree*)(data->pCallbackData);
7912
7913 // We may have a non-NULL side effect list that is being kept
7914 //
7915 if (keepList)
7916 {
7917 GenTree* keptTree = keepList;
7918 while (keptTree->OperGet() == GT_COMMA)
7919 {
7920 assert(keptTree->OperKind() & GTK_SMPOP);
7921 GenTree* op1 = keptTree->gtOp.gtOp1;
7922 GenTree* op2 = keptTree->gtGetOp2();
7923
            // For the GT_COMMA case, op1 is part of the original CSE tree
            // that is being kept because it contains some side effect
7926 //
7927 if (tree == op1)
7928 {
7929 // This tree and all of its sub trees are being kept.
7930 return WALK_SKIP_SUBTREES;
7931 }
7932
            // For the GT_COMMA case, op2 holds the remaining side effects of the original CSE tree,
            // which can again be another GT_COMMA or the final side-effect part
7935 //
7936 keptTree = op2;
7937 }
7938 if (tree == keptTree)
7939 {
7940 // This tree and all of its sub trees are being kept.
7941 return WALK_SKIP_SUBTREES;
7942 }
7943 }
7944
7945 return WALK_CONTINUE;
7946}
7947
7948/*****************************************************************************
7949 *
 * Routine called to decrement the LclVar ref counts when removing a tree
 * during the remove RangeCheck phase.
 * This method will decrement the ref counts for any LclVars used below 'deadTree',
 * unless the node is found in the 'keepList' (the saved side effects).
 * The keepList is communicated using the walkData.pCallbackData field.
 * Also, compCurBB must be set to the BasicBlock that contains 'deadTree',
 * as we need its block weight when decrementing the ref counts.
7957 */
7958
7959void Compiler::optRemoveTree(GenTree* deadTree, GenTree* keepList)
7960{
7961 // We communicate this value using the walkData.pCallbackData field
7962 //
7963 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7964}
7965
7966//------------------------------------------------------------------------------
7967// optRemoveRangeCheck : Given an array index node, mark it as not needing a range check.
7968//
7969// Arguments:
7970// tree - Range check tree
7971// stmt - Statement the tree belongs to
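//
// For illustration (hypothetical tree shape): COMMA(ARR_BOUNDS_CHECK(i, ARR_LENGTH(a)), IND(addr))
// becomes COMMA(NOP, IND(addr)) when the check has no side effects to preserve, or
// COMMA(sideEffList, IND(addr)) when it does.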
7972
7973void Compiler::optRemoveRangeCheck(GenTree* tree, GenTree* stmt)
7974{
7975#if !REARRANGE_ADDS
7976 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
7977#endif
7978
7979 noway_assert(stmt->gtOper == GT_STMT);
7980 noway_assert(tree->gtOper == GT_COMMA);
7981
7982 GenTree* bndsChkTree = tree->gtOp.gtOp1;
7983
7984 noway_assert(bndsChkTree->OperIsBoundsCheck());
7985
7986 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
7987
7988#ifdef DEBUG
7989 if (verbose)
7990 {
7991 printf("Before optRemoveRangeCheck:\n");
7992 gtDispTree(tree);
7993 }
7994#endif
7995
7996 GenTree* sideEffList = nullptr;
7997
7998 gtExtractSideEffList(bndsChkTree, &sideEffList, GTF_ASG);
7999
8000 // Decrement the ref counts for any LclVars that are being deleted
8001 //
8002 optRemoveTree(bndsChkTree, sideEffList);
8003
    // Replace the bndsChk operand of the GT_COMMA with the extracted side effects, or with a NOP if there are none.
8005 tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
8006 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
8007 tree->gtFlags |= GTF_DONT_CSE;
8008
8009 gtUpdateSideEffects(stmt, tree);
8010
8011 /* Recalculate the gtCostSz, etc... */
8012 gtSetStmtInfo(stmt);
8013
8014 /* Re-thread the nodes if necessary */
8015 if (fgStmtListThreaded)
8016 {
8017 fgSetStmtSeq(stmt);
8018 }
8019
8020#ifdef DEBUG
8021 if (verbose)
8022 {
8023 printf("After optRemoveRangeCheck:\n");
8024 gtDispTree(tree);
8025 }
8026#endif
8027}
8028
8029/*****************************************************************************
8030 * Return the scale in an array reference, given a pointer to the
8031 * multiplication node.
8032 */
8033
8034ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEBUGARG(bool bRngChk))
8035{
8036 assert(mul);
8037 assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
8038 assert(mul->gtOp.gtOp2->IsCnsIntOrI());
8039
8040 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
8041
8042 if (mul->gtOper == GT_LSH)
8043 {
8044 scale = ((ssize_t)1) << scale;
8045 }
8046
8047 GenTree* index = mul->gtOp.gtOp1;
8048
8049 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
8050 {
8051 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
8052 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
8053 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
8054 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
8055 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
8056 index = index->gtOp.gtOp1;
8057 }
8058
8059 assert(!bRngChk || index->gtOper != GT_COMMA);
8060
8061 if (pIndex)
8062 {
8063 *pIndex = index;
8064 }
8065
8066 return scale;
8067}
8068
8069//------------------------------------------------------------------------------
8070// optObtainLoopCloningOpts: Identify optimization candidates and update
8071// the "context" for array optimizations.
8072//
8073// Arguments:
8074// context - data structure where all loop cloning info is kept. The
8075// optInfo fields of the context are updated with the
8076// identified optimization candidates.
8077//
8078void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
8079{
8080 for (unsigned i = 0; i < optLoopCount; i++)
8081 {
8082 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
8083 if (optIsLoopClonable(i))
8084 {
8085 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
8086 {
8087 optIdentifyLoopOptInfo(i, context);
8088 }
8089 }
8090 JITDUMP("------------------------------------------------------------\n");
8091 }
8092 JITDUMP("\n");
8093}
8094
8095//------------------------------------------------------------------------
// optIdentifyLoopOptInfo: Identify loop optimization candidates and also
//      check if the loop is suitable for the optimizations performed.
8098//
8099// Arguments:
8100// loopNum - the current loop index for which conditions are derived.
8101// context - data structure where all loop cloning candidates will be
8102// updated.
8103//
8104// Return Value:
//     Returns false if the loop is not suitable for the optimizations; in that case
//     "context" will not contain any optimization candidates for the loop.
//     Otherwise returns true.
8108//
8109// Operation:
8110// Check if the loop is well formed for this optimization and identify the
8111// optimization candidates and update the "context" parameter with all the
8112// contextual information necessary to perform the optimization later.
8113//
8114bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
8115{
8116 noway_assert(loopNum < optLoopCount);
8117
8118 LoopDsc* pLoop = &optLoopTable[loopNum];
8119
8120 if (!(pLoop->lpFlags & LPFLG_ITER))
8121 {
8122 JITDUMP("> No iter flag on loop %d.\n", loopNum);
8123 return false;
8124 }
8125
8126 unsigned ivLclNum = pLoop->lpIterVar();
8127 if (lvaVarAddrExposed(ivLclNum))
8128 {
        JITDUMP("> Rejected V%02u as iter var because it is address-exposed.\n", ivLclNum);
8130 return false;
8131 }
8132
8133 BasicBlock* head = pLoop->lpHead;
8134 BasicBlock* end = pLoop->lpBottom;
8135 BasicBlock* beg = head->bbNext;
8136
8137 if (end->bbJumpKind != BBJ_COND)
8138 {
8139 JITDUMP("> Couldn't find termination test.\n");
8140 return false;
8141 }
8142
8143 if (end->bbJumpDest != beg)
8144 {
8145 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
8146 return false;
8147 }
8148
8149 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
8150 if ((pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
8151 {
8152 JITDUMP("> Loop iteration operator not matching\n");
8153 return false;
8154 }
8155
8156 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
8157 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
8158 {
        JITDUMP("> Loop limit is neither constant, variable, nor array length\n");
8160 return false;
8161 }
8162
8163 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD)) ||
8164 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB))))
8165 {
        JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the loop.\n",
                GenTree::OpName(pLoop->lpTestOper()), GenTree::OpName(pLoop->lpIterOper()));
8168 return false;
8169 }
8170
8171 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
8172 {
8173 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
8174 pLoop->lpTestTree->gtTreeID);
8175 return false;
8176 }
8177
8178#ifdef DEBUG
8179 GenTree* op1 = pLoop->lpIterator();
8180 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
8181#endif
8182
8183 JITDUMP("Checking blocks " FMT_BB ".." FMT_BB " for optimization candidates\n", beg->bbNum,
8184 end->bbNext ? end->bbNext->bbNum : 0);
8185
8186 LoopCloneVisitorInfo info(context, loopNum, nullptr);
8187 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
8188 {
8189 compCurBB = block;
8190 for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
8191 {
8192 info.stmt = stmt;
8193 const bool lclVarsOnly = false;
8194 const bool computeStack = false;
8195 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, lclVarsOnly,
8196 computeStack);
8197 }
8198 }
8199
8200 return true;
8201}
8202
8203//---------------------------------------------------------------------------------------------------------------
8204// optExtractArrIndex: Try to extract the array index from "tree".
8205//
8206// Arguments:
8207// tree the tree to be checked if it is the array [] operation.
8208// result the extracted GT_INDEX information is updated in result.
8209// lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
8210//
8211// Return Value:
//      Returns true if the array index can be extracted; otherwise returns false. See the
//      assumption about what will be extracted. The "result" variable's rank parameter is
//      advanced for every dimension of [] encountered.
8215//
8216// Operation:
8217// Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
8218// we have converted a GT_INDEX tree into a scaled index base offset expression. We need
8219// to reconstruct this to be able to know if this is an array access.
8220//
8221// Assumption:
8222// The method extracts only if the array base and indices are GT_LCL_VAR.
8223//
8224// TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
8225//
8226// [000024] ------------ * STMT void(IL 0x007...0x00C)
8227// [000021] a--XG+------ | /--* IND int
8228// [000045] -----+------ | | | /--* CNS_INT long 16 Fseq[#FirstElem]
8229// [000046] -----+------ | | | /--* ADD long
8230// [000043] -----+-N---- | | | | | /--* CNS_INT long 2
8231// [000044] -----+------ | | | | \--* LSH long
8232// [000042] -----+------ | | | | \--* CAST long < -int
8233// [000039] i----+------ | | | | \--* LCL_VAR int V04 loc0
8234// [000047] -----+------ | | \--* ADD byref
8235// [000038] -----+------ | | \--* LCL_VAR ref V00 arg0
8236// [000048] ---XG+------ | /--* COMMA int
8237// [000041] ---X-+------ | | \--* ARR_BOUNDS_CHECK_Rng void
8238// [000020] -----+------ | | +--* LCL_VAR int V04 loc0
8239// [000040] ---X-+------ | | \--* ARR_LENGTH int
8240// [000019] -----+------ | | \--* LCL_VAR ref V00 arg0
8241// [000023] -A-XG+------ \--* ASG int
8242// [000022] D----+-N---- \--* LCL_VAR int V06 tmp1
8243
8244bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum)
8245{
8246 if (tree->gtOper != GT_COMMA)
8247 {
8248 return false;
8249 }
8250 GenTree* before = tree->gtGetOp1();
8251 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
8252 {
8253 return false;
8254 }
8255 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
8256 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
8257 {
8258 return false;
8259 }
8260
    // For Span<T> we may see gtArrLen as a local var, a local field, or a constant.
    // We won't try to extract those.
8263 if (arrBndsChk->gtArrLen->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_CNS_INT))
8264 {
8265 return false;
8266 }
8267 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
8268 {
8269 return false;
8270 }
8271 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
8272 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
8273 {
8274 return false;
8275 }
8276
8277 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
8278
8279 GenTree* after = tree->gtGetOp2();
8280
8281 if (after->gtOper != GT_IND)
8282 {
8283 return false;
8284 }
8285 // It used to be the case that arrBndsChks for struct types would fail the previous check because
8286 // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
8287 // return false if the type of 'after' is a struct type. (This was causing us to clone loops
8288 // that we were not previously cloning.)
8289 // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
8290 // types.
8291 if (varTypeIsStruct(after))
8292 {
8293 return false;
8294 }
8295
8296 GenTree* sibo = after->gtGetOp1(); // sibo = scale*index + base + offset
8297 if (sibo->gtOper != GT_ADD)
8298 {
8299 return false;
8300 }
8301 GenTree* base = sibo->gtGetOp1();
8302 GenTree* sio = sibo->gtGetOp2(); // sio == scale*index + offset
8303 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
8304 {
8305 return false;
8306 }
8307 if (sio->gtOper != GT_ADD)
8308 {
8309 return false;
8310 }
8311 GenTree* ofs = sio->gtGetOp2();
8312 GenTree* si = sio->gtGetOp1(); // si = scale*index
8313 if (ofs->gtOper != GT_CNS_INT)
8314 {
8315 return false;
8316 }
8317 if (si->gtOper != GT_LSH)
8318 {
8319 return false;
8320 }
8321 GenTree* scale = si->gtGetOp2();
8322 GenTree* index = si->gtGetOp1();
8323 if (scale->gtOper != GT_CNS_INT)
8324 {
8325 return false;
8326 }
8327#ifdef _TARGET_64BIT_
8328 if (index->gtOper != GT_CAST)
8329 {
8330 return false;
8331 }
8332 GenTree* indexVar = index->gtGetOp1();
8333#else
8334 GenTree* indexVar = index;
8335#endif
8336 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
8337 {
8338 return false;
8339 }
8340 if (lhsNum == BAD_VAR_NUM)
8341 {
8342 result->arrLcl = arrLcl;
8343 }
8344 result->indLcls.Push(indLcl);
8345 result->bndsChks.Push(tree);
8346 result->useBlock = compCurBB;
8347 result->rank++;
8348
8349 return true;
8350}
8351
8352//---------------------------------------------------------------------------------------------------------------
8353// optReconstructArrIndex: Reconstruct array index.
8354//
8355// Arguments:
8356// tree the tree to be checked if it is an array [][][] operation.
8357// result the extracted GT_INDEX information.
8358// lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
8359//
8360// Return Value:
//      Returns true if the array index can be extracted; otherwise returns false. The "rank"
//      field in "result" contains the array access depth; the "indLcls" fields contain the indices.
8363//
8364// Operation:
8365// Recursively look for a list of array indices. In the example below, we encounter,
8366// V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
8367// The return value would then be:
8368// ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
8369//
8370// V00[V01][V02] would be morphed as:
8371//
8372// [000000001B366848] ---XG------- indir int
8373// [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
8374// [000000001B36C200] ---XG------- comma int
8375// [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
8376// [000000001B36C278] -A-XG------- comma int
8377// [000000001B366730] R--XG------- indir ref
8378// [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
8379// [000000001B36C818] ---XG------- comma ref
8380// [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
8381// [000000001B36BB60] -A-XG------- = ref
8382// [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
8383// [000000001B36A668] -A-XG------- = int
8384// [000000001B36A5F0] D------N---- lclVar int V03 tmp0
8385//
8386// Assumption:
8387// The method extracts only if the array base and indices are GT_LCL_VAR.
8388//
8389bool Compiler::optReconstructArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum)
8390{
8391 // If we can extract "tree" (which is a top level comma) return.
8392 if (optExtractArrIndex(tree, result, lhsNum))
8393 {
8394 return true;
8395 }
8396 // We have a comma (check if array base expr is computed in "before"), descend further.
8397 else if (tree->OperGet() == GT_COMMA)
8398 {
8399 GenTree* before = tree->gtGetOp1();
8400 // "before" should evaluate an array base for the "after" indexing.
8401 if (before->OperGet() != GT_ASG)
8402 {
8403 return false;
8404 }
8405 GenTree* lhs = before->gtGetOp1();
8406 GenTree* rhs = before->gtGetOp2();
8407
8408 // "rhs" should contain an GT_INDEX
8409 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
8410 {
8411 return false;
8412 }
8413 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
8414 GenTree* after = tree->gtGetOp2();
8415 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
8416 return optExtractArrIndex(after, result, lhsNum);
8417 }
8418 return false;
8419}
8420
8421/* static */
8422Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pTree, Compiler::fgWalkData* data)
8423{
8424 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
8425}
8426
8427//-------------------------------------------------------------------------
// optIsStackLocalInvariant: Check whether the stack local is invariant in the loop.
8429//
8430// Arguments:
8431// loopNum The loop in which the variable is tested for invariance.
8432// lclNum The local that is tested for invariance in the loop.
8433//
8434// Return Value:
8435// Returns true if the variable is loop invariant in loopNum.
8436//
8437bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
8438{
8439 if (lvaVarAddrExposed(lclNum))
8440 {
8441 return false;
8442 }
8443 if (optIsVarAssgLoop(loopNum, lclNum))
8444 {
8445 return false;
8446 }
8447 return true;
8448}
8449
8450//----------------------------------------------------------------------------------------------
8451// optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
8452// identify as potential candidate and update the loop context.
8453//
8454// Arguments:
8455// tree The tree encountered during the tree walk.
8456// info Supplies information about the current block or stmt in which the tree is.
8457// Also supplies the "context" pointer for updating with loop cloning
8458// candidates. Also supplies loopNum.
8459//
8460// Operation:
8461// If array index can be reconstructed, check if the iter var of the loop matches the
8462// array index var in some dim. Also ensure other index vars before the identified
8463// dim are loop invariant.
8464//
8465// Return Value:
8466// Skip sub trees if the optimization candidate is identified or else continue walking
8467//
8468Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info)
8469{
8470 ArrIndex arrIndex(getAllocator());
8471
8472 // Check if array index can be optimized.
8473 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
8474 {
8475 assert(tree->gtOper == GT_COMMA);
8476#ifdef DEBUG
8477 if (verbose)
8478 {
8479 JITDUMP("Found ArrIndex at tree ");
8480 printTreeID(tree);
8481 printf(" which is equivalent to: ");
8482 arrIndex.Print();
8483 JITDUMP("\n");
8484 }
8485#endif
8486 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
8487 {
8488 return WALK_SKIP_SUBTREES;
8489 }
8490
8491 // Walk the dimensions and see if iterVar of the loop is used as index.
8492 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
8493 {
8494 // Is index variable also used as the loop iter var.
8495 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
8496 {
8497 // Check the previous indices are all loop invariant.
8498 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
8499 {
8500 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
8501 {
8502 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
8503 return WALK_SKIP_SUBTREES;
8504 }
8505 }
8506#ifdef DEBUG
8507 if (verbose)
8508 {
8509 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
8510 arrIndex.Print();
8511 JITDUMP(" on dim %d\n", dim);
8512 }
8513#endif
8514 // Update the loop context.
8515 info->context->EnsureLoopOptInfo(info->loopNum)
8516 ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
8517 }
8518 else
8519 {
8520 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
8521 dim);
8522 }
8523 }
8524 return WALK_SKIP_SUBTREES;
8525 }
8526 else if (tree->gtOper == GT_ARR_ELEM)
8527 {
8528 // TODO-CQ: CLONE: Implement.
8529 return WALK_SKIP_SUBTREES;
8530 }
8531 return WALK_CONTINUE;
8532}
8533
8534struct optRangeCheckDsc
8535{
8536 Compiler* pCompiler;
8537 bool bValidIndex;
8538};
8539/*
8540 Walk to make sure that only locals and constants are contained in the index
8541 for a range check
8542*/
8543Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTree** pTree, fgWalkData* data)
8544{
8545 GenTree* tree = *pTree;
8546 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
8547
8548 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
8549 {
8550 pData->bValidIndex = false;
8551 return WALK_ABORT;
8552 }
8553
8554 if (tree->gtOper == GT_LCL_VAR)
8555 {
8556 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
8557 {
8558 pData->bValidIndex = false;
8559 return WALK_ABORT;
8560 }
8561 }
8562
8563 return WALK_CONTINUE;
8564}
8565
8566/*
8567 returns true if a range check can legally be removed (for the moment it checks
8568 that the array is a local array (non subject to racing conditions) and that the
8569 index is either a constant or a local
8570*/
8571bool Compiler::optIsRangeCheckRemovable(GenTree* tree)
8572{
8573 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
8574 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
8575 GenTree* pArray = bndsChk->GetArray();
8576 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
8577 {
8578 return false;
8579 }
8580 GenTree* pIndex = bndsChk->gtIndex;
8581
    // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
    // Otherwise we could be targeted by malicious race conditions.
8584 if (pArray != nullptr)
8585 {
8586 if (pArray->gtOper != GT_LCL_VAR)
8587 {
8588
8589#ifdef DEBUG
8590 if (verbose)
8591 {
8592 printf("Can't remove range check if the array isn't referenced with a local\n");
8593 gtDispTree(pArray);
8594 }
8595#endif
8596 return false;
8597 }
8598 else
8599 {
8600 noway_assert(pArray->gtType == TYP_REF);
8601 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
8602
8603 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
8604 {
                // If the array address has been taken, don't do the optimization
                // (this restriction can be lowered a bit, but I don't think it's worth it)
8607 CLANG_FORMAT_COMMENT_ANCHOR;
8608#ifdef DEBUG
8609 if (verbose)
8610 {
8611 printf("Can't remove range check if the array has its address taken\n");
8612 gtDispTree(pArray);
8613 }
8614#endif
8615 return false;
8616 }
8617 }
8618 }
8619
8620 optRangeCheckDsc Data;
8621 Data.pCompiler = this;
8622 Data.bValidIndex = true;
8623
8624 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8625
8626 if (!Data.bValidIndex)
8627 {
8628#ifdef DEBUG
8629 if (verbose)
8630 {
            printf("Can't remove range check with this index\n");
8632 gtDispTree(pIndex);
8633 }
8634#endif
8635
8636 return false;
8637 }
8638
8639 return true;
8640}
8641
8642/******************************************************************************
8643 *
8644 * Replace x==null with (x|x)==0 if x is a GC-type.
8645 * This will stress code-gen and the emitter to make sure they support such trees.
8646 */
8647
8648#ifdef DEBUG
8649
8650void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
8651{
8652 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8653 {
8654 return;
8655 }
8656
8657 noway_assert(condBlock->bbJumpKind == BBJ_COND);
8658 GenTree* condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8659
8660 noway_assert(condStmt->gtOper == GT_JTRUE);
8661
8662 bool isBool;
8663 GenTree* relop;
8664
8665 GenTree* comparand = optIsBoolCond(condStmt, &relop, &isBool);
8666
8667 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
8668 {
8669 return;
8670 }
8671
8672 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8673 {
8674 return;
8675 }
8676
8677 GenTree* comparandClone = gtCloneExpr(comparand);
8678
8679 noway_assert(relop->gtOp.gtOp1 == comparand);
8680 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8681 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8682
    // The comparand type has already been checked, and op2 is a const int, so there
    // is no harm in morphing it into a TYP_I_IMPL.
8685 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8686 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8687}
8688
8689#endif
8690
8691/******************************************************************************
8692 * Function used by folding of boolean conditionals
8693 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8694 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8695 * being a boolean lclVar and "op2" the const 0/1.
8696 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8697 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8698 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8699 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
8700 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
8701 */
8702
8703GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8704{
8705 bool isBool = false;
8706
8707 noway_assert(condBranch->gtOper == GT_JTRUE);
8708 GenTree* cond = condBranch->gtOp.gtOp1;
8709
8710 /* The condition must be "!= 0" or "== 0" */
8711
8712 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8713 {
8714 return nullptr;
8715 }
8716
8717 /* Return the compare node to the caller */
8718
8719 *compPtr = cond;
8720
8721 /* Get hold of the comparands */
8722
8723 GenTree* opr1 = cond->gtOp.gtOp1;
8724 GenTree* opr2 = cond->gtOp.gtOp2;
8725
8726 if (opr2->gtOper != GT_CNS_INT)
8727 {
8728 return nullptr;
8729 }
8730
8731 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8732 {
8733 return nullptr;
8734 }
8735
8736 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8737
8738 /* Is the value a boolean?
8739 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8740 * a local variable that is marked as being boolean (lvIsBoolean) */
8741
8742 if (opr1->gtFlags & GTF_BOOLEAN)
8743 {
8744 isBool = true;
8745 }
8746 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8747 {
8748 isBool = true;
8749 }
8750 else if (opr1->gtOper == GT_LCL_VAR)
8751 {
8752 /* is it a boolean local variable */
8753
8754 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8755 noway_assert(lclNum < lvaCount);
8756
8757 if (lvaTable[lclNum].lvIsBoolean)
8758 {
8759 isBool = true;
8760 }
8761 }
8762
8763 /* Was our comparison against the constant 1 (i.e. true) */
8764 if (ival2 == 1)
8765 {
8766 // If this is a boolean expression tree we can reverse the relop
8767 // and change the true to false.
8768 if (isBool)
8769 {
8770 gtReverseCond(cond);
8771 opr2->gtIntCon.gtIconVal = 0;
8772 }
8773 else
8774 {
8775 return nullptr;
8776 }
8777 }
8778
8779 *boolPtr = isBool;
8780 return opr1;
8781}
8782
8783void Compiler::optOptimizeBools()
8784{
8785#ifdef DEBUG
8786 if (verbose)
8787 {
8788 printf("*************** In optOptimizeBools()\n");
8789 if (verboseTrees)
8790 {
8791 printf("Blocks/Trees before phase\n");
8792 fgDispBasicBlocks(true);
8793 }
8794 }
8795#endif
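
    // For illustration (hypothetical C# source): a condition such as
    //
    //     if (a != 0 || b != 0) { ... }
    //
    // arrives as two consecutive BBJ_COND blocks, each a brtrue to the same target; the
    // loop below folds them into a single block testing ((a | b) != 0).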
8796 bool change;
8797
8798 do
8799 {
8800 change = false;
8801
8802 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8803 {
8804 /* We're only interested in conditional jumps here */
8805
8806 if (b1->bbJumpKind != BBJ_COND)
8807 {
8808 continue;
8809 }
8810
8811 /* If there is no next block, we're done */
8812
8813 BasicBlock* b2 = b1->bbNext;
8814 if (!b2)
8815 {
8816 break;
8817 }
8818
8819 /* The next block must not be marked as BBF_DONT_REMOVE */
8820 if (b2->bbFlags & BBF_DONT_REMOVE)
8821 {
8822 continue;
8823 }
8824
8825 /* The next block also needs to be a condition */
8826
8827 if (b2->bbJumpKind != BBJ_COND)
8828 {
8829#ifdef DEBUG
8830 optOptimizeBoolsGcStress(b1);
8831#endif
8832 continue;
8833 }
8834
8835 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8836
8837 if (b1->bbJumpDest == b2->bbJumpDest)
8838 {
8839 /* Given the following sequence of blocks :
8840 B1: brtrue(t1, BX)
8841 B2: brtrue(t2, BX)
8842 B3:
8843 we will try to fold it to :
8844 B1: brtrue(t1|t2, BX)
8845 B3:
8846 */
8847
8848 sameTarget = true;
8849 }
            else if (b1->bbJumpDest == b2->bbNext)
8851 {
8852 /* Given the following sequence of blocks :
8853 B1: brtrue(t1, B3)
8854 B2: brtrue(t2, BX)
8855 B3:
8856 we will try to fold it to :
8857 B1: brtrue((!t1)&&t2, BX)
8858 B3:
8859 */
8860
8861 sameTarget = false;
8862 }
8863 else
8864 {
8865 continue;
8866 }
8867
8868 /* The second block must contain a single statement */
8869
8870 GenTree* s2 = b2->bbTreeList;
8871 if (s2->gtPrev != s2)
8872 {
8873 continue;
8874 }
8875
8876 noway_assert(s2->gtOper == GT_STMT);
8877 GenTree* t2 = s2->gtStmt.gtStmtExpr;
8878 noway_assert(t2->gtOper == GT_JTRUE);
8879
8880 /* Find the condition for the first block */
8881
8882 GenTree* s1 = b1->bbTreeList->gtPrev;
8883
8884 noway_assert(s1->gtOper == GT_STMT);
8885 GenTree* t1 = s1->gtStmt.gtStmtExpr;
8886 noway_assert(t1->gtOper == GT_JTRUE);
8887
8888 if (b2->countOfInEdges() > 1)
8889 {
8890 continue;
8891 }
8892
8893 /* Find the branch conditions of b1 and b2 */
8894
8895 bool bool1, bool2;
8896
8897 GenTree* c1 = optIsBoolCond(t1, &t1, &bool1);
8898 if (!c1)
8899 {
8900 continue;
8901 }
8902
8903 GenTree* c2 = optIsBoolCond(t2, &t2, &bool2);
8904 if (!c2)
8905 {
8906 continue;
8907 }
8908
            noway_assert(((t1->gtOper == GT_EQ) || (t1->gtOper == GT_NE)) && (t1->gtOp.gtOp1 == c1));
            noway_assert(((t2->gtOper == GT_EQ) || (t2->gtOper == GT_NE)) && (t2->gtOp.gtOp1 == c2));
8911
8912 // Leave out floats where the bit-representation is more complicated
8913 // - there are two representations for 0.
8914 //
8915 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
8916 {
8917 continue;
8918 }
8919
8920 // Make sure the types involved are of the same sizes
8921 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
8922 {
8923 continue;
8924 }
8925 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
8926 {
8927 continue;
8928 }
8929#ifdef _TARGET_ARMARCH_
8930 // Skip the small operand which we cannot encode.
8931 if (varTypeIsSmall(c1->TypeGet()))
8932 continue;
8933#endif
8934 /* The second condition must not contain side effects */
8935
8936 if (c2->gtFlags & GTF_GLOB_EFFECT)
8937 {
8938 continue;
8939 }
8940
8941 /* The second condition must not be too expensive */
8942
8943 gtPrepareCost(c2);
8944
8945 if (c2->gtCostEx > 12)
8946 {
8947 continue;
8948 }
8949
8950 genTreeOps foldOp;
8951 genTreeOps cmpOp;
8952 var_types foldType = c1->TypeGet();
8953 if (varTypeIsGC(foldType))
8954 {
8955 foldType = TYP_I_IMPL;
8956 }
8957
8958 if (sameTarget)
8959 {
8960 /* Both conditions must be the same */
8961
8962 if (t1->gtOper != t2->gtOper)
8963 {
8964 continue;
8965 }
8966
8967 if (t1->gtOper == GT_EQ)
8968 {
8969 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8970 So we will branch to BX if (c1&c2)==0 */
8971
8972 foldOp = GT_AND;
8973 cmpOp = GT_EQ;
8974 }
8975 else
8976 {
8977 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8978 So we will branch to BX if (c1|c2)!=0 */
8979
8980 foldOp = GT_OR;
8981 cmpOp = GT_NE;
8982 }
8983 }
8984 else
8985 {
8986 /* The b1 condition must be the reverse of the b2 condition */
8987
8988 if (t1->gtOper == t2->gtOper)
8989 {
8990 continue;
8991 }
8992
8993 if (t1->gtOper == GT_EQ)
8994 {
8995 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8996 So we will branch to BX if (c1&c2)!=0 */
8997
8998 foldOp = GT_AND;
8999 cmpOp = GT_NE;
9000 }
9001 else
9002 {
9003 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
9004 So we will branch to BX if (c1|c2)==0 */
9005
9006 foldOp = GT_OR;
9007 cmpOp = GT_EQ;
9008 }
9009 }
9010
9011 // Anding requires both values to be 0 or 1
9012
9013 if ((foldOp == GT_AND) && (!bool1 || !bool2))
9014 {
9015 continue;
9016 }
9017
9018 //
9019 // Now update the trees
9020 //
9021 GenTree* cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
9022 if (bool1 && bool2)
9023 {
9024 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
9025 cmpOp1->gtFlags |= GTF_BOOLEAN;
9026 }
9027
9028 t1->SetOper(cmpOp);
9029 t1->gtOp.gtOp1 = cmpOp1;
9030 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
9031
9032#if FEATURE_SET_FLAGS
9033 // For comparisons against zero we will have the GTF_SET_FLAGS set
9034 // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree)
9035 // during the CSE phase.
9036 //
            // So make sure to clear any GTF_SET_FLAGS bit on these operations,
            // as they are no longer feeding directly into a comparison against zero.
9039
9040 // Make sure that the GTF_SET_FLAGS bit is cleared.
9041 // Fix 388436 ARM JitStress WP7
9042 c1->gtFlags &= ~GTF_SET_FLAGS;
9043 c2->gtFlags &= ~GTF_SET_FLAGS;
9044
9045 // The new top level node that we just created does feed directly into
9046 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
9047 // we generate an instruction that sets the flags, which allows us
9048 // to omit the cmp with zero instruction.
9049
9050 // Request that the codegen for cmpOp1 sets the condition flags
9051 // when it generates the code for cmpOp1.
9052 //
9053 cmpOp1->gtRequestSetFlags();
9054#endif
9055
9056 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
9057 flowList* edge2;
9058
9059 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
9060
9061 if (sameTarget)
9062 {
9063 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
9064 }
9065 else
9066 {
9067 edge2 = fgGetPredForBlock(b2->bbNext, b2);
9068
9069 fgRemoveRefPred(b1->bbJumpDest, b1);
9070
9071 b1->bbJumpDest = b2->bbJumpDest;
9072
9073 fgAddRefPred(b2->bbJumpDest, b1);
9074 }
9075
9076 noway_assert(edge1 != nullptr);
9077 noway_assert(edge2 != nullptr);
9078
9079 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
9080 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
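
            // Merge the two branch edges' weight ranges onto edge1. The comparisons below
            // detect unsigned wrap-around of edgeSumMax; on overflow we fall back to the
            // widest possible range.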
9081 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
9082 {
9083 edge1->flEdgeWeightMin = edgeSumMin;
9084 edge1->flEdgeWeightMax = edgeSumMax;
9085 }
9086 else
9087 {
9088 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
9089 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
9090 }
9091
9092 /* Get rid of the second block (which is a BBJ_COND) */
9093
9094 noway_assert(b1->bbJumpKind == BBJ_COND);
9095 noway_assert(b2->bbJumpKind == BBJ_COND);
9096 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
9097 noway_assert(b1->bbNext == b2);
9098 noway_assert(b2->bbNext);
9099
9100 fgUnlinkBlock(b2);
9101 b2->bbFlags |= BBF_REMOVED;
9102
9103 // If b2 was the last block of a try or handler, update the EH table.
9104
9105 ehUpdateForDeletedBlock(b2);
9106
9107 /* Update bbRefs and bbPreds */
9108
9109 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
9110 * Remove pred 'b2' for 'b2->bbJumpDest' */
9111
9112 fgReplacePred(b2->bbNext, b2, b1);
9113
9114 fgRemoveRefPred(b2->bbJumpDest, b2);
9115
            /* We changed the flowgraph, so try again */

            change = true;
9127
9128 // Update loop table
9129 fgUpdateLoopsAfterCompacting(b1, b2);
9130
9131#ifdef DEBUG
9132 if (verbose)
9133 {
9134 printf("Folded %sboolean conditions of " FMT_BB " and " FMT_BB " to :\n",
9135 c2->OperIsLeaf() ? "" : "non-leaf ", b1->bbNum, b2->bbNum);
9136 gtDispTree(s1);
9137 printf("\n");
9138 }
9139#endif
9140 }
9141 } while (change);
9142
9143#ifdef DEBUG
9144 fgDebugCheckBBlist();
9145#endif
9146}
9147