1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX Optimizer XX |
9 | XX XX |
10 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
11 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
12 | */ |
13 | |
14 | #include "jitpch.h" |
15 | #ifdef _MSC_VER |
16 | #pragma hdrstop |
17 | #pragma warning(disable : 4701) |
18 | #endif |
19 | |
20 | /*****************************************************************************/ |
21 | |
22 | void Compiler::optInit() |
23 | { |
24 | optLoopsMarked = false; |
25 | fgHasLoops = false; |
26 | |
27 | /* Initialize the # of tracked loops to 0 */ |
28 | optLoopCount = 0; |
29 | optLoopTable = nullptr; |
30 | |
31 | /* Keep track of the number of calls and indirect calls made by this method */ |
32 | optCallCount = 0; |
33 | optIndirectCallCount = 0; |
34 | optNativeCallCount = 0; |
35 | optAssertionCount = 0; |
36 | optAssertionDep = nullptr; |
37 | #if FEATURE_ANYCSE |
38 | optCSECandidateTotal = 0; |
39 | optCSEstart = UINT_MAX; |
40 | optCSEcount = 0; |
41 | #endif // FEATURE_ANYCSE |
42 | } |
43 | |
44 | DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler) |
45 | { |
46 | } |
47 | |
48 | /***************************************************************************** |
49 | * |
50 | */ |
51 | |
52 | void Compiler::optSetBlockWeights() |
53 | { |
54 | noway_assert(opts.OptimizationEnabled()); |
55 | assert(fgDomsComputed); |
56 | |
57 | #ifdef DEBUG |
58 | bool changed = false; |
59 | #endif |
60 | |
61 | bool firstBBdomsRets = true; |
62 | |
63 | BasicBlock* block; |
64 | |
65 | for (block = fgFirstBB; (block != nullptr); block = block->bbNext) |
66 | { |
67 | /* Blocks that can't be reached via the first block are rarely executed */ |
68 | if (!fgReachable(fgFirstBB, block)) |
69 | { |
70 | block->bbSetRunRarely(); |
71 | } |
72 | |
73 | if (block->bbWeight != BB_ZERO_WEIGHT) |
74 | { |
75 | // Calculate our bbWeight: |
76 | // |
77 | // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks |
78 | // o otherwise BB_UNITY_WEIGHT / 2 |
79 | // |
80 | bool domsRets = true; // Assume that we will dominate |
81 | |
82 | for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next) |
83 | { |
84 | if (!fgDominate(block, retBlocks->block)) |
85 | { |
86 | domsRets = false; |
87 | break; |
88 | } |
89 | } |
90 | |
91 | if (block == fgFirstBB) |
92 | { |
93 | firstBBdomsRets = domsRets; |
94 | } |
95 | |
96 | // If we are not using profile weight then we lower the weight |
97 | // of blocks that do not dominate a return block |
98 | // |
99 | if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false)) |
100 | { |
101 | #if DEBUG |
102 | changed = true; |
103 | #endif |
104 | block->modifyBBWeight(block->bbWeight / 2); |
105 | noway_assert(block->bbWeight); |
106 | } |
107 | } |
108 | } |
109 | |
110 | #if DEBUG |
111 | if (changed && verbose) |
112 | { |
113 | printf("\nAfter optSetBlockWeights:\n" ); |
114 | fgDispBasicBlocks(); |
115 | printf("\n" ); |
116 | } |
117 | |
118 | /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */ |
119 | fgDebugCheckBBlist(); |
120 | #endif |
121 | } |
122 | |
123 | /***************************************************************************** |
124 | * |
125 | * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop. |
126 | */ |
127 | |
128 | void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk) |
129 | { |
/* Calculate the 'loopWeight':
   this is the amount by which we scale the weight of each block in the loop.
   Our heuristic is that loops are weighted eight times more
   than straight-line code.
   Thus we multiply the weight of each block in the loop by BB_LOOP_WEIGHT;
   if the loops are all properly formed this gives us
   (assuming that BB_LOOP_WEIGHT is 8):

     1 -- non-loop basic block
     8 -- single loop nesting
    64 -- double loop nesting
   512 -- triple loop nesting

*/
145 | |
146 | noway_assert(begBlk->bbNum <= endBlk->bbNum); |
147 | noway_assert(begBlk->isLoopHead()); |
148 | noway_assert(fgReachable(begBlk, endBlk)); |
149 | |
150 | #ifdef DEBUG |
151 | if (verbose) |
152 | { |
153 | printf("\nMarking loop L%02u" , begBlk->bbLoopNum); |
154 | } |
155 | #endif |
156 | |
157 | noway_assert(!opts.MinOpts()); |
158 | |
159 | /* Build list of backedges for block begBlk */ |
160 | flowList* backedgeList = nullptr; |
161 | |
162 | for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext) |
163 | { |
164 | /* Is this a backedge? */ |
165 | if (pred->flBlock->bbNum >= begBlk->bbNum) |
166 | { |
167 | flowList* flow = new (this, CMK_FlowList) flowList(); |
168 | |
169 | #if MEASURE_BLOCK_SIZE |
170 | genFlowNodeCnt += 1; |
171 | genFlowNodeSize += sizeof(flowList); |
172 | #endif // MEASURE_BLOCK_SIZE |
173 | |
174 | flow->flNext = backedgeList; |
175 | flow->flBlock = pred->flBlock; |
176 | backedgeList = flow; |
177 | } |
178 | } |
179 | |
180 | /* At least one backedge must have been found (the one from endBlk) */ |
181 | noway_assert(backedgeList); |
182 | |
183 | BasicBlock* curBlk = begBlk; |
184 | |
185 | while (true) |
186 | { |
187 | noway_assert(curBlk); |
188 | |
189 | // For curBlk to be part of a loop that starts at begBlk |
190 | // curBlk must be reachable from begBlk and (since this is a loop) |
191 | // likewise begBlk must be reachable from curBlk. |
192 | // |
193 | |
194 | if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) |
195 | { |
196 | /* If this block reaches any of the backedge blocks we set reachable */ |
197 | /* If this block dominates any of the backedge blocks we set dominates */ |
198 | bool reachable = false; |
199 | bool dominates = false; |
200 | |
201 | for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext) |
202 | { |
203 | BasicBlock* backedge = tmp->flBlock; |
204 | |
205 | if (!curBlk->isRunRarely()) |
206 | { |
207 | reachable |= fgReachable(curBlk, backedge); |
208 | dominates |= fgDominate(curBlk, backedge); |
209 | |
210 | if (dominates && reachable) |
211 | { |
212 | break; |
213 | } |
214 | } |
215 | } |
216 | |
217 | if (reachable) |
218 | { |
219 | noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT); |
220 | |
221 | unsigned weight; |
222 | |
223 | if (curBlk->hasProfileWeight()) |
224 | { |
// We have real profile weights, so we aren't going to change this block's weight
226 | weight = curBlk->bbWeight; |
227 | } |
228 | else |
229 | { |
230 | if (dominates) |
231 | { |
232 | weight = curBlk->bbWeight * BB_LOOP_WEIGHT; |
233 | } |
234 | else |
235 | { |
236 | weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2); |
237 | } |
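// For example (illustrative, assuming BB_LOOP_WEIGHT is 8): a block with
// weight 2 that dominates a backedge gets weight 16 here, while a block
// that merely reaches a backedge gets weight 8.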
238 | |
239 | // |
240 | // The multiplication may have caused us to overflow |
241 | // |
242 | if (weight < curBlk->bbWeight) |
243 | { |
244 | // The multiplication caused us to overflow |
245 | weight = BB_MAX_WEIGHT; |
246 | } |
247 | // |
248 | // Set the new weight |
249 | // |
250 | curBlk->modifyBBWeight(weight); |
251 | } |
252 | #ifdef DEBUG |
253 | if (verbose) |
254 | { |
255 | printf("\n " FMT_BB "(wt=%s)" , curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this))); |
256 | } |
257 | #endif |
258 | } |
259 | } |
260 | |
261 | /* Stop if we've reached the last block in the loop */ |
262 | |
263 | if (curBlk == endBlk) |
264 | { |
265 | break; |
266 | } |
267 | |
268 | curBlk = curBlk->bbNext; |
269 | |
270 | /* If we are excluding the endBlk then stop if we've reached endBlk */ |
271 | |
272 | if (excludeEndBlk && (curBlk == endBlk)) |
273 | { |
274 | break; |
275 | } |
276 | } |
277 | } |
278 | |
279 | /***************************************************************************** |
280 | * |
281 | * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop. |
282 | */ |
283 | |
284 | void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) |
285 | { |
/* A set of blocks that were previously marked as a loop are now
   to be unmarked, since we have decided that for some reason this
   loop no longer exists.
   Basically we are just resetting the blocks' bbWeight to their
   previous values.
*/
292 | |
293 | noway_assert(begBlk->bbNum <= endBlk->bbNum); |
294 | noway_assert(begBlk->isLoopHead()); |
295 | |
296 | noway_assert(!opts.MinOpts()); |
297 | |
298 | BasicBlock* curBlk; |
299 | unsigned backEdgeCount = 0; |
300 | |
301 | for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext) |
302 | { |
303 | curBlk = pred->flBlock; |
304 | |
305 | /* is this a backward edge? (from curBlk to begBlk) */ |
306 | |
307 | if (begBlk->bbNum > curBlk->bbNum) |
308 | { |
309 | continue; |
310 | } |
311 | |
312 | /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ |
313 | |
314 | if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS)) |
315 | { |
316 | continue; |
317 | } |
318 | |
319 | backEdgeCount++; |
320 | } |
321 | |
322 | /* Only unmark the loop blocks if we have exactly one loop back edge */ |
323 | if (backEdgeCount != 1) |
324 | { |
325 | #ifdef DEBUG |
326 | if (verbose) |
327 | { |
328 | if (backEdgeCount > 0) |
329 | { |
330 | printf("\nNot removing loop L%02u, due to an additional back edge" , begBlk->bbLoopNum); |
331 | } |
332 | else if (backEdgeCount == 0) |
333 | { |
334 | printf("\nNot removing loop L%02u, due to no back edge" , begBlk->bbLoopNum); |
335 | } |
336 | } |
337 | #endif |
338 | return; |
339 | } |
340 | noway_assert(backEdgeCount == 1); |
341 | noway_assert(fgReachable(begBlk, endBlk)); |
342 | |
343 | #ifdef DEBUG |
344 | if (verbose) |
345 | { |
346 | printf("\nUnmarking loop L%02u" , begBlk->bbLoopNum); |
347 | } |
348 | #endif |
349 | |
350 | curBlk = begBlk; |
351 | while (true) |
352 | { |
353 | noway_assert(curBlk); |
354 | |
355 | // For curBlk to be part of a loop that starts at begBlk |
356 | // curBlk must be reachable from begBlk and (since this is a loop) |
357 | // likewise begBlk must be reachable from curBlk. |
358 | // |
359 | if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) |
360 | { |
361 | unsigned weight = curBlk->bbWeight; |
362 | |
363 | // Don't unmark blocks that are set to BB_MAX_WEIGHT |
364 | // Don't unmark blocks when we are using profile weights |
365 | // |
366 | if (!curBlk->isMaxBBWeight() && !curBlk->hasProfileWeight()) |
367 | { |
368 | if (!fgDominate(curBlk, endBlk)) |
369 | { |
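// Blocks that do not dominate endBlk were presumably scaled by only
// BB_LOOP_WEIGHT / 2 when the loop was marked (see optMarkLoopBlocks),
// so scale the weight up by 2 here before it is divided by BB_LOOP_WEIGHT below.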
370 | weight *= 2; |
371 | } |
372 | else |
373 | { |
374 | /* Merging of blocks can disturb the Dominates |
375 | information (see RAID #46649) */ |
376 | if (weight < BB_LOOP_WEIGHT) |
377 | { |
378 | weight *= 2; |
379 | } |
380 | } |
381 | |
382 | // We can overflow here so check for it |
383 | if (weight < curBlk->bbWeight) |
384 | { |
385 | weight = BB_MAX_WEIGHT; |
386 | } |
387 | |
388 | assert(weight >= BB_LOOP_WEIGHT); |
389 | |
390 | curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT); |
391 | } |
392 | |
393 | #ifdef DEBUG |
394 | if (verbose) |
395 | { |
396 | printf("\n " FMT_BB "(wt=%s)" , curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this))); |
397 | } |
398 | #endif |
399 | } |
400 | /* Stop if we've reached the last block in the loop */ |
401 | |
402 | if (curBlk == endBlk) |
403 | { |
404 | break; |
405 | } |
406 | |
407 | curBlk = curBlk->bbNext; |
408 | |
409 | /* Stop if we go past the last block in the loop, as it may have been deleted */ |
410 | if (curBlk->bbNum > endBlk->bbNum) |
411 | { |
412 | break; |
413 | } |
414 | } |
415 | } |
416 | |
417 | /***************************************************************************************************** |
418 | * |
419 | * Function called to update the loop table and bbWeight before removing a block |
420 | */ |
421 | |
422 | void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop) |
423 | { |
424 | if (!optLoopsMarked) |
425 | { |
426 | return; |
427 | } |
428 | |
429 | noway_assert(!opts.MinOpts()); |
430 | |
431 | bool removeLoop = false; |
432 | |
433 | /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */ |
434 | /* Special case: the block was the head of a loop - or pointing to a loop entry */ |
435 | |
436 | for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) |
437 | { |
438 | /* Some loops may have been already removed by |
439 | * loop unrolling or conditional folding */ |
440 | |
441 | if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED) |
442 | { |
443 | continue; |
444 | } |
445 | |
446 | if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom) |
447 | { |
448 | optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; |
449 | continue; |
450 | } |
451 | |
452 | #ifdef DEBUG |
453 | if (verbose) |
454 | { |
455 | printf("\nUpdateLoopsBeforeRemoveBlock Before: " ); |
456 | optPrintLoopInfo(loopNum); |
457 | } |
458 | #endif |
459 | |
460 | /* If the loop is still in the table |
461 | * any block in the loop must be reachable !!! */ |
462 | |
463 | noway_assert(optLoopTable[loopNum].lpEntry != block); |
464 | noway_assert(optLoopTable[loopNum].lpBottom != block); |
465 | |
466 | if (optLoopTable[loopNum].lpExit == block) |
467 | { |
468 | optLoopTable[loopNum].lpExit = nullptr; |
469 | optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT; |
471 | } |
472 | |
473 | /* If this points to the actual entry in the loop |
474 | * then the whole loop may become unreachable */ |
475 | |
476 | switch (block->bbJumpKind) |
477 | { |
478 | unsigned jumpCnt; |
479 | BasicBlock** jumpTab; |
480 | |
481 | case BBJ_NONE: |
482 | case BBJ_COND: |
483 | if (block->bbNext == optLoopTable[loopNum].lpEntry) |
484 | { |
485 | removeLoop = true; |
486 | break; |
487 | } |
488 | if (block->bbJumpKind == BBJ_NONE) |
489 | { |
490 | break; |
491 | } |
492 | |
493 | __fallthrough; |
494 | |
495 | case BBJ_ALWAYS: |
496 | noway_assert(block->bbJumpDest); |
497 | if (block->bbJumpDest == optLoopTable[loopNum].lpEntry) |
498 | { |
499 | removeLoop = true; |
500 | } |
501 | break; |
502 | |
503 | case BBJ_SWITCH: |
504 | jumpCnt = block->bbJumpSwt->bbsCount; |
505 | jumpTab = block->bbJumpSwt->bbsDstTab; |
506 | |
507 | do |
508 | { |
509 | noway_assert(*jumpTab); |
510 | if ((*jumpTab) == optLoopTable[loopNum].lpEntry) |
511 | { |
512 | removeLoop = true; |
513 | } |
514 | } while (++jumpTab, --jumpCnt); |
515 | break; |
516 | |
517 | default: |
518 | break; |
519 | } |
520 | |
521 | if (removeLoop) |
522 | { |
523 | /* Check if the entry has other predecessors outside the loop |
524 | * TODO: Replace this when predecessors are available */ |
525 | |
526 | BasicBlock* auxBlock; |
527 | for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext) |
528 | { |
529 | /* Ignore blocks in the loop */ |
530 | |
531 | if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum && |
532 | auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum) |
533 | { |
534 | continue; |
535 | } |
536 | |
537 | switch (auxBlock->bbJumpKind) |
538 | { |
539 | unsigned jumpCnt; |
540 | BasicBlock** jumpTab; |
541 | |
542 | case BBJ_NONE: |
543 | case BBJ_COND: |
544 | if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry) |
545 | { |
546 | removeLoop = false; |
547 | break; |
548 | } |
549 | if (auxBlock->bbJumpKind == BBJ_NONE) |
550 | { |
551 | break; |
552 | } |
553 | |
554 | __fallthrough; |
555 | |
556 | case BBJ_ALWAYS: |
557 | noway_assert(auxBlock->bbJumpDest); |
558 | if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry) |
559 | { |
560 | removeLoop = false; |
561 | } |
562 | break; |
563 | |
564 | case BBJ_SWITCH: |
565 | jumpCnt = auxBlock->bbJumpSwt->bbsCount; |
566 | jumpTab = auxBlock->bbJumpSwt->bbsDstTab; |
567 | |
568 | do |
569 | { |
570 | noway_assert(*jumpTab); |
571 | if ((*jumpTab) == optLoopTable[loopNum].lpEntry) |
572 | { |
573 | removeLoop = false; |
574 | } |
575 | } while (++jumpTab, --jumpCnt); |
576 | break; |
577 | |
578 | default: |
579 | break; |
580 | } |
581 | } |
582 | |
583 | if (removeLoop) |
584 | { |
585 | optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; |
586 | } |
587 | } |
588 | else if (optLoopTable[loopNum].lpHead == block) |
589 | { |
590 | /* The loop has a new head - Just update the loop table */ |
591 | optLoopTable[loopNum].lpHead = block->bbPrev; |
592 | } |
593 | |
594 | #ifdef DEBUG |
595 | if (verbose) |
596 | { |
597 | printf("\nUpdateLoopsBeforeRemoveBlock After: " ); |
598 | optPrintLoopInfo(loopNum); |
599 | } |
600 | #endif |
601 | } |
602 | |
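// If the block being removed is the source of a backedge to a marked loop head
// (it jumps backwards to a block marked as a loop head), and the reachability and
// dominator information that optUnmarkLoopBlocks relies on is still valid, then
// unmark the loop blocks, since removing this block removes that backedge.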
603 | if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) && |
604 | (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed && |
605 | (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block)) |
606 | { |
607 | optUnmarkLoopBlocks(block->bbJumpDest, block); |
608 | } |
609 | } |
610 | |
611 | #ifdef DEBUG |
612 | |
613 | /***************************************************************************** |
614 | * |
615 | * Given the beginBlock of the loop, return the index of this loop |
616 | * to the loop table. |
617 | */ |
618 | |
619 | unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk) |
620 | { |
621 | unsigned lnum = 0; |
622 | |
623 | for (lnum = 0; lnum < optLoopCount; lnum++) |
624 | { |
625 | if (optLoopTable[lnum].lpHead->bbNext == begBlk) |
626 | { |
627 | // Found the loop. |
628 | return lnum; |
629 | } |
630 | } |
631 | |
632 | noway_assert(!"Loop number not found." ); |
633 | |
634 | return optLoopCount; |
635 | } |
636 | |
637 | /***************************************************************************** |
638 | * |
* Print loop info in a uniform way.
640 | */ |
641 | |
642 | void Compiler::optPrintLoopInfo(unsigned loopInd, |
643 | BasicBlock* lpHead, |
644 | BasicBlock* lpFirst, |
645 | BasicBlock* lpTop, |
646 | BasicBlock* lpEntry, |
647 | BasicBlock* lpBottom, |
648 | unsigned char lpExitCnt, |
649 | BasicBlock* lpExit, |
650 | unsigned parentLoop) |
651 | { |
652 | noway_assert(lpHead); |
653 | |
654 | // |
655 | // NOTE: we take "loopInd" as an argument instead of using the one |
656 | // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum |
657 | // has not be set correctly. For example, in optRecordLoop(). |
658 | // However, in most of the cases, loops should have been recorded. |
659 | // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum) |
660 | // version of this method. |
661 | // |
662 | printf("L%02u, from " FMT_BB, loopInd, lpFirst->bbNum); |
663 | if (lpTop != lpFirst) |
664 | { |
665 | printf(" (loop top is " FMT_BB ")" , lpTop->bbNum); |
666 | } |
667 | |
668 | printf(" to " FMT_BB " (Head=" FMT_BB ", Entry=" FMT_BB ", ExitCnt=%d" , lpBottom->bbNum, lpHead->bbNum, |
669 | lpEntry->bbNum, lpExitCnt); |
670 | |
671 | if (lpExitCnt == 1) |
672 | { |
673 | printf(" at " FMT_BB, lpExit->bbNum); |
674 | } |
675 | |
676 | if (parentLoop != BasicBlock::NOT_IN_LOOP) |
677 | { |
678 | printf(", parent loop = L%02u" , parentLoop); |
679 | } |
680 | printf(")" ); |
681 | } |
682 | |
683 | /***************************************************************************** |
684 | * |
685 | * Print loop information given the index of the loop in the loop table. |
686 | */ |
687 | |
688 | void Compiler::optPrintLoopInfo(unsigned lnum) |
689 | { |
690 | noway_assert(lnum < optLoopCount); |
691 | |
692 | LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table. |
693 | |
694 | optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt, |
695 | ldsc->lpExit, ldsc->lpParent); |
696 | } |
697 | |
698 | #endif |
699 | |
700 | //------------------------------------------------------------------------ |
701 | // optPopulateInitInfo: Populate loop init info in the loop table. |
702 | // |
703 | // Arguments: |
704 | // init - the tree that is supposed to initialize the loop iterator. |
705 | // iterVar - loop iteration variable. |
706 | // |
707 | // Return Value: |
708 | // "false" if the loop table could not be populated with the loop iterVar init info. |
709 | // |
710 | // Operation: |
// The 'init' tree is checked to see whether its LHS is the local "iterVar" and
// its RHS is either an int constant or a local var.
713 | // |
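// For example (illustrative): for a loop written as "for (int i = 0; ...)", 'init' is
// expected to be the assignment "i = 0", which sets LPFLG_CONST_INIT and records
// lpConstInit = 0; an initialization from another local ("i = j") sets LPFLG_VAR_INIT
// and records lpVarInit instead.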
714 | bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTree* init, unsigned iterVar) |
715 | { |
716 | // Operator should be = |
717 | if (init->gtOper != GT_ASG) |
718 | { |
719 | return false; |
720 | } |
721 | |
722 | GenTree* lhs = init->gtOp.gtOp1; |
723 | GenTree* rhs = init->gtOp.gtOp2; |
724 | // LHS has to be local and should equal iterVar. |
725 | if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar) |
726 | { |
727 | return false; |
728 | } |
729 | |
730 | // RHS can be constant or local var. |
731 | // TODO-CQ: CLONE: Add arr length for descending loops. |
732 | if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT) |
733 | { |
734 | optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT; |
735 | optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal; |
736 | } |
737 | else if (rhs->gtOper == GT_LCL_VAR) |
738 | { |
739 | optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT; |
740 | optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum; |
741 | } |
742 | else |
743 | { |
744 | return false; |
745 | } |
746 | return true; |
747 | } |
748 | |
749 | //---------------------------------------------------------------------------------- |
750 | // optCheckIterInLoopTest: Check if iter var is used in loop test. |
751 | // |
752 | // Arguments: |
753 | // test "jtrue" tree or an asg of the loop iter termination condition |
754 | // from/to blocks (beg, end) which are part of the loop. |
755 | // iterVar loop iteration variable. |
756 | // loopInd loop index. |
757 | // |
758 | // Operation: |
759 | // The test tree is parsed to check if "iterVar" matches the lhs of the condition |
760 | // and the rhs limit is extracted from the "test" tree. The limit information is |
761 | // added to the loop table. |
762 | // |
763 | // Return Value: |
764 | // "false" if the loop table could not be populated with the loop test info or |
765 | // if the test condition doesn't involve iterVar. |
766 | // |
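// For example (illustrative): a test such as "i < 10" sets LPFLG_CONST_LIMIT, "i < n"
// (where 'n' is a local not assigned between 'from' and 'to') sets LPFLG_VAR_LIMIT,
// and "i < a.Length" sets LPFLG_ARRLEN_LIMIT.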
767 | bool Compiler::optCheckIterInLoopTest( |
768 | unsigned loopInd, GenTree* test, BasicBlock* from, BasicBlock* to, unsigned iterVar) |
769 | { |
770 | // Obtain the relop from the "test" tree. |
771 | GenTree* relop; |
772 | if (test->gtOper == GT_JTRUE) |
773 | { |
774 | relop = test->gtGetOp1(); |
775 | } |
776 | else |
777 | { |
778 | assert(test->gtOper == GT_ASG); |
779 | relop = test->gtGetOp2(); |
780 | } |
781 | |
782 | noway_assert(relop->OperKind() & GTK_RELOP); |
783 | |
784 | GenTree* opr1 = relop->gtOp.gtOp1; |
785 | GenTree* opr2 = relop->gtOp.gtOp2; |
786 | |
787 | GenTree* iterOp; |
788 | GenTree* limitOp; |
789 | |
790 | // Make sure op1 or op2 is the iterVar. |
791 | if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar) |
792 | { |
793 | iterOp = opr1; |
794 | limitOp = opr2; |
795 | } |
796 | else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar) |
797 | { |
798 | iterOp = opr2; |
799 | limitOp = opr1; |
800 | } |
801 | else |
802 | { |
803 | return false; |
804 | } |
805 | |
806 | if (iterOp->gtType != TYP_INT) |
807 | { |
808 | return false; |
809 | } |
810 | |
811 | // Mark the iterator node. |
812 | iterOp->gtFlags |= GTF_VAR_ITERATOR; |
813 | |
814 | // Check what type of limit we have - constant, variable or arr-len. |
815 | if (limitOp->gtOper == GT_CNS_INT) |
816 | { |
817 | optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT; |
818 | if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0) |
819 | { |
820 | optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT; |
821 | } |
822 | } |
823 | else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum)) |
824 | { |
825 | optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT; |
826 | } |
827 | else if (limitOp->gtOper == GT_ARR_LENGTH) |
828 | { |
829 | optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT; |
830 | } |
831 | else |
832 | { |
833 | return false; |
834 | } |
835 | // Save the type of the comparison between the iterator and the limit. |
836 | optLoopTable[loopInd].lpTestTree = relop; |
837 | return true; |
838 | } |
839 | |
840 | //---------------------------------------------------------------------------------- |
// optIsLoopIncrTree: Check if the incr tree is of the form "v += 1" or "v = v + 1".
//
// Arguments:
//      incr       The incr tree to be checked, either an oper-equal (+=, -=, ...) node
//                 or a "v = v op ..." type ASG node.
//
// Operation:
//      The incr tree is checked to be an update of a local ("v") by a constant int,
//      where the update operator is one of ADD, SUB, MUL, LSH or RSH.
//
// Return Value:
//      iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
854 | // |
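// For example (illustrative): "i += 2" and "i = i - 1" both return the local number of 'i',
// while "i += j" returns BAD_VAR_NUM because the increment is not a constant int.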
855 | unsigned Compiler::optIsLoopIncrTree(GenTree* incr) |
856 | { |
857 | GenTree* incrVal; |
858 | genTreeOps updateOper; |
859 | unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper); |
860 | if (iterVar != BAD_VAR_NUM) |
861 | { |
862 | // We have v = v op y type asg node. |
863 | switch (updateOper) |
864 | { |
865 | case GT_ADD: |
866 | case GT_SUB: |
867 | case GT_MUL: |
868 | case GT_RSH: |
869 | case GT_LSH: |
870 | break; |
871 | default: |
872 | return BAD_VAR_NUM; |
873 | } |
874 | |
875 | // Increment should be by a const int. |
876 | // TODO-CQ: CLONE: allow variable increments. |
877 | if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT)) |
878 | { |
879 | return BAD_VAR_NUM; |
880 | } |
881 | } |
882 | |
883 | return iterVar; |
884 | } |
885 | |
886 | //---------------------------------------------------------------------------------- |
887 | // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant. |
888 | // |
889 | // Arguments: |
890 | // from, to - are blocks (beg, end) which are part of the loop. |
891 | // incr - tree that increments the loop iterator. v+=1 or v=v+1. |
892 | // pIterVar - see return value. |
893 | // |
894 | // Return Value: |
895 | // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns |
896 | // false. |
897 | // |
898 | // Operation: |
899 | // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not |
900 | // assigned in the loop. |
901 | // |
902 | bool Compiler::optComputeIterInfo(GenTree* incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar) |
903 | { |
904 | |
905 | unsigned iterVar = optIsLoopIncrTree(incr); |
906 | if (iterVar == BAD_VAR_NUM) |
907 | { |
908 | return false; |
909 | } |
910 | if (optIsVarAssigned(from, to, incr, iterVar)) |
911 | { |
912 | JITDUMP("iterVar is assigned in loop\n" ); |
913 | return false; |
914 | } |
915 | |
916 | *pIterVar = iterVar; |
917 | return true; |
918 | } |
919 | |
920 | //---------------------------------------------------------------------------------- |
921 | // optIsLoopTestEvalIntoTemp: |
922 | // Pattern match if the test tree is computed into a tmp |
923 | // and the "tmp" is used as jump condition for loop termination. |
924 | // |
925 | // Arguments: |
926 | // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0) |
927 | // where Vtmp contains the actual loop test result. |
928 | // newStmt - contains the statement that is the actual test stmt involving |
929 | // the loop iterator. |
930 | // |
931 | // Return Value: |
932 | // Returns true if a new test tree can be obtained. |
933 | // |
934 | // Operation: |
935 | // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition |
936 | // Then returns the rhs for def of Vtmp as the "test" node. |
937 | // |
938 | // Note: |
939 | // This method just retrieves what it thinks is the "test" node, |
940 | // the callers are expected to verify that "iterVar" is used in the test. |
941 | // |
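// For example (illustrative): given the statements "tmp = (i < n);" followed by
// "jmpTrue(tmp != 0)", this returns the "tmp = (i < n)" statement as the new test.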
942 | bool Compiler::optIsLoopTestEvalIntoTemp(GenTree* testStmt, GenTree** newTest) |
943 | { |
944 | GenTree* test = testStmt->gtStmt.gtStmtExpr; |
945 | |
946 | if (test->gtOper != GT_JTRUE) |
947 | { |
948 | return false; |
949 | } |
950 | |
951 | GenTree* relop = test->gtGetOp1(); |
952 | noway_assert(relop->OperIsCompare()); |
953 | |
954 | GenTree* opr1 = relop->gtOp.gtOp1; |
955 | GenTree* opr2 = relop->gtOp.gtOp2; |
956 | |
957 | // Make sure we have jtrue (vtmp != 0) |
958 | if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) && |
959 | opr2->IsIntegralConst(0)) |
960 | { |
961 | // Get the previous statement to get the def (rhs) of Vtmp to see |
962 | // if the "test" is evaluated into Vtmp. |
963 | GenTree* prevStmt = testStmt->gtPrev; |
964 | if (prevStmt == nullptr) |
965 | { |
966 | return false; |
967 | } |
968 | |
969 | GenTree* tree = prevStmt->gtStmt.gtStmtExpr; |
970 | if (tree->OperGet() == GT_ASG) |
971 | { |
972 | GenTree* lhs = tree->gtOp.gtOp1; |
973 | GenTree* rhs = tree->gtOp.gtOp2; |
974 | |
975 | // Return as the new test node. |
976 | if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum()) |
977 | { |
978 | if (rhs->OperIsCompare()) |
979 | { |
980 | *newTest = prevStmt; |
981 | return true; |
982 | } |
983 | } |
984 | } |
985 | } |
986 | return false; |
987 | } |
988 | |
989 | //---------------------------------------------------------------------------------- |
990 | // optExtractInitTestIncr: |
991 | // Extract the "init", "test" and "incr" nodes of the loop. |
992 | // |
993 | // Arguments: |
994 | // head - Loop head block |
995 | // bottom - Loop bottom block |
996 | // top - Loop top block |
997 | // ppInit - The init stmt of the loop if found. |
998 | // ppTest - The test stmt of the loop if found. |
999 | // ppIncr - The incr stmt of the loop if found. |
1000 | // |
1001 | // Return Value: |
1002 | // The results are put in "ppInit", "ppTest" and "ppIncr" if the method |
1003 | // returns true. Returns false if the information can't be extracted. |
1004 | // |
1005 | // Operation: |
1006 | // Check if the "test" stmt is last stmt in the loop "bottom". If found good, |
1007 | // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of |
1008 | // "test" to get the "incr" stmt. If it is not found it could be a loop of the |
1009 | // below form. |
1010 | // |
1011 | // +-------<-----------------<-----------+ |
1012 | // | | |
1013 | // v | |
1014 | // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^ |
1015 | // |
1016 | // Check if the "incr" tree is present in the loop "top" node as the last stmt. |
1017 | // Also check if the "test" tree is assigned to a tmp node and the tmp is used |
1018 | // in the jtrue condition. |
1019 | // |
1020 | // Note: |
1021 | // This method just retrieves what it thinks is the "test" node, |
1022 | // the callers are expected to verify that "iterVar" is used in the test. |
1023 | // |
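// For the common do-while shape (illustrative), the last two statements of 'bottom' are
// expected to look like "i = i + 1;" followed by "jmpTrue(i < n)", and the last statement
// of the pre-header 'head' is expected to be the initialization "i = 0".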
bool Compiler::optExtractInitTestIncr(
1025 | BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr) |
1026 | { |
1027 | assert(ppInit != nullptr); |
1028 | assert(ppTest != nullptr); |
1029 | assert(ppIncr != nullptr); |
1030 | |
1031 | // Check if last two statements in the loop body are the increment of the iterator |
1032 | // and the loop termination test. |
1033 | noway_assert(bottom->bbTreeList != nullptr); |
1034 | GenTree* test = bottom->bbTreeList->gtPrev; |
1035 | noway_assert(test != nullptr && test->gtNext == nullptr); |
1036 | |
1037 | GenTree* newTest; |
1038 | if (optIsLoopTestEvalIntoTemp(test, &newTest)) |
1039 | { |
1040 | test = newTest; |
1041 | } |
1042 | |
1043 | // Check if we have the incr tree before the test tree, if we don't, |
1044 | // check if incr is part of the loop "top". |
1045 | GenTree* incr = test->gtPrev; |
1046 | if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM) |
1047 | { |
1048 | if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr) |
1049 | { |
1050 | return false; |
1051 | } |
1052 | |
1053 | // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp. |
1054 | GenTree* topLast = top->bbTreeList->gtPrev; |
1055 | if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM) |
1056 | { |
1057 | incr = topLast; |
1058 | } |
1059 | else |
1060 | { |
1061 | return false; |
1062 | } |
1063 | } |
1064 | |
1065 | assert(test != incr); |
1066 | |
1067 | // Find the last statement in the loop pre-header which we expect to be the initialization of |
1068 | // the loop iterator. |
1069 | GenTree* phdr = head->bbTreeList; |
1070 | if (phdr == nullptr) |
1071 | { |
1072 | return false; |
1073 | } |
1074 | |
1075 | GenTree* init = phdr->gtPrev; |
1076 | noway_assert(init != nullptr && (init->gtNext == nullptr)); |
1077 | |
1078 | // If it is a duplicated loop condition, skip it. |
1079 | if (init->gtFlags & GTF_STMT_CMPADD) |
1080 | { |
1081 | bool doGetPrev = true; |
1082 | #ifdef DEBUG |
1083 | if (opts.optRepeat) |
1084 | { |
1085 | // Previous optimization passes may have inserted compiler-generated |
1086 | // statements other than duplicated loop conditions. |
1087 | doGetPrev = (init->gtPrev != nullptr); |
1088 | } |
1089 | else |
1090 | { |
1091 | // Must be a duplicated loop condition. |
1092 | noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); |
1093 | } |
1094 | #endif // DEBUG |
1095 | if (doGetPrev) |
1096 | { |
1097 | init = init->gtPrev; |
1098 | } |
1099 | noway_assert(init != nullptr); |
1100 | } |
1101 | |
1102 | noway_assert(init->gtOper == GT_STMT); |
1103 | noway_assert(test->gtOper == GT_STMT); |
1104 | noway_assert(incr->gtOper == GT_STMT); |
1105 | |
1106 | *ppInit = init->gtStmt.gtStmtExpr; |
1107 | *ppTest = test->gtStmt.gtStmtExpr; |
1108 | *ppIncr = incr->gtStmt.gtStmtExpr; |
1109 | |
1110 | return true; |
1111 | } |
1112 | |
1113 | /***************************************************************************** |
1114 | * |
1115 | * Record the loop in the loop table. Return true if successful, false if |
1116 | * out of entries in loop table. |
1117 | */ |
1118 | |
1119 | bool Compiler::optRecordLoop(BasicBlock* head, |
1120 | BasicBlock* first, |
1121 | BasicBlock* top, |
1122 | BasicBlock* entry, |
1123 | BasicBlock* bottom, |
1124 | BasicBlock* exit, |
1125 | unsigned char exitCnt) |
1126 | { |
1127 | // Record this loop in the table, if there's room. |
1128 | |
1129 | assert(optLoopCount <= MAX_LOOP_NUM); |
1130 | if (optLoopCount == MAX_LOOP_NUM) |
1131 | { |
1132 | #if COUNT_LOOPS |
1133 | loopOverflowThisMethod = true; |
1134 | #endif |
1135 | return false; |
1136 | } |
1137 | |
1138 | // Assumed preconditions on the loop we're adding. |
1139 | assert(first->bbNum <= top->bbNum); |
1140 | assert(top->bbNum <= entry->bbNum); |
1141 | assert(entry->bbNum <= bottom->bbNum); |
1142 | assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum); |
1143 | |
1144 | unsigned char loopInd = optLoopCount; |
1145 | |
1146 | if (optLoopTable == nullptr) |
1147 | { |
1148 | assert(loopInd == 0); |
1149 | optLoopTable = getAllocator(CMK_LoopOpt).allocate<LoopDsc>(MAX_LOOP_NUM); |
1150 | } |
1151 | else |
1152 | { |
1153 | // If the new loop contains any existing ones, add it in the right place. |
1154 | for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--) |
1155 | { |
1156 | unsigned char prev = prevPlus1 - 1; |
1157 | if (optLoopTable[prev].lpContainedBy(first, bottom)) |
1158 | { |
1159 | loopInd = prev; |
1160 | } |
1161 | } |
1162 | // Move up any loops if necessary. |
1163 | for (unsigned j = optLoopCount; j > loopInd; j--) |
1164 | { |
1165 | optLoopTable[j] = optLoopTable[j - 1]; |
1166 | } |
1167 | } |
1168 | |
1169 | #ifdef DEBUG |
1170 | for (unsigned i = loopInd + 1; i < optLoopCount; i++) |
1171 | { |
1172 | // The loop is well-formed. |
1173 | assert(optLoopTable[i].lpWellFormed()); |
1174 | // Check for disjoint. |
1175 | if (optLoopTable[i].lpDisjoint(first, bottom)) |
1176 | { |
1177 | continue; |
1178 | } |
1179 | // Otherwise, assert complete containment (of optLoopTable[i] in new loop). |
1180 | assert(optLoopTable[i].lpContainedBy(first, bottom)); |
1181 | } |
1182 | #endif // DEBUG |
1183 | |
1184 | optLoopTable[loopInd].lpHead = head; |
1185 | optLoopTable[loopInd].lpFirst = first; |
1186 | optLoopTable[loopInd].lpTop = top; |
1187 | optLoopTable[loopInd].lpBottom = bottom; |
1188 | optLoopTable[loopInd].lpEntry = entry; |
1189 | optLoopTable[loopInd].lpExit = exit; |
1190 | optLoopTable[loopInd].lpExitCnt = exitCnt; |
1191 | |
1192 | optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP; |
1193 | optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP; |
1194 | optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP; |
1195 | |
1196 | optLoopTable[loopInd].lpAsgVars = AllVarSetOps::UninitVal(); |
1197 | |
1198 | optLoopTable[loopInd].lpFlags = 0; |
1199 | |
1200 | // We haven't yet recorded any side effects. |
1201 | for (MemoryKind memoryKind : allMemoryKinds()) |
1202 | { |
1203 | optLoopTable[loopInd].lpLoopHasMemoryHavoc[memoryKind] = false; |
1204 | } |
1205 | optLoopTable[loopInd].lpFieldsModified = nullptr; |
1206 | optLoopTable[loopInd].lpArrayElemTypesModified = nullptr; |
1207 | |
1208 | // If DO-WHILE loop mark it as such. |
1209 | if (head->bbNext == entry) |
1210 | { |
1211 | optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE; |
1212 | } |
1213 | |
1214 | // If single exit loop mark it as such. |
1215 | if (exitCnt == 1) |
1216 | { |
1217 | noway_assert(exit); |
1218 | optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT; |
1219 | } |
1220 | |
1221 | // |
1222 | // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }" |
1223 | // We have the following restrictions: |
1224 | // 1. The loop condition must be a simple one i.e. only one JTRUE node |
1225 | // 2. There must be a loop iterator (a local var) that is |
1226 | // incremented (decremented or lsh, rsh, mul) with a constant value |
1227 | // 3. The iterator is incremented exactly once |
1228 | // 4. The loop condition must use the iterator. |
1229 | // |
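// A canonical example that satisfies these restrictions (illustrative):
//     for (int i = 0; i < n; i++) { ... }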
1230 | if (bottom->bbJumpKind == BBJ_COND) |
1231 | { |
1232 | GenTree* init; |
1233 | GenTree* test; |
1234 | GenTree* incr; |
1235 | if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr)) |
1236 | { |
1237 | goto DONE_LOOP; |
1238 | } |
1239 | |
1240 | unsigned iterVar = BAD_VAR_NUM; |
1241 | if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar)) |
1242 | { |
1243 | goto DONE_LOOP; |
1244 | } |
1245 | |
1246 | // Make sure the "iterVar" initialization is never skipped, |
1247 | // i.e. every pred of ENTRY other than HEAD is in the loop. |
1248 | for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext) |
1249 | { |
1250 | BasicBlock* predBlock = predEdge->flBlock; |
1251 | if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock)) |
1252 | { |
1253 | goto DONE_LOOP; |
1254 | } |
1255 | } |
1256 | |
1257 | if (!optPopulateInitInfo(loopInd, init, iterVar)) |
1258 | { |
1259 | goto DONE_LOOP; |
1260 | } |
1261 | |
1262 | // Check that the iterator is used in the loop condition. |
1263 | if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar)) |
1264 | { |
1265 | goto DONE_LOOP; |
1266 | } |
1267 | |
1268 | // We know the loop has an iterator at this point ->flag it as LPFLG_ITER |
1269 | // Record the iterator, the pointer to the test node |
1270 | // and the initial value of the iterator (constant or local var) |
1271 | optLoopTable[loopInd].lpFlags |= LPFLG_ITER; |
1272 | |
1273 | // Record iterator. |
1274 | optLoopTable[loopInd].lpIterTree = incr; |
1275 | |
1276 | #if COUNT_LOOPS |
1277 | // Save the initial value of the iterator - can be lclVar or constant |
1278 | // Flag the loop accordingly. |
1279 | |
1280 | iterLoopCount++; |
1281 | #endif |
1282 | |
1283 | #if COUNT_LOOPS |
1284 | simpleTestLoopCount++; |
1285 | #endif |
1286 | |
1287 | // Check if a constant iteration loop. |
1288 | if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)) |
1289 | { |
1290 | // This is a constant loop. |
1291 | optLoopTable[loopInd].lpFlags |= LPFLG_CONST; |
1292 | #if COUNT_LOOPS |
1293 | constIterLoopCount++; |
1294 | #endif |
1295 | } |
1296 | |
1297 | #ifdef DEBUG |
1298 | if (verbose && 0) |
1299 | { |
1300 | printf("\nConstant loop initializer:\n" ); |
1301 | gtDispTree(init); |
1302 | |
1303 | printf("\nConstant loop body:\n" ); |
1304 | |
1305 | BasicBlock* block = head; |
1306 | do |
1307 | { |
1308 | block = block->bbNext; |
1309 | for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) |
1310 | { |
1311 | if (stmt->gtStmt.gtStmtExpr == incr) |
1312 | { |
1313 | break; |
1314 | } |
1315 | printf("\n" ); |
1316 | gtDispTree(stmt->gtStmt.gtStmtExpr); |
1317 | } |
1318 | } while (block != bottom); |
1319 | } |
1320 | #endif // DEBUG |
1321 | } |
1322 | |
1323 | DONE_LOOP: |
1324 | DBEXEC(verbose, optPrintLoopRecording(loopInd)); |
1325 | optLoopCount++; |
1326 | return true; |
1327 | } |
1328 | |
1329 | #ifdef DEBUG |
1330 | //------------------------------------------------------------------------ |
1331 | // optPrintLoopRecording: Print a recording of the loop. |
1332 | // |
1333 | // Arguments: |
1334 | // loopInd - loop index. |
1335 | // |
1336 | void Compiler::optPrintLoopRecording(unsigned loopInd) |
1337 | { |
1338 | printf("Recorded loop %s" , (loopInd != optLoopCount ? "(extended) " : "" )); |
1339 | optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added. |
1340 | optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop, |
1341 | optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt, |
1342 | optLoopTable[loopInd].lpExit); |
1343 | |
1344 | // If an iterator loop print the iterator and the initialization. |
1345 | if (optLoopTable[loopInd].lpFlags & LPFLG_ITER) |
1346 | { |
1347 | printf(" [over V%02u" , optLoopTable[loopInd].lpIterVar()); |
1348 | printf(" (" ); |
1349 | printf(GenTree::OpName(optLoopTable[loopInd].lpIterOper())); |
1350 | printf(" " ); |
1351 | printf("%d )" , optLoopTable[loopInd].lpIterConst()); |
1352 | |
1353 | if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) |
1354 | { |
1355 | printf(" from %d" , optLoopTable[loopInd].lpConstInit); |
1356 | } |
1357 | if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT) |
1358 | { |
1359 | printf(" from V%02u" , optLoopTable[loopInd].lpVarInit); |
1360 | } |
1361 | |
// If a simple test condition, print the operator and the limits.
1363 | printf(GenTree::OpName(optLoopTable[loopInd].lpTestOper())); |
1364 | |
1365 | if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT) |
1366 | { |
1367 | printf("%d " , optLoopTable[loopInd].lpConstLimit()); |
1368 | } |
1369 | |
1370 | if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT) |
1371 | { |
1372 | printf("V%02u " , optLoopTable[loopInd].lpVarLimit()); |
1373 | } |
1374 | |
1375 | printf("]" ); |
1376 | } |
1377 | |
1378 | printf("\n" ); |
1379 | } |
1380 | |
1381 | void Compiler::optCheckPreds() |
1382 | { |
1383 | BasicBlock* block; |
1384 | BasicBlock* blockPred; |
1385 | flowList* pred; |
1386 | |
1387 | for (block = fgFirstBB; block; block = block->bbNext) |
1388 | { |
1389 | for (pred = block->bbPreds; pred; pred = pred->flNext) |
1390 | { |
1391 | // make sure this pred is part of the BB list |
1392 | for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext) |
1393 | { |
1394 | if (blockPred == pred->flBlock) |
1395 | { |
1396 | break; |
1397 | } |
1398 | } |
1399 | noway_assert(blockPred); |
1400 | switch (blockPred->bbJumpKind) |
1401 | { |
1402 | case BBJ_COND: |
1403 | if (blockPred->bbJumpDest == block) |
1404 | { |
1405 | break; |
1406 | } |
1407 | __fallthrough; |
1408 | case BBJ_NONE: |
1409 | noway_assert(blockPred->bbNext == block); |
1410 | break; |
1411 | case BBJ_EHFILTERRET: |
1412 | case BBJ_ALWAYS: |
1413 | case BBJ_EHCATCHRET: |
1414 | noway_assert(blockPred->bbJumpDest == block); |
1415 | break; |
1416 | default: |
1417 | break; |
1418 | } |
1419 | } |
1420 | } |
1421 | } |
1422 | |
1423 | #endif // DEBUG |
1424 | |
1425 | namespace |
1426 | { |
1427 | //------------------------------------------------------------------------ |
1428 | // LoopSearch: Class that handles scanning a range of blocks to detect a loop, |
1429 | // moving blocks to make the loop body contiguous, and recording |
1430 | // the loop. |
1431 | // |
1432 | // We will use the following terminology: |
// HEAD - the basic block that flows into the loop ENTRY block (currently it MUST be lexically before ENTRY).
// Not part of the loop itself.
1435 | // FIRST - the lexically first basic block (in bbNext order) within this loop. |
1436 | // TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same. |
1437 | // BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top) |
// EXIT - the predecessor of the loop's unique exit edge, if it has a unique exit edge; else nullptr
// ENTRY - the entry point of the loop (not necessarily the TOP), but there must be only one entry
1440 | // |
1441 | // We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks. |
1442 | // When the loop is identified, blocks will be moved out to make it a compact contiguous region if possible, |
1443 | // and in cases where compaction is not possible, we'll subsequently treat all blocks in the lexical range |
1444 | // between TOP and BOTTOM as part of the loop even if they aren't part of the SCC. |
1445 | // Regarding nesting: Since a given block can only have one back-edge (we only detect loops with back-edges |
1446 | // from BBJ_COND or BBJ_ALWAYS blocks), no two loops will share the same BOTTOM. Two loops may share the |
1447 | // same FIRST/TOP/ENTRY as reported by LoopSearch, and optCanonicalizeLoopNest will subsequently re-write |
1448 | // the CFG so that no two loops share the same FIRST/TOP/ENTRY anymore. |
1449 | // |
1450 | // | |
1451 | // v |
1452 | // head |
1453 | // | |
1454 | // | top/first <--+ |
1455 | // | | | |
1456 | // | ... | |
1457 | // | | | |
1458 | // | v | |
1459 | // +---> entry | |
1460 | // | | |
1461 | // ... | |
1462 | // | | |
1463 | // v | |
1464 | // +-- exit/tail | |
1465 | // | | | |
1466 | // | ... | |
1467 | // | | | |
1468 | // | v | |
1469 | // | bottom ---+ |
1470 | // | |
1471 | // +------+ |
1472 | // | |
1473 | // v |
1474 | // |
1475 | class LoopSearch |
1476 | { |
1477 | |
1478 | // Keeping track of which blocks are in the loop requires two block sets since we may add blocks |
1479 | // as we go but the BlockSet type's max ID doesn't increase to accommodate them. Define a helper |
1480 | // struct to make the ensuing code more readable. |
1481 | struct LoopBlockSet |
1482 | { |
1483 | private: |
1484 | // Keep track of blocks with bbNum <= oldBlockMaxNum in a regular BlockSet, since |
1485 | // it can hold all of them. |
1486 | BlockSet oldBlocksInLoop; // Blocks with bbNum <= oldBlockMaxNum |
1487 | |
1488 | // Keep track of blocks with bbNum > oldBlockMaxNum in a separate BlockSet, but |
1489 | // indexing them by (blockNum - oldBlockMaxNum); since we won't generate more than |
1490 | // one new block per old block, this must be sufficient to track any new blocks. |
1491 | BlockSet newBlocksInLoop; // Blocks with bbNum > oldBlockMaxNum |
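
// For example (illustrative): if oldBlockMaxNum is 50, an existing block numbered 37 is
// tracked in oldBlocksInLoop at index 37, while a newly created block numbered 53 is
// tracked in newBlocksInLoop at index 53 - 50 = 3.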
1492 | |
1493 | Compiler* comp; |
1494 | unsigned int oldBlockMaxNum; |
1495 | |
1496 | public: |
1497 | LoopBlockSet(Compiler* comp) |
1498 | : oldBlocksInLoop(BlockSetOps::UninitVal()) |
1499 | , newBlocksInLoop(BlockSetOps::UninitVal()) |
1500 | , comp(comp) |
1501 | , oldBlockMaxNum(comp->fgBBNumMax) |
1502 | { |
1503 | } |
1504 | |
1505 | void Reset(unsigned int seedBlockNum) |
1506 | { |
1507 | if (BlockSetOps::MayBeUninit(oldBlocksInLoop)) |
1508 | { |
1509 | // Either the block sets are uninitialized (and long), so we need to initialize |
1510 | // them (and allocate their backing storage), or they are short and empty, so |
1511 | // assigning MakeEmpty to them is as cheap as ClearD. |
1512 | oldBlocksInLoop = BlockSetOps::MakeEmpty(comp); |
1513 | newBlocksInLoop = BlockSetOps::MakeEmpty(comp); |
1514 | } |
1515 | else |
1516 | { |
1517 | // We know the backing storage is already allocated, so just clear it. |
1518 | BlockSetOps::ClearD(comp, oldBlocksInLoop); |
1519 | BlockSetOps::ClearD(comp, newBlocksInLoop); |
1520 | } |
1521 | assert(seedBlockNum <= oldBlockMaxNum); |
1522 | BlockSetOps::AddElemD(comp, oldBlocksInLoop, seedBlockNum); |
1523 | } |
1524 | |
1525 | bool CanRepresent(unsigned int blockNum) |
1526 | { |
1527 | // We can represent old blocks up to oldBlockMaxNum, and |
1528 | // new blocks up to 2 * oldBlockMaxNum. |
1529 | return (blockNum <= 2 * oldBlockMaxNum); |
1530 | } |
1531 | |
1532 | bool IsMember(unsigned int blockNum) |
1533 | { |
1534 | if (blockNum > oldBlockMaxNum) |
1535 | { |
1536 | return BlockSetOps::IsMember(comp, newBlocksInLoop, blockNum - oldBlockMaxNum); |
1537 | } |
1538 | return BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum); |
1539 | } |
1540 | |
1541 | void Insert(unsigned int blockNum) |
1542 | { |
1543 | if (blockNum > oldBlockMaxNum) |
1544 | { |
1545 | BlockSetOps::AddElemD(comp, newBlocksInLoop, blockNum - oldBlockMaxNum); |
1546 | } |
1547 | else |
1548 | { |
1549 | BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum); |
1550 | } |
1551 | } |
1552 | |
1553 | bool TestAndInsert(unsigned int blockNum) |
1554 | { |
1555 | if (blockNum > oldBlockMaxNum) |
1556 | { |
1557 | unsigned int shiftedNum = blockNum - oldBlockMaxNum; |
1558 | if (!BlockSetOps::IsMember(comp, newBlocksInLoop, shiftedNum)) |
1559 | { |
1560 | BlockSetOps::AddElemD(comp, newBlocksInLoop, shiftedNum); |
1561 | return false; |
1562 | } |
1563 | } |
1564 | else |
1565 | { |
1566 | if (!BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum)) |
1567 | { |
1568 | BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum); |
1569 | return false; |
1570 | } |
1571 | } |
1572 | return true; |
1573 | } |
1574 | }; |
1575 | |
1576 | LoopBlockSet loopBlocks; // Set of blocks identified as part of the loop |
1577 | Compiler* comp; |
1578 | |
1579 | // See LoopSearch class comment header for a diagram relating these fields: |
1580 | BasicBlock* head; // Predecessor of unique entry edge |
1581 | BasicBlock* first; // Lexically first in-loop block |
1582 | BasicBlock* top; // Successor of back-edge from BOTTOM |
1583 | BasicBlock* bottom; // Predecessor of back-edge to TOP, also lexically last in-loop block |
1584 | BasicBlock* entry; // Successor of unique entry edge |
1585 | |
1586 | BasicBlock* lastExit; // Most recently discovered exit block |
1587 | unsigned char exitCount; // Number of discovered exit edges |
1588 | unsigned int oldBlockMaxNum; // Used to identify new blocks created during compaction |
1589 | BlockSet bottomBlocks; // BOTTOM blocks of already-recorded loops |
1590 | #ifdef DEBUG |
1591 | bool forgotExit = false; // Flags a rare case where lastExit gets nulled out, for assertions |
1592 | #endif |
1593 | bool changedFlowGraph = false; // Signals that loop compaction has modified the flow graph |
1594 | |
1595 | public: |
1596 | LoopSearch(Compiler* comp) |
1597 | : loopBlocks(comp), comp(comp), oldBlockMaxNum(comp->fgBBNumMax), bottomBlocks(BlockSetOps::MakeEmpty(comp)) |
1598 | { |
1599 | // Make sure we've renumbered such that the bitsets can hold all the bits |
1600 | assert(comp->fgBBNumMax <= comp->fgCurBBEpochSize); |
1601 | } |
1602 | |
1603 | //------------------------------------------------------------------------ |
1604 | // RecordLoop: Notify the Compiler that a loop has been found. |
1605 | // |
1606 | // Return Value: |
1607 | // true - Loop successfully recorded. |
1608 | // false - Compiler has run out of loop descriptors; loop not recorded. |
1609 | // |
1610 | bool RecordLoop() |
1611 | { |
1612 | /* At this point we have a compact loop - record it in the loop table |
1613 | * If we found only one exit, record it in the table too |
1614 | * (otherwise an exit = nullptr in the loop table means multiple exits) */ |
1615 | |
1616 | BasicBlock* onlyExit = (exitCount == 1 ? lastExit : nullptr); |
1617 | if (comp->optRecordLoop(head, first, top, entry, bottom, onlyExit, exitCount)) |
1618 | { |
1619 | // Record the BOTTOM block for future reference before returning. |
1620 | assert(bottom->bbNum <= oldBlockMaxNum); |
1621 | BlockSetOps::AddElemD(comp, bottomBlocks, bottom->bbNum); |
1622 | return true; |
1623 | } |
1624 | |
1625 | // Unable to record this loop because the loop descriptor table overflowed. |
1626 | return false; |
1627 | } |
1628 | |
1629 | //------------------------------------------------------------------------ |
1630 | // ChangedFlowGraph: Determine whether loop compaction has modified the flow graph. |
1631 | // |
1632 | // Return Value: |
1633 | // true - The flow graph has been modified; fgUpdateChangedFlowGraph should |
1634 | // be called (which is the caller's responsibility). |
1635 | // false - The flow graph has not been modified by this LoopSearch. |
1636 | // |
1637 | bool ChangedFlowGraph() |
1638 | { |
1639 | return changedFlowGraph; |
1640 | } |
1641 | |
1642 | //------------------------------------------------------------------------ |
1643 | // FindLoop: Search for a loop with the given HEAD block and back-edge. |
1644 | // |
1645 | // Arguments: |
1646 | // head - Block to be the HEAD of any loop identified |
1647 | // top - Block to be the TOP of any loop identified |
1648 | // bottom - Block to be the BOTTOM of any loop identified |
1649 | // |
1650 | // Return Value: |
1651 | // true - Found a valid loop. |
1652 | // false - Did not find a valid loop. |
1653 | // |
1654 | // Notes: |
1655 | // May modify flow graph to make loop compact before returning. |
1656 | // Will set instance fields to track loop's extent and exits if a valid |
1657 | // loop is found, and potentially trash them otherwise. |
1658 | // |
1659 | bool FindLoop(BasicBlock* head, BasicBlock* top, BasicBlock* bottom) |
1660 | { |
1661 | /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM |
1662 | * to TOP (note that this is an abuse of notation since this is not necessarily a back edge |
1663 | * as the definition says, but merely an indication that we have a loop there). |
* Thus, we have to be very careful and, after discovering the entry, check that it is indeed
* the only place we enter the loop (especially for non-reducible flow graphs).
1666 | */ |
1667 | |
1668 | if (top->bbNum > bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP) |
1669 | { |
1670 | // Edge from BOTTOM to TOP is not a backward edge |
1671 | return false; |
1672 | } |
1673 | |
1674 | if (bottom->bbNum > oldBlockMaxNum) |
1675 | { |
1676 | // Not a true back-edge; bottom is a block added to reconnect fall-through during |
1677 | // loop processing, so its block number does not reflect its position. |
1678 | return false; |
1679 | } |
1680 | |
1681 | if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) || |
1682 | (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) || |
1683 | (bottom->bbJumpKind == BBJ_SWITCH)) |
1684 | { |
1685 | /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop. |
1686 | * BBJ_SWITCH that has a backward jump appears only for labeled break. */ |
1687 | return false; |
1688 | } |
1689 | |
1690 | /* The presence of a "back edge" is an indication that a loop might be present here |
1691 | * |
1692 | * LOOP: |
1693 | * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any |
1694 | * node in the loop to any other node in the loop (wholly within the loop) |
1695 | * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node |
1696 | * in the loop from outside the loop, and that is through the ENTRY |
1697 | */ |
1698 | |
1699 | /* Let's find the loop ENTRY */ |
1700 | BasicBlock* entry = FindEntry(head, top, bottom); |
1701 | |
1702 | if (entry == nullptr) |
1703 | { |
1704 | // For now, we only recognize loops where HEAD has some successor ENTRY in the loop. |
1705 | return false; |
1706 | } |
1707 | |
1708 | // Passed the basic checks; initialize instance state for this back-edge. |
1709 | this->head = head; |
1710 | this->top = top; |
1711 | this->entry = entry; |
1712 | this->bottom = bottom; |
1713 | this->lastExit = nullptr; |
1714 | this->exitCount = 0; |
1715 | |
1716 | // Now we find the "first" block -- the earliest block reachable within the loop. |
1717 | // With our current algorithm, this is always the same as "top". |
1718 | this->first = top; |
1719 | |
1720 | if (!HasSingleEntryCycle()) |
1721 | { |
1722 | // There isn't actually a loop between TOP and BOTTOM |
1723 | return false; |
1724 | } |
1725 | |
1726 | if (!loopBlocks.IsMember(top->bbNum)) |
1727 | { |
1728 | // The "back-edge" we identified isn't actually part of the flow cycle containing ENTRY |
1729 | return false; |
1730 | } |
1731 | |
1732 | // Disqualify loops where the first block of the loop is less nested in EH than |
1733 | // the bottom block. That is, we don't want to handle loops where the back edge |
1734 | // goes from within an EH region to a first block that is outside that same EH |
1735 | // region. Note that we *do* handle loops where the first block is the *first* |
1736 | // block of a more nested EH region (since it is legal to branch to the first |
1737 | // block of an immediately more nested EH region). So, for example, disqualify |
1738 | // this: |
1739 | // |
1740 | // BB02 |
1741 | // ... |
1742 | // try { |
1743 | // ... |
1744 | // BB10 BBJ_COND => BB02 |
1745 | // ... |
1746 | // } |
1747 | // |
1748 | // Here, BB10 is more nested than BB02. |
1749 | |
1750 | if (bottom->hasTryIndex() && !comp->bbInTryRegions(bottom->getTryIndex(), first)) |
1751 | { |
JITDUMP("Loop 'first' " FMT_BB " is in an outer EH region compared to loop 'bottom' " FMT_BB ". Rejecting "
        "loop.\n",
        first->bbNum, bottom->bbNum);
1755 | return false; |
1756 | } |
1757 | |
1758 | #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) |
1759 | // Disqualify loops where the first block of the loop is a finally target. |
1760 | // The main problem is when multiple loops share a 'first' block that is a finally |
1761 | // target and we canonicalize the loops by adding a new loop head. In that case, we |
1762 | // need to update the blocks so the finally target bit is moved to the newly created |
1763 | // block, and removed from the old 'first' block. This is 'hard', so at this point |
1764 | // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator |
1765 | // long-term), it's easier to disallow the loop than to update the flow graph to |
1766 | // support this case. |
1767 | |
1768 | if ((first->bbFlags & BBF_FINALLY_TARGET) != 0) |
1769 | { |
JITDUMP("Loop 'first' " FMT_BB " is a finally target. Rejecting loop.\n", first->bbNum);
1771 | return false; |
1772 | } |
1773 | #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) |
1774 | |
1775 | // Compact the loop (sweep through it and move out any blocks that aren't part of the |
1776 | // flow cycle), and find the exits. |
1777 | if (!MakeCompactAndFindExits()) |
1778 | { |
1779 | // Unable to preserve well-formed loop during compaction. |
1780 | return false; |
1781 | } |
1782 | |
1783 | // We have a valid loop. |
1784 | return true; |
1785 | } |
1786 | |
1787 | private: |
1788 | //------------------------------------------------------------------------ |
1789 | // FindEntry: See if given HEAD flows to valid ENTRY between given TOP and BOTTOM |
1790 | // |
1791 | // Arguments: |
1792 | // head - Block to be the HEAD of any loop identified |
1793 | // top - Block to be the TOP of any loop identified |
1794 | // bottom - Block to be the BOTTOM of any loop identified |
1795 | // |
1796 | // Return Value: |
1797 | // Block to be the ENTRY of any loop identified, or nullptr if no |
1798 | // such entry meeting our criteria can be found. |
1799 | // |
1800 | // Notes: |
1801 | // Returns main entry if one is found, does not check for side-entries. |
1802 | // |
1803 | BasicBlock* FindEntry(BasicBlock* head, BasicBlock* top, BasicBlock* bottom) |
1804 | { |
1805 | if (head->bbJumpKind == BBJ_ALWAYS) |
1806 | { |
1807 | if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum) |
1808 | { |
1809 | /* OK - we enter somewhere within the loop */ |
1810 | |
/* Some useful asserts:
 * Cannot enter at the top - should have been caught by redundant jumps */
1813 | |
1814 | assert((head->bbJumpDest != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS)); |
1815 | |
1816 | return head->bbJumpDest; |
1817 | } |
1818 | else |
1819 | { |
1820 | /* special case - don't consider now */ |
1821 | // assert (!"Loop entered in weird way!"); |
1822 | return nullptr; |
1823 | } |
1824 | } |
1825 | // Can we fall through into the loop? |
1826 | else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND) |
1827 | { |
1828 | /* The ENTRY is at the TOP (a do-while loop) */ |
1829 | return top; |
1830 | } |
1831 | else |
1832 | { |
return nullptr; // head does not flow into the loop; bail for now
1834 | } |
1835 | } |
1836 | |
1837 | //------------------------------------------------------------------------ |
1838 | // HasSingleEntryCycle: Perform a reverse flow walk from ENTRY, visiting |
1839 | // only blocks between TOP and BOTTOM, to determine if such a cycle |
1840 | // exists and if it has a single entry. |
1841 | // |
1842 | // Return Value: |
1843 | // true - Found a single-entry cycle. |
1844 | // false - Did not find a single-entry cycle. |
1845 | // |
1846 | // Notes: |
1847 | // Will mark (in `loopBlocks`) all blocks found to participate in the |
1848 | // cycle. |
1849 | // |
1850 | bool HasSingleEntryCycle() |
1851 | { |
1852 | // Now do a backwards flow walk from entry to see if we have a single-entry loop |
1853 | bool foundCycle = false; |
1854 | |
1855 | // Seed the loop block set and worklist with the entry block. |
1856 | loopBlocks.Reset(entry->bbNum); |
1857 | jitstd::list<BasicBlock*> worklist(comp->getAllocator()); |
1858 | worklist.push_back(entry); |
1859 | |
1860 | while (!worklist.empty()) |
1861 | { |
1862 | BasicBlock* block = worklist.back(); |
1863 | worklist.pop_back(); |
1864 | |
1865 | /* Make sure ENTRY dominates all blocks in the loop |
1866 | * This is necessary to ensure condition 2. above |
1867 | */ |
1868 | if (block->bbNum > oldBlockMaxNum) |
1869 | { |
1870 | // This is a new block we added to connect fall-through, so the |
1871 | // recorded dominator information doesn't cover it. Just continue, |
1872 | // and when we process its unique predecessor we'll abort if ENTRY |
1873 | // doesn't dominate that. |
1874 | } |
1875 | else if (!comp->fgDominate(entry, block)) |
1876 | { |
1877 | return false; |
1878 | } |
1879 | |
1880 | // Add preds to the worklist, checking for side-entries. |
1881 | for (flowList* predIter = block->bbPreds; predIter != nullptr; predIter = predIter->flNext) |
1882 | { |
1883 | BasicBlock* pred = predIter->flBlock; |
1884 | |
1885 | unsigned int testNum = PositionNum(pred); |
1886 | |
1887 | if ((testNum < top->bbNum) || (testNum > bottom->bbNum)) |
1888 | { |
1889 | // Pred is out of loop range |
1890 | if (block == entry) |
1891 | { |
1892 | if (pred == head) |
1893 | { |
1894 | // This is the single entry we expect. |
1895 | continue; |
1896 | } |
1897 | // ENTRY has some pred other than head outside the loop. If ENTRY does not |
1898 | // dominate this pred, we'll consider this a side-entry and skip this loop; |
1899 | // otherwise the loop is still valid and this may be a (flow-wise) back-edge |
1900 | // of an outer loop. For the dominance test, if `pred` is a new block, use |
1901 | // its unique predecessor since the dominator tree has info for that. |
1902 | BasicBlock* effectivePred = (pred->bbNum > oldBlockMaxNum ? pred->bbPrev : pred); |
1903 | if (comp->fgDominate(entry, effectivePred)) |
1904 | { |
1905 | // Outer loop back-edge |
1906 | continue; |
1907 | } |
1908 | } |
1909 | |
1910 | // There are multiple entries to this loop, don't consider it. |
1911 | return false; |
1912 | } |
1913 | |
1914 | bool isFirstVisit; |
1915 | if (pred == entry) |
1916 | { |
1917 | // We have indeed found a cycle in the flow graph. |
1918 | isFirstVisit = !foundCycle; |
1919 | foundCycle = true; |
1920 | assert(loopBlocks.IsMember(pred->bbNum)); |
1921 | } |
1922 | else if (loopBlocks.TestAndInsert(pred->bbNum)) |
1923 | { |
1924 | // Already visited this pred |
1925 | isFirstVisit = false; |
1926 | } |
1927 | else |
1928 | { |
1929 | // Add this pred to the worklist |
1930 | worklist.push_back(pred); |
1931 | isFirstVisit = true; |
1932 | } |
1933 | |
1934 | if (isFirstVisit && (pred->bbNext != nullptr) && (PositionNum(pred->bbNext) == pred->bbNum)) |
1935 | { |
// We've created a new block immediately after `pred` to
// reconnect what was fall-through. Mark it as in-loop also;
// it needs to stay with `pred`, and if it exits the loop we'd
// just need to re-create it if we tried to move it out.
1940 | loopBlocks.Insert(pred->bbNext->bbNum); |
1941 | } |
1942 | } |
1943 | } |
1944 | |
1945 | return foundCycle; |
1946 | } |
1947 | |
1948 | //------------------------------------------------------------------------ |
1949 | // PositionNum: Get the number identifying a block's position per the |
1950 | // lexical ordering that existed before searching for (and compacting) |
1951 | // loops. |
1952 | // |
1953 | // Arguments: |
1954 | // block - Block whose position is desired. |
1955 | // |
1956 | // Return Value: |
1957 | // A number indicating that block's position relative to others. |
1958 | // |
1959 | // Notes: |
1960 | // When the given block is a new one created during loop compaction, |
1961 | // the number of its unique predecessor is returned. |
1962 | // |
1963 | unsigned int PositionNum(BasicBlock* block) |
1964 | { |
1965 | if (block->bbNum > oldBlockMaxNum) |
1966 | { |
1967 | // This must be a block we inserted to connect fall-through after moving blocks. |
1968 | // To determine if it's in the loop or not, use the number of its unique predecessor |
1969 | // block. |
1970 | assert(block->bbPreds->flBlock == block->bbPrev); |
1971 | assert(block->bbPreds->flNext == nullptr); |
1972 | return block->bbPrev->bbNum; |
1973 | } |
1974 | return block->bbNum; |
1975 | } |
1976 | |
1977 | //------------------------------------------------------------------------ |
1978 | // MakeCompactAndFindExits: Compact the loop (sweep through it and move out |
1979 | // any blocks that aren't part of the flow cycle), and find the exits (set |
1980 | // lastExit and exitCount). |
1981 | // |
1982 | // Return Value: |
1983 | // true - Loop successfully compacted (or `loopBlocks` expanded to |
1984 | // include all blocks in the lexical range), exits enumerated. |
1985 | // false - Loop cannot be made compact and remain well-formed. |
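//
// Notes:
//    Illustration (hypothetical block numbers): if the lexical range TOP=BB03..BOTTOM=BB08
//    contains BB05..BB06 that do not participate in the flow cycle, those blocks are unlinked
//    and reinserted after an insertion point at or below BOTTOM, making the loop's blocks
//    lexically contiguous. Any fall-through broken by the move is patched by FixupFallThrough,
//    which may create new BBJ_ALWAYS blocks.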
1986 | // |
1987 | bool MakeCompactAndFindExits() |
1988 | { |
1989 | // Compaction (if it needs to happen) will require an insertion point. |
1990 | BasicBlock* moveAfter = nullptr; |
1991 | |
1992 | for (BasicBlock* previous = top->bbPrev; previous != bottom;) |
1993 | { |
1994 | BasicBlock* block = previous->bbNext; |
1995 | |
1996 | if (loopBlocks.IsMember(block->bbNum)) |
1997 | { |
1998 | // This block is a member of the loop. Check to see if it may exit the loop. |
1999 | CheckForExit(block); |
2000 | |
2001 | // Done processing this block; move on to the next. |
2002 | previous = block; |
2003 | continue; |
2004 | } |
2005 | |
// This block is lexically between TOP and BOTTOM, but it does not
2007 | // participate in the flow cycle. Check for a run of consecutive |
2008 | // such blocks. |
2009 | BasicBlock* lastNonLoopBlock = block; |
2010 | BasicBlock* nextLoopBlock = block->bbNext; |
2011 | while (!loopBlocks.IsMember(nextLoopBlock->bbNum)) |
2012 | { |
2013 | lastNonLoopBlock = nextLoopBlock; |
2014 | nextLoopBlock = nextLoopBlock->bbNext; |
2015 | // This loop must terminate because we know BOTTOM is in loopBlocks. |
2016 | } |
2017 | |
2018 | // Choose an insertion point for non-loop blocks if we haven't yet done so. |
2019 | if (moveAfter == nullptr) |
2020 | { |
2021 | moveAfter = FindInsertionPoint(); |
2022 | } |
2023 | |
2024 | if (!BasicBlock::sameEHRegion(previous, nextLoopBlock) || !BasicBlock::sameEHRegion(previous, moveAfter)) |
2025 | { |
2026 | // EH regions would be ill-formed if we moved these blocks out. |
2027 | // See if we can consider them loop blocks without introducing |
2028 | // a side-entry. |
2029 | if (CanTreatAsLoopBlocks(block, lastNonLoopBlock)) |
2030 | { |
// The call to `CanTreatAsLoopBlocks` marked these blocks as part of the loop;
2032 | // iterate without updating `previous` so that we'll analyze them as part |
2033 | // of the loop. |
2034 | continue; |
2035 | } |
2036 | else |
2037 | { |
2038 | // We can't move these out of the loop or leave them in, so just give |
2039 | // up on this loop. |
2040 | return false; |
2041 | } |
2042 | } |
2043 | |
2044 | // Now physically move the blocks. |
2045 | BasicBlock* moveBefore = moveAfter->bbNext; |
2046 | |
2047 | comp->fgUnlinkRange(block, lastNonLoopBlock); |
2048 | comp->fgMoveBlocksAfter(block, lastNonLoopBlock, moveAfter); |
2049 | comp->ehUpdateLastBlocks(moveAfter, lastNonLoopBlock); |
2050 | |
2051 | // Apply any adjustments needed for fallthrough at the boundaries of the moved region. |
2052 | FixupFallThrough(moveAfter, moveBefore, block); |
2053 | FixupFallThrough(lastNonLoopBlock, nextLoopBlock, moveBefore); |
2054 | // Also apply any adjustments needed where the blocks were snipped out of the loop. |
2055 | BasicBlock* newBlock = FixupFallThrough(previous, block, nextLoopBlock); |
2056 | if (newBlock != nullptr) |
2057 | { |
2058 | // This new block is in the loop and is a loop exit. |
2059 | loopBlocks.Insert(newBlock->bbNum); |
2060 | lastExit = newBlock; |
2061 | ++exitCount; |
2062 | } |
2063 | |
2064 | // Update moveAfter for the next insertion. |
2065 | moveAfter = lastNonLoopBlock; |
2066 | |
2067 | // Note that we've changed the flow graph, and continue without updating |
2068 | // `previous` so that we'll process nextLoopBlock. |
2069 | changedFlowGraph = true; |
2070 | } |
2071 | |
2072 | if ((exitCount == 1) && (lastExit == nullptr)) |
2073 | { |
2074 | // If we happen to have a loop with two exits, one of which goes to an |
2075 | // infinite loop that's lexically nested inside it, where the inner loop |
2076 | // can't be moved out, we can end up in this situation (because |
2077 | // CanTreatAsLoopBlocks will have decremented the count expecting to find |
2078 | // another exit later). Bump the exit count to 2, since downstream code |
2079 | // will not be prepared for null lastExit with exitCount of 1. |
2080 | assert(forgotExit); |
2081 | exitCount = 2; |
2082 | } |
2083 | |
2084 | // Loop compaction was successful |
2085 | return true; |
2086 | } |
2087 | |
2088 | //------------------------------------------------------------------------ |
2089 | // FindInsertionPoint: Find an appropriate spot to which blocks that are |
2090 | // lexically between TOP and BOTTOM but not part of the flow cycle |
2091 | // can be moved. |
2092 | // |
2093 | // Return Value: |
2094 | // Block after which to insert moved blocks. |
2095 | // |
2096 | BasicBlock* FindInsertionPoint() |
2097 | { |
2098 | // Find an insertion point for blocks we're going to move. Move them down |
2099 | // out of the loop, and if possible find a spot that won't break up fall-through. |
2100 | BasicBlock* moveAfter = bottom; |
2101 | while (moveAfter->bbFallsThrough()) |
2102 | { |
2103 | // Keep looking for a better insertion point if we can. |
2104 | BasicBlock* newMoveAfter = TryAdvanceInsertionPoint(moveAfter); |
2105 | |
2106 | if (newMoveAfter == nullptr) |
2107 | { |
2108 | // Ran out of candidate insertion points, so just split up the fall-through. |
2109 | return moveAfter; |
2110 | } |
2111 | |
2112 | moveAfter = newMoveAfter; |
2113 | } |
2114 | |
2115 | return moveAfter; |
2116 | } |
2117 | |
2118 | //------------------------------------------------------------------------ |
2119 | // TryAdvanceInsertionPoint: Find the next legal insertion point after |
2120 | // the given one, if one exists. |
2121 | // |
2122 | // Arguments: |
2123 | // oldMoveAfter - Prior insertion point; find the next after this. |
2124 | // |
2125 | // Return Value: |
2126 | // The next block after `oldMoveAfter` that is a legal insertion point |
2127 | // (i.e. blocks being swept out of the loop can be moved immediately |
2128 | // after it), if one exists, else nullptr. |
2129 | // |
2130 | BasicBlock* TryAdvanceInsertionPoint(BasicBlock* oldMoveAfter) |
2131 | { |
2132 | BasicBlock* newMoveAfter = oldMoveAfter->bbNext; |
2133 | |
2134 | if (!BasicBlock::sameEHRegion(oldMoveAfter, newMoveAfter)) |
2135 | { |
2136 | // Don't cross an EH region boundary. |
2137 | return nullptr; |
2138 | } |
2139 | |
2140 | if ((newMoveAfter->bbJumpKind == BBJ_ALWAYS) || (newMoveAfter->bbJumpKind == BBJ_COND)) |
2141 | { |
2142 | unsigned int destNum = newMoveAfter->bbJumpDest->bbNum; |
2143 | if ((destNum >= top->bbNum) && (destNum <= bottom->bbNum) && !loopBlocks.IsMember(destNum)) |
2144 | { |
2145 | // Reversing this branch out of block `newMoveAfter` could confuse this algorithm |
2146 | // (in particular, the edge would still be numerically backwards but no longer be |
2147 | // lexically backwards, so a lexical forward walk from TOP would not find BOTTOM), |
2148 | // so don't do that. |
2149 | // We're checking for BBJ_ALWAYS and BBJ_COND only here -- we don't need to |
2150 | // check for BBJ_SWITCH because we'd never consider it a loop back-edge. |
2151 | return nullptr; |
2152 | } |
2153 | } |
2154 | |
2155 | // Similarly check to see if advancing to `newMoveAfter` would reverse the lexical order |
2156 | // of an edge from the run of blocks being moved to `newMoveAfter` -- doing so would |
2157 | // introduce a new lexical back-edge, which could (maybe?) confuse the loop search |
2158 | // algorithm, and isn't desirable layout anyway. |
2159 | for (flowList* predIter = newMoveAfter->bbPreds; predIter != nullptr; predIter = predIter->flNext) |
2160 | { |
2161 | unsigned int predNum = predIter->flBlock->bbNum; |
2162 | |
2163 | if ((predNum >= top->bbNum) && (predNum <= bottom->bbNum) && !loopBlocks.IsMember(predNum)) |
2164 | { |
2165 | // Don't make this forward edge a backwards edge. |
2166 | return nullptr; |
2167 | } |
2168 | } |
2169 | |
2170 | if (IsRecordedBottom(newMoveAfter)) |
2171 | { |
2172 | // This is the BOTTOM of another loop; don't move any blocks past it, to avoid moving them |
2173 | // out of that loop (we should have already done so when processing that loop if it were legal). |
2174 | return nullptr; |
2175 | } |
2176 | |
2177 | // Advancing the insertion point is ok, except that we can't split up any CallFinally/BBJ_ALWAYS |
2178 | // pair, so if we've got such a pair recurse to see if we can move past the whole thing. |
2179 | return (newMoveAfter->isBBCallAlwaysPair() ? TryAdvanceInsertionPoint(newMoveAfter) : newMoveAfter); |
2180 | } |
2181 | |
2182 | //------------------------------------------------------------------------ |
// IsRecordedBottom: Determine if the given block is the BOTTOM of a previously
2184 | // recorded loop. |
2185 | // |
2186 | // Arguments: |
2187 | // block - Block to check for BOTTOM-ness. |
2188 | // |
2189 | // Return Value: |
// true - The block was recorded as the `bottom` of some earlier-processed loop.
2191 | // false - No loops yet recorded have this block as their `bottom`. |
2192 | // |
2193 | bool IsRecordedBottom(BasicBlock* block) |
2194 | { |
2195 | if (block->bbNum > oldBlockMaxNum) |
2196 | { |
2197 | // This is a new block, which can't be an outer bottom block because we only allow old blocks |
2198 | // as BOTTOM. |
2199 | return false; |
2200 | } |
2201 | return BlockSetOps::IsMember(comp, bottomBlocks, block->bbNum); |
2202 | } |
2203 | |
2204 | //------------------------------------------------------------------------ |
2205 | // CanTreatAsLoopBlocks: If the given range of blocks can be treated as |
// loop blocks, add them to `loopBlocks` and return true. Otherwise,
2207 | // return false. |
2208 | // |
2209 | // Arguments: |
2210 | // firstNonLoopBlock - First block in the run to be subsumed. |
2211 | // lastNonLoopBlock - Last block in the run to be subsumed. |
2212 | // |
2213 | // Return Value: |
// true - The blocks from `firstNonLoopBlock` to `lastNonLoopBlock` were
// successfully added to `loopBlocks`.
// false - Treating the blocks from `firstNonLoopBlock` to `lastNonLoopBlock`
// as loop blocks would not be legal (it would induce a side-entry).
2218 | // |
2219 | // Notes: |
2220 | // `loopBlocks` may be modified even if `false` is returned. |
2221 | // `exitCount` and `lastExit` may be modified if this process identifies |
2222 | // in-loop edges that were previously counted as exits. |
2223 | // |
2224 | bool CanTreatAsLoopBlocks(BasicBlock* firstNonLoopBlock, BasicBlock* lastNonLoopBlock) |
2225 | { |
2226 | BasicBlock* nextLoopBlock = lastNonLoopBlock->bbNext; |
2227 | for (BasicBlock* testBlock = firstNonLoopBlock; testBlock != nextLoopBlock; testBlock = testBlock->bbNext) |
2228 | { |
2229 | for (flowList* predIter = testBlock->bbPreds; predIter != nullptr; predIter = predIter->flNext) |
2230 | { |
2231 | BasicBlock* testPred = predIter->flBlock; |
2232 | unsigned int predPosNum = PositionNum(testPred); |
2233 | unsigned int firstNonLoopPosNum = PositionNum(firstNonLoopBlock); |
2234 | unsigned int lastNonLoopPosNum = PositionNum(lastNonLoopBlock); |
2235 | |
2236 | if (loopBlocks.IsMember(predPosNum) || |
2237 | ((predPosNum >= firstNonLoopPosNum) && (predPosNum <= lastNonLoopPosNum))) |
2238 | { |
2239 | // This pred is in the loop (or what will be the loop if we determine this |
2240 | // run of exit blocks doesn't include a side-entry). |
2241 | |
2242 | if (predPosNum < firstNonLoopPosNum) |
2243 | { |
2244 | // We've already counted this block as an exit, so decrement the count. |
2245 | --exitCount; |
2246 | if (lastExit == testPred) |
2247 | { |
2248 | // Erase this now-bogus `lastExit` entry. |
2249 | lastExit = nullptr; |
2250 | INDEBUG(forgotExit = true); |
2251 | } |
2252 | } |
2253 | } |
2254 | else |
2255 | { |
2256 | // This pred is not in the loop, so this constitutes a side-entry. |
2257 | return false; |
2258 | } |
2259 | } |
2260 | |
2261 | // Either we're going to abort the loop on a subsequent testBlock, or this |
2262 | // testBlock is part of the loop. |
2263 | loopBlocks.Insert(testBlock->bbNum); |
2264 | } |
2265 | |
2266 | // All blocks were ok to leave in the loop. |
2267 | return true; |
2268 | } |
2269 | |
2270 | //------------------------------------------------------------------------ |
2271 | // FixupFallThrough: Re-establish any broken control flow connectivity |
2272 | // and eliminate any "goto-next"s that were created by changing the |
2273 | // given block's lexical follower. |
2274 | // |
2275 | // Arguments: |
2276 | // block - Block whose `bbNext` has changed. |
2277 | // oldNext - Previous value of `block->bbNext`. |
2278 | // newNext - New value of `block->bbNext`. |
2279 | // |
2280 | // Return Value: |
2281 | // If a new block is created to reconnect flow, the new block is |
2282 | // returned; otherwise, nullptr. |
2283 | // |
2284 | BasicBlock* FixupFallThrough(BasicBlock* block, BasicBlock* oldNext, BasicBlock* newNext) |
2285 | { |
2286 | // If we create a new block, that will be our return value. |
2287 | BasicBlock* newBlock = nullptr; |
2288 | |
2289 | if (block->bbFallsThrough()) |
2290 | { |
2291 | // Need to reconnect the flow from `block` to `oldNext`. |
2292 | |
2293 | if ((block->bbJumpKind == BBJ_COND) && (block->bbJumpDest == newNext)) |
2294 | { |
2295 | /* Reverse the jump condition */ |
2296 | GenTree* test = block->lastNode(); |
2297 | noway_assert(test->OperIsConditionalJump()); |
2298 | |
2299 | if (test->OperGet() == GT_JTRUE) |
2300 | { |
2301 | GenTree* cond = comp->gtReverseCond(test->gtOp.gtOp1); |
2302 | assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node. |
2303 | test->gtOp.gtOp1 = cond; |
2304 | } |
2305 | else |
2306 | { |
2307 | comp->gtReverseCond(test); |
2308 | } |
2309 | |
2310 | // Redirect the Conditional JUMP to go to `oldNext` |
2311 | block->bbJumpDest = oldNext; |
2312 | } |
2313 | else |
2314 | { |
2315 | // Insert an unconditional jump to `oldNext` just after `block`. |
2316 | newBlock = comp->fgConnectFallThrough(block, oldNext); |
2317 | noway_assert((newBlock == nullptr) || loopBlocks.CanRepresent(newBlock->bbNum)); |
2318 | } |
2319 | } |
2320 | else if ((block->bbJumpKind == BBJ_ALWAYS) && (block->bbJumpDest == newNext)) |
2321 | { |
2322 | // We've made `block`'s jump target its bbNext, so remove the jump. |
2323 | if (!comp->fgOptimizeBranchToNext(block, newNext, block->bbPrev)) |
2324 | { |
2325 | // If optimizing away the goto-next failed for some reason, mark it KEEP_BBJ_ALWAYS to |
2326 | // prevent assertions from complaining about it. |
2327 | block->bbFlags |= BBF_KEEP_BBJ_ALWAYS; |
2328 | } |
2329 | } |
2330 | |
2331 | // Make sure we don't leave around a goto-next unless it's marked KEEP_BBJ_ALWAYS. |
assert(((block->bbJumpKind != BBJ_COND) && (block->bbJumpKind != BBJ_ALWAYS)) || (block->bbJumpDest != newNext) ||
       ((block->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0));
2334 | return newBlock; |
2335 | } |
2336 | |
2337 | //------------------------------------------------------------------------ |
2338 | // CheckForExit: Check if the given block has any successor edges that are |
2339 | // loop exits, and update `lastExit` and `exitCount` if so. |
2340 | // |
2341 | // Arguments: |
2342 | // block - Block whose successor edges are to be checked. |
2343 | // |
2344 | // Notes: |
2345 | // If one block has multiple exiting successor edges, those are counted |
2346 | // as multiple exits in `exitCount`. |
2347 | // |
2348 | void CheckForExit(BasicBlock* block) |
2349 | { |
2350 | BasicBlock* exitPoint; |
2351 | |
2352 | switch (block->bbJumpKind) |
2353 | { |
2354 | case BBJ_COND: |
2355 | case BBJ_CALLFINALLY: |
2356 | case BBJ_ALWAYS: |
2357 | case BBJ_EHCATCHRET: |
2358 | assert(block->bbJumpDest); |
2359 | exitPoint = block->bbJumpDest; |
2360 | |
2361 | if (!loopBlocks.IsMember(exitPoint->bbNum)) |
2362 | { |
2363 | /* exit from a block other than BOTTOM */ |
2364 | lastExit = block; |
2365 | exitCount++; |
2366 | } |
2367 | break; |
2368 | |
2369 | case BBJ_NONE: |
2370 | break; |
2371 | |
2372 | case BBJ_EHFINALLYRET: |
2373 | case BBJ_EHFILTERRET: |
2374 | /* The "try" associated with this "finally" must be in the |
2375 | * same loop, so the finally block will return control inside the loop */ |
2376 | break; |
2377 | |
2378 | case BBJ_THROW: |
2379 | case BBJ_RETURN: |
2380 | /* those are exits from the loop */ |
2381 | lastExit = block; |
2382 | exitCount++; |
2383 | break; |
2384 | |
2385 | case BBJ_SWITCH: |
2386 | |
2387 | unsigned jumpCnt; |
2388 | jumpCnt = block->bbJumpSwt->bbsCount; |
2389 | BasicBlock** jumpTab; |
2390 | jumpTab = block->bbJumpSwt->bbsDstTab; |
2391 | |
2392 | do |
2393 | { |
2394 | noway_assert(*jumpTab); |
2395 | exitPoint = *jumpTab; |
2396 | |
2397 | if (!loopBlocks.IsMember(exitPoint->bbNum)) |
2398 | { |
2399 | lastExit = block; |
2400 | exitCount++; |
2401 | } |
2402 | } while (++jumpTab, --jumpCnt); |
2403 | break; |
2404 | |
2405 | default: |
noway_assert(!"Unexpected bbJumpKind");
2407 | break; |
2408 | } |
2409 | |
2410 | if (block->bbFallsThrough() && !loopBlocks.IsMember(block->bbNext->bbNum)) |
2411 | { |
2412 | // Found a fall-through exit. |
2413 | lastExit = block; |
2414 | exitCount++; |
2415 | } |
2416 | } |
2417 | }; |
2418 | } |
2419 | |
2420 | /***************************************************************************** |
2421 | * Find the natural loops, using dominators. Note that the test for |
2422 | * a loop is slightly different from the standard one, because we have |
2423 | * not done a depth first reordering of the basic blocks. |
2424 | */ |
2425 | |
2426 | void Compiler::optFindNaturalLoops() |
2427 | { |
2428 | #ifdef DEBUG |
2429 | if (verbose) |
2430 | { |
2431 | printf("*************** In optFindNaturalLoops()\n" ); |
2432 | } |
2433 | #endif // DEBUG |
2434 | |
2435 | noway_assert(fgDomsComputed); |
2436 | assert(fgHasLoops); |
2437 | |
2438 | #if COUNT_LOOPS |
2439 | hasMethodLoops = false; |
2440 | loopsThisMethod = 0; |
2441 | loopOverflowThisMethod = false; |
2442 | #endif |
2443 | |
2444 | LoopSearch search(this); |
2445 | |
2446 | for (BasicBlock* head = fgFirstBB; head->bbNext; head = head->bbNext) |
2447 | { |
2448 | BasicBlock* top = head->bbNext; |
2449 | |
2450 | // Blocks that are rarely run have a zero bbWeight and should |
2451 | // never be optimized here |
2452 | |
2453 | if (top->bbWeight == BB_ZERO_WEIGHT) |
2454 | { |
2455 | continue; |
2456 | } |
2457 | |
2458 | for (flowList* pred = top->bbPreds; pred; pred = pred->flNext) |
2459 | { |
2460 | if (search.FindLoop(head, top, pred->flBlock)) |
2461 | { |
2462 | // Found a loop; record it and see if we've hit the limit. |
2463 | bool recordedLoop = search.RecordLoop(); |
2464 | |
(void)recordedLoop; // avoid unused variable warnings in COUNT_LOOPS and !DEBUG
2466 | |
2467 | #if COUNT_LOOPS |
2468 | if (!hasMethodLoops) |
2469 | { |
2470 | /* mark the method as containing natural loops */ |
2471 | totalLoopMethods++; |
2472 | hasMethodLoops = true; |
2473 | } |
2474 | |
2475 | /* increment total number of loops found */ |
2476 | totalLoopCount++; |
2477 | loopsThisMethod++; |
2478 | |
2479 | /* keep track of the number of exits */ |
2480 | loopExitCountTable.record(static_cast<unsigned>(exitCount)); |
2481 | #else // COUNT_LOOPS |
2482 | assert(recordedLoop); |
2483 | if (optLoopCount == MAX_LOOP_NUM) |
2484 | { |
2485 | // We won't be able to record any more loops, so stop looking. |
2486 | goto NO_MORE_LOOPS; |
2487 | } |
2488 | #endif // COUNT_LOOPS |
2489 | |
2490 | // Continue searching preds of `top` to see if any other are |
2491 | // back-edges (this can happen for nested loops). The iteration |
2492 | // is safe because the compaction we do only modifies predecessor |
2493 | // lists of blocks that gain or lose fall-through from their |
2494 | // `bbPrev`, but since the motion is from within the loop to below |
2495 | // it, we know we're not altering the relationship between `top` |
2496 | // and its `bbPrev`. |
2497 | } |
2498 | } |
2499 | } |
2500 | NO_MORE_LOOPS: |
2501 | |
2502 | #if COUNT_LOOPS |
2503 | loopCountTable.record(loopsThisMethod); |
2504 | if (maxLoopsPerMethod < loopsThisMethod) |
2505 | { |
2506 | maxLoopsPerMethod = loopsThisMethod; |
2507 | } |
2508 | if (loopOverflowThisMethod) |
2509 | { |
2510 | totalLoopOverflows++; |
2511 | } |
2512 | #endif // COUNT_LOOPS |
2513 | |
2514 | bool mod = search.ChangedFlowGraph(); |
2515 | |
2516 | if (mod) |
2517 | { |
2518 | // Need to renumber blocks now since loop canonicalization |
2519 | // depends on it; can defer the rest of fgUpdateChangedFlowGraph() |
2520 | // until after canonicalizing loops. Dominator information is |
2521 | // recorded in terms of block numbers, so flag it invalid. |
2522 | fgDomsComputed = false; |
2523 | fgRenumberBlocks(); |
2524 | } |
2525 | |
2526 | // Now the loop indices are stable. We can figure out parent/child relationships |
2527 | // (using table indices to name loops), and label blocks. |
2528 | for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++) |
2529 | { |
2530 | for (unsigned char possibleParent = loopInd; possibleParent > 0;) |
2531 | { |
2532 | possibleParent--; |
2533 | if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd])) |
2534 | { |
2535 | optLoopTable[loopInd].lpParent = possibleParent; |
2536 | optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild; |
2537 | optLoopTable[possibleParent].lpChild = loopInd; |
2538 | break; |
2539 | } |
2540 | } |
2541 | } |
2542 | |
2543 | // Now label the blocks with the innermost loop to which they belong. Since parents |
2544 | // precede children in the table, doing the labeling for each loop in order will achieve |
2545 | // this -- the innermost loop labeling will be done last. |
2546 | for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) |
2547 | { |
2548 | BasicBlock* first = optLoopTable[loopInd].lpFirst; |
2549 | BasicBlock* bottom = optLoopTable[loopInd].lpBottom; |
2550 | for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext) |
2551 | { |
2552 | blk->bbNatLoopNum = loopInd; |
2553 | if (blk == bottom) |
2554 | { |
2555 | break; |
2556 | } |
2557 | assert(blk->bbNext != nullptr); // We should never reach nullptr. |
2558 | } |
2559 | } |
2560 | |
2561 | // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop" |
2562 | // one, if necessary, for loops containing others that share a "top." |
2563 | for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) |
2564 | { |
2565 | // Traverse the outermost loops as entries into the loop nest; so skip non-outermost. |
2566 | if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP) |
2567 | { |
2568 | continue; |
2569 | } |
2570 | |
2571 | // Otherwise... |
2572 | if (optCanonicalizeLoopNest(loopInd)) |
2573 | { |
2574 | mod = true; |
2575 | } |
2576 | } |
2577 | if (mod) |
2578 | { |
2579 | fgUpdateChangedFlowGraph(); |
2580 | } |
2581 | |
2582 | #ifdef DEBUG |
2583 | if (verbose && optLoopCount > 0) |
2584 | { |
2585 | printf("\nFinal natural loop table:\n" ); |
2586 | for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++) |
2587 | { |
2588 | optPrintLoopInfo(loopInd); |
2589 | printf("\n" ); |
2590 | } |
2591 | } |
2592 | #endif // DEBUG |
2593 | } |
2594 | |
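// Redirect the jump target(s) of 'blk' according to 'redirectMap': any jump destination
// (or switch table entry) that appears as a key in the map is replaced by the corresponding
// value. Note that this does not update the predecessor lists.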
2595 | void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap) |
2596 | { |
2597 | BasicBlock* newJumpDest = nullptr; |
2598 | switch (blk->bbJumpKind) |
2599 | { |
2600 | case BBJ_THROW: |
2601 | case BBJ_RETURN: |
2602 | case BBJ_NONE: |
2603 | case BBJ_EHFILTERRET: |
2604 | case BBJ_EHFINALLYRET: |
2605 | case BBJ_EHCATCHRET: |
2606 | // These have no jump destination to update. |
2607 | break; |
2608 | |
2609 | case BBJ_ALWAYS: |
2610 | case BBJ_LEAVE: |
2611 | case BBJ_CALLFINALLY: |
2612 | case BBJ_COND: |
2613 | // All of these have a single jump destination to update. |
2614 | if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest)) |
2615 | { |
2616 | blk->bbJumpDest = newJumpDest; |
2617 | } |
2618 | break; |
2619 | |
2620 | case BBJ_SWITCH: |
2621 | { |
2622 | bool redirected = false; |
2623 | for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++) |
2624 | { |
2625 | if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest)) |
2626 | { |
2627 | blk->bbJumpSwt->bbsDstTab[i] = newJumpDest; |
2628 | redirected = true; |
2629 | } |
2630 | } |
// If any redirections happened, invalidate the switch table map for the switch.
2632 | if (redirected) |
2633 | { |
2634 | // Don't create a new map just to try to remove an entry. |
2635 | BlockToSwitchDescMap* switchMap = GetSwitchDescMap(/* createIfNull */ false); |
2636 | if (switchMap != nullptr) |
2637 | { |
2638 | switchMap->Remove(blk); |
2639 | } |
2640 | } |
2641 | } |
2642 | break; |
2643 | |
2644 | default: |
2645 | unreached(); |
2646 | } |
2647 | } |
2648 | |
2649 | // TODO-Cleanup: This should be a static member of the BasicBlock class. |
2650 | void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to) |
2651 | { |
2652 | assert(from->bbJumpKind == to->bbJumpKind); // Precondition. |
2653 | |
2654 | // copy the jump destination(s) from "from" to "to". |
2655 | switch (to->bbJumpKind) |
2656 | { |
2657 | case BBJ_ALWAYS: |
2658 | case BBJ_LEAVE: |
2659 | case BBJ_CALLFINALLY: |
2660 | case BBJ_COND: |
2661 | // All of these have a single jump destination to update. |
2662 | to->bbJumpDest = from->bbJumpDest; |
2663 | break; |
2664 | |
2665 | case BBJ_SWITCH: |
2666 | { |
2667 | to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc(); |
2668 | to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount; |
2669 | to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount]; |
2670 | |
2671 | for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++) |
2672 | { |
2673 | to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i]; |
2674 | } |
2675 | } |
2676 | break; |
2677 | |
2678 | default: |
2679 | break; |
2680 | } |
2681 | } |
2682 | |
2683 | // Canonicalize the loop nest rooted at parent loop 'loopInd'. |
2684 | // Returns 'true' if the flow graph is modified. |
2685 | bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd) |
2686 | { |
2687 | bool modified = false; |
2688 | |
// If the top of this loop is labeled as belonging to a nested loop, give this loop a unique top.
2690 | if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd) |
2691 | { |
2692 | if (optCanonicalizeLoop(loopInd)) |
2693 | { |
2694 | modified = true; |
2695 | } |
2696 | } |
2697 | |
2698 | for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP; |
2699 | child = optLoopTable[child].lpSibling) |
2700 | { |
2701 | if (optCanonicalizeLoopNest(child)) |
2702 | { |
2703 | modified = true; |
2704 | } |
2705 | } |
2706 | |
2707 | return modified; |
2708 | } |
2709 | |
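// Canonicalize loop 'loopInd': if its "top" block is currently labeled as belonging to a more
// deeply nested loop, insert a new empty block to serve as this loop's unique "top".
// Returns 'true' if the flow graph is modified.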
2710 | bool Compiler::optCanonicalizeLoop(unsigned char loopInd) |
2711 | { |
2712 | // Is the top uniquely part of the current loop? |
2713 | BasicBlock* t = optLoopTable[loopInd].lpTop; |
2714 | |
2715 | if (t->bbNatLoopNum == loopInd) |
2716 | { |
2717 | return false; |
2718 | } |
2719 | |
2720 | JITDUMP("in optCanonicalizeLoop: L%02u has top " FMT_BB " (bottom " FMT_BB |
2721 | ") with natural loop number L%02u: need to " |
2722 | "canonicalize\n" , |
2723 | loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum); |
2724 | |
2725 | // Otherwise, the top of this loop is also part of a nested loop. |
2726 | // |
2727 | // Insert a new unique top for this loop. We must be careful to put this new |
2728 | // block in the correct EH region. Note that f->bbPrev might be in a different |
2729 | // EH region. For example: |
2730 | // |
2731 | // try { |
2732 | // ... |
2733 | // BB07 |
2734 | // } |
2735 | // BB08 // "first" |
2736 | // |
2737 | // In this case, first->bbPrev is BB07, which is in a different 'try' region. |
2738 | // On the other hand, the first block of multiple loops might be the first |
2739 | // block of a 'try' region that is completely contained in the multiple loops. |
2740 | // for example: |
2741 | // |
2742 | // BB08 try { } |
2743 | // ... |
2744 | // BB10 BBJ_ALWAYS => BB08 |
2745 | // ... |
2746 | // BB12 BBJ_ALWAYS => BB08 |
2747 | // |
2748 | // Here, we have two loops, both with BB08 as the "first" block. Block BB08 |
2749 | // is a single-block "try" region. Neither loop "bottom" block is in the same |
2750 | // "try" region as BB08. This is legal because you can jump to the first block |
2751 | // of a try region. With EH normalization, no two "try" regions will share |
2752 | // this block. In this case, we need to insert a new block for the outer loop |
2753 | // in the same EH region as the branch from the "bottom": |
2754 | // |
2755 | // BB30 BBJ_NONE |
2756 | // BB08 try { } |
2757 | // ... |
2758 | // BB10 BBJ_ALWAYS => BB08 |
2759 | // ... |
2760 | // BB12 BBJ_ALWAYS => BB30 |
2761 | // |
2762 | // Another possibility is that the "first" block of the loop nest can be the first block |
2763 | // of a "try" region that also has other predecessors than those in the loop, or even in |
2764 | // the "try" region (since blocks can target the first block of a "try" region). For example: |
2765 | // |
2766 | // BB08 try { |
2767 | // ... |
2768 | // BB10 BBJ_ALWAYS => BB08 |
2769 | // ... |
2770 | // BB12 BBJ_ALWAYS => BB08 |
2771 | // BB13 } |
2772 | // ... |
2773 | // BB20 BBJ_ALWAYS => BB08 |
2774 | // ... |
2775 | // BB25 BBJ_ALWAYS => BB08 |
2776 | // |
2777 | // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop |
2778 | // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the |
2779 | // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within |
2780 | // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the |
2781 | // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches |
2782 | // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal |
2783 | // situation of branching to a non-first block of a 'try' region. |
2784 | // |
2785 | // We can also have a loop nest where the "first" block is outside of a "try" region |
2786 | // and the back edges are inside a "try" region, for example: |
2787 | // |
2788 | // BB02 // "first" |
2789 | // ... |
2790 | // BB09 try { BBJ_COND => BB02 |
2791 | // ... |
2792 | // BB15 BBJ_COND => BB02 |
2793 | // ... |
2794 | // BB21 } // end of "try" |
2795 | // |
2796 | // In this case, both loop back edges were formed by "leave" instructions that were |
2797 | // imported into branches that were later made conditional. In this case, we don't |
2798 | // want to copy the EH region of the back edge, since that would create a block |
2799 | // outside of and disjoint with the "try" region of the back edge. However, to |
2800 | // simplify things, we disqualify this type of loop, so we should never see this here. |
2801 | |
2802 | BasicBlock* h = optLoopTable[loopInd].lpHead; |
2803 | BasicBlock* f = optLoopTable[loopInd].lpFirst; |
2804 | BasicBlock* b = optLoopTable[loopInd].lpBottom; |
2805 | |
2806 | // The loop must be entirely contained within a single handler region. |
2807 | assert(BasicBlock::sameHndRegion(f, b)); |
2808 | |
2809 | // If the bottom block is in the same "try" region, then we extend the EH |
2810 | // region. Otherwise, we add the new block outside the "try" region. |
2811 | bool extendRegion = BasicBlock::sameTryRegion(f, b); |
2812 | BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion); |
2813 | if (!extendRegion) |
2814 | { |
2815 | // We need to set the EH region manually. Set it to be the same |
2816 | // as the bottom block. |
2817 | newT->copyEHRegion(b); |
2818 | } |
2819 | |
2820 | // The new block can reach the same set of blocks as the old one, but don't try to reflect |
2821 | // that in its reachability set here -- creating the new block may have changed the BlockSet |
2822 | // representation from short to long, and canonicalizing loops is immediately followed by |
2823 | // a call to fgUpdateChangedFlowGraph which will recompute the reachability sets anyway. |
2824 | |
2825 | // Redirect the "bottom" of the current loop to "newT". |
2826 | BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist()); |
2827 | blockMap->Set(t, newT); |
2828 | optRedirectBlock(b, blockMap); |
2829 | |
2830 | // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue |
// to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2832 | // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However, |
2833 | // they might have been prevented from participating in the loop nest due to different EH nesting, or some |
2834 | // other reason. |
2835 | // |
2836 | // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed |
2837 | // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times. |
2838 | // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source |
2839 | // edge of the blockMap, so nothing will happen. |
2840 | bool firstPred = true; |
2841 | for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext) |
2842 | { |
2843 | BasicBlock* topPredBlock = topPred->flBlock; |
2844 | |
2845 | // Skip if topPredBlock is in the loop. |
2846 | // Note that this uses block number to detect membership in the loop. We are adding blocks during |
2847 | // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work |
2848 | // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists. |
2849 | if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum) |
2850 | { |
2851 | JITDUMP("in optCanonicalizeLoop: 'top' predecessor " FMT_BB " is in the range of L%02u (" FMT_BB ".." FMT_BB |
2852 | "); not " |
2853 | "redirecting its bottom edge\n" , |
2854 | topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum); |
2855 | continue; |
2856 | } |
2857 | |
2858 | JITDUMP("in optCanonicalizeLoop: redirect top predecessor " FMT_BB " to " FMT_BB "\n" , topPredBlock->bbNum, |
2859 | newT->bbNum); |
2860 | optRedirectBlock(topPredBlock, blockMap); |
2861 | |
2862 | // When we have profile data then the 'newT' block will inherit topPredBlock profile weight |
2863 | if (topPredBlock->hasProfileWeight()) |
2864 | { |
2865 | // This corrects an issue when the topPredBlock has a profile based weight |
2866 | // |
2867 | if (firstPred) |
2868 | { |
2869 | JITDUMP("in optCanonicalizeLoop: block " FMT_BB " will inheritWeight from " FMT_BB "\n" , newT->bbNum, |
2870 | topPredBlock->bbNum); |
2871 | |
2872 | newT->inheritWeight(topPredBlock); |
2873 | firstPred = false; |
2874 | } |
2875 | else |
2876 | { |
2877 | JITDUMP("in optCanonicalizeLoop: block " FMT_BB " will also contribute to the weight of " FMT_BB "\n" , |
2878 | newT->bbNum, topPredBlock->bbNum); |
2879 | |
2880 | BasicBlock::weight_t newWeight = newT->getBBWeight(this) + topPredBlock->getBBWeight(this); |
2881 | newT->setBBWeight(newWeight); |
2882 | } |
2883 | } |
2884 | } |
2885 | |
2886 | assert(newT->bbNext == f); |
2887 | if (f != t) |
2888 | { |
2889 | newT->bbJumpKind = BBJ_ALWAYS; |
2890 | newT->bbJumpDest = t; |
2891 | newT->bbTreeList = nullptr; |
2892 | fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr))); |
2893 | } |
2894 | |
2895 | // If it had been a do-while loop (top == entry), update entry, as well. |
2896 | BasicBlock* origE = optLoopTable[loopInd].lpEntry; |
2897 | if (optLoopTable[loopInd].lpTop == origE) |
2898 | { |
2899 | optLoopTable[loopInd].lpEntry = newT; |
2900 | } |
2901 | optLoopTable[loopInd].lpTop = newT; |
2902 | optLoopTable[loopInd].lpFirst = newT; |
2903 | |
2904 | newT->bbNatLoopNum = loopInd; |
2905 | |
2906 | JITDUMP("in optCanonicalizeLoop: made new block " FMT_BB " [%p] the new unique top of loop %d.\n" , newT->bbNum, |
2907 | dspPtr(newT), loopInd); |
2908 | |
2909 | // Make sure the head block still goes to the entry... |
2910 | if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry) |
2911 | { |
2912 | h->bbJumpKind = BBJ_ALWAYS; |
2913 | h->bbJumpDest = optLoopTable[loopInd].lpEntry; |
2914 | } |
2915 | else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry) |
2916 | { |
2917 | BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true); |
2918 | optLoopTable[loopInd].lpHead = h2; |
2919 | h2->bbJumpDest = optLoopTable[loopInd].lpEntry; |
2920 | h2->bbTreeList = nullptr; |
2921 | fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr))); |
2922 | } |
2923 | |
2924 | // If any loops nested in "loopInd" have the same head and entry as "loopInd", |
2925 | // it must be the case that they were do-while's (since "h" fell through to the entry). |
2926 | // The new node "newT" becomes the head of such loops. |
2927 | for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP; |
2928 | childLoop = optLoopTable[childLoop].lpSibling) |
2929 | { |
2930 | if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h && |
2931 | newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE) |
2932 | { |
2933 | optUpdateLoopHead(childLoop, h, newT); |
2934 | } |
2935 | } |
2936 | return true; |
2937 | } |
2938 | |
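// Returns true if loop 'l1' contains loop 'l2' (a loop is considered to contain itself),
// determined by walking up the parent chain of 'l2'.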
2939 | bool Compiler::optLoopContains(unsigned l1, unsigned l2) |
2940 | { |
2941 | assert(l1 != BasicBlock::NOT_IN_LOOP); |
2942 | if (l1 == l2) |
2943 | { |
2944 | return true; |
2945 | } |
2946 | else if (l2 == BasicBlock::NOT_IN_LOOP) |
2947 | { |
2948 | return false; |
2949 | } |
2950 | else |
2951 | { |
2952 | return optLoopContains(l1, optLoopTable[l2].lpParent); |
2953 | } |
2954 | } |
2955 | |
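// Change the recorded 'head' of loop 'loopInd' from 'from' to 'to', and do the same for any
// child loops that also had 'from' as their head.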
2956 | void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to) |
2957 | { |
2958 | assert(optLoopTable[loopInd].lpHead == from); |
2959 | optLoopTable[loopInd].lpHead = to; |
2960 | for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP; |
2961 | childLoop = optLoopTable[childLoop].lpSibling) |
2962 | { |
2963 | if (optLoopTable[childLoop].lpHead == from) |
2964 | { |
2965 | optUpdateLoopHead(childLoop, from, to); |
2966 | } |
2967 | } |
2968 | } |
2969 | |
2970 | /***************************************************************************** |
 * Returns true if the "i += const" will cause an overflow exception for the small types.
2972 | */ |
2973 | |
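// Illustrative example (not from the original source): for a TYP_BYTE iterator, an exit value of
// 128 exceeds SCHAR_MAX (127), so jitIterSmallOverflow(128, TYP_BYTE) returns true and
// optComputeLoopRep gives up on computing a trip count for that loop.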
2974 | bool jitIterSmallOverflow(int iterAtExit, var_types incrType) |
2975 | { |
2976 | int type_MAX; |
2977 | |
2978 | switch (incrType) |
2979 | { |
2980 | case TYP_BYTE: |
2981 | type_MAX = SCHAR_MAX; |
2982 | break; |
2983 | case TYP_UBYTE: |
2984 | type_MAX = UCHAR_MAX; |
2985 | break; |
2986 | case TYP_SHORT: |
2987 | type_MAX = SHRT_MAX; |
2988 | break; |
2989 | case TYP_USHORT: |
2990 | type_MAX = USHRT_MAX; |
2991 | break; |
2992 | |
2993 | case TYP_UINT: // Detected by checking for 32bit .... |
2994 | case TYP_INT: |
2995 | return false; // ... overflow same as done for TYP_INT |
2996 | |
2997 | default: |
2998 | NO_WAY("Bad type" ); |
2999 | } |
3000 | |
3001 | if (iterAtExit > type_MAX) |
3002 | { |
3003 | return true; |
3004 | } |
3005 | else |
3006 | { |
3007 | return false; |
3008 | } |
3009 | } |
3010 | |
3011 | /***************************************************************************** |
 * Returns true if the "i -= const" will cause an underflow exception for the small types.
3013 | */ |
3014 | |
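// Illustrative example (not from the original source): for a TYP_UBYTE iterator decremented past
// zero, an exit value of -1 is below the type's minimum of 0, so jitIterSmallUnderflow(-1, TYP_UBYTE)
// returns true and the trip count computation bails out.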
3015 | bool jitIterSmallUnderflow(int iterAtExit, var_types decrType) |
3016 | { |
3017 | int type_MIN; |
3018 | |
3019 | switch (decrType) |
3020 | { |
3021 | case TYP_BYTE: |
3022 | type_MIN = SCHAR_MIN; |
3023 | break; |
3024 | case TYP_SHORT: |
3025 | type_MIN = SHRT_MIN; |
3026 | break; |
3027 | case TYP_UBYTE: |
3028 | type_MIN = 0; |
3029 | break; |
3030 | case TYP_USHORT: |
3031 | type_MIN = 0; |
3032 | break; |
3033 | |
3034 | case TYP_UINT: // Detected by checking for 32bit .... |
3035 | case TYP_INT: |
3036 | return false; // ... underflow same as done for TYP_INT |
3037 | |
3038 | default: |
3039 | NO_WAY("Bad type" ); |
3040 | } |
3041 | |
3042 | if (iterAtExit < type_MIN) |
3043 | { |
3044 | return true; |
3045 | } |
3046 | else |
3047 | { |
3048 | return false; |
3049 | } |
3050 | } |
3051 | |
3052 | /***************************************************************************** |
3053 | * |
 * Helper for loop unrolling - Computes the number of repetitions
 * of a constant loop. Returns false if it cannot prove that the number is constant.
3056 | */ |
3057 | |
3058 | bool Compiler::optComputeLoopRep(int constInit, |
3059 | int constLimit, |
3060 | int iterInc, |
3061 | genTreeOps iterOper, |
3062 | var_types iterOperType, |
3063 | genTreeOps testOper, |
3064 | bool unsTest, |
3065 | bool dupCond, |
3066 | unsigned* iterCount) |
3067 | { |
3068 | noway_assert(genActualType(iterOperType) == TYP_INT); |
3069 | |
3070 | __int64 constInitX; |
3071 | __int64 constLimitX; |
3072 | |
3073 | unsigned loopCount; |
3074 | int iterSign; |
3075 | |
// Promote the limit to 64 bits (zero- or sign-extended according to unsTest) so that we
// can just do signed comparisons with the other 32 bit values.
3077 | if (unsTest) |
3078 | { |
3079 | constLimitX = (unsigned int)constLimit; |
3080 | } |
3081 | else |
3082 | { |
3083 | constLimitX = (signed int)constLimit; |
3084 | } |
3085 | |
3086 | switch (iterOperType) |
3087 | { |
// For small types, the iteration operator will narrow these values if they are too big
3089 | |
3090 | #define INIT_ITER_BY_TYPE(type) \ |
3091 | constInitX = (type)constInit; \ |
3092 | iterInc = (type)iterInc; |
3093 | |
3094 | case TYP_BYTE: |
3095 | INIT_ITER_BY_TYPE(signed char); |
3096 | break; |
3097 | case TYP_UBYTE: |
3098 | INIT_ITER_BY_TYPE(unsigned char); |
3099 | break; |
3100 | case TYP_SHORT: |
3101 | INIT_ITER_BY_TYPE(signed short); |
3102 | break; |
3103 | case TYP_USHORT: |
3104 | INIT_ITER_BY_TYPE(unsigned short); |
3105 | break; |
3106 | |
3107 | // For the big types, 32 bit arithmetic is performed |
3108 | |
3109 | case TYP_INT: |
3110 | case TYP_UINT: |
3111 | if (unsTest) |
3112 | { |
3113 | constInitX = (unsigned int)constInit; |
3114 | } |
3115 | else |
3116 | { |
3117 | constInitX = (signed int)constInit; |
3118 | } |
3119 | break; |
3120 | |
3121 | default: |
noway_assert(!"Bad type");
NO_WAY("Bad type");
3124 | } |
3125 | |
3126 | /* If iterInc is zero we have an infinite loop */ |
3127 | if (iterInc == 0) |
3128 | { |
3129 | return false; |
3130 | } |
3131 | |
3132 | /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */ |
3133 | iterSign = (iterInc > 0) ? +1 : -1; |
3134 | |
3135 | /* Initialize loopCount to zero */ |
3136 | loopCount = 0; |
3137 | |
// If dupCond is true then the loop head contains a test which skips
// this loop if constInit does not pass the loop test; such a loop can
// execute zero times. If dupCond is false then we have a true do-while
// loop and we always execute the loop body once before performing the
// loop test.
3143 | if (!dupCond) |
3144 | { |
3145 | loopCount += 1; |
3146 | constInitX += iterInc; |
3147 | } |
3148 | |
3149 | // bail if count is based on wrap-around math |
3150 | if (iterInc > 0) |
3151 | { |
3152 | if (constLimitX < constInitX) |
3153 | { |
3154 | return false; |
3155 | } |
3156 | } |
3157 | else if (constLimitX > constInitX) |
3158 | { |
3159 | return false; |
3160 | } |
3161 | |
3162 | /* Compute the number of repetitions */ |
3163 | |
3164 | switch (testOper) |
3165 | { |
3166 | __int64 iterAtExitX; |
3167 | |
3168 | case GT_EQ: |
3169 | /* something like "for (i=init; i == lim; i++)" doesn't make any sense */ |
3170 | return false; |
3171 | |
3172 | case GT_NE: |
3173 | /* "for (i=init; i != lim; i+=const)" - this is tricky since it may |
3174 | * have a constant number of iterations or loop forever - |
3175 | * we have to compute (lim-init) mod iterInc to see if it is zero. |
 * If mod iterInc is not zero then the limit test will miss and a wrap will occur,
 * which is probably not what the end user wanted, but it is legal.
3178 | */ |
3179 | |
3180 | if (iterInc > 0) |
3181 | { |
3182 | /* Stepping by one, i.e. Mod with 1 is always zero */ |
3183 | if (iterInc != 1) |
3184 | { |
3185 | if (((constLimitX - constInitX) % iterInc) != 0) |
3186 | { |
3187 | return false; |
3188 | } |
3189 | } |
3190 | } |
3191 | else |
3192 | { |
3193 | noway_assert(iterInc < 0); |
3194 | /* Stepping by -1, i.e. Mod with 1 is always zero */ |
3195 | if (iterInc != -1) |
3196 | { |
3197 | if (((constInitX - constLimitX) % (-iterInc)) != 0) |
3198 | { |
3199 | return false; |
3200 | } |
3201 | } |
3202 | } |
3203 | |
3204 | switch (iterOper) |
3205 | { |
3206 | case GT_SUB: |
3207 | iterInc = -iterInc; |
3208 | __fallthrough; |
3209 | |
3210 | case GT_ADD: |
3211 | if (constInitX != constLimitX) |
3212 | { |
3213 | loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1; |
3214 | } |
3215 | |
3216 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3217 | |
3218 | if (unsTest) |
3219 | { |
3220 | iterAtExitX = (unsigned)iterAtExitX; |
3221 | } |
3222 | |
3223 | // Check if iteration incr will cause overflow for small types |
3224 | if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) |
3225 | { |
3226 | return false; |
3227 | } |
3228 | |
3229 | // iterator with 32bit overflow. Bad for TYP_(U)INT |
3230 | if (iterAtExitX < constLimitX) |
3231 | { |
3232 | return false; |
3233 | } |
3234 | |
3235 | *iterCount = loopCount; |
3236 | return true; |
3237 | |
3238 | case GT_MUL: |
3239 | case GT_DIV: |
3240 | case GT_RSH: |
3241 | case GT_LSH: |
3242 | case GT_UDIV: |
3243 | return false; |
3244 | |
3245 | default: |
3246 | noway_assert(!"Unknown operator for loop iterator" ); |
3247 | return false; |
3248 | } |
3249 | |
3250 | case GT_LT: |
3251 | switch (iterOper) |
3252 | { |
3253 | case GT_SUB: |
3254 | iterInc = -iterInc; |
3255 | __fallthrough; |
3256 | |
3257 | case GT_ADD: |
3258 | if (constInitX < constLimitX) |
3259 | { |
3260 | loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1; |
3261 | } |
3262 | |
3263 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3264 | |
3265 | if (unsTest) |
3266 | { |
3267 | iterAtExitX = (unsigned)iterAtExitX; |
3268 | } |
3269 | |
3270 | // Check if iteration incr will cause overflow for small types |
3271 | if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) |
3272 | { |
3273 | return false; |
3274 | } |
3275 | |
3276 | // iterator with 32bit overflow. Bad for TYP_(U)INT |
3277 | if (iterAtExitX < constLimitX) |
3278 | { |
3279 | return false; |
3280 | } |
3281 | |
3282 | *iterCount = loopCount; |
3283 | return true; |
3284 | |
3285 | case GT_MUL: |
3286 | case GT_DIV: |
3287 | case GT_RSH: |
3288 | case GT_LSH: |
3289 | case GT_UDIV: |
3290 | return false; |
3291 | |
3292 | default: |
3293 | noway_assert(!"Unknown operator for loop iterator" ); |
3294 | return false; |
3295 | } |
3296 | |
3297 | case GT_LE: |
3298 | switch (iterOper) |
3299 | { |
3300 | case GT_SUB: |
3301 | iterInc = -iterInc; |
3302 | __fallthrough; |
3303 | |
3304 | case GT_ADD: |
3305 | if (constInitX <= constLimitX) |
3306 | { |
3307 | loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1; |
3308 | } |
3309 | |
3310 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3311 | |
3312 | if (unsTest) |
3313 | { |
3314 | iterAtExitX = (unsigned)iterAtExitX; |
3315 | } |
3316 | |
3317 | // Check if iteration incr will cause overflow for small types |
3318 | if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) |
3319 | { |
3320 | return false; |
3321 | } |
3322 | |
3323 | // iterator with 32bit overflow. Bad for TYP_(U)INT |
3324 | if (iterAtExitX <= constLimitX) |
3325 | { |
3326 | return false; |
3327 | } |
3328 | |
3329 | *iterCount = loopCount; |
3330 | return true; |
3331 | |
3332 | case GT_MUL: |
3333 | case GT_DIV: |
3334 | case GT_RSH: |
3335 | case GT_LSH: |
3336 | case GT_UDIV: |
3337 | return false; |
3338 | |
3339 | default: |
3340 | noway_assert(!"Unknown operator for loop iterator" ); |
3341 | return false; |
3342 | } |
3343 | |
3344 | case GT_GT: |
3345 | switch (iterOper) |
3346 | { |
3347 | case GT_SUB: |
3348 | iterInc = -iterInc; |
3349 | __fallthrough; |
3350 | |
3351 | case GT_ADD: |
3352 | if (constInitX > constLimitX) |
3353 | { |
3354 | loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1; |
3355 | } |
3356 | |
3357 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3358 | |
3359 | if (unsTest) |
3360 | { |
3361 | iterAtExitX = (unsigned)iterAtExitX; |
3362 | } |
3363 | |
3364 | // Check if small types will underflow |
3365 | if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType)) |
3366 | { |
3367 | return false; |
3368 | } |
3369 | |
3370 | // iterator with 32bit underflow. Bad for TYP_INT and unsigneds |
3371 | if (iterAtExitX > constLimitX) |
3372 | { |
3373 | return false; |
3374 | } |
3375 | |
3376 | *iterCount = loopCount; |
3377 | return true; |
3378 | |
3379 | case GT_MUL: |
3380 | case GT_DIV: |
3381 | case GT_RSH: |
3382 | case GT_LSH: |
3383 | case GT_UDIV: |
3384 | return false; |
3385 | |
3386 | default: |
3387 | noway_assert(!"Unknown operator for loop iterator" ); |
3388 | return false; |
3389 | } |
3390 | |
3391 | case GT_GE: |
3392 | switch (iterOper) |
3393 | { |
3394 | case GT_SUB: |
3395 | iterInc = -iterInc; |
3396 | __fallthrough; |
3397 | |
3398 | case GT_ADD: |
3399 | if (constInitX >= constLimitX) |
3400 | { |
3401 | loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1; |
3402 | } |
3403 | |
3404 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3405 | |
3406 | if (unsTest) |
3407 | { |
3408 | iterAtExitX = (unsigned)iterAtExitX; |
3409 | } |
3410 | |
3411 | // Check if small types will underflow |
3412 | if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType)) |
3413 | { |
3414 | return false; |
3415 | } |
3416 | |
3417 | // iterator with 32bit underflow. Bad for TYP_INT and unsigneds |
3418 | if (iterAtExitX >= constLimitX) |
3419 | { |
3420 | return false; |
3421 | } |
3422 | |
3423 | *iterCount = loopCount; |
3424 | return true; |
3425 | |
3426 | case GT_MUL: |
3427 | case GT_DIV: |
3428 | case GT_RSH: |
3429 | case GT_LSH: |
3430 | case GT_UDIV: |
3431 | return false; |
3432 | |
3433 | default: |
3434 | noway_assert(!"Unknown operator for loop iterator" ); |
3435 | return false; |
3436 | } |
3437 | |
3438 | default: |
3439 | noway_assert(!"Unknown operator for loop condition" ); |
3440 | } |
3441 | |
3442 | return false; |
3443 | } |
3444 | |
3445 | /***************************************************************************** |
3446 | * |
3447 | * Look for loop unrolling candidates and unroll them |
3448 | */ |
3449 | |
3450 | #ifdef _PREFAST_ |
3451 | #pragma warning(push) |
3452 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
3453 | #endif |
3454 | void Compiler::optUnrollLoops() |
3455 | { |
3456 | if (compCodeOpt() == SMALL_CODE) |
3457 | { |
3458 | return; |
3459 | } |
3460 | |
3461 | if (optLoopCount == 0) |
3462 | { |
3463 | return; |
3464 | } |
3465 | |
3466 | #ifdef DEBUG |
3467 | if (JitConfig.JitNoUnroll()) |
3468 | { |
3469 | return; |
3470 | } |
3471 | #endif |
3472 | |
3473 | #ifdef DEBUG |
3474 | if (verbose) |
3475 | { |
3476 | printf("*************** In optUnrollLoops()\n" ); |
3477 | } |
3478 | #endif |
3479 | /* Look for loop unrolling candidates */ |
3480 | |
3481 | bool change = false; |
3482 | |
// Visit loops from highest to lowest number to visit them in innermost
// to outermost order.
3485 | for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum) |
3486 | { |
3487 | // This is necessary due to an apparent analysis limitation since |
3488 | // optLoopCount must be strictly greater than 0 upon entry and lnum |
3489 | // cannot wrap due to the loop termination condition. |
3490 | PREFAST_ASSUME(lnum != 0U - 1); |
3491 | |
3492 | BasicBlock* block; |
3493 | BasicBlock* head; |
3494 | BasicBlock* bottom; |
3495 | |
3496 | GenTree* loop; |
3497 | GenTree* test; |
3498 | GenTree* incr; |
3499 | GenTree* phdr; |
3500 | GenTree* init; |
3501 | |
3502 | bool dupCond; |
3503 | int lval; |
3504 | int lbeg; // initial value for iterator |
3505 | int llim; // limit value for iterator |
3506 | unsigned lvar; // iterator lclVar # |
3507 | int iterInc; // value to increment the iterator |
3508 | genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.) |
3509 | var_types iterOperType; // type result of the oper (for overflow instrs) |
3510 | genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.) |
bool unsTest; // Is the comparison unsigned?
3512 | |
3513 | unsigned loopRetCount; // number of BBJ_RETURN blocks in loop |
3514 | unsigned totalIter; // total number of iterations in the constant loop |
3515 | unsigned loopFlags; // actual lpFlags |
3516 | unsigned requiredFlags; // required lpFlags |
3517 | |
3518 | static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = { |
3519 | 10, // BLENDED_CODE |
3520 | 0, // SMALL_CODE |
3521 | 20, // FAST_CODE |
3522 | 0 // COUNT_OPT_CODE |
3523 | }; |
3524 | |
3525 | noway_assert(ITER_LIMIT[SMALL_CODE] == 0); |
3526 | noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0); |
3527 | |
3528 | unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()]; |
3529 | |
3530 | #ifdef DEBUG |
3531 | if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) |
3532 | { |
3533 | iterLimit *= 10; |
3534 | } |
3535 | #endif |
3536 | |
3537 | static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = { |
3538 | 300, // BLENDED_CODE |
3539 | 0, // SMALL_CODE |
3540 | 600, // FAST_CODE |
3541 | 0 // COUNT_OPT_CODE |
3542 | }; |
3543 | |
3544 | noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0); |
3545 | noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0); |
3546 | |
3547 | int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()]; |
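// Taken together (illustrative): under BLENDED_CODE we only fully unroll loops
// with at most 10 iterations and an estimated code-size growth of at most 300
// bytes; both limits are relaxed under STRESS_UNROLL_LOOPS.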
3548 | |
3549 | loopFlags = optLoopTable[lnum].lpFlags; |
3550 | // Check for required flags: |
3551 | // LPFLG_DO_WHILE - required because this transform only handles loops of this form |
3552 | // LPFLG_CONST - required because this transform only handles full unrolls |
3553 | // LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons |
3554 | requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT; |
3555 | |
3556 | #ifdef DEBUG |
3557 | if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) |
3558 | { |
3559 | // In stress mode, quadruple the size limit, and drop |
3560 | // the restriction that loop limit must be Vector<T>.Count. |
3561 | |
3562 | unrollLimitSz *= 4; |
3563 | requiredFlags &= ~LPFLG_SIMD_LIMIT; |
3564 | } |
3565 | #endif |
3566 | |
3567 | /* Ignore the loop if we don't have a do-while |
3568 | that has a constant number of iterations */ |
3569 | |
3570 | if ((loopFlags & requiredFlags) != requiredFlags) |
3571 | { |
3572 | continue; |
3573 | } |
3574 | |
3575 | /* ignore if removed or marked as not unrollable */ |
3576 | |
3577 | if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED)) |
3578 | { |
3579 | continue; |
3580 | } |
3581 | |
3582 | head = optLoopTable[lnum].lpHead; |
3583 | noway_assert(head); |
3584 | bottom = optLoopTable[lnum].lpBottom; |
3585 | noway_assert(bottom); |
3586 | |
3587 | /* Get the loop data: |
3588 | - initial constant |
3589 | - limit constant |
3590 | - iterator |
3591 | - iterator increment |
3592 | - increment operation type (i.e. ADD, SUB, etc...) |
3593 | - loop test type (i.e. GT_GE, GT_LT, etc...) |
3594 | */ |
3595 | |
3596 | lbeg = optLoopTable[lnum].lpConstInit; |
3597 | llim = optLoopTable[lnum].lpConstLimit(); |
3598 | testOper = optLoopTable[lnum].lpTestOper(); |
3599 | |
3600 | lvar = optLoopTable[lnum].lpIterVar(); |
3601 | iterInc = optLoopTable[lnum].lpIterConst(); |
3602 | iterOper = optLoopTable[lnum].lpIterOper(); |
3603 | |
3604 | iterOperType = optLoopTable[lnum].lpIterOperType(); |
3605 | unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0; |
3606 | |
3607 | if (lvaTable[lvar].lvAddrExposed) |
3608 | { // If the loop iteration variable is address-exposed then bail |
3609 | continue; |
3610 | } |
3611 | if (lvaTable[lvar].lvIsStructField) |
3612 | { // If the loop iteration variable is a promoted field from a struct then |
3613 | // bail |
3614 | continue; |
3615 | } |
3616 | |
3617 | /* Locate the pre-header and initialization and increment/test statements */ |
3618 | |
3619 | phdr = head->bbTreeList; |
3620 | noway_assert(phdr); |
3621 | loop = bottom->bbTreeList; |
3622 | noway_assert(loop); |
3623 | |
3624 | init = head->lastStmt(); |
3625 | noway_assert(init && (init->gtNext == nullptr)); |
3626 | test = bottom->lastStmt(); |
3627 | noway_assert(test && (test->gtNext == nullptr)); |
3628 | incr = test->gtPrev; |
3629 | noway_assert(incr); |
3630 | |
3631 | if (init->gtFlags & GTF_STMT_CMPADD) |
3632 | { |
3633 | /* Must be a duplicated loop condition */ |
3634 | noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); |
3635 | |
3636 | dupCond = true; |
3637 | init = init->gtPrev; |
3638 | noway_assert(init); |
3639 | } |
3640 | else |
3641 | { |
3642 | dupCond = false; |
3643 | } |
3644 | |
3645 | /* Find the number of iterations - the function returns false if not a constant number */ |
3646 | |
3647 | if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter)) |
3648 | { |
3649 | continue; |
3650 | } |
3651 | |
3652 | /* Forget it if there are too many repetitions or not a constant loop */ |
3653 | |
3654 | if (totalIter > iterLimit) |
3655 | { |
3656 | continue; |
3657 | } |
3658 | |
3659 | noway_assert(init->gtOper == GT_STMT); |
3660 | init = init->gtStmt.gtStmtExpr; |
3661 | noway_assert(test->gtOper == GT_STMT); |
3662 | test = test->gtStmt.gtStmtExpr; |
3663 | noway_assert(incr->gtOper == GT_STMT); |
3664 | incr = incr->gtStmt.gtStmtExpr; |
3665 | |
3666 | // Don't unroll loops we don't understand. |
3667 | if (incr->gtOper != GT_ASG) |
3668 | { |
3669 | continue; |
3670 | } |
3671 | incr = incr->gtOp.gtOp2; |
3672 | |
3673 | /* Make sure everything looks ok */ |
3674 | if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) || |
3675 | (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) || |
3676 | (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) || |
3677 | |
3678 | !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || |
3679 | (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || |
3680 | (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) || |
3681 | |
3682 | (test->gtOper != GT_JTRUE)) |
3683 | { |
3684 | noway_assert(!"Bad precondition in Compiler::optUnrollLoops()" ); |
3685 | continue; |
3686 | } |
3687 | |
3688 | /* heuristic - Estimated cost in code size of the unrolled loop */ |
3689 | |
3690 | { |
3691 | ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration |
3692 | |
3693 | block = head->bbNext; |
3694 | auto tryIndex = block->bbTryIndex; |
3695 | |
3696 | loopRetCount = 0; |
3697 | for (;; block = block->bbNext) |
3698 | { |
3699 | if (block->bbTryIndex != tryIndex) |
3700 | { |
3701 | // Unrolling would require cloning EH regions |
3702 | goto DONE_LOOP; |
3703 | } |
3704 | |
3705 | if (block->bbJumpKind == BBJ_RETURN) |
3706 | { |
3707 | ++loopRetCount; |
3708 | } |
3709 | |
3710 | /* Visit all the statements in the block */ |
3711 | |
3712 | for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) |
3713 | { |
3714 | /* Calculate gtCostSz */ |
3715 | gtSetStmtInfo(stmt); |
3716 | |
3717 | /* Update loopCostSz */ |
3718 | loopCostSz += stmt->gtCostSz; |
3719 | } |
3720 | |
3721 | if (block == bottom) |
3722 | { |
3723 | break; |
3724 | } |
3725 | } |
3726 | |
3727 | #ifdef JIT32_GCENCODER |
3728 | if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX) |
3729 | { |
3730 | // Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs. |
3731 | goto DONE_LOOP; |
3732 | } |
#endif // JIT32_GCENCODER
3734 | |
3735 | /* Compute the estimated increase in code size for the unrolled loop */ |
3736 | |
3737 | ClrSafeInt<unsigned> fixedLoopCostSz(8); |
3738 | |
3739 | ClrSafeInt<int> unrollCostSz = ClrSafeInt<int>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) - |
3740 | ClrSafeInt<int>(loopCostSz + fixedLoopCostSz); |
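// Worked example (illustrative): if one iteration costs loopCostSz = 40 and
// totalIter = 4, the estimated growth is 40 * 4 - (40 + 8) = 112, which is
// within the BLENDED_CODE limit of 300.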
3741 | |
3742 | /* Don't unroll if too much code duplication would result. */ |
3743 | |
3744 | if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz)) |
3745 | { |
3746 | goto DONE_LOOP; |
3747 | } |
3748 | |
3749 | /* Looks like a good idea to unroll this loop, let's do it! */ |
3750 | CLANG_FORMAT_COMMENT_ANCHOR; |
3751 | |
3752 | #ifdef DEBUG |
3753 | if (verbose) |
3754 | { |
3755 | printf("\nUnrolling loop " FMT_BB, head->bbNext->bbNum); |
3756 | if (head->bbNext->bbNum != bottom->bbNum) |
3757 | { |
3758 | printf(".." FMT_BB, bottom->bbNum); |
3759 | } |
3760 | printf(" over V%02u from %u to %u" , lvar, lbeg, llim); |
printf(" unrollCostSz = %d\n" , unrollCostSz.Value());
3762 | printf("\n" ); |
3763 | } |
3764 | #endif |
3765 | } |
3766 | |
3767 | /* Create the unrolled loop statement list */ |
3768 | { |
3769 | BlockToBlockMap blockMap(getAllocator()); |
3770 | BasicBlock* insertAfter = bottom; |
3771 | |
3772 | for (lval = lbeg; totalIter; totalIter--) |
3773 | { |
3774 | for (block = head->bbNext;; block = block->bbNext) |
3775 | { |
3776 | BasicBlock* newBlock = insertAfter = |
3777 | fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true); |
3778 | blockMap.Set(block, newBlock); |
3779 | |
3780 | if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval)) |
3781 | { |
3782 | // cloneExpr doesn't handle everything |
3783 | BasicBlock* oldBottomNext = insertAfter->bbNext; |
3784 | bottom->bbNext = oldBottomNext; |
3785 | oldBottomNext->bbPrev = bottom; |
3786 | optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL; |
3787 | goto DONE_LOOP; |
3788 | } |
3789 | // Block weight should no longer have the loop multiplier |
3790 | newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT); |
3791 | // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them. |
3792 | assert(newBlock->bbJumpDest == nullptr); |
3793 | |
3794 | if (block == bottom) |
3795 | { |
3796 | // Remove the test; we're doing a full unroll. |
3797 | |
3798 | GenTreeStmt* testCopyStmt = newBlock->lastStmt(); |
3799 | GenTree* testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr; |
3800 | assert(testCopyExpr->gtOper == GT_JTRUE); |
3801 | GenTree* sideEffList = nullptr; |
3802 | gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF); |
3803 | if (sideEffList == nullptr) |
3804 | { |
3805 | fgRemoveStmt(newBlock, testCopyStmt); |
3806 | } |
3807 | else |
3808 | { |
3809 | testCopyStmt->gtStmt.gtStmtExpr = sideEffList; |
3810 | } |
3811 | newBlock->bbJumpKind = BBJ_NONE; |
3812 | |
3813 | // Exit this loop; we've walked all the blocks. |
3814 | break; |
3815 | } |
3816 | } |
3817 | |
3818 | // Now redirect any branches within the newly-cloned iteration |
3819 | for (block = head->bbNext; block != bottom; block = block->bbNext) |
3820 | { |
3821 | BasicBlock* newBlock = blockMap[block]; |
3822 | optCopyBlkDest(block, newBlock); |
3823 | optRedirectBlock(newBlock, &blockMap); |
3824 | } |
3825 | |
3826 | /* update the new value for the unrolled iterator */ |
3827 | |
3828 | switch (iterOper) |
3829 | { |
3830 | case GT_ADD: |
3831 | lval += iterInc; |
3832 | break; |
3833 | |
3834 | case GT_SUB: |
3835 | lval -= iterInc; |
3836 | break; |
3837 | |
3838 | case GT_RSH: |
3839 | case GT_LSH: |
3840 | noway_assert(!"Unrolling not implemented for this loop iterator" ); |
3841 | goto DONE_LOOP; |
3842 | |
3843 | default: |
3844 | noway_assert(!"Unknown operator for constant loop iterator" ); |
3845 | goto DONE_LOOP; |
3846 | } |
3847 | } |
3848 | |
3849 | // Gut the old loop body |
3850 | for (block = head->bbNext;; block = block->bbNext) |
3851 | { |
3852 | block->bbTreeList = nullptr; |
3853 | block->bbJumpKind = BBJ_NONE; |
3854 | block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD); |
3855 | if (block->bbJumpDest != nullptr) |
3856 | { |
3857 | block->bbJumpDest = nullptr; |
3858 | } |
3859 | |
3860 | if (block == bottom) |
3861 | { |
3862 | break; |
3863 | } |
3864 | } |
3865 | |
3866 | /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */ |
3867 | |
3868 | if (head->bbJumpKind == BBJ_COND) |
3869 | { |
3870 | phdr = head->bbTreeList; |
3871 | noway_assert(phdr); |
3872 | test = phdr->gtPrev; |
3873 | |
3874 | noway_assert(test && (test->gtNext == nullptr)); |
3875 | noway_assert(test->gtOper == GT_STMT); |
3876 | noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); |
3877 | |
3878 | init = test->gtPrev; |
3879 | noway_assert(init && (init->gtNext == test)); |
3880 | noway_assert(init->gtOper == GT_STMT); |
3881 | |
3882 | init->gtNext = nullptr; |
3883 | phdr->gtPrev = init; |
3884 | head->bbJumpKind = BBJ_NONE; |
3885 | head->bbFlags &= ~BBF_NEEDS_GCPOLL; |
3886 | } |
3887 | else |
3888 | { |
3889 | /* the loop must execute */ |
3890 | noway_assert(head->bbJumpKind == BBJ_NONE); |
3891 | } |
3892 | |
3893 | #ifdef DEBUG |
3894 | if (verbose) |
3895 | { |
3896 | printf("Whole unrolled loop:\n" ); |
3897 | |
3898 | gtDispTree(init); |
3899 | printf("\n" ); |
3900 | fgDumpTrees(head->bbNext, insertAfter); |
3901 | } |
3902 | #endif |
3903 | |
3904 | /* Remember that something has changed */ |
3905 | |
3906 | change = true; |
3907 | |
3908 | /* Make sure to update loop table */ |
3909 | |
3910 | /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly |
3911 | * (also make head and bottom NULL - to hit an assert or GPF) */ |
3912 | |
3913 | optLoopTable[lnum].lpFlags |= LPFLG_REMOVED; |
3914 | optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr; |
3915 | |
3916 | // Note if we created new BBJ_RETURNs |
3917 | fgReturnCount += loopRetCount * (totalIter - 1); |
3918 | } |
3919 | |
3920 | DONE_LOOP:; |
3921 | } |
3922 | |
3923 | if (change) |
3924 | { |
3925 | fgUpdateChangedFlowGraph(); |
3926 | } |
3927 | |
3928 | #ifdef DEBUG |
3929 | fgDebugCheckBBlist(true); |
3930 | #endif |
3931 | } |
3932 | #ifdef _PREFAST_ |
3933 | #pragma warning(pop) |
3934 | #endif |
3935 | |
3936 | /***************************************************************************** |
3937 | * |
* Return true if there might be a code path from 'topBB' to 'botBB' that does
* not execute a method call (i.e. that contains no GC safe point); return
* false if every such path is known to contain one.
3940 | */ |
3941 | |
3942 | bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB) |
3943 | { |
3944 | // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls, |
3945 | // as some helper calls are neither interruptible nor hijackable. |
3946 | // When we can determine this, then we can set BBF_GC_SAFE_POINT for |
3947 | // those helpers too. |
3948 | |
3949 | noway_assert(topBB->bbNum <= botBB->bbNum); |
3950 | |
3951 | // We can always check topBB and botBB for any gc safe points and early out |
3952 | |
3953 | if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT) |
3954 | { |
3955 | return false; |
3956 | } |
3957 | |
3958 | // Otherwise we will need to rely upon the dominator sets |
3959 | |
3960 | if (!fgDomsComputed) |
3961 | { |
3962 | // return a conservative answer of true when we don't have the dominator sets |
3963 | return true; |
3964 | } |
3965 | |
3966 | BasicBlock* curBB = topBB; |
3967 | for (;;) |
3968 | { |
3969 | noway_assert(curBB); |
3970 | |
3971 | // If we added a loop pre-header block then we will |
3972 | // have a bbNum greater than fgLastBB, and we won't have |
3973 | // any dominator information about this block, so skip it. |
3974 | // |
3975 | if (curBB->bbNum <= fgLastBB->bbNum) |
3976 | { |
3977 | noway_assert(curBB->bbNum <= botBB->bbNum); |
3978 | |
3979 | // Does this block contain a gc safe point? |
3980 | |
3981 | if (curBB->bbFlags & BBF_GC_SAFE_POINT) |
3982 | { |
3983 | // Will this block always execute on the way to botBB ? |
3984 | // |
// We are checking every block in [topBB .. botBB] and we are using
// a lexical definition of a loop
// (all that we know is that botBB has a back-edge to topBB).
3988 | // Thus while walking blocks in this range we may encounter some blocks |
3989 | // that are not really part of the loop, and so we need to perform |
3990 | // some additional checks: |
3991 | // |
3992 | // We will check that the current 'curBB' is reachable from 'topBB' |
3993 | // and that it dominates the block containing the back-edge 'botBB' |
3994 | // When both of these are true then we know that the gcsafe point in 'curBB' |
3995 | // will be encountered in the loop and we can return false |
3996 | // |
3997 | if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB)) |
3998 | { |
3999 | return false; |
4000 | } |
4001 | } |
4002 | else |
4003 | { |
4004 | // If we've reached the destination block, then we're done |
4005 | |
4006 | if (curBB == botBB) |
4007 | { |
4008 | break; |
4009 | } |
4010 | } |
4011 | } |
4012 | |
4013 | curBB = curBB->bbNext; |
4014 | } |
4015 | |
4016 | // If we didn't find any blocks that contained a gc safe point and |
4017 | // also met the fgDominate and fgReachable criteria then we must return true |
4018 | // |
4019 | return true; |
4020 | } |
4021 | |
4022 | /***************************************************************************** |
4023 | * |
4024 | * Find the loop termination test at the bottom of the loop |
4025 | */ |
4026 | |
4027 | static GenTree* optFindLoopTermTest(BasicBlock* bottom) |
4028 | { |
4029 | GenTree* testt = bottom->bbTreeList; |
4030 | |
4031 | assert(testt && testt->gtOper == GT_STMT); |
4032 | |
4033 | GenTree* result = testt->gtPrev; |
4034 | |
4035 | #ifdef DEBUG |
4036 | while (testt->gtNext) |
4037 | { |
4038 | testt = testt->gtNext; |
4039 | } |
4040 | |
4041 | assert(testt == result); |
4042 | #endif |
4043 | |
4044 | return result; |
4045 | } |
4046 | |
4047 | /***************************************************************************** |
* Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
4049 | */ |
4050 | |
4051 | void Compiler::fgOptWhileLoop(BasicBlock* block) |
4052 | { |
4053 | noway_assert(opts.OptimizationEnabled()); |
4054 | noway_assert(compCodeOpt() != SMALL_CODE); |
4055 | |
4056 | /* |
Optimize while loops into do { } while loops.
4058 | Our loop hoisting logic requires do { } while loops. |
4059 | Specifically, we're looking for the following case: |
4060 | |
4061 | ... |
4062 | jmp test |
4063 | loop: |
4064 | ... |
4065 | ... |
4066 | test: |
4067 | cond |
4068 | jtrue loop |
4069 | |
4070 | If we find this, and the condition is simple enough, we change |
4071 | the loop to the following: |
4072 | |
4073 | ... |
4074 | cond |
4075 | jfalse done |
4076 | // else fall-through |
4077 | loop: |
4078 | ... |
4079 | ... |
4080 | test: |
4081 | cond |
4082 | jtrue loop |
4083 | done: |
4084 | |
4085 | */ |
4086 | |
4087 | /* Does the BB end with an unconditional jump? */ |
4088 | |
4089 | if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)) |
4090 | { // It can't be one of the ones we use for our exception magic |
4091 | return; |
4092 | } |
4093 | |
4094 | // It has to be a forward jump |
// TODO-CQ: Check if we can optimize the backwards jump as well.
4096 | // |
4097 | if (fgIsForwardBranch(block) == false) |
4098 | { |
4099 | return; |
4100 | } |
4101 | |
4102 | // Get hold of the jump target |
4103 | BasicBlock* bTest = block->bbJumpDest; |
4104 | |
4105 | // Does the block consist of 'jtrue(cond) block' ? |
4106 | if (bTest->bbJumpKind != BBJ_COND) |
4107 | { |
4108 | return; |
4109 | } |
4110 | |
4111 | // bTest must be a backwards jump to block->bbNext |
4112 | if (bTest->bbJumpDest != block->bbNext) |
4113 | { |
4114 | return; |
4115 | } |
4116 | |
4117 | // Since test is a BBJ_COND it will have a bbNext |
4118 | noway_assert(bTest->bbNext); |
4119 | |
4120 | // 'block' must be in the same try region as the condition, since we're going to insert |
4121 | // a duplicated condition in 'block', and the condition might include exception throwing code. |
4122 | if (!BasicBlock::sameTryRegion(block, bTest)) |
4123 | { |
4124 | return; |
4125 | } |
4126 | |
4127 | // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the |
4128 | // same try region (or no try region) to avoid generating illegal flow. |
4129 | BasicBlock* bTestNext = bTest->bbNext; |
4130 | if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext)) |
4131 | { |
4132 | return; |
4133 | } |
4134 | |
4135 | GenTree* condStmt = optFindLoopTermTest(bTest); |
4136 | |
// bTest must contain only a jtrue with no other stmts; we will only clone
// the conditional, so any other statements will not get cloned.
// TODO-CQ: consider cloning the whole bTest block and inserting it after block.
4140 | // |
4141 | if (bTest->bbTreeList != condStmt) |
4142 | { |
4143 | return; |
4144 | } |
4145 | |
4146 | /* Get to the condition node from the statement tree */ |
4147 | |
4148 | noway_assert(condStmt->gtOper == GT_STMT); |
4149 | |
4150 | GenTree* condTree = condStmt->gtStmt.gtStmtExpr; |
4151 | noway_assert(condTree->gtOper == GT_JTRUE); |
4152 | |
4153 | condTree = condTree->gtOp.gtOp1; |
4154 | |
4155 | // The condTree has to be a RelOp comparison |
// TODO-CQ: Check if we can optimize the backwards jump as well.
4157 | // |
4158 | if (condTree->OperIsCompare() == false) |
4159 | { |
4160 | return; |
4161 | } |
4162 | |
4163 | /* We call gtPrepareCost to measure the cost of duplicating this tree */ |
4164 | |
4165 | gtPrepareCost(condTree); |
4166 | unsigned estDupCostSz = condTree->gtCostSz; |
4167 | |
4168 | double loopIterations = (double)BB_LOOP_WEIGHT; |
4169 | |
4170 | bool allProfileWeightsAreValid = false; |
4171 | BasicBlock::weight_t weightBlock = block->bbWeight; |
4172 | BasicBlock::weight_t weightTest = bTest->bbWeight; |
4173 | BasicBlock::weight_t weightNext = block->bbNext->bbWeight; |
4174 | |
// If we have profile data then we calculate the number of times
// the loop will iterate and store it in loopIterations
4177 | if (fgIsUsingProfileWeights()) |
4178 | { |
4179 | // Only rely upon the profile weight when all three of these blocks |
4180 | // have good profile weights |
4181 | if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight()) |
4182 | { |
4183 | allProfileWeightsAreValid = true; |
4184 | |
4185 | // If this while loop never iterates then don't bother transforming |
4186 | if (weightNext == 0) |
4187 | { |
4188 | return; |
4189 | } |
4190 | |
// With (weightNext > 0) we should also have (weightTest >= weightBlock)
// if the profile weights are all valid.
//
// weightNext is the number of times this loop iterates
// weightBlock is the number of times that we enter the while loop
// loopIterations is the average number of times that this loop iterates
4197 | // |
4198 | if (weightTest >= weightBlock) |
4199 | { |
4200 | loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight; |
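// E.g. (illustrative): if the loop is entered 100 times (weightBlock) and the
// loop top executes 1200 times (weightNext), loopIterations is 12.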
4201 | } |
4202 | } |
4203 | } |
4204 | |
4205 | unsigned maxDupCostSz = 32; |
4206 | |
4207 | // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not |
4208 | // set loop weights yet |
4209 | if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30)) |
4210 | { |
4211 | maxDupCostSz *= 4; |
4212 | } |
4213 | |
4214 | // If this loop iterates a lot then raise the maxDupCost |
4215 | if (loopIterations >= 12.0) |
4216 | { |
4217 | maxDupCostSz *= 2; |
4218 | } |
4219 | if (loopIterations >= 96.0) |
4220 | { |
4221 | maxDupCostSz *= 2; |
4222 | } |
4223 | |
4224 | // If the loop condition has a shared static helper, we really want this loop converted |
4225 | // as not converting the loop will disable loop hoisting, meaning the shared helper will |
4226 | // be executed on every loop iteration. |
4227 | int countOfHelpers = 0; |
4228 | fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers); |
4229 | |
4230 | if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE) |
4231 | { |
4232 | maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5)); |
4233 | } |
4234 | |
4235 | // If the compare has too high cost then we don't want to dup |
4236 | |
4237 | bool costIsTooHigh = (estDupCostSz > maxDupCostSz); |
4238 | |
4239 | #ifdef DEBUG |
4240 | if (verbose) |
4241 | { |
4242 | printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i," |
4243 | "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n" , |
4244 | condTree->gtTreeID, costIsTooHigh ? "not done" : "performed" , estDupCostSz, |
4245 | costIsTooHigh ? "greater" : "less or equal" , maxDupCostSz, loopIterations, countOfHelpers, |
4246 | allProfileWeightsAreValid ? "true" : "false" ); |
4247 | } |
4248 | #endif |
4249 | |
4250 | if (costIsTooHigh) |
4251 | { |
4252 | return; |
4253 | } |
4254 | |
4255 | /* Looks good - duplicate the condition test */ |
4256 | |
4257 | condTree->gtFlags |= GTF_RELOP_ZTT; |
4258 | |
4259 | condTree = gtCloneExpr(condTree); |
4260 | gtReverseCond(condTree); |
4261 | |
4262 | // Make sure clone expr copied the flag |
4263 | assert(condTree->gtFlags & GTF_RELOP_ZTT); |
4264 | |
4265 | condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree); |
4266 | |
4267 | /* Create a statement entry out of the condition and |
4268 | append the condition test at the end of 'block' */ |
4269 | |
4270 | GenTree* copyOfCondStmt = fgInsertStmtAtEnd(block, condTree); |
4271 | |
4272 | copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD; |
4273 | |
4274 | if (opts.compDbgInfo) |
4275 | { |
4276 | copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx; |
4277 | } |
4278 | |
4279 | // Flag the block that received the copy as potentially having an array/vtable |
4280 | // reference if the block copied from did; this is a conservative guess. |
4281 | if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN)) |
4282 | { |
4283 | block->bbFlags |= copyFlags; |
4284 | } |
4285 | |
4286 | // If we have profile data for all blocks and we know that we are cloning the |
4287 | // bTest block into block and thus changing the control flow from block so |
4288 | // that it no longer goes directly to bTest anymore, we have to adjust the |
4289 | // weight of bTest by subtracting out the weight of block. |
4290 | // |
4291 | if (allProfileWeightsAreValid) |
4292 | { |
4293 | // |
4294 | // Some additional sanity checks before adjusting the weight of bTest |
4295 | // |
4296 | if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT)) |
4297 | { |
// Get the two edges that flow out of bTest
4299 | flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest); |
4300 | flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest); |
4301 | |
4302 | // Calculate the new weight for block bTest |
4303 | |
4304 | BasicBlock::weight_t newWeightTest = |
4305 | (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT; |
4306 | bTest->bbWeight = newWeightTest; |
4307 | |
4308 | if (newWeightTest == BB_ZERO_WEIGHT) |
4309 | { |
4310 | bTest->bbFlags |= BBF_RUN_RARELY; |
4311 | // All out edge weights are set to zero |
4312 | edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT; |
4313 | edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT; |
4314 | edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT; |
4315 | edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT; |
4316 | } |
4317 | else |
4318 | { |
// Update our edge weights
4320 | edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT; |
4321 | edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest); |
4322 | edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT; |
4323 | edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest); |
4324 | } |
4325 | } |
4326 | } |
4327 | |
4328 | /* Change the block to end with a conditional jump */ |
4329 | |
4330 | block->bbJumpKind = BBJ_COND; |
4331 | block->bbJumpDest = bTest->bbNext; |
4332 | |
4333 | /* Mark the jump dest block as being a jump target */ |
4334 | block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; |
4335 | |
4336 | /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */ |
4337 | |
4338 | fgAddRefPred(block->bbNext, block); |
4339 | |
4340 | fgRemoveRefPred(bTest, block); |
4341 | fgAddRefPred(bTest->bbNext, block); |
4342 | |
4343 | #ifdef DEBUG |
4344 | if (verbose) |
4345 | { |
4346 | printf("\nDuplicating loop condition in " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")" , block->bbNum, |
4347 | block->bbNext->bbNum, bTest->bbNum); |
4348 | printf("\nEstimated code size expansion is %d\n " , estDupCostSz); |
4349 | |
4350 | gtDispTree(copyOfCondStmt); |
4351 | } |
4352 | |
4353 | #endif |
4354 | } |
4355 | |
4356 | /***************************************************************************** |
4357 | * |
4358 | * Optimize the BasicBlock layout of the method |
4359 | */ |
4360 | |
4361 | void Compiler::optOptimizeLayout() |
4362 | { |
4363 | noway_assert(opts.OptimizationEnabled()); |
4364 | |
4365 | #ifdef DEBUG |
4366 | if (verbose) |
4367 | { |
4368 | printf("*************** In optOptimizeLayout()\n" ); |
4369 | fgDispHandlerTab(); |
4370 | } |
4371 | |
4372 | /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */ |
4373 | fgDebugCheckBBlist(); |
4374 | #endif |
4375 | |
4376 | noway_assert(fgModified == false); |
4377 | |
4378 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
4379 | { |
4380 | /* Make sure the appropriate fields are initialized */ |
4381 | |
4382 | if (block->bbWeight == BB_ZERO_WEIGHT) |
4383 | { |
4384 | /* Zero weighted block can't have a LOOP_HEAD flag */ |
4385 | noway_assert(block->isLoopHead() == false); |
4386 | continue; |
4387 | } |
4388 | |
4389 | assert(block->bbLoopNum == 0); |
4390 | |
4391 | if (compCodeOpt() != SMALL_CODE) |
4392 | { |
4393 | /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */ |
4394 | |
4395 | fgOptWhileLoop(block); |
4396 | } |
4397 | } |
4398 | |
4399 | if (fgModified) |
4400 | { |
4401 | // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop |
4402 | fgComputeEdgeWeights(); |
4403 | } |
4404 | |
4405 | fgUpdateFlowGraph(true); |
4406 | fgReorderBlocks(); |
4407 | fgUpdateFlowGraph(); |
4408 | } |
4409 | |
4410 | /***************************************************************************** |
4411 | * |
4412 | * Perform loop inversion, find and classify natural loops |
4413 | */ |
4414 | |
4415 | void Compiler::optOptimizeLoops() |
4416 | { |
4417 | noway_assert(opts.OptimizationEnabled()); |
4418 | |
4419 | #ifdef DEBUG |
4420 | if (verbose) |
4421 | { |
4422 | printf("*************** In optOptimizeLoops()\n" ); |
4423 | } |
4424 | #endif |
4425 | |
4426 | optSetBlockWeights(); |
4427 | |
4428 | /* Were there any loops in the flow graph? */ |
4429 | |
4430 | if (fgHasLoops) |
4431 | { |
4432 | /* now that we have dominator information we can find loops */ |
4433 | |
4434 | optFindNaturalLoops(); |
4435 | |
4436 | unsigned loopNum = 0; |
4437 | |
4438 | /* Iterate over the flow graph, marking all loops */ |
4439 | |
4440 | /* We will use the following terminology: |
4441 | * top - the first basic block in the loop (i.e. the head of the backward edge) |
4442 | * bottom - the last block in the loop (i.e. the block from which we jump to the top) |
4443 | * lastBottom - used when we have multiple back-edges to the same top |
4444 | */ |
4445 | |
4446 | flowList* pred; |
4447 | |
4448 | BasicBlock* top; |
4449 | |
4450 | for (top = fgFirstBB; top; top = top->bbNext) |
4451 | { |
4452 | BasicBlock* foundBottom = nullptr; |
4453 | |
4454 | for (pred = top->bbPreds; pred; pred = pred->flNext) |
4455 | { |
4456 | /* Is this a loop candidate? - We look for "back edges" */ |
4457 | |
4458 | BasicBlock* bottom = pred->flBlock; |
4459 | |
4460 | /* is this a backward edge? (from BOTTOM to TOP) */ |
4461 | |
4462 | if (top->bbNum > bottom->bbNum) |
4463 | { |
4464 | continue; |
4465 | } |
4466 | |
4467 | /* 'top' also must have the BBF_LOOP_HEAD flag set */ |
4468 | |
4469 | if (top->isLoopHead() == false) |
4470 | { |
4471 | continue; |
4472 | } |
4473 | |
4474 | /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ |
4475 | |
4476 | if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS)) |
4477 | { |
4478 | continue; |
4479 | } |
4480 | |
4481 | /* the top block must be able to reach the bottom block */ |
4482 | if (!fgReachable(top, bottom)) |
4483 | { |
4484 | continue; |
4485 | } |
4486 | |
4487 | /* Found a new loop, record the longest backedge in foundBottom */ |
4488 | |
4489 | if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum)) |
4490 | { |
4491 | foundBottom = bottom; |
4492 | } |
4493 | } |
4494 | |
4495 | if (foundBottom) |
4496 | { |
4497 | loopNum++; |
4498 | #ifdef DEBUG |
4499 | /* Mark the loop header as such */ |
4500 | assert(FitsIn<unsigned char>(loopNum)); |
4501 | top->bbLoopNum = (unsigned char)loopNum; |
4502 | #endif |
4503 | |
4504 | /* Mark all blocks between 'top' and 'bottom' */ |
4505 | |
4506 | optMarkLoopBlocks(top, foundBottom, false); |
4507 | } |
4508 | |
4509 | // We track at most 255 loops |
4510 | if (loopNum == 255) |
4511 | { |
4512 | #if COUNT_LOOPS |
4513 | totalUnnatLoopOverflows++; |
4514 | #endif |
4515 | break; |
4516 | } |
4517 | } |
4518 | |
4519 | #if COUNT_LOOPS |
4520 | totalUnnatLoopCount += loopNum; |
4521 | #endif |
4522 | |
4523 | #ifdef DEBUG |
4524 | if (verbose) |
4525 | { |
4526 | if (loopNum > 0) |
4527 | { |
4528 | printf("\nFound a total of %d loops." , loopNum); |
4529 | printf("\nAfter loop weight marking:\n" ); |
4530 | fgDispBasicBlocks(); |
4531 | printf("\n" ); |
4532 | } |
4533 | } |
4534 | #endif |
4535 | optLoopsMarked = true; |
4536 | } |
4537 | } |
4538 | |
4539 | //------------------------------------------------------------------------ |
4540 | // optDeriveLoopCloningConditions: Derive loop cloning conditions. |
4541 | // |
4542 | // Arguments: |
4543 | // loopNum - the current loop index for which conditions are derived. |
4544 | // context - data structure where all loop cloning info is kept. |
4545 | // |
4546 | // Return Value: |
4547 | // "false" if conditions cannot be obtained. "true" otherwise. |
4548 | // The cloning conditions are updated in the "conditions"[loopNum] field |
4549 | // of the "context" parameter. |
4550 | // |
4551 | // Operation: |
4552 | // Inspect the loop cloning optimization candidates and populate the conditions necessary |
// for each optimization candidate. Checks that the loop stride is "> 0" when the loop
// condition is "less than". If the initializer is a "var" init then the condition
// "var >= 0" is added, and if the loop limit is a variable then "var >= 0" and
// "var <= a.len" are added to "context". These conditions are checked in the
// pre-header block and the cloning choice is made.
4558 | // |
4559 | // Assumption: |
4560 | // Callers should assume AND operation is used i.e., if all conditions are |
4561 | // true, then take the fast path. |
4562 | // |
4563 | bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context) |
4564 | { |
4565 | JITDUMP("------------------------------------------------------------\n" ); |
4566 | JITDUMP("Deriving cloning conditions for L%02u\n" , loopNum); |
4567 | |
4568 | LoopDsc* loop = &optLoopTable[loopNum]; |
4569 | JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum); |
4570 | |
4571 | if (loop->lpTestOper() == GT_LT) |
4572 | { |
4573 | // Stride conditions |
4574 | if (loop->lpIterConst() <= 0) |
4575 | { |
4576 | JITDUMP("> Stride %d is invalid\n" , loop->lpIterConst()); |
4577 | return false; |
4578 | } |
4579 | |
4580 | // Init conditions |
4581 | if (loop->lpFlags & LPFLG_CONST_INIT) |
4582 | { |
4583 | // Only allowing const init at this time. |
4584 | if (loop->lpConstInit < 0) |
4585 | { |
4586 | JITDUMP("> Init %d is invalid\n" , loop->lpConstInit); |
4587 | return false; |
4588 | } |
4589 | } |
4590 | else if (loop->lpFlags & LPFLG_VAR_INIT) |
4591 | { |
// initVar >= 0
4593 | LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)), |
4594 | LC_Expr(LC_Ident(0, LC_Ident::Const))); |
4595 | context->EnsureConditions(loopNum)->Push(geZero); |
4596 | } |
4597 | else |
4598 | { |
4599 | JITDUMP("> Not variable init\n" ); |
4600 | return false; |
4601 | } |
4602 | |
4603 | // Limit Conditions |
4604 | LC_Ident ident; |
4605 | if (loop->lpFlags & LPFLG_CONST_LIMIT) |
4606 | { |
4607 | int limit = loop->lpConstLimit(); |
4608 | if (limit < 0) |
4609 | { |
4610 | JITDUMP("> limit %d is invalid\n" , limit); |
4611 | return false; |
4612 | } |
4613 | ident = LC_Ident(static_cast<unsigned>(limit), LC_Ident::Const); |
4614 | } |
4615 | else if (loop->lpFlags & LPFLG_VAR_LIMIT) |
4616 | { |
4617 | unsigned limitLcl = loop->lpVarLimit(); |
4618 | ident = LC_Ident(limitLcl, LC_Ident::Var); |
4619 | |
4620 | LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); |
4621 | |
4622 | context->EnsureConditions(loopNum)->Push(geZero); |
4623 | } |
4624 | else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) |
4625 | { |
4626 | ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator()); |
4627 | if (!loop->lpArrLenLimit(this, index)) |
4628 | { |
4629 | JITDUMP("> ArrLen not matching" ); |
4630 | return false; |
4631 | } |
4632 | ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); |
4633 | |
// Ensure that this array is dereference-able before executing the actual condition.
4635 | LC_Array array(LC_Array::Jagged, index, LC_Array::None); |
4636 | context->EnsureDerefs(loopNum)->Push(array); |
4637 | } |
4638 | else |
4639 | { |
4640 | JITDUMP("> Undetected limit\n" ); |
4641 | return false; |
4642 | } |
4643 | |
4644 | for (unsigned i = 0; i < optInfos->Size(); ++i) |
4645 | { |
4646 | LcOptInfo* optInfo = optInfos->GetRef(i); |
4647 | switch (optInfo->GetOptType()) |
4648 | { |
4649 | case LcOptInfo::LcJaggedArray: |
4650 | { |
4651 | // limit <= arrLen |
4652 | LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); |
4653 | LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); |
4654 | LC_Ident arrLenIdent = LC_Ident(arrLen); |
4655 | |
4656 | LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent)); |
4657 | context->EnsureConditions(loopNum)->Push(cond); |
4658 | |
// Ensure that this array is dereference-able before executing the actual condition.
4660 | LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); |
4661 | context->EnsureDerefs(loopNum)->Push(array); |
4662 | } |
4663 | break; |
4664 | case LcOptInfo::LcMdArray: |
4665 | { |
4666 | // limit <= mdArrLen |
4667 | LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); |
4668 | LC_Condition cond(GT_LE, LC_Expr(ident), |
4669 | LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray, |
4670 | mdArrInfo->GetArrIndexForDim(getAllocator()), |
4671 | mdArrInfo->dim, LC_Array::None)))); |
4672 | context->EnsureConditions(loopNum)->Push(cond); |
4673 | } |
4674 | break; |
4675 | |
4676 | default: |
4677 | JITDUMP("Unknown opt\n" ); |
4678 | return false; |
4679 | } |
4680 | } |
4681 | JITDUMP("Conditions: (" ); |
4682 | DBEXEC(verbose, context->PrintConditions(loopNum)); |
4683 | JITDUMP(")\n" ); |
4684 | return true; |
4685 | } |
4686 | return false; |
4687 | } |
4688 | |
4689 | //------------------------------------------------------------------------------------ |
4690 | // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays. |
4691 | // |
4692 | // Arguments: |
4693 | // loopNum - the current loop index for which conditions are derived. |
4694 | // context - data structure where all loop cloning info is kept. |
4695 | // |
4696 | // Return Value: |
4697 | // "false" if conditions cannot be obtained. "true" otherwise. |
4698 | // The deref conditions are updated in the "derefConditions"[loopNum] field |
4699 | // of the "context" parameter. |
4700 | // |
4701 | // Definition of Deref Conditions: |
4702 | // To be able to check for the loop cloning condition that (limitVar <= a.len) |
4703 | // we should first be able to dereference "a". i.e., "a" is non-null. |
4704 | // |
4705 | // Example: |
4706 | // |
4707 | // for (i in 0..n) |
4708 | // for (j in 0..n) |
// for (k in 0..n) // Innermost loop is being cloned. Cloning needs to check if
4710 | // // (n <= a[i][j].len) and other safer conditions to take the fast path |
4711 | // a[i][j][k] = 0; |
4712 | // |
4713 | // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check. |
4714 | // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first |
4715 | // be true to do the deref. |
4716 | // |
4717 | // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1) |
4718 | // |
4719 | // Note the short circuiting AND. Implication: these conditions should be performed in separate |
4720 | // blocks each of which will branch to slow path if the condition evaluates to false. |
4721 | // |
4722 | // Now, imagine a situation where we have |
4723 | // a[x][y][k] = 20 and a[i][j][k] = 0 |
// also in the innermost loop, where x and y are parameters, then our conditions will have
4725 | // to include |
4726 | // (x < a.len) && |
4727 | // (y < a[x].len) |
4728 | // in addition to the above conditions (1) to get rid of bounds check on index 'k' |
4729 | // |
4730 | // But these conditions can be checked together with conditions |
4731 | // (i < a.len) without a need for a separate block. In summary, the conditions will be: |
4732 | // |
4733 | // (a != null) && |
4734 | // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here. |
4735 | // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here. |
4736 | // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here. |
4737 | // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here. |
4738 | // |
4739 | // This naturally yields a tree style pattern, where the nodes of the tree are |
4740 | // the array and indices respectively. |
4741 | // |
4742 | // Example: |
4743 | // a => { |
4744 | // i => { |
4745 | // j => { |
4746 | // k => {} |
4747 | // } |
4748 | // }, |
4749 | // x => { |
4750 | // y => { |
4751 | // k => {} |
4752 | // } |
4753 | // } |
4754 | // } |
4755 | // |
4756 | // Notice that the variables in the same levels can have their conditions combined in the |
4757 | // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be |
4758 | // combined with a short-circuiting AND (i.e., different basic blocks). |
4759 | // |
4760 | // Operation: |
4761 | // Construct a tree of array indices and the array which will generate the optimal |
4762 | // conditions for loop cloning. |
4763 | // |
4764 | // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be: |
4765 | // |
4766 | // a => { |
4767 | // i => { |
4768 | // j => { |
4769 | // k => {} |
4770 | // }, |
4771 | // y => { |
4772 | // k => {} |
4773 | // }, |
4774 | // } |
4775 | // }, |
4776 | // b => { |
4777 | // i => {} |
4778 | // } |
4779 | // In this method, we will construct such a tree by descending depth first into the array |
4780 | // index operation and forming a tree structure as we encounter the array or the index variables. |
4781 | // |
4782 | // This tree structure will then be used to generate conditions like below: |
4783 | // (a != null) & (b != null) && // from the first level of the tree. |
4784 | // |
4785 | // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined. |
4786 | // (a[i] != null) & (b[i] != null) && // from the second level of the tree. |
4787 | // |
4788 | // (j < a[i].len) & (y < a[i].len) && // from the third level. |
4789 | // (a[i][j] != null) & (a[i][y] != null) && // from the third level. |
4790 | // |
4791 | // and so on. |
4792 | // |
4793 | // |
4794 | bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) |
4795 | { |
4796 | JitExpandArrayStack<LC_Deref*> nodes(getAllocator()); |
4797 | int maxRank = -1; |
4798 | |
4799 | // Get the dereference-able arrays. |
4800 | JitExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum); |
4801 | |
4802 | // For each array in the dereference list, construct a tree, |
4803 | // where the nodes are array and index variables and an edge 'u-v' |
4804 | // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge |
4805 | // 'u-v-w' transitively if u[v][w] occurs. |
4806 | for (unsigned i = 0; i < deref->Size(); ++i) |
4807 | { |
4808 | LC_Array& array = (*deref)[i]; |
4809 | |
4810 | // First populate the array base variable. |
4811 | LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl); |
4812 | if (node == nullptr) |
4813 | { |
4814 | node = new (getAllocator()) LC_Deref(array, 0 /*level*/); |
4815 | nodes.Push(node); |
4816 | } |
4817 | |
4818 | // For each dimension (level) for the array, populate the tree with the variable |
4819 | // from that dimension. |
4820 | unsigned rank = (unsigned)array.GetDimRank(); |
for (unsigned dim = 0; dim < rank; ++dim)
{
node->EnsureChildren(getAllocator());
LC_Deref* tmp = node->Find(array.arrIndex->indLcls[dim]);
4825 | if (tmp == nullptr) |
4826 | { |
4827 | tmp = new (getAllocator()) LC_Deref(array, node->level + 1); |
4828 | node->children->Push(tmp); |
4829 | } |
4830 | |
4831 | // Descend one level down. |
4832 | node = tmp; |
4833 | } |
4834 | |
4835 | // Keep the maxRank of all array dereferences. |
4836 | maxRank = max((int)rank, maxRank); |
4837 | } |
4838 | |
4839 | #ifdef DEBUG |
4840 | if (verbose) |
4841 | { |
4842 | for (unsigned i = 0; i < nodes.Size(); ++i) |
4843 | { |
4844 | if (i != 0) |
4845 | { |
4846 | printf("," ); |
4847 | } |
4848 | nodes[i]->Print(); |
4849 | printf("\n" ); |
4850 | } |
4851 | } |
4852 | #endif |
4853 | |
4854 | if (maxRank == -1) |
4855 | { |
4856 | return false; |
4857 | } |
4858 | |
// The first level will always yield the null-check, since it is made of the array base variables.
// All other levels (dimensions) will yield two conditions, e.g.: (i < a.length && a[i] != null).
// So the total is (maxRank * 2) + 1.
4862 | unsigned condBlocks = (unsigned)maxRank * 2 + 1; |
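// E.g. (illustrative): maxRank == 1 gives 1 * 2 + 1 = 3 condition blocks, while
// maxRank == 2 would give 5 and be rejected by the limit below.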
4863 | |
4864 | // Heuristic to not create too many blocks; |
4865 | if (condBlocks > 4) |
4866 | { |
4867 | return false; |
4868 | } |
4869 | |
4870 | // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] |
4871 | JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond = |
4872 | context->EnsureBlockConditions(loopNum, condBlocks); |
4873 | for (unsigned i = 0; i < nodes.Size(); ++i) |
4874 | { |
4875 | nodes[i]->DeriveLevelConditions(levelCond); |
4876 | } |
4877 | |
4878 | DBEXEC(verbose, context->PrintBlockConditions(loopNum)); |
4879 | return true; |
4880 | } |
4881 | |
4882 | #ifdef DEBUG |
4883 | //---------------------------------------------------------------------------- |
4884 | // optDebugLogLoopCloning: Insert a call to jithelper that prints a message. |
4885 | // |
4886 | // Arguments: |
4887 | // block - the block in which the helper call needs to be inserted. |
4888 | // insertBefore - the tree before which the helper call will be inserted. |
4889 | // |
4890 | void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTree* insertBefore) |
4891 | { |
4892 | if (JitConfig.JitDebugLogLoopCloning() == 0) |
4893 | { |
4894 | return; |
4895 | } |
4896 | GenTree* logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID); |
4897 | GenTree* stmt = fgNewStmtFromTree(logCall); |
4898 | fgInsertStmtBefore(block, insertBefore, stmt); |
4899 | fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Debug log loop cloning" )); |
4900 | } |
4901 | #endif |
4902 | |
4903 | //------------------------------------------------------------------------ |
4904 | // optPerformStaticOptimizations: Perform the optimizations for the optimization |
4905 | // candidates gathered during the cloning phase. |
4906 | // |
4907 | // Arguments: |
4908 | // loopNum - the current loop index for which the optimizations are performed. |
4909 | // context - data structure where all loop cloning info is kept. |
4910 | // dynamicPath - If true, the optimization is performed in the fast path among the |
4911 | // cloned loops. If false, it means this is the only path (i.e., |
4912 | // there is no slow path.) |
4913 | // |
4914 | // Operation: |
4915 | // Perform the optimizations on the fast path i.e., the path in which the |
4916 | // optimization candidates were collected at the time of identifying them. |
4917 | // The candidates store all the information necessary (the tree/stmt/block |
4918 | // they are from) to perform the optimization. |
4919 | // |
4920 | // Assumption: |
4921 | // The unoptimized path is either already cloned when this method is called or |
// there is no unoptimized path (it was eliminated statically). So this method
4923 | // performs the optimizations assuming that the path in which the candidates |
4924 | // were collected is the fast path in which the optimizations will be performed. |
4925 | // |
4926 | void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath)) |
4927 | { |
4928 | JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum); |
4929 | for (unsigned i = 0; i < optInfos->Size(); ++i) |
4930 | { |
4931 | LcOptInfo* optInfo = optInfos->GetRef(i); |
4932 | switch (optInfo->GetOptType()) |
4933 | { |
4934 | case LcOptInfo::LcJaggedArray: |
4935 | { |
4936 | LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); |
4937 | compCurBB = arrIndexInfo->arrIndex.useBlock; |
4938 | optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt); |
4939 | DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt)); |
4940 | } |
4941 | break; |
4942 | case LcOptInfo::LcMdArray: |
4943 | // TODO-CQ: CLONE: Implement. |
4944 | break; |
4945 | default: |
4946 | break; |
4947 | } |
4948 | } |
4949 | } |
4950 | |
4951 | //---------------------------------------------------------------------------- |
4952 | // optCanCloneLoops: Use the environment flag to determine whether loop |
4953 | // cloning is allowed to be performed. |
4954 | // |
4955 | // Return Value: |
// Returns true if loop cloning is enabled. It is enabled by default, including retail
// builds; in debug builds it can be disabled by setting COMPlus_JitCloneLoops to 0.
4958 | // |
4959 | bool Compiler::optCanCloneLoops() |
4960 | { |
4961 | // Enabled for retail builds now. |
4962 | unsigned cloneLoopsFlag = 1; |
4963 | #ifdef DEBUG |
4964 | cloneLoopsFlag = JitConfig.JitCloneLoops(); |
4965 | #endif |
4966 | return (cloneLoopsFlag != 0); |
4967 | } |
4968 | |
4969 | //---------------------------------------------------------------------------- |
4970 | // optIsLoopClonable: Determine whether this loop can be cloned. |
4971 | // |
4972 | // Arguments: |
//     loopInd - the loop index to check for clonability.
4974 | // |
4975 | // Return Value: |
// Returns true if the loop can be cloned. If it returns false, a message explaining
// why the loop can't be cloned is printed in debug builds.
4978 | // |
4979 | bool Compiler::optIsLoopClonable(unsigned loopInd) |
4980 | { |
4981 | // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle |
4982 | // inserting new EH regions in the exception table yet. |
4983 | BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext; |
4984 | unsigned loopRetCount = 0; |
4985 | for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext) |
4986 | { |
4987 | if (blk->bbJumpKind == BBJ_RETURN) |
4988 | { |
4989 | loopRetCount++; |
4990 | } |
4991 | if (bbIsTryBeg(blk)) |
4992 | { |
4993 | JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n" , loopInd, info.compFullName); |
4994 | return false; |
4995 | } |
4996 | } |
4997 | |
4998 | // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump |
4999 | // into the middle of a handler (to go to the cloned copy.) Reject. |
5000 | if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry)) |
5001 | { |
5002 | JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n" ); |
5003 | return false; |
5004 | } |
5005 | |
5006 | // If the head and entry are in different EH regions, reject. |
5007 | if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry)) |
5008 | { |
5009 | JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n" ); |
5010 | return false; |
5011 | } |
5012 | |
5013 | // Is the first block after the last block of the loop a handler or filter start? |
// Usually, we create a dummy block after the original loop, to skip over the loop clone
5015 | // and go to where the original loop did. That raises problems when we don't actually go to |
5016 | // that block; this is one of those cases. This could be fixed fairly easily; for example, |
5017 | // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the |
5018 | // loop. This is just a corner to cut to get this working faster. |
5019 | BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext; |
5020 | if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop)) |
5021 | { |
5022 | JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n" ); |
5023 | return false; |
5024 | } |
5025 | |
5026 | // We've previously made a decision whether to have separate return epilogs, or branch to one. |
5027 | // There's a GCInfo limitation in the x86 case, so that there can be no more than SET_EPILOGCNT_MAX separate |
5028 | // epilogs. Other architectures have a limit of 4 here for "historical reasons", but this should be revisited |
5029 | // (or return blocks should not be considered part of the loop, rendering this issue moot). |
5030 | unsigned epilogLimit = 4; |
5031 | #ifdef JIT32_GCENCODER |
5032 | epilogLimit = SET_EPILOGCNT_MAX; |
5033 | #endif // JIT32_GCENCODER |
5034 | if (fgReturnCount + loopRetCount > epilogLimit) |
5035 | { |
5036 | JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, " |
5037 | "would exceed the limit of %d.\n" , |
5038 | loopRetCount, fgReturnCount, epilogLimit); |
5039 | return false; |
5040 | } |
5041 | |
5042 | // Otherwise, we're going to add those return blocks. |
5043 | fgReturnCount += loopRetCount; |
5044 | |
5045 | return true; |
5046 | } |
5047 | |
5048 | /***************************************************************************** |
5049 | * |
5050 | * Identify loop cloning opportunities, derive loop cloning conditions, |
5051 | * perform loop cloning, use the derived conditions to choose which |
5052 | * path to take. |
5053 | */ |
5054 | void Compiler::optCloneLoops() |
5055 | { |
5056 | JITDUMP("\n*************** In optCloneLoops()\n" ); |
5057 | if (optLoopCount == 0 || !optCanCloneLoops()) |
5058 | { |
5059 | return; |
5060 | } |
5061 | |
5062 | #ifdef DEBUG |
5063 | if (verbose) |
5064 | { |
5065 | printf("Blocks/Trees at start of phase\n" ); |
5066 | fgDispBasicBlocks(true); |
5067 | } |
5068 | #endif |
5069 | |
5070 | LoopCloneContext context(optLoopCount, getAllocator()); |
5071 | |
5072 | // Obtain array optimization candidates in the context. |
5073 | optObtainLoopCloningOpts(&context); |
5074 | |
5075 | // For each loop, derive cloning conditions for the optimization candidates. |
5076 | for (unsigned i = 0; i < optLoopCount; ++i) |
5077 | { |
5078 | JitExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i); |
5079 | if (optInfos == nullptr) |
5080 | { |
5081 | continue; |
5082 | } |
5083 | |
5084 | if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context)) |
5085 | { |
5086 | JITDUMP("> Conditions could not be obtained\n" ); |
5087 | context.CancelLoopOptInfo(i); |
5088 | } |
5089 | else |
5090 | { |
5091 | bool allTrue = false; |
5092 | bool anyFalse = false; |
5093 | context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose)); |
5094 | if (anyFalse) |
5095 | { |
5096 | context.CancelLoopOptInfo(i); |
5097 | } |
5098 | if (allTrue) |
5099 | { |
5100 | // Perform static optimizations on the fast path since we always |
5101 | // have to take the cloned path. |
5102 | optPerformStaticOptimizations(i, &context DEBUGARG(false)); |
5103 | |
5104 | // No need to clone. |
5105 | context.CancelLoopOptInfo(i); |
5106 | } |
5107 | } |
5108 | } |
5109 | |
5110 | #if 0 |
5111 | // The code in this #if has been useful in debugging loop cloning issues, by |
5112 | // enabling selective enablement of the loop cloning optimization according to |
5113 | // method hash. |
5114 | #ifdef DEBUG |
5115 | unsigned methHash = info.compMethodHash(); |
5116 | char* lostr = getenv("loopclonehashlo" ); |
5117 | unsigned methHashLo = 0; |
5118 | if (lostr != NULL) |
5119 | { |
5120 | sscanf_s(lostr, "%x" , &methHashLo); |
5121 | // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. |
5122 | } |
5123 | char* histr = getenv("loopclonehashhi" ); |
5124 | unsigned methHashHi = UINT32_MAX; |
5125 | if (histr != NULL) |
5126 | { |
5127 | sscanf_s(histr, "%x" , &methHashHi); |
5128 | // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. |
5129 | } |
5130 | if (methHash < methHashLo || methHash > methHashHi) |
5131 | return; |
5132 | #endif |
5133 | #endif |
5134 | |
5135 | for (unsigned i = 0; i < optLoopCount; ++i) |
5136 | { |
5137 | if (context.GetLoopOptInfo(i) != nullptr) |
5138 | { |
5139 | optLoopsCloned++; |
5140 | context.OptimizeConditions(i DEBUGARG(verbose)); |
5141 | context.OptimizeBlockConditions(i DEBUGARG(verbose)); |
5142 | optCloneLoop(i, &context); |
5143 | } |
5144 | } |
5145 | |
5146 | #ifdef DEBUG |
5147 | if (verbose) |
5148 | { |
5149 | printf("\nAfter loop cloning:\n" ); |
5150 | fgDispBasicBlocks(/*dumpTrees*/ true); |
5151 | } |
5152 | #endif |
5153 | } |
5154 | |
5155 | void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) |
5156 | { |
5157 | assert(loopInd < optLoopCount); |
5158 | |
5159 | JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n" , loopInd, optLoopTable[loopInd].lpHead->bbNum, |
5160 | optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum, |
5161 | optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum); |
5162 | |
5163 | // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). |
5164 | unsigned depth = optLoopDepth(loopInd); |
5165 | unsigned ambientWeight = 1; |
5166 | for (unsigned j = 0; j < depth; j++) |
5167 | { |
5168 | unsigned lastWeight = ambientWeight; |
5169 | ambientWeight *= BB_LOOP_WEIGHT; |
5170 | // If the multiplication overflowed, stick at max. |
5171 | // (Strictly speaking, a multiplication could overflow and still have a result |
5172 | // that is >= lastWeight...but if so, the original weight must be pretty large, |
5173 | // and it got bigger, so that's OK.) |
5174 | if (ambientWeight < lastWeight) |
5175 | { |
5176 | ambientWeight = BB_MAX_WEIGHT; |
5177 | break; |
5178 | } |
5179 | } |
5180 | |
5181 | // If we're in a non-natural loop, the ambient weight might be higher than we computed above. |
5182 | // Be safe by taking the max with the head block's weight. |
5183 | ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight); |
5184 | |
5185 | // This is the containing loop, if any -- to label any blocks we create that are outside |
5186 | // the loop being cloned. |
5187 | unsigned char ambientLoop = optLoopTable[loopInd].lpParent; |
5188 | |
5189 | // First, make sure that the loop has a unique header block, creating an empty one if necessary. |
5190 | optEnsureUniqueHead(loopInd, ambientWeight); |
5191 | |
5192 | // We're going to make |
5193 | |
5194 | // H --> E |
5195 | // F |
5196 | // T |
5197 | // E |
5198 | // B ?-> T |
5199 | // X |
5200 | // |
5201 | // become |
5202 | // |
5203 | // H ?-> E2 |
5204 | // H2--> E (Optional; if E == T == F, let H fall through to F/T/E) |
5205 | // F |
5206 | // T |
5207 | // E |
5208 | // B ?-> T |
5209 | // X2--> X |
5210 | // F2 |
5211 | // T2 |
5212 | // E2 |
5213 | // B2 ?-> T2 |
5214 | // X |
5215 | |
5216 | BasicBlock* h = optLoopTable[loopInd].lpHead; |
5217 | if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS) |
5218 | { |
5219 | // Make a new block to be the unique entry to the loop. |
5220 | assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry); |
5221 | BasicBlock* newH = fgNewBBafter(BBJ_NONE, h, |
5222 | /*extendRegion*/ true); |
5223 | newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight); |
5224 | BlockSetOps::Assign(this, newH->bbReach, h->bbReach); |
5225 | // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. |
5226 | newH->bbNatLoopNum = ambientLoop; |
5227 | h = newH; |
5228 | optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h); |
5229 | } |
5230 | |
5231 | // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.) |
5232 | // "newPred" will be the predecessor of the blocks of the cloned loop. |
5233 | BasicBlock* b = optLoopTable[loopInd].lpBottom; |
5234 | BasicBlock* newPred = b; |
5235 | if (b->bbJumpKind != BBJ_ALWAYS) |
5236 | { |
5237 | BasicBlock* x = b->bbNext; |
5238 | if (x != nullptr) |
5239 | { |
5240 | BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true); |
5241 | x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight); |
5242 | |
5243 | // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. |
5244 | x2->bbNatLoopNum = ambientLoop; |
5245 | |
5246 | x2->bbJumpDest = x; |
5247 | BlockSetOps::Assign(this, x2->bbReach, h->bbReach); |
5248 | newPred = x2; |
5249 | } |
5250 | } |
5251 | |
5252 | // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while, |
5253 | // so that "h" already falls through to "e" (e == t == f). |
5254 | BasicBlock* h2 = nullptr; |
5255 | if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry) |
5256 | { |
5257 | BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead, |
5258 | /*extendRegion*/ true); |
5259 | h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); |
5260 | |
5261 | // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. |
5262 | h2->bbNatLoopNum = ambientLoop; |
5263 | |
5264 | h2->bbJumpDest = optLoopTable[loopInd].lpEntry; |
5265 | optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2); |
5266 | } |
5267 | |
5268 | // Now we'll clone the blocks of the loop body. |
5269 | BasicBlock* newFirst = nullptr; |
5270 | BasicBlock* newBot = nullptr; |
5271 | |
5272 | BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); |
5273 | for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; |
5274 | blk = blk->bbNext) |
5275 | { |
5276 | BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred, |
5277 | /*extendRegion*/ true); |
5278 | |
5279 | // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it |
5280 | // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already |
5281 | // checked them to guarantee they are clonable. |
5282 | bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk); |
5283 | noway_assert(cloneOk); |
5284 | // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert |
5285 | // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding |
5286 | // loop, if one exists -- the parent of the loop we're cloning. |
5287 | newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent; |
5288 | |
5289 | if (newFirst == nullptr) |
5290 | { |
5291 | newFirst = newBlk; |
5292 | } |
5293 | newBot = newBlk; // Continually overwrite to make sure we get the last one. |
5294 | newPred = newBlk; |
5295 | blockMap->Set(blk, newBlk); |
5296 | } |
5297 | |
5298 | // Perform the static optimizations on the fast path. |
5299 | optPerformStaticOptimizations(loopInd, context DEBUGARG(true)); |
5300 | |
5301 | // Now go through the new blocks, remapping their jump targets within the loop. |
5302 | for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; |
5303 | blk = blk->bbNext) |
5304 | { |
5305 | |
5306 | BasicBlock* newblk = nullptr; |
5307 | bool b = blockMap->Lookup(blk, &newblk); |
5308 | assert(b && newblk != nullptr); |
5309 | |
5310 | assert(blk->bbJumpKind == newblk->bbJumpKind); |
5311 | |
5312 | // First copy the jump destination(s) from "blk". |
5313 | optCopyBlkDest(blk, newblk); |
5314 | |
5315 | // Now redirect the new block according to "blockMap". |
5316 | optRedirectBlock(newblk, blockMap); |
5317 | } |
5318 | |
5319 | assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) || |
5320 | (h->bbJumpKind == BBJ_ALWAYS)); |
5321 | |
5322 | // If all the conditions are true, go to E2. |
5323 | BasicBlock* e2 = nullptr; |
5324 | bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2); |
5325 | |
5326 | h->bbJumpKind = BBJ_COND; |
5327 | |
5328 | // We will create the following structure |
5329 | // |
5330 | // cond0 (in h) -?> cond1 |
5331 | // slow --> e2 (slow) always |
5332 | // !cond1 -?> slow |
5333 | // !cond2 -?> slow |
5334 | // ... |
5335 | // !condn -?> slow |
5336 | // h2/entry (fast) |
5337 | // |
// We should always have block conditions; at a minimum, the array should be deref-able.
5339 | assert(context->HasBlockConditions(loopInd)); |
5340 | |
5341 | // Create a unique header for the slow path. |
5342 | BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true); |
5343 | slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight); |
5344 | slowHead->bbNatLoopNum = ambientLoop; |
5345 | slowHead->bbJumpDest = e2; |
5346 | |
5347 | BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead); |
5348 | condLast->bbJumpDest = slowHead; |
5349 | |
// If h2 is present, it is already the loop head; otherwise, make 'condLast' the new head.
5351 | if (h2 == nullptr) |
5352 | { |
5353 | optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast); |
5354 | } |
5355 | assert(foundIt && e2 != nullptr); |
5356 | |
5357 | // Don't unroll loops that we've cloned -- the unroller expects any loop it should unroll to |
5358 | // initialize the loop counter immediately before entering the loop, but we've left a shared |
5359 | // initialization of the loop counter up above the test that determines which version of the |
5360 | // loop to take. |
5361 | optLoopTable[loopInd].lpFlags |= LPFLG_DONT_UNROLL; |
5362 | |
5363 | fgUpdateChangedFlowGraph(); |
5364 | } |
5365 | |
5366 | //-------------------------------------------------------------------------------------------------- |
5367 | // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry |
5368 | // |
5369 | // Arguments: |
5370 | // context loop cloning context variable |
5371 | // loopNum the loop index |
5372 | // head loop head for "loopNum" |
5373 | // slowHead the slow path loop head |
5374 | // |
// Return Values:
//      The last condition block created; the caller sets its jump target to the slow path head.
5377 | // |
5378 | // Operation: |
5379 | // Create the following structure. |
5380 | // |
// Note below that cond0 is inverted in the head, i.e., if true, jump to cond1. This is because
// condn cannot jtrue to the loop head h2; the jump has to come from a direct pred block.
5383 | // |
5384 | // cond0 (in h) -?> cond1 |
5385 | // slowHead --> e2 (slowHead) always |
5386 | // !cond1 -?> slowHead |
5387 | // !cond2 -?> slowHead |
5388 | // ... |
5389 | // !condn -?> slowHead |
5390 | // h2/entry (fast) |
5391 | // |
5392 | // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them. |
5393 | // |
5394 | BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, |
5395 | unsigned loopNum, |
5396 | BasicBlock* head, |
5397 | BasicBlock* slowHead) |
5398 | { |
5399 | JITDUMP("Inserting loop cloning conditions\n" ); |
5400 | assert(context->HasBlockConditions(loopNum)); |
5401 | |
5402 | BasicBlock* curCond = head; |
5403 | JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum); |
5404 | for (unsigned i = 0; i < levelCond->Size(); ++i) |
5405 | { |
bool isHeaderBlock = (curCond == head);
5407 | |
5408 | // Flip the condition if header block. |
5409 | context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock); |
5410 | |
5411 | // Create each condition block ensuring wiring between them. |
5412 | BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true); |
5413 | curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead; |
5414 | curCond = tmp; |
5415 | |
5416 | curCond->inheritWeight(head); |
5417 | curCond->bbNatLoopNum = head->bbNatLoopNum; |
5418 | JITDUMP("Created new " FMT_BB " for new level\n" , curCond->bbNum); |
5419 | } |
5420 | |
5421 | // Finally insert cloning conditions after all deref conditions have been inserted. |
5422 | context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false); |
5423 | return curCond; |
5424 | } |
5425 | |
5426 | void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight) |
5427 | { |
5428 | BasicBlock* h = optLoopTable[loopInd].lpHead; |
5429 | BasicBlock* t = optLoopTable[loopInd].lpTop; |
5430 | BasicBlock* e = optLoopTable[loopInd].lpEntry; |
5431 | BasicBlock* b = optLoopTable[loopInd].lpBottom; |
5432 | |
5433 | // If "h" dominates the entry block, then it is the unique header. |
5434 | if (fgDominate(h, e)) |
5435 | { |
5436 | return; |
5437 | } |
5438 | |
5439 | // Otherwise, create a new empty header block, make it the pred of the entry block, |
5440 | // and redirect the preds of the entry block to go to this. |
5441 | |
5442 | BasicBlock* beforeTop = t->bbPrev; |
5443 | // Make sure that the new block is in the same region as the loop. |
5444 | // (We will only create loops that are entirely within a region.) |
5445 | BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true); |
5446 | // This is in the containing loop. |
5447 | h2->bbNatLoopNum = optLoopTable[loopInd].lpParent; |
5448 | h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); |
5449 | |
5450 | // We don't care where it was put; splice it between beforeTop and top. |
5451 | if (beforeTop->bbNext != h2) |
5452 | { |
5453 | h2->bbPrev->setNext(h2->bbNext); // Splice h2 out. |
5454 | beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t. |
5455 | h2->setNext(t); |
5456 | } |
5457 | |
5458 | if (h2->bbNext != e) |
5459 | { |
5460 | h2->bbJumpKind = BBJ_ALWAYS; |
5461 | h2->bbJumpDest = e; |
5462 | } |
5463 | BlockSetOps::Assign(this, h2->bbReach, e->bbReach); |
5464 | |
5465 | // Redirect paths from preds of "e" to go to "h2" instead of "e". |
5466 | BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); |
5467 | blockMap->Set(e, h2); |
5468 | |
5469 | for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext) |
5470 | { |
5471 | BasicBlock* predBlock = predEntry->flBlock; |
5472 | |
5473 | // Skip if predBlock is in the loop. |
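// (Note: this range test assumes the loop body occupies a contiguous bbNum range from
// top 't' to bottom 'b' at this point.)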
5474 | if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum) |
5475 | { |
5476 | continue; |
5477 | } |
5478 | optRedirectBlock(predBlock, blockMap); |
5479 | } |
5480 | |
5481 | optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2); |
5482 | } |
5483 | |
5484 | /***************************************************************************** |
5485 | * |
5486 | * Determine the kind of interference for the call. |
5487 | */ |
5488 | |
5489 | /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreeCall* call) |
5490 | { |
5491 | // if not a helper, kills everything |
5492 | if (call->gtCallType != CT_HELPER) |
5493 | { |
5494 | return CALLINT_ALL; |
5495 | } |
5496 | |
5497 | // setfield and array address store kill all indirections |
5498 | switch (eeGetHelperNum(call->gtCallMethHnd)) |
5499 | { |
5500 | case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this |
5501 | case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this |
5502 | case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this |
5503 | case CORINFO_HELP_SETFIELDOBJ: |
5504 | case CORINFO_HELP_ARRADDR_ST: |
5505 | |
5506 | return CALLINT_REF_INDIRS; |
5507 | |
5508 | case CORINFO_HELP_SETFIELDFLOAT: |
5509 | case CORINFO_HELP_SETFIELDDOUBLE: |
5510 | case CORINFO_HELP_SETFIELD8: |
5511 | case CORINFO_HELP_SETFIELD16: |
5512 | case CORINFO_HELP_SETFIELD32: |
5513 | case CORINFO_HELP_SETFIELD64: |
5514 | |
5515 | return CALLINT_SCL_INDIRS; |
5516 | |
5517 | case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this |
5518 | case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this |
5519 | case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this |
5520 | case CORINFO_HELP_SETFIELDSTRUCT: |
5521 | |
5522 | return CALLINT_ALL_INDIRS; |
5523 | |
5524 | default: |
5525 | break; |
5526 | } |
5527 | |
5528 | // other helpers kill nothing |
5529 | return CALLINT_NONE; |
5530 | } |
5531 | |
5532 | /***************************************************************************** |
5533 | * |
5534 | * See if the given tree can be computed in the given precision (which must |
5535 | * be smaller than the type of the tree for this to make sense). If 'doit' |
5536 | * is false, we merely check to see whether narrowing is possible; if we |
5537 | * get called with 'doit' being true, we actually perform the narrowing. |
5538 | */ |
5539 | |
5540 | bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit) |
5541 | { |
5542 | genTreeOps oper; |
5543 | unsigned kind; |
5544 | |
5545 | noway_assert(tree); |
5546 | noway_assert(genActualType(tree->gtType) == genActualType(srct)); |
5547 | |
5548 | /* Assume we're only handling integer types */ |
5549 | noway_assert(varTypeIsIntegral(srct)); |
5550 | noway_assert(varTypeIsIntegral(dstt)); |
5551 | |
5552 | unsigned srcSize = genTypeSize(srct); |
5553 | unsigned dstSize = genTypeSize(dstt); |
5554 | |
5555 | /* dstt must be smaller than srct to narrow */ |
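// (Illustrative: narrowing TYP_LONG (8 bytes) to TYP_INT (4 bytes) passes this check;
// TYP_INT to TYP_INT does not -- the destination must be strictly smaller.)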
5556 | if (dstSize >= srcSize) |
5557 | { |
5558 | return false; |
5559 | } |
5560 | |
5561 | /* Figure out what kind of a node we have */ |
5562 | oper = tree->OperGet(); |
5563 | kind = tree->OperKind(); |
5564 | |
5565 | if (oper == GT_ASG) |
5566 | { |
5567 | noway_assert(doit == false); |
5568 | return false; |
5569 | } |
5570 | |
5571 | ValueNumPair NoVNPair = ValueNumPair(); |
5572 | |
5573 | if (kind & GTK_LEAF) |
5574 | { |
5575 | switch (oper) |
5576 | { |
5577 | /* Constants can usually be narrowed by changing their value */ |
5578 | CLANG_FORMAT_COMMENT_ANCHOR; |
5579 | |
5580 | #ifndef _TARGET_64BIT_ |
5581 | __int64 lval; |
5582 | __int64 lmask; |
5583 | |
5584 | case GT_CNS_LNG: |
5585 | lval = tree->gtIntConCommon.LngValue(); |
5586 | lmask = 0; |
5587 | |
5588 | switch (dstt) |
5589 | { |
5590 | case TYP_BYTE: |
5591 | lmask = 0x0000007F; |
5592 | break; |
5593 | case TYP_BOOL: |
5594 | case TYP_UBYTE: |
5595 | lmask = 0x000000FF; |
5596 | break; |
5597 | case TYP_SHORT: |
5598 | lmask = 0x00007FFF; |
5599 | break; |
5600 | case TYP_USHORT: |
5601 | lmask = 0x0000FFFF; |
5602 | break; |
5603 | case TYP_INT: |
5604 | lmask = 0x7FFFFFFF; |
5605 | break; |
5606 | case TYP_UINT: |
5607 | lmask = 0xFFFFFFFF; |
5608 | break; |
5609 | |
5610 | default: |
5611 | return false; |
5612 | } |
5613 | |
5614 | if ((lval & lmask) != lval) |
5615 | return false; |
5616 | |
5617 | if (doit) |
5618 | { |
5619 | tree->ChangeOperConst(GT_CNS_INT); |
5620 | tree->gtType = TYP_INT; |
5621 | tree->gtIntCon.gtIconVal = (int)lval; |
5622 | if (vnStore != nullptr) |
5623 | { |
5624 | fgValueNumberTreeConst(tree); |
5625 | } |
5626 | } |
5627 | |
5628 | return true; |
5629 | #endif |
5630 | |
5631 | case GT_CNS_INT: |
5632 | |
5633 | ssize_t ival; |
5634 | ival = tree->gtIntCon.gtIconVal; |
5635 | ssize_t imask; |
5636 | imask = 0; |
5637 | |
5638 | switch (dstt) |
5639 | { |
5640 | case TYP_BYTE: |
5641 | imask = 0x0000007F; |
5642 | break; |
5643 | case TYP_BOOL: |
5644 | case TYP_UBYTE: |
5645 | imask = 0x000000FF; |
5646 | break; |
5647 | case TYP_SHORT: |
5648 | imask = 0x00007FFF; |
5649 | break; |
5650 | case TYP_USHORT: |
5651 | imask = 0x0000FFFF; |
5652 | break; |
5653 | #ifdef _TARGET_64BIT_ |
5654 | case TYP_INT: |
5655 | imask = 0x7FFFFFFF; |
5656 | break; |
5657 | case TYP_UINT: |
5658 | imask = 0xFFFFFFFF; |
5659 | break; |
5660 | #endif // _TARGET_64BIT_ |
5661 | default: |
5662 | return false; |
5663 | } |
5664 | |
5665 | if ((ival & imask) != ival) |
5666 | { |
5667 | return false; |
5668 | } |
5669 | |
5670 | #ifdef _TARGET_64BIT_ |
5671 | if (doit) |
5672 | { |
5673 | tree->gtType = TYP_INT; |
5674 | tree->gtIntCon.gtIconVal = (int)ival; |
5675 | if (vnStore != nullptr) |
5676 | { |
5677 | fgValueNumberTreeConst(tree); |
5678 | } |
5679 | } |
5680 | #endif // _TARGET_64BIT_ |
5681 | |
5682 | return true; |
5683 | |
5684 | /* Operands that are in memory can usually be narrowed |
5685 | simply by changing their gtType */ |
5686 | |
5687 | case GT_LCL_VAR: |
5688 | /* We only allow narrowing long -> int for a GT_LCL_VAR */ |
5689 | if (dstSize == sizeof(int)) |
5690 | { |
5691 | goto NARROW_IND; |
5692 | } |
5693 | break; |
5694 | |
5695 | case GT_CLS_VAR: |
5696 | case GT_LCL_FLD: |
5697 | goto NARROW_IND; |
5698 | default: |
5699 | break; |
5700 | } |
5701 | |
5702 | noway_assert(doit == false); |
5703 | return false; |
5704 | } |
5705 | |
5706 | if (kind & (GTK_BINOP | GTK_UNOP)) |
5707 | { |
5708 | GenTree* op1; |
5709 | op1 = tree->gtOp.gtOp1; |
5710 | GenTree* op2; |
5711 | op2 = tree->gtOp.gtOp2; |
5712 | |
5713 | switch (tree->gtOper) |
5714 | { |
5715 | case GT_AND: |
5716 | noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); |
5717 | noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); |
5718 | |
5719 | GenTree* opToNarrow; |
5720 | opToNarrow = nullptr; |
5721 | GenTree** otherOpPtr; |
5722 | otherOpPtr = nullptr; |
5723 | bool foundOperandThatBlocksNarrowing; |
5724 | foundOperandThatBlocksNarrowing = false; |
5725 | |
// If 'dstt' is unsigned and one of the operands can be narrowed into 'dstt',
// the result of the GT_AND will also fit into 'dstt' and can be narrowed.
// The same is true if one of the operands is an int const and can be narrowed into 'dstt'.
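// (Illustrative: for "x & 0xFF", the constant fits in a small unsigned type, and AND can only
// clear bits, so the whole expression fits regardless of the other operand.)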
5729 | if (!gtIsActiveCSE_Candidate(op2) && ((op2->gtOper == GT_CNS_INT) || varTypeIsUnsigned(dstt))) |
5730 | { |
5731 | if (optNarrowTree(op2, srct, dstt, NoVNPair, false)) |
5732 | { |
5733 | opToNarrow = op2; |
5734 | otherOpPtr = &tree->gtOp.gtOp1; |
5735 | } |
5736 | else |
5737 | { |
5738 | foundOperandThatBlocksNarrowing = true; |
5739 | } |
5740 | } |
5741 | |
5742 | if ((opToNarrow == nullptr) && !gtIsActiveCSE_Candidate(op1) && |
5743 | ((op1->gtOper == GT_CNS_INT) || varTypeIsUnsigned(dstt))) |
5744 | { |
5745 | if (optNarrowTree(op1, srct, dstt, NoVNPair, false)) |
5746 | { |
5747 | opToNarrow = op1; |
5748 | otherOpPtr = &tree->gtOp.gtOp2; |
5749 | } |
5750 | else |
5751 | { |
5752 | foundOperandThatBlocksNarrowing = true; |
5753 | } |
5754 | } |
5755 | |
5756 | if (opToNarrow != nullptr) |
5757 | { |
5758 | // We will change the type of the tree and narrow opToNarrow |
5759 | // |
5760 | if (doit) |
5761 | { |
5762 | tree->gtType = genActualType(dstt); |
5763 | tree->SetVNs(vnpNarrow); |
5764 | |
5765 | optNarrowTree(opToNarrow, srct, dstt, NoVNPair, true); |
5766 | // We may also need to cast away the upper bits of *otherOpPtr |
5767 | if (srcSize == 8) |
5768 | { |
5769 | assert(tree->gtType == TYP_INT); |
5770 | GenTree* castOp = gtNewCastNode(TYP_INT, *otherOpPtr, false, TYP_INT); |
5771 | #ifdef DEBUG |
5772 | castOp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
5773 | #endif |
5774 | *otherOpPtr = castOp; |
5775 | } |
5776 | } |
5777 | return true; |
5778 | } |
5779 | |
5780 | if (foundOperandThatBlocksNarrowing) |
5781 | { |
5782 | noway_assert(doit == false); |
5783 | return false; |
5784 | } |
5785 | |
5786 | goto COMMON_BINOP; |
5787 | |
5788 | case GT_ADD: |
5789 | case GT_MUL: |
5790 | |
5791 | if (tree->gtOverflow() || varTypeIsSmall(dstt)) |
5792 | { |
5793 | noway_assert(doit == false); |
5794 | return false; |
5795 | } |
5796 | __fallthrough; |
5797 | |
5798 | case GT_OR: |
5799 | case GT_XOR: |
5800 | noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); |
5801 | noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); |
5802 | COMMON_BINOP: |
5803 | if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) || |
5804 | !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit)) |
5805 | { |
5806 | noway_assert(doit == false); |
5807 | return false; |
5808 | } |
5809 | |
5810 | /* Simply change the type of the tree */ |
5811 | |
5812 | if (doit) |
5813 | { |
5814 | if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT)) |
5815 | { |
5816 | tree->gtFlags &= ~GTF_MUL_64RSLT; |
5817 | } |
5818 | |
5819 | tree->gtType = genActualType(dstt); |
5820 | tree->SetVNs(vnpNarrow); |
5821 | } |
5822 | |
5823 | return true; |
5824 | |
5825 | case GT_IND: |
5826 | |
5827 | NARROW_IND: |
5828 | |
5829 | if ((dstSize > genTypeSize(tree->gtType)) && |
5830 | (varTypeIsUnsigned(dstt) && !varTypeIsUnsigned(tree->gtType))) |
5831 | { |
5832 | return false; |
5833 | } |
5834 | |
5835 | /* Simply change the type of the tree */ |
5836 | |
5837 | if (doit && (dstSize <= genTypeSize(tree->gtType))) |
5838 | { |
5839 | tree->gtType = genSignedType(dstt); |
5840 | tree->SetVNs(vnpNarrow); |
5841 | |
5842 | /* Make sure we don't mess up the variable type */ |
5843 | if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD)) |
5844 | { |
5845 | tree->gtFlags |= GTF_VAR_CAST; |
5846 | } |
5847 | } |
5848 | |
5849 | return true; |
5850 | |
5851 | case GT_EQ: |
5852 | case GT_NE: |
5853 | case GT_LT: |
5854 | case GT_LE: |
5855 | case GT_GT: |
5856 | case GT_GE: |
5857 | |
5858 | /* These can always be narrowed since they only represent 0 or 1 */ |
5859 | return true; |
5860 | |
5861 | case GT_CAST: |
5862 | { |
5863 | var_types cast = tree->CastToType(); |
5864 | var_types oprt = op1->TypeGet(); |
5865 | unsigned oprSize = genTypeSize(oprt); |
5866 | |
5867 | if (cast != srct) |
5868 | { |
5869 | return false; |
5870 | } |
5871 | |
5872 | if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt)) |
5873 | { |
5874 | return false; |
5875 | } |
5876 | |
5877 | if (tree->gtOverflow()) |
5878 | { |
5879 | return false; |
5880 | } |
5881 | |
5882 | /* Is this a cast from the type we're narrowing to or a smaller one? */ |
5883 | |
5884 | if (oprSize <= dstSize) |
5885 | { |
5886 | /* Bash the target type of the cast */ |
5887 | |
5888 | if (doit) |
5889 | { |
5890 | dstt = genSignedType(dstt); |
5891 | |
5892 | if ((oprSize == dstSize) && |
5893 | ((varTypeIsUnsigned(dstt) == varTypeIsUnsigned(oprt)) || !varTypeIsSmall(dstt))) |
5894 | { |
5895 | // Same size and there is no signedness mismatch for small types: change the CAST |
5896 | // into a NOP |
5897 | |
5898 | JITDUMP("Cast operation has no effect, bashing [%06d] GT_CAST into a GT_NOP.\n" , |
5899 | dspTreeID(tree)); |
5900 | |
5901 | tree->ChangeOper(GT_NOP); |
5902 | tree->gtType = dstt; |
5903 | // Clear the GTF_UNSIGNED flag, as it may have been set on the cast node |
5904 | tree->gtFlags &= ~GTF_UNSIGNED; |
5905 | tree->gtOp.gtOp2 = nullptr; |
5906 | tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber |
5907 | } |
5908 | else |
5909 | { |
5910 | // oprSize is smaller or there is a signedness mismatch for small types |
5911 | |
5912 | // Change the CastToType in the GT_CAST node |
5913 | tree->CastToType() = dstt; |
5914 | |
5915 | // The result type of a GT_CAST is never a small type. |
// Use genActualType to widen dstt when it is a small type.
5917 | tree->gtType = genActualType(dstt); |
5918 | tree->SetVNs(vnpNarrow); |
5919 | } |
5920 | } |
5921 | |
5922 | return true; |
5923 | } |
5924 | } |
5925 | return false; |
5926 | |
5927 | case GT_COMMA: |
5928 | if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit)) |
5929 | { |
5930 | /* Simply change the type of the tree */ |
5931 | |
5932 | if (doit) |
5933 | { |
5934 | tree->gtType = genActualType(dstt); |
5935 | tree->SetVNs(vnpNarrow); |
5936 | } |
5937 | return true; |
5938 | } |
5939 | return false; |
5940 | |
5941 | default: |
5942 | noway_assert(doit == false); |
5943 | return false; |
5944 | } |
5945 | } |
5946 | |
5947 | return false; |
5948 | } |
5949 | |
5950 | /***************************************************************************** |
5951 | * |
5952 | * The following logic figures out whether the given variable is assigned |
5953 | * somewhere in a list of basic blocks (or in an entire loop). |
5954 | */ |
5955 | |
5956 | Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTree** pTree, fgWalkData* data) |
5957 | { |
5958 | GenTree* tree = *pTree; |
5959 | |
5960 | if (tree->OperIs(GT_ASG)) |
5961 | { |
5962 | GenTree* dest = tree->gtOp.gtOp1; |
5963 | genTreeOps destOper = dest->OperGet(); |
5964 | |
5965 | isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData; |
5966 | assert(desc && desc->ivaSelf == desc); |
5967 | |
5968 | if (destOper == GT_LCL_VAR) |
5969 | { |
5970 | unsigned tvar = dest->gtLclVarCommon.gtLclNum; |
5971 | if (tvar < lclMAX_ALLSET_TRACKED) |
5972 | { |
5973 | AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar); |
5974 | } |
5975 | else |
5976 | { |
5977 | desc->ivaMaskIncomplete = true; |
5978 | } |
5979 | |
5980 | if (tvar == desc->ivaVar) |
5981 | { |
5982 | if (tree != desc->ivaSkip) |
5983 | { |
5984 | return WALK_ABORT; |
5985 | } |
5986 | } |
5987 | } |
5988 | else if (destOper == GT_LCL_FLD) |
5989 | { |
5990 | /* We can't track every field of every var. Moreover, indirections |
5991 | may access different parts of the var as different (but |
5992 | overlapping) fields. So just treat them as indirect accesses */ |
5993 | |
5994 | // unsigned lclNum = dest->gtLclFld.gtLclNum; |
5995 | // noway_assert(lvaTable[lclNum].lvAddrTaken); |
5996 | |
5997 | varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL; |
5998 | desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs); |
5999 | } |
6000 | else if (destOper == GT_CLS_VAR) |
6001 | { |
6002 | desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR); |
6003 | } |
6004 | else if (destOper == GT_IND) |
6005 | { |
6006 | /* Set the proper indirection bits */ |
6007 | |
6008 | varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL; |
6009 | desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs); |
6010 | } |
6011 | } |
6012 | else if (tree->gtOper == GT_CALL) |
6013 | { |
6014 | isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData; |
6015 | assert(desc && desc->ivaSelf == desc); |
6016 | |
6017 | desc->ivaMaskCall = optCallInterf(tree->AsCall()); |
6018 | } |
6019 | |
6020 | return WALK_CONTINUE; |
6021 | } |
6022 | |
6023 | /*****************************************************************************/ |
6024 | |
6025 | bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTree* skip, unsigned var) |
6026 | { |
6027 | bool result; |
6028 | isVarAssgDsc desc; |
6029 | |
6030 | desc.ivaSkip = skip; |
6031 | #ifdef DEBUG |
6032 | desc.ivaSelf = &desc; |
6033 | #endif |
6034 | desc.ivaVar = var; |
6035 | desc.ivaMaskCall = CALLINT_NONE; |
6036 | AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this)); |
6037 | |
6038 | for (;;) |
6039 | { |
6040 | noway_assert(beg); |
6041 | |
6042 | for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt) |
6043 | { |
6044 | noway_assert(stmt->gtOper == GT_STMT); |
6045 | if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc)) |
6046 | { |
6047 | result = true; |
6048 | goto DONE; |
6049 | } |
6050 | } |
6051 | |
6052 | if (beg == end) |
6053 | { |
6054 | break; |
6055 | } |
6056 | |
6057 | beg = beg->bbNext; |
6058 | } |
6059 | |
6060 | result = false; |
6061 | |
6062 | DONE: |
6063 | |
6064 | return result; |
6065 | } |
6066 | |
6067 | /*****************************************************************************/ |
6068 | int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds) |
6069 | { |
6070 | LoopDsc* loop; |
6071 | |
6072 | /* Get hold of the loop descriptor */ |
6073 | |
6074 | noway_assert(lnum < optLoopCount); |
6075 | loop = optLoopTable + lnum; |
6076 | |
6077 | /* Do we already know what variables are assigned within this loop? */ |
6078 | |
6079 | if (!(loop->lpFlags & LPFLG_ASGVARS_YES)) |
6080 | { |
6081 | isVarAssgDsc desc; |
6082 | |
6083 | BasicBlock* beg; |
6084 | BasicBlock* end; |
6085 | |
6086 | /* Prepare the descriptor used by the tree walker call-back */ |
6087 | |
6088 | desc.ivaVar = (unsigned)-1; |
6089 | desc.ivaSkip = nullptr; |
6090 | #ifdef DEBUG |
6091 | desc.ivaSelf = &desc; |
6092 | #endif |
6093 | AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this)); |
6094 | desc.ivaMaskInd = VR_NONE; |
6095 | desc.ivaMaskCall = CALLINT_NONE; |
6096 | desc.ivaMaskIncomplete = false; |
6097 | |
6098 | /* Now walk all the statements of the loop */ |
6099 | |
6100 | beg = loop->lpHead->bbNext; |
6101 | end = loop->lpBottom; |
6102 | |
6103 | for (/**/; /**/; beg = beg->bbNext) |
6104 | { |
6105 | noway_assert(beg); |
6106 | |
6107 | for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) |
6108 | { |
6109 | noway_assert(stmt->gtOper == GT_STMT); |
6110 | fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc); |
6111 | |
6112 | if (desc.ivaMaskIncomplete) |
6113 | { |
6114 | loop->lpFlags |= LPFLG_ASGVARS_INC; |
6115 | } |
6116 | } |
6117 | |
6118 | if (beg == end) |
6119 | { |
6120 | break; |
6121 | } |
6122 | } |
6123 | |
6124 | AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal); |
6125 | loop->lpAsgInds = desc.ivaMaskInd; |
6126 | loop->lpAsgCall = desc.ivaMaskCall; |
6127 | |
6128 | /* Now we know what variables are assigned in the loop */ |
6129 | |
6130 | loop->lpFlags |= LPFLG_ASGVARS_YES; |
6131 | } |
6132 | |
6133 | /* Now we can finally test the caller's mask against the loop's */ |
6134 | if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds)) |
6135 | { |
6136 | return 1; |
6137 | } |
6138 | |
6139 | switch (loop->lpAsgCall) |
6140 | { |
6141 | case CALLINT_ALL: |
6142 | |
/* Can't hoist if the call might have a side effect on an indirection. */
6144 | |
6145 | if (loop->lpAsgInds != VR_NONE) |
6146 | { |
6147 | return 1; |
6148 | } |
6149 | |
6150 | break; |
6151 | |
6152 | case CALLINT_REF_INDIRS: |
6153 | |
/* Can't hoist if the call might have a side effect on a ref indirection. */
6155 | |
6156 | if (loop->lpAsgInds & VR_IND_REF) |
6157 | { |
6158 | return 1; |
6159 | } |
6160 | |
6161 | break; |
6162 | |
6163 | case CALLINT_SCL_INDIRS: |
6164 | |
/* Can't hoist if the call might have a side effect on a non-ref indirection. */
6166 | |
6167 | if (loop->lpAsgInds & VR_IND_SCL) |
6168 | { |
6169 | return 1; |
6170 | } |
6171 | |
6172 | break; |
6173 | |
6174 | case CALLINT_ALL_INDIRS: |
6175 | |
/* Can't hoist if the call might have a side effect on any indirection. */
6177 | |
6178 | if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL)) |
6179 | { |
6180 | return 1; |
6181 | } |
6182 | |
6183 | break; |
6184 | |
6185 | case CALLINT_NONE: |
6186 | |
6187 | /* Other helpers kill nothing */ |
6188 | |
6189 | break; |
6190 | |
6191 | default: |
6192 | noway_assert(!"Unexpected lpAsgCall value" ); |
6193 | } |
6194 | |
6195 | return 0; |
6196 | } |
6197 | |
6198 | void Compiler::optPerformHoistExpr(GenTree* origExpr, unsigned lnum) |
6199 | { |
6200 | #ifdef DEBUG |
6201 | if (verbose) |
6202 | { |
6203 | printf("\nHoisting a copy of " ); |
6204 | printTreeID(origExpr); |
6205 | printf(" into PreHeader for loop L%02u <" FMT_BB ".." FMT_BB ">:\n" , lnum, optLoopTable[lnum].lpFirst->bbNum, |
6206 | optLoopTable[lnum].lpBottom->bbNum); |
6207 | gtDispTree(origExpr); |
6208 | printf("\n" ); |
6209 | } |
6210 | #endif |
6211 | |
6212 | // This loop has to be in a form that is approved for hoisting. |
6213 | assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE); |
6214 | |
6215 | // Create a copy of the expression and mark it for CSE's. |
6216 | GenTree* hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE); |
6217 | |
6218 | // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag |
6219 | assert(hoistExpr != origExpr); |
6220 | assert(hoistExpr->gtFlags & GTF_MAKE_CSE); |
6221 | |
6222 | GenTree* hoist = hoistExpr; |
6223 | // The value of the expression isn't used (unless it's an assignment). |
6224 | if (hoistExpr->OperGet() != GT_ASG) |
6225 | { |
6226 | hoist = gtUnusedValNode(hoistExpr); |
6227 | } |
6228 | |
6229 | /* Put the statement in the preheader */ |
6230 | |
6231 | fgCreateLoopPreHeader(lnum); |
6232 | |
6233 | BasicBlock* preHead = optLoopTable[lnum].lpHead; |
6234 | assert(preHead->bbJumpKind == BBJ_NONE); |
6235 | |
6236 | // fgMorphTree requires that compCurBB be the block that contains |
6237 | // (or in this case, will contain) the expression. |
6238 | compCurBB = preHead; |
6239 | hoist = fgMorphTree(hoist); |
6240 | |
6241 | GenTree* hoistStmt = gtNewStmt(hoist); |
6242 | hoistStmt->gtFlags |= GTF_STMT_CMPADD; |
6243 | |
6244 | /* simply append the statement at the end of the preHead's list */ |
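// Note: bbTreeList uses a circular gtPrev convention: the first statement's gtPrev points to the
// last statement (so the end of the list is found in O(1)), while the last statement's gtNext is
// nullptr. The splice below maintains that invariant.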
6245 | |
6246 | GenTree* treeList = preHead->bbTreeList; |
6247 | |
6248 | if (treeList) |
6249 | { |
6250 | /* append after last statement */ |
6251 | |
6252 | GenTree* last = treeList->gtPrev; |
6253 | assert(last->gtNext == nullptr); |
6254 | |
6255 | last->gtNext = hoistStmt; |
6256 | hoistStmt->gtPrev = last; |
6257 | treeList->gtPrev = hoistStmt; |
6258 | } |
6259 | else |
6260 | { |
6261 | /* Empty pre-header - store the single statement in the block */ |
6262 | |
6263 | preHead->bbTreeList = hoistStmt; |
6264 | hoistStmt->gtPrev = hoistStmt; |
6265 | } |
6266 | |
6267 | hoistStmt->gtNext = nullptr; |
6268 | |
6269 | #ifdef DEBUG |
6270 | if (verbose) |
6271 | { |
6272 | printf("This hoisted copy placed in PreHeader (" FMT_BB "):\n" , preHead->bbNum); |
6273 | gtDispTree(hoist); |
6274 | } |
6275 | #endif |
6276 | |
6277 | if (fgStmtListThreaded) |
6278 | { |
6279 | gtSetStmtInfo(hoistStmt); |
6280 | fgSetStmtSeq(hoistStmt); |
6281 | } |
6282 | |
6283 | #ifdef DEBUG |
6284 | if (m_nodeTestData != nullptr) |
6285 | { |
6286 | |
6287 | // What is the depth of the loop "lnum"? |
6288 | ssize_t depth = 0; |
6289 | unsigned lnumIter = lnum; |
6290 | while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP) |
6291 | { |
6292 | depth++; |
6293 | lnumIter = optLoopTable[lnumIter].lpParent; |
6294 | } |
6295 | |
6296 | NodeToTestDataMap* testData = GetNodeTestData(); |
6297 | |
6298 | TestLabelAndNum tlAndN; |
6299 | if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist) |
6300 | { |
6301 | if (tlAndN.m_num == -1) |
6302 | { |
6303 | printf("Node " ); |
6304 | printTreeID(origExpr); |
6305 | printf(" was declared 'do not hoist', but is being hoisted.\n" ); |
6306 | assert(false); |
6307 | } |
6308 | else if (tlAndN.m_num != depth) |
6309 | { |
6310 | printf("Node " ); |
6311 | printTreeID(origExpr); |
6312 | printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth " |
6313 | "%d.\n" , |
6314 | tlAndN.m_num, depth); |
6315 | assert(false); |
6316 | } |
6317 | else |
6318 | { |
6319 | // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must |
6320 | // hoist" annotations. |
6321 | testData->Remove(origExpr); |
6322 | // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd. |
6323 | tlAndN.m_tl = TL_CSE_Def; |
6324 | tlAndN.m_num = m_loopHoistCSEClass++; |
6325 | testData->Set(hoistExpr, tlAndN); |
6326 | } |
6327 | } |
6328 | } |
6329 | #endif |
6330 | |
6331 | #if LOOP_HOIST_STATS |
6332 | if (!m_curLoopHasHoistedExpression) |
6333 | { |
6334 | m_loopsWithHoistedExpressions++; |
6335 | m_curLoopHasHoistedExpression = true; |
6336 | } |
6337 | m_totalHoistedExpressions++; |
6338 | #endif // LOOP_HOIST_STATS |
6339 | } |
6340 | |
6341 | void Compiler::optHoistLoopCode() |
6342 | { |
6343 | // If we don't have any loops in the method then take an early out now. |
6344 | if (optLoopCount == 0) |
6345 | { |
6346 | return; |
6347 | } |
6348 | |
6349 | #ifdef DEBUG |
6350 | unsigned jitNoHoist = JitConfig.JitNoHoist(); |
6351 | if (jitNoHoist > 0) |
6352 | { |
6353 | return; |
6354 | } |
6355 | #endif |
6356 | |
6357 | #if 0 |
// The code in this #if has been useful in debugging loop hoisting issues, by
// enabling selective enablement of the loop hoisting optimization according to
// method hash.
6361 | #ifdef DEBUG |
6362 | unsigned methHash = info.compMethodHash(); |
6363 | char* lostr = getenv("loophoisthashlo" ); |
6364 | unsigned methHashLo = 0; |
6365 | if (lostr != NULL) |
6366 | { |
6367 | sscanf_s(lostr, "%x" , &methHashLo); |
6368 | // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. |
6369 | } |
6370 | char* histr = getenv("loophoisthashhi" ); |
6371 | unsigned methHashHi = UINT32_MAX; |
6372 | if (histr != NULL) |
6373 | { |
6374 | sscanf_s(histr, "%x" , &methHashHi); |
6375 | // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. |
6376 | } |
6377 | if (methHash < methHashLo || methHash > methHashHi) |
6378 | return; |
6379 | printf("Doing loop hoisting in %s (0x%x).\n" , info.compFullName, methHash); |
6380 | #endif // DEBUG |
#endif // 0 -- debugging loop hoisting issues
6382 | |
6383 | #ifdef DEBUG |
6384 | if (verbose) |
6385 | { |
6386 | printf("\n*************** In optHoistLoopCode()\n" ); |
6387 | printf("Blocks/Trees before phase\n" ); |
6388 | fgDispBasicBlocks(true); |
6389 | printf("" ); |
6390 | } |
6391 | #endif |
6392 | |
// Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside
// the largest loop in which they are invariant).
6395 | LoopHoistContext hoistCtxt(this); |
6396 | for (unsigned lnum = 0; lnum < optLoopCount; lnum++) |
6397 | { |
6398 | if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) |
6399 | { |
6400 | continue; |
6401 | } |
6402 | |
6403 | if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) |
6404 | { |
6405 | optHoistLoopNest(lnum, &hoistCtxt); |
6406 | } |
6407 | } |
6408 | |
6409 | #if DEBUG |
6410 | if (fgModified) |
6411 | { |
6412 | if (verbose) |
6413 | { |
6414 | printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n" ); |
6415 | fgDispBasicBlocks(true); |
6416 | printf("" ); |
6417 | } |
6418 | |
6419 | // Make sure that the predecessor lists are accurate |
6420 | fgDebugCheckBBlist(); |
6421 | } |
6422 | #endif |
6423 | |
6424 | #ifdef DEBUG |
6425 | // Test Data stuff.. |
6426 | // If we have no test data, early out. |
6427 | if (m_nodeTestData == nullptr) |
6428 | { |
6429 | return; |
6430 | } |
6431 | NodeToTestDataMap* testData = GetNodeTestData(); |
6432 | for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki) |
6433 | { |
6434 | TestLabelAndNum tlAndN; |
6435 | GenTree* node = ki.Get(); |
6436 | bool b = testData->Lookup(node, &tlAndN); |
6437 | assert(b); |
6438 | if (tlAndN.m_tl != TL_LoopHoist) |
6439 | { |
6440 | continue; |
6441 | } |
6442 | // Otherwise, it is a loop hoist annotation. |
6443 | assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved. |
6444 | if (tlAndN.m_num >= 0) |
6445 | { |
6446 | printf("Node " ); |
6447 | printTreeID(node); |
6448 | printf(" was declared 'must hoist', but has not been hoisted.\n" ); |
6449 | assert(false); |
6450 | } |
6451 | } |
6452 | #endif // DEBUG |
6453 | } |
6454 | |
6455 | void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) |
6456 | { |
6457 | // Do this loop, then recursively do all nested loops. |
6458 | CLANG_FORMAT_COMMENT_ANCHOR; |
6459 | |
6460 | #if LOOP_HOIST_STATS |
6461 | // Record stats |
6462 | m_curLoopHasHoistedExpression = false; |
6463 | m_loopsConsidered++; |
6464 | #endif // LOOP_HOIST_STATS |
6465 | |
6466 | optHoistThisLoop(lnum, hoistCtxt); |
6467 | |
6468 | VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); |
6469 | |
6470 | if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) |
6471 | { |
6472 | // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops. |
6473 | // TODO-Cleanup: we should have a set abstraction for loops. |
6474 | if (hoistedInCurLoop != nullptr) |
6475 | { |
6476 | for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) |
6477 | { |
6478 | #ifdef DEBUG |
6479 | bool b; |
6480 | assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b)); |
6481 | #endif |
6482 | hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true); |
6483 | } |
6484 | } |
6485 | |
6486 | for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; |
6487 | child = optLoopTable[child].lpSibling) |
6488 | { |
6489 | optHoistLoopNest(child, hoistCtxt); |
6490 | } |
6491 | |
6492 | // Now remove them. |
6493 | // TODO-Cleanup: we should have a set abstraction for loops. |
6494 | if (hoistedInCurLoop != nullptr) |
6495 | { |
6496 | for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) |
6497 | { |
6498 | // Note that we asserted when we added these that they hadn't been members, so removing is appropriate. |
6499 | hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get()); |
6500 | } |
6501 | } |
6502 | } |
6503 | } |
6504 | |
6505 | void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) |
6506 | { |
6507 | LoopDsc* pLoopDsc = &optLoopTable[lnum]; |
6508 | |
6509 | /* If loop was removed continue */ |
6510 | |
6511 | if (pLoopDsc->lpFlags & LPFLG_REMOVED) |
6512 | { |
6513 | return; |
6514 | } |
6515 | |
6516 | /* Get the head and tail of the loop */ |
6517 | |
6518 | BasicBlock* head = pLoopDsc->lpHead; |
6519 | BasicBlock* tail = pLoopDsc->lpBottom; |
6520 | BasicBlock* lbeg = pLoopDsc->lpEntry; |
6521 | |
6522 | // We must have a do-while loop |
6523 | if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0) |
6524 | { |
6525 | return; |
6526 | } |
6527 | |
6528 | // The loop-head must dominate the loop-entry. |
6529 | // TODO-CQ: Couldn't we make this true if it's not? |
6530 | if (!fgDominate(head, lbeg)) |
6531 | { |
6532 | return; |
6533 | } |
6534 | |
6535 | // if lbeg is the start of a new try block then we won't be able to hoist |
6536 | if (!BasicBlock::sameTryRegion(head, lbeg)) |
6537 | { |
6538 | return; |
6539 | } |
6540 | |
6541 | // We don't bother hoisting when inside of a catch block |
6542 | if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY)) |
6543 | { |
6544 | return; |
6545 | } |
6546 | |
6547 | pLoopDsc->lpFlags |= LPFLG_HOISTABLE; |
6548 | |
6549 | unsigned begn = lbeg->bbNum; |
6550 | unsigned endn = tail->bbNum; |
6551 | |
6552 | // Ensure the per-loop sets/tables are empty. |
6553 | hoistCtxt->m_curLoopVnInvariantCache.RemoveAll(); |
6554 | |
6555 | #ifdef DEBUG |
6556 | if (verbose) |
6557 | { |
6558 | printf("optHoistLoopCode for loop L%02u <" FMT_BB ".." FMT_BB ">:\n" , lnum, begn, endn); |
6559 | printf(" Loop body %s a call\n" , pLoopDsc->lpContainsCall ? "contains" : "does not contain" ); |
6560 | } |
6561 | #endif |
6562 | |
6563 | VARSET_TP loopVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef)); |
6564 | |
6565 | pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut); |
6566 | pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars); |
6567 | pLoopDsc->lpHoistedExprCount = 0; |
6568 | |
6569 | #ifndef _TARGET_64BIT_ |
6570 | unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars); |
6571 | |
6572 | if (longVarsCount > 0) |
6573 | { |
6574 | // Since 64-bit variables take up two registers on 32-bit targets, we increase |
6575 | // the Counts such that each TYP_LONG variable counts twice. |
6576 | // |
6577 | VARSET_TP loopLongVars(VarSetOps::Intersection(this, loopVars, lvaLongVars)); |
6578 | VARSET_TP inOutLongVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars)); |
6579 | |
6580 | #ifdef DEBUG |
6581 | if (verbose) |
6582 | { |
6583 | printf("\n LONGVARS(%d)=" , VarSetOps::Count(this, lvaLongVars)); |
6584 | lvaDispVarSet(lvaLongVars); |
6585 | } |
6586 | #endif |
6587 | pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars); |
6588 | pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars); |
6589 | } |
6590 | #endif // !_TARGET_64BIT_ |
6591 | |
6592 | #ifdef DEBUG |
6593 | if (verbose) |
6594 | { |
6595 | printf("\n USEDEF (%d)=" , VarSetOps::Count(this, pLoopDsc->lpVarUseDef)); |
6596 | lvaDispVarSet(pLoopDsc->lpVarUseDef); |
6597 | |
6598 | printf("\n INOUT (%d)=" , pLoopDsc->lpVarInOutCount); |
6599 | lvaDispVarSet(pLoopDsc->lpVarInOut); |
6600 | |
6601 | printf("\n LOOPVARS(%d)=" , pLoopDsc->lpLoopVarCount); |
6602 | lvaDispVarSet(loopVars); |
6603 | printf("\n" ); |
6604 | } |
6605 | #endif |
6606 | |
6607 | unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars); |
6608 | |
6609 | if (floatVarsCount > 0) |
6610 | { |
6611 | VARSET_TP loopFPVars(VarSetOps::Intersection(this, loopVars, lvaFloatVars)); |
6612 | VARSET_TP inOutFPVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars)); |
6613 | |
6614 | pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars); |
6615 | pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars); |
6616 | pLoopDsc->lpHoistedFPExprCount = 0; |
6617 | |
6618 | pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount; |
6619 | pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount; |
6620 | |
6621 | #ifdef DEBUG |
6622 | if (verbose) |
6623 | { |
6624 | printf(" INOUT-FP(%d)=" , pLoopDsc->lpVarInOutFPCount); |
6625 | lvaDispVarSet(inOutFPVars); |
6626 | |
6627 | printf("\n LOOPV-FP(%d)=" , pLoopDsc->lpLoopVarFPCount); |
6628 | lvaDispVarSet(loopFPVars); |
6629 | } |
6630 | #endif |
6631 | } |
6632 | else // (floatVarsCount == 0) |
6633 | { |
6634 | pLoopDsc->lpLoopVarFPCount = 0; |
6635 | pLoopDsc->lpVarInOutFPCount = 0; |
6636 | pLoopDsc->lpHoistedFPExprCount = 0; |
6637 | } |
6638 | |
6639 | // Find the set of definitely-executed blocks. |
6640 | // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block. |
    // Until we have post-dominators, we'll special-case for single-exit loops.
6642 | JitExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist()); |
6643 | if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT) |
6644 | { |
6645 | assert(pLoopDsc->lpExit != nullptr); |
6646 | BasicBlock* cur = pLoopDsc->lpExit; |
6647 | // Push dominators, until we reach "entry" or exit the loop. |
6648 | while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry) |
6649 | { |
6650 | defExec.Push(cur); |
6651 | cur = cur->bbIDom; |
6652 | } |
6653 | // If we didn't reach the entry block, give up and *just* push the entry block. |
6654 | if (cur != pLoopDsc->lpEntry) |
6655 | { |
6656 | defExec.Reset(); |
6657 | } |
6658 | defExec.Push(pLoopDsc->lpEntry); |
6659 | } |
6660 | else // More than one exit |
6661 | { |
6662 | // We'll assume that only the entry block is definitely executed. |
6663 | // We could in the future do better. |
6664 | defExec.Push(pLoopDsc->lpEntry); |
6665 | } |
6666 | |
6667 | while (defExec.Size() > 0) |
6668 | { |
6669 | // Consider in reverse order: dominator before dominatee. |
6670 | BasicBlock* blk = defExec.Pop(); |
6671 | optHoistLoopExprsForBlock(blk, lnum, hoistCtxt); |
6672 | } |
6673 | } |
6674 | |
6675 | // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum". |
6676 | void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt) |
6677 | { |
6678 | LoopDsc* pLoopDsc = &optLoopTable[lnum]; |
6679 | bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry); |
6680 | unsigned blkWeight = blk->getBBWeight(this); |
6681 | |
6682 | #ifdef DEBUG |
6683 | if (verbose) |
6684 | { |
6685 | printf(" optHoistLoopExprsForBlock " FMT_BB " (weight=%6s) of loop L%02u <" FMT_BB ".." FMT_BB |
6686 | ">, firstBlock is %s\n" , |
6687 | blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum, |
6688 | firstBlockAndBeforeSideEffect ? "true" : "false" ); |
6689 | if (blkWeight < (BB_UNITY_WEIGHT / 10)) |
6690 | { |
6691 | printf(" block weight is too small to perform hoisting.\n" ); |
6692 | } |
6693 | } |
6694 | #endif |
6695 | |
6696 | if (blkWeight < (BB_UNITY_WEIGHT / 10)) |
6697 | { |
6698 | // Block weight is too small to perform hoisting. |
6699 | return; |
6700 | } |
6701 | |
6702 | for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) |
6703 | { |
6704 | GenTree* stmtTree = stmt->gtStmtExpr; |
6705 | bool hoistable; |
6706 | bool cctorDependent; |
6707 | (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable, |
6708 | &cctorDependent); |
6709 | if (hoistable) |
6710 | { |
6711 | // we will try to hoist the top-level stmtTree |
6712 | optHoistCandidate(stmtTree, lnum, hoistCtxt); |
6713 | } |
6714 | } |
6715 | } |
6716 | |
6717 | bool Compiler::optIsProfitableToHoistableTree(GenTree* tree, unsigned lnum) |
6718 | { |
6719 | LoopDsc* pLoopDsc = &optLoopTable[lnum]; |
6720 | |
6721 | bool loopContainsCall = pLoopDsc->lpContainsCall; |
6722 | |
6723 | int availRegCount; |
6724 | int hoistedExprCount; |
6725 | int loopVarCount; |
6726 | int varInOutCount; |
6727 | |
6728 | if (varTypeIsFloating(tree->TypeGet())) |
6729 | { |
6730 | hoistedExprCount = pLoopDsc->lpHoistedFPExprCount; |
6731 | loopVarCount = pLoopDsc->lpLoopVarFPCount; |
6732 | varInOutCount = pLoopDsc->lpVarInOutFPCount; |
6733 | |
6734 | availRegCount = CNT_CALLEE_SAVED_FLOAT; |
6735 | if (!loopContainsCall) |
6736 | { |
6737 | availRegCount += CNT_CALLEE_TRASH_FLOAT - 1; |
6738 | } |
6739 | #ifdef _TARGET_ARM_ |
6740 | // For ARM each double takes two FP registers |
6741 | // For now on ARM we won't track singles/doubles |
6742 | // and instead just assume that we always have doubles. |
6743 | // |
6744 | availRegCount /= 2; |
6745 | #endif |
6746 | } |
6747 | else |
6748 | { |
6749 | hoistedExprCount = pLoopDsc->lpHoistedExprCount; |
6750 | loopVarCount = pLoopDsc->lpLoopVarCount; |
6751 | varInOutCount = pLoopDsc->lpVarInOutCount; |
6752 | |
6753 | availRegCount = CNT_CALLEE_SAVED - 1; |
6754 | if (!loopContainsCall) |
6755 | { |
6756 | availRegCount += CNT_CALLEE_TRASH - 1; |
6757 | } |
6758 | #ifndef _TARGET_64BIT_ |
6759 | // For our 32-bit targets Long types take two registers. |
6760 | if (varTypeIsLong(tree->TypeGet())) |
6761 | { |
6762 | availRegCount = (availRegCount + 1) / 2; |
6763 | } |
6764 | #endif |
6765 | } |
6766 | |
    // Decrement availRegCount by the number of expressions that we have already hoisted.
6768 | availRegCount -= hoistedExprCount; |
6769 | |
6770 | // the variables that are read/written inside the loop should |
6771 | // always be a subset of the InOut variables for the loop |
6772 | assert(loopVarCount <= varInOutCount); |
6773 | |
6774 | // When loopVarCount >= availRegCount we believe that all of the |
6775 | // available registers will get used to hold LclVars inside the loop. |
6776 | // This pessimistically assumes that each loopVar has a conflicting |
6777 | // lifetime with every other loopVar. |
    // For this case we will hoist the expression only if it is profitable
6779 | // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX) |
6780 | // as we believe it will be placed in the stack or one of the other |
6781 | // loopVars will be spilled into the stack |
6782 | // |
6783 | if (loopVarCount >= availRegCount) |
6784 | { |
6785 | // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX) |
6786 | if (tree->gtCostEx < (2 * IND_COST_EX)) |
6787 | { |
6788 | return false; |
6789 | } |
6790 | } |
6791 | |
    // When varInOutCount < availRegCount we know that there are
6793 | // some available register(s) when we enter the loop body. |
6794 | // When varInOutCount == availRegCount there often will be a register |
6795 | // available when we enter the loop body, since a loop often defines a |
6796 | // LclVar on exit or there is often at least one LclVar that is worth |
6797 | // spilling to the stack to make way for this hoisted expression. |
    // So we are willing to hoist an expression with gtCostEx == MIN_CSE_COST
6799 | // |
6800 | if (varInOutCount > availRegCount) |
6801 | { |
6802 | // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST |
6803 | if (tree->gtCostEx <= MIN_CSE_COST + 1) |
6804 | { |
6805 | return false; |
6806 | } |
6807 | } |
6808 | |
6809 | return true; |
6810 | } |
6811 | |
6812 | // |
6813 | // This function returns true if 'tree' is a loop invariant expression. |
6814 | // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block, |
// and sets '*pCctorDependent' to true if 'tree' is a function of a static field that must not be
6816 | // hoisted (even if '*pHoistable' is true) unless a preceding corresponding cctor init helper |
6817 | // call is also hoisted. |
6818 | // |
6819 | bool Compiler::optHoistLoopExprsForTree(GenTree* tree, |
6820 | unsigned lnum, |
6821 | LoopHoistContext* hoistCtxt, |
6822 | bool* pFirstBlockAndBeforeSideEffect, |
6823 | bool* pHoistable, |
6824 | bool* pCctorDependent) |
6825 | { |
6826 | // First do the children. |
6827 | // We must keep track of whether each child node was hoistable or not |
6828 | // |
6829 | unsigned nChildren = tree->NumChildren(); |
6830 | bool childrenHoistable[GenTree::MAX_CHILDREN]; |
6831 | bool childrenCctorDependent[GenTree::MAX_CHILDREN]; |
6832 | |
    // Initialize the elements of childrenHoistable[] and childrenCctorDependent[] to false
6834 | for (unsigned i = 0; i < nChildren; i++) |
6835 | { |
6836 | childrenHoistable[i] = false; |
6837 | childrenCctorDependent[i] = false; |
6838 | } |
6839 | |
6840 | // Initclass CLS_VARs and IconHandles are the base cases of cctor dependent trees. |
6841 | // In the IconHandle case, it's of course the dereference, rather than the constant itself, that is |
6842 | // truly dependent on the cctor. So a more precise approach would be to separately propagate |
6843 | // isCctorDependent and isAddressWhoseDereferenceWouldBeCctorDependent, but we don't for simplicity/throughput; |
6844 | // the constant itself would be considered non-hoistable anyway, since optIsCSEcandidate returns |
6845 | // false for constants. |
6846 | bool treeIsCctorDependent = ((tree->OperIs(GT_CLS_VAR) && ((tree->gtFlags & GTF_CLS_VAR_INITCLASS) != 0)) || |
6847 | (tree->OperIs(GT_CNS_INT) && ((tree->gtFlags & GTF_ICON_INITCLASS) != 0))); |
6848 | bool treeIsInvariant = true; |
6849 | for (unsigned childNum = 0; childNum < nChildren; childNum++) |
6850 | { |
6851 | if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect, |
6852 | &childrenHoistable[childNum], &childrenCctorDependent[childNum])) |
6853 | { |
6854 | treeIsInvariant = false; |
6855 | } |
6856 | |
6857 | if (childrenCctorDependent[childNum]) |
6858 | { |
6859 | // Normally, a parent of a cctor-dependent tree is also cctor-dependent. |
6860 | treeIsCctorDependent = true; |
6861 | |
6862 | // Check for the case where we can stop propagating cctor-dependent upwards. |
6863 | if (tree->OperIs(GT_COMMA) && (childNum == 1)) |
6864 | { |
6865 | GenTree* op1 = tree->gtGetOp1(); |
6866 | if (op1->OperIs(GT_CALL)) |
6867 | { |
6868 | GenTreeCall* call = op1->AsCall(); |
6869 | if ((call->gtCallType == CT_HELPER) && |
6870 | s_helperCallProperties.MayRunCctor(eeGetHelperNum(call->gtCallMethHnd))) |
6871 | { |
6872 | // Hoisting the comma is ok because it would hoist the initialization along |
6873 | // with the static field reference. |
6874 | treeIsCctorDependent = false; |
6875 | // Hoisting the static field without hoisting the initialization would be |
6876 | // incorrect, make sure we consider the field (which we flagged as |
6877 | // cctor-dependent) non-hoistable. |
6878 | noway_assert(!childrenHoistable[childNum]); |
6879 | } |
6880 | } |
6881 | } |
6882 | } |
6883 | } |
6884 | |
6885 | // If all the children of "tree" are hoistable, then "tree" itself can be hoisted, |
6886 | // unless it has a static var reference that can't be hoisted past its cctor call. |
6887 | bool treeIsHoistable = treeIsInvariant && !treeIsCctorDependent; |
6888 | |
6889 | // But we must see if anything else prevents "tree" from being hoisted. |
6890 | // |
6891 | if (treeIsInvariant) |
6892 | { |
6893 | // Tree must be a suitable CSE candidate for us to be able to hoist it. |
6894 | treeIsHoistable &= optIsCSEcandidate(tree); |
6895 | |
6896 | // If it's a call, it must be a helper call, and be pure. |
6897 | // Further, if it may run a cctor, it must be labeled as "Hoistable" |
6898 | // (meaning it won't run a cctor because the class is not precise-init). |
6899 | if (treeIsHoistable && tree->OperGet() == GT_CALL) |
6900 | { |
6901 | GenTreeCall* call = tree->AsCall(); |
6902 | if (call->gtCallType != CT_HELPER) |
6903 | { |
6904 | treeIsHoistable = false; |
6905 | } |
6906 | else |
6907 | { |
6908 | CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); |
6909 | if (!s_helperCallProperties.IsPure(helpFunc)) |
6910 | { |
6911 | treeIsHoistable = false; |
6912 | } |
6913 | else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0) |
6914 | { |
6915 | treeIsHoistable = false; |
6916 | } |
6917 | } |
6918 | } |
6919 | |
6920 | if (treeIsHoistable) |
6921 | { |
6922 | if (!(*pFirstBlockAndBeforeSideEffect)) |
6923 | { |
6924 | // For now, we give up on an expression that might raise an exception if it is after the |
6925 | // first possible global side effect (and we assume we're after that if we're not in the first block). |
6926 | // TODO-CQ: this is when we might do loop cloning. |
6927 | // |
6928 | if ((tree->gtFlags & GTF_EXCEPT) != 0) |
6929 | { |
6930 | treeIsHoistable = false; |
6931 | } |
6932 | } |
6933 | } |
6934 | |
6935 | // Is the value of the whole tree loop invariant? |
6936 | treeIsInvariant = |
6937 | optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache); |
6938 | |
        // If the value of the whole tree is not invariant in the loop, then it is not hoistable.
6940 | if (!treeIsInvariant) |
6941 | { |
6942 | treeIsHoistable = false; |
6943 | } |
6944 | } |
6945 | |
6946 | // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false. |
6947 | // If we encounter a tree with a call in it |
    // or if we see an assignment to a global we set it to false.
6949 | // |
6950 | // If we are already set to false then we can skip these checks |
6951 | // |
6952 | if (*pFirstBlockAndBeforeSideEffect) |
6953 | { |
6954 | // For this purpose, we only care about memory side effects. We assume that expressions will |
6955 | // be hoisted so that they are evaluated in the same order as they would have been in the loop, |
6956 | // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS |
6957 | // here, since that includes exceptions.) |
6958 | if (tree->IsCall()) |
6959 | { |
6960 | // If it's a call, it must be a helper call that does not mutate the heap. |
6961 | // Further, if it may run a cctor, it must be labeled as "Hoistable" |
6962 | // (meaning it won't run a cctor because the class is not precise-init). |
6963 | GenTreeCall* call = tree->AsCall(); |
6964 | if (call->gtCallType != CT_HELPER) |
6965 | { |
6966 | *pFirstBlockAndBeforeSideEffect = false; |
6967 | } |
6968 | else |
6969 | { |
6970 | CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); |
6971 | if (s_helperCallProperties.MutatesHeap(helpFunc)) |
6972 | { |
6973 | *pFirstBlockAndBeforeSideEffect = false; |
6974 | } |
6975 | else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0) |
6976 | { |
6977 | *pFirstBlockAndBeforeSideEffect = false; |
6978 | } |
6979 | } |
6980 | } |
6981 | else if (tree->OperIs(GT_ASG)) |
6982 | { |
6983 | // If the LHS of the assignment has a global reference, then assume it's a global side effect. |
6984 | GenTree* lhs = tree->gtOp.gtOp1; |
6985 | if (lhs->gtFlags & GTF_GLOB_REF) |
6986 | { |
6987 | *pFirstBlockAndBeforeSideEffect = false; |
6988 | } |
6989 | } |
6990 | else if (tree->OperIsCopyBlkOp()) |
6991 | { |
6992 | GenTree* args = tree->gtOp.gtOp1; |
6993 | assert(args->OperGet() == GT_LIST); |
6994 | if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) |
6995 | { |
6996 | *pFirstBlockAndBeforeSideEffect = false; |
6997 | } |
6998 | } |
6999 | } |
7000 | |
7001 | // If this 'tree' is hoistable then we return and the caller will |
    // decide to hoist it as part of a larger hoistable expression.
7003 | // |
7004 | if (!treeIsHoistable) |
7005 | { |
7006 | // We are not hoistable so we will now hoist any hoistable children. |
7007 | // |
7008 | for (unsigned childNum = 0; childNum < nChildren; childNum++) |
7009 | { |
7010 | if (childrenHoistable[childNum]) |
7011 | { |
                // We can't hoist the LHS of an assignment; it isn't a real use.
7013 | if ((childNum == 0) && tree->OperIs(GT_ASG)) |
7014 | { |
7015 | continue; |
7016 | } |
7017 | |
7018 | GenTree* child = tree->GetChild(childNum); |
7019 | |
7020 | // We try to hoist this 'child' tree |
7021 | optHoistCandidate(child, lnum, hoistCtxt); |
7022 | } |
7023 | } |
7024 | } |
7025 | |
7026 | *pHoistable = treeIsHoistable; |
7027 | *pCctorDependent = treeIsCctorDependent; |
7028 | return treeIsInvariant; |
7029 | } |
7030 | |
7031 | void Compiler::optHoistCandidate(GenTree* tree, unsigned lnum, LoopHoistContext* hoistCtxt) |
7032 | { |
7033 | if (lnum == BasicBlock::NOT_IN_LOOP) |
7034 | { |
7035 | // The hoisted expression isn't valid at any loop head so don't hoist this expression. |
7036 | return; |
7037 | } |
7038 | |
7039 | // The outer loop also must be suitable for hoisting... |
7040 | if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0) |
7041 | { |
7042 | return; |
7043 | } |
7044 | |
    // If the hoisted expression isn't valid at this loop head then don't hoist it
7046 | if (!optTreeIsValidAtLoopHead(tree, lnum)) |
7047 | { |
7048 | return; |
7049 | } |
7050 | |
    // It must pass the hoisting profitability tests for this loop level
7052 | if (!optIsProfitableToHoistableTree(tree, lnum)) |
7053 | { |
7054 | return; |
7055 | } |
7056 | |
7057 | bool b; |
7058 | if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b)) |
7059 | { |
7060 | // already hoisted in a parent loop, so don't hoist this expression. |
7061 | return; |
7062 | } |
7063 | |
7064 | if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b)) |
7065 | { |
7066 | // already hoisted this expression in the current loop, so don't hoist this expression. |
7067 | return; |
7068 | } |
7069 | |
7070 | // Expression can be hoisted |
7071 | optPerformHoistExpr(tree, lnum); |
7072 | |
7073 | // Increment lpHoistedExprCount or lpHoistedFPExprCount |
7074 | if (!varTypeIsFloating(tree->TypeGet())) |
7075 | { |
7076 | optLoopTable[lnum].lpHoistedExprCount++; |
7077 | #ifndef _TARGET_64BIT_ |
7078 | // For our 32-bit targets Long types take two registers. |
7079 | if (varTypeIsLong(tree->TypeGet())) |
7080 | { |
7081 | optLoopTable[lnum].lpHoistedExprCount++; |
7082 | } |
7083 | #endif |
7084 | } |
7085 | else // Floating point expr hoisted |
7086 | { |
7087 | optLoopTable[lnum].lpHoistedFPExprCount++; |
7088 | } |
7089 | |
7090 | // Record the hoisted expression in hoistCtxt |
7091 | hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); |
7092 | } |
7093 | |
7094 | bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache) |
7095 | { |
    // If it is not a VN, it is not loop-invariant.
7097 | if (vn == ValueNumStore::NoVN) |
7098 | { |
7099 | return false; |
7100 | } |
7101 | |
7102 | // We'll always short-circuit constants. |
7103 | if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid()) |
7104 | { |
7105 | return true; |
7106 | } |
7107 | |
7108 | // If we've done this query previously, don't repeat. |
7109 | bool previousRes = false; |
7110 | if (loopVnInvariantCache->Lookup(vn, &previousRes)) |
7111 | { |
7112 | return previousRes; |
7113 | } |
7114 | |
7115 | bool res = true; |
7116 | VNFuncApp funcApp; |
7117 | if (vnStore->GetVNFunc(vn, &funcApp)) |
7118 | { |
7119 | if (funcApp.m_func == VNF_PhiDef) |
7120 | { |
7121 | // First, make sure it's a "proper" phi -- the definition is a Phi application. |
7122 | VNFuncApp phiDefValFuncApp; |
7123 | if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi) |
7124 | { |
7125 | // It's not *really* a definition, rather a pass-through of some other VN. |
7126 | // (This could occur, say if both sides of an if-then-else diamond made the |
7127 | // same assignment to a variable.) |
7128 | res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache); |
7129 | } |
7130 | else |
7131 | { |
                // Is the definition within the loop? If so, it is not loop-invariant.
7133 | unsigned lclNum = funcApp.m_args[0]; |
7134 | unsigned ssaNum = funcApp.m_args[1]; |
7135 | LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum); |
7136 | res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum); |
7137 | } |
7138 | } |
7139 | else if (funcApp.m_func == VNF_PhiMemoryDef) |
7140 | { |
7141 | BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0])); |
7142 | res = !optLoopContains(lnum, defnBlk->bbNatLoopNum); |
7143 | } |
7144 | else |
7145 | { |
7146 | for (unsigned i = 0; i < funcApp.m_arity; i++) |
7147 | { |
7148 | // TODO-CQ: We need to either make sure that *all* VN functions |
7149 | // always take VN args, or else have a list of arg positions to exempt, as implicitly |
7150 | // constant. |
7151 | if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache)) |
7152 | { |
7153 | res = false; |
7154 | break; |
7155 | } |
7156 | } |
7157 | } |
7158 | } |
7159 | else |
7160 | { |
7161 | // Non-function "new, unique" VN's may be annotated with the loop nest where |
7162 | // their definition occurs. |
7163 | BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn); |
7164 | |
7165 | if (vnLoopNum == MAX_LOOP_NUM) |
7166 | { |
7167 | res = false; |
7168 | } |
7169 | else |
7170 | { |
7171 | res = !optLoopContains(lnum, vnLoopNum); |
7172 | } |
7173 | } |
7174 | |
7175 | loopVnInvariantCache->Set(vn, res); |
7176 | return res; |
7177 | } |
7178 | |
7179 | bool Compiler::optTreeIsValidAtLoopHead(GenTree* tree, unsigned lnum) |
7180 | { |
7181 | if (tree->OperIsLocal()) |
7182 | { |
7183 | GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon(); |
7184 | unsigned lclNum = lclVar->gtLclNum; |
7185 | |
        // The lclVar must have an SSA-tracked lifetime
7187 | if (!lvaInSsa(lclNum)) |
7188 | { |
7189 | return false; |
7190 | } |
7191 | |
        // If the loop does not contain the SSA def we can hoist it.
7193 | if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk)) |
7194 | { |
7195 | return true; |
7196 | } |
7197 | } |
7198 | else if (tree->OperIsConst()) |
7199 | { |
7200 | return true; |
7201 | } |
    else // Valid only if every one of the child nodes is valid at this loop's head.
7203 | { |
7204 | unsigned nChildren = tree->NumChildren(); |
7205 | for (unsigned childNum = 0; childNum < nChildren; childNum++) |
7206 | { |
7207 | if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum)) |
7208 | { |
7209 | return false; |
7210 | } |
7211 | } |
7212 | return true; |
7213 | } |
7214 | return false; |
7215 | } |
7216 | |
7217 | /***************************************************************************** |
7218 | * |
 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
 * header block placed immediately before the loop top. The pre-header will
 * replace the current lpHead in the loop table.
7221 | * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead |
7222 | * will also be dominated by the loop-top, lpHead->bbNext. |
7223 | * |
7224 | */ |
7225 | |
void Compiler::fgCreateLoopPreHeader(unsigned lnum)
7227 | { |
7228 | LoopDsc* pLoopDsc = &optLoopTable[lnum]; |
7229 | |
7230 | /* This loop has to be a "do-while" loop */ |
7231 | |
7232 | assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE); |
7233 | |
7234 | /* Have we already created a loop-preheader block? */ |
7235 | |
7236 | if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD) |
7237 | { |
7238 | return; |
7239 | } |
7240 | |
7241 | BasicBlock* head = pLoopDsc->lpHead; |
7242 | BasicBlock* top = pLoopDsc->lpTop; |
7243 | BasicBlock* entry = pLoopDsc->lpEntry; |
7244 | |
7245 | // if 'entry' and 'head' are in different try regions then we won't be able to hoist |
7246 | if (!BasicBlock::sameTryRegion(head, entry)) |
7247 | { |
7248 | return; |
7249 | } |
7250 | |
7251 | // Ensure that lpHead always dominates lpEntry |
7252 | |
7253 | noway_assert(fgDominate(head, entry)); |
7254 | |
7255 | /* Get hold of the first block of the loop body */ |
7256 | |
7257 | assert(top == entry); |
7258 | |
7259 | /* Allocate a new basic block */ |
7260 | |
7261 | BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE); |
7262 | preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER; |
7263 | |
7264 | // Must set IL code offset |
7265 | preHead->bbCodeOffs = top->bbCodeOffs; |
7266 | |
7267 | // Set the default value of the preHead weight in case we don't have |
    // valid profile data and since this block's weight is just an estimate
7269 | // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head. |
7270 | // |
7271 | preHead->inheritWeight(head); |
7272 | preHead->bbFlags &= ~BBF_PROF_WEIGHT; |
7273 | |
7274 | #ifdef DEBUG |
7275 | if (verbose) |
7276 | { |
7277 | printf("\nCreated PreHeader (" FMT_BB ") for loop L%02u (" FMT_BB " - " FMT_BB "), with weight = %s\n" , |
7278 | preHead->bbNum, lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this))); |
7279 | } |
7280 | #endif |
7281 | |
7282 | // The preheader block is part of the containing loop (if any). |
7283 | preHead->bbNatLoopNum = pLoopDsc->lpParent; |
7284 | |
7285 | if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND)) |
7286 | { |
7287 | if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0)) |
7288 | { |
7289 | preHead->bbWeight = 0; |
7290 | preHead->bbFlags |= BBF_RUN_RARELY; |
7291 | } |
7292 | else |
7293 | { |
7294 | bool allValidProfileWeights = |
7295 | (head->hasProfileWeight() && head->bbJumpDest->hasProfileWeight() && head->bbNext->hasProfileWeight()); |
7296 | |
7297 | if (allValidProfileWeights) |
7298 | { |
7299 | double loopEnteredCount; |
7300 | double loopSkippedCount; |
7301 | |
7302 | if (fgHaveValidEdgeWeights) |
7303 | { |
7304 | flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head); |
7305 | flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head); |
7306 | noway_assert(edgeToNext != nullptr); |
7307 | noway_assert(edgeToJump != nullptr); |
7308 | |
7309 | loopEnteredCount = |
7310 | ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0; |
7311 | loopSkippedCount = |
7312 | ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0; |
7313 | } |
7314 | else |
7315 | { |
7316 | loopEnteredCount = (double)head->bbNext->bbWeight; |
7317 | loopSkippedCount = (double)head->bbJumpDest->bbWeight; |
7318 | } |
7319 | |
7320 | double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount); |
7321 | |
7322 | // Calculate a good approximation of the preHead's block weight |
7323 | unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5); |
7324 | preHead->setBBWeight(max(preHeadWeight, 1)); |
7325 | noway_assert(!preHead->isRunRarely()); |
7326 | } |
7327 | } |
7328 | } |
7329 | |
7330 | // Link in the preHead block. |
7331 | fgInsertBBbefore(top, preHead); |
7332 | |
7333 | // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting. |
7334 | // However, that is too expensive at this point. Instead, we update the phi |
7335 | // node block references, if we created pre-header block due to hoisting. |
7336 | // This is sufficient because any definition participating in SSA that flowed |
7337 | // into the phi via the loop header block will now flow through the preheader |
7338 | // block from the header block. |
7339 | |
7340 | for (GenTree* stmt = top->bbTreeList; stmt; stmt = stmt->gtNext) |
7341 | { |
7342 | GenTree* tree = stmt->gtStmt.gtStmtExpr; |
7343 | if (tree->OperGet() != GT_ASG) |
7344 | { |
7345 | break; |
7346 | } |
7347 | GenTree* op2 = tree->gtGetOp2(); |
7348 | if (op2->OperGet() != GT_PHI) |
7349 | { |
7350 | break; |
7351 | } |
7352 | GenTreeArgList* args = op2->gtGetOp1()->AsArgList(); |
7353 | while (args != nullptr) |
7354 | { |
7355 | GenTreePhiArg* phiArg = args->Current()->AsPhiArg(); |
7356 | if (phiArg->gtPredBB == head) |
7357 | { |
7358 | phiArg->gtPredBB = preHead; |
7359 | } |
7360 | args = args->Rest(); |
7361 | } |
7362 | } |
7363 | |
7364 | // The handler can't begin at the top of the loop. If it did, it would be incorrect |
7365 | // to set the handler index on the pre header without updating the exception table. |
7366 | noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top); |
7367 | |
7368 | // Update the EH table to make the hoisted block part of the loop's EH block. |
7369 | fgExtendEHRegionBefore(top); |
7370 | |
7371 | // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them |
7372 | // (e.g: hoisting expression in a loop with the same 'head' as this one) |
7373 | |
7374 | /* Update the loop entry */ |
7375 | |
7376 | pLoopDsc->lpHead = preHead; |
7377 | pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD; |
7378 | |
7379 | /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds |
       All predecessors of 'top' (which is the entry of the loop)
7381 | now have to jump to 'preHead', unless they are dominated by 'head' */ |
7382 | |
7383 | preHead->bbRefs = 0; |
7384 | fgAddRefPred(preHead, head); |
7385 | bool checkNestedLoops = false; |
7386 | |
7387 | for (flowList* pred = top->bbPreds; pred; pred = pred->flNext) |
7388 | { |
7389 | BasicBlock* predBlock = pred->flBlock; |
7390 | |
7391 | if (fgDominate(top, predBlock)) |
7392 | { |
7393 | // note: if 'top' dominates predBlock, 'head' dominates predBlock too |
7394 | // (we know that 'head' dominates 'top'), but using 'top' instead of |
7395 | // 'head' in the test allows us to not enter here if 'predBlock == head' |
7396 | |
7397 | if (predBlock != pLoopDsc->lpBottom) |
7398 | { |
7399 | noway_assert(predBlock != head); |
7400 | checkNestedLoops = true; |
7401 | } |
7402 | continue; |
7403 | } |
7404 | |
7405 | switch (predBlock->bbJumpKind) |
7406 | { |
7407 | case BBJ_NONE: |
7408 | noway_assert(predBlock == head); |
7409 | break; |
7410 | |
7411 | case BBJ_COND: |
7412 | if (predBlock == head) |
7413 | { |
7414 | noway_assert(predBlock->bbJumpDest != top); |
7415 | break; |
7416 | } |
7417 | __fallthrough; |
7418 | |
7419 | case BBJ_ALWAYS: |
7420 | case BBJ_EHCATCHRET: |
7421 | noway_assert(predBlock->bbJumpDest == top); |
7422 | predBlock->bbJumpDest = preHead; |
7423 | preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; |
7424 | |
7425 | if (predBlock == head) |
7426 | { |
7427 | // This is essentially the same case of predBlock being a BBJ_NONE. We may not be |
7428 | // able to make this a BBJ_NONE if it's an internal block (for example, a leave). |
7429 | // Just break, pred will be removed after switch. |
7430 | } |
7431 | else |
7432 | { |
7433 | fgRemoveRefPred(top, predBlock); |
7434 | fgAddRefPred(preHead, predBlock); |
7435 | } |
7436 | break; |
7437 | |
7438 | case BBJ_SWITCH: |
7439 | unsigned jumpCnt; |
7440 | jumpCnt = predBlock->bbJumpSwt->bbsCount; |
7441 | BasicBlock** jumpTab; |
7442 | jumpTab = predBlock->bbJumpSwt->bbsDstTab; |
7443 | |
7444 | do |
7445 | { |
7446 | assert(*jumpTab); |
7447 | if ((*jumpTab) == top) |
7448 | { |
7449 | (*jumpTab) = preHead; |
7450 | |
7451 | fgRemoveRefPred(top, predBlock); |
7452 | fgAddRefPred(preHead, predBlock); |
7453 | preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; |
7454 | } |
                } while (++jumpTab, --jumpCnt);
                break;

7457 | default: |
7458 | noway_assert(!"Unexpected bbJumpKind" ); |
7459 | break; |
7460 | } |
7461 | } |
7462 | |
7463 | noway_assert(!fgGetPredForBlock(top, preHead)); |
7464 | fgRemoveRefPred(top, head); |
7465 | fgAddRefPred(top, preHead); |
7466 | |
7467 | /* |
7468 | If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop |
7469 | (other than the back-edge of the loop we are considering) then we likely have nested |
7470 | do-while loops with the same entry block and inserting the preheader block changes the head |
7471 | of all the nested loops. Now we will update this piece of information in the loop table, and |
7472 | mark all nested loops as having a preheader (the preheader block can be shared among all nested |
7473 | do-while loops with the same entry block). |
7474 | */ |
7475 | if (checkNestedLoops) |
7476 | { |
7477 | for (unsigned l = 0; l < optLoopCount; l++) |
7478 | { |
7479 | if (optLoopTable[l].lpHead == head) |
7480 | { |
7481 | noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead' |
7482 | noway_assert(optLoopTable[l].lpEntry == top); |
7483 | optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead); |
7484 | optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD; |
7485 | #ifdef DEBUG |
7486 | if (verbose) |
7487 | { |
7488 | printf("Same PreHeader (" FMT_BB ") can be used for loop L%02u (" FMT_BB " - " FMT_BB ")\n\n" , |
7489 | preHead->bbNum, l, top->bbNum, optLoopTable[l].lpBottom->bbNum); |
7490 | } |
7491 | #endif |
7492 | } |
7493 | } |
7494 | } |
7495 | } |
7496 | |
7497 | bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum) |
7498 | { |
7499 | for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent) |
7500 | { |
7501 | if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) |
7502 | { |
7503 | continue; |
7504 | } |
7505 | if (optLoopTable[lnum].lpEntry == blk) |
7506 | { |
7507 | *pLnum = lnum; |
7508 | return true; |
7509 | } |
7510 | } |
7511 | return false; |
7512 | } |
7513 | |
7514 | void Compiler::optComputeLoopSideEffects() |
7515 | { |
7516 | unsigned lnum; |
7517 | for (lnum = 0; lnum < optLoopCount; lnum++) |
7518 | { |
7519 | VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this)); |
7520 | VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this)); |
7521 | optLoopTable[lnum].lpContainsCall = false; |
7522 | } |
7523 | |
7524 | for (lnum = 0; lnum < optLoopCount; lnum++) |
7525 | { |
7526 | if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) |
7527 | { |
7528 | continue; |
7529 | } |
7530 | |
7531 | if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) |
7532 | { // Is outermost... |
7533 | optComputeLoopNestSideEffects(lnum); |
7534 | } |
7535 | } |
7536 | |
7537 | VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this)); |
7538 | #ifndef _TARGET_64BIT_ |
7539 | VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this)); |
7540 | #endif |
7541 | |
7542 | for (unsigned i = 0; i < lvaCount; i++) |
7543 | { |
7544 | LclVarDsc* varDsc = &lvaTable[i]; |
7545 | if (varDsc->lvTracked) |
7546 | { |
7547 | if (varTypeIsFloating(varDsc->lvType)) |
7548 | { |
7549 | VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex); |
7550 | } |
7551 | #ifndef _TARGET_64BIT_ |
7552 | else if (varTypeIsLong(varDsc->lvType)) |
7553 | { |
7554 | VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex); |
7555 | } |
7556 | #endif |
7557 | } |
7558 | } |
7559 | } |
7560 | |
7561 | void Compiler::optComputeLoopNestSideEffects(unsigned lnum) |
7562 | { |
7563 | assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost. |
7564 | BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext; |
7565 | for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext) |
7566 | { |
7567 | optComputeLoopSideEffectsOfBlock(bbInLoop); |
7568 | } |
7569 | } |
7570 | |
7571 | void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) |
7572 | { |
7573 | unsigned mostNestedLoop = blk->bbNatLoopNum; |
7574 | assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP); |
7575 | |
7576 | AddVariableLivenessAllContainingLoops(mostNestedLoop, blk); |
7577 | |
7578 | // MemoryKinds for which an in-loop call or store has arbitrary effects. |
7579 | MemoryKindSet memoryHavoc = emptyMemoryKindSet; |
7580 | |
7581 | // Now iterate over the remaining statements, and their trees. |
7582 | for (GenTree* stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext) |
7583 | { |
7584 | for (GenTree* tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext) |
7585 | { |
7586 | genTreeOps oper = tree->OperGet(); |
7587 | |
7588 | // Even after we set memoryHavoc we still may want to know if a loop contains calls |
7589 | if (memoryHavoc == fullMemoryKindSet) |
7590 | { |
7591 | if (oper == GT_CALL) |
7592 | { |
7593 | // Record that this loop contains a call |
7594 | AddContainsCallAllContainingLoops(mostNestedLoop); |
7595 | } |
7596 | |
7597 | // If we just set lpContainsCall or it was previously set |
7598 | if (optLoopTable[mostNestedLoop].lpContainsCall) |
7599 | { |
7600 | // We can early exit after both memoryHavoc and lpContainsCall are both set to true. |
7601 | break; |
7602 | } |
7603 | |
7604 | // We are just looking for GT_CALL nodes after memoryHavoc was set. |
7605 | continue; |
7606 | } |
7607 | |
            // Otherwise memoryHavoc is not set for at least one memory kind
7609 | assert(memoryHavoc != fullMemoryKindSet); |
7610 | |
7611 | // This body is a distillation of the memory side-effect code of value numbering. |
            // We also do a very limited analysis of byref PtrTo values, to cover some cases
7613 | // that the compiler creates. |
7614 | |
7615 | if (oper == GT_ASG) |
7616 | { |
7617 | GenTree* lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true); |
7618 | |
7619 | if (lhs->OperGet() == GT_IND) |
7620 | { |
7621 | GenTree* arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true); |
7622 | FieldSeqNode* fldSeqArrElem = nullptr; |
7623 | |
7624 | if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) |
7625 | { |
7626 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7627 | continue; |
7628 | } |
7629 | |
7630 | ArrayInfo arrInfo; |
7631 | |
7632 | if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR) |
7633 | { |
7634 | // If it's a local byref for which we recorded a value number, use that... |
7635 | GenTreeLclVar* argLcl = arg->AsLclVar(); |
7636 | if (lvaInSsa(argLcl->GetLclNum())) |
7637 | { |
7638 | ValueNum argVN = |
7639 | lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal(); |
7640 | VNFuncApp funcApp; |
7641 | if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) && |
7642 | funcApp.m_func == VNF_PtrToArrElem) |
7643 | { |
7644 | assert(vnStore->IsVNHandle(funcApp.m_args[0])); |
7645 | CORINFO_CLASS_HANDLE elemType = |
7646 | CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0])); |
7647 | AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType); |
7648 | // Don't set memoryHavoc for GcHeap below. Do set memoryHavoc for ByrefExposed |
7649 | // (conservatively assuming that a byref may alias the array element) |
7650 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7651 | continue; |
7652 | } |
7653 | } |
7654 | // Otherwise... |
7655 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7656 | } |
7657 | // Is the LHS an array index expression? |
7658 | else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem)) |
7659 | { |
7660 | // We actually ignore "fldSeq" -- any modification to an S[], at any |
7661 | // field of "S", will lose all information about the array type. |
7662 | CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); |
7663 | AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq); |
7664 | // Conservatively assume byrefs may alias this array element |
7665 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7666 | } |
7667 | else |
7668 | { |
7669 | // We are only interested in IsFieldAddr()'s fldSeq out parameter. |
7670 | // |
7671 | GenTree* obj = nullptr; // unused |
7672 | GenTree* staticOffset = nullptr; // unused |
7673 | FieldSeqNode* fldSeq = nullptr; |
7674 | |
7675 | if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) && |
7676 | (fldSeq != FieldSeqStore::NotAField())) |
7677 | { |
7678 | // Get the first (object) field from field seq. GcHeap[field] will yield the "field map". |
7679 | assert(fldSeq != nullptr); |
7680 | if (fldSeq->IsFirstElemFieldSeq()) |
7681 | { |
7682 | fldSeq = fldSeq->m_next; |
7683 | assert(fldSeq != nullptr); |
7684 | } |
7685 | |
7686 | AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd); |
7687 | // Conservatively assume byrefs may alias this object. |
7688 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7689 | } |
7690 | else |
7691 | { |
7692 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7693 | } |
7694 | } |
7695 | } |
7696 | else if (lhs->OperIsBlk()) |
7697 | { |
7698 | GenTreeLclVarCommon* lclVarTree; |
7699 | bool isEntire; |
7700 | if (!tree->DefinesLocal(this, &lclVarTree, &isEntire)) |
7701 | { |
7702 | // For now, assume arbitrary side effects on GcHeap/ByrefExposed... |
7703 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7704 | } |
7705 | else if (lvaVarAddrExposed(lclVarTree->gtLclNum)) |
7706 | { |
7707 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7708 | } |
7709 | } |
7710 | else if (lhs->OperGet() == GT_CLS_VAR) |
7711 | { |
7712 | AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd); |
7713 | // Conservatively assume byrefs may alias this static field |
7714 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7715 | } |
7716 | // Otherwise, must be local lhs form. I should assert that. |
7717 | else if (lhs->OperGet() == GT_LCL_VAR) |
7718 | { |
7719 | GenTreeLclVar* lhsLcl = lhs->AsLclVar(); |
7720 | GenTree* rhs = tree->gtOp.gtOp2; |
7721 | ValueNum rhsVN = rhs->gtVNPair.GetLiberal(); |
7722 | // If we gave the RHS a value number, propagate it. |
7723 | if (rhsVN != ValueNumStore::NoVN) |
7724 | { |
7725 | rhsVN = vnStore->VNNormalValue(rhsVN); |
7726 | if (lvaInSsa(lhsLcl->GetLclNum())) |
7727 | { |
7728 | lvaTable[lhsLcl->GetLclNum()] |
7729 | .GetPerSsaData(lhsLcl->GetSsaNum()) |
7730 | ->m_vnPair.SetLiberal(rhsVN); |
7731 | } |
7732 | } |
7733 | // If the local is address-exposed, count this as ByrefExposed havoc |
7734 | if (lvaVarAddrExposed(lhsLcl->gtLclNum)) |
7735 | { |
7736 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7737 | } |
7738 | } |
7739 | } |
7740 | else // if (oper != GT_ASG) |
7741 | { |
7742 | switch (oper) |
7743 | { |
7744 | case GT_COMMA: |
7745 | tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair; |
7746 | break; |
7747 | |
7748 | case GT_ADDR: |
                        // Is it an addr of an array index expression?
7750 | { |
7751 | GenTree* addrArg = tree->gtOp.gtOp1; |
7752 | if (addrArg->OperGet() == GT_IND) |
7753 | { |
7754 | // Is the LHS an array index expression? |
7755 | if (addrArg->gtFlags & GTF_IND_ARR_INDEX) |
7756 | { |
7757 | ArrayInfo arrInfo; |
7758 | bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo); |
7759 | assert(b); |
7760 | CORINFO_CLASS_HANDLE elemTypeEq = |
7761 | EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); |
7762 | ValueNum elemTypeEqVN = |
7763 | vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL); |
7764 | ValueNum ptrToArrElemVN = |
7765 | vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem, elemTypeEqVN, |
7766 | // The rest are dummy arguments. |
7767 | vnStore->VNForNull(), vnStore->VNForNull(), |
7768 | vnStore->VNForNull()); |
7769 | tree->gtVNPair.SetBoth(ptrToArrElemVN); |
7770 | } |
7771 | } |
7772 | } |
7773 | break; |
7774 | |
7775 | case GT_LOCKADD: // Binop |
7776 | case GT_XADD: // Binop |
7777 | case GT_XCHG: // Binop |
7778 | case GT_CMPXCHG: // Specialop |
7779 | { |
7780 | assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering" ); |
7781 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7782 | } |
7783 | break; |
7784 | |
7785 | case GT_CALL: |
7786 | { |
7787 | GenTreeCall* call = tree->AsCall(); |
7788 | |
7789 | // Record that this loop contains a call |
7790 | AddContainsCallAllContainingLoops(mostNestedLoop); |
7791 | |
7792 | if (call->gtCallType == CT_HELPER) |
7793 | { |
7794 | CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); |
7795 | if (s_helperCallProperties.MutatesHeap(helpFunc)) |
7796 | { |
7797 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7798 | } |
7799 | else if (s_helperCallProperties.MayRunCctor(helpFunc)) |
7800 | { |
7801 | // If the call is labeled as "Hoistable", then we've checked the |
7802 | // class that would be constructed, and it is not precise-init, so |
7803 | // the cctor will not be run by this call. Otherwise, it might be, |
7804 | // and might have arbitrary side effects. |
7805 | if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0) |
7806 | { |
7807 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7808 | } |
7809 | } |
7810 | } |
7811 | else |
7812 | { |
7813 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7814 | } |
7815 | break; |
7816 | } |
7817 | |
7818 | default: |
                        // For all other gtOper node kinds, leave 'memoryHavoc' unchanged
7820 | break; |
7821 | } |
7822 | } |
7823 | } |
7824 | } |
7825 | |
7826 | if (memoryHavoc != emptyMemoryKindSet) |
7827 | { |
7828 | // Record that all loops containing this block have memory havoc effects. |
7829 | unsigned lnum = mostNestedLoop; |
7830 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7831 | { |
7832 | for (MemoryKind memoryKind : allMemoryKinds()) |
7833 | { |
7834 | if ((memoryHavoc & memoryKindSet(memoryKind)) != 0) |
7835 | { |
7836 | optLoopTable[lnum].lpLoopHasMemoryHavoc[memoryKind] = true; |
7837 | } |
7838 | } |
7839 | lnum = optLoopTable[lnum].lpParent; |
7840 | } |
7841 | } |
7842 | } |
7843 | |
// Marks "lnum" and any parent loops as containing a call.
7845 | void Compiler::AddContainsCallAllContainingLoops(unsigned lnum) |
7846 | { |
7847 | assert(0 <= lnum && lnum < optLoopCount); |
7848 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7849 | { |
7850 | optLoopTable[lnum].lpContainsCall = true; |
7851 | lnum = optLoopTable[lnum].lpParent; |
7852 | } |
7853 | } |
7854 | |
7855 | // Adds the variable liveness information for 'blk' to 'this' LoopDsc |
7856 | void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk) |
7857 | { |
7858 | VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn); |
7859 | VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut); |
7860 | |
7861 | VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse); |
7862 | VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef); |
7863 | } |
7864 | |
7865 | // Adds the variable liveness information for 'blk' to "lnum" and any parent loops. |
7866 | void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk) |
7867 | { |
7868 | assert(0 <= lnum && lnum < optLoopCount); |
7869 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7870 | { |
7871 | optLoopTable[lnum].AddVariableLiveness(this, blk); |
7872 | lnum = optLoopTable[lnum].lpParent; |
7873 | } |
7874 | } |
7875 | |
7876 | // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops. |
7877 | void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd) |
7878 | { |
7879 | assert(0 <= lnum && lnum < optLoopCount); |
7880 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7881 | { |
7882 | optLoopTable[lnum].AddModifiedField(this, fldHnd); |
7883 | lnum = optLoopTable[lnum].lpParent; |
7884 | } |
7885 | } |
7886 | |
7887 | // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops. |
7888 | void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd) |
7889 | { |
7890 | assert(0 <= lnum && lnum < optLoopCount); |
7891 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7892 | { |
7893 | optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd); |
7894 | lnum = optLoopTable[lnum].lpParent; |
7895 | } |
7896 | } |
7897 | |
7898 | /***************************************************************************** |
7899 | * |
 * Helper passed to Compiler::fgWalkTreePre() to decrement the LclVar usage counts.
 * The 'keepList' is either a single tree or a list of trees that are formed by
7902 | * one or more GT_COMMA nodes. It is the kept side-effects as returned by the |
7903 | * gtExtractSideEffList method. |
7904 | */ |
7905 | |
7906 | /* static */ |
7907 | Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTree** pTree, fgWalkData* data) |
7908 | { |
7909 | GenTree* tree = *pTree; |
7910 | Compiler* comp = data->compiler; |
7911 | GenTree* keepList = (GenTree*)(data->pCallbackData); |
7912 | |
7913 | // We may have a non-NULL side effect list that is being kept |
7914 | // |
7915 | if (keepList) |
7916 | { |
7917 | GenTree* keptTree = keepList; |
7918 | while (keptTree->OperGet() == GT_COMMA) |
7919 | { |
7920 | assert(keptTree->OperKind() & GTK_SMPOP); |
7921 | GenTree* op1 = keptTree->gtOp.gtOp1; |
7922 | GenTree* op2 = keptTree->gtGetOp2(); |
7923 | |
            // For the GT_COMMA case the op1 is part of the original CSE tree
7925 | // that is being kept because it contains some side-effect |
7926 | // |
7927 | if (tree == op1) |
7928 | { |
7929 | // This tree and all of its sub trees are being kept. |
7930 | return WALK_SKIP_SUBTREES; |
7931 | } |
7932 | |
            // For the GT_COMMA case the op2 is the remaining side-effects of the original CSE tree
7934 | // which can again be another GT_COMMA or the final side-effect part |
7935 | // |
7936 | keptTree = op2; |
7937 | } |
7938 | if (tree == keptTree) |
7939 | { |
7940 | // This tree and all of its sub trees are being kept. |
7941 | return WALK_SKIP_SUBTREES; |
7942 | } |
7943 | } |
7944 | |
7945 | return WALK_CONTINUE; |
7946 | } |
7947 | |
7948 | /***************************************************************************** |
7949 | * |
7950 | * Routine called to decrement the LclVar ref counts when removing a tree |
7951 | * during the remove RangeCheck phase. |
7952 | * This method will decrement the refcounts for any LclVars used below 'deadTree', |
7953 | * unless the node is found in the 'keepList' (which are saved side effects) |
7954 | * The keepList is communicated using the walkData.pCallbackData field |
7955 | * Also the compCurBB must be set to the current BasicBlock which contains |
7956 | * 'deadTree' as we need to fetch the block weight when decrementing the ref counts. |
7957 | */ |
7958 | |
7959 | void Compiler::optRemoveTree(GenTree* deadTree, GenTree* keepList) |
7960 | { |
7961 | // We communicate this value using the walkData.pCallbackData field |
7962 | // |
7963 | fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList); |
7964 | } |
7965 | |
7966 | //------------------------------------------------------------------------------ |
7967 | // optRemoveRangeCheck : Given an array index node, mark it as not needing a range check. |
7968 | // |
7969 | // Arguments: |
7970 | // tree - Range check tree |
7971 | // stmt - Statement the tree belongs to |
7972 | |
7973 | void Compiler::optRemoveRangeCheck(GenTree* tree, GenTree* stmt) |
7974 | { |
7975 | #if !REARRANGE_ADDS |
7976 | noway_assert(!"can't remove range checks without REARRANGE_ADDS right now" ); |
7977 | #endif |
7978 | |
7979 | noway_assert(stmt->gtOper == GT_STMT); |
7980 | noway_assert(tree->gtOper == GT_COMMA); |
7981 | |
7982 | GenTree* bndsChkTree = tree->gtOp.gtOp1; |
7983 | |
7984 | noway_assert(bndsChkTree->OperIsBoundsCheck()); |
7985 | |
7986 | GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk(); |
7987 | |
7988 | #ifdef DEBUG |
7989 | if (verbose) |
7990 | { |
7991 | printf("Before optRemoveRangeCheck:\n" ); |
7992 | gtDispTree(tree); |
7993 | } |
7994 | #endif |
7995 | |
7996 | GenTree* sideEffList = nullptr; |
7997 | |
7998 | gtExtractSideEffList(bndsChkTree, &sideEffList, GTF_ASG); |
7999 | |
8000 | // Decrement the ref counts for any LclVars that are being deleted |
8001 | // |
8002 | optRemoveTree(bndsChkTree, sideEffList); |
8003 | |
8004 | // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects. |
8005 | tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode(); |
8006 | // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA. |
8007 | tree->gtFlags |= GTF_DONT_CSE; |
8008 | |
8009 | gtUpdateSideEffects(stmt, tree); |
8010 | |
8011 | /* Recalculate the gtCostSz, etc... */ |
8012 | gtSetStmtInfo(stmt); |
8013 | |
8014 | /* Re-thread the nodes if necessary */ |
8015 | if (fgStmtListThreaded) |
8016 | { |
8017 | fgSetStmtSeq(stmt); |
8018 | } |
8019 | |
8020 | #ifdef DEBUG |
8021 | if (verbose) |
8022 | { |
8023 | printf("After optRemoveRangeCheck:\n" ); |
8024 | gtDispTree(tree); |
8025 | } |
8026 | #endif |
8027 | } |
8028 | |
8029 | /***************************************************************************** |
8030 | * Return the scale in an array reference, given a pointer to the |
8031 | * multiplication node. |
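 *
 * For example, given a morphed index expression (i * 5) * 4 (a GT_MUL with a
 * constant second operand whose first operand is another GT_MUL by a constant),
 * this returns a scale of 20 and sets "*pIndex" to 'i'.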
8032 | */ |
8033 | |
8034 | ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEBUGARG(bool bRngChk)) |
8035 | { |
8036 | assert(mul); |
8037 | assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH); |
8038 | assert(mul->gtOp.gtOp2->IsCnsIntOrI()); |
8039 | |
8040 | ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue(); |
8041 | |
8042 | if (mul->gtOper == GT_LSH) |
8043 | { |
8044 | scale = ((ssize_t)1) << scale; |
8045 | } |
8046 | |
8047 | GenTree* index = mul->gtOp.gtOp1; |
8048 | |
8049 | if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI()) |
8050 | { |
8051 | // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4): |
8052 | // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5), |
8053 | // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1. |
8054 | // Otherwise, we cannot optimize it. We will simply keep the original scale and index. |
8055 | scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue(); |
8056 | index = index->gtOp.gtOp1; |
8057 | } |
8058 | |
8059 | assert(!bRngChk || index->gtOper != GT_COMMA); |
8060 | |
8061 | if (pIndex) |
8062 | { |
8063 | *pIndex = index; |
8064 | } |
8065 | |
8066 | return scale; |
8067 | } |
8068 | |
8069 | //------------------------------------------------------------------------------ |
8070 | // optObtainLoopCloningOpts: Identify optimization candidates and update |
8071 | // the "context" for array optimizations. |
8072 | // |
8073 | // Arguments: |
8074 | // context - data structure where all loop cloning info is kept. The |
8075 | // optInfo fields of the context are updated with the |
8076 | // identified optimization candidates. |
8077 | // |
8078 | void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context) |
8079 | { |
8080 | for (unsigned i = 0; i < optLoopCount; i++) |
8081 | { |
8082 | JITDUMP("Considering loop %d to clone for optimizations.\n" , i); |
8083 | if (optIsLoopClonable(i)) |
8084 | { |
8085 | if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED)) |
8086 | { |
8087 | optIdentifyLoopOptInfo(i, context); |
8088 | } |
8089 | } |
8090 | JITDUMP("------------------------------------------------------------\n" ); |
8091 | } |
8092 | JITDUMP("\n" ); |
8093 | } |
8094 | |
8095 | //------------------------------------------------------------------------ |
// optIdentifyLoopOptInfo: Identify loop optimization candidates and also
8097 | // check if the loop is suitable for the optimizations performed. |
8098 | // |
8099 | // Arguments: |
8100 | // loopNum - the current loop index for which conditions are derived. |
8101 | // context - data structure where all loop cloning candidates will be |
8102 | // updated. |
8103 | // |
8104 | // Return Value: |
8105 | // If the loop is not suitable for the optimizations, return false - context |
8106 | // should not contain any optimization candidate for the loop if false. |
8107 | // Else return true. |
8108 | // |
8109 | // Operation: |
8110 | // Check if the loop is well formed for this optimization and identify the |
8111 | // optimization candidates and update the "context" parameter with all the |
8112 | // contextual information necessary to perform the optimization later. |
8113 | // |
8114 | bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context) |
8115 | { |
8116 | noway_assert(loopNum < optLoopCount); |
8117 | |
8118 | LoopDsc* pLoop = &optLoopTable[loopNum]; |
8119 | |
8120 | if (!(pLoop->lpFlags & LPFLG_ITER)) |
8121 | { |
8122 | JITDUMP("> No iter flag on loop %d.\n" , loopNum); |
8123 | return false; |
8124 | } |
8125 | |
8126 | unsigned ivLclNum = pLoop->lpIterVar(); |
8127 | if (lvaVarAddrExposed(ivLclNum)) |
8128 | { |
8129 | JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n" , ivLclNum); |
8130 | return false; |
8131 | } |
8132 | |
8133 | BasicBlock* head = pLoop->lpHead; |
8134 | BasicBlock* end = pLoop->lpBottom; |
8135 | BasicBlock* beg = head->bbNext; |
8136 | |
8137 | if (end->bbJumpKind != BBJ_COND) |
8138 | { |
8139 | JITDUMP("> Couldn't find termination test.\n" ); |
8140 | return false; |
8141 | } |
8142 | |
8143 | if (end->bbJumpDest != beg) |
8144 | { |
8145 | JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n" ); |
8146 | return false; |
8147 | } |
8148 | |
8149 | // TODO-CQ: CLONE: Mark increasing or decreasing loops. |
8150 | if ((pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1)) |
8151 | { |
8152 | JITDUMP("> Loop iteration operator not matching\n" ); |
8153 | return false; |
8154 | } |
8155 | |
8156 | if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 && |
8157 | (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0) |
8158 | { |
8159 | JITDUMP("> Loop limit is neither constant, variable or array length\n" ); |
8160 | return false; |
8161 | } |
8162 | |
8163 | if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD)) || |
8164 | ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB)))) |
8165 | { |
8166 | JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n" , |
8167 | GenTree::OpName(pLoop->lpTestOper()), GenTree::OpName(pLoop->lpIterOper())); |
8168 | return false; |
8169 | } |
8170 | |
8171 | if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT)) |
8172 | { |
8173 | JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n" , |
8174 | pLoop->lpTestTree->gtTreeID); |
8175 | return false; |
8176 | } |
8177 | |
8178 | #ifdef DEBUG |
8179 | GenTree* op1 = pLoop->lpIterator(); |
8180 | noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum)); |
8181 | #endif |
8182 | |
8183 | JITDUMP("Checking blocks " FMT_BB ".." FMT_BB " for optimization candidates\n" , beg->bbNum, |
8184 | end->bbNext ? end->bbNext->bbNum : 0); |
8185 | |
8186 | LoopCloneVisitorInfo info(context, loopNum, nullptr); |
8187 | for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext) |
8188 | { |
8189 | compCurBB = block; |
8190 | for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) |
8191 | { |
8192 | info.stmt = stmt; |
8193 | const bool lclVarsOnly = false; |
8194 | const bool computeStack = false; |
8195 | fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, lclVarsOnly, |
8196 | computeStack); |
8197 | } |
8198 | } |
8199 | |
8200 | return true; |
8201 | } |
8202 | |
8203 | //--------------------------------------------------------------------------------------------------------------- |
8204 | // optExtractArrIndex: Try to extract the array index from "tree". |
8205 | // |
8206 | // Arguments: |
8207 | // tree the tree to be checked if it is the array [] operation. |
8208 | // result the extracted GT_INDEX information is updated in result. |
8209 | // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM. |
8210 | // |
8211 | // Return Value: |
8212 | // Returns true if array index can be extracted, else, return false. See assumption about |
8213 | // what will be extracted. The "result" variable's rank parameter is advanced for every |
8214 | // dimension of [] encountered. |
8215 | // |
8216 | // Operation: |
8217 | // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph |
8218 | // we have converted a GT_INDEX tree into a scaled index base offset expression. We need |
8219 | // to reconstruct this to be able to know if this is an array access. |
8220 | // |
8221 | // Assumption: |
8222 | // The method extracts only if the array base and indices are GT_LCL_VAR. |
8223 | // |
8224 | // TODO-CQ: CLONE: After morph make sure this method extracts values before morph. |
8225 | // |
8226 | // [000024] ------------ * STMT void(IL 0x007...0x00C) |
8227 | // [000021] a--XG+------ | /--* IND int |
8228 | // [000045] -----+------ | | | /--* CNS_INT long 16 Fseq[#FirstElem] |
8229 | // [000046] -----+------ | | | /--* ADD long |
8230 | // [000043] -----+-N---- | | | | | /--* CNS_INT long 2 |
8231 | // [000044] -----+------ | | | | \--* LSH long |
8232 | // [000042] -----+------ | | | | \--* CAST long < -int |
8233 | // [000039] i----+------ | | | | \--* LCL_VAR int V04 loc0 |
8234 | // [000047] -----+------ | | \--* ADD byref |
8235 | // [000038] -----+------ | | \--* LCL_VAR ref V00 arg0 |
8236 | // [000048] ---XG+------ | /--* COMMA int |
8237 | // [000041] ---X-+------ | | \--* ARR_BOUNDS_CHECK_Rng void |
8238 | // [000020] -----+------ | | +--* LCL_VAR int V04 loc0 |
8239 | // [000040] ---X-+------ | | \--* ARR_LENGTH int |
8240 | // [000019] -----+------ | | \--* LCL_VAR ref V00 arg0 |
8241 | // [000023] -A-XG+------ \--* ASG int |
8242 | // [000022] D----+-N---- \--* LCL_VAR int V06 tmp1 |
8243 | |
bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum)
8245 | { |
8246 | if (tree->gtOper != GT_COMMA) |
8247 | { |
8248 | return false; |
8249 | } |
8250 | GenTree* before = tree->gtGetOp1(); |
8251 | if (before->gtOper != GT_ARR_BOUNDS_CHECK) |
8252 | { |
8253 | return false; |
8254 | } |
8255 | GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk(); |
8256 | if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR) |
8257 | { |
8258 | return false; |
8259 | } |
8260 | |
    // For spans, gtArrLen may be a local var, a local field, or a constant.
    // We don't try to extract those.
8263 | if (arrBndsChk->gtArrLen->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_CNS_INT)) |
8264 | { |
8265 | return false; |
8266 | } |
8267 | if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR) |
8268 | { |
8269 | return false; |
8270 | } |
8271 | unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum; |
8272 | if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum) |
8273 | { |
8274 | return false; |
8275 | } |
8276 | |
8277 | unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum; |
8278 | |
8279 | GenTree* after = tree->gtGetOp2(); |
8280 | |
8281 | if (after->gtOper != GT_IND) |
8282 | { |
8283 | return false; |
8284 | } |
8285 | // It used to be the case that arrBndsChks for struct types would fail the previous check because |
8286 | // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now |
8287 | // return false if the type of 'after' is a struct type. (This was causing us to clone loops |
8288 | // that we were not previously cloning.) |
8289 | // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct |
8290 | // types. |
8291 | if (varTypeIsStruct(after)) |
8292 | { |
8293 | return false; |
8294 | } |
8295 | |
8296 | GenTree* sibo = after->gtGetOp1(); // sibo = scale*index + base + offset |
8297 | if (sibo->gtOper != GT_ADD) |
8298 | { |
8299 | return false; |
8300 | } |
8301 | GenTree* base = sibo->gtGetOp1(); |
8302 | GenTree* sio = sibo->gtGetOp2(); // sio == scale*index + offset |
8303 | if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl) |
8304 | { |
8305 | return false; |
8306 | } |
8307 | if (sio->gtOper != GT_ADD) |
8308 | { |
8309 | return false; |
8310 | } |
8311 | GenTree* ofs = sio->gtGetOp2(); |
8312 | GenTree* si = sio->gtGetOp1(); // si = scale*index |
8313 | if (ofs->gtOper != GT_CNS_INT) |
8314 | { |
8315 | return false; |
8316 | } |
8317 | if (si->gtOper != GT_LSH) |
8318 | { |
8319 | return false; |
8320 | } |
8321 | GenTree* scale = si->gtGetOp2(); |
8322 | GenTree* index = si->gtGetOp1(); |
8323 | if (scale->gtOper != GT_CNS_INT) |
8324 | { |
8325 | return false; |
8326 | } |
8327 | #ifdef _TARGET_64BIT_ |
8328 | if (index->gtOper != GT_CAST) |
8329 | { |
8330 | return false; |
8331 | } |
8332 | GenTree* indexVar = index->gtGetOp1(); |
8333 | #else |
8334 | GenTree* indexVar = index; |
8335 | #endif |
8336 | if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl) |
8337 | { |
8338 | return false; |
8339 | } |
8340 | if (lhsNum == BAD_VAR_NUM) |
8341 | { |
8342 | result->arrLcl = arrLcl; |
8343 | } |
8344 | result->indLcls.Push(indLcl); |
8345 | result->bndsChks.Push(tree); |
8346 | result->useBlock = compCurBB; |
8347 | result->rank++; |
8348 | |
8349 | return true; |
8350 | } |
8351 | |
8352 | //--------------------------------------------------------------------------------------------------------------- |
8353 | // optReconstructArrIndex: Reconstruct array index. |
8354 | // |
8355 | // Arguments: |
8356 | // tree the tree to be checked if it is an array [][][] operation. |
8357 | // result the extracted GT_INDEX information. |
8358 | // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM. |
8359 | // |
8360 | // Return Value: |
8361 | // Returns true if array index can be extracted, else, return false. "rank" field in |
8362 | // "result" contains the array access depth. The "indLcls" fields contain the indices. |
8363 | // |
8364 | // Operation: |
8365 | // Recursively look for a list of array indices. In the example below, we encounter, |
8366 | // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02] |
8367 | // The return value would then be: |
8368 | // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 } |
8369 | // |
8370 | // V00[V01][V02] would be morphed as: |
8371 | // |
8372 | // [000000001B366848] ---XG------- indir int |
8373 | // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16 |
8374 | // [000000001B36C200] ---XG------- comma int |
8375 | // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02) |
8376 | // [000000001B36C278] -A-XG------- comma int |
8377 | // [000000001B366730] R--XG------- indir ref |
8378 | // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24 |
8379 | // [000000001B36C818] ---XG------- comma ref |
8380 | // [000000001B36C458] ---X-------- arrBndsChk(V00, V01) |
8381 | // [000000001B36BB60] -A-XG------- = ref |
8382 | // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2 |
8383 | // [000000001B36A668] -A-XG------- = int |
8384 | // [000000001B36A5F0] D------N---- lclVar int V03 tmp0 |
8385 | // |
8386 | // Assumption: |
8387 | // The method extracts only if the array base and indices are GT_LCL_VAR. |
8388 | // |
8389 | bool Compiler::optReconstructArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum) |
8390 | { |
8391 | // If we can extract "tree" (which is a top level comma) return. |
8392 | if (optExtractArrIndex(tree, result, lhsNum)) |
8393 | { |
8394 | return true; |
8395 | } |
8396 | // We have a comma (check if array base expr is computed in "before"), descend further. |
8397 | else if (tree->OperGet() == GT_COMMA) |
8398 | { |
8399 | GenTree* before = tree->gtGetOp1(); |
8400 | // "before" should evaluate an array base for the "after" indexing. |
8401 | if (before->OperGet() != GT_ASG) |
8402 | { |
8403 | return false; |
8404 | } |
8405 | GenTree* lhs = before->gtGetOp1(); |
8406 | GenTree* rhs = before->gtGetOp2(); |
8407 | |
8408 | // "rhs" should contain an GT_INDEX |
8409 | if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum)) |
8410 | { |
8411 | return false; |
8412 | } |
8413 | unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum; |
8414 | GenTree* after = tree->gtGetOp2(); |
8415 | // Pass the "lhsNum", so we can verify if indeed it is used as the array base. |
8416 | return optExtractArrIndex(after, result, lhsNum); |
8417 | } |
8418 | return false; |
8419 | } |
8420 | |
8421 | /* static */ |
8422 | Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pTree, Compiler::fgWalkData* data) |
8423 | { |
8424 | return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData); |
8425 | } |
8426 | |
8427 | //------------------------------------------------------------------------- |
8428 | // optIsStackLocalInvariant: Is stack local invariant in loop. |
8429 | // |
8430 | // Arguments: |
8431 | // loopNum The loop in which the variable is tested for invariance. |
8432 | // lclNum The local that is tested for invariance in the loop. |
8433 | // |
8434 | // Return Value: |
8435 | // Returns true if the variable is loop invariant in loopNum. |
8436 | // |
8437 | bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum) |
8438 | { |
8439 | if (lvaVarAddrExposed(lclNum)) |
8440 | { |
8441 | return false; |
8442 | } |
8443 | if (optIsVarAssgLoop(loopNum, lclNum)) |
8444 | { |
8445 | return false; |
8446 | } |
8447 | return true; |
8448 | } |
8449 | |
8450 | //---------------------------------------------------------------------------------------------- |
8451 | // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so, |
8452 | // identify as potential candidate and update the loop context. |
8453 | // |
8454 | // Arguments: |
8455 | // tree The tree encountered during the tree walk. |
8456 | // info Supplies information about the current block or stmt in which the tree is. |
8457 | // Also supplies the "context" pointer for updating with loop cloning |
8458 | // candidates. Also supplies loopNum. |
8459 | // |
8460 | // Operation: |
8461 | // If array index can be reconstructed, check if the iter var of the loop matches the |
8462 | // array index var in some dim. Also ensure other index vars before the identified |
8463 | // dim are loop invariant. |
8464 | // |
8465 | // Return Value: |
8466 | // Skip sub trees if the optimization candidate is identified or else continue walking |
8467 | // |
8468 | Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info) |
8469 | { |
8470 | ArrIndex arrIndex(getAllocator()); |
8471 | |
8472 | // Check if array index can be optimized. |
8473 | if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM)) |
8474 | { |
8475 | assert(tree->gtOper == GT_COMMA); |
8476 | #ifdef DEBUG |
8477 | if (verbose) |
8478 | { |
8479 | JITDUMP("Found ArrIndex at tree " ); |
8480 | printTreeID(tree); |
8481 | printf(" which is equivalent to: " ); |
8482 | arrIndex.Print(); |
8483 | JITDUMP("\n" ); |
8484 | } |
8485 | #endif |
8486 | if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl)) |
8487 | { |
8488 | return WALK_SKIP_SUBTREES; |
8489 | } |
8490 | |
8491 | // Walk the dimensions and see if iterVar of the loop is used as index. |
8492 | for (unsigned dim = 0; dim < arrIndex.rank; ++dim) |
8493 | { |
            // Is the index variable also used as the loop iter var?
8495 | if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar()) |
8496 | { |
8497 | // Check the previous indices are all loop invariant. |
8498 | for (unsigned dim2 = 0; dim2 < dim; ++dim2) |
8499 | { |
8500 | if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2])) |
8501 | { |
8502 | JITDUMP("V%02d is assigned in loop\n" , arrIndex.indLcls[dim2]); |
8503 | return WALK_SKIP_SUBTREES; |
8504 | } |
8505 | } |
8506 | #ifdef DEBUG |
8507 | if (verbose) |
8508 | { |
8509 | JITDUMP("Loop %d can be cloned for ArrIndex " , info->loopNum); |
8510 | arrIndex.Print(); |
8511 | JITDUMP(" on dim %d\n" , dim); |
8512 | } |
8513 | #endif |
8514 | // Update the loop context. |
8515 | info->context->EnsureLoopOptInfo(info->loopNum) |
8516 | ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt)); |
8517 | } |
8518 | else |
8519 | { |
8520 | JITDUMP("Induction V%02d is not used as index on dim %d\n" , optLoopTable[info->loopNum].lpIterVar(), |
8521 | dim); |
8522 | } |
8523 | } |
8524 | return WALK_SKIP_SUBTREES; |
8525 | } |
8526 | else if (tree->gtOper == GT_ARR_ELEM) |
8527 | { |
8528 | // TODO-CQ: CLONE: Implement. |
8529 | return WALK_SKIP_SUBTREES; |
8530 | } |
8531 | return WALK_CONTINUE; |
8532 | } |
8533 | |
8534 | struct optRangeCheckDsc |
8535 | { |
8536 | Compiler* pCompiler; |
8537 | bool bValidIndex; |
8538 | }; |
8539 | /* |
8540 | Walk to make sure that only locals and constants are contained in the index |
8541 | for a range check |
8542 | */ |
8543 | Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTree** pTree, fgWalkData* data) |
8544 | { |
8545 | GenTree* tree = *pTree; |
8546 | optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData; |
8547 | |
8548 | if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD) |
8549 | { |
8550 | pData->bValidIndex = false; |
8551 | return WALK_ABORT; |
8552 | } |
8553 | |
8554 | if (tree->gtOper == GT_LCL_VAR) |
8555 | { |
8556 | if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed) |
8557 | { |
8558 | pData->bValidIndex = false; |
8559 | return WALK_ABORT; |
8560 | } |
8561 | } |
8562 | |
8563 | return WALK_CONTINUE; |
8564 | } |
8565 | |
8566 | /* |
    Returns true if a range check can legally be removed. For the moment it checks
    that the array is a local array (not subject to race conditions) and that the
    index is either a constant or a local.
8570 | */ |
8571 | bool Compiler::optIsRangeCheckRemovable(GenTree* tree) |
8572 | { |
8573 | noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK); |
8574 | GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); |
8575 | GenTree* pArray = bndsChk->GetArray(); |
8576 | if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI()) |
8577 | { |
8578 | return false; |
8579 | } |
8580 | GenTree* pIndex = bndsChk->gtIndex; |
8581 | |
8582 | // The length must be a constant (the pArray == NULL case) or the array reference must be a local. |
8583 | // Otherwise we can be targeted by malicious race-conditions. |
8584 | if (pArray != nullptr) |
8585 | { |
8586 | if (pArray->gtOper != GT_LCL_VAR) |
8587 | { |
8588 | |
8589 | #ifdef DEBUG |
8590 | if (verbose) |
8591 | { |
8592 | printf("Can't remove range check if the array isn't referenced with a local\n" ); |
8593 | gtDispTree(pArray); |
8594 | } |
8595 | #endif |
8596 | return false; |
8597 | } |
8598 | else |
8599 | { |
8600 | noway_assert(pArray->gtType == TYP_REF); |
8601 | noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount); |
8602 | |
8603 | if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed) |
8604 | { |
8605 | // If the array address has been taken, don't do the optimization |
                // (this restriction can be lowered a bit, but I don't think it's worth it)
8607 | CLANG_FORMAT_COMMENT_ANCHOR; |
8608 | #ifdef DEBUG |
8609 | if (verbose) |
8610 | { |
8611 | printf("Can't remove range check if the array has its address taken\n" ); |
8612 | gtDispTree(pArray); |
8613 | } |
8614 | #endif |
8615 | return false; |
8616 | } |
8617 | } |
8618 | } |
8619 | |
8620 | optRangeCheckDsc Data; |
8621 | Data.pCompiler = this; |
8622 | Data.bValidIndex = true; |
8623 | |
8624 | fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data); |
8625 | |
8626 | if (!Data.bValidIndex) |
8627 | { |
8628 | #ifdef DEBUG |
8629 | if (verbose) |
8630 | { |
8631 | printf("Can't remove range check with this index" ); |
8632 | gtDispTree(pIndex); |
8633 | } |
8634 | #endif |
8635 | |
8636 | return false; |
8637 | } |
8638 | |
8639 | return true; |
8640 | } |
8641 | |
8642 | /****************************************************************************** |
8643 | * |
8644 | * Replace x==null with (x|x)==0 if x is a GC-type. |
8645 | * This will stress code-gen and the emitter to make sure they support such trees. |
8646 | */ |
8647 | |
8648 | #ifdef DEBUG |
8649 | |
8650 | void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock) |
8651 | { |
8652 | if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20)) |
8653 | { |
8654 | return; |
8655 | } |
8656 | |
8657 | noway_assert(condBlock->bbJumpKind == BBJ_COND); |
8658 | GenTree* condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr; |
8659 | |
8660 | noway_assert(condStmt->gtOper == GT_JTRUE); |
8661 | |
8662 | bool isBool; |
8663 | GenTree* relop; |
8664 | |
8665 | GenTree* comparand = optIsBoolCond(condStmt, &relop, &isBool); |
8666 | |
8667 | if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet())) |
8668 | { |
8669 | return; |
8670 | } |
8671 | |
8672 | if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF)) |
8673 | { |
8674 | return; |
8675 | } |
8676 | |
8677 | GenTree* comparandClone = gtCloneExpr(comparand); |
8678 | |
8679 | noway_assert(relop->gtOp.gtOp1 == comparand); |
8680 | genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND; |
8681 | relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone); |
8682 | |
    // The comparand type is already checked, and we have a const int; there is no harm
    // in morphing it into a TYP_I_IMPL.
8685 | noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT); |
8686 | relop->gtOp.gtOp2->gtType = TYP_I_IMPL; |
8687 | } |
8688 | |
8689 | #endif |
8690 | |
8691 | /****************************************************************************** |
8692 | * Function used by folding of boolean conditionals |
8693 | * Given a GT_JTRUE node, checks that it is a boolean comparison of the form |
8694 | * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1" |
8695 | * being a boolean lclVar and "op2" the const 0/1. |
 * On success, the comparand (i.e. boolVal) is returned; otherwise NULL.
 * compPtr returns the compare node (i.e. the GT_EQ or GT_NE node).
 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
 * When *boolPtr is true and the comparison was against 1 (i.e. true), we morph
 * the tree by reversing the GT_EQ/GT_NE and changing the 1 to 0.
8701 | */ |
8702 | |
8703 | GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr) |
8704 | { |
8705 | bool isBool = false; |
8706 | |
8707 | noway_assert(condBranch->gtOper == GT_JTRUE); |
8708 | GenTree* cond = condBranch->gtOp.gtOp1; |
8709 | |
8710 | /* The condition must be "!= 0" or "== 0" */ |
8711 | |
8712 | if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE)) |
8713 | { |
8714 | return nullptr; |
8715 | } |
8716 | |
8717 | /* Return the compare node to the caller */ |
8718 | |
8719 | *compPtr = cond; |
8720 | |
8721 | /* Get hold of the comparands */ |
8722 | |
8723 | GenTree* opr1 = cond->gtOp.gtOp1; |
8724 | GenTree* opr2 = cond->gtOp.gtOp2; |
8725 | |
8726 | if (opr2->gtOper != GT_CNS_INT) |
8727 | { |
8728 | return nullptr; |
8729 | } |
8730 | |
8731 | if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1)) |
8732 | { |
8733 | return nullptr; |
8734 | } |
8735 | |
8736 | ssize_t ival2 = opr2->gtIntCon.gtIconVal; |
8737 | |
8738 | /* Is the value a boolean? |
8739 | * We can either have a boolean expression (marked GTF_BOOLEAN) or |
8740 | * a local variable that is marked as being boolean (lvIsBoolean) */ |
8741 | |
8742 | if (opr1->gtFlags & GTF_BOOLEAN) |
8743 | { |
8744 | isBool = true; |
8745 | } |
8746 | else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) |
8747 | { |
8748 | isBool = true; |
8749 | } |
8750 | else if (opr1->gtOper == GT_LCL_VAR) |
8751 | { |
8752 | /* is it a boolean local variable */ |
8753 | |
8754 | unsigned lclNum = opr1->gtLclVarCommon.gtLclNum; |
8755 | noway_assert(lclNum < lvaCount); |
8756 | |
8757 | if (lvaTable[lclNum].lvIsBoolean) |
8758 | { |
8759 | isBool = true; |
8760 | } |
8761 | } |
8762 | |
8763 | /* Was our comparison against the constant 1 (i.e. true) */ |
8764 | if (ival2 == 1) |
8765 | { |
8766 | // If this is a boolean expression tree we can reverse the relop |
8767 | // and change the true to false. |
8768 | if (isBool) |
8769 | { |
8770 | gtReverseCond(cond); |
8771 | opr2->gtIntCon.gtIconVal = 0; |
8772 | } |
8773 | else |
8774 | { |
8775 | return nullptr; |
8776 | } |
8777 | } |
8778 | |
8779 | *boolPtr = isBool; |
8780 | return opr1; |
8781 | } |
8782 | |
8783 | void Compiler::optOptimizeBools() |
8784 | { |
8785 | #ifdef DEBUG |
8786 | if (verbose) |
8787 | { |
8788 | printf("*************** In optOptimizeBools()\n" ); |
8789 | if (verboseTrees) |
8790 | { |
8791 | printf("Blocks/Trees before phase\n" ); |
8792 | fgDispBasicBlocks(true); |
8793 | } |
8794 | } |
8795 | #endif |
8796 | bool change; |
8797 | |
8798 | do |
8799 | { |
8800 | change = false; |
8801 | |
8802 | for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext) |
8803 | { |
8804 | /* We're only interested in conditional jumps here */ |
8805 | |
8806 | if (b1->bbJumpKind != BBJ_COND) |
8807 | { |
8808 | continue; |
8809 | } |
8810 | |
8811 | /* If there is no next block, we're done */ |
8812 | |
8813 | BasicBlock* b2 = b1->bbNext; |
8814 | if (!b2) |
8815 | { |
8816 | break; |
8817 | } |
8818 | |
8819 | /* The next block must not be marked as BBF_DONT_REMOVE */ |
8820 | if (b2->bbFlags & BBF_DONT_REMOVE) |
8821 | { |
8822 | continue; |
8823 | } |
8824 | |
8825 | /* The next block also needs to be a condition */ |
8826 | |
8827 | if (b2->bbJumpKind != BBJ_COND) |
8828 | { |
8829 | #ifdef DEBUG |
8830 | optOptimizeBoolsGcStress(b1); |
8831 | #endif |
8832 | continue; |
8833 | } |
8834 | |
8835 | bool sameTarget; // Do b1 and b2 have the same bbJumpDest? |
8836 | |
8837 | if (b1->bbJumpDest == b2->bbJumpDest) |
8838 | { |
8839 | /* Given the following sequence of blocks : |
8840 | B1: brtrue(t1, BX) |
8841 | B2: brtrue(t2, BX) |
8842 | B3: |
8843 | we will try to fold it to : |
8844 | B1: brtrue(t1|t2, BX) |
8845 | B3: |
8846 | */ |
8847 | |
8848 | sameTarget = true; |
8849 | } |
8850 | else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/ |
8851 | { |
8852 | /* Given the following sequence of blocks : |
8853 | B1: brtrue(t1, B3) |
8854 | B2: brtrue(t2, BX) |
8855 | B3: |
8856 | we will try to fold it to : |
8857 | B1: brtrue((!t1)&&t2, BX) |
8858 | B3: |
8859 | */ |
8860 | |
8861 | sameTarget = false; |
8862 | } |
8863 | else |
8864 | { |
8865 | continue; |
8866 | } |
8867 | |
8868 | /* The second block must contain a single statement */ |
8869 | |
8870 | GenTree* s2 = b2->bbTreeList; |
8871 | if (s2->gtPrev != s2) |
8872 | { |
8873 | continue; |
8874 | } |
8875 | |
8876 | noway_assert(s2->gtOper == GT_STMT); |
8877 | GenTree* t2 = s2->gtStmt.gtStmtExpr; |
8878 | noway_assert(t2->gtOper == GT_JTRUE); |
8879 | |
8880 | /* Find the condition for the first block */ |
8881 | |
8882 | GenTree* s1 = b1->bbTreeList->gtPrev; |
8883 | |
8884 | noway_assert(s1->gtOper == GT_STMT); |
8885 | GenTree* t1 = s1->gtStmt.gtStmtExpr; |
8886 | noway_assert(t1->gtOper == GT_JTRUE); |
8887 | |
8888 | if (b2->countOfInEdges() > 1) |
8889 | { |
8890 | continue; |
8891 | } |
8892 | |
8893 | /* Find the branch conditions of b1 and b2 */ |
8894 | |
8895 | bool bool1, bool2; |
8896 | |
8897 | GenTree* c1 = optIsBoolCond(t1, &t1, &bool1); |
8898 | if (!c1) |
8899 | { |
8900 | continue; |
8901 | } |
8902 | |
8903 | GenTree* c2 = optIsBoolCond(t2, &t2, &bool2); |
8904 | if (!c2) |
8905 | { |
8906 | continue; |
8907 | } |
8908 | |
            noway_assert((t1->gtOper == GT_EQ || t1->gtOper == GT_NE) && (t1->gtOp.gtOp1 == c1));
            noway_assert((t2->gtOper == GT_EQ || t2->gtOper == GT_NE) && (t2->gtOp.gtOp1 == c2));
8911 | |
8912 | // Leave out floats where the bit-representation is more complicated |
8913 | // - there are two representations for 0. |
8914 | // |
8915 | if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet())) |
8916 | { |
8917 | continue; |
8918 | } |
8919 | |
8920 | // Make sure the types involved are of the same sizes |
8921 | if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet())) |
8922 | { |
8923 | continue; |
8924 | } |
8925 | if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet())) |
8926 | { |
8927 | continue; |
8928 | } |
8929 | #ifdef _TARGET_ARMARCH_ |
8930 | // Skip the small operand which we cannot encode. |
8931 | if (varTypeIsSmall(c1->TypeGet())) |
8932 | continue; |
8933 | #endif |
8934 | /* The second condition must not contain side effects */ |
8935 | |
8936 | if (c2->gtFlags & GTF_GLOB_EFFECT) |
8937 | { |
8938 | continue; |
8939 | } |
8940 | |
8941 | /* The second condition must not be too expensive */ |
8942 | |
8943 | gtPrepareCost(c2); |
8944 | |
8945 | if (c2->gtCostEx > 12) |
8946 | { |
8947 | continue; |
8948 | } |
8949 | |
8950 | genTreeOps foldOp; |
8951 | genTreeOps cmpOp; |
8952 | var_types foldType = c1->TypeGet(); |
8953 | if (varTypeIsGC(foldType)) |
8954 | { |
8955 | foldType = TYP_I_IMPL; |
8956 | } |
8957 | |
8958 | if (sameTarget) |
8959 | { |
8960 | /* Both conditions must be the same */ |
8961 | |
8962 | if (t1->gtOper != t2->gtOper) |
8963 | { |
8964 | continue; |
8965 | } |
8966 | |
8967 | if (t1->gtOper == GT_EQ) |
8968 | { |
8969 | /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0 |
8970 | So we will branch to BX if (c1&c2)==0 */ |
8971 | |
8972 | foldOp = GT_AND; |
8973 | cmpOp = GT_EQ; |
8974 | } |
8975 | else |
8976 | { |
8977 | /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0 |
8978 | So we will branch to BX if (c1|c2)!=0 */ |
8979 | |
8980 | foldOp = GT_OR; |
8981 | cmpOp = GT_NE; |
8982 | } |
8983 | } |
8984 | else |
8985 | { |
8986 | /* The b1 condition must be the reverse of the b2 condition */ |
8987 | |
8988 | if (t1->gtOper == t2->gtOper) |
8989 | { |
8990 | continue; |
8991 | } |
8992 | |
8993 | if (t1->gtOper == GT_EQ) |
8994 | { |
8995 | /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0 |
8996 | So we will branch to BX if (c1&c2)!=0 */ |
8997 | |
8998 | foldOp = GT_AND; |
8999 | cmpOp = GT_NE; |
9000 | } |
9001 | else |
9002 | { |
9003 | /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0 |
9004 | So we will branch to BX if (c1|c2)==0 */ |
9005 | |
9006 | foldOp = GT_OR; |
9007 | cmpOp = GT_EQ; |
9008 | } |
9009 | } |
9010 | |
9011 | // Anding requires both values to be 0 or 1 |
9012 | |
9013 | if ((foldOp == GT_AND) && (!bool1 || !bool2)) |
9014 | { |
9015 | continue; |
9016 | } |
9017 | |
9018 | // |
9019 | // Now update the trees |
9020 | // |
9021 | GenTree* cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2); |
9022 | if (bool1 && bool2) |
9023 | { |
9024 | /* When we 'OR'/'AND' two booleans, the result is boolean as well */ |
9025 | cmpOp1->gtFlags |= GTF_BOOLEAN; |
9026 | } |
9027 | |
9028 | t1->SetOper(cmpOp); |
9029 | t1->gtOp.gtOp1 = cmpOp1; |
9030 | t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC() |
9031 | |
9032 | #if FEATURE_SET_FLAGS |
9033 | // For comparisons against zero we will have the GTF_SET_FLAGS set |
9034 | // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree) |
9035 | // during the CSE phase. |
9036 | // |
9037 | // So make sure to clear any GTF_SET_FLAGS bit on these operations |
            // as they are no longer feeding directly into a comparison against zero.
9039 | |
9040 | // Make sure that the GTF_SET_FLAGS bit is cleared. |
9041 | // Fix 388436 ARM JitStress WP7 |
9042 | c1->gtFlags &= ~GTF_SET_FLAGS; |
9043 | c2->gtFlags &= ~GTF_SET_FLAGS; |
9044 | |
9045 | // The new top level node that we just created does feed directly into |
9046 | // a comparison against zero, so set the GTF_SET_FLAGS bit so that |
9047 | // we generate an instruction that sets the flags, which allows us |
9048 | // to omit the cmp with zero instruction. |
9049 | |
9050 | // Request that the codegen for cmpOp1 sets the condition flags |
9051 | // when it generates the code for cmpOp1. |
9052 | // |
9053 | cmpOp1->gtRequestSetFlags(); |
9054 | #endif |
9055 | |
9056 | flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1); |
9057 | flowList* edge2; |
9058 | |
9059 | /* Modify the target of the conditional jump and update bbRefs and bbPreds */ |
9060 | |
9061 | if (sameTarget) |
9062 | { |
9063 | edge2 = fgGetPredForBlock(b2->bbJumpDest, b2); |
9064 | } |
9065 | else |
9066 | { |
9067 | edge2 = fgGetPredForBlock(b2->bbNext, b2); |
9068 | |
9069 | fgRemoveRefPred(b1->bbJumpDest, b1); |
9070 | |
9071 | b1->bbJumpDest = b2->bbJumpDest; |
9072 | |
9073 | fgAddRefPred(b2->bbJumpDest, b1); |
9074 | } |
9075 | |
9076 | noway_assert(edge1 != nullptr); |
9077 | noway_assert(edge2 != nullptr); |
9078 | |
9079 | BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin; |
9080 | BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax; |
9081 | if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax)) |
9082 | { |
9083 | edge1->flEdgeWeightMin = edgeSumMin; |
9084 | edge1->flEdgeWeightMax = edgeSumMax; |
9085 | } |
9086 | else |
9087 | { |
9088 | edge1->flEdgeWeightMin = BB_ZERO_WEIGHT; |
9089 | edge1->flEdgeWeightMax = BB_MAX_WEIGHT; |
9090 | } |
9091 | |
9092 | /* Get rid of the second block (which is a BBJ_COND) */ |
9093 | |
9094 | noway_assert(b1->bbJumpKind == BBJ_COND); |
9095 | noway_assert(b2->bbJumpKind == BBJ_COND); |
9096 | noway_assert(b1->bbJumpDest == b2->bbJumpDest); |
9097 | noway_assert(b1->bbNext == b2); |
9098 | noway_assert(b2->bbNext); |
9099 | |
9100 | fgUnlinkBlock(b2); |
9101 | b2->bbFlags |= BBF_REMOVED; |
9102 | |
9103 | // If b2 was the last block of a try or handler, update the EH table. |
9104 | |
9105 | ehUpdateForDeletedBlock(b2); |
9106 | |
9107 | /* Update bbRefs and bbPreds */ |
9108 | |
9109 | /* Replace pred 'b2' for 'b2->bbNext' with 'b1' |
9110 | * Remove pred 'b2' for 'b2->bbJumpDest' */ |
9111 | |
9112 | fgReplacePred(b2->bbNext, b2, b1); |
9113 | |
9114 | fgRemoveRefPred(b2->bbJumpDest, b2); |
9115 | |
9116 | /* Update the block numbers and try again */ |
9117 | |
9118 | change = true; |
9119 | /* |
9120 | do |
9121 | { |
9122 | b2->bbNum = ++n1; |
9123 | b2 = b2->bbNext; |
9124 | } |
9125 | while (b2); |
9126 | */ |
9127 | |
9128 | // Update loop table |
9129 | fgUpdateLoopsAfterCompacting(b1, b2); |
9130 | |
9131 | #ifdef DEBUG |
9132 | if (verbose) |
9133 | { |
9134 | printf("Folded %sboolean conditions of " FMT_BB " and " FMT_BB " to :\n" , |
9135 | c2->OperIsLeaf() ? "" : "non-leaf " , b1->bbNum, b2->bbNum); |
9136 | gtDispTree(s1); |
9137 | printf("\n" ); |
9138 | } |
9139 | #endif |
9140 | } |
9141 | } while (change); |
9142 | |
9143 | #ifdef DEBUG |
9144 | fgDebugCheckBBlist(); |
9145 | #endif |
9146 | } |
9147 | |