1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX Optimizer XX |
9 | XX XX |
10 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
11 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
12 | */ |
13 | |
14 | #include "jitpch.h" |
15 | #ifdef _MSC_VER |
16 | #pragma hdrstop |
17 | #pragma warning(disable : 4701) |
18 | #endif |
19 | |
20 | /*****************************************************************************/ |
21 | |
22 | void Compiler::optInit() |
23 | { |
24 | optLoopsMarked = false; |
25 | fgHasLoops = false; |
26 | |
27 | /* Initialize the # of tracked loops to 0 */ |
28 | optLoopCount = 0; |
29 | optLoopTable = nullptr; |
30 | |
31 | /* Keep track of the number of calls and indirect calls made by this method */ |
32 | optCallCount = 0; |
33 | optIndirectCallCount = 0; |
34 | optNativeCallCount = 0; |
35 | optAssertionCount = 0; |
36 | optAssertionDep = nullptr; |
37 | #if FEATURE_ANYCSE |
38 | optCSECandidateTotal = 0; |
39 | optCSEstart = UINT_MAX; |
40 | optCSEcount = 0; |
41 | #endif // FEATURE_ANYCSE |
42 | } |
43 | |
44 | DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler) |
45 | { |
46 | } |
47 | |
48 | /***************************************************************************** |
49 | * |
50 | */ |
51 | |
52 | void Compiler::optSetBlockWeights() |
53 | { |
54 | noway_assert(opts.OptimizationEnabled()); |
55 | assert(fgDomsComputed); |
56 | |
57 | #ifdef DEBUG |
58 | bool changed = false; |
59 | #endif |
60 | |
61 | bool firstBBdomsRets = true; |
62 | |
63 | BasicBlock* block; |
64 | |
65 | for (block = fgFirstBB; (block != nullptr); block = block->bbNext) |
66 | { |
67 | /* Blocks that can't be reached via the first block are rarely executed */ |
68 | if (!fgReachable(fgFirstBB, block)) |
69 | { |
70 | block->bbSetRunRarely(); |
71 | } |
72 | |
73 | if (block->bbWeight != BB_ZERO_WEIGHT) |
74 | { |
75 | // Calculate our bbWeight: |
76 | // |
77 | // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks |
78 | // o otherwise BB_UNITY_WEIGHT / 2 |
79 | // |
80 | bool domsRets = true; // Assume that we will dominate |
81 | |
82 | for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next) |
83 | { |
84 | if (!fgDominate(block, retBlocks->block)) |
85 | { |
86 | domsRets = false; |
87 | break; |
88 | } |
89 | } |
90 | |
91 | if (block == fgFirstBB) |
92 | { |
93 | firstBBdomsRets = domsRets; |
94 | } |
95 | |
96 | // If we are not using profile weight then we lower the weight |
97 | // of blocks that do not dominate a return block |
98 | // |
99 | if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false)) |
100 | { |
101 | #if DEBUG |
102 | changed = true; |
103 | #endif |
104 | block->modifyBBWeight(block->bbWeight / 2); |
105 | noway_assert(block->bbWeight); |
106 | } |
107 | } |
108 | } |
109 | |
110 | #if DEBUG |
111 | if (changed && verbose) |
112 | { |
113 | printf("\nAfter optSetBlockWeights:\n" ); |
114 | fgDispBasicBlocks(); |
115 | printf("\n" ); |
116 | } |
117 | |
118 | /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */ |
119 | fgDebugCheckBBlist(); |
120 | #endif |
121 | } |
122 | |
123 | /***************************************************************************** |
124 | * |
125 | * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop. |
126 | */ |
127 | |
128 | void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk) |
129 | { |
/* Calculate the 'loopWeight':
   this is the amount by which we scale the weight of each block in the loop.
   Our heuristic is that loops are weighted eight times more
   than straight-line code.
   Thus we multiply the weight of each block in the loop by BB_LOOP_WEIGHT;
   if the loops are all properly formed this gives us
   (assuming that BB_LOOP_WEIGHT is 8):

     1 -- non-loop basic block
     8 -- single loop nesting
    64 -- double loop nesting
   512 -- triple loop nesting

*/
145 | |
146 | noway_assert(begBlk->bbNum <= endBlk->bbNum); |
147 | noway_assert(begBlk->isLoopHead()); |
148 | noway_assert(fgReachable(begBlk, endBlk)); |
149 | |
150 | #ifdef DEBUG |
151 | if (verbose) |
152 | { |
153 | printf("\nMarking loop L%02u" , begBlk->bbLoopNum); |
154 | } |
155 | #endif |
156 | |
157 | noway_assert(!opts.MinOpts()); |
158 | |
159 | /* Build list of backedges for block begBlk */ |
160 | flowList* backedgeList = nullptr; |
161 | |
162 | for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext) |
163 | { |
164 | /* Is this a backedge? */ |
165 | if (pred->flBlock->bbNum >= begBlk->bbNum) |
166 | { |
167 | flowList* flow = new (this, CMK_FlowList) flowList(); |
168 | |
169 | #if MEASURE_BLOCK_SIZE |
170 | genFlowNodeCnt += 1; |
171 | genFlowNodeSize += sizeof(flowList); |
172 | #endif // MEASURE_BLOCK_SIZE |
173 | |
174 | flow->flNext = backedgeList; |
175 | flow->flBlock = pred->flBlock; |
176 | backedgeList = flow; |
177 | } |
178 | } |
179 | |
180 | /* At least one backedge must have been found (the one from endBlk) */ |
181 | noway_assert(backedgeList); |
182 | |
183 | BasicBlock* curBlk = begBlk; |
184 | |
185 | while (true) |
186 | { |
187 | noway_assert(curBlk); |
188 | |
189 | // For curBlk to be part of a loop that starts at begBlk |
190 | // curBlk must be reachable from begBlk and (since this is a loop) |
191 | // likewise begBlk must be reachable from curBlk. |
192 | // |
193 | |
194 | if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) |
195 | { |
196 | /* If this block reaches any of the backedge blocks we set reachable */ |
197 | /* If this block dominates any of the backedge blocks we set dominates */ |
198 | bool reachable = false; |
199 | bool dominates = false; |
200 | |
201 | for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext) |
202 | { |
203 | BasicBlock* backedge = tmp->flBlock; |
204 | |
205 | if (!curBlk->isRunRarely()) |
206 | { |
207 | reachable |= fgReachable(curBlk, backedge); |
208 | dominates |= fgDominate(curBlk, backedge); |
209 | |
210 | if (dominates && reachable) |
211 | { |
212 | break; |
213 | } |
214 | } |
215 | } |
216 | |
217 | if (reachable) |
218 | { |
219 | noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT); |
220 | |
221 | unsigned weight; |
222 | |
223 | if (curBlk->hasProfileWeight()) |
224 | { |
// We have real profile weights, so we aren't going to change this block's weight
226 | weight = curBlk->bbWeight; |
227 | } |
228 | else |
229 | { |
230 | if (dominates) |
231 | { |
232 | weight = curBlk->bbWeight * BB_LOOP_WEIGHT; |
233 | } |
234 | else |
235 | { |
236 | weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2); |
237 | } |
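// For example (illustrative, assuming BB_LOOP_WEIGHT is 8): a block with
// weight 2 that dominates a backedge gets weight 16 here, while a block
// that merely reaches a backedge gets weight 8.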
238 | |
239 | // |
240 | // The multiplication may have caused us to overflow |
241 | // |
242 | if (weight < curBlk->bbWeight) |
243 | { |
244 | // The multiplication caused us to overflow |
245 | weight = BB_MAX_WEIGHT; |
246 | } |
247 | // |
248 | // Set the new weight |
249 | // |
250 | curBlk->modifyBBWeight(weight); |
251 | } |
252 | #ifdef DEBUG |
253 | if (verbose) |
254 | { |
255 | printf("\n " FMT_BB "(wt=%s)" , curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this))); |
256 | } |
257 | #endif |
258 | } |
259 | } |
260 | |
261 | /* Stop if we've reached the last block in the loop */ |
262 | |
263 | if (curBlk == endBlk) |
264 | { |
265 | break; |
266 | } |
267 | |
268 | curBlk = curBlk->bbNext; |
269 | |
270 | /* If we are excluding the endBlk then stop if we've reached endBlk */ |
271 | |
272 | if (excludeEndBlk && (curBlk == endBlk)) |
273 | { |
274 | break; |
275 | } |
276 | } |
277 | } |
278 | |
279 | /***************************************************************************** |
280 | * |
281 | * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop. |
282 | */ |
283 | |
284 | void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) |
285 | { |
/* A set of blocks that were previously marked as a loop are now
   to be unmarked, since we have decided that for some reason this
   loop no longer exists.
   Basically we are just resetting the blocks' bbWeight to their
   previous values.
*/
292 | |
293 | noway_assert(begBlk->bbNum <= endBlk->bbNum); |
294 | noway_assert(begBlk->isLoopHead()); |
295 | |
296 | noway_assert(!opts.MinOpts()); |
297 | |
298 | BasicBlock* curBlk; |
299 | unsigned backEdgeCount = 0; |
300 | |
301 | for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext) |
302 | { |
303 | curBlk = pred->flBlock; |
304 | |
305 | /* is this a backward edge? (from curBlk to begBlk) */ |
306 | |
307 | if (begBlk->bbNum > curBlk->bbNum) |
308 | { |
309 | continue; |
310 | } |
311 | |
312 | /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ |
313 | |
314 | if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS)) |
315 | { |
316 | continue; |
317 | } |
318 | |
319 | backEdgeCount++; |
320 | } |
321 | |
322 | /* Only unmark the loop blocks if we have exactly one loop back edge */ |
323 | if (backEdgeCount != 1) |
324 | { |
325 | #ifdef DEBUG |
326 | if (verbose) |
327 | { |
328 | if (backEdgeCount > 0) |
329 | { |
330 | printf("\nNot removing loop L%02u, due to an additional back edge" , begBlk->bbLoopNum); |
331 | } |
332 | else if (backEdgeCount == 0) |
333 | { |
334 | printf("\nNot removing loop L%02u, due to no back edge" , begBlk->bbLoopNum); |
335 | } |
336 | } |
337 | #endif |
338 | return; |
339 | } |
340 | noway_assert(backEdgeCount == 1); |
341 | noway_assert(fgReachable(begBlk, endBlk)); |
342 | |
343 | #ifdef DEBUG |
344 | if (verbose) |
345 | { |
346 | printf("\nUnmarking loop L%02u" , begBlk->bbLoopNum); |
347 | } |
348 | #endif |
349 | |
350 | curBlk = begBlk; |
351 | while (true) |
352 | { |
353 | noway_assert(curBlk); |
354 | |
355 | // For curBlk to be part of a loop that starts at begBlk |
356 | // curBlk must be reachable from begBlk and (since this is a loop) |
357 | // likewise begBlk must be reachable from curBlk. |
358 | // |
359 | if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) |
360 | { |
361 | unsigned weight = curBlk->bbWeight; |
362 | |
363 | // Don't unmark blocks that are set to BB_MAX_WEIGHT |
364 | // Don't unmark blocks when we are using profile weights |
365 | // |
366 | if (!curBlk->isMaxBBWeight() && !curBlk->hasProfileWeight()) |
367 | { |
368 | if (!fgDominate(curBlk, endBlk)) |
369 | { |
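// Blocks that do not dominate endBlk were presumably scaled by only
// BB_LOOP_WEIGHT / 2 when the loop was marked (see optMarkLoopBlocks),
// so scale the weight up by 2 here before it is divided by BB_LOOP_WEIGHT below.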
370 | weight *= 2; |
371 | } |
372 | else |
373 | { |
374 | /* Merging of blocks can disturb the Dominates |
375 | information (see RAID #46649) */ |
376 | if (weight < BB_LOOP_WEIGHT) |
377 | { |
378 | weight *= 2; |
379 | } |
380 | } |
381 | |
382 | // We can overflow here so check for it |
383 | if (weight < curBlk->bbWeight) |
384 | { |
385 | weight = BB_MAX_WEIGHT; |
386 | } |
387 | |
388 | assert(weight >= BB_LOOP_WEIGHT); |
389 | |
390 | curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT); |
391 | } |
392 | |
393 | #ifdef DEBUG |
394 | if (verbose) |
395 | { |
396 | printf("\n " FMT_BB "(wt=%s)" , curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this))); |
397 | } |
398 | #endif |
399 | } |
400 | /* Stop if we've reached the last block in the loop */ |
401 | |
402 | if (curBlk == endBlk) |
403 | { |
404 | break; |
405 | } |
406 | |
407 | curBlk = curBlk->bbNext; |
408 | |
409 | /* Stop if we go past the last block in the loop, as it may have been deleted */ |
410 | if (curBlk->bbNum > endBlk->bbNum) |
411 | { |
412 | break; |
413 | } |
414 | } |
415 | } |
416 | |
417 | /***************************************************************************************************** |
418 | * |
419 | * Function called to update the loop table and bbWeight before removing a block |
420 | */ |
421 | |
422 | void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop) |
423 | { |
424 | if (!optLoopsMarked) |
425 | { |
426 | return; |
427 | } |
428 | |
429 | noway_assert(!opts.MinOpts()); |
430 | |
431 | bool removeLoop = false; |
432 | |
433 | /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */ |
434 | /* Special case: the block was the head of a loop - or pointing to a loop entry */ |
435 | |
436 | for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) |
437 | { |
438 | /* Some loops may have been already removed by |
439 | * loop unrolling or conditional folding */ |
440 | |
441 | if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED) |
442 | { |
443 | continue; |
444 | } |
445 | |
446 | if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom) |
447 | { |
448 | optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; |
449 | continue; |
450 | } |
451 | |
452 | #ifdef DEBUG |
453 | if (verbose) |
454 | { |
455 | printf("\nUpdateLoopsBeforeRemoveBlock Before: " ); |
456 | optPrintLoopInfo(loopNum); |
457 | } |
458 | #endif |
459 | |
460 | /* If the loop is still in the table |
461 | * any block in the loop must be reachable !!! */ |
462 | |
463 | noway_assert(optLoopTable[loopNum].lpEntry != block); |
464 | noway_assert(optLoopTable[loopNum].lpBottom != block); |
465 | |
466 | if (optLoopTable[loopNum].lpExit == block) |
467 | { |
468 | optLoopTable[loopNum].lpExit = nullptr; |
469 | optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT; |
471 | } |
472 | |
473 | /* If this points to the actual entry in the loop |
474 | * then the whole loop may become unreachable */ |
475 | |
476 | switch (block->bbJumpKind) |
477 | { |
478 | unsigned jumpCnt; |
479 | BasicBlock** jumpTab; |
480 | |
481 | case BBJ_NONE: |
482 | case BBJ_COND: |
483 | if (block->bbNext == optLoopTable[loopNum].lpEntry) |
484 | { |
485 | removeLoop = true; |
486 | break; |
487 | } |
488 | if (block->bbJumpKind == BBJ_NONE) |
489 | { |
490 | break; |
491 | } |
492 | |
493 | __fallthrough; |
494 | |
495 | case BBJ_ALWAYS: |
496 | noway_assert(block->bbJumpDest); |
497 | if (block->bbJumpDest == optLoopTable[loopNum].lpEntry) |
498 | { |
499 | removeLoop = true; |
500 | } |
501 | break; |
502 | |
503 | case BBJ_SWITCH: |
504 | jumpCnt = block->bbJumpSwt->bbsCount; |
505 | jumpTab = block->bbJumpSwt->bbsDstTab; |
506 | |
507 | do |
508 | { |
509 | noway_assert(*jumpTab); |
510 | if ((*jumpTab) == optLoopTable[loopNum].lpEntry) |
511 | { |
512 | removeLoop = true; |
513 | } |
514 | } while (++jumpTab, --jumpCnt); |
515 | break; |
516 | |
517 | default: |
518 | break; |
519 | } |
520 | |
521 | if (removeLoop) |
522 | { |
523 | /* Check if the entry has other predecessors outside the loop |
524 | * TODO: Replace this when predecessors are available */ |
525 | |
526 | BasicBlock* auxBlock; |
527 | for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext) |
528 | { |
529 | /* Ignore blocks in the loop */ |
530 | |
531 | if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum && |
532 | auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum) |
533 | { |
534 | continue; |
535 | } |
536 | |
537 | switch (auxBlock->bbJumpKind) |
538 | { |
539 | unsigned jumpCnt; |
540 | BasicBlock** jumpTab; |
541 | |
542 | case BBJ_NONE: |
543 | case BBJ_COND: |
544 | if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry) |
545 | { |
546 | removeLoop = false; |
547 | break; |
548 | } |
549 | if (auxBlock->bbJumpKind == BBJ_NONE) |
550 | { |
551 | break; |
552 | } |
553 | |
554 | __fallthrough; |
555 | |
556 | case BBJ_ALWAYS: |
557 | noway_assert(auxBlock->bbJumpDest); |
558 | if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry) |
559 | { |
560 | removeLoop = false; |
561 | } |
562 | break; |
563 | |
564 | case BBJ_SWITCH: |
565 | jumpCnt = auxBlock->bbJumpSwt->bbsCount; |
566 | jumpTab = auxBlock->bbJumpSwt->bbsDstTab; |
567 | |
568 | do |
569 | { |
570 | noway_assert(*jumpTab); |
571 | if ((*jumpTab) == optLoopTable[loopNum].lpEntry) |
572 | { |
573 | removeLoop = false; |
574 | } |
575 | } while (++jumpTab, --jumpCnt); |
576 | break; |
577 | |
578 | default: |
579 | break; |
580 | } |
581 | } |
582 | |
583 | if (removeLoop) |
584 | { |
585 | optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; |
586 | } |
587 | } |
588 | else if (optLoopTable[loopNum].lpHead == block) |
589 | { |
590 | /* The loop has a new head - Just update the loop table */ |
591 | optLoopTable[loopNum].lpHead = block->bbPrev; |
592 | } |
593 | |
594 | #ifdef DEBUG |
595 | if (verbose) |
596 | { |
597 | printf("\nUpdateLoopsBeforeRemoveBlock After: " ); |
598 | optPrintLoopInfo(loopNum); |
599 | } |
600 | #endif |
601 | } |
602 | |
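// If the block being removed is the source of a backedge to a marked loop head
// (it jumps backwards to a block marked as a loop head), and the reachability and
// dominator information that optUnmarkLoopBlocks relies on is still valid, then
// unmark the loop blocks, since removing this block removes that backedge.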
603 | if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) && |
604 | (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed && |
605 | (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block)) |
606 | { |
607 | optUnmarkLoopBlocks(block->bbJumpDest, block); |
608 | } |
609 | } |
610 | |
611 | #ifdef DEBUG |
612 | |
613 | /***************************************************************************** |
614 | * |
615 | * Given the beginBlock of the loop, return the index of this loop |
616 | * to the loop table. |
617 | */ |
618 | |
619 | unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk) |
620 | { |
621 | unsigned lnum = 0; |
622 | |
623 | for (lnum = 0; lnum < optLoopCount; lnum++) |
624 | { |
625 | if (optLoopTable[lnum].lpHead->bbNext == begBlk) |
626 | { |
627 | // Found the loop. |
628 | return lnum; |
629 | } |
630 | } |
631 | |
632 | noway_assert(!"Loop number not found." ); |
633 | |
634 | return optLoopCount; |
635 | } |
636 | |
637 | /***************************************************************************** |
638 | * |
* Print loop info in a uniform way.
640 | */ |
641 | |
642 | void Compiler::optPrintLoopInfo(unsigned loopInd, |
643 | BasicBlock* lpHead, |
644 | BasicBlock* lpFirst, |
645 | BasicBlock* lpTop, |
646 | BasicBlock* lpEntry, |
647 | BasicBlock* lpBottom, |
648 | unsigned char lpExitCnt, |
649 | BasicBlock* lpExit, |
650 | unsigned parentLoop) |
651 | { |
652 | noway_assert(lpHead); |
653 | |
654 | // |
655 | // NOTE: we take "loopInd" as an argument instead of using the one |
656 | // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum |
657 | // has not be set correctly. For example, in optRecordLoop(). |
658 | // However, in most of the cases, loops should have been recorded. |
659 | // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum) |
660 | // version of this method. |
661 | // |
662 | printf("L%02u, from " FMT_BB, loopInd, lpFirst->bbNum); |
663 | if (lpTop != lpFirst) |
664 | { |
665 | printf(" (loop top is " FMT_BB ")" , lpTop->bbNum); |
666 | } |
667 | |
668 | printf(" to " FMT_BB " (Head=" FMT_BB ", Entry=" FMT_BB ", ExitCnt=%d" , lpBottom->bbNum, lpHead->bbNum, |
669 | lpEntry->bbNum, lpExitCnt); |
670 | |
671 | if (lpExitCnt == 1) |
672 | { |
673 | printf(" at " FMT_BB, lpExit->bbNum); |
674 | } |
675 | |
676 | if (parentLoop != BasicBlock::NOT_IN_LOOP) |
677 | { |
678 | printf(", parent loop = L%02u" , parentLoop); |
679 | } |
680 | printf(")" ); |
681 | } |
682 | |
683 | /***************************************************************************** |
684 | * |
685 | * Print loop information given the index of the loop in the loop table. |
686 | */ |
687 | |
688 | void Compiler::optPrintLoopInfo(unsigned lnum) |
689 | { |
690 | noway_assert(lnum < optLoopCount); |
691 | |
692 | LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table. |
693 | |
694 | optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt, |
695 | ldsc->lpExit, ldsc->lpParent); |
696 | } |
697 | |
698 | #endif |
699 | |
700 | //------------------------------------------------------------------------ |
701 | // optPopulateInitInfo: Populate loop init info in the loop table. |
702 | // |
703 | // Arguments: |
704 | // init - the tree that is supposed to initialize the loop iterator. |
705 | // iterVar - loop iteration variable. |
706 | // |
707 | // Return Value: |
708 | // "false" if the loop table could not be populated with the loop iterVar init info. |
709 | // |
710 | // Operation: |
// The 'init' tree is checked to see whether its LHS is the local "iterVar" and
// its RHS is either an int constant or a local var.
713 | // |
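// For example (illustrative): for a loop written as "for (int i = 0; ...)", 'init' is
// expected to be the assignment "i = 0", which sets LPFLG_CONST_INIT and records
// lpConstInit = 0; an initialization from another local ("i = j") sets LPFLG_VAR_INIT
// and records lpVarInit instead.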
714 | bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTree* init, unsigned iterVar) |
715 | { |
716 | // Operator should be = |
717 | if (init->gtOper != GT_ASG) |
718 | { |
719 | return false; |
720 | } |
721 | |
722 | GenTree* lhs = init->gtOp.gtOp1; |
723 | GenTree* rhs = init->gtOp.gtOp2; |
724 | // LHS has to be local and should equal iterVar. |
725 | if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar) |
726 | { |
727 | return false; |
728 | } |
729 | |
730 | // RHS can be constant or local var. |
731 | // TODO-CQ: CLONE: Add arr length for descending loops. |
732 | if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT) |
733 | { |
734 | optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT; |
735 | optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal; |
736 | } |
737 | else if (rhs->gtOper == GT_LCL_VAR) |
738 | { |
739 | optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT; |
740 | optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum; |
741 | } |
742 | else |
743 | { |
744 | return false; |
745 | } |
746 | return true; |
747 | } |
748 | |
749 | //---------------------------------------------------------------------------------- |
750 | // optCheckIterInLoopTest: Check if iter var is used in loop test. |
751 | // |
752 | // Arguments: |
753 | // test "jtrue" tree or an asg of the loop iter termination condition |
754 | // from/to blocks (beg, end) which are part of the loop. |
755 | // iterVar loop iteration variable. |
756 | // loopInd loop index. |
757 | // |
758 | // Operation: |
759 | // The test tree is parsed to check if "iterVar" matches the lhs of the condition |
760 | // and the rhs limit is extracted from the "test" tree. The limit information is |
761 | // added to the loop table. |
762 | // |
763 | // Return Value: |
764 | // "false" if the loop table could not be populated with the loop test info or |
765 | // if the test condition doesn't involve iterVar. |
766 | // |
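// For example (illustrative): a test such as "i < 10" sets LPFLG_CONST_LIMIT, "i < n"
// (where 'n' is a local not assigned between 'from' and 'to') sets LPFLG_VAR_LIMIT,
// and "i < a.Length" sets LPFLG_ARRLEN_LIMIT.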
767 | bool Compiler::optCheckIterInLoopTest( |
768 | unsigned loopInd, GenTree* test, BasicBlock* from, BasicBlock* to, unsigned iterVar) |
769 | { |
770 | // Obtain the relop from the "test" tree. |
771 | GenTree* relop; |
772 | if (test->gtOper == GT_JTRUE) |
773 | { |
774 | relop = test->gtGetOp1(); |
775 | } |
776 | else |
777 | { |
778 | assert(test->gtOper == GT_ASG); |
779 | relop = test->gtGetOp2(); |
780 | } |
781 | |
782 | noway_assert(relop->OperKind() & GTK_RELOP); |
783 | |
784 | GenTree* opr1 = relop->gtOp.gtOp1; |
785 | GenTree* opr2 = relop->gtOp.gtOp2; |
786 | |
787 | GenTree* iterOp; |
788 | GenTree* limitOp; |
789 | |
790 | // Make sure op1 or op2 is the iterVar. |
791 | if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar) |
792 | { |
793 | iterOp = opr1; |
794 | limitOp = opr2; |
795 | } |
796 | else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar) |
797 | { |
798 | iterOp = opr2; |
799 | limitOp = opr1; |
800 | } |
801 | else |
802 | { |
803 | return false; |
804 | } |
805 | |
806 | if (iterOp->gtType != TYP_INT) |
807 | { |
808 | return false; |
809 | } |
810 | |
811 | // Mark the iterator node. |
812 | iterOp->gtFlags |= GTF_VAR_ITERATOR; |
813 | |
814 | // Check what type of limit we have - constant, variable or arr-len. |
815 | if (limitOp->gtOper == GT_CNS_INT) |
816 | { |
817 | optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT; |
818 | if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0) |
819 | { |
820 | optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT; |
821 | } |
822 | } |
823 | else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum)) |
824 | { |
825 | optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT; |
826 | } |
827 | else if (limitOp->gtOper == GT_ARR_LENGTH) |
828 | { |
829 | optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT; |
830 | } |
831 | else |
832 | { |
833 | return false; |
834 | } |
835 | // Save the type of the comparison between the iterator and the limit. |
836 | optLoopTable[loopInd].lpTestTree = relop; |
837 | return true; |
838 | } |
839 | |
840 | //---------------------------------------------------------------------------------- |
// optIsLoopIncrTree: Check if the incr tree is of the form "v += 1" or "v = v + 1".
//
// Arguments:
//      incr       The incr tree to be checked, either an oper-equal (+=, -=, ...) node
//                 or a "v = v op ..." type ASG node.
//
// Operation:
//      The incr tree is checked to be an update of a local ("v") by a constant int,
//      where the update operator is one of ADD, SUB, MUL, LSH or RSH.
//
// Return Value:
//      iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
854 | // |
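// For example (illustrative): "i += 2" and "i = i - 1" both return the local number of 'i',
// while "i += j" returns BAD_VAR_NUM because the increment is not a constant int.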
855 | unsigned Compiler::optIsLoopIncrTree(GenTree* incr) |
856 | { |
857 | GenTree* incrVal; |
858 | genTreeOps updateOper; |
859 | unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper); |
860 | if (iterVar != BAD_VAR_NUM) |
861 | { |
862 | // We have v = v op y type asg node. |
863 | switch (updateOper) |
864 | { |
865 | case GT_ADD: |
866 | case GT_SUB: |
867 | case GT_MUL: |
868 | case GT_RSH: |
869 | case GT_LSH: |
870 | break; |
871 | default: |
872 | return BAD_VAR_NUM; |
873 | } |
874 | |
875 | // Increment should be by a const int. |
876 | // TODO-CQ: CLONE: allow variable increments. |
877 | if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT)) |
878 | { |
879 | return BAD_VAR_NUM; |
880 | } |
881 | } |
882 | |
883 | return iterVar; |
884 | } |
885 | |
886 | //---------------------------------------------------------------------------------- |
887 | // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant. |
888 | // |
889 | // Arguments: |
890 | // from, to - are blocks (beg, end) which are part of the loop. |
891 | // incr - tree that increments the loop iterator. v+=1 or v=v+1. |
892 | // pIterVar - see return value. |
893 | // |
894 | // Return Value: |
895 | // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns |
896 | // false. |
897 | // |
898 | // Operation: |
899 | // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not |
900 | // assigned in the loop. |
901 | // |
902 | bool Compiler::optComputeIterInfo(GenTree* incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar) |
903 | { |
904 | |
905 | unsigned iterVar = optIsLoopIncrTree(incr); |
906 | if (iterVar == BAD_VAR_NUM) |
907 | { |
908 | return false; |
909 | } |
910 | if (optIsVarAssigned(from, to, incr, iterVar)) |
911 | { |
912 | JITDUMP("iterVar is assigned in loop\n" ); |
913 | return false; |
914 | } |
915 | |
916 | *pIterVar = iterVar; |
917 | return true; |
918 | } |
919 | |
920 | //---------------------------------------------------------------------------------- |
921 | // optIsLoopTestEvalIntoTemp: |
922 | // Pattern match if the test tree is computed into a tmp |
923 | // and the "tmp" is used as jump condition for loop termination. |
924 | // |
925 | // Arguments: |
926 | // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0) |
927 | // where Vtmp contains the actual loop test result. |
928 | // newStmt - contains the statement that is the actual test stmt involving |
929 | // the loop iterator. |
930 | // |
931 | // Return Value: |
932 | // Returns true if a new test tree can be obtained. |
933 | // |
934 | // Operation: |
935 | // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition |
936 | // Then returns the rhs for def of Vtmp as the "test" node. |
937 | // |
938 | // Note: |
939 | // This method just retrieves what it thinks is the "test" node, |
940 | // the callers are expected to verify that "iterVar" is used in the test. |
941 | // |
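// For example (illustrative): given the statements "tmp = (i < n);" followed by
// "jmpTrue(tmp != 0)", this returns the "tmp = (i < n)" statement as the new test.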
942 | bool Compiler::optIsLoopTestEvalIntoTemp(GenTree* testStmt, GenTree** newTest) |
943 | { |
944 | GenTree* test = testStmt->gtStmt.gtStmtExpr; |
945 | |
946 | if (test->gtOper != GT_JTRUE) |
947 | { |
948 | return false; |
949 | } |
950 | |
951 | GenTree* relop = test->gtGetOp1(); |
952 | noway_assert(relop->OperIsCompare()); |
953 | |
954 | GenTree* opr1 = relop->gtOp.gtOp1; |
955 | GenTree* opr2 = relop->gtOp.gtOp2; |
956 | |
957 | // Make sure we have jtrue (vtmp != 0) |
958 | if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) && |
959 | opr2->IsIntegralConst(0)) |
960 | { |
961 | // Get the previous statement to get the def (rhs) of Vtmp to see |
962 | // if the "test" is evaluated into Vtmp. |
963 | GenTree* prevStmt = testStmt->gtPrev; |
964 | if (prevStmt == nullptr) |
965 | { |
966 | return false; |
967 | } |
968 | |
969 | GenTree* tree = prevStmt->gtStmt.gtStmtExpr; |
970 | if (tree->OperGet() == GT_ASG) |
971 | { |
972 | GenTree* lhs = tree->gtOp.gtOp1; |
973 | GenTree* rhs = tree->gtOp.gtOp2; |
974 | |
975 | // Return as the new test node. |
976 | if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum()) |
977 | { |
978 | if (rhs->OperIsCompare()) |
979 | { |
980 | *newTest = prevStmt; |
981 | return true; |
982 | } |
983 | } |
984 | } |
985 | } |
986 | return false; |
987 | } |
988 | |
989 | //---------------------------------------------------------------------------------- |
990 | // optExtractInitTestIncr: |
991 | // Extract the "init", "test" and "incr" nodes of the loop. |
992 | // |
993 | // Arguments: |
994 | // head - Loop head block |
995 | // bottom - Loop bottom block |
996 | // top - Loop top block |
997 | // ppInit - The init stmt of the loop if found. |
998 | // ppTest - The test stmt of the loop if found. |
999 | // ppIncr - The incr stmt of the loop if found. |
1000 | // |
1001 | // Return Value: |
1002 | // The results are put in "ppInit", "ppTest" and "ppIncr" if the method |
1003 | // returns true. Returns false if the information can't be extracted. |
1004 | // |
1005 | // Operation: |
1006 | // Check if the "test" stmt is last stmt in the loop "bottom". If found good, |
1007 | // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of |
1008 | // "test" to get the "incr" stmt. If it is not found it could be a loop of the |
1009 | // below form. |
1010 | // |
1011 | // +-------<-----------------<-----------+ |
1012 | // | | |
1013 | // v | |
1014 | // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^ |
1015 | // |
1016 | // Check if the "incr" tree is present in the loop "top" node as the last stmt. |
1017 | // Also check if the "test" tree is assigned to a tmp node and the tmp is used |
1018 | // in the jtrue condition. |
1019 | // |
1020 | // Note: |
1021 | // This method just retrieves what it thinks is the "test" node, |
1022 | // the callers are expected to verify that "iterVar" is used in the test. |
1023 | // |
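// For the common do-while shape (illustrative), the last two statements of 'bottom' are
// expected to look like "i = i + 1;" followed by "jmpTrue(i < n)", and the last statement
// of the pre-header 'head' is expected to be the initialization "i = 0".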
bool Compiler::optExtractInitTestIncr(
1025 | BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr) |
1026 | { |
1027 | assert(ppInit != nullptr); |
1028 | assert(ppTest != nullptr); |
1029 | assert(ppIncr != nullptr); |
1030 | |
1031 | // Check if last two statements in the loop body are the increment of the iterator |
1032 | // and the loop termination test. |
1033 | noway_assert(bottom->bbTreeList != nullptr); |
1034 | GenTree* test = bottom->bbTreeList->gtPrev; |
1035 | noway_assert(test != nullptr && test->gtNext == nullptr); |
1036 | |
1037 | GenTree* newTest; |
1038 | if (optIsLoopTestEvalIntoTemp(test, &newTest)) |
1039 | { |
1040 | test = newTest; |
1041 | } |
1042 | |
1043 | // Check if we have the incr tree before the test tree, if we don't, |
1044 | // check if incr is part of the loop "top". |
1045 | GenTree* incr = test->gtPrev; |
1046 | if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM) |
1047 | { |
1048 | if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr) |
1049 | { |
1050 | return false; |
1051 | } |
1052 | |
1053 | // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp. |
1054 | GenTree* topLast = top->bbTreeList->gtPrev; |
1055 | if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM) |
1056 | { |
1057 | incr = topLast; |
1058 | } |
1059 | else |
1060 | { |
1061 | return false; |
1062 | } |
1063 | } |
1064 | |
1065 | assert(test != incr); |
1066 | |
1067 | // Find the last statement in the loop pre-header which we expect to be the initialization of |
1068 | // the loop iterator. |
1069 | GenTree* phdr = head->bbTreeList; |
1070 | if (phdr == nullptr) |
1071 | { |
1072 | return false; |
1073 | } |
1074 | |
1075 | GenTree* init = phdr->gtPrev; |
1076 | noway_assert(init != nullptr && (init->gtNext == nullptr)); |
1077 | |
1078 | // If it is a duplicated loop condition, skip it. |
1079 | if (init->gtFlags & GTF_STMT_CMPADD) |
1080 | { |
1081 | bool doGetPrev = true; |
1082 | #ifdef DEBUG |
1083 | if (opts.optRepeat) |
1084 | { |
1085 | // Previous optimization passes may have inserted compiler-generated |
1086 | // statements other than duplicated loop conditions. |
1087 | doGetPrev = (init->gtPrev != nullptr); |
1088 | } |
1089 | else |
1090 | { |
1091 | // Must be a duplicated loop condition. |
1092 | noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); |
1093 | } |
1094 | #endif // DEBUG |
1095 | if (doGetPrev) |
1096 | { |
1097 | init = init->gtPrev; |
1098 | } |
1099 | noway_assert(init != nullptr); |
1100 | } |
1101 | |
1102 | noway_assert(init->gtOper == GT_STMT); |
1103 | noway_assert(test->gtOper == GT_STMT); |
1104 | noway_assert(incr->gtOper == GT_STMT); |
1105 | |
1106 | *ppInit = init->gtStmt.gtStmtExpr; |
1107 | *ppTest = test->gtStmt.gtStmtExpr; |
1108 | *ppIncr = incr->gtStmt.gtStmtExpr; |
1109 | |
1110 | return true; |
1111 | } |
1112 | |
1113 | /***************************************************************************** |
1114 | * |
1115 | * Record the loop in the loop table. Return true if successful, false if |
1116 | * out of entries in loop table. |
1117 | */ |
1118 | |
1119 | bool Compiler::optRecordLoop(BasicBlock* head, |
1120 | BasicBlock* first, |
1121 | BasicBlock* top, |
1122 | BasicBlock* entry, |
1123 | BasicBlock* bottom, |
1124 | BasicBlock* exit, |
1125 | unsigned char exitCnt) |
1126 | { |
1127 | // Record this loop in the table, if there's room. |
1128 | |
1129 | assert(optLoopCount <= MAX_LOOP_NUM); |
1130 | if (optLoopCount == MAX_LOOP_NUM) |
1131 | { |
1132 | #if COUNT_LOOPS |
1133 | loopOverflowThisMethod = true; |
1134 | #endif |
1135 | return false; |
1136 | } |
1137 | |
1138 | // Assumed preconditions on the loop we're adding. |
1139 | assert(first->bbNum <= top->bbNum); |
1140 | assert(top->bbNum <= entry->bbNum); |
1141 | assert(entry->bbNum <= bottom->bbNum); |
1142 | assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum); |
1143 | |
1144 | unsigned char loopInd = optLoopCount; |
1145 | |
1146 | if (optLoopTable == nullptr) |
1147 | { |
1148 | assert(loopInd == 0); |
1149 | optLoopTable = getAllocator(CMK_LoopOpt).allocate<LoopDsc>(MAX_LOOP_NUM); |
1150 | } |
1151 | else |
1152 | { |
1153 | // If the new loop contains any existing ones, add it in the right place. |
1154 | for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--) |
1155 | { |
1156 | unsigned char prev = prevPlus1 - 1; |
1157 | if (optLoopTable[prev].lpContainedBy(first, bottom)) |
1158 | { |
1159 | loopInd = prev; |
1160 | } |
1161 | } |
1162 | // Move up any loops if necessary. |
1163 | for (unsigned j = optLoopCount; j > loopInd; j--) |
1164 | { |
1165 | optLoopTable[j] = optLoopTable[j - 1]; |
1166 | } |
1167 | } |
1168 | |
1169 | #ifdef DEBUG |
1170 | for (unsigned i = loopInd + 1; i < optLoopCount; i++) |
1171 | { |
1172 | // The loop is well-formed. |
1173 | assert(optLoopTable[i].lpWellFormed()); |
1174 | // Check for disjoint. |
1175 | if (optLoopTable[i].lpDisjoint(first, bottom)) |
1176 | { |
1177 | continue; |
1178 | } |
1179 | // Otherwise, assert complete containment (of optLoopTable[i] in new loop). |
1180 | assert(optLoopTable[i].lpContainedBy(first, bottom)); |
1181 | } |
1182 | #endif // DEBUG |
1183 | |
1184 | optLoopTable[loopInd].lpHead = head; |
1185 | optLoopTable[loopInd].lpFirst = first; |
1186 | optLoopTable[loopInd].lpTop = top; |
1187 | optLoopTable[loopInd].lpBottom = bottom; |
1188 | optLoopTable[loopInd].lpEntry = entry; |
1189 | optLoopTable[loopInd].lpExit = exit; |
1190 | optLoopTable[loopInd].lpExitCnt = exitCnt; |
1191 | |
1192 | optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP; |
1193 | optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP; |
1194 | optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP; |
1195 | |
1196 | optLoopTable[loopInd].lpAsgVars = AllVarSetOps::UninitVal(); |
1197 | |
1198 | optLoopTable[loopInd].lpFlags = 0; |
1199 | |
1200 | // We haven't yet recorded any side effects. |
1201 | for (MemoryKind memoryKind : allMemoryKinds()) |
1202 | { |
1203 | optLoopTable[loopInd].lpLoopHasMemoryHavoc[memoryKind] = false; |
1204 | } |
1205 | optLoopTable[loopInd].lpFieldsModified = nullptr; |
1206 | optLoopTable[loopInd].lpArrayElemTypesModified = nullptr; |
1207 | |
1208 | // If DO-WHILE loop mark it as such. |
1209 | if (head->bbNext == entry) |
1210 | { |
1211 | optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE; |
1212 | } |
1213 | |
1214 | // If single exit loop mark it as such. |
1215 | if (exitCnt == 1) |
1216 | { |
1217 | noway_assert(exit); |
1218 | optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT; |
1219 | } |
1220 | |
1221 | // |
1222 | // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }" |
1223 | // We have the following restrictions: |
1224 | // 1. The loop condition must be a simple one i.e. only one JTRUE node |
1225 | // 2. There must be a loop iterator (a local var) that is |
1226 | // incremented (decremented or lsh, rsh, mul) with a constant value |
1227 | // 3. The iterator is incremented exactly once |
1228 | // 4. The loop condition must use the iterator. |
1229 | // |
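// A canonical example that satisfies these restrictions (illustrative):
//     for (int i = 0; i < n; i++) { ... }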
1230 | if (bottom->bbJumpKind == BBJ_COND) |
1231 | { |
1232 | GenTree* init; |
1233 | GenTree* test; |
1234 | GenTree* incr; |
1235 | if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr)) |
1236 | { |
1237 | goto DONE_LOOP; |
1238 | } |
1239 | |
1240 | unsigned iterVar = BAD_VAR_NUM; |
1241 | if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar)) |
1242 | { |
1243 | goto DONE_LOOP; |
1244 | } |
1245 | |
1246 | // Make sure the "iterVar" initialization is never skipped, |
1247 | // i.e. every pred of ENTRY other than HEAD is in the loop. |
1248 | for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext) |
1249 | { |
1250 | BasicBlock* predBlock = predEdge->flBlock; |
1251 | if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock)) |
1252 | { |
1253 | goto DONE_LOOP; |
1254 | } |
1255 | } |
1256 | |
1257 | if (!optPopulateInitInfo(loopInd, init, iterVar)) |
1258 | { |
1259 | goto DONE_LOOP; |
1260 | } |
1261 | |
1262 | // Check that the iterator is used in the loop condition. |
1263 | if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar)) |
1264 | { |
1265 | goto DONE_LOOP; |
1266 | } |
1267 | |
1268 | // We know the loop has an iterator at this point ->flag it as LPFLG_ITER |
1269 | // Record the iterator, the pointer to the test node |
1270 | // and the initial value of the iterator (constant or local var) |
1271 | optLoopTable[loopInd].lpFlags |= LPFLG_ITER; |
1272 | |
1273 | // Record iterator. |
1274 | optLoopTable[loopInd].lpIterTree = incr; |
1275 | |
1276 | #if COUNT_LOOPS |
1277 | // Save the initial value of the iterator - can be lclVar or constant |
1278 | // Flag the loop accordingly. |
1279 | |
1280 | iterLoopCount++; |
1281 | #endif |
1282 | |
1283 | #if COUNT_LOOPS |
1284 | simpleTestLoopCount++; |
1285 | #endif |
1286 | |
1287 | // Check if a constant iteration loop. |
1288 | if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)) |
1289 | { |
1290 | // This is a constant loop. |
1291 | optLoopTable[loopInd].lpFlags |= LPFLG_CONST; |
1292 | #if COUNT_LOOPS |
1293 | constIterLoopCount++; |
1294 | #endif |
1295 | } |
1296 | |
1297 | #ifdef DEBUG |
1298 | if (verbose && 0) |
1299 | { |
1300 | printf("\nConstant loop initializer:\n" ); |
1301 | gtDispTree(init); |
1302 | |
1303 | printf("\nConstant loop body:\n" ); |
1304 | |
1305 | BasicBlock* block = head; |
1306 | do |
1307 | { |
1308 | block = block->bbNext; |
1309 | for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) |
1310 | { |
1311 | if (stmt->gtStmt.gtStmtExpr == incr) |
1312 | { |
1313 | break; |
1314 | } |
1315 | printf("\n" ); |
1316 | gtDispTree(stmt->gtStmt.gtStmtExpr); |
1317 | } |
1318 | } while (block != bottom); |
1319 | } |
1320 | #endif // DEBUG |
1321 | } |
1322 | |
1323 | DONE_LOOP: |
1324 | DBEXEC(verbose, optPrintLoopRecording(loopInd)); |
1325 | optLoopCount++; |
1326 | return true; |
1327 | } |
1328 | |
1329 | #ifdef DEBUG |
1330 | //------------------------------------------------------------------------ |
1331 | // optPrintLoopRecording: Print a recording of the loop. |
1332 | // |
1333 | // Arguments: |
1334 | // loopInd - loop index. |
1335 | // |
1336 | void Compiler::optPrintLoopRecording(unsigned loopInd) |
1337 | { |
1338 | printf("Recorded loop %s" , (loopInd != optLoopCount ? "(extended) " : "" )); |
1339 | optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added. |
1340 | optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop, |
1341 | optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt, |
1342 | optLoopTable[loopInd].lpExit); |
1343 | |
1344 | // If an iterator loop print the iterator and the initialization. |
1345 | if (optLoopTable[loopInd].lpFlags & LPFLG_ITER) |
1346 | { |
1347 | printf(" [over V%02u" , optLoopTable[loopInd].lpIterVar()); |
1348 | printf(" (" ); |
1349 | printf(GenTree::OpName(optLoopTable[loopInd].lpIterOper())); |
1350 | printf(" " ); |
1351 | printf("%d )" , optLoopTable[loopInd].lpIterConst()); |
1352 | |
1353 | if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) |
1354 | { |
1355 | printf(" from %d" , optLoopTable[loopInd].lpConstInit); |
1356 | } |
1357 | if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT) |
1358 | { |
1359 | printf(" from V%02u" , optLoopTable[loopInd].lpVarInit); |
1360 | } |
1361 | |
// If a simple test condition, print the operator and the limits.
1363 | printf(GenTree::OpName(optLoopTable[loopInd].lpTestOper())); |
1364 | |
1365 | if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT) |
1366 | { |
1367 | printf("%d " , optLoopTable[loopInd].lpConstLimit()); |
1368 | } |
1369 | |
1370 | if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT) |
1371 | { |
1372 | printf("V%02u " , optLoopTable[loopInd].lpVarLimit()); |
1373 | } |
1374 | |
1375 | printf("]" ); |
1376 | } |
1377 | |
1378 | printf("\n" ); |
1379 | } |
1380 | |
1381 | void Compiler::optCheckPreds() |
1382 | { |
1383 | BasicBlock* block; |
1384 | BasicBlock* blockPred; |
1385 | flowList* pred; |
1386 | |
1387 | for (block = fgFirstBB; block; block = block->bbNext) |
1388 | { |
1389 | for (pred = block->bbPreds; pred; pred = pred->flNext) |
1390 | { |
1391 | // make sure this pred is part of the BB list |
1392 | for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext) |
1393 | { |
1394 | if (blockPred == pred->flBlock) |
1395 | { |
1396 | break; |
1397 | } |
1398 | } |
1399 | noway_assert(blockPred); |
1400 | switch (blockPred->bbJumpKind) |
1401 | { |
1402 | case BBJ_COND: |
1403 | if (blockPred->bbJumpDest == block) |
1404 | { |
1405 | break; |
1406 | } |
1407 | __fallthrough; |
1408 | case BBJ_NONE: |
1409 | noway_assert(blockPred->bbNext == block); |
1410 | break; |
1411 | case BBJ_EHFILTERRET: |
1412 | case BBJ_ALWAYS: |
1413 | case BBJ_EHCATCHRET: |
1414 | noway_assert(blockPred->bbJumpDest == block); |
1415 | break; |
1416 | default: |
1417 | break; |
1418 | } |
1419 | } |
1420 | } |
1421 | } |
1422 | |
1423 | #endif // DEBUG |
1424 | |
1425 | namespace |
1426 | { |
1427 | //------------------------------------------------------------------------ |
1428 | // LoopSearch: Class that handles scanning a range of blocks to detect a loop, |
1429 | // moving blocks to make the loop body contiguous, and recording |
1430 | // the loop. |
1431 | // |
1432 | // We will use the following terminology: |
// HEAD - the basic block that flows into the loop ENTRY block (currently it MUST be lexically before ENTRY).
// Not part of the loop itself.
1435 | // FIRST - the lexically first basic block (in bbNext order) within this loop. |
1436 | // TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same. |
1437 | // BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top) |
// EXIT - the predecessor of the loop's unique exit edge, if it has a unique exit edge; else nullptr
// ENTRY - the entry point of the loop (not necessarily the TOP), but there must be only one entry
1440 | // |
1441 | // We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks. |
1442 | // When the loop is identified, blocks will be moved out to make it a compact contiguous region if possible, |
1443 | // and in cases where compaction is not possible, we'll subsequently treat all blocks in the lexical range |
1444 | // between TOP and BOTTOM as part of the loop even if they aren't part of the SCC. |
1445 | // Regarding nesting: Since a given block can only have one back-edge (we only detect loops with back-edges |
1446 | // from BBJ_COND or BBJ_ALWAYS blocks), no two loops will share the same BOTTOM. Two loops may share the |
1447 | // same FIRST/TOP/ENTRY as reported by LoopSearch, and optCanonicalizeLoopNest will subsequently re-write |
1448 | // the CFG so that no two loops share the same FIRST/TOP/ENTRY anymore. |
1449 | // |
1450 | // | |
1451 | // v |
1452 | // head |
1453 | // | |
1454 | // | top/first <--+ |
1455 | // | | | |
1456 | // | ... | |
1457 | // | | | |
1458 | // | v | |
1459 | // +---> entry | |
1460 | // | | |
1461 | // ... | |
1462 | // | | |
1463 | // v | |
1464 | // +-- exit/tail | |
1465 | // | | | |
1466 | // | ... | |
1467 | // | | | |
1468 | // | v | |
1469 | // | bottom ---+ |
1470 | // | |
1471 | // +------+ |
1472 | // | |
1473 | // v |
1474 | // |
1475 | class LoopSearch |
1476 | { |
1477 | |
1478 | // Keeping track of which blocks are in the loop requires two block sets since we may add blocks |
1479 | // as we go but the BlockSet type's max ID doesn't increase to accommodate them. Define a helper |
1480 | // struct to make the ensuing code more readable. |
1481 | struct LoopBlockSet |
1482 | { |
1483 | private: |
1484 | // Keep track of blocks with bbNum <= oldBlockMaxNum in a regular BlockSet, since |
1485 | // it can hold all of them. |
1486 | BlockSet oldBlocksInLoop; // Blocks with bbNum <= oldBlockMaxNum |
1487 | |
1488 | // Keep track of blocks with bbNum > oldBlockMaxNum in a separate BlockSet, but |
1489 | // indexing them by (blockNum - oldBlockMaxNum); since we won't generate more than |
1490 | // one new block per old block, this must be sufficient to track any new blocks. |
1491 | BlockSet newBlocksInLoop; // Blocks with bbNum > oldBlockMaxNum |
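
// For example (illustrative): if oldBlockMaxNum is 50, an existing block numbered 37 is
// tracked in oldBlocksInLoop at index 37, while a newly created block numbered 53 is
// tracked in newBlocksInLoop at index 53 - 50 = 3.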
1492 | |
1493 | Compiler* comp; |
1494 | unsigned int oldBlockMaxNum; |
1495 | |
1496 | public: |
1497 | LoopBlockSet(Compiler* comp) |
1498 | : oldBlocksInLoop(BlockSetOps::UninitVal()) |
1499 | , newBlocksInLoop(BlockSetOps::UninitVal()) |
1500 | , comp(comp) |
1501 | , oldBlockMaxNum(comp->fgBBNumMax) |
1502 | { |
1503 | } |
1504 | |
1505 | void Reset(unsigned int seedBlockNum) |
1506 | { |
1507 | if (BlockSetOps::MayBeUninit(oldBlocksInLoop)) |
1508 | { |
1509 | // Either the block sets are uninitialized (and long), so we need to initialize |
1510 | // them (and allocate their backing storage), or they are short and empty, so |
1511 | // assigning MakeEmpty to them is as cheap as ClearD. |
1512 | oldBlocksInLoop = BlockSetOps::MakeEmpty(comp); |
1513 | newBlocksInLoop = BlockSetOps::MakeEmpty(comp); |
1514 | } |
1515 | else |
1516 | { |
1517 | // We know the backing storage is already allocated, so just clear it. |
1518 | BlockSetOps::ClearD(comp, oldBlocksInLoop); |
1519 | BlockSetOps::ClearD(comp, newBlocksInLoop); |
1520 | } |
1521 | assert(seedBlockNum <= oldBlockMaxNum); |
1522 | BlockSetOps::AddElemD(comp, oldBlocksInLoop, seedBlockNum); |
1523 | } |
1524 | |
1525 | bool CanRepresent(unsigned int blockNum) |
1526 | { |
1527 | // We can represent old blocks up to oldBlockMaxNum, and |
1528 | // new blocks up to 2 * oldBlockMaxNum. |
1529 | return (blockNum <= 2 * oldBlockMaxNum); |
1530 | } |
1531 | |
1532 | bool IsMember(unsigned int blockNum) |
1533 | { |
1534 | if (blockNum > oldBlockMaxNum) |
1535 | { |
1536 | return BlockSetOps::IsMember(comp, newBlocksInLoop, blockNum - oldBlockMaxNum); |
1537 | } |
1538 | return BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum); |
1539 | } |
1540 | |
1541 | void Insert(unsigned int blockNum) |
1542 | { |
1543 | if (blockNum > oldBlockMaxNum) |
1544 | { |
1545 | BlockSetOps::AddElemD(comp, newBlocksInLoop, blockNum - oldBlockMaxNum); |
1546 | } |
1547 | else |
1548 | { |
1549 | BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum); |
1550 | } |
1551 | } |
1552 | |
1553 | bool TestAndInsert(unsigned int blockNum) |
1554 | { |
1555 | if (blockNum > oldBlockMaxNum) |
1556 | { |
1557 | unsigned int shiftedNum = blockNum - oldBlockMaxNum; |
1558 | if (!BlockSetOps::IsMember(comp, newBlocksInLoop, shiftedNum)) |
1559 | { |
1560 | BlockSetOps::AddElemD(comp, newBlocksInLoop, shiftedNum); |
1561 | return false; |
1562 | } |
1563 | } |
1564 | else |
1565 | { |
1566 | if (!BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum)) |
1567 | { |
1568 | BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum); |
1569 | return false; |
1570 | } |
1571 | } |
1572 | return true; |
1573 | } |
1574 | }; |
1575 | |
1576 | LoopBlockSet loopBlocks; // Set of blocks identified as part of the loop |
1577 | Compiler* comp; |
1578 | |
1579 | // See LoopSearch class comment header for a diagram relating these fields: |
1580 | BasicBlock* head; // Predecessor of unique entry edge |
1581 | BasicBlock* first; // Lexically first in-loop block |
1582 | BasicBlock* top; // Successor of back-edge from BOTTOM |
1583 | BasicBlock* bottom; // Predecessor of back-edge to TOP, also lexically last in-loop block |
1584 | BasicBlock* entry; // Successor of unique entry edge |
1585 | |
1586 | BasicBlock* lastExit; // Most recently discovered exit block |
1587 | unsigned char exitCount; // Number of discovered exit edges |
1588 | unsigned int oldBlockMaxNum; // Used to identify new blocks created during compaction |
1589 | BlockSet bottomBlocks; // BOTTOM blocks of already-recorded loops |
1590 | #ifdef DEBUG |
1591 | bool forgotExit = false; // Flags a rare case where lastExit gets nulled out, for assertions |
1592 | #endif |
1593 | bool changedFlowGraph = false; // Signals that loop compaction has modified the flow graph |
1594 | |
1595 | public: |
1596 | LoopSearch(Compiler* comp) |
1597 | : loopBlocks(comp), comp(comp), oldBlockMaxNum(comp->fgBBNumMax), bottomBlocks(BlockSetOps::MakeEmpty(comp)) |
1598 | { |
1599 | // Make sure we've renumbered such that the bitsets can hold all the bits |
1600 | assert(comp->fgBBNumMax <= comp->fgCurBBEpochSize); |
1601 | } |
1602 | |
1603 | //------------------------------------------------------------------------ |
1604 | // RecordLoop: Notify the Compiler that a loop has been found. |
1605 | // |
1606 | // Return Value: |
1607 | // true - Loop successfully recorded. |
1608 | // false - Compiler has run out of loop descriptors; loop not recorded. |
1609 | // |
1610 | bool RecordLoop() |
1611 | { |
1612 | /* At this point we have a compact loop - record it in the loop table |
1613 | * If we found only one exit, record it in the table too |
1614 | * (otherwise an exit = nullptr in the loop table means multiple exits) */ |
1615 | |
1616 | BasicBlock* onlyExit = (exitCount == 1 ? lastExit : nullptr); |
1617 | if (comp->optRecordLoop(head, first, top, entry, bottom, onlyExit, exitCount)) |
1618 | { |
1619 | // Record the BOTTOM block for future reference before returning. |
1620 | assert(bottom->bbNum <= oldBlockMaxNum); |
1621 | BlockSetOps::AddElemD(comp, bottomBlocks, bottom->bbNum); |
1622 | return true; |
1623 | } |
1624 | |
1625 | // Unable to record this loop because the loop descriptor table overflowed. |
1626 | return false; |
1627 | } |
1628 | |
1629 | //------------------------------------------------------------------------ |
1630 | // ChangedFlowGraph: Determine whether loop compaction has modified the flow graph. |
1631 | // |
1632 | // Return Value: |
1633 | // true - The flow graph has been modified; fgUpdateChangedFlowGraph should |
1634 | // be called (which is the caller's responsibility). |
1635 | // false - The flow graph has not been modified by this LoopSearch. |
1636 | // |
1637 | bool ChangedFlowGraph() |
1638 | { |
1639 | return changedFlowGraph; |
1640 | } |
1641 | |
1642 | //------------------------------------------------------------------------ |
1643 | // FindLoop: Search for a loop with the given HEAD block and back-edge. |
1644 | // |
1645 | // Arguments: |
1646 | // head - Block to be the HEAD of any loop identified |
1647 | // top - Block to be the TOP of any loop identified |
1648 | // bottom - Block to be the BOTTOM of any loop identified |
1649 | // |
1650 | // Return Value: |
1651 | // true - Found a valid loop. |
1652 | // false - Did not find a valid loop. |
1653 | // |
1654 | // Notes: |
1655 | // May modify flow graph to make loop compact before returning. |
1656 | // Will set instance fields to track loop's extent and exits if a valid |
1657 | // loop is found, and potentially trash them otherwise. |
1658 | // |
1659 | bool FindLoop(BasicBlock* head, BasicBlock* top, BasicBlock* bottom) |
1660 | { |
1661 | /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM |
1662 | * to TOP (note that this is an abuse of notation since this is not necessarily a back edge |
1663 | * as the definition says, but merely an indication that we have a loop there). |
* Thus, we have to be very careful and, after discovering the entry, check that it is indeed
* the only place we enter the loop (especially for non-reducible flow graphs).
1666 | */ |
1667 | |
1668 | if (top->bbNum > bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP) |
1669 | { |
1670 | // Edge from BOTTOM to TOP is not a backward edge |
1671 | return false; |
1672 | } |
1673 | |
1674 | if (bottom->bbNum > oldBlockMaxNum) |
1675 | { |
1676 | // Not a true back-edge; bottom is a block added to reconnect fall-through during |
1677 | // loop processing, so its block number does not reflect its position. |
1678 | return false; |
1679 | } |
1680 | |
1681 | if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) || |
1682 | (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) || |
1683 | (bottom->bbJumpKind == BBJ_SWITCH)) |
1684 | { |
1685 | /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop. |
1686 | * BBJ_SWITCH that has a backward jump appears only for labeled break. */ |
1687 | return false; |
1688 | } |
1689 | |
1690 | /* The presence of a "back edge" is an indication that a loop might be present here |
1691 | * |
1692 | * LOOP: |
1693 | * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any |
1694 | * node in the loop to any other node in the loop (wholly within the loop) |
1695 | * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node |
1696 | * in the loop from outside the loop, and that is through the ENTRY |
1697 | */ |
1698 | |
1699 | /* Let's find the loop ENTRY */ |
1700 | BasicBlock* entry = FindEntry(head, top, bottom); |
1701 | |
1702 | if (entry == nullptr) |
1703 | { |
1704 | // For now, we only recognize loops where HEAD has some successor ENTRY in the loop. |
1705 | return false; |
1706 | } |
1707 | |
1708 | // Passed the basic checks; initialize instance state for this back-edge. |
1709 | this->head = head; |
1710 | this->top = top; |
1711 | this->entry = entry; |
1712 | this->bottom = bottom; |
1713 | this->lastExit = nullptr; |
1714 | this->exitCount = 0; |
1715 | |
1716 | // Now we find the "first" block -- the earliest block reachable within the loop. |
1717 | // With our current algorithm, this is always the same as "top". |
1718 | this->first = top; |
1719 | |
1720 | if (!HasSingleEntryCycle()) |
1721 | { |
1722 | // There isn't actually a loop between TOP and BOTTOM |
1723 | return false; |
1724 | } |
1725 | |
1726 | if (!loopBlocks.IsMember(top->bbNum)) |
1727 | { |
1728 | // The "back-edge" we identified isn't actually part of the flow cycle containing ENTRY |
1729 | return false; |
1730 | } |
1731 | |
1732 | // Disqualify loops where the first block of the loop is less nested in EH than |
1733 | // the bottom block. That is, we don't want to handle loops where the back edge |
1734 | // goes from within an EH region to a first block that is outside that same EH |
1735 | // region. Note that we *do* handle loops where the first block is the *first* |
1736 | // block of a more nested EH region (since it is legal to branch to the first |
1737 | // block of an immediately more nested EH region). So, for example, disqualify |
1738 | // this: |
1739 | // |
1740 | // BB02 |
1741 | // ... |
1742 | // try { |
1743 | // ... |
1744 | // BB10 BBJ_COND => BB02 |
1745 | // ... |
1746 | // } |
1747 | // |
1748 | // Here, BB10 is more nested than BB02. |
1749 | |
1750 | if (bottom->hasTryIndex() && !comp->bbInTryRegions(bottom->getTryIndex(), first)) |
1751 | { |
JITDUMP("Loop 'first' " FMT_BB " is in an outer EH region compared to loop 'bottom' " FMT_BB ". Rejecting "
        "loop.\n",
        first->bbNum, bottom->bbNum);
1755 | return false; |
1756 | } |
1757 | |
1758 | #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) |
1759 | // Disqualify loops where the first block of the loop is a finally target. |
1760 | // The main problem is when multiple loops share a 'first' block that is a finally |
1761 | // target and we canonicalize the loops by adding a new loop head. In that case, we |
1762 | // need to update the blocks so the finally target bit is moved to the newly created |
1763 | // block, and removed from the old 'first' block. This is 'hard', so at this point |
1764 | // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator |
1765 | // long-term), it's easier to disallow the loop than to update the flow graph to |
1766 | // support this case. |
1767 | |
1768 | if ((first->bbFlags & BBF_FINALLY_TARGET) != 0) |
1769 | { |
JITDUMP("Loop 'first' " FMT_BB " is a finally target. Rejecting loop.\n", first->bbNum);
1771 | return false; |
1772 | } |
1773 | #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) |
1774 | |
1775 | // Compact the loop (sweep through it and move out any blocks that aren't part of the |
1776 | // flow cycle), and find the exits. |
1777 | if (!MakeCompactAndFindExits()) |
1778 | { |
1779 | // Unable to preserve well-formed loop during compaction. |
1780 | return false; |
1781 | } |
1782 | |
1783 | // We have a valid loop. |
1784 | return true; |
1785 | } |
1786 | |
1787 | private: |
1788 | //------------------------------------------------------------------------ |
1789 | // FindEntry: See if given HEAD flows to valid ENTRY between given TOP and BOTTOM |
1790 | // |
1791 | // Arguments: |
1792 | // head - Block to be the HEAD of any loop identified |
1793 | // top - Block to be the TOP of any loop identified |
1794 | // bottom - Block to be the BOTTOM of any loop identified |
1795 | // |
1796 | // Return Value: |
1797 | // Block to be the ENTRY of any loop identified, or nullptr if no |
1798 | // such entry meeting our criteria can be found. |
1799 | // |
1800 | // Notes: |
1801 | // Returns main entry if one is found, does not check for side-entries. |
1802 | // |
1803 | BasicBlock* FindEntry(BasicBlock* head, BasicBlock* top, BasicBlock* bottom) |
1804 | { |
1805 | if (head->bbJumpKind == BBJ_ALWAYS) |
1806 | { |
1807 | if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum) |
1808 | { |
1809 | /* OK - we enter somewhere within the loop */ |
1810 | |
/* Some useful asserts:
 * Cannot enter at the top - should have been caught by redundant jumps */
1813 | |
1814 | assert((head->bbJumpDest != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS)); |
1815 | |
1816 | return head->bbJumpDest; |
1817 | } |
1818 | else |
1819 | { |
1820 | /* special case - don't consider now */ |
1821 | // assert (!"Loop entered in weird way!"); |
1822 | return nullptr; |
1823 | } |
1824 | } |
1825 | // Can we fall through into the loop? |
1826 | else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND) |
1827 | { |
1828 | /* The ENTRY is at the TOP (a do-while loop) */ |
1829 | return top; |
1830 | } |
1831 | else |
1832 | { |
return nullptr; // head does not flow into the loop; bail for now
1834 | } |
1835 | } |
1836 | |
1837 | //------------------------------------------------------------------------ |
1838 | // HasSingleEntryCycle: Perform a reverse flow walk from ENTRY, visiting |
1839 | // only blocks between TOP and BOTTOM, to determine if such a cycle |
1840 | // exists and if it has a single entry. |
1841 | // |
1842 | // Return Value: |
1843 | // true - Found a single-entry cycle. |
1844 | // false - Did not find a single-entry cycle. |
1845 | // |
1846 | // Notes: |
1847 | // Will mark (in `loopBlocks`) all blocks found to participate in the |
1848 | // cycle. |
1849 | // |
1850 | bool HasSingleEntryCycle() |
1851 | { |
1852 | // Now do a backwards flow walk from entry to see if we have a single-entry loop |
1853 | bool foundCycle = false; |
1854 | |
1855 | // Seed the loop block set and worklist with the entry block. |
1856 | loopBlocks.Reset(entry->bbNum); |
1857 | jitstd::list<BasicBlock*> worklist(comp->getAllocator()); |
1858 | worklist.push_back(entry); |
1859 | |
1860 | while (!worklist.empty()) |
1861 | { |
1862 | BasicBlock* block = worklist.back(); |
1863 | worklist.pop_back(); |
1864 | |
1865 | /* Make sure ENTRY dominates all blocks in the loop |
1866 | * This is necessary to ensure condition 2. above |
1867 | */ |
1868 | if (block->bbNum > oldBlockMaxNum) |
1869 | { |
1870 | // This is a new block we added to connect fall-through, so the |
1871 | // recorded dominator information doesn't cover it. Just continue, |
1872 | // and when we process its unique predecessor we'll abort if ENTRY |
1873 | // doesn't dominate that. |
1874 | } |
1875 | else if (!comp->fgDominate(entry, block)) |
1876 | { |
1877 | return false; |
1878 | } |
1879 | |
1880 | // Add preds to the worklist, checking for side-entries. |
1881 | for (flowList* predIter = block->bbPreds; predIter != nullptr; predIter = predIter->flNext) |
1882 | { |
1883 | BasicBlock* pred = predIter->flBlock; |
1884 | |
1885 | unsigned int testNum = PositionNum(pred); |
1886 | |
1887 | if ((testNum < top->bbNum) || (testNum > bottom->bbNum)) |
1888 | { |
1889 | // Pred is out of loop range |
1890 | if (block == entry) |
1891 | { |
1892 | if (pred == head) |
1893 | { |
1894 | // This is the single entry we expect. |
1895 | continue; |
1896 | } |
1897 | // ENTRY has some pred other than head outside the loop. If ENTRY does not |
1898 | // dominate this pred, we'll consider this a side-entry and skip this loop; |
1899 | // otherwise the loop is still valid and this may be a (flow-wise) back-edge |
1900 | // of an outer loop. For the dominance test, if `pred` is a new block, use |
1901 | // its unique predecessor since the dominator tree has info for that. |
1902 | BasicBlock* effectivePred = (pred->bbNum > oldBlockMaxNum ? pred->bbPrev : pred); |
1903 | if (comp->fgDominate(entry, effectivePred)) |
1904 | { |
1905 | // Outer loop back-edge |
1906 | continue; |
1907 | } |
1908 | } |
1909 | |
1910 | // There are multiple entries to this loop, don't consider it. |
1911 | return false; |
1912 | } |
1913 | |
1914 | bool isFirstVisit; |
1915 | if (pred == entry) |
1916 | { |
1917 | // We have indeed found a cycle in the flow graph. |
1918 | isFirstVisit = !foundCycle; |
1919 | foundCycle = true; |
1920 | assert(loopBlocks.IsMember(pred->bbNum)); |
1921 | } |
1922 | else if (loopBlocks.TestAndInsert(pred->bbNum)) |
1923 | { |
1924 | // Already visited this pred |
1925 | isFirstVisit = false; |
1926 | } |
1927 | else |
1928 | { |
1929 | // Add this pred to the worklist |
1930 | worklist.push_back(pred); |
1931 | isFirstVisit = true; |
1932 | } |
1933 | |
1934 | if (isFirstVisit && (pred->bbNext != nullptr) && (PositionNum(pred->bbNext) == pred->bbNum)) |
1935 | { |
// We've created a new block immediately after `pred` to
// reconnect what was fall-through. Mark it as in-loop also;
// it needs to stay with `pred`, and if it exits the loop we'd
// just need to re-create it if we tried to move it out.
1940 | loopBlocks.Insert(pred->bbNext->bbNum); |
1941 | } |
1942 | } |
1943 | } |
1944 | |
1945 | return foundCycle; |
1946 | } |
1947 | |
1948 | //------------------------------------------------------------------------ |
1949 | // PositionNum: Get the number identifying a block's position per the |
1950 | // lexical ordering that existed before searching for (and compacting) |
1951 | // loops. |
1952 | // |
1953 | // Arguments: |
1954 | // block - Block whose position is desired. |
1955 | // |
1956 | // Return Value: |
1957 | // A number indicating that block's position relative to others. |
1958 | // |
1959 | // Notes: |
1960 | // When the given block is a new one created during loop compaction, |
1961 | // the number of its unique predecessor is returned. |
1962 | // |
1963 | unsigned int PositionNum(BasicBlock* block) |
1964 | { |
1965 | if (block->bbNum > oldBlockMaxNum) |
1966 | { |
1967 | // This must be a block we inserted to connect fall-through after moving blocks. |
1968 | // To determine if it's in the loop or not, use the number of its unique predecessor |
1969 | // block. |
1970 | assert(block->bbPreds->flBlock == block->bbPrev); |
1971 | assert(block->bbPreds->flNext == nullptr); |
1972 | return block->bbPrev->bbNum; |
1973 | } |
1974 | return block->bbNum; |
1975 | } |
1976 | |
1977 | //------------------------------------------------------------------------ |
1978 | // MakeCompactAndFindExits: Compact the loop (sweep through it and move out |
1979 | // any blocks that aren't part of the flow cycle), and find the exits (set |
1980 | // lastExit and exitCount). |
1981 | // |
1982 | // Return Value: |
1983 | // true - Loop successfully compacted (or `loopBlocks` expanded to |
1984 | // include all blocks in the lexical range), exits enumerated. |
1985 | // false - Loop cannot be made compact and remain well-formed. |
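//
// Notes:
//    Illustration (hypothetical block numbers): if the lexical range TOP=BB03..BOTTOM=BB08
//    contains BB05..BB06 that do not participate in the flow cycle, those blocks are unlinked
//    and reinserted after an insertion point at or below BOTTOM, making the loop's blocks
//    lexically contiguous. Any fall-through broken by the move is patched by FixupFallThrough,
//    which may create new BBJ_ALWAYS blocks.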
1986 | // |
1987 | bool MakeCompactAndFindExits() |
1988 | { |
1989 | // Compaction (if it needs to happen) will require an insertion point. |
1990 | BasicBlock* moveAfter = nullptr; |
1991 | |
1992 | for (BasicBlock* previous = top->bbPrev; previous != bottom;) |
1993 | { |
1994 | BasicBlock* block = previous->bbNext; |
1995 | |
1996 | if (loopBlocks.IsMember(block->bbNum)) |
1997 | { |
1998 | // This block is a member of the loop. Check to see if it may exit the loop. |
1999 | CheckForExit(block); |
2000 | |
2001 | // Done processing this block; move on to the next. |
2002 | previous = block; |
2003 | continue; |
2004 | } |
2005 | |
// This block is lexically between TOP and BOTTOM, but it does not
2007 | // participate in the flow cycle. Check for a run of consecutive |
2008 | // such blocks. |
2009 | BasicBlock* lastNonLoopBlock = block; |
2010 | BasicBlock* nextLoopBlock = block->bbNext; |
2011 | while (!loopBlocks.IsMember(nextLoopBlock->bbNum)) |
2012 | { |
2013 | lastNonLoopBlock = nextLoopBlock; |
2014 | nextLoopBlock = nextLoopBlock->bbNext; |
2015 | // This loop must terminate because we know BOTTOM is in loopBlocks. |
2016 | } |
2017 | |
2018 | // Choose an insertion point for non-loop blocks if we haven't yet done so. |
2019 | if (moveAfter == nullptr) |
2020 | { |
2021 | moveAfter = FindInsertionPoint(); |
2022 | } |
2023 | |
2024 | if (!BasicBlock::sameEHRegion(previous, nextLoopBlock) || !BasicBlock::sameEHRegion(previous, moveAfter)) |
2025 | { |
2026 | // EH regions would be ill-formed if we moved these blocks out. |
2027 | // See if we can consider them loop blocks without introducing |
2028 | // a side-entry. |
2029 | if (CanTreatAsLoopBlocks(block, lastNonLoopBlock)) |
2030 | { |
// The call to `CanTreatAsLoopBlocks` marked these blocks as part of the loop;
2032 | // iterate without updating `previous` so that we'll analyze them as part |
2033 | // of the loop. |
2034 | continue; |
2035 | } |
2036 | else |
2037 | { |
2038 | // We can't move these out of the loop or leave them in, so just give |
2039 | // up on this loop. |
2040 | return false; |
2041 | } |
2042 | } |
2043 | |
2044 | // Now physically move the blocks. |
2045 | BasicBlock* moveBefore = moveAfter->bbNext; |
2046 | |
2047 | comp->fgUnlinkRange(block, lastNonLoopBlock); |
2048 | comp->fgMoveBlocksAfter(block, lastNonLoopBlock, moveAfter); |
2049 | comp->ehUpdateLastBlocks(moveAfter, lastNonLoopBlock); |
2050 | |
2051 | // Apply any adjustments needed for fallthrough at the boundaries of the moved region. |
2052 | FixupFallThrough(moveAfter, moveBefore, block); |
2053 | FixupFallThrough(lastNonLoopBlock, nextLoopBlock, moveBefore); |
2054 | // Also apply any adjustments needed where the blocks were snipped out of the loop. |
2055 | BasicBlock* newBlock = FixupFallThrough(previous, block, nextLoopBlock); |
2056 | if (newBlock != nullptr) |
2057 | { |
2058 | // This new block is in the loop and is a loop exit. |
2059 | loopBlocks.Insert(newBlock->bbNum); |
2060 | lastExit = newBlock; |
2061 | ++exitCount; |
2062 | } |
2063 | |
2064 | // Update moveAfter for the next insertion. |
2065 | moveAfter = lastNonLoopBlock; |
2066 | |
2067 | // Note that we've changed the flow graph, and continue without updating |
2068 | // `previous` so that we'll process nextLoopBlock. |
2069 | changedFlowGraph = true; |
2070 | } |
2071 | |
2072 | if ((exitCount == 1) && (lastExit == nullptr)) |
2073 | { |
2074 | // If we happen to have a loop with two exits, one of which goes to an |
2075 | // infinite loop that's lexically nested inside it, where the inner loop |
2076 | // can't be moved out, we can end up in this situation (because |
2077 | // CanTreatAsLoopBlocks will have decremented the count expecting to find |
2078 | // another exit later). Bump the exit count to 2, since downstream code |
2079 | // will not be prepared for null lastExit with exitCount of 1. |
2080 | assert(forgotExit); |
2081 | exitCount = 2; |
2082 | } |
2083 | |
2084 | // Loop compaction was successful |
2085 | return true; |
2086 | } |
2087 | |
2088 | //------------------------------------------------------------------------ |
2089 | // FindInsertionPoint: Find an appropriate spot to which blocks that are |
2090 | // lexically between TOP and BOTTOM but not part of the flow cycle |
2091 | // can be moved. |
2092 | // |
2093 | // Return Value: |
2094 | // Block after which to insert moved blocks. |
2095 | // |
2096 | BasicBlock* FindInsertionPoint() |
2097 | { |
2098 | // Find an insertion point for blocks we're going to move. Move them down |
2099 | // out of the loop, and if possible find a spot that won't break up fall-through. |
2100 | BasicBlock* moveAfter = bottom; |
2101 | while (moveAfter->bbFallsThrough()) |
2102 | { |
2103 | // Keep looking for a better insertion point if we can. |
2104 | BasicBlock* newMoveAfter = TryAdvanceInsertionPoint(moveAfter); |
2105 | |
2106 | if (newMoveAfter == nullptr) |
2107 | { |
2108 | // Ran out of candidate insertion points, so just split up the fall-through. |
2109 | return moveAfter; |
2110 | } |
2111 | |
2112 | moveAfter = newMoveAfter; |
2113 | } |
2114 | |
2115 | return moveAfter; |
2116 | } |
2117 | |
2118 | //------------------------------------------------------------------------ |
2119 | // TryAdvanceInsertionPoint: Find the next legal insertion point after |
2120 | // the given one, if one exists. |
2121 | // |
2122 | // Arguments: |
2123 | // oldMoveAfter - Prior insertion point; find the next after this. |
2124 | // |
2125 | // Return Value: |
2126 | // The next block after `oldMoveAfter` that is a legal insertion point |
2127 | // (i.e. blocks being swept out of the loop can be moved immediately |
2128 | // after it), if one exists, else nullptr. |
2129 | // |
2130 | BasicBlock* TryAdvanceInsertionPoint(BasicBlock* oldMoveAfter) |
2131 | { |
2132 | BasicBlock* newMoveAfter = oldMoveAfter->bbNext; |
2133 | |
2134 | if (!BasicBlock::sameEHRegion(oldMoveAfter, newMoveAfter)) |
2135 | { |
2136 | // Don't cross an EH region boundary. |
2137 | return nullptr; |
2138 | } |
2139 | |
2140 | if ((newMoveAfter->bbJumpKind == BBJ_ALWAYS) || (newMoveAfter->bbJumpKind == BBJ_COND)) |
2141 | { |
2142 | unsigned int destNum = newMoveAfter->bbJumpDest->bbNum; |
2143 | if ((destNum >= top->bbNum) && (destNum <= bottom->bbNum) && !loopBlocks.IsMember(destNum)) |
2144 | { |
2145 | // Reversing this branch out of block `newMoveAfter` could confuse this algorithm |
2146 | // (in particular, the edge would still be numerically backwards but no longer be |
2147 | // lexically backwards, so a lexical forward walk from TOP would not find BOTTOM), |
2148 | // so don't do that. |
2149 | // We're checking for BBJ_ALWAYS and BBJ_COND only here -- we don't need to |
2150 | // check for BBJ_SWITCH because we'd never consider it a loop back-edge. |
2151 | return nullptr; |
2152 | } |
2153 | } |
2154 | |
2155 | // Similarly check to see if advancing to `newMoveAfter` would reverse the lexical order |
2156 | // of an edge from the run of blocks being moved to `newMoveAfter` -- doing so would |
2157 | // introduce a new lexical back-edge, which could (maybe?) confuse the loop search |
2158 | // algorithm, and isn't desirable layout anyway. |
2159 | for (flowList* predIter = newMoveAfter->bbPreds; predIter != nullptr; predIter = predIter->flNext) |
2160 | { |
2161 | unsigned int predNum = predIter->flBlock->bbNum; |
2162 | |
2163 | if ((predNum >= top->bbNum) && (predNum <= bottom->bbNum) && !loopBlocks.IsMember(predNum)) |
2164 | { |
2165 | // Don't make this forward edge a backwards edge. |
2166 | return nullptr; |
2167 | } |
2168 | } |
2169 | |
2170 | if (IsRecordedBottom(newMoveAfter)) |
2171 | { |
2172 | // This is the BOTTOM of another loop; don't move any blocks past it, to avoid moving them |
2173 | // out of that loop (we should have already done so when processing that loop if it were legal). |
2174 | return nullptr; |
2175 | } |
2176 | |
2177 | // Advancing the insertion point is ok, except that we can't split up any CallFinally/BBJ_ALWAYS |
2178 | // pair, so if we've got such a pair recurse to see if we can move past the whole thing. |
2179 | return (newMoveAfter->isBBCallAlwaysPair() ? TryAdvanceInsertionPoint(newMoveAfter) : newMoveAfter); |
2180 | } |
2181 | |
2182 | //------------------------------------------------------------------------ |
// IsRecordedBottom: Determine if the given block is the BOTTOM of a previously
2184 | // recorded loop. |
2185 | // |
2186 | // Arguments: |
2187 | // block - Block to check for BOTTOM-ness. |
2188 | // |
2189 | // Return Value: |
// true - The block was recorded as the `bottom` of some earlier-processed loop.
2191 | // false - No loops yet recorded have this block as their `bottom`. |
2192 | // |
2193 | bool IsRecordedBottom(BasicBlock* block) |
2194 | { |
2195 | if (block->bbNum > oldBlockMaxNum) |
2196 | { |
2197 | // This is a new block, which can't be an outer bottom block because we only allow old blocks |
2198 | // as BOTTOM. |
2199 | return false; |
2200 | } |
2201 | return BlockSetOps::IsMember(comp, bottomBlocks, block->bbNum); |
2202 | } |
2203 | |
2204 | //------------------------------------------------------------------------ |
2205 | // CanTreatAsLoopBlocks: If the given range of blocks can be treated as |
// loop blocks, add them to `loopBlocks` and return true. Otherwise,
2207 | // return false. |
2208 | // |
2209 | // Arguments: |
2210 | // firstNonLoopBlock - First block in the run to be subsumed. |
2211 | // lastNonLoopBlock - Last block in the run to be subsumed. |
2212 | // |
2213 | // Return Value: |
// true - The blocks from `firstNonLoopBlock` to `lastNonLoopBlock` were
// successfully added to `loopBlocks`.
// false - Treating the blocks from `firstNonLoopBlock` to `lastNonLoopBlock`
// as loop blocks would not be legal (it would induce a side-entry).
2218 | // |
2219 | // Notes: |
2220 | // `loopBlocks` may be modified even if `false` is returned. |
2221 | // `exitCount` and `lastExit` may be modified if this process identifies |
2222 | // in-loop edges that were previously counted as exits. |
2223 | // |
2224 | bool CanTreatAsLoopBlocks(BasicBlock* firstNonLoopBlock, BasicBlock* lastNonLoopBlock) |
2225 | { |
2226 | BasicBlock* nextLoopBlock = lastNonLoopBlock->bbNext; |
2227 | for (BasicBlock* testBlock = firstNonLoopBlock; testBlock != nextLoopBlock; testBlock = testBlock->bbNext) |
2228 | { |
2229 | for (flowList* predIter = testBlock->bbPreds; predIter != nullptr; predIter = predIter->flNext) |
2230 | { |
2231 | BasicBlock* testPred = predIter->flBlock; |
2232 | unsigned int predPosNum = PositionNum(testPred); |
2233 | unsigned int firstNonLoopPosNum = PositionNum(firstNonLoopBlock); |
2234 | unsigned int lastNonLoopPosNum = PositionNum(lastNonLoopBlock); |
2235 | |
2236 | if (loopBlocks.IsMember(predPosNum) || |
2237 | ((predPosNum >= firstNonLoopPosNum) && (predPosNum <= lastNonLoopPosNum))) |
2238 | { |
2239 | // This pred is in the loop (or what will be the loop if we determine this |
2240 | // run of exit blocks doesn't include a side-entry). |
2241 | |
2242 | if (predPosNum < firstNonLoopPosNum) |
2243 | { |
2244 | // We've already counted this block as an exit, so decrement the count. |
2245 | --exitCount; |
2246 | if (lastExit == testPred) |
2247 | { |
2248 | // Erase this now-bogus `lastExit` entry. |
2249 | lastExit = nullptr; |
2250 | INDEBUG(forgotExit = true); |
2251 | } |
2252 | } |
2253 | } |
2254 | else |
2255 | { |
2256 | // This pred is not in the loop, so this constitutes a side-entry. |
2257 | return false; |
2258 | } |
2259 | } |
2260 | |
2261 | // Either we're going to abort the loop on a subsequent testBlock, or this |
2262 | // testBlock is part of the loop. |
2263 | loopBlocks.Insert(testBlock->bbNum); |
2264 | } |
2265 | |
2266 | // All blocks were ok to leave in the loop. |
2267 | return true; |
2268 | } |
2269 | |
2270 | //------------------------------------------------------------------------ |
2271 | // FixupFallThrough: Re-establish any broken control flow connectivity |
2272 | // and eliminate any "goto-next"s that were created by changing the |
2273 | // given block's lexical follower. |
2274 | // |
2275 | // Arguments: |
2276 | // block - Block whose `bbNext` has changed. |
2277 | // oldNext - Previous value of `block->bbNext`. |
2278 | // newNext - New value of `block->bbNext`. |
2279 | // |
2280 | // Return Value: |
2281 | // If a new block is created to reconnect flow, the new block is |
2282 | // returned; otherwise, nullptr. |
2283 | // |
2284 | BasicBlock* FixupFallThrough(BasicBlock* block, BasicBlock* oldNext, BasicBlock* newNext) |
2285 | { |
2286 | // If we create a new block, that will be our return value. |
2287 | BasicBlock* newBlock = nullptr; |
2288 | |
2289 | if (block->bbFallsThrough()) |
2290 | { |
2291 | // Need to reconnect the flow from `block` to `oldNext`. |
2292 | |
2293 | if ((block->bbJumpKind == BBJ_COND) && (block->bbJumpDest == newNext)) |
2294 | { |
2295 | /* Reverse the jump condition */ |
2296 | GenTree* test = block->lastNode(); |
2297 | noway_assert(test->OperIsConditionalJump()); |
2298 | |
2299 | if (test->OperGet() == GT_JTRUE) |
2300 | { |
2301 | GenTree* cond = comp->gtReverseCond(test->gtOp.gtOp1); |
2302 | assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node. |
2303 | test->gtOp.gtOp1 = cond; |
2304 | } |
2305 | else |
2306 | { |
2307 | comp->gtReverseCond(test); |
2308 | } |
2309 | |
2310 | // Redirect the Conditional JUMP to go to `oldNext` |
2311 | block->bbJumpDest = oldNext; |
2312 | } |
2313 | else |
2314 | { |
2315 | // Insert an unconditional jump to `oldNext` just after `block`. |
2316 | newBlock = comp->fgConnectFallThrough(block, oldNext); |
2317 | noway_assert((newBlock == nullptr) || loopBlocks.CanRepresent(newBlock->bbNum)); |
2318 | } |
2319 | } |
2320 | else if ((block->bbJumpKind == BBJ_ALWAYS) && (block->bbJumpDest == newNext)) |
2321 | { |
2322 | // We've made `block`'s jump target its bbNext, so remove the jump. |
2323 | if (!comp->fgOptimizeBranchToNext(block, newNext, block->bbPrev)) |
2324 | { |
2325 | // If optimizing away the goto-next failed for some reason, mark it KEEP_BBJ_ALWAYS to |
2326 | // prevent assertions from complaining about it. |
2327 | block->bbFlags |= BBF_KEEP_BBJ_ALWAYS; |
2328 | } |
2329 | } |
2330 | |
2331 | // Make sure we don't leave around a goto-next unless it's marked KEEP_BBJ_ALWAYS. |
assert(((block->bbJumpKind != BBJ_COND) && (block->bbJumpKind != BBJ_ALWAYS)) || (block->bbJumpDest != newNext) ||
       ((block->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0));
2334 | return newBlock; |
2335 | } |
2336 | |
2337 | //------------------------------------------------------------------------ |
2338 | // CheckForExit: Check if the given block has any successor edges that are |
2339 | // loop exits, and update `lastExit` and `exitCount` if so. |
2340 | // |
2341 | // Arguments: |
2342 | // block - Block whose successor edges are to be checked. |
2343 | // |
2344 | // Notes: |
2345 | // If one block has multiple exiting successor edges, those are counted |
2346 | // as multiple exits in `exitCount`. |
2347 | // |
2348 | void CheckForExit(BasicBlock* block) |
2349 | { |
2350 | BasicBlock* exitPoint; |
2351 | |
2352 | switch (block->bbJumpKind) |
2353 | { |
2354 | case BBJ_COND: |
2355 | case BBJ_CALLFINALLY: |
2356 | case BBJ_ALWAYS: |
2357 | case BBJ_EHCATCHRET: |
2358 | assert(block->bbJumpDest); |
2359 | exitPoint = block->bbJumpDest; |
2360 | |
2361 | if (!loopBlocks.IsMember(exitPoint->bbNum)) |
2362 | { |
2363 | /* exit from a block other than BOTTOM */ |
2364 | lastExit = block; |
2365 | exitCount++; |
2366 | } |
2367 | break; |
2368 | |
2369 | case BBJ_NONE: |
2370 | break; |
2371 | |
2372 | case BBJ_EHFINALLYRET: |
2373 | case BBJ_EHFILTERRET: |
2374 | /* The "try" associated with this "finally" must be in the |
2375 | * same loop, so the finally block will return control inside the loop */ |
2376 | break; |
2377 | |
2378 | case BBJ_THROW: |
2379 | case BBJ_RETURN: |
2380 | /* those are exits from the loop */ |
2381 | lastExit = block; |
2382 | exitCount++; |
2383 | break; |
2384 | |
2385 | case BBJ_SWITCH: |
2386 | |
2387 | unsigned jumpCnt; |
2388 | jumpCnt = block->bbJumpSwt->bbsCount; |
2389 | BasicBlock** jumpTab; |
2390 | jumpTab = block->bbJumpSwt->bbsDstTab; |
2391 | |
2392 | do |
2393 | { |
2394 | noway_assert(*jumpTab); |
2395 | exitPoint = *jumpTab; |
2396 | |
2397 | if (!loopBlocks.IsMember(exitPoint->bbNum)) |
2398 | { |
2399 | lastExit = block; |
2400 | exitCount++; |
2401 | } |
2402 | } while (++jumpTab, --jumpCnt); |
2403 | break; |
2404 | |
2405 | default: |
noway_assert(!"Unexpected bbJumpKind");
2407 | break; |
2408 | } |
2409 | |
2410 | if (block->bbFallsThrough() && !loopBlocks.IsMember(block->bbNext->bbNum)) |
2411 | { |
2412 | // Found a fall-through exit. |
2413 | lastExit = block; |
2414 | exitCount++; |
2415 | } |
2416 | } |
2417 | }; |
2418 | } |
2419 | |
2420 | /***************************************************************************** |
2421 | * Find the natural loops, using dominators. Note that the test for |
2422 | * a loop is slightly different from the standard one, because we have |
2423 | * not done a depth first reordering of the basic blocks. |
2424 | */ |
2425 | |
2426 | void Compiler::optFindNaturalLoops() |
2427 | { |
2428 | #ifdef DEBUG |
2429 | if (verbose) |
2430 | { |
2431 | printf("*************** In optFindNaturalLoops()\n" ); |
2432 | } |
2433 | #endif // DEBUG |
2434 | |
2435 | noway_assert(fgDomsComputed); |
2436 | assert(fgHasLoops); |
2437 | |
2438 | #if COUNT_LOOPS |
2439 | hasMethodLoops = false; |
2440 | loopsThisMethod = 0; |
2441 | loopOverflowThisMethod = false; |
2442 | #endif |
2443 | |
2444 | LoopSearch search(this); |
2445 | |
2446 | for (BasicBlock* head = fgFirstBB; head->bbNext; head = head->bbNext) |
2447 | { |
2448 | BasicBlock* top = head->bbNext; |
2449 | |
2450 | // Blocks that are rarely run have a zero bbWeight and should |
2451 | // never be optimized here |
2452 | |
2453 | if (top->bbWeight == BB_ZERO_WEIGHT) |
2454 | { |
2455 | continue; |
2456 | } |
2457 | |
2458 | for (flowList* pred = top->bbPreds; pred; pred = pred->flNext) |
2459 | { |
2460 | if (search.FindLoop(head, top, pred->flBlock)) |
2461 | { |
2462 | // Found a loop; record it and see if we've hit the limit. |
2463 | bool recordedLoop = search.RecordLoop(); |
2464 | |
(void)recordedLoop; // avoid unused variable warnings in COUNT_LOOPS and !DEBUG
2466 | |
2467 | #if COUNT_LOOPS |
2468 | if (!hasMethodLoops) |
2469 | { |
2470 | /* mark the method as containing natural loops */ |
2471 | totalLoopMethods++; |
2472 | hasMethodLoops = true; |
2473 | } |
2474 | |
2475 | /* increment total number of loops found */ |
2476 | totalLoopCount++; |
2477 | loopsThisMethod++; |
2478 | |
2479 | /* keep track of the number of exits */ |
2480 | loopExitCountTable.record(static_cast<unsigned>(exitCount)); |
2481 | #else // COUNT_LOOPS |
2482 | assert(recordedLoop); |
2483 | if (optLoopCount == MAX_LOOP_NUM) |
2484 | { |
2485 | // We won't be able to record any more loops, so stop looking. |
2486 | goto NO_MORE_LOOPS; |
2487 | } |
2488 | #endif // COUNT_LOOPS |
2489 | |
2490 | // Continue searching preds of `top` to see if any other are |
2491 | // back-edges (this can happen for nested loops). The iteration |
2492 | // is safe because the compaction we do only modifies predecessor |
2493 | // lists of blocks that gain or lose fall-through from their |
2494 | // `bbPrev`, but since the motion is from within the loop to below |
2495 | // it, we know we're not altering the relationship between `top` |
2496 | // and its `bbPrev`. |
2497 | } |
2498 | } |
2499 | } |
2500 | NO_MORE_LOOPS: |
2501 | |
2502 | #if COUNT_LOOPS |
2503 | loopCountTable.record(loopsThisMethod); |
2504 | if (maxLoopsPerMethod < loopsThisMethod) |
2505 | { |
2506 | maxLoopsPerMethod = loopsThisMethod; |
2507 | } |
2508 | if (loopOverflowThisMethod) |
2509 | { |
2510 | totalLoopOverflows++; |
2511 | } |
2512 | #endif // COUNT_LOOPS |
2513 | |
2514 | bool mod = search.ChangedFlowGraph(); |
2515 | |
2516 | if (mod) |
2517 | { |
2518 | // Need to renumber blocks now since loop canonicalization |
2519 | // depends on it; can defer the rest of fgUpdateChangedFlowGraph() |
2520 | // until after canonicalizing loops. Dominator information is |
2521 | // recorded in terms of block numbers, so flag it invalid. |
2522 | fgDomsComputed = false; |
2523 | fgRenumberBlocks(); |
2524 | } |
2525 | |
2526 | // Now the loop indices are stable. We can figure out parent/child relationships |
2527 | // (using table indices to name loops), and label blocks. |
2528 | for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++) |
2529 | { |
2530 | for (unsigned char possibleParent = loopInd; possibleParent > 0;) |
2531 | { |
2532 | possibleParent--; |
2533 | if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd])) |
2534 | { |
2535 | optLoopTable[loopInd].lpParent = possibleParent; |
2536 | optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild; |
2537 | optLoopTable[possibleParent].lpChild = loopInd; |
2538 | break; |
2539 | } |
2540 | } |
2541 | } |
2542 | |
2543 | // Now label the blocks with the innermost loop to which they belong. Since parents |
2544 | // precede children in the table, doing the labeling for each loop in order will achieve |
2545 | // this -- the innermost loop labeling will be done last. |
2546 | for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) |
2547 | { |
2548 | BasicBlock* first = optLoopTable[loopInd].lpFirst; |
2549 | BasicBlock* bottom = optLoopTable[loopInd].lpBottom; |
2550 | for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext) |
2551 | { |
2552 | blk->bbNatLoopNum = loopInd; |
2553 | if (blk == bottom) |
2554 | { |
2555 | break; |
2556 | } |
2557 | assert(blk->bbNext != nullptr); // We should never reach nullptr. |
2558 | } |
2559 | } |
2560 | |
2561 | // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop" |
2562 | // one, if necessary, for loops containing others that share a "top." |
2563 | for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) |
2564 | { |
2565 | // Traverse the outermost loops as entries into the loop nest; so skip non-outermost. |
2566 | if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP) |
2567 | { |
2568 | continue; |
2569 | } |
2570 | |
2571 | // Otherwise... |
2572 | if (optCanonicalizeLoopNest(loopInd)) |
2573 | { |
2574 | mod = true; |
2575 | } |
2576 | } |
2577 | if (mod) |
2578 | { |
2579 | fgUpdateChangedFlowGraph(); |
2580 | } |
2581 | |
2582 | #ifdef DEBUG |
2583 | if (verbose && optLoopCount > 0) |
2584 | { |
2585 | printf("\nFinal natural loop table:\n" ); |
2586 | for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++) |
2587 | { |
2588 | optPrintLoopInfo(loopInd); |
2589 | printf("\n" ); |
2590 | } |
2591 | } |
2592 | #endif // DEBUG |
2593 | } |
2594 | |
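// Redirect the jump target(s) of 'blk' according to 'redirectMap': any jump destination
// (or switch table entry) that appears as a key in the map is replaced by the corresponding
// value. Note that this does not update the predecessor lists.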
2595 | void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap) |
2596 | { |
2597 | BasicBlock* newJumpDest = nullptr; |
2598 | switch (blk->bbJumpKind) |
2599 | { |
2600 | case BBJ_THROW: |
2601 | case BBJ_RETURN: |
2602 | case BBJ_NONE: |
2603 | case BBJ_EHFILTERRET: |
2604 | case BBJ_EHFINALLYRET: |
2605 | case BBJ_EHCATCHRET: |
2606 | // These have no jump destination to update. |
2607 | break; |
2608 | |
2609 | case BBJ_ALWAYS: |
2610 | case BBJ_LEAVE: |
2611 | case BBJ_CALLFINALLY: |
2612 | case BBJ_COND: |
2613 | // All of these have a single jump destination to update. |
2614 | if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest)) |
2615 | { |
2616 | blk->bbJumpDest = newJumpDest; |
2617 | } |
2618 | break; |
2619 | |
2620 | case BBJ_SWITCH: |
2621 | { |
2622 | bool redirected = false; |
2623 | for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++) |
2624 | { |
2625 | if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest)) |
2626 | { |
2627 | blk->bbJumpSwt->bbsDstTab[i] = newJumpDest; |
2628 | redirected = true; |
2629 | } |
2630 | } |
// If any redirections happened, invalidate the switch table map for the switch.
2632 | if (redirected) |
2633 | { |
2634 | // Don't create a new map just to try to remove an entry. |
2635 | BlockToSwitchDescMap* switchMap = GetSwitchDescMap(/* createIfNull */ false); |
2636 | if (switchMap != nullptr) |
2637 | { |
2638 | switchMap->Remove(blk); |
2639 | } |
2640 | } |
2641 | } |
2642 | break; |
2643 | |
2644 | default: |
2645 | unreached(); |
2646 | } |
2647 | } |
2648 | |
2649 | // TODO-Cleanup: This should be a static member of the BasicBlock class. |
2650 | void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to) |
2651 | { |
2652 | assert(from->bbJumpKind == to->bbJumpKind); // Precondition. |
2653 | |
2654 | // copy the jump destination(s) from "from" to "to". |
2655 | switch (to->bbJumpKind) |
2656 | { |
2657 | case BBJ_ALWAYS: |
2658 | case BBJ_LEAVE: |
2659 | case BBJ_CALLFINALLY: |
2660 | case BBJ_COND: |
2661 | // All of these have a single jump destination to update. |
2662 | to->bbJumpDest = from->bbJumpDest; |
2663 | break; |
2664 | |
2665 | case BBJ_SWITCH: |
2666 | { |
2667 | to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc(); |
2668 | to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount; |
2669 | to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount]; |
2670 | |
2671 | for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++) |
2672 | { |
2673 | to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i]; |
2674 | } |
2675 | } |
2676 | break; |
2677 | |
2678 | default: |
2679 | break; |
2680 | } |
2681 | } |
2682 | |
2683 | // Canonicalize the loop nest rooted at parent loop 'loopInd'. |
2684 | // Returns 'true' if the flow graph is modified. |
2685 | bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd) |
2686 | { |
2687 | bool modified = false; |
2688 | |
// If the top of this loop is labeled as belonging to a nested loop, give this loop a unique top.
2690 | if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd) |
2691 | { |
2692 | if (optCanonicalizeLoop(loopInd)) |
2693 | { |
2694 | modified = true; |
2695 | } |
2696 | } |
2697 | |
2698 | for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP; |
2699 | child = optLoopTable[child].lpSibling) |
2700 | { |
2701 | if (optCanonicalizeLoopNest(child)) |
2702 | { |
2703 | modified = true; |
2704 | } |
2705 | } |
2706 | |
2707 | return modified; |
2708 | } |
2709 | |
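// Canonicalize loop 'loopInd': if its "top" block is currently labeled as belonging to a more
// deeply nested loop, insert a new empty block to serve as this loop's unique "top".
// Returns 'true' if the flow graph is modified.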
2710 | bool Compiler::optCanonicalizeLoop(unsigned char loopInd) |
2711 | { |
2712 | // Is the top uniquely part of the current loop? |
2713 | BasicBlock* t = optLoopTable[loopInd].lpTop; |
2714 | |
2715 | if (t->bbNatLoopNum == loopInd) |
2716 | { |
2717 | return false; |
2718 | } |
2719 | |
2720 | JITDUMP("in optCanonicalizeLoop: L%02u has top " FMT_BB " (bottom " FMT_BB |
2721 | ") with natural loop number L%02u: need to " |
2722 | "canonicalize\n" , |
2723 | loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum); |
2724 | |
2725 | // Otherwise, the top of this loop is also part of a nested loop. |
2726 | // |
2727 | // Insert a new unique top for this loop. We must be careful to put this new |
2728 | // block in the correct EH region. Note that f->bbPrev might be in a different |
2729 | // EH region. For example: |
2730 | // |
2731 | // try { |
2732 | // ... |
2733 | // BB07 |
2734 | // } |
2735 | // BB08 // "first" |
2736 | // |
2737 | // In this case, first->bbPrev is BB07, which is in a different 'try' region. |
2738 | // On the other hand, the first block of multiple loops might be the first |
2739 | // block of a 'try' region that is completely contained in the multiple loops. |
2740 | // for example: |
2741 | // |
2742 | // BB08 try { } |
2743 | // ... |
2744 | // BB10 BBJ_ALWAYS => BB08 |
2745 | // ... |
2746 | // BB12 BBJ_ALWAYS => BB08 |
2747 | // |
2748 | // Here, we have two loops, both with BB08 as the "first" block. Block BB08 |
2749 | // is a single-block "try" region. Neither loop "bottom" block is in the same |
2750 | // "try" region as BB08. This is legal because you can jump to the first block |
2751 | // of a try region. With EH normalization, no two "try" regions will share |
2752 | // this block. In this case, we need to insert a new block for the outer loop |
2753 | // in the same EH region as the branch from the "bottom": |
2754 | // |
2755 | // BB30 BBJ_NONE |
2756 | // BB08 try { } |
2757 | // ... |
2758 | // BB10 BBJ_ALWAYS => BB08 |
2759 | // ... |
2760 | // BB12 BBJ_ALWAYS => BB30 |
2761 | // |
2762 | // Another possibility is that the "first" block of the loop nest can be the first block |
2763 | // of a "try" region that also has other predecessors than those in the loop, or even in |
2764 | // the "try" region (since blocks can target the first block of a "try" region). For example: |
2765 | // |
2766 | // BB08 try { |
2767 | // ... |
2768 | // BB10 BBJ_ALWAYS => BB08 |
2769 | // ... |
2770 | // BB12 BBJ_ALWAYS => BB08 |
2771 | // BB13 } |
2772 | // ... |
2773 | // BB20 BBJ_ALWAYS => BB08 |
2774 | // ... |
2775 | // BB25 BBJ_ALWAYS => BB08 |
2776 | // |
2777 | // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop |
2778 | // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the |
2779 | // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within |
2780 | // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the |
2781 | // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches |
2782 | // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal |
2783 | // situation of branching to a non-first block of a 'try' region. |
2784 | // |
2785 | // We can also have a loop nest where the "first" block is outside of a "try" region |
2786 | // and the back edges are inside a "try" region, for example: |
2787 | // |
2788 | // BB02 // "first" |
2789 | // ... |
2790 | // BB09 try { BBJ_COND => BB02 |
2791 | // ... |
2792 | // BB15 BBJ_COND => BB02 |
2793 | // ... |
2794 | // BB21 } // end of "try" |
2795 | // |
2796 | // In this case, both loop back edges were formed by "leave" instructions that were |
2797 | // imported into branches that were later made conditional. In this case, we don't |
2798 | // want to copy the EH region of the back edge, since that would create a block |
2799 | // outside of and disjoint with the "try" region of the back edge. However, to |
2800 | // simplify things, we disqualify this type of loop, so we should never see this here. |
2801 | |
2802 | BasicBlock* h = optLoopTable[loopInd].lpHead; |
2803 | BasicBlock* f = optLoopTable[loopInd].lpFirst; |
2804 | BasicBlock* b = optLoopTable[loopInd].lpBottom; |
2805 | |
2806 | // The loop must be entirely contained within a single handler region. |
2807 | assert(BasicBlock::sameHndRegion(f, b)); |
2808 | |
2809 | // If the bottom block is in the same "try" region, then we extend the EH |
2810 | // region. Otherwise, we add the new block outside the "try" region. |
2811 | bool extendRegion = BasicBlock::sameTryRegion(f, b); |
2812 | BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion); |
2813 | if (!extendRegion) |
2814 | { |
2815 | // We need to set the EH region manually. Set it to be the same |
2816 | // as the bottom block. |
2817 | newT->copyEHRegion(b); |
2818 | } |
2819 | |
2820 | // The new block can reach the same set of blocks as the old one, but don't try to reflect |
2821 | // that in its reachability set here -- creating the new block may have changed the BlockSet |
2822 | // representation from short to long, and canonicalizing loops is immediately followed by |
2823 | // a call to fgUpdateChangedFlowGraph which will recompute the reachability sets anyway. |
2824 | |
2825 | // Redirect the "bottom" of the current loop to "newT". |
2826 | BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist()); |
2827 | blockMap->Set(t, newT); |
2828 | optRedirectBlock(b, blockMap); |
2829 | |
2830 | // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue |
// to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2832 | // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However, |
2833 | // they might have been prevented from participating in the loop nest due to different EH nesting, or some |
2834 | // other reason. |
2835 | // |
2836 | // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed |
2837 | // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times. |
2838 | // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source |
2839 | // edge of the blockMap, so nothing will happen. |
2840 | bool firstPred = true; |
2841 | for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext) |
2842 | { |
2843 | BasicBlock* topPredBlock = topPred->flBlock; |
2844 | |
2845 | // Skip if topPredBlock is in the loop. |
2846 | // Note that this uses block number to detect membership in the loop. We are adding blocks during |
2847 | // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work |
2848 | // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists. |
2849 | if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum) |
2850 | { |
2851 | JITDUMP("in optCanonicalizeLoop: 'top' predecessor " FMT_BB " is in the range of L%02u (" FMT_BB ".." FMT_BB |
2852 | "); not " |
2853 | "redirecting its bottom edge\n" , |
2854 | topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum); |
2855 | continue; |
2856 | } |
2857 | |
2858 | JITDUMP("in optCanonicalizeLoop: redirect top predecessor " FMT_BB " to " FMT_BB "\n" , topPredBlock->bbNum, |
2859 | newT->bbNum); |
2860 | optRedirectBlock(topPredBlock, blockMap); |
2861 | |
2862 | // When we have profile data then the 'newT' block will inherit topPredBlock profile weight |
2863 | if (topPredBlock->hasProfileWeight()) |
2864 | { |
2865 | // This corrects an issue when the topPredBlock has a profile based weight |
2866 | // |
2867 | if (firstPred) |
2868 | { |
2869 | JITDUMP("in optCanonicalizeLoop: block " FMT_BB " will inheritWeight from " FMT_BB "\n" , newT->bbNum, |
2870 | topPredBlock->bbNum); |
2871 | |
2872 | newT->inheritWeight(topPredBlock); |
2873 | firstPred = false; |
2874 | } |
2875 | else |
2876 | { |
2877 | JITDUMP("in optCanonicalizeLoop: block " FMT_BB " will also contribute to the weight of " FMT_BB "\n" , |
2878 | newT->bbNum, topPredBlock->bbNum); |
2879 | |
2880 | BasicBlock::weight_t newWeight = newT->getBBWeight(this) + topPredBlock->getBBWeight(this); |
2881 | newT->setBBWeight(newWeight); |
2882 | } |
2883 | } |
2884 | } |
2885 | |
2886 | assert(newT->bbNext == f); |
2887 | if (f != t) |
2888 | { |
2889 | newT->bbJumpKind = BBJ_ALWAYS; |
2890 | newT->bbJumpDest = t; |
2891 | newT->bbTreeList = nullptr; |
2892 | fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr))); |
2893 | } |
2894 | |
2895 | // If it had been a do-while loop (top == entry), update entry, as well. |
2896 | BasicBlock* origE = optLoopTable[loopInd].lpEntry; |
2897 | if (optLoopTable[loopInd].lpTop == origE) |
2898 | { |
2899 | optLoopTable[loopInd].lpEntry = newT; |
2900 | } |
2901 | optLoopTable[loopInd].lpTop = newT; |
2902 | optLoopTable[loopInd].lpFirst = newT; |
2903 | |
2904 | newT->bbNatLoopNum = loopInd; |
2905 | |
2906 | JITDUMP("in optCanonicalizeLoop: made new block " FMT_BB " [%p] the new unique top of loop %d.\n" , newT->bbNum, |
2907 | dspPtr(newT), loopInd); |
2908 | |
2909 | // Make sure the head block still goes to the entry... |
2910 | if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry) |
2911 | { |
2912 | h->bbJumpKind = BBJ_ALWAYS; |
2913 | h->bbJumpDest = optLoopTable[loopInd].lpEntry; |
2914 | } |
2915 | else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry) |
2916 | { |
2917 | BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true); |
2918 | optLoopTable[loopInd].lpHead = h2; |
2919 | h2->bbJumpDest = optLoopTable[loopInd].lpEntry; |
2920 | h2->bbTreeList = nullptr; |
2921 | fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr))); |
2922 | } |
2923 | |
2924 | // If any loops nested in "loopInd" have the same head and entry as "loopInd", |
2925 | // it must be the case that they were do-while's (since "h" fell through to the entry). |
2926 | // The new node "newT" becomes the head of such loops. |
2927 | for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP; |
2928 | childLoop = optLoopTable[childLoop].lpSibling) |
2929 | { |
2930 | if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h && |
2931 | newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE) |
2932 | { |
2933 | optUpdateLoopHead(childLoop, h, newT); |
2934 | } |
2935 | } |
2936 | return true; |
2937 | } |
2938 | |
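// Returns true if loop 'l1' contains loop 'l2' (a loop is considered to contain itself),
// determined by walking up the parent chain of 'l2'.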
2939 | bool Compiler::optLoopContains(unsigned l1, unsigned l2) |
2940 | { |
2941 | assert(l1 != BasicBlock::NOT_IN_LOOP); |
2942 | if (l1 == l2) |
2943 | { |
2944 | return true; |
2945 | } |
2946 | else if (l2 == BasicBlock::NOT_IN_LOOP) |
2947 | { |
2948 | return false; |
2949 | } |
2950 | else |
2951 | { |
2952 | return optLoopContains(l1, optLoopTable[l2].lpParent); |
2953 | } |
2954 | } |
2955 | |
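// Change the recorded 'head' of loop 'loopInd' from 'from' to 'to', and do the same for any
// child loops that also had 'from' as their head.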
2956 | void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to) |
2957 | { |
2958 | assert(optLoopTable[loopInd].lpHead == from); |
2959 | optLoopTable[loopInd].lpHead = to; |
2960 | for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP; |
2961 | childLoop = optLoopTable[childLoop].lpSibling) |
2962 | { |
2963 | if (optLoopTable[childLoop].lpHead == from) |
2964 | { |
2965 | optUpdateLoopHead(childLoop, from, to); |
2966 | } |
2967 | } |
2968 | } |
2969 | |
2970 | /***************************************************************************** |
 * Returns true if the "i += const" will cause an overflow exception for the small types.
2972 | */ |
2973 | |
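// Illustrative example (not from the original source): for a TYP_BYTE iterator, an exit value of
// 128 exceeds SCHAR_MAX (127), so jitIterSmallOverflow(128, TYP_BYTE) returns true and
// optComputeLoopRep gives up on computing a trip count for that loop.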
2974 | bool jitIterSmallOverflow(int iterAtExit, var_types incrType) |
2975 | { |
2976 | int type_MAX; |
2977 | |
2978 | switch (incrType) |
2979 | { |
2980 | case TYP_BYTE: |
2981 | type_MAX = SCHAR_MAX; |
2982 | break; |
2983 | case TYP_UBYTE: |
2984 | type_MAX = UCHAR_MAX; |
2985 | break; |
2986 | case TYP_SHORT: |
2987 | type_MAX = SHRT_MAX; |
2988 | break; |
2989 | case TYP_USHORT: |
2990 | type_MAX = USHRT_MAX; |
2991 | break; |
2992 | |
2993 | case TYP_UINT: // Detected by checking for 32bit .... |
2994 | case TYP_INT: |
2995 | return false; // ... overflow same as done for TYP_INT |
2996 | |
2997 | default: |
2998 | NO_WAY("Bad type" ); |
2999 | } |
3000 | |
3001 | if (iterAtExit > type_MAX) |
3002 | { |
3003 | return true; |
3004 | } |
3005 | else |
3006 | { |
3007 | return false; |
3008 | } |
3009 | } |
3010 | |
3011 | /***************************************************************************** |
 * Returns true if the "i -= const" will cause an underflow exception for the small types.
3013 | */ |
3014 | |
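// Illustrative example (not from the original source): for a TYP_UBYTE iterator decremented past
// zero, an exit value of -1 is below the type's minimum of 0, so jitIterSmallUnderflow(-1, TYP_UBYTE)
// returns true and the trip count computation bails out.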
3015 | bool jitIterSmallUnderflow(int iterAtExit, var_types decrType) |
3016 | { |
3017 | int type_MIN; |
3018 | |
3019 | switch (decrType) |
3020 | { |
3021 | case TYP_BYTE: |
3022 | type_MIN = SCHAR_MIN; |
3023 | break; |
3024 | case TYP_SHORT: |
3025 | type_MIN = SHRT_MIN; |
3026 | break; |
3027 | case TYP_UBYTE: |
3028 | type_MIN = 0; |
3029 | break; |
3030 | case TYP_USHORT: |
3031 | type_MIN = 0; |
3032 | break; |
3033 | |
3034 | case TYP_UINT: // Detected by checking for 32bit .... |
3035 | case TYP_INT: |
3036 | return false; // ... underflow same as done for TYP_INT |
3037 | |
3038 | default: |
3039 | NO_WAY("Bad type" ); |
3040 | } |
3041 | |
3042 | if (iterAtExit < type_MIN) |
3043 | { |
3044 | return true; |
3045 | } |
3046 | else |
3047 | { |
3048 | return false; |
3049 | } |
3050 | } |
3051 | |
3052 | /***************************************************************************** |
3053 | * |
 * Helper for loop unrolling - Computes the number of repetitions
 * of a constant loop. Returns false if it cannot prove that the number is constant.
3056 | */ |
3057 | |
3058 | bool Compiler::optComputeLoopRep(int constInit, |
3059 | int constLimit, |
3060 | int iterInc, |
3061 | genTreeOps iterOper, |
3062 | var_types iterOperType, |
3063 | genTreeOps testOper, |
3064 | bool unsTest, |
3065 | bool dupCond, |
3066 | unsigned* iterCount) |
3067 | { |
3068 | noway_assert(genActualType(iterOperType) == TYP_INT); |
3069 | |
3070 | __int64 constInitX; |
3071 | __int64 constLimitX; |
3072 | |
3073 | unsigned loopCount; |
3074 | int iterSign; |
3075 | |
// Promote the limit to 64 bits (zero- or sign-extended according to unsTest) so that we
// can just do signed comparisons with the other 32 bit values.
3077 | if (unsTest) |
3078 | { |
3079 | constLimitX = (unsigned int)constLimit; |
3080 | } |
3081 | else |
3082 | { |
3083 | constLimitX = (signed int)constLimit; |
3084 | } |
3085 | |
3086 | switch (iterOperType) |
3087 | { |
// For small types, the iteration operator will narrow these values if they are too big
3089 | |
3090 | #define INIT_ITER_BY_TYPE(type) \ |
3091 | constInitX = (type)constInit; \ |
3092 | iterInc = (type)iterInc; |
3093 | |
3094 | case TYP_BYTE: |
3095 | INIT_ITER_BY_TYPE(signed char); |
3096 | break; |
3097 | case TYP_UBYTE: |
3098 | INIT_ITER_BY_TYPE(unsigned char); |
3099 | break; |
3100 | case TYP_SHORT: |
3101 | INIT_ITER_BY_TYPE(signed short); |
3102 | break; |
3103 | case TYP_USHORT: |
3104 | INIT_ITER_BY_TYPE(unsigned short); |
3105 | break; |
3106 | |
3107 | // For the big types, 32 bit arithmetic is performed |
3108 | |
3109 | case TYP_INT: |
3110 | case TYP_UINT: |
3111 | if (unsTest) |
3112 | { |
3113 | constInitX = (unsigned int)constInit; |
3114 | } |
3115 | else |
3116 | { |
3117 | constInitX = (signed int)constInit; |
3118 | } |
3119 | break; |
3120 | |
3121 | default: |
noway_assert(!"Bad type");
NO_WAY("Bad type");
3124 | } |
3125 | |
3126 | /* If iterInc is zero we have an infinite loop */ |
3127 | if (iterInc == 0) |
3128 | { |
3129 | return false; |
3130 | } |
3131 | |
3132 | /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */ |
3133 | iterSign = (iterInc > 0) ? +1 : -1; |
3134 | |
3135 | /* Initialize loopCount to zero */ |
3136 | loopCount = 0; |
3137 | |
// If dupCond is true then the loop head contains a test which skips
// this loop if constInit does not pass the loop test; such a loop can
// execute zero times. If dupCond is false then we have a true do-while
// loop and we always execute the loop body once before performing the
// loop test.
3143 | if (!dupCond) |
3144 | { |
3145 | loopCount += 1; |
3146 | constInitX += iterInc; |
3147 | } |
3148 | |
3149 | // bail if count is based on wrap-around math |
3150 | if (iterInc > 0) |
3151 | { |
3152 | if (constLimitX < constInitX) |
3153 | { |
3154 | return false; |
3155 | } |
3156 | } |
3157 | else if (constLimitX > constInitX) |
3158 | { |
3159 | return false; |
3160 | } |
3161 | |
3162 | /* Compute the number of repetitions */ |
3163 | |
3164 | switch (testOper) |
3165 | { |
3166 | __int64 iterAtExitX; |
3167 | |
3168 | case GT_EQ: |
3169 | /* something like "for (i=init; i == lim; i++)" doesn't make any sense */ |
3170 | return false; |
3171 | |
3172 | case GT_NE: |
3173 | /* "for (i=init; i != lim; i+=const)" - this is tricky since it may |
3174 | * have a constant number of iterations or loop forever - |
3175 | * we have to compute (lim-init) mod iterInc to see if it is zero. |
 * If mod iterInc is not zero then the limit test will miss and a wrap will occur,
 * which is probably not what the end user wanted, but it is legal.
3178 | */ |
3179 | |
3180 | if (iterInc > 0) |
3181 | { |
3182 | /* Stepping by one, i.e. Mod with 1 is always zero */ |
3183 | if (iterInc != 1) |
3184 | { |
3185 | if (((constLimitX - constInitX) % iterInc) != 0) |
3186 | { |
3187 | return false; |
3188 | } |
3189 | } |
3190 | } |
3191 | else |
3192 | { |
3193 | noway_assert(iterInc < 0); |
3194 | /* Stepping by -1, i.e. Mod with 1 is always zero */ |
3195 | if (iterInc != -1) |
3196 | { |
3197 | if (((constInitX - constLimitX) % (-iterInc)) != 0) |
3198 | { |
3199 | return false; |
3200 | } |
3201 | } |
3202 | } |
3203 | |
3204 | switch (iterOper) |
3205 | { |
3206 | case GT_SUB: |
3207 | iterInc = -iterInc; |
3208 | __fallthrough; |
3209 | |
3210 | case GT_ADD: |
3211 | if (constInitX != constLimitX) |
3212 | { |
3213 | loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1; |
3214 | } |
3215 | |
3216 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3217 | |
3218 | if (unsTest) |
3219 | { |
3220 | iterAtExitX = (unsigned)iterAtExitX; |
3221 | } |
3222 | |
3223 | // Check if iteration incr will cause overflow for small types |
3224 | if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) |
3225 | { |
3226 | return false; |
3227 | } |
3228 | |
3229 | // iterator with 32bit overflow. Bad for TYP_(U)INT |
3230 | if (iterAtExitX < constLimitX) |
3231 | { |
3232 | return false; |
3233 | } |
3234 | |
3235 | *iterCount = loopCount; |
3236 | return true; |
3237 | |
3238 | case GT_MUL: |
3239 | case GT_DIV: |
3240 | case GT_RSH: |
3241 | case GT_LSH: |
3242 | case GT_UDIV: |
3243 | return false; |
3244 | |
3245 | default: |
3246 | noway_assert(!"Unknown operator for loop iterator" ); |
3247 | return false; |
3248 | } |
3249 | |
3250 | case GT_LT: |
3251 | switch (iterOper) |
3252 | { |
3253 | case GT_SUB: |
3254 | iterInc = -iterInc; |
3255 | __fallthrough; |
3256 | |
3257 | case GT_ADD: |
3258 | if (constInitX < constLimitX) |
3259 | { |
3260 | loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1; |
3261 | } |
3262 | |
3263 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3264 | |
3265 | if (unsTest) |
3266 | { |
3267 | iterAtExitX = (unsigned)iterAtExitX; |
3268 | } |
3269 | |
3270 | // Check if iteration incr will cause overflow for small types |
3271 | if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) |
3272 | { |
3273 | return false; |
3274 | } |
3275 | |
3276 | // iterator with 32bit overflow. Bad for TYP_(U)INT |
3277 | if (iterAtExitX < constLimitX) |
3278 | { |
3279 | return false; |
3280 | } |
3281 | |
3282 | *iterCount = loopCount; |
3283 | return true; |
3284 | |
3285 | case GT_MUL: |
3286 | case GT_DIV: |
3287 | case GT_RSH: |
3288 | case GT_LSH: |
3289 | case GT_UDIV: |
3290 | return false; |
3291 | |
3292 | default: |
3293 | noway_assert(!"Unknown operator for loop iterator" ); |
3294 | return false; |
3295 | } |
3296 | |
3297 | case GT_LE: |
3298 | switch (iterOper) |
3299 | { |
3300 | case GT_SUB: |
3301 | iterInc = -iterInc; |
3302 | __fallthrough; |
3303 | |
3304 | case GT_ADD: |
3305 | if (constInitX <= constLimitX) |
3306 | { |
3307 | loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1; |
3308 | } |
3309 | |
3310 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3311 | |
3312 | if (unsTest) |
3313 | { |
3314 | iterAtExitX = (unsigned)iterAtExitX; |
3315 | } |
3316 | |
3317 | // Check if iteration incr will cause overflow for small types |
3318 | if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) |
3319 | { |
3320 | return false; |
3321 | } |
3322 | |
3323 | // iterator with 32bit overflow. Bad for TYP_(U)INT |
3324 | if (iterAtExitX <= constLimitX) |
3325 | { |
3326 | return false; |
3327 | } |
3328 | |
3329 | *iterCount = loopCount; |
3330 | return true; |
3331 | |
3332 | case GT_MUL: |
3333 | case GT_DIV: |
3334 | case GT_RSH: |
3335 | case GT_LSH: |
3336 | case GT_UDIV: |
3337 | return false; |
3338 | |
3339 | default: |
3340 | noway_assert(!"Unknown operator for loop iterator" ); |
3341 | return false; |
3342 | } |
3343 | |
3344 | case GT_GT: |
3345 | switch (iterOper) |
3346 | { |
3347 | case GT_SUB: |
3348 | iterInc = -iterInc; |
3349 | __fallthrough; |
3350 | |
3351 | case GT_ADD: |
3352 | if (constInitX > constLimitX) |
3353 | { |
3354 | loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1; |
3355 | } |
3356 | |
3357 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3358 | |
3359 | if (unsTest) |
3360 | { |
3361 | iterAtExitX = (unsigned)iterAtExitX; |
3362 | } |
3363 | |
3364 | // Check if small types will underflow |
3365 | if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType)) |
3366 | { |
3367 | return false; |
3368 | } |
3369 | |
3370 | // iterator with 32bit underflow. Bad for TYP_INT and unsigneds |
3371 | if (iterAtExitX > constLimitX) |
3372 | { |
3373 | return false; |
3374 | } |
3375 | |
3376 | *iterCount = loopCount; |
3377 | return true; |
3378 | |
3379 | case GT_MUL: |
3380 | case GT_DIV: |
3381 | case GT_RSH: |
3382 | case GT_LSH: |
3383 | case GT_UDIV: |
3384 | return false; |
3385 | |
3386 | default: |
3387 | noway_assert(!"Unknown operator for loop iterator" ); |
3388 | return false; |
3389 | } |
3390 | |
3391 | case GT_GE: |
3392 | switch (iterOper) |
3393 | { |
3394 | case GT_SUB: |
3395 | iterInc = -iterInc; |
3396 | __fallthrough; |
3397 | |
3398 | case GT_ADD: |
3399 | if (constInitX >= constLimitX) |
3400 | { |
3401 | loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1; |
3402 | } |
3403 | |
3404 | iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); |
3405 | |
3406 | if (unsTest) |
3407 | { |
3408 | iterAtExitX = (unsigned)iterAtExitX; |
3409 | } |
3410 | |
3411 | // Check if small types will underflow |
3412 | if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType)) |
3413 | { |
3414 | return false; |
3415 | } |
3416 | |
3417 | // iterator with 32bit underflow. Bad for TYP_INT and unsigneds |
3418 | if (iterAtExitX >= constLimitX) |
3419 | { |
3420 | return false; |
3421 | } |
3422 | |
3423 | *iterCount = loopCount; |
3424 | return true; |
3425 | |
3426 | case GT_MUL: |
3427 | case GT_DIV: |
3428 | case GT_RSH: |
3429 | case GT_LSH: |
3430 | case GT_UDIV: |
3431 | return false; |
3432 | |
3433 | default: |
3434 | noway_assert(!"Unknown operator for loop iterator" ); |
3435 | return false; |
3436 | } |
3437 | |
3438 | default: |
3439 | noway_assert(!"Unknown operator for loop condition" ); |
3440 | } |
3441 | |
3442 | return false; |
3443 | } |
3444 | |
3445 | /***************************************************************************** |
3446 | * |
3447 | * Look for loop unrolling candidates and unroll them |
3448 | */ |
3449 | |
3450 | #ifdef _PREFAST_ |
3451 | #pragma warning(push) |
3452 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
3453 | #endif |
3454 | void Compiler::optUnrollLoops() |
3455 | { |
3456 | if (compCodeOpt() == SMALL_CODE) |
3457 | { |
3458 | return; |
3459 | } |
3460 | |
3461 | if (optLoopCount == 0) |
3462 | { |
3463 | return; |
3464 | } |
3465 | |
3466 | #ifdef DEBUG |
3467 | if (JitConfig.JitNoUnroll()) |
3468 | { |
3469 | return; |
3470 | } |
3471 | #endif |
3472 | |
3473 | #ifdef DEBUG |
3474 | if (verbose) |
3475 | { |
3476 | printf("*************** In optUnrollLoops()\n" ); |
3477 | } |
3478 | #endif |
3479 | /* Look for loop unrolling candidates */ |
3480 | |
3481 | bool change = false; |
3482 | |
// Visit loops from highest to lowest number to visit them in innermost
// to outermost order.
3485 | for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum) |
3486 | { |
3487 | // This is necessary due to an apparent analysis limitation since |
3488 | // optLoopCount must be strictly greater than 0 upon entry and lnum |
3489 | // cannot wrap due to the loop termination condition. |
3490 | PREFAST_ASSUME(lnum != 0U - 1); |
3491 | |
3492 | BasicBlock* block; |
3493 | BasicBlock* head; |
3494 | BasicBlock* bottom; |
3495 | |
3496 | GenTree* loop; |
3497 | GenTree* test; |
3498 | GenTree* incr; |
3499 | GenTree* phdr; |
3500 | GenTree* init; |
3501 | |
3502 | bool dupCond; |
3503 | int lval; |
3504 | int lbeg; // initial value for iterator |
3505 | int llim; // limit value for iterator |
3506 | unsigned lvar; // iterator lclVar # |
3507 | int iterInc; // value to increment the iterator |
3508 | genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.) |
3509 | var_types iterOperType; // type result of the oper (for overflow instrs) |
3510 | genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.) |
bool unsTest; // Is the comparison unsigned?
3512 | |
3513 | unsigned loopRetCount; // number of BBJ_RETURN blocks in loop |
3514 | unsigned totalIter; // total number of iterations in the constant loop |
3515 | unsigned loopFlags; // actual lpFlags |
3516 | unsigned requiredFlags; // required lpFlags |
3517 | |
3518 | static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = { |
3519 | 10, // BLENDED_CODE |
3520 | 0, // SMALL_CODE |
3521 | 20, // FAST_CODE |
3522 | 0 // COUNT_OPT_CODE |
3523 | }; |
3524 | |
3525 | noway_assert(ITER_LIMIT[SMALL_CODE] == 0); |
3526 | noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0); |
3527 | |
3528 | unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()]; |
3529 | |
3530 | #ifdef DEBUG |
3531 | if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) |
3532 | { |
3533 | iterLimit *= 10; |
3534 | } |
3535 | #endif |
3536 | |
3537 | static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = { |
3538 | 300, // BLENDED_CODE |
3539 | 0, // SMALL_CODE |
3540 | 600, // FAST_CODE |
3541 | 0 // COUNT_OPT_CODE |
3542 | }; |
3543 | |
3544 | noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0); |
3545 | noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0); |
3546 | |
3547 | int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()]; |
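// Taken together (illustrative): under BLENDED_CODE we only fully unroll loops
// with at most 10 iterations and an estimated code-size growth of at most 300
// bytes; both limits are relaxed under STRESS_UNROLL_LOOPS.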
3548 | |
3549 | loopFlags = optLoopTable[lnum].lpFlags; |
3550 | // Check for required flags: |
3551 | // LPFLG_DO_WHILE - required because this transform only handles loops of this form |
3552 | // LPFLG_CONST - required because this transform only handles full unrolls |
3553 | // LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons |
3554 | requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT; |
3555 | |
3556 | #ifdef DEBUG |
3557 | if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) |
3558 | { |
3559 | // In stress mode, quadruple the size limit, and drop |
3560 | // the restriction that loop limit must be Vector<T>.Count. |
3561 | |
3562 | unrollLimitSz *= 4; |
3563 | requiredFlags &= ~LPFLG_SIMD_LIMIT; |
3564 | } |
3565 | #endif |
3566 | |
3567 | /* Ignore the loop if we don't have a do-while |
3568 | that has a constant number of iterations */ |
3569 | |
3570 | if ((loopFlags & requiredFlags) != requiredFlags) |
3571 | { |
3572 | continue; |
3573 | } |
3574 | |
3575 | /* ignore if removed or marked as not unrollable */ |
3576 | |
3577 | if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED)) |
3578 | { |
3579 | continue; |
3580 | } |
3581 | |
3582 | head = optLoopTable[lnum].lpHead; |
3583 | noway_assert(head); |
3584 | bottom = optLoopTable[lnum].lpBottom; |
3585 | noway_assert(bottom); |
3586 | |
3587 | /* Get the loop data: |
3588 | - initial constant |
3589 | - limit constant |
3590 | - iterator |
3591 | - iterator increment |
3592 | - increment operation type (i.e. ADD, SUB, etc...) |
3593 | - loop test type (i.e. GT_GE, GT_LT, etc...) |
3594 | */ |
3595 | |
3596 | lbeg = optLoopTable[lnum].lpConstInit; |
3597 | llim = optLoopTable[lnum].lpConstLimit(); |
3598 | testOper = optLoopTable[lnum].lpTestOper(); |
3599 | |
3600 | lvar = optLoopTable[lnum].lpIterVar(); |
3601 | iterInc = optLoopTable[lnum].lpIterConst(); |
3602 | iterOper = optLoopTable[lnum].lpIterOper(); |
3603 | |
3604 | iterOperType = optLoopTable[lnum].lpIterOperType(); |
3605 | unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0; |
3606 | |
3607 | if (lvaTable[lvar].lvAddrExposed) |
3608 | { // If the loop iteration variable is address-exposed then bail |
3609 | continue; |
3610 | } |
3611 | if (lvaTable[lvar].lvIsStructField) |
3612 | { // If the loop iteration variable is a promoted field from a struct then |
3613 | // bail |
3614 | continue; |
3615 | } |
3616 | |
3617 | /* Locate the pre-header and initialization and increment/test statements */ |
3618 | |
3619 | phdr = head->bbTreeList; |
3620 | noway_assert(phdr); |
3621 | loop = bottom->bbTreeList; |
3622 | noway_assert(loop); |
3623 | |
3624 | init = head->lastStmt(); |
3625 | noway_assert(init && (init->gtNext == nullptr)); |
3626 | test = bottom->lastStmt(); |
3627 | noway_assert(test && (test->gtNext == nullptr)); |
3628 | incr = test->gtPrev; |
3629 | noway_assert(incr); |
3630 | |
3631 | if (init->gtFlags & GTF_STMT_CMPADD) |
3632 | { |
3633 | /* Must be a duplicated loop condition */ |
3634 | noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); |
3635 | |
3636 | dupCond = true; |
3637 | init = init->gtPrev; |
3638 | noway_assert(init); |
3639 | } |
3640 | else |
3641 | { |
3642 | dupCond = false; |
3643 | } |
3644 | |
3645 | /* Find the number of iterations - the function returns false if not a constant number */ |
3646 | |
3647 | if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter)) |
3648 | { |
3649 | continue; |
3650 | } |
3651 | |
3652 | /* Forget it if there are too many repetitions or not a constant loop */ |
3653 | |
3654 | if (totalIter > iterLimit) |
3655 | { |
3656 | continue; |
3657 | } |
3658 | |
3659 | noway_assert(init->gtOper == GT_STMT); |
3660 | init = init->gtStmt.gtStmtExpr; |
3661 | noway_assert(test->gtOper == GT_STMT); |
3662 | test = test->gtStmt.gtStmtExpr; |
3663 | noway_assert(incr->gtOper == GT_STMT); |
3664 | incr = incr->gtStmt.gtStmtExpr; |
3665 | |
3666 | // Don't unroll loops we don't understand. |
3667 | if (incr->gtOper != GT_ASG) |
3668 | { |
3669 | continue; |
3670 | } |
3671 | incr = incr->gtOp.gtOp2; |
3672 | |
3673 | /* Make sure everything looks ok */ |
3674 | if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) || |
3675 | (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) || |
3676 | (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) || |
3677 | |
3678 | !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || |
3679 | (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || |
3680 | (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) || |
3681 | |
3682 | (test->gtOper != GT_JTRUE)) |
3683 | { |
3684 | noway_assert(!"Bad precondition in Compiler::optUnrollLoops()" ); |
3685 | continue; |
3686 | } |
3687 | |
3688 | /* heuristic - Estimated cost in code size of the unrolled loop */ |
3689 | |
3690 | { |
3691 | ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration |
3692 | |
3693 | block = head->bbNext; |
3694 | auto tryIndex = block->bbTryIndex; |
3695 | |
3696 | loopRetCount = 0; |
3697 | for (;; block = block->bbNext) |
3698 | { |
3699 | if (block->bbTryIndex != tryIndex) |
3700 | { |
3701 | // Unrolling would require cloning EH regions |
3702 | goto DONE_LOOP; |
3703 | } |
3704 | |
3705 | if (block->bbJumpKind == BBJ_RETURN) |
3706 | { |
3707 | ++loopRetCount; |
3708 | } |
3709 | |
3710 | /* Visit all the statements in the block */ |
3711 | |
3712 | for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) |
3713 | { |
3714 | /* Calculate gtCostSz */ |
3715 | gtSetStmtInfo(stmt); |
3716 | |
3717 | /* Update loopCostSz */ |
3718 | loopCostSz += stmt->gtCostSz; |
3719 | } |
3720 | |
3721 | if (block == bottom) |
3722 | { |
3723 | break; |
3724 | } |
3725 | } |
3726 | |
3727 | #ifdef JIT32_GCENCODER |
3728 | if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX) |
3729 | { |
3730 | // Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs. |
3731 | goto DONE_LOOP; |
3732 | } |
#endif // JIT32_GCENCODER
3734 | |
3735 | /* Compute the estimated increase in code size for the unrolled loop */ |
3736 | |
3737 | ClrSafeInt<unsigned> fixedLoopCostSz(8); |
3738 | |
3739 | ClrSafeInt<int> unrollCostSz = ClrSafeInt<int>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) - |
3740 | ClrSafeInt<int>(loopCostSz + fixedLoopCostSz); |
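// Worked example (illustrative): if one iteration costs loopCostSz = 40 and
// totalIter = 4, the estimated growth is 40 * 4 - (40 + 8) = 112, which is
// within the BLENDED_CODE limit of 300.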
3741 | |
3742 | /* Don't unroll if too much code duplication would result. */ |
3743 | |
3744 | if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz)) |
3745 | { |
3746 | goto DONE_LOOP; |
3747 | } |
3748 | |
3749 | /* Looks like a good idea to unroll this loop, let's do it! */ |
3750 | CLANG_FORMAT_COMMENT_ANCHOR; |
3751 | |
3752 | #ifdef DEBUG |
3753 | if (verbose) |
3754 | { |
3755 | printf("\nUnrolling loop " FMT_BB, head->bbNext->bbNum); |
3756 | if (head->bbNext->bbNum != bottom->bbNum) |
3757 | { |
3758 | printf(".." FMT_BB, bottom->bbNum); |
3759 | } |
3760 | printf(" over V%02u from %u to %u" , lvar, lbeg, llim); |
printf(" unrollCostSz = %d\n" , unrollCostSz.Value());
3762 | printf("\n" ); |
3763 | } |
3764 | #endif |
3765 | } |
3766 | |
3767 | /* Create the unrolled loop statement list */ |
3768 | { |
3769 | BlockToBlockMap blockMap(getAllocator()); |
3770 | BasicBlock* insertAfter = bottom; |
3771 | |
3772 | for (lval = lbeg; totalIter; totalIter--) |
3773 | { |
3774 | for (block = head->bbNext;; block = block->bbNext) |
3775 | { |
3776 | BasicBlock* newBlock = insertAfter = |
3777 | fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true); |
3778 | blockMap.Set(block, newBlock); |
3779 | |
3780 | if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval)) |
3781 | { |
3782 | // cloneExpr doesn't handle everything |
3783 | BasicBlock* oldBottomNext = insertAfter->bbNext; |
3784 | bottom->bbNext = oldBottomNext; |
3785 | oldBottomNext->bbPrev = bottom; |
3786 | optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL; |
3787 | goto DONE_LOOP; |
3788 | } |
3789 | // Block weight should no longer have the loop multiplier |
3790 | newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT); |
3791 | // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them. |
3792 | assert(newBlock->bbJumpDest == nullptr); |
3793 | |
3794 | if (block == bottom) |
3795 | { |
3796 | // Remove the test; we're doing a full unroll. |
3797 | |
3798 | GenTreeStmt* testCopyStmt = newBlock->lastStmt(); |
3799 | GenTree* testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr; |
3800 | assert(testCopyExpr->gtOper == GT_JTRUE); |
3801 | GenTree* sideEffList = nullptr; |
3802 | gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF); |
3803 | if (sideEffList == nullptr) |
3804 | { |
3805 | fgRemoveStmt(newBlock, testCopyStmt); |
3806 | } |
3807 | else |
3808 | { |
3809 | testCopyStmt->gtStmt.gtStmtExpr = sideEffList; |
3810 | } |
3811 | newBlock->bbJumpKind = BBJ_NONE; |
3812 | |
3813 | // Exit this loop; we've walked all the blocks. |
3814 | break; |
3815 | } |
3816 | } |
3817 | |
3818 | // Now redirect any branches within the newly-cloned iteration |
3819 | for (block = head->bbNext; block != bottom; block = block->bbNext) |
3820 | { |
3821 | BasicBlock* newBlock = blockMap[block]; |
3822 | optCopyBlkDest(block, newBlock); |
3823 | optRedirectBlock(newBlock, &blockMap); |
3824 | } |
3825 | |
3826 | /* update the new value for the unrolled iterator */ |
3827 | |
3828 | switch (iterOper) |
3829 | { |
3830 | case GT_ADD: |
3831 | lval += iterInc; |
3832 | break; |
3833 | |
3834 | case GT_SUB: |
3835 | lval -= iterInc; |
3836 | break; |
3837 | |
3838 | case GT_RSH: |
3839 | case GT_LSH: |
3840 | noway_assert(!"Unrolling not implemented for this loop iterator" ); |
3841 | goto DONE_LOOP; |
3842 | |
3843 | default: |
3844 | noway_assert(!"Unknown operator for constant loop iterator" ); |
3845 | goto DONE_LOOP; |
3846 | } |
3847 | } |
3848 | |
3849 | // Gut the old loop body |
3850 | for (block = head->bbNext;; block = block->bbNext) |
3851 | { |
3852 | block->bbTreeList = nullptr; |
3853 | block->bbJumpKind = BBJ_NONE; |
3854 | block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD); |
3855 | if (block->bbJumpDest != nullptr) |
3856 | { |
3857 | block->bbJumpDest = nullptr; |
3858 | } |
3859 | |
3860 | if (block == bottom) |
3861 | { |
3862 | break; |
3863 | } |
3864 | } |
3865 | |
3866 | /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */ |
3867 | |
3868 | if (head->bbJumpKind == BBJ_COND) |
3869 | { |
3870 | phdr = head->bbTreeList; |
3871 | noway_assert(phdr); |
3872 | test = phdr->gtPrev; |
3873 | |
3874 | noway_assert(test && (test->gtNext == nullptr)); |
3875 | noway_assert(test->gtOper == GT_STMT); |
3876 | noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); |
3877 | |
3878 | init = test->gtPrev; |
3879 | noway_assert(init && (init->gtNext == test)); |
3880 | noway_assert(init->gtOper == GT_STMT); |
3881 | |
3882 | init->gtNext = nullptr; |
3883 | phdr->gtPrev = init; |
3884 | head->bbJumpKind = BBJ_NONE; |
3885 | head->bbFlags &= ~BBF_NEEDS_GCPOLL; |
3886 | } |
3887 | else |
3888 | { |
3889 | /* the loop must execute */ |
3890 | noway_assert(head->bbJumpKind == BBJ_NONE); |
3891 | } |
3892 | |
3893 | #ifdef DEBUG |
3894 | if (verbose) |
3895 | { |
3896 | printf("Whole unrolled loop:\n" ); |
3897 | |
3898 | gtDispTree(init); |
3899 | printf("\n" ); |
3900 | fgDumpTrees(head->bbNext, insertAfter); |
3901 | } |
3902 | #endif |
3903 | |
3904 | /* Remember that something has changed */ |
3905 | |
3906 | change = true; |
3907 | |
3908 | /* Make sure to update loop table */ |
3909 | |
3910 | /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly |
3911 | * (also make head and bottom NULL - to hit an assert or GPF) */ |
3912 | |
3913 | optLoopTable[lnum].lpFlags |= LPFLG_REMOVED; |
3914 | optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr; |
3915 | |
3916 | // Note if we created new BBJ_RETURNs |
3917 | fgReturnCount += loopRetCount * (totalIter - 1); |
3918 | } |
3919 | |
3920 | DONE_LOOP:; |
3921 | } |
3922 | |
3923 | if (change) |
3924 | { |
3925 | fgUpdateChangedFlowGraph(); |
3926 | } |
3927 | |
3928 | #ifdef DEBUG |
3929 | fgDebugCheckBBlist(true); |
3930 | #endif |
3931 | } |
3932 | #ifdef _PREFAST_ |
3933 | #pragma warning(pop) |
3934 | #endif |
3935 | |
3936 | /***************************************************************************** |
3937 | * |
* Return true if there might be a code path from 'topBB' to 'botBB' that does
* not execute a method call (i.e. that contains no GC safe point); return
* false if every such path is known to contain one.
3940 | */ |
3941 | |
3942 | bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB) |
3943 | { |
3944 | // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls, |
3945 | // as some helper calls are neither interruptible nor hijackable. |
3946 | // When we can determine this, then we can set BBF_GC_SAFE_POINT for |
3947 | // those helpers too. |
3948 | |
3949 | noway_assert(topBB->bbNum <= botBB->bbNum); |
3950 | |
3951 | // We can always check topBB and botBB for any gc safe points and early out |
3952 | |
3953 | if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT) |
3954 | { |
3955 | return false; |
3956 | } |
3957 | |
3958 | // Otherwise we will need to rely upon the dominator sets |
3959 | |
3960 | if (!fgDomsComputed) |
3961 | { |
3962 | // return a conservative answer of true when we don't have the dominator sets |
3963 | return true; |
3964 | } |
3965 | |
3966 | BasicBlock* curBB = topBB; |
3967 | for (;;) |
3968 | { |
3969 | noway_assert(curBB); |
3970 | |
3971 | // If we added a loop pre-header block then we will |
3972 | // have a bbNum greater than fgLastBB, and we won't have |
3973 | // any dominator information about this block, so skip it. |
3974 | // |
3975 | if (curBB->bbNum <= fgLastBB->bbNum) |
3976 | { |
3977 | noway_assert(curBB->bbNum <= botBB->bbNum); |
3978 | |
3979 | // Does this block contain a gc safe point? |
3980 | |
3981 | if (curBB->bbFlags & BBF_GC_SAFE_POINT) |
3982 | { |
3983 | // Will this block always execute on the way to botBB ? |
3984 | // |
// We are checking every block in [topBB .. botBB] and we are using
// a lexical definition of a loop
// (all that we know is that botBB has a back-edge to topBB).
3988 | // Thus while walking blocks in this range we may encounter some blocks |
3989 | // that are not really part of the loop, and so we need to perform |
3990 | // some additional checks: |
3991 | // |
3992 | // We will check that the current 'curBB' is reachable from 'topBB' |
3993 | // and that it dominates the block containing the back-edge 'botBB' |
3994 | // When both of these are true then we know that the gcsafe point in 'curBB' |
3995 | // will be encountered in the loop and we can return false |
3996 | // |
3997 | if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB)) |
3998 | { |
3999 | return false; |
4000 | } |
4001 | } |
4002 | else |
4003 | { |
4004 | // If we've reached the destination block, then we're done |
4005 | |
4006 | if (curBB == botBB) |
4007 | { |
4008 | break; |
4009 | } |
4010 | } |
4011 | } |
4012 | |
4013 | curBB = curBB->bbNext; |
4014 | } |
4015 | |
4016 | // If we didn't find any blocks that contained a gc safe point and |
4017 | // also met the fgDominate and fgReachable criteria then we must return true |
4018 | // |
4019 | return true; |
4020 | } |
4021 | |
4022 | /***************************************************************************** |
4023 | * |
4024 | * Find the loop termination test at the bottom of the loop |
4025 | */ |
4026 | |
4027 | static GenTree* optFindLoopTermTest(BasicBlock* bottom) |
4028 | { |
4029 | GenTree* testt = bottom->bbTreeList; |
4030 | |
4031 | assert(testt && testt->gtOper == GT_STMT); |
4032 | |
4033 | GenTree* result = testt->gtPrev; |
4034 | |
4035 | #ifdef DEBUG |
4036 | while (testt->gtNext) |
4037 | { |
4038 | testt = testt->gtNext; |
4039 | } |
4040 | |
4041 | assert(testt == result); |
4042 | #endif |
4043 | |
4044 | return result; |
4045 | } |
4046 | |
4047 | /***************************************************************************** |
* Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
4049 | */ |
4050 | |
4051 | void Compiler::fgOptWhileLoop(BasicBlock* block) |
4052 | { |
4053 | noway_assert(opts.OptimizationEnabled()); |
4054 | noway_assert(compCodeOpt() != SMALL_CODE); |
4055 | |
4056 | /* |
Optimize while loops into do { } while loops.
4058 | Our loop hoisting logic requires do { } while loops. |
4059 | Specifically, we're looking for the following case: |
4060 | |
4061 | ... |
4062 | jmp test |
4063 | loop: |
4064 | ... |
4065 | ... |
4066 | test: |
4067 | cond |
4068 | jtrue loop |
4069 | |
4070 | If we find this, and the condition is simple enough, we change |
4071 | the loop to the following: |
4072 | |
4073 | ... |
4074 | cond |
4075 | jfalse done |
4076 | // else fall-through |
4077 | loop: |
4078 | ... |
4079 | ... |
4080 | test: |
4081 | cond |
4082 | jtrue loop |
4083 | done: |
4084 | |
4085 | */ |
4086 | |
4087 | /* Does the BB end with an unconditional jump? */ |
4088 | |
4089 | if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)) |
4090 | { // It can't be one of the ones we use for our exception magic |
4091 | return; |
4092 | } |
4093 | |
4094 | // It has to be a forward jump |
// TODO-CQ: Check if we can optimize the backwards jump as well.
4096 | // |
4097 | if (fgIsForwardBranch(block) == false) |
4098 | { |
4099 | return; |
4100 | } |
4101 | |
4102 | // Get hold of the jump target |
4103 | BasicBlock* bTest = block->bbJumpDest; |
4104 | |
4105 | // Does the block consist of 'jtrue(cond) block' ? |
4106 | if (bTest->bbJumpKind != BBJ_COND) |
4107 | { |
4108 | return; |
4109 | } |
4110 | |
4111 | // bTest must be a backwards jump to block->bbNext |
4112 | if (bTest->bbJumpDest != block->bbNext) |
4113 | { |
4114 | return; |
4115 | } |
4116 | |
4117 | // Since test is a BBJ_COND it will have a bbNext |
4118 | noway_assert(bTest->bbNext); |
4119 | |
4120 | // 'block' must be in the same try region as the condition, since we're going to insert |
4121 | // a duplicated condition in 'block', and the condition might include exception throwing code. |
4122 | if (!BasicBlock::sameTryRegion(block, bTest)) |
4123 | { |
4124 | return; |
4125 | } |
4126 | |
4127 | // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the |
4128 | // same try region (or no try region) to avoid generating illegal flow. |
4129 | BasicBlock* bTestNext = bTest->bbNext; |
4130 | if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext)) |
4131 | { |
4132 | return; |
4133 | } |
4134 | |
4135 | GenTree* condStmt = optFindLoopTermTest(bTest); |
4136 | |
// bTest must contain only a jtrue with no other stmts; we will only clone
// the conditional, so any other statements will not get cloned.
// TODO-CQ: consider cloning the whole bTest block and inserting it after block.
4140 | // |
4141 | if (bTest->bbTreeList != condStmt) |
4142 | { |
4143 | return; |
4144 | } |
4145 | |
4146 | /* Get to the condition node from the statement tree */ |
4147 | |
4148 | noway_assert(condStmt->gtOper == GT_STMT); |
4149 | |
4150 | GenTree* condTree = condStmt->gtStmt.gtStmtExpr; |
4151 | noway_assert(condTree->gtOper == GT_JTRUE); |
4152 | |
4153 | condTree = condTree->gtOp.gtOp1; |
4154 | |
4155 | // The condTree has to be a RelOp comparison |
// TODO-CQ: Check if we can optimize the backwards jump as well.
4157 | // |
4158 | if (condTree->OperIsCompare() == false) |
4159 | { |
4160 | return; |
4161 | } |
4162 | |
4163 | /* We call gtPrepareCost to measure the cost of duplicating this tree */ |
4164 | |
4165 | gtPrepareCost(condTree); |
4166 | unsigned estDupCostSz = condTree->gtCostSz; |
4167 | |
4168 | double loopIterations = (double)BB_LOOP_WEIGHT; |
4169 | |
4170 | bool allProfileWeightsAreValid = false; |
4171 | BasicBlock::weight_t weightBlock = block->bbWeight; |
4172 | BasicBlock::weight_t weightTest = bTest->bbWeight; |
4173 | BasicBlock::weight_t weightNext = block->bbNext->bbWeight; |
4174 | |
// If we have profile data then we calculate the number of times
// the loop will iterate and store it in loopIterations
4177 | if (fgIsUsingProfileWeights()) |
4178 | { |
4179 | // Only rely upon the profile weight when all three of these blocks |
4180 | // have good profile weights |
4181 | if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight()) |
4182 | { |
4183 | allProfileWeightsAreValid = true; |
4184 | |
4185 | // If this while loop never iterates then don't bother transforming |
4186 | if (weightNext == 0) |
4187 | { |
4188 | return; |
4189 | } |
4190 | |
// With (weightNext > 0) we should also have (weightTest >= weightBlock)
// if the profile weights are all valid.
//
// weightNext is the number of times this loop iterates
// weightBlock is the number of times that we enter the while loop
// loopIterations is the average number of times that this loop iterates
4197 | // |
4198 | if (weightTest >= weightBlock) |
4199 | { |
4200 | loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight; |
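// E.g. (illustrative): if the loop is entered 100 times (weightBlock) and the
// loop top executes 1200 times (weightNext), loopIterations is 12.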
4201 | } |
4202 | } |
4203 | } |
4204 | |
4205 | unsigned maxDupCostSz = 32; |
4206 | |
4207 | // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not |
4208 | // set loop weights yet |
4209 | if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30)) |
4210 | { |
4211 | maxDupCostSz *= 4; |
4212 | } |
4213 | |
4214 | // If this loop iterates a lot then raise the maxDupCost |
4215 | if (loopIterations >= 12.0) |
4216 | { |
4217 | maxDupCostSz *= 2; |
4218 | } |
4219 | if (loopIterations >= 96.0) |
4220 | { |
4221 | maxDupCostSz *= 2; |
4222 | } |
4223 | |
4224 | // If the loop condition has a shared static helper, we really want this loop converted |
4225 | // as not converting the loop will disable loop hoisting, meaning the shared helper will |
4226 | // be executed on every loop iteration. |
4227 | int countOfHelpers = 0; |
4228 | fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers); |
4229 | |
4230 | if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE) |
4231 | { |
4232 | maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5)); |
4233 | } |
4234 | |
4235 | // If the compare has too high cost then we don't want to dup |
4236 | |
4237 | bool costIsTooHigh = (estDupCostSz > maxDupCostSz); |
4238 | |
4239 | #ifdef DEBUG |
4240 | if (verbose) |
4241 | { |
4242 | printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i," |
4243 | "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n" , |
4244 | condTree->gtTreeID, costIsTooHigh ? "not done" : "performed" , estDupCostSz, |
4245 | costIsTooHigh ? "greater" : "less or equal" , maxDupCostSz, loopIterations, countOfHelpers, |
4246 | allProfileWeightsAreValid ? "true" : "false" ); |
4247 | } |
4248 | #endif |
4249 | |
4250 | if (costIsTooHigh) |
4251 | { |
4252 | return; |
4253 | } |
4254 | |
4255 | /* Looks good - duplicate the condition test */ |
4256 | |
4257 | condTree->gtFlags |= GTF_RELOP_ZTT; |
4258 | |
4259 | condTree = gtCloneExpr(condTree); |
4260 | gtReverseCond(condTree); |
4261 | |
4262 | // Make sure clone expr copied the flag |
4263 | assert(condTree->gtFlags & GTF_RELOP_ZTT); |
4264 | |
4265 | condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree); |
4266 | |
4267 | /* Create a statement entry out of the condition and |
4268 | append the condition test at the end of 'block' */ |
4269 | |
4270 | GenTree* copyOfCondStmt = fgInsertStmtAtEnd(block, condTree); |
4271 | |
4272 | copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD; |
4273 | |
4274 | if (opts.compDbgInfo) |
4275 | { |
4276 | copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx; |
4277 | } |
4278 | |
4279 | // Flag the block that received the copy as potentially having an array/vtable |
4280 | // reference if the block copied from did; this is a conservative guess. |
4281 | if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN)) |
4282 | { |
4283 | block->bbFlags |= copyFlags; |
4284 | } |
4285 | |
4286 | // If we have profile data for all blocks and we know that we are cloning the |
4287 | // bTest block into block and thus changing the control flow from block so |
4288 | // that it no longer goes directly to bTest anymore, we have to adjust the |
4289 | // weight of bTest by subtracting out the weight of block. |
4290 | // |
4291 | if (allProfileWeightsAreValid) |
4292 | { |
4293 | // |
4294 | // Some additional sanity checks before adjusting the weight of bTest |
4295 | // |
4296 | if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT)) |
4297 | { |
// Get the two edges that flow out of bTest
4299 | flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest); |
4300 | flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest); |
4301 | |
4302 | // Calculate the new weight for block bTest |
4303 | |
4304 | BasicBlock::weight_t newWeightTest = |
4305 | (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT; |
4306 | bTest->bbWeight = newWeightTest; |
4307 | |
4308 | if (newWeightTest == BB_ZERO_WEIGHT) |
4309 | { |
4310 | bTest->bbFlags |= BBF_RUN_RARELY; |
4311 | // All out edge weights are set to zero |
4312 | edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT; |
4313 | edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT; |
4314 | edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT; |
4315 | edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT; |
4316 | } |
4317 | else |
4318 | { |
// Update our edge weights
4320 | edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT; |
4321 | edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest); |
4322 | edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT; |
4323 | edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest); |
4324 | } |
4325 | } |
4326 | } |
4327 | |
4328 | /* Change the block to end with a conditional jump */ |
4329 | |
4330 | block->bbJumpKind = BBJ_COND; |
4331 | block->bbJumpDest = bTest->bbNext; |
4332 | |
4333 | /* Mark the jump dest block as being a jump target */ |
4334 | block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; |
4335 | |
4336 | /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */ |
4337 | |
4338 | fgAddRefPred(block->bbNext, block); |
4339 | |
4340 | fgRemoveRefPred(bTest, block); |
4341 | fgAddRefPred(bTest->bbNext, block); |
4342 | |
4343 | #ifdef DEBUG |
4344 | if (verbose) |
4345 | { |
4346 | printf("\nDuplicating loop condition in " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")" , block->bbNum, |
4347 | block->bbNext->bbNum, bTest->bbNum); |
4348 | printf("\nEstimated code size expansion is %d\n " , estDupCostSz); |
4349 | |
4350 | gtDispTree(copyOfCondStmt); |
4351 | } |
4352 | |
4353 | #endif |
4354 | } |
4355 | |
4356 | /***************************************************************************** |
4357 | * |
4358 | * Optimize the BasicBlock layout of the method |
4359 | */ |
4360 | |
4361 | void Compiler::optOptimizeLayout() |
4362 | { |
4363 | noway_assert(opts.OptimizationEnabled()); |
4364 | |
4365 | #ifdef DEBUG |
4366 | if (verbose) |
4367 | { |
4368 | printf("*************** In optOptimizeLayout()\n" ); |
4369 | fgDispHandlerTab(); |
4370 | } |
4371 | |
4372 | /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */ |
4373 | fgDebugCheckBBlist(); |
4374 | #endif |
4375 | |
4376 | noway_assert(fgModified == false); |
4377 | |
4378 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
4379 | { |
4380 | /* Make sure the appropriate fields are initialized */ |
4381 | |
4382 | if (block->bbWeight == BB_ZERO_WEIGHT) |
4383 | { |
4384 | /* Zero weighted block can't have a LOOP_HEAD flag */ |
4385 | noway_assert(block->isLoopHead() == false); |
4386 | continue; |
4387 | } |
4388 | |
4389 | assert(block->bbLoopNum == 0); |
4390 | |
4391 | if (compCodeOpt() != SMALL_CODE) |
4392 | { |
4393 | /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */ |
4394 | |
4395 | fgOptWhileLoop(block); |
4396 | } |
4397 | } |
4398 | |
4399 | if (fgModified) |
4400 | { |
4401 | // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop |
4402 | fgComputeEdgeWeights(); |
4403 | } |
4404 | |
4405 | fgUpdateFlowGraph(true); |
4406 | fgReorderBlocks(); |
4407 | fgUpdateFlowGraph(); |
4408 | } |
4409 | |
4410 | /***************************************************************************** |
4411 | * |
4412 | * Perform loop inversion, find and classify natural loops |
4413 | */ |
4414 | |
4415 | void Compiler::optOptimizeLoops() |
4416 | { |
4417 | noway_assert(opts.OptimizationEnabled()); |
4418 | |
4419 | #ifdef DEBUG |
4420 | if (verbose) |
4421 | { |
4422 | printf("*************** In optOptimizeLoops()\n" ); |
4423 | } |
4424 | #endif |
4425 | |
4426 | optSetBlockWeights(); |
4427 | |
4428 | /* Were there any loops in the flow graph? */ |
4429 | |
4430 | if (fgHasLoops) |
4431 | { |
4432 | /* now that we have dominator information we can find loops */ |
4433 | |
4434 | optFindNaturalLoops(); |
4435 | |
4436 | unsigned loopNum = 0; |
4437 | |
4438 | /* Iterate over the flow graph, marking all loops */ |
4439 | |
4440 | /* We will use the following terminology: |
4441 | * top - the first basic block in the loop (i.e. the head of the backward edge) |
4442 | * bottom - the last block in the loop (i.e. the block from which we jump to the top) |
4443 | * lastBottom - used when we have multiple back-edges to the same top |
4444 | */ |
4445 | |
4446 | flowList* pred; |
4447 | |
4448 | BasicBlock* top; |
4449 | |
4450 | for (top = fgFirstBB; top; top = top->bbNext) |
4451 | { |
4452 | BasicBlock* foundBottom = nullptr; |
4453 | |
4454 | for (pred = top->bbPreds; pred; pred = pred->flNext) |
4455 | { |
4456 | /* Is this a loop candidate? - We look for "back edges" */ |
4457 | |
4458 | BasicBlock* bottom = pred->flBlock; |
4459 | |
4460 | /* is this a backward edge? (from BOTTOM to TOP) */ |
4461 | |
4462 | if (top->bbNum > bottom->bbNum) |
4463 | { |
4464 | continue; |
4465 | } |
4466 | |
4467 | /* 'top' also must have the BBF_LOOP_HEAD flag set */ |
4468 | |
4469 | if (top->isLoopHead() == false) |
4470 | { |
4471 | continue; |
4472 | } |
4473 | |
4474 | /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ |
4475 | |
4476 | if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS)) |
4477 | { |
4478 | continue; |
4479 | } |
4480 | |
4481 | /* the top block must be able to reach the bottom block */ |
4482 | if (!fgReachable(top, bottom)) |
4483 | { |
4484 | continue; |
4485 | } |
4486 | |
4487 | /* Found a new loop, record the longest backedge in foundBottom */ |
4488 | |
4489 | if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum)) |
4490 | { |
4491 | foundBottom = bottom; |
4492 | } |
4493 | } |
4494 | |
4495 | if (foundBottom) |
4496 | { |
4497 | loopNum++; |
4498 | #ifdef DEBUG |
4499 | /* Mark the loop header as such */ |
4500 | assert(FitsIn<unsigned char>(loopNum)); |
4501 | top->bbLoopNum = (unsigned char)loopNum; |
4502 | #endif |
4503 | |
4504 | /* Mark all blocks between 'top' and 'bottom' */ |
4505 | |
4506 | optMarkLoopBlocks(top, foundBottom, false); |
4507 | } |
4508 | |
4509 | // We track at most 255 loops |
4510 | if (loopNum == 255) |
4511 | { |
4512 | #if COUNT_LOOPS |
4513 | totalUnnatLoopOverflows++; |
4514 | #endif |
4515 | break; |
4516 | } |
4517 | } |
4518 | |
4519 | #if COUNT_LOOPS |
4520 | totalUnnatLoopCount += loopNum; |
4521 | #endif |
4522 | |
4523 | #ifdef DEBUG |
4524 | if (verbose) |
4525 | { |
4526 | if (loopNum > 0) |
4527 | { |
4528 | printf("\nFound a total of %d loops." , loopNum); |
4529 | printf("\nAfter loop weight marking:\n" ); |
4530 | fgDispBasicBlocks(); |
4531 | printf("\n" ); |
4532 | } |
4533 | } |
4534 | #endif |
4535 | optLoopsMarked = true; |
4536 | } |
4537 | } |
4538 | |
4539 | //------------------------------------------------------------------------ |
4540 | // optDeriveLoopCloningConditions: Derive loop cloning conditions. |
4541 | // |
4542 | // Arguments: |
4543 | // loopNum - the current loop index for which conditions are derived. |
4544 | // context - data structure where all loop cloning info is kept. |
4545 | // |
4546 | // Return Value: |
4547 | // "false" if conditions cannot be obtained. "true" otherwise. |
4548 | // The cloning conditions are updated in the "conditions"[loopNum] field |
4549 | // of the "context" parameter. |
4550 | // |
4551 | // Operation: |
4552 | // Inspect the loop cloning optimization candidates and populate the conditions necessary |
// for each optimization candidate. Checks that the loop stride is "> 0" when the loop
// condition is "less than". If the initializer is a "var" init then the condition
// "var >= 0" is added, and if the loop limit is a variable then "var >= 0" and
// "var <= a.len" are added to "context". These conditions are checked in the
// pre-header block and the cloning choice is made.
4558 | // |
4559 | // Assumption: |
4560 | // Callers should assume AND operation is used i.e., if all conditions are |
4561 | // true, then take the fast path. |
4562 | // |
4563 | bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context) |
4564 | { |
4565 | JITDUMP("------------------------------------------------------------\n" ); |
4566 | JITDUMP("Deriving cloning conditions for L%02u\n" , loopNum); |
4567 | |
4568 | LoopDsc* loop = &optLoopTable[loopNum]; |
4569 | JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum); |
4570 | |
4571 | if (loop->lpTestOper() == GT_LT) |
4572 | { |
4573 | // Stride conditions |
4574 | if (loop->lpIterConst() <= 0) |
4575 | { |
4576 | JITDUMP("> Stride %d is invalid\n" , loop->lpIterConst()); |
4577 | return false; |
4578 | } |
4579 | |
4580 | // Init conditions |
4581 | if (loop->lpFlags & LPFLG_CONST_INIT) |
4582 | { |
4583 | // Only allowing const init at this time. |
4584 | if (loop->lpConstInit < 0) |
4585 | { |
4586 | JITDUMP("> Init %d is invalid\n" , loop->lpConstInit); |
4587 | return false; |
4588 | } |
4589 | } |
4590 | else if (loop->lpFlags & LPFLG_VAR_INIT) |
4591 | { |
// initVar >= 0
4593 | LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)), |
4594 | LC_Expr(LC_Ident(0, LC_Ident::Const))); |
4595 | context->EnsureConditions(loopNum)->Push(geZero); |
4596 | } |
4597 | else |
4598 | { |
4599 | JITDUMP("> Not variable init\n" ); |
4600 | return false; |
4601 | } |
4602 | |
4603 | // Limit Conditions |
4604 | LC_Ident ident; |
4605 | if (loop->lpFlags & LPFLG_CONST_LIMIT) |
4606 | { |
4607 | int limit = loop->lpConstLimit(); |
4608 | if (limit < 0) |
4609 | { |
4610 | JITDUMP("> limit %d is invalid\n" , limit); |
4611 | return false; |
4612 | } |
4613 | ident = LC_Ident(static_cast<unsigned>(limit), LC_Ident::Const); |
4614 | } |
4615 | else if (loop->lpFlags & LPFLG_VAR_LIMIT) |
4616 | { |
4617 | unsigned limitLcl = loop->lpVarLimit(); |
4618 | ident = LC_Ident(limitLcl, LC_Ident::Var); |
4619 | |
4620 | LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); |
4621 | |
4622 | context->EnsureConditions(loopNum)->Push(geZero); |
4623 | } |
4624 | else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) |
4625 | { |
4626 | ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator()); |
4627 | if (!loop->lpArrLenLimit(this, index)) |
4628 | { |
4629 | JITDUMP("> ArrLen not matching" ); |
4630 | return false; |
4631 | } |
4632 | ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); |
4633 | |
// Ensure that this array is dereference-able before executing the actual condition.
4635 | LC_Array array(LC_Array::Jagged, index, LC_Array::None); |
4636 | context->EnsureDerefs(loopNum)->Push(array); |
4637 | } |
4638 | else |
4639 | { |
4640 | JITDUMP("> Undetected limit\n" ); |
4641 | return false; |
4642 | } |
4643 | |
4644 | for (unsigned i = 0; i < optInfos->Size(); ++i) |
4645 | { |
4646 | LcOptInfo* optInfo = optInfos->GetRef(i); |
4647 | switch (optInfo->GetOptType()) |
4648 | { |
4649 | case LcOptInfo::LcJaggedArray: |
4650 | { |
4651 | // limit <= arrLen |
4652 | LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); |
4653 | LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); |
4654 | LC_Ident arrLenIdent = LC_Ident(arrLen); |
4655 | |
4656 | LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent)); |
4657 | context->EnsureConditions(loopNum)->Push(cond); |
4658 | |
// Ensure that this array is dereference-able before executing the actual condition.
4660 | LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); |
4661 | context->EnsureDerefs(loopNum)->Push(array); |
4662 | } |
4663 | break; |
4664 | case LcOptInfo::LcMdArray: |
4665 | { |
4666 | // limit <= mdArrLen |
4667 | LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); |
4668 | LC_Condition cond(GT_LE, LC_Expr(ident), |
4669 | LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray, |
4670 | mdArrInfo->GetArrIndexForDim(getAllocator()), |
4671 | mdArrInfo->dim, LC_Array::None)))); |
4672 | context->EnsureConditions(loopNum)->Push(cond); |
4673 | } |
4674 | break; |
4675 | |
4676 | default: |
4677 | JITDUMP("Unknown opt\n" ); |
4678 | return false; |
4679 | } |
4680 | } |
4681 | JITDUMP("Conditions: (" ); |
4682 | DBEXEC(verbose, context->PrintConditions(loopNum)); |
4683 | JITDUMP(")\n" ); |
4684 | return true; |
4685 | } |
4686 | return false; |
4687 | } |
4688 | |
4689 | //------------------------------------------------------------------------------------ |
4690 | // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays. |
4691 | // |
4692 | // Arguments: |
4693 | // loopNum - the current loop index for which conditions are derived. |
4694 | // context - data structure where all loop cloning info is kept. |
4695 | // |
4696 | // Return Value: |
4697 | // "false" if conditions cannot be obtained. "true" otherwise. |
4698 | // The deref conditions are updated in the "derefConditions"[loopNum] field |
4699 | // of the "context" parameter. |
4700 | // |
4701 | // Definition of Deref Conditions: |
4702 | // To be able to check for the loop cloning condition that (limitVar <= a.len) |
4703 | // we should first be able to dereference "a". i.e., "a" is non-null. |
4704 | // |
4705 | // Example: |
4706 | // |
4707 | // for (i in 0..n) |
4708 | // for (j in 0..n) |
// for (k in 0..n) // Innermost loop is being cloned. Cloning needs to check if
4710 | // // (n <= a[i][j].len) and other safer conditions to take the fast path |
4711 | // a[i][j][k] = 0; |
4712 | // |
4713 | // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check. |
4714 | // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first |
4715 | // be true to do the deref. |
4716 | // |
4717 | // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1) |
4718 | // |
4719 | // Note the short circuiting AND. Implication: these conditions should be performed in separate |
4720 | // blocks each of which will branch to slow path if the condition evaluates to false. |
4721 | // |
4722 | // Now, imagine a situation where we have |
4723 | // a[x][y][k] = 20 and a[i][j][k] = 0 |
// also in the innermost loop, where x and y are parameters, then our conditions will have
4725 | // to include |
4726 | // (x < a.len) && |
4727 | // (y < a[x].len) |
4728 | // in addition to the above conditions (1) to get rid of bounds check on index 'k' |
4729 | // |
4730 | // But these conditions can be checked together with conditions |
4731 | // (i < a.len) without a need for a separate block. In summary, the conditions will be: |
4732 | // |
4733 | // (a != null) && |
4734 | // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here. |
4735 | // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here. |
4736 | // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here. |
4737 | // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here. |
4738 | // |
4739 | // This naturally yields a tree style pattern, where the nodes of the tree are |
4740 | // the array and indices respectively. |
4741 | // |
4742 | // Example: |
4743 | // a => { |
4744 | // i => { |
4745 | // j => { |
4746 | // k => {} |
4747 | // } |
4748 | // }, |
4749 | // x => { |
4750 | // y => { |
4751 | // k => {} |
4752 | // } |
4753 | // } |
4754 | // } |
4755 | // |
4756 | // Notice that the variables in the same levels can have their conditions combined in the |
4757 | // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be |
4758 | // combined with a short-circuiting AND (i.e., different basic blocks). |
4759 | // |
4760 | // Operation: |
4761 | // Construct a tree of array indices and the array which will generate the optimal |
4762 | // conditions for loop cloning. |
4763 | // |
4764 | // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be: |
4765 | // |
4766 | // a => { |
4767 | // i => { |
4768 | // j => { |
4769 | // k => {} |
4770 | // }, |
4771 | // y => { |
4772 | // k => {} |
4773 | // }, |
4774 | // } |
4775 | // }, |
4776 | // b => { |
4777 | // i => {} |
4778 | // } |
4779 | // In this method, we will construct such a tree by descending depth first into the array |
4780 | // index operation and forming a tree structure as we encounter the array or the index variables. |
4781 | // |
4782 | // This tree structure will then be used to generate conditions like below: |
4783 | // (a != null) & (b != null) && // from the first level of the tree. |
4784 | // |
4785 | // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined. |
4786 | // (a[i] != null) & (b[i] != null) && // from the second level of the tree. |
4787 | // |
4788 | // (j < a[i].len) & (y < a[i].len) && // from the third level. |
4789 | // (a[i][j] != null) & (a[i][y] != null) && // from the third level. |
4790 | // |
4791 | // and so on. |
4792 | // |
4793 | // |
4794 | bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) |
4795 | { |
4796 | JitExpandArrayStack<LC_Deref*> nodes(getAllocator()); |
4797 | int maxRank = -1; |
4798 | |
4799 | // Get the dereference-able arrays. |
4800 | JitExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum); |
4801 | |
4802 | // For each array in the dereference list, construct a tree, |
4803 | // where the nodes are array and index variables and an edge 'u-v' |
4804 | // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge |
4805 | // 'u-v-w' transitively if u[v][w] occurs. |
4806 | for (unsigned i = 0; i < deref->Size(); ++i) |
4807 | { |
4808 | LC_Array& array = (*deref)[i]; |
4809 | |
4810 | // First populate the array base variable. |
4811 | LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl); |
4812 | if (node == nullptr) |
4813 | { |
4814 | node = new (getAllocator()) LC_Deref(array, 0 /*level*/); |
4815 | nodes.Push(node); |
4816 | } |
4817 | |
4818 | // For each dimension (level) for the array, populate the tree with the variable |
4819 | // from that dimension. |
4820 | unsigned rank = (unsigned)array.GetDimRank(); |
for (unsigned dim = 0; dim < rank; ++dim)
{
node->EnsureChildren(getAllocator());
LC_Deref* tmp = node->Find(array.arrIndex->indLcls[dim]);
4825 | if (tmp == nullptr) |
4826 | { |
4827 | tmp = new (getAllocator()) LC_Deref(array, node->level + 1); |
4828 | node->children->Push(tmp); |
4829 | } |
4830 | |
4831 | // Descend one level down. |
4832 | node = tmp; |
4833 | } |
4834 | |
4835 | // Keep the maxRank of all array dereferences. |
4836 | maxRank = max((int)rank, maxRank); |
4837 | } |
4838 | |
4839 | #ifdef DEBUG |
4840 | if (verbose) |
4841 | { |
4842 | for (unsigned i = 0; i < nodes.Size(); ++i) |
4843 | { |
4844 | if (i != 0) |
4845 | { |
4846 | printf("," ); |
4847 | } |
4848 | nodes[i]->Print(); |
4849 | printf("\n" ); |
4850 | } |
4851 | } |
4852 | #endif |
4853 | |
4854 | if (maxRank == -1) |
4855 | { |
4856 | return false; |
4857 | } |
4858 | |
// The first level will always yield the null-check, since it is made of the array base variables.
// All other levels (dimensions) will yield two conditions, e.g.: (i < a.length && a[i] != null).
// So the total is (maxRank * 2) + 1.
4862 | unsigned condBlocks = (unsigned)maxRank * 2 + 1; |
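// E.g. (illustrative): maxRank == 1 gives 1 * 2 + 1 = 3 condition blocks, while
// maxRank == 2 would give 5 and be rejected by the limit below.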
4863 | |
4864 | // Heuristic to not create too many blocks; |
4865 | if (condBlocks > 4) |
4866 | { |
4867 | return false; |
4868 | } |
4869 | |
4870 | // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] |
4871 | JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond = |
4872 | context->EnsureBlockConditions(loopNum, condBlocks); |
4873 | for (unsigned i = 0; i < nodes.Size(); ++i) |
4874 | { |
4875 | nodes[i]->DeriveLevelConditions(levelCond); |
4876 | } |
4877 | |
4878 | DBEXEC(verbose, context->PrintBlockConditions(loopNum)); |
4879 | return true; |
4880 | } |
4881 | |
4882 | #ifdef DEBUG |
4883 | //---------------------------------------------------------------------------- |
4884 | // optDebugLogLoopCloning: Insert a call to jithelper that prints a message. |
4885 | // |
4886 | // Arguments: |
4887 | // block - the block in which the helper call needs to be inserted. |
4888 | // insertBefore - the tree before which the helper call will be inserted. |
4889 | // |
4890 | void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTree* insertBefore) |
4891 | { |
4892 | if (JitConfig.JitDebugLogLoopCloning() == 0) |
4893 | { |
4894 | return; |
4895 | } |
4896 | GenTree* logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID); |
4897 | GenTree* stmt = fgNewStmtFromTree(logCall); |
4898 | fgInsertStmtBefore(block, insertBefore, stmt); |
4899 | fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Debug log loop cloning" )); |
4900 | } |
4901 | #endif |
4902 | |
4903 | //------------------------------------------------------------------------ |
4904 | // optPerformStaticOptimizations: Perform the optimizations for the optimization |
4905 | // candidates gathered during the cloning phase. |
4906 | // |
4907 | // Arguments: |
4908 | // loopNum - the current loop index for which the optimizations are performed. |
4909 | // context - data structure where all loop cloning info is kept. |
4910 | // dynamicPath - If true, the optimization is performed in the fast path among the |
4911 | // cloned loops. If false, it means this is the only path (i.e., |
4912 | // there is no slow path.) |
4913 | // |
4914 | // Operation: |
4915 | // Perform the optimizations on the fast path i.e., the path in which the |
4916 | // optimization candidates were collected at the time of identifying them. |
4917 | // The candidates store all the information necessary (the tree/stmt/block |
4918 | // they are from) to perform the optimization. |
4919 | // |
4920 | // Assumption: |
4921 | // The unoptimized path is either already cloned when this method is called or |
// there is no unoptimized path (it was eliminated statically). So this method
4923 | // performs the optimizations assuming that the path in which the candidates |
4924 | // were collected is the fast path in which the optimizations will be performed. |
4925 | // |
4926 | void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath)) |
4927 | { |
4928 | JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum); |
4929 | for (unsigned i = 0; i < optInfos->Size(); ++i) |
4930 | { |
4931 | LcOptInfo* optInfo = optInfos->GetRef(i); |
4932 | switch (optInfo->GetOptType()) |
4933 | { |
4934 | case LcOptInfo::LcJaggedArray: |
4935 | { |
4936 | LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); |
4937 | compCurBB = arrIndexInfo->arrIndex.useBlock; |
4938 | optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt); |
4939 | DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt)); |
4940 | } |
4941 | break; |
4942 | case LcOptInfo::LcMdArray: |
4943 | // TODO-CQ: CLONE: Implement. |
4944 | break; |
4945 | default: |
4946 | break; |
4947 | } |
4948 | } |
4949 | } |
4950 | |
4951 | //---------------------------------------------------------------------------- |
4952 | // optCanCloneLoops: Use the environment flag to determine whether loop |
4953 | // cloning is allowed to be performed. |
4954 | // |
4955 | // Return Value: |
// Returns true if loop cloning is enabled. It is enabled by default, including retail
// builds; in debug builds it can be disabled by setting COMPlus_JitCloneLoops to 0.
4958 | // |
4959 | bool Compiler::optCanCloneLoops() |
4960 | { |
4961 | // Enabled for retail builds now. |
4962 | unsigned cloneLoopsFlag = 1; |
4963 | #ifdef DEBUG |
4964 | cloneLoopsFlag = JitConfig.JitCloneLoops(); |
4965 | #endif |
4966 | return (cloneLoopsFlag != 0); |
4967 | } |
4968 | |
4969 | //---------------------------------------------------------------------------- |
4970 | // optIsLoopClonable: Determine whether this loop can be cloned. |
4971 | // |
4972 | // Arguments: |
//     loopInd - the loop index to check for clonability.
4974 | // |
4975 | // Return Value: |
// Returns true if the loop can be cloned. If it returns false, a message explaining
// why the loop can't be cloned is printed in debug builds.
4978 | // |
4979 | bool Compiler::optIsLoopClonable(unsigned loopInd) |
4980 | { |
4981 | // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle |
4982 | // inserting new EH regions in the exception table yet. |
4983 | BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext; |
4984 | unsigned loopRetCount = 0; |
4985 | for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext) |
4986 | { |
4987 | if (blk->bbJumpKind == BBJ_RETURN) |
4988 | { |
4989 | loopRetCount++; |
4990 | } |
4991 | if (bbIsTryBeg(blk)) |
4992 | { |
4993 | JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n" , loopInd, info.compFullName); |
4994 | return false; |
4995 | } |
4996 | } |
4997 | |
4998 | // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump |
4999 | // into the middle of a handler (to go to the cloned copy.) Reject. |
5000 | if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry)) |
5001 | { |
5002 | JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n" ); |
5003 | return false; |
5004 | } |
5005 | |
5006 | // If the head and entry are in different EH regions, reject. |
5007 | if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry)) |
5008 | { |
5009 | JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n" ); |
5010 | return false; |
5011 | } |
5012 | |
5013 | // Is the first block after the last block of the loop a handler or filter start? |
// Usually, we create a dummy block after the original loop, to skip over the loop clone
5015 | // and go to where the original loop did. That raises problems when we don't actually go to |
5016 | // that block; this is one of those cases. This could be fixed fairly easily; for example, |
5017 | // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the |
5018 | // loop. This is just a corner to cut to get this working faster. |
5019 | BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext; |
5020 | if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop)) |
5021 | { |
5022 | JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n" ); |
5023 | return false; |
5024 | } |
5025 | |
5026 | // We've previously made a decision whether to have separate return epilogs, or branch to one. |
5027 | // There's a GCInfo limitation in the x86 case, so that there can be no more than SET_EPILOGCNT_MAX separate |
5028 | // epilogs. Other architectures have a limit of 4 here for "historical reasons", but this should be revisited |
5029 | // (or return blocks should not be considered part of the loop, rendering this issue moot). |
5030 | unsigned epilogLimit = 4; |
5031 | #ifdef JIT32_GCENCODER |
5032 | epilogLimit = SET_EPILOGCNT_MAX; |
5033 | #endif // JIT32_GCENCODER |
5034 | if (fgReturnCount + loopRetCount > epilogLimit) |
5035 | { |
5036 | JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, " |
5037 | "would exceed the limit of %d.\n" , |
5038 | loopRetCount, fgReturnCount, epilogLimit); |
5039 | return false; |
5040 | } |
5041 | |
5042 | // Otherwise, we're going to add those return blocks. |
5043 | fgReturnCount += loopRetCount; |
5044 | |
5045 | return true; |
5046 | } |
5047 | |
5048 | /***************************************************************************** |
5049 | * |
5050 | * Identify loop cloning opportunities, derive loop cloning conditions, |
5051 | * perform loop cloning, use the derived conditions to choose which |
5052 | * path to take. |
5053 | */ |
5054 | void Compiler::optCloneLoops() |
5055 | { |
5056 | JITDUMP("\n*************** In optCloneLoops()\n" ); |
5057 | if (optLoopCount == 0 || !optCanCloneLoops()) |
5058 | { |
5059 | return; |
5060 | } |
5061 | |
5062 | #ifdef DEBUG |
5063 | if (verbose) |
5064 | { |
5065 | printf("Blocks/Trees at start of phase\n" ); |
5066 | fgDispBasicBlocks(true); |
5067 | } |
5068 | #endif |
5069 | |
5070 | LoopCloneContext context(optLoopCount, getAllocator()); |
5071 | |
5072 | // Obtain array optimization candidates in the context. |
5073 | optObtainLoopCloningOpts(&context); |
5074 | |
5075 | // For each loop, derive cloning conditions for the optimization candidates. |
5076 | for (unsigned i = 0; i < optLoopCount; ++i) |
5077 | { |
5078 | JitExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i); |
5079 | if (optInfos == nullptr) |
5080 | { |
5081 | continue; |
5082 | } |
5083 | |
5084 | if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context)) |
5085 | { |
5086 | JITDUMP("> Conditions could not be obtained\n" ); |
5087 | context.CancelLoopOptInfo(i); |
5088 | } |
5089 | else |
5090 | { |
5091 | bool allTrue = false; |
5092 | bool anyFalse = false; |
5093 | context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose)); |
5094 | if (anyFalse) |
5095 | { |
5096 | context.CancelLoopOptInfo(i); |
5097 | } |
5098 | if (allTrue) |
5099 | { |
5100 | // Perform static optimizations on the fast path since we always |
5101 | // have to take the cloned path. |
5102 | optPerformStaticOptimizations(i, &context DEBUGARG(false)); |
5103 | |
5104 | // No need to clone. |
5105 | context.CancelLoopOptInfo(i); |
5106 | } |
5107 | } |
5108 | } |
5109 | |
5110 | #if 0 |
5111 | // The code in this #if has been useful in debugging loop cloning issues, by |
5112 | // enabling selective enablement of the loop cloning optimization according to |
5113 | // method hash. |
5114 | #ifdef DEBUG |
5115 | unsigned methHash = info.compMethodHash(); |
5116 | char* lostr = getenv("loopclonehashlo" ); |
5117 | unsigned methHashLo = 0; |
5118 | if (lostr != NULL) |
5119 | { |
5120 | sscanf_s(lostr, "%x" , &methHashLo); |
5121 | // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. |
5122 | } |
5123 | char* histr = getenv("loopclonehashhi" ); |
5124 | unsigned methHashHi = UINT32_MAX; |
5125 | if (histr != NULL) |
5126 | { |
5127 | sscanf_s(histr, "%x" , &methHashHi); |
5128 | // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. |
5129 | } |
5130 | if (methHash < methHashLo || methHash > methHashHi) |
5131 | return; |
5132 | #endif |
5133 | #endif |
5134 | |
5135 | for (unsigned i = 0; i < optLoopCount; ++i) |
5136 | { |
5137 | if (context.GetLoopOptInfo(i) != nullptr) |
5138 | { |
5139 | optLoopsCloned++; |
5140 | context.OptimizeConditions(i DEBUGARG(verbose)); |
5141 | context.OptimizeBlockConditions(i DEBUGARG(verbose)); |
5142 | optCloneLoop(i, &context); |
5143 | } |
5144 | } |
5145 | |
5146 | #ifdef DEBUG |
5147 | if (verbose) |
5148 | { |
5149 | printf("\nAfter loop cloning:\n" ); |
5150 | fgDispBasicBlocks(/*dumpTrees*/ true); |
5151 | } |
5152 | #endif |
5153 | } |
5154 | |
5155 | void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) |
5156 | { |
5157 | assert(loopInd < optLoopCount); |
5158 | |
5159 | JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n" , loopInd, optLoopTable[loopInd].lpHead->bbNum, |
5160 | optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum, |
5161 | optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum); |
5162 | |
5163 | // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). |
5164 | unsigned depth = optLoopDepth(loopInd); |
5165 | unsigned ambientWeight = 1; |
5166 | for (unsigned j = 0; j < depth; j++) |
5167 | { |
5168 | unsigned lastWeight = ambientWeight; |
5169 | ambientWeight *= BB_LOOP_WEIGHT; |
5170 | // If the multiplication overflowed, stick at max. |
5171 | // (Strictly speaking, a multiplication could overflow and still have a result |
5172 | // that is >= lastWeight...but if so, the original weight must be pretty large, |
5173 | // and it got bigger, so that's OK.) |
5174 | if (ambientWeight < lastWeight) |
5175 | { |
5176 | ambientWeight = BB_MAX_WEIGHT; |
5177 | break; |
5178 | } |
5179 | } |
5180 | |
5181 | // If we're in a non-natural loop, the ambient weight might be higher than we computed above. |
5182 | // Be safe by taking the max with the head block's weight. |
5183 | ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight); |
5184 | |
5185 | // This is the containing loop, if any -- to label any blocks we create that are outside |
5186 | // the loop being cloned. |
5187 | unsigned char ambientLoop = optLoopTable[loopInd].lpParent; |
5188 | |
5189 | // First, make sure that the loop has a unique header block, creating an empty one if necessary. |
5190 | optEnsureUniqueHead(loopInd, ambientWeight); |
5191 | |
5192 | // We're going to make |
5193 | |
5194 | // H --> E |
5195 | // F |
5196 | // T |
5197 | // E |
5198 | // B ?-> T |
5199 | // X |
5200 | // |
5201 | // become |
5202 | // |
5203 | // H ?-> E2 |
5204 | // H2--> E (Optional; if E == T == F, let H fall through to F/T/E) |
5205 | // F |
5206 | // T |
5207 | // E |
5208 | // B ?-> T |
5209 | // X2--> X |
5210 | // F2 |
5211 | // T2 |
5212 | // E2 |
5213 | // B2 ?-> T2 |
5214 | // X |
5215 | |
5216 | BasicBlock* h = optLoopTable[loopInd].lpHead; |
5217 | if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS) |
5218 | { |
5219 | // Make a new block to be the unique entry to the loop. |
5220 | assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry); |
5221 | BasicBlock* newH = fgNewBBafter(BBJ_NONE, h, |
5222 | /*extendRegion*/ true); |
5223 | newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight); |
5224 | BlockSetOps::Assign(this, newH->bbReach, h->bbReach); |
5225 | // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. |
5226 | newH->bbNatLoopNum = ambientLoop; |
5227 | h = newH; |
5228 | optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h); |
5229 | } |
5230 | |
5231 | // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.) |
5232 | // "newPred" will be the predecessor of the blocks of the cloned loop. |
5233 | BasicBlock* b = optLoopTable[loopInd].lpBottom; |
5234 | BasicBlock* newPred = b; |
5235 | if (b->bbJumpKind != BBJ_ALWAYS) |
5236 | { |
5237 | BasicBlock* x = b->bbNext; |
5238 | if (x != nullptr) |
5239 | { |
5240 | BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true); |
5241 | x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight); |
5242 | |
5243 | // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. |
5244 | x2->bbNatLoopNum = ambientLoop; |
5245 | |
5246 | x2->bbJumpDest = x; |
5247 | BlockSetOps::Assign(this, x2->bbReach, h->bbReach); |
5248 | newPred = x2; |
5249 | } |
5250 | } |
5251 | |
5252 | // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while, |
5253 | // so that "h" already falls through to "e" (e == t == f). |
5254 | BasicBlock* h2 = nullptr; |
5255 | if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry) |
5256 | { |
5257 | BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead, |
5258 | /*extendRegion*/ true); |
5259 | h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); |
5260 | |
5261 | // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. |
5262 | h2->bbNatLoopNum = ambientLoop; |
5263 | |
5264 | h2->bbJumpDest = optLoopTable[loopInd].lpEntry; |
5265 | optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2); |
5266 | } |
5267 | |
5268 | // Now we'll clone the blocks of the loop body. |
5269 | BasicBlock* newFirst = nullptr; |
5270 | BasicBlock* newBot = nullptr; |
5271 | |
5272 | BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); |
5273 | for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; |
5274 | blk = blk->bbNext) |
5275 | { |
5276 | BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred, |
5277 | /*extendRegion*/ true); |
5278 | |
5279 | // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it |
5280 | // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already |
5281 | // checked them to guarantee they are clonable. |
5282 | bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk); |
5283 | noway_assert(cloneOk); |
5284 | // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert |
5285 | // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding |
5286 | // loop, if one exists -- the parent of the loop we're cloning. |
5287 | newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent; |
5288 | |
5289 | if (newFirst == nullptr) |
5290 | { |
5291 | newFirst = newBlk; |
5292 | } |
5293 | newBot = newBlk; // Continually overwrite to make sure we get the last one. |
5294 | newPred = newBlk; |
5295 | blockMap->Set(blk, newBlk); |
5296 | } |
5297 | |
5298 | // Perform the static optimizations on the fast path. |
5299 | optPerformStaticOptimizations(loopInd, context DEBUGARG(true)); |
5300 | |
5301 | // Now go through the new blocks, remapping their jump targets within the loop. |
5302 | for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; |
5303 | blk = blk->bbNext) |
5304 | { |
5305 | |
5306 | BasicBlock* newblk = nullptr; |
5307 | bool b = blockMap->Lookup(blk, &newblk); |
5308 | assert(b && newblk != nullptr); |
5309 | |
5310 | assert(blk->bbJumpKind == newblk->bbJumpKind); |
5311 | |
5312 | // First copy the jump destination(s) from "blk". |
5313 | optCopyBlkDest(blk, newblk); |
5314 | |
5315 | // Now redirect the new block according to "blockMap". |
5316 | optRedirectBlock(newblk, blockMap); |
5317 | } |
5318 | |
5319 | assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) || |
5320 | (h->bbJumpKind == BBJ_ALWAYS)); |
5321 | |
5322 | // If all the conditions are true, go to E2. |
5323 | BasicBlock* e2 = nullptr; |
5324 | bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2); |
5325 | |
5326 | h->bbJumpKind = BBJ_COND; |
5327 | |
5328 | // We will create the following structure |
5329 | // |
5330 | // cond0 (in h) -?> cond1 |
5331 | // slow --> e2 (slow) always |
5332 | // !cond1 -?> slow |
5333 | // !cond2 -?> slow |
5334 | // ... |
5335 | // !condn -?> slow |
5336 | // h2/entry (fast) |
5337 | // |
// We should always have block conditions; at a minimum, the array should be deref-able.
5339 | assert(context->HasBlockConditions(loopInd)); |
5340 | |
5341 | // Create a unique header for the slow path. |
5342 | BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true); |
5343 | slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight); |
5344 | slowHead->bbNatLoopNum = ambientLoop; |
5345 | slowHead->bbJumpDest = e2; |
5346 | |
5347 | BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead); |
5348 | condLast->bbJumpDest = slowHead; |
5349 | |
// If h2 is present, it is already the loop head; otherwise, make 'condLast' the new head.
5351 | if (h2 == nullptr) |
5352 | { |
5353 | optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast); |
5354 | } |
5355 | assert(foundIt && e2 != nullptr); |
5356 | |
5357 | // Don't unroll loops that we've cloned -- the unroller expects any loop it should unroll to |
5358 | // initialize the loop counter immediately before entering the loop, but we've left a shared |
5359 | // initialization of the loop counter up above the test that determines which version of the |
5360 | // loop to take. |
5361 | optLoopTable[loopInd].lpFlags |= LPFLG_DONT_UNROLL; |
5362 | |
5363 | fgUpdateChangedFlowGraph(); |
5364 | } |
5365 | |
5366 | //-------------------------------------------------------------------------------------------------- |
5367 | // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry |
5368 | // |
5369 | // Arguments: |
5370 | // context loop cloning context variable |
5371 | // loopNum the loop index |
5372 | // head loop head for "loopNum" |
5373 | // slowHead the slow path loop head |
5374 | // |
// Return Values:
//      The last condition block created; the caller sets its jump target to the slow path head.
5377 | // |
5378 | // Operation: |
5379 | // Create the following structure. |
5380 | // |
// Note below that cond0 is inverted in the head, i.e., if true, jump to cond1. This is because
// condn cannot jtrue to the loop head h2; the jump has to come from a direct pred block.
5383 | // |
5384 | // cond0 (in h) -?> cond1 |
5385 | // slowHead --> e2 (slowHead) always |
5386 | // !cond1 -?> slowHead |
5387 | // !cond2 -?> slowHead |
5388 | // ... |
5389 | // !condn -?> slowHead |
5390 | // h2/entry (fast) |
5391 | // |
5392 | // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them. |
5393 | // |
5394 | BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, |
5395 | unsigned loopNum, |
5396 | BasicBlock* head, |
5397 | BasicBlock* slowHead) |
5398 | { |
5399 | JITDUMP("Inserting loop cloning conditions\n" ); |
5400 | assert(context->HasBlockConditions(loopNum)); |
5401 | |
5402 | BasicBlock* curCond = head; |
5403 | JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum); |
5404 | for (unsigned i = 0; i < levelCond->Size(); ++i) |
5405 | { |
bool isHeaderBlock = (curCond == head);
5407 | |
5408 | // Flip the condition if header block. |
5409 | context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock); |
5410 | |
5411 | // Create each condition block ensuring wiring between them. |
5412 | BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true); |
5413 | curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead; |
5414 | curCond = tmp; |
5415 | |
5416 | curCond->inheritWeight(head); |
5417 | curCond->bbNatLoopNum = head->bbNatLoopNum; |
5418 | JITDUMP("Created new " FMT_BB " for new level\n" , curCond->bbNum); |
5419 | } |
5420 | |
5421 | // Finally insert cloning conditions after all deref conditions have been inserted. |
5422 | context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false); |
5423 | return curCond; |
5424 | } |
5425 | |
5426 | void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight) |
5427 | { |
5428 | BasicBlock* h = optLoopTable[loopInd].lpHead; |
5429 | BasicBlock* t = optLoopTable[loopInd].lpTop; |
5430 | BasicBlock* e = optLoopTable[loopInd].lpEntry; |
5431 | BasicBlock* b = optLoopTable[loopInd].lpBottom; |
5432 | |
5433 | // If "h" dominates the entry block, then it is the unique header. |
5434 | if (fgDominate(h, e)) |
5435 | { |
5436 | return; |
5437 | } |
5438 | |
5439 | // Otherwise, create a new empty header block, make it the pred of the entry block, |
5440 | // and redirect the preds of the entry block to go to this. |
5441 | |
5442 | BasicBlock* beforeTop = t->bbPrev; |
5443 | // Make sure that the new block is in the same region as the loop. |
5444 | // (We will only create loops that are entirely within a region.) |
5445 | BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true); |
5446 | // This is in the containing loop. |
5447 | h2->bbNatLoopNum = optLoopTable[loopInd].lpParent; |
5448 | h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); |
5449 | |
5450 | // We don't care where it was put; splice it between beforeTop and top. |
5451 | if (beforeTop->bbNext != h2) |
5452 | { |
5453 | h2->bbPrev->setNext(h2->bbNext); // Splice h2 out. |
5454 | beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t. |
5455 | h2->setNext(t); |
5456 | } |
5457 | |
5458 | if (h2->bbNext != e) |
5459 | { |
5460 | h2->bbJumpKind = BBJ_ALWAYS; |
5461 | h2->bbJumpDest = e; |
5462 | } |
5463 | BlockSetOps::Assign(this, h2->bbReach, e->bbReach); |
5464 | |
5465 | // Redirect paths from preds of "e" to go to "h2" instead of "e". |
5466 | BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); |
5467 | blockMap->Set(e, h2); |
5468 | |
5469 | for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext) |
5470 | { |
5471 | BasicBlock* predBlock = predEntry->flBlock; |
5472 | |
5473 | // Skip if predBlock is in the loop. |
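// (Note: this range test assumes the loop body occupies a contiguous bbNum range from
// top 't' to bottom 'b' at this point.)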
5474 | if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum) |
5475 | { |
5476 | continue; |
5477 | } |
5478 | optRedirectBlock(predBlock, blockMap); |
5479 | } |
5480 | |
5481 | optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2); |
5482 | } |
5483 | |
5484 | /***************************************************************************** |
5485 | * |
5486 | * Determine the kind of interference for the call. |
5487 | */ |
5488 | |
5489 | /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreeCall* call) |
5490 | { |
5491 | // if not a helper, kills everything |
5492 | if (call->gtCallType != CT_HELPER) |
5493 | { |
5494 | return CALLINT_ALL; |
5495 | } |
5496 | |
5497 | // setfield and array address store kill all indirections |
5498 | switch (eeGetHelperNum(call->gtCallMethHnd)) |
5499 | { |
5500 | case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this |
5501 | case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this |
5502 | case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this |
5503 | case CORINFO_HELP_SETFIELDOBJ: |
5504 | case CORINFO_HELP_ARRADDR_ST: |
5505 | |
5506 | return CALLINT_REF_INDIRS; |
5507 | |
5508 | case CORINFO_HELP_SETFIELDFLOAT: |
5509 | case CORINFO_HELP_SETFIELDDOUBLE: |
5510 | case CORINFO_HELP_SETFIELD8: |
5511 | case CORINFO_HELP_SETFIELD16: |
5512 | case CORINFO_HELP_SETFIELD32: |
5513 | case CORINFO_HELP_SETFIELD64: |
5514 | |
5515 | return CALLINT_SCL_INDIRS; |
5516 | |
5517 | case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this |
5518 | case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this |
5519 | case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this |
5520 | case CORINFO_HELP_SETFIELDSTRUCT: |
5521 | |
5522 | return CALLINT_ALL_INDIRS; |
5523 | |
5524 | default: |
5525 | break; |
5526 | } |
5527 | |
5528 | // other helpers kill nothing |
5529 | return CALLINT_NONE; |
5530 | } |
5531 | |
5532 | /***************************************************************************** |
5533 | * |
5534 | * See if the given tree can be computed in the given precision (which must |
5535 | * be smaller than the type of the tree for this to make sense). If 'doit' |
5536 | * is false, we merely check to see whether narrowing is possible; if we |
5537 | * get called with 'doit' being true, we actually perform the narrowing. |
5538 | */ |
5539 | |
5540 | bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit) |
5541 | { |
5542 | genTreeOps oper; |
5543 | unsigned kind; |
5544 | |
5545 | noway_assert(tree); |
5546 | noway_assert(genActualType(tree->gtType) == genActualType(srct)); |
5547 | |
5548 | /* Assume we're only handling integer types */ |
5549 | noway_assert(varTypeIsIntegral(srct)); |
5550 | noway_assert(varTypeIsIntegral(dstt)); |
5551 | |
5552 | unsigned srcSize = genTypeSize(srct); |
5553 | unsigned dstSize = genTypeSize(dstt); |
5554 | |
5555 | /* dstt must be smaller than srct to narrow */ |
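// (Illustrative: narrowing TYP_LONG (8 bytes) to TYP_INT (4 bytes) passes this check;
// TYP_INT to TYP_INT does not -- the destination must be strictly smaller.)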
5556 | if (dstSize >= srcSize) |
5557 | { |
5558 | return false; |
5559 | } |
5560 | |
5561 | /* Figure out what kind of a node we have */ |
5562 | oper = tree->OperGet(); |
5563 | kind = tree->OperKind(); |
5564 | |
5565 | if (oper == GT_ASG) |
5566 | { |
5567 | noway_assert(doit == false); |
5568 | return false; |
5569 | } |
5570 | |
5571 | ValueNumPair NoVNPair = ValueNumPair(); |
5572 | |
5573 | if (kind & GTK_LEAF) |
5574 | { |
5575 | switch (oper) |
5576 | { |
5577 | /* Constants can usually be narrowed by changing their value */ |
5578 | CLANG_FORMAT_COMMENT_ANCHOR; |
5579 | |
5580 | #ifndef _TARGET_64BIT_ |
5581 | __int64 lval; |
5582 | __int64 lmask; |
5583 | |
5584 | case GT_CNS_LNG: |
5585 | lval = tree->gtIntConCommon.LngValue(); |
5586 | lmask = 0; |
5587 | |
5588 | switch (dstt) |
5589 | { |
5590 | case TYP_BYTE: |
5591 | lmask = 0x0000007F; |
5592 | break; |
5593 | case TYP_BOOL: |
5594 | case TYP_UBYTE: |
5595 | lmask = 0x000000FF; |
5596 | break; |
5597 | case TYP_SHORT: |
5598 | lmask = 0x00007FFF; |
5599 | break; |
5600 | case TYP_USHORT: |
5601 | lmask = 0x0000FFFF; |
5602 | break; |
5603 | case TYP_INT: |
5604 | lmask = 0x7FFFFFFF; |
5605 | break; |
5606 | case TYP_UINT: |
5607 | lmask = 0xFFFFFFFF; |
5608 | break; |
5609 | |
5610 | default: |
5611 | return false; |
5612 | } |
5613 | |
5614 | if ((lval & lmask) != lval) |
5615 | return false; |
5616 | |
5617 | if (doit) |
5618 | { |
5619 | tree->ChangeOperConst(GT_CNS_INT); |
5620 | tree->gtType = TYP_INT; |
5621 | tree->gtIntCon.gtIconVal = (int)lval; |
5622 | if (vnStore != nullptr) |
5623 | { |
5624 | fgValueNumberTreeConst(tree); |
5625 | } |
5626 | } |
5627 | |
5628 | return true; |
5629 | #endif |
5630 | |
5631 | case GT_CNS_INT: |
5632 | |
5633 | ssize_t ival; |
5634 | ival = tree->gtIntCon.gtIconVal; |
5635 | ssize_t imask; |
5636 | imask = 0; |
5637 | |
5638 | switch (dstt) |
5639 | { |
5640 | case TYP_BYTE: |
5641 | imask = 0x0000007F; |
5642 | break; |
5643 | case TYP_BOOL: |
5644 | case TYP_UBYTE: |
5645 | imask = 0x000000FF; |
5646 | break; |
5647 | case TYP_SHORT: |
5648 | imask = 0x00007FFF; |
5649 | break; |
5650 | case TYP_USHORT: |
5651 | imask = 0x0000FFFF; |
5652 | break; |
5653 | #ifdef _TARGET_64BIT_ |
5654 | case TYP_INT: |
5655 | imask = 0x7FFFFFFF; |
5656 | break; |
5657 | case TYP_UINT: |
5658 | imask = 0xFFFFFFFF; |
5659 | break; |
5660 | #endif // _TARGET_64BIT_ |
5661 | default: |
5662 | return false; |
5663 | } |
5664 | |
5665 | if ((ival & imask) != ival) |
5666 | { |
5667 | return false; |
5668 | } |
5669 | |
5670 | #ifdef _TARGET_64BIT_ |
5671 | if (doit) |
5672 | { |
5673 | tree->gtType = TYP_INT; |
5674 | tree->gtIntCon.gtIconVal = (int)ival; |
5675 | if (vnStore != nullptr) |
5676 | { |
5677 | fgValueNumberTreeConst(tree); |
5678 | } |
5679 | } |
5680 | #endif // _TARGET_64BIT_ |
5681 | |
5682 | return true; |
5683 | |
5684 | /* Operands that are in memory can usually be narrowed |
5685 | simply by changing their gtType */ |
5686 | |
5687 | case GT_LCL_VAR: |
5688 | /* We only allow narrowing long -> int for a GT_LCL_VAR */ |
5689 | if (dstSize == sizeof(int)) |
5690 | { |
5691 | goto NARROW_IND; |
5692 | } |
5693 | break; |
5694 | |
5695 | case GT_CLS_VAR: |
5696 | case GT_LCL_FLD: |
5697 | goto NARROW_IND; |
5698 | default: |
5699 | break; |
5700 | } |
5701 | |
5702 | noway_assert(doit == false); |
5703 | return false; |
5704 | } |
5705 | |
5706 | if (kind & (GTK_BINOP | GTK_UNOP)) |
5707 | { |
5708 | GenTree* op1; |
5709 | op1 = tree->gtOp.gtOp1; |
5710 | GenTree* op2; |
5711 | op2 = tree->gtOp.gtOp2; |
5712 | |
5713 | switch (tree->gtOper) |
5714 | { |
5715 | case GT_AND: |
5716 | noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); |
5717 | noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); |
5718 | |
5719 | GenTree* opToNarrow; |
5720 | opToNarrow = nullptr; |
5721 | GenTree** otherOpPtr; |
5722 | otherOpPtr = nullptr; |
5723 | bool foundOperandThatBlocksNarrowing; |
5724 | foundOperandThatBlocksNarrowing = false; |
5725 | |
// If 'dstt' is unsigned and one of the operands can be narrowed into 'dstt',
// the result of the GT_AND will also fit into 'dstt' and can be narrowed.
// The same is true if one of the operands is an int const and can be narrowed into 'dstt'.
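// (Illustrative: for "x & 0xFF", the constant fits in a small unsigned type, and AND can only
// clear bits, so the whole expression fits regardless of the other operand.)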
5729 | if (!gtIsActiveCSE_Candidate(op2) && ((op2->gtOper == GT_CNS_INT) || varTypeIsUnsigned(dstt))) |
5730 | { |
5731 | if (optNarrowTree(op2, srct, dstt, NoVNPair, false)) |
5732 | { |
5733 | opToNarrow = op2; |
5734 | otherOpPtr = &tree->gtOp.gtOp1; |
5735 | } |
5736 | else |
5737 | { |
5738 | foundOperandThatBlocksNarrowing = true; |
5739 | } |
5740 | } |
5741 | |
5742 | if ((opToNarrow == nullptr) && !gtIsActiveCSE_Candidate(op1) && |
5743 | ((op1->gtOper == GT_CNS_INT) || varTypeIsUnsigned(dstt))) |
5744 | { |
5745 | if (optNarrowTree(op1, srct, dstt, NoVNPair, false)) |
5746 | { |
5747 | opToNarrow = op1; |
5748 | otherOpPtr = &tree->gtOp.gtOp2; |
5749 | } |
5750 | else |
5751 | { |
5752 | foundOperandThatBlocksNarrowing = true; |
5753 | } |
5754 | } |
5755 | |
5756 | if (opToNarrow != nullptr) |
5757 | { |
5758 | // We will change the type of the tree and narrow opToNarrow |
5759 | // |
5760 | if (doit) |
5761 | { |
5762 | tree->gtType = genActualType(dstt); |
5763 | tree->SetVNs(vnpNarrow); |
5764 | |
5765 | optNarrowTree(opToNarrow, srct, dstt, NoVNPair, true); |
5766 | // We may also need to cast away the upper bits of *otherOpPtr |
5767 | if (srcSize == 8) |
5768 | { |
5769 | assert(tree->gtType == TYP_INT); |
5770 | GenTree* castOp = gtNewCastNode(TYP_INT, *otherOpPtr, false, TYP_INT); |
5771 | #ifdef DEBUG |
5772 | castOp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; |
5773 | #endif |
5774 | *otherOpPtr = castOp; |
5775 | } |
5776 | } |
5777 | return true; |
5778 | } |
5779 | |
5780 | if (foundOperandThatBlocksNarrowing) |
5781 | { |
5782 | noway_assert(doit == false); |
5783 | return false; |
5784 | } |
5785 | |
5786 | goto COMMON_BINOP; |
5787 | |
5788 | case GT_ADD: |
5789 | case GT_MUL: |
5790 | |
5791 | if (tree->gtOverflow() || varTypeIsSmall(dstt)) |
5792 | { |
5793 | noway_assert(doit == false); |
5794 | return false; |
5795 | } |
5796 | __fallthrough; |
5797 | |
5798 | case GT_OR: |
5799 | case GT_XOR: |
5800 | noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); |
5801 | noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); |
5802 | COMMON_BINOP: |
5803 | if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) || |
5804 | !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit)) |
5805 | { |
5806 | noway_assert(doit == false); |
5807 | return false; |
5808 | } |
5809 | |
5810 | /* Simply change the type of the tree */ |
5811 | |
5812 | if (doit) |
5813 | { |
5814 | if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT)) |
5815 | { |
5816 | tree->gtFlags &= ~GTF_MUL_64RSLT; |
5817 | } |
5818 | |
5819 | tree->gtType = genActualType(dstt); |
5820 | tree->SetVNs(vnpNarrow); |
5821 | } |
5822 | |
5823 | return true; |
5824 | |
5825 | case GT_IND: |
5826 | |
5827 | NARROW_IND: |
5828 | |
5829 | if ((dstSize > genTypeSize(tree->gtType)) && |
5830 | (varTypeIsUnsigned(dstt) && !varTypeIsUnsigned(tree->gtType))) |
5831 | { |
5832 | return false; |
5833 | } |
5834 | |
5835 | /* Simply change the type of the tree */ |
5836 | |
5837 | if (doit && (dstSize <= genTypeSize(tree->gtType))) |
5838 | { |
5839 | tree->gtType = genSignedType(dstt); |
5840 | tree->SetVNs(vnpNarrow); |
5841 | |
5842 | /* Make sure we don't mess up the variable type */ |
5843 | if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD)) |
5844 | { |
5845 | tree->gtFlags |= GTF_VAR_CAST; |
5846 | } |
5847 | } |
5848 | |
5849 | return true; |
5850 | |
5851 | case GT_EQ: |
5852 | case GT_NE: |
5853 | case GT_LT: |
5854 | case GT_LE: |
5855 | case GT_GT: |
5856 | case GT_GE: |
5857 | |
5858 | /* These can always be narrowed since they only represent 0 or 1 */ |
5859 | return true; |
5860 | |
5861 | case GT_CAST: |
5862 | { |
5863 | var_types cast = tree->CastToType(); |
5864 | var_types oprt = op1->TypeGet(); |
5865 | unsigned oprSize = genTypeSize(oprt); |
5866 | |
5867 | if (cast != srct) |
5868 | { |
5869 | return false; |
5870 | } |
5871 | |
5872 | if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt)) |
5873 | { |
5874 | return false; |
5875 | } |
5876 | |
5877 | if (tree->gtOverflow()) |
5878 | { |
5879 | return false; |
5880 | } |
5881 | |
5882 | /* Is this a cast from the type we're narrowing to or a smaller one? */ |
5883 | |
5884 | if (oprSize <= dstSize) |
5885 | { |
5886 | /* Bash the target type of the cast */ |
5887 | |
5888 | if (doit) |
5889 | { |
5890 | dstt = genSignedType(dstt); |
5891 | |
5892 | if ((oprSize == dstSize) && |
5893 | ((varTypeIsUnsigned(dstt) == varTypeIsUnsigned(oprt)) || !varTypeIsSmall(dstt))) |
5894 | { |
5895 | // Same size and there is no signedness mismatch for small types: change the CAST |
5896 | // into a NOP |
5897 | |
5898 | JITDUMP("Cast operation has no effect, bashing [%06d] GT_CAST into a GT_NOP.\n" , |
5899 | dspTreeID(tree)); |
5900 | |
5901 | tree->ChangeOper(GT_NOP); |
5902 | tree->gtType = dstt; |
5903 | // Clear the GTF_UNSIGNED flag, as it may have been set on the cast node |
5904 | tree->gtFlags &= ~GTF_UNSIGNED; |
5905 | tree->gtOp.gtOp2 = nullptr; |
5906 | tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber |
5907 | } |
5908 | else |
5909 | { |
5910 | // oprSize is smaller or there is a signedness mismatch for small types |
5911 | |
5912 | // Change the CastToType in the GT_CAST node |
5913 | tree->CastToType() = dstt; |
5914 | |
5915 | // The result type of a GT_CAST is never a small type. |
// Use genActualType to widen dstt when it is a small type.
5917 | tree->gtType = genActualType(dstt); |
5918 | tree->SetVNs(vnpNarrow); |
5919 | } |
5920 | } |
5921 | |
5922 | return true; |
5923 | } |
5924 | } |
5925 | return false; |
5926 | |
5927 | case GT_COMMA: |
5928 | if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit)) |
5929 | { |
5930 | /* Simply change the type of the tree */ |
5931 | |
5932 | if (doit) |
5933 | { |
5934 | tree->gtType = genActualType(dstt); |
5935 | tree->SetVNs(vnpNarrow); |
5936 | } |
5937 | return true; |
5938 | } |
5939 | return false; |
5940 | |
5941 | default: |
5942 | noway_assert(doit == false); |
5943 | return false; |
5944 | } |
5945 | } |
5946 | |
5947 | return false; |
5948 | } |
5949 | |
5950 | /***************************************************************************** |
5951 | * |
5952 | * The following logic figures out whether the given variable is assigned |
5953 | * somewhere in a list of basic blocks (or in an entire loop). |
5954 | */ |
5955 | |
5956 | Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTree** pTree, fgWalkData* data) |
5957 | { |
5958 | GenTree* tree = *pTree; |
5959 | |
5960 | if (tree->OperIs(GT_ASG)) |
5961 | { |
5962 | GenTree* dest = tree->gtOp.gtOp1; |
5963 | genTreeOps destOper = dest->OperGet(); |
5964 | |
5965 | isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData; |
5966 | assert(desc && desc->ivaSelf == desc); |
5967 | |
5968 | if (destOper == GT_LCL_VAR) |
5969 | { |
5970 | unsigned tvar = dest->gtLclVarCommon.gtLclNum; |
5971 | if (tvar < lclMAX_ALLSET_TRACKED) |
5972 | { |
5973 | AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar); |
5974 | } |
5975 | else |
5976 | { |
5977 | desc->ivaMaskIncomplete = true; |
5978 | } |
5979 | |
5980 | if (tvar == desc->ivaVar) |
5981 | { |
5982 | if (tree != desc->ivaSkip) |
5983 | { |
5984 | return WALK_ABORT; |
5985 | } |
5986 | } |
5987 | } |
5988 | else if (destOper == GT_LCL_FLD) |
5989 | { |
5990 | /* We can't track every field of every var. Moreover, indirections |
5991 | may access different parts of the var as different (but |
5992 | overlapping) fields. So just treat them as indirect accesses */ |
5993 | |
5994 | // unsigned lclNum = dest->gtLclFld.gtLclNum; |
5995 | // noway_assert(lvaTable[lclNum].lvAddrTaken); |
5996 | |
5997 | varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL; |
5998 | desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs); |
5999 | } |
6000 | else if (destOper == GT_CLS_VAR) |
6001 | { |
6002 | desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR); |
6003 | } |
6004 | else if (destOper == GT_IND) |
6005 | { |
6006 | /* Set the proper indirection bits */ |
6007 | |
6008 | varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL; |
6009 | desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs); |
6010 | } |
6011 | } |
6012 | else if (tree->gtOper == GT_CALL) |
6013 | { |
6014 | isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData; |
6015 | assert(desc && desc->ivaSelf == desc); |
6016 | |
6017 | desc->ivaMaskCall = optCallInterf(tree->AsCall()); |
6018 | } |
6019 | |
6020 | return WALK_CONTINUE; |
6021 | } |
6022 | |
6023 | /*****************************************************************************/ |
6024 | |
6025 | bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTree* skip, unsigned var) |
6026 | { |
6027 | bool result; |
6028 | isVarAssgDsc desc; |
6029 | |
6030 | desc.ivaSkip = skip; |
6031 | #ifdef DEBUG |
6032 | desc.ivaSelf = &desc; |
6033 | #endif |
6034 | desc.ivaVar = var; |
6035 | desc.ivaMaskCall = CALLINT_NONE; |
6036 | AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this)); |
6037 | |
6038 | for (;;) |
6039 | { |
6040 | noway_assert(beg); |
6041 | |
6042 | for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt) |
6043 | { |
6044 | noway_assert(stmt->gtOper == GT_STMT); |
6045 | if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc)) |
6046 | { |
6047 | result = true; |
6048 | goto DONE; |
6049 | } |
6050 | } |
6051 | |
6052 | if (beg == end) |
6053 | { |
6054 | break; |
6055 | } |
6056 | |
6057 | beg = beg->bbNext; |
6058 | } |
6059 | |
6060 | result = false; |
6061 | |
6062 | DONE: |
6063 | |
6064 | return result; |
6065 | } |
6066 | |
6067 | /*****************************************************************************/ |
6068 | int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds) |
6069 | { |
6070 | LoopDsc* loop; |
6071 | |
6072 | /* Get hold of the loop descriptor */ |
6073 | |
6074 | noway_assert(lnum < optLoopCount); |
6075 | loop = optLoopTable + lnum; |
6076 | |
6077 | /* Do we already know what variables are assigned within this loop? */ |
6078 | |
6079 | if (!(loop->lpFlags & LPFLG_ASGVARS_YES)) |
6080 | { |
6081 | isVarAssgDsc desc; |
6082 | |
6083 | BasicBlock* beg; |
6084 | BasicBlock* end; |
6085 | |
6086 | /* Prepare the descriptor used by the tree walker call-back */ |
6087 | |
6088 | desc.ivaVar = (unsigned)-1; |
6089 | desc.ivaSkip = nullptr; |
6090 | #ifdef DEBUG |
6091 | desc.ivaSelf = &desc; |
6092 | #endif |
6093 | AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this)); |
6094 | desc.ivaMaskInd = VR_NONE; |
6095 | desc.ivaMaskCall = CALLINT_NONE; |
6096 | desc.ivaMaskIncomplete = false; |
6097 | |
6098 | /* Now walk all the statements of the loop */ |
6099 | |
6100 | beg = loop->lpHead->bbNext; |
6101 | end = loop->lpBottom; |
6102 | |
6103 | for (/**/; /**/; beg = beg->bbNext) |
6104 | { |
6105 | noway_assert(beg); |
6106 | |
6107 | for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) |
6108 | { |
6109 | noway_assert(stmt->gtOper == GT_STMT); |
6110 | fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc); |
6111 | |
6112 | if (desc.ivaMaskIncomplete) |
6113 | { |
6114 | loop->lpFlags |= LPFLG_ASGVARS_INC; |
6115 | } |
6116 | } |
6117 | |
6118 | if (beg == end) |
6119 | { |
6120 | break; |
6121 | } |
6122 | } |
6123 | |
6124 | AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal); |
6125 | loop->lpAsgInds = desc.ivaMaskInd; |
6126 | loop->lpAsgCall = desc.ivaMaskCall; |
6127 | |
6128 | /* Now we know what variables are assigned in the loop */ |
6129 | |
6130 | loop->lpFlags |= LPFLG_ASGVARS_YES; |
6131 | } |
6132 | |
6133 | /* Now we can finally test the caller's mask against the loop's */ |
6134 | if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds)) |
6135 | { |
6136 | return 1; |
6137 | } |
6138 | |
6139 | switch (loop->lpAsgCall) |
6140 | { |
6141 | case CALLINT_ALL: |
6142 | |
/* Can't hoist if the call might have a side effect on an indirection. */
6144 | |
6145 | if (loop->lpAsgInds != VR_NONE) |
6146 | { |
6147 | return 1; |
6148 | } |
6149 | |
6150 | break; |
6151 | |
6152 | case CALLINT_REF_INDIRS: |
6153 | |
/* Can't hoist if the call might have a side effect on a ref indirection. */
6155 | |
6156 | if (loop->lpAsgInds & VR_IND_REF) |
6157 | { |
6158 | return 1; |
6159 | } |
6160 | |
6161 | break; |
6162 | |
6163 | case CALLINT_SCL_INDIRS: |
6164 | |
/* Can't hoist if the call might have a side effect on a non-ref indirection. */
6166 | |
6167 | if (loop->lpAsgInds & VR_IND_SCL) |
6168 | { |
6169 | return 1; |
6170 | } |
6171 | |
6172 | break; |
6173 | |
6174 | case CALLINT_ALL_INDIRS: |
6175 | |
/* Can't hoist if the call might have a side effect on any indirection. */
6177 | |
6178 | if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL)) |
6179 | { |
6180 | return 1; |
6181 | } |
6182 | |
6183 | break; |
6184 | |
6185 | case CALLINT_NONE: |
6186 | |
6187 | /* Other helpers kill nothing */ |
6188 | |
6189 | break; |
6190 | |
6191 | default: |
6192 | noway_assert(!"Unexpected lpAsgCall value" ); |
6193 | } |
6194 | |
6195 | return 0; |
6196 | } |
6197 | |
6198 | void Compiler::optPerformHoistExpr(GenTree* origExpr, unsigned lnum) |
6199 | { |
6200 | #ifdef DEBUG |
6201 | if (verbose) |
6202 | { |
6203 | printf("\nHoisting a copy of " ); |
6204 | printTreeID(origExpr); |
6205 | printf(" into PreHeader for loop L%02u <" FMT_BB ".." FMT_BB ">:\n" , lnum, optLoopTable[lnum].lpFirst->bbNum, |
6206 | optLoopTable[lnum].lpBottom->bbNum); |
6207 | gtDispTree(origExpr); |
6208 | printf("\n" ); |
6209 | } |
6210 | #endif |
6211 | |
6212 | // This loop has to be in a form that is approved for hoisting. |
6213 | assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE); |
6214 | |
6215 | // Create a copy of the expression and mark it for CSE's. |
6216 | GenTree* hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE); |
6217 | |
6218 | // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag |
6219 | assert(hoistExpr != origExpr); |
6220 | assert(hoistExpr->gtFlags & GTF_MAKE_CSE); |
6221 | |
6222 | GenTree* hoist = hoistExpr; |
6223 | // The value of the expression isn't used (unless it's an assignment). |
6224 | if (hoistExpr->OperGet() != GT_ASG) |
6225 | { |
6226 | hoist = gtUnusedValNode(hoistExpr); |
6227 | } |
6228 | |
6229 | /* Put the statement in the preheader */ |
6230 | |
6231 | fgCreateLoopPreHeader(lnum); |
6232 | |
6233 | BasicBlock* preHead = optLoopTable[lnum].lpHead; |
6234 | assert(preHead->bbJumpKind == BBJ_NONE); |
6235 | |
6236 | // fgMorphTree requires that compCurBB be the block that contains |
6237 | // (or in this case, will contain) the expression. |
6238 | compCurBB = preHead; |
6239 | hoist = fgMorphTree(hoist); |
6240 | |
6241 | GenTree* hoistStmt = gtNewStmt(hoist); |
6242 | hoistStmt->gtFlags |= GTF_STMT_CMPADD; |
6243 | |
6244 | /* simply append the statement at the end of the preHead's list */ |
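// Note: bbTreeList uses a circular gtPrev convention: the first statement's gtPrev points to the
// last statement (so the end of the list is found in O(1)), while the last statement's gtNext is
// nullptr. The splice below maintains that invariant.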
6245 | |
6246 | GenTree* treeList = preHead->bbTreeList; |
6247 | |
6248 | if (treeList) |
6249 | { |
6250 | /* append after last statement */ |
6251 | |
6252 | GenTree* last = treeList->gtPrev; |
6253 | assert(last->gtNext == nullptr); |
6254 | |
6255 | last->gtNext = hoistStmt; |
6256 | hoistStmt->gtPrev = last; |
6257 | treeList->gtPrev = hoistStmt; |
6258 | } |
6259 | else |
6260 | { |
6261 | /* Empty pre-header - store the single statement in the block */ |
6262 | |
6263 | preHead->bbTreeList = hoistStmt; |
6264 | hoistStmt->gtPrev = hoistStmt; |
6265 | } |
6266 | |
6267 | hoistStmt->gtNext = nullptr; |
6268 | |
6269 | #ifdef DEBUG |
6270 | if (verbose) |
6271 | { |
6272 | printf("This hoisted copy placed in PreHeader (" FMT_BB "):\n" , preHead->bbNum); |
6273 | gtDispTree(hoist); |
6274 | } |
6275 | #endif |
6276 | |
6277 | if (fgStmtListThreaded) |
6278 | { |
6279 | gtSetStmtInfo(hoistStmt); |
6280 | fgSetStmtSeq(hoistStmt); |
6281 | } |
6282 | |
6283 | #ifdef DEBUG |
6284 | if (m_nodeTestData != nullptr) |
6285 | { |
6286 | |
6287 | // What is the depth of the loop "lnum"? |
6288 | ssize_t depth = 0; |
6289 | unsigned lnumIter = lnum; |
6290 | while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP) |
6291 | { |
6292 | depth++; |
6293 | lnumIter = optLoopTable[lnumIter].lpParent; |
6294 | } |
6295 | |
6296 | NodeToTestDataMap* testData = GetNodeTestData(); |
6297 | |
6298 | TestLabelAndNum tlAndN; |
6299 | if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist) |
6300 | { |
6301 | if (tlAndN.m_num == -1) |
6302 | { |
6303 | printf("Node " ); |
6304 | printTreeID(origExpr); |
6305 | printf(" was declared 'do not hoist', but is being hoisted.\n" ); |
6306 | assert(false); |
6307 | } |
6308 | else if (tlAndN.m_num != depth) |
6309 | { |
6310 | printf("Node " ); |
6311 | printTreeID(origExpr); |
6312 | printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth " |
6313 | "%d.\n" , |
6314 | tlAndN.m_num, depth); |
6315 | assert(false); |
6316 | } |
6317 | else |
6318 | { |
6319 | // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must |
6320 | // hoist" annotations. |
6321 | testData->Remove(origExpr); |
6322 | // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd. |
6323 | tlAndN.m_tl = TL_CSE_Def; |
6324 | tlAndN.m_num = m_loopHoistCSEClass++; |
6325 | testData->Set(hoistExpr, tlAndN); |
6326 | } |
6327 | } |
6328 | } |
6329 | #endif |
6330 | |
6331 | #if LOOP_HOIST_STATS |
6332 | if (!m_curLoopHasHoistedExpression) |
6333 | { |
6334 | m_loopsWithHoistedExpressions++; |
6335 | m_curLoopHasHoistedExpression = true; |
6336 | } |
6337 | m_totalHoistedExpressions++; |
6338 | #endif // LOOP_HOIST_STATS |
6339 | } |
6340 | |
6341 | void Compiler::optHoistLoopCode() |
6342 | { |
6343 | // If we don't have any loops in the method then take an early out now. |
6344 | if (optLoopCount == 0) |
6345 | { |
6346 | return; |
6347 | } |
6348 | |
6349 | #ifdef DEBUG |
6350 | unsigned jitNoHoist = JitConfig.JitNoHoist(); |
6351 | if (jitNoHoist > 0) |
6352 | { |
6353 | return; |
6354 | } |
6355 | #endif |
6356 | |
6357 | #if 0 |
// The code in this #if has been useful in debugging loop hoisting issues, by
// enabling selective enablement of the loop hoisting optimization according to
// method hash.
6361 | #ifdef DEBUG |
6362 | unsigned methHash = info.compMethodHash(); |
6363 | char* lostr = getenv("loophoisthashlo" ); |
6364 | unsigned methHashLo = 0; |
6365 | if (lostr != NULL) |
6366 | { |
6367 | sscanf_s(lostr, "%x" , &methHashLo); |
6368 | // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. |
6369 | } |
6370 | char* histr = getenv("loophoisthashhi" ); |
6371 | unsigned methHashHi = UINT32_MAX; |
6372 | if (histr != NULL) |
6373 | { |
6374 | sscanf_s(histr, "%x" , &methHashHi); |
6375 | // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. |
6376 | } |
6377 | if (methHash < methHashLo || methHash > methHashHi) |
6378 | return; |
6379 | printf("Doing loop hoisting in %s (0x%x).\n" , info.compFullName, methHash); |
6380 | #endif // DEBUG |
#endif // 0 -- debugging loop hoisting issues
6382 | |
6383 | #ifdef DEBUG |
6384 | if (verbose) |
6385 | { |
6386 | printf("\n*************** In optHoistLoopCode()\n" ); |
6387 | printf("Blocks/Trees before phase\n" ); |
6388 | fgDispBasicBlocks(true); |
6389 | printf("" ); |
6390 | } |
6391 | #endif |
6392 | |
// Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside
// the largest loop in which they are invariant).
6395 | LoopHoistContext hoistCtxt(this); |
6396 | for (unsigned lnum = 0; lnum < optLoopCount; lnum++) |
6397 | { |
6398 | if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) |
6399 | { |
6400 | continue; |
6401 | } |
6402 | |
6403 | if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) |
6404 | { |
6405 | optHoistLoopNest(lnum, &hoistCtxt); |
6406 | } |
6407 | } |
6408 | |
6409 | #if DEBUG |
6410 | if (fgModified) |
6411 | { |
6412 | if (verbose) |
6413 | { |
6414 | printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n" ); |
6415 | fgDispBasicBlocks(true); |
6416 | printf("" ); |
6417 | } |
6418 | |
6419 | // Make sure that the predecessor lists are accurate |
6420 | fgDebugCheckBBlist(); |
6421 | } |
6422 | #endif |
6423 | |
6424 | #ifdef DEBUG |
6425 | // Test Data stuff.. |
6426 | // If we have no test data, early out. |
6427 | if (m_nodeTestData == nullptr) |
6428 | { |
6429 | return; |
6430 | } |
6431 | NodeToTestDataMap* testData = GetNodeTestData(); |
6432 | for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki) |
6433 | { |
6434 | TestLabelAndNum tlAndN; |
6435 | GenTree* node = ki.Get(); |
6436 | bool b = testData->Lookup(node, &tlAndN); |
6437 | assert(b); |
6438 | if (tlAndN.m_tl != TL_LoopHoist) |
6439 | { |
6440 | continue; |
6441 | } |
6442 | // Otherwise, it is a loop hoist annotation. |
6443 | assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved. |
6444 | if (tlAndN.m_num >= 0) |
6445 | { |
6446 | printf("Node " ); |
6447 | printTreeID(node); |
6448 | printf(" was declared 'must hoist', but has not been hoisted.\n" ); |
6449 | assert(false); |
6450 | } |
6451 | } |
6452 | #endif // DEBUG |
6453 | } |
6454 | |
6455 | void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) |
6456 | { |
6457 | // Do this loop, then recursively do all nested loops. |
6458 | CLANG_FORMAT_COMMENT_ANCHOR; |
6459 | |
6460 | #if LOOP_HOIST_STATS |
6461 | // Record stats |
6462 | m_curLoopHasHoistedExpression = false; |
6463 | m_loopsConsidered++; |
6464 | #endif // LOOP_HOIST_STATS |
6465 | |
6466 | optHoistThisLoop(lnum, hoistCtxt); |
6467 | |
6468 | VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); |
6469 | |
6470 | if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) |
6471 | { |
6472 | // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops. |
6473 | // TODO-Cleanup: we should have a set abstraction for loops. |
6474 | if (hoistedInCurLoop != nullptr) |
6475 | { |
6476 | for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) |
6477 | { |
6478 | #ifdef DEBUG |
6479 | bool b; |
6480 | assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b)); |
6481 | #endif |
6482 | hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true); |
6483 | } |
6484 | } |
6485 | |
6486 | for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; |
6487 | child = optLoopTable[child].lpSibling) |
6488 | { |
6489 | optHoistLoopNest(child, hoistCtxt); |
6490 | } |
6491 | |
6492 | // Now remove them. |
6493 | // TODO-Cleanup: we should have a set abstraction for loops. |
6494 | if (hoistedInCurLoop != nullptr) |
6495 | { |
6496 | for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) |
6497 | { |
6498 | // Note that we asserted when we added these that they hadn't been members, so removing is appropriate. |
6499 | hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get()); |
6500 | } |
6501 | } |
6502 | } |
6503 | } |
6504 | |
6505 | void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) |
6506 | { |
6507 | LoopDsc* pLoopDsc = &optLoopTable[lnum]; |
6508 | |
6509 | /* If loop was removed continue */ |
6510 | |
6511 | if (pLoopDsc->lpFlags & LPFLG_REMOVED) |
6512 | { |
6513 | return; |
6514 | } |
6515 | |
6516 | /* Get the head and tail of the loop */ |
6517 | |
6518 | BasicBlock* head = pLoopDsc->lpHead; |
6519 | BasicBlock* tail = pLoopDsc->lpBottom; |
6520 | BasicBlock* lbeg = pLoopDsc->lpEntry; |
6521 | |
6522 | // We must have a do-while loop |
6523 | if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0) |
6524 | { |
6525 | return; |
6526 | } |
6527 | |
6528 | // The loop-head must dominate the loop-entry. |
6529 | // TODO-CQ: Couldn't we make this true if it's not? |
6530 | if (!fgDominate(head, lbeg)) |
6531 | { |
6532 | return; |
6533 | } |
6534 | |
6535 | // if lbeg is the start of a new try block then we won't be able to hoist |
6536 | if (!BasicBlock::sameTryRegion(head, lbeg)) |
6537 | { |
6538 | return; |
6539 | } |
6540 | |
6541 | // We don't bother hoisting when inside of a catch block |
6542 | if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY)) |
6543 | { |
6544 | return; |
6545 | } |
6546 | |
6547 | pLoopDsc->lpFlags |= LPFLG_HOISTABLE; |
6548 | |
6549 | unsigned begn = lbeg->bbNum; |
6550 | unsigned endn = tail->bbNum; |
6551 | |
6552 | // Ensure the per-loop sets/tables are empty. |
6553 | hoistCtxt->m_curLoopVnInvariantCache.RemoveAll(); |
6554 | |
6555 | #ifdef DEBUG |
6556 | if (verbose) |
6557 | { |
6558 | printf("optHoistLoopCode for loop L%02u <" FMT_BB ".." FMT_BB ">:\n" , lnum, begn, endn); |
6559 | printf(" Loop body %s a call\n" , pLoopDsc->lpContainsCall ? "contains" : "does not contain" ); |
6560 | } |
6561 | #endif |
6562 | |
6563 | VARSET_TP loopVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef)); |
6564 | |
6565 | pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut); |
6566 | pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars); |
6567 | pLoopDsc->lpHoistedExprCount = 0; |
6568 | |
6569 | #ifndef _TARGET_64BIT_ |
6570 | unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars); |
6571 | |
6572 | if (longVarsCount > 0) |
6573 | { |
6574 | // Since 64-bit variables take up two registers on 32-bit targets, we increase |
6575 | // the Counts such that each TYP_LONG variable counts twice. |
6576 | // |
6577 | VARSET_TP loopLongVars(VarSetOps::Intersection(this, loopVars, lvaLongVars)); |
6578 | VARSET_TP inOutLongVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars)); |
6579 | |
6580 | #ifdef DEBUG |
6581 | if (verbose) |
6582 | { |
6583 | printf("\n LONGVARS(%d)=" , VarSetOps::Count(this, lvaLongVars)); |
6584 | lvaDispVarSet(lvaLongVars); |
6585 | } |
6586 | #endif |
6587 | pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars); |
6588 | pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars); |
6589 | } |
6590 | #endif // !_TARGET_64BIT_ |
6591 | |
6592 | #ifdef DEBUG |
6593 | if (verbose) |
6594 | { |
6595 | printf("\n USEDEF (%d)=" , VarSetOps::Count(this, pLoopDsc->lpVarUseDef)); |
6596 | lvaDispVarSet(pLoopDsc->lpVarUseDef); |
6597 | |
6598 | printf("\n INOUT (%d)=" , pLoopDsc->lpVarInOutCount); |
6599 | lvaDispVarSet(pLoopDsc->lpVarInOut); |
6600 | |
6601 | printf("\n LOOPVARS(%d)=" , pLoopDsc->lpLoopVarCount); |
6602 | lvaDispVarSet(loopVars); |
6603 | printf("\n" ); |
6604 | } |
6605 | #endif |
6606 | |
6607 | unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars); |
6608 | |
6609 | if (floatVarsCount > 0) |
6610 | { |
6611 | VARSET_TP loopFPVars(VarSetOps::Intersection(this, loopVars, lvaFloatVars)); |
6612 | VARSET_TP inOutFPVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars)); |
6613 | |
6614 | pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars); |
6615 | pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars); |
6616 | pLoopDsc->lpHoistedFPExprCount = 0; |
6617 | |
6618 | pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount; |
6619 | pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount; |
6620 | |
6621 | #ifdef DEBUG |
6622 | if (verbose) |
6623 | { |
6624 | printf(" INOUT-FP(%d)=" , pLoopDsc->lpVarInOutFPCount); |
6625 | lvaDispVarSet(inOutFPVars); |
6626 | |
6627 | printf("\n LOOPV-FP(%d)=" , pLoopDsc->lpLoopVarFPCount); |
6628 | lvaDispVarSet(loopFPVars); |
6629 | } |
6630 | #endif |
6631 | } |
6632 | else // (floatVarsCount == 0) |
6633 | { |
6634 | pLoopDsc->lpLoopVarFPCount = 0; |
6635 | pLoopDsc->lpVarInOutFPCount = 0; |
6636 | pLoopDsc->lpHoistedFPExprCount = 0; |
6637 | } |
6638 | |
6639 | // Find the set of definitely-executed blocks. |
6640 | // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block. |
    // Until we have post-dominators, we'll special-case for single-exit loops.
6642 | JitExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist()); |
6643 | if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT) |
6644 | { |
6645 | assert(pLoopDsc->lpExit != nullptr); |
6646 | BasicBlock* cur = pLoopDsc->lpExit; |
6647 | // Push dominators, until we reach "entry" or exit the loop. |
6648 | while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry) |
6649 | { |
6650 | defExec.Push(cur); |
6651 | cur = cur->bbIDom; |
6652 | } |
6653 | // If we didn't reach the entry block, give up and *just* push the entry block. |
6654 | if (cur != pLoopDsc->lpEntry) |
6655 | { |
6656 | defExec.Reset(); |
6657 | } |
6658 | defExec.Push(pLoopDsc->lpEntry); |
6659 | } |
6660 | else // More than one exit |
6661 | { |
6662 | // We'll assume that only the entry block is definitely executed. |
6663 | // We could in the future do better. |
6664 | defExec.Push(pLoopDsc->lpEntry); |
6665 | } |
6666 | |
6667 | while (defExec.Size() > 0) |
6668 | { |
6669 | // Consider in reverse order: dominator before dominatee. |
6670 | BasicBlock* blk = defExec.Pop(); |
6671 | optHoistLoopExprsForBlock(blk, lnum, hoistCtxt); |
6672 | } |
6673 | } |
6674 | |
6675 | // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum". |
6676 | void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt) |
6677 | { |
6678 | LoopDsc* pLoopDsc = &optLoopTable[lnum]; |
6679 | bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry); |
6680 | unsigned blkWeight = blk->getBBWeight(this); |
6681 | |
6682 | #ifdef DEBUG |
6683 | if (verbose) |
6684 | { |
6685 | printf(" optHoistLoopExprsForBlock " FMT_BB " (weight=%6s) of loop L%02u <" FMT_BB ".." FMT_BB |
6686 | ">, firstBlock is %s\n" , |
6687 | blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum, |
6688 | firstBlockAndBeforeSideEffect ? "true" : "false" ); |
6689 | if (blkWeight < (BB_UNITY_WEIGHT / 10)) |
6690 | { |
6691 | printf(" block weight is too small to perform hoisting.\n" ); |
6692 | } |
6693 | } |
6694 | #endif |
6695 | |
6696 | if (blkWeight < (BB_UNITY_WEIGHT / 10)) |
6697 | { |
6698 | // Block weight is too small to perform hoisting. |
6699 | return; |
6700 | } |
6701 | |
6702 | for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) |
6703 | { |
6704 | GenTree* stmtTree = stmt->gtStmtExpr; |
6705 | bool hoistable; |
6706 | bool cctorDependent; |
6707 | (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable, |
6708 | &cctorDependent); |
6709 | if (hoistable) |
6710 | { |
6711 | // we will try to hoist the top-level stmtTree |
6712 | optHoistCandidate(stmtTree, lnum, hoistCtxt); |
6713 | } |
6714 | } |
6715 | } |
6716 | |
6717 | bool Compiler::optIsProfitableToHoistableTree(GenTree* tree, unsigned lnum) |
6718 | { |
6719 | LoopDsc* pLoopDsc = &optLoopTable[lnum]; |
6720 | |
6721 | bool loopContainsCall = pLoopDsc->lpContainsCall; |
6722 | |
6723 | int availRegCount; |
6724 | int hoistedExprCount; |
6725 | int loopVarCount; |
6726 | int varInOutCount; |
6727 | |
6728 | if (varTypeIsFloating(tree->TypeGet())) |
6729 | { |
6730 | hoistedExprCount = pLoopDsc->lpHoistedFPExprCount; |
6731 | loopVarCount = pLoopDsc->lpLoopVarFPCount; |
6732 | varInOutCount = pLoopDsc->lpVarInOutFPCount; |
6733 | |
6734 | availRegCount = CNT_CALLEE_SAVED_FLOAT; |
6735 | if (!loopContainsCall) |
6736 | { |
6737 | availRegCount += CNT_CALLEE_TRASH_FLOAT - 1; |
6738 | } |
6739 | #ifdef _TARGET_ARM_ |
6740 | // For ARM each double takes two FP registers |
6741 | // For now on ARM we won't track singles/doubles |
6742 | // and instead just assume that we always have doubles. |
6743 | // |
6744 | availRegCount /= 2; |
6745 | #endif |
6746 | } |
6747 | else |
6748 | { |
6749 | hoistedExprCount = pLoopDsc->lpHoistedExprCount; |
6750 | loopVarCount = pLoopDsc->lpLoopVarCount; |
6751 | varInOutCount = pLoopDsc->lpVarInOutCount; |
6752 | |
6753 | availRegCount = CNT_CALLEE_SAVED - 1; |
6754 | if (!loopContainsCall) |
6755 | { |
6756 | availRegCount += CNT_CALLEE_TRASH - 1; |
6757 | } |
6758 | #ifndef _TARGET_64BIT_ |
6759 | // For our 32-bit targets Long types take two registers. |
6760 | if (varTypeIsLong(tree->TypeGet())) |
6761 | { |
6762 | availRegCount = (availRegCount + 1) / 2; |
6763 | } |
6764 | #endif |
6765 | } |
6766 | |
    // Decrement availRegCount by the number of expressions that we have already hoisted.
6768 | availRegCount -= hoistedExprCount; |
6769 | |
6770 | // the variables that are read/written inside the loop should |
6771 | // always be a subset of the InOut variables for the loop |
6772 | assert(loopVarCount <= varInOutCount); |
6773 | |
6774 | // When loopVarCount >= availRegCount we believe that all of the |
6775 | // available registers will get used to hold LclVars inside the loop. |
6776 | // This pessimistically assumes that each loopVar has a conflicting |
6777 | // lifetime with every other loopVar. |
    // For this case we will hoist the expression only if it is profitable
6779 | // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX) |
6780 | // as we believe it will be placed in the stack or one of the other |
6781 | // loopVars will be spilled into the stack |
6782 | // |
6783 | if (loopVarCount >= availRegCount) |
6784 | { |
6785 | // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX) |
6786 | if (tree->gtCostEx < (2 * IND_COST_EX)) |
6787 | { |
6788 | return false; |
6789 | } |
6790 | } |
6791 | |
    // When varInOutCount < availRegCount we know that there are
6793 | // some available register(s) when we enter the loop body. |
6794 | // When varInOutCount == availRegCount there often will be a register |
6795 | // available when we enter the loop body, since a loop often defines a |
6796 | // LclVar on exit or there is often at least one LclVar that is worth |
6797 | // spilling to the stack to make way for this hoisted expression. |
    // So we are willing to hoist an expression with gtCostEx == MIN_CSE_COST
6799 | // |
6800 | if (varInOutCount > availRegCount) |
6801 | { |
6802 | // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST |
6803 | if (tree->gtCostEx <= MIN_CSE_COST + 1) |
6804 | { |
6805 | return false; |
6806 | } |
6807 | } |
6808 | |
6809 | return true; |
6810 | } |
6811 | |
6812 | // |
6813 | // This function returns true if 'tree' is a loop invariant expression. |
6814 | // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block, |
// and sets '*pCctorDependent' to true if 'tree' is a function of a static field that must not be
6816 | // hoisted (even if '*pHoistable' is true) unless a preceding corresponding cctor init helper |
6817 | // call is also hoisted. |
6818 | // |
6819 | bool Compiler::optHoistLoopExprsForTree(GenTree* tree, |
6820 | unsigned lnum, |
6821 | LoopHoistContext* hoistCtxt, |
6822 | bool* pFirstBlockAndBeforeSideEffect, |
6823 | bool* pHoistable, |
6824 | bool* pCctorDependent) |
6825 | { |
6826 | // First do the children. |
6827 | // We must keep track of whether each child node was hoistable or not |
6828 | // |
6829 | unsigned nChildren = tree->NumChildren(); |
6830 | bool childrenHoistable[GenTree::MAX_CHILDREN]; |
6831 | bool childrenCctorDependent[GenTree::MAX_CHILDREN]; |
6832 | |
    // Initialize the elements of childrenHoistable[] and childrenCctorDependent[] to false
6834 | for (unsigned i = 0; i < nChildren; i++) |
6835 | { |
6836 | childrenHoistable[i] = false; |
6837 | childrenCctorDependent[i] = false; |
6838 | } |
6839 | |
6840 | // Initclass CLS_VARs and IconHandles are the base cases of cctor dependent trees. |
6841 | // In the IconHandle case, it's of course the dereference, rather than the constant itself, that is |
6842 | // truly dependent on the cctor. So a more precise approach would be to separately propagate |
6843 | // isCctorDependent and isAddressWhoseDereferenceWouldBeCctorDependent, but we don't for simplicity/throughput; |
6844 | // the constant itself would be considered non-hoistable anyway, since optIsCSEcandidate returns |
6845 | // false for constants. |
6846 | bool treeIsCctorDependent = ((tree->OperIs(GT_CLS_VAR) && ((tree->gtFlags & GTF_CLS_VAR_INITCLASS) != 0)) || |
6847 | (tree->OperIs(GT_CNS_INT) && ((tree->gtFlags & GTF_ICON_INITCLASS) != 0))); |
6848 | bool treeIsInvariant = true; |
6849 | for (unsigned childNum = 0; childNum < nChildren; childNum++) |
6850 | { |
6851 | if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect, |
6852 | &childrenHoistable[childNum], &childrenCctorDependent[childNum])) |
6853 | { |
6854 | treeIsInvariant = false; |
6855 | } |
6856 | |
6857 | if (childrenCctorDependent[childNum]) |
6858 | { |
6859 | // Normally, a parent of a cctor-dependent tree is also cctor-dependent. |
6860 | treeIsCctorDependent = true; |
6861 | |
6862 | // Check for the case where we can stop propagating cctor-dependent upwards. |
6863 | if (tree->OperIs(GT_COMMA) && (childNum == 1)) |
6864 | { |
6865 | GenTree* op1 = tree->gtGetOp1(); |
6866 | if (op1->OperIs(GT_CALL)) |
6867 | { |
6868 | GenTreeCall* call = op1->AsCall(); |
6869 | if ((call->gtCallType == CT_HELPER) && |
6870 | s_helperCallProperties.MayRunCctor(eeGetHelperNum(call->gtCallMethHnd))) |
6871 | { |
6872 | // Hoisting the comma is ok because it would hoist the initialization along |
6873 | // with the static field reference. |
6874 | treeIsCctorDependent = false; |
6875 | // Hoisting the static field without hoisting the initialization would be |
6876 | // incorrect, make sure we consider the field (which we flagged as |
6877 | // cctor-dependent) non-hoistable. |
6878 | noway_assert(!childrenHoistable[childNum]); |
6879 | } |
6880 | } |
6881 | } |
6882 | } |
6883 | } |
6884 | |
6885 | // If all the children of "tree" are hoistable, then "tree" itself can be hoisted, |
6886 | // unless it has a static var reference that can't be hoisted past its cctor call. |
6887 | bool treeIsHoistable = treeIsInvariant && !treeIsCctorDependent; |
6888 | |
6889 | // But we must see if anything else prevents "tree" from being hoisted. |
6890 | // |
6891 | if (treeIsInvariant) |
6892 | { |
6893 | // Tree must be a suitable CSE candidate for us to be able to hoist it. |
6894 | treeIsHoistable &= optIsCSEcandidate(tree); |
6895 | |
6896 | // If it's a call, it must be a helper call, and be pure. |
6897 | // Further, if it may run a cctor, it must be labeled as "Hoistable" |
6898 | // (meaning it won't run a cctor because the class is not precise-init). |
6899 | if (treeIsHoistable && tree->OperGet() == GT_CALL) |
6900 | { |
6901 | GenTreeCall* call = tree->AsCall(); |
6902 | if (call->gtCallType != CT_HELPER) |
6903 | { |
6904 | treeIsHoistable = false; |
6905 | } |
6906 | else |
6907 | { |
6908 | CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); |
6909 | if (!s_helperCallProperties.IsPure(helpFunc)) |
6910 | { |
6911 | treeIsHoistable = false; |
6912 | } |
6913 | else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0) |
6914 | { |
6915 | treeIsHoistable = false; |
6916 | } |
6917 | } |
6918 | } |
6919 | |
6920 | if (treeIsHoistable) |
6921 | { |
6922 | if (!(*pFirstBlockAndBeforeSideEffect)) |
6923 | { |
6924 | // For now, we give up on an expression that might raise an exception if it is after the |
6925 | // first possible global side effect (and we assume we're after that if we're not in the first block). |
6926 | // TODO-CQ: this is when we might do loop cloning. |
6927 | // |
6928 | if ((tree->gtFlags & GTF_EXCEPT) != 0) |
6929 | { |
6930 | treeIsHoistable = false; |
6931 | } |
6932 | } |
6933 | } |
6934 | |
6935 | // Is the value of the whole tree loop invariant? |
6936 | treeIsInvariant = |
6937 | optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache); |
6938 | |
        // If the value of the whole tree is not invariant in the loop, then it is not hoistable.
6940 | if (!treeIsInvariant) |
6941 | { |
6942 | treeIsHoistable = false; |
6943 | } |
6944 | } |
6945 | |
6946 | // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false. |
6947 | // If we encounter a tree with a call in it |
    // or if we see an assignment to a global we set it to false.
6949 | // |
6950 | // If we are already set to false then we can skip these checks |
6951 | // |
6952 | if (*pFirstBlockAndBeforeSideEffect) |
6953 | { |
6954 | // For this purpose, we only care about memory side effects. We assume that expressions will |
6955 | // be hoisted so that they are evaluated in the same order as they would have been in the loop, |
6956 | // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS |
6957 | // here, since that includes exceptions.) |
6958 | if (tree->IsCall()) |
6959 | { |
6960 | // If it's a call, it must be a helper call that does not mutate the heap. |
6961 | // Further, if it may run a cctor, it must be labeled as "Hoistable" |
6962 | // (meaning it won't run a cctor because the class is not precise-init). |
6963 | GenTreeCall* call = tree->AsCall(); |
6964 | if (call->gtCallType != CT_HELPER) |
6965 | { |
6966 | *pFirstBlockAndBeforeSideEffect = false; |
6967 | } |
6968 | else |
6969 | { |
6970 | CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); |
6971 | if (s_helperCallProperties.MutatesHeap(helpFunc)) |
6972 | { |
6973 | *pFirstBlockAndBeforeSideEffect = false; |
6974 | } |
6975 | else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0) |
6976 | { |
6977 | *pFirstBlockAndBeforeSideEffect = false; |
6978 | } |
6979 | } |
6980 | } |
6981 | else if (tree->OperIs(GT_ASG)) |
6982 | { |
6983 | // If the LHS of the assignment has a global reference, then assume it's a global side effect. |
6984 | GenTree* lhs = tree->gtOp.gtOp1; |
6985 | if (lhs->gtFlags & GTF_GLOB_REF) |
6986 | { |
6987 | *pFirstBlockAndBeforeSideEffect = false; |
6988 | } |
6989 | } |
6990 | else if (tree->OperIsCopyBlkOp()) |
6991 | { |
6992 | GenTree* args = tree->gtOp.gtOp1; |
6993 | assert(args->OperGet() == GT_LIST); |
6994 | if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) |
6995 | { |
6996 | *pFirstBlockAndBeforeSideEffect = false; |
6997 | } |
6998 | } |
6999 | } |
7000 | |
7001 | // If this 'tree' is hoistable then we return and the caller will |
    // decide to hoist it as part of a larger hoistable expression.
7003 | // |
7004 | if (!treeIsHoistable) |
7005 | { |
7006 | // We are not hoistable so we will now hoist any hoistable children. |
7007 | // |
7008 | for (unsigned childNum = 0; childNum < nChildren; childNum++) |
7009 | { |
7010 | if (childrenHoistable[childNum]) |
7011 | { |
                // We can't hoist the LHS of an assignment; it isn't a real use.
7013 | if ((childNum == 0) && tree->OperIs(GT_ASG)) |
7014 | { |
7015 | continue; |
7016 | } |
7017 | |
7018 | GenTree* child = tree->GetChild(childNum); |
7019 | |
7020 | // We try to hoist this 'child' tree |
7021 | optHoistCandidate(child, lnum, hoistCtxt); |
7022 | } |
7023 | } |
7024 | } |
7025 | |
7026 | *pHoistable = treeIsHoistable; |
7027 | *pCctorDependent = treeIsCctorDependent; |
7028 | return treeIsInvariant; |
7029 | } |
7030 | |
7031 | void Compiler::optHoistCandidate(GenTree* tree, unsigned lnum, LoopHoistContext* hoistCtxt) |
7032 | { |
7033 | if (lnum == BasicBlock::NOT_IN_LOOP) |
7034 | { |
7035 | // The hoisted expression isn't valid at any loop head so don't hoist this expression. |
7036 | return; |
7037 | } |
7038 | |
7039 | // The outer loop also must be suitable for hoisting... |
7040 | if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0) |
7041 | { |
7042 | return; |
7043 | } |
7044 | |
    // If the hoisted expression isn't valid at this loop head then don't hoist it
7046 | if (!optTreeIsValidAtLoopHead(tree, lnum)) |
7047 | { |
7048 | return; |
7049 | } |
7050 | |
    // It must pass the hoisting profitability tests for this loop level
7052 | if (!optIsProfitableToHoistableTree(tree, lnum)) |
7053 | { |
7054 | return; |
7055 | } |
7056 | |
7057 | bool b; |
7058 | if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b)) |
7059 | { |
7060 | // already hoisted in a parent loop, so don't hoist this expression. |
7061 | return; |
7062 | } |
7063 | |
7064 | if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b)) |
7065 | { |
7066 | // already hoisted this expression in the current loop, so don't hoist this expression. |
7067 | return; |
7068 | } |
7069 | |
7070 | // Expression can be hoisted |
7071 | optPerformHoistExpr(tree, lnum); |
7072 | |
7073 | // Increment lpHoistedExprCount or lpHoistedFPExprCount |
7074 | if (!varTypeIsFloating(tree->TypeGet())) |
7075 | { |
7076 | optLoopTable[lnum].lpHoistedExprCount++; |
7077 | #ifndef _TARGET_64BIT_ |
7078 | // For our 32-bit targets Long types take two registers. |
7079 | if (varTypeIsLong(tree->TypeGet())) |
7080 | { |
7081 | optLoopTable[lnum].lpHoistedExprCount++; |
7082 | } |
7083 | #endif |
7084 | } |
7085 | else // Floating point expr hoisted |
7086 | { |
7087 | optLoopTable[lnum].lpHoistedFPExprCount++; |
7088 | } |
7089 | |
7090 | // Record the hoisted expression in hoistCtxt |
7091 | hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); |
7092 | } |
7093 | |
7094 | bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache) |
7095 | { |
    // If it is not a VN, it is not loop-invariant.
7097 | if (vn == ValueNumStore::NoVN) |
7098 | { |
7099 | return false; |
7100 | } |
7101 | |
7102 | // We'll always short-circuit constants. |
7103 | if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid()) |
7104 | { |
7105 | return true; |
7106 | } |
7107 | |
7108 | // If we've done this query previously, don't repeat. |
7109 | bool previousRes = false; |
7110 | if (loopVnInvariantCache->Lookup(vn, &previousRes)) |
7111 | { |
7112 | return previousRes; |
7113 | } |
7114 | |
7115 | bool res = true; |
7116 | VNFuncApp funcApp; |
7117 | if (vnStore->GetVNFunc(vn, &funcApp)) |
7118 | { |
7119 | if (funcApp.m_func == VNF_PhiDef) |
7120 | { |
7121 | // First, make sure it's a "proper" phi -- the definition is a Phi application. |
7122 | VNFuncApp phiDefValFuncApp; |
7123 | if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi) |
7124 | { |
7125 | // It's not *really* a definition, rather a pass-through of some other VN. |
7126 | // (This could occur, say if both sides of an if-then-else diamond made the |
7127 | // same assignment to a variable.) |
7128 | res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache); |
7129 | } |
7130 | else |
7131 | { |
                // Is the definition within the loop? If so, it is not loop-invariant.
7133 | unsigned lclNum = funcApp.m_args[0]; |
7134 | unsigned ssaNum = funcApp.m_args[1]; |
7135 | LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum); |
7136 | res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum); |
7137 | } |
7138 | } |
7139 | else if (funcApp.m_func == VNF_PhiMemoryDef) |
7140 | { |
7141 | BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0])); |
7142 | res = !optLoopContains(lnum, defnBlk->bbNatLoopNum); |
7143 | } |
7144 | else |
7145 | { |
7146 | for (unsigned i = 0; i < funcApp.m_arity; i++) |
7147 | { |
7148 | // TODO-CQ: We need to either make sure that *all* VN functions |
7149 | // always take VN args, or else have a list of arg positions to exempt, as implicitly |
7150 | // constant. |
7151 | if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache)) |
7152 | { |
7153 | res = false; |
7154 | break; |
7155 | } |
7156 | } |
7157 | } |
7158 | } |
7159 | else |
7160 | { |
7161 | // Non-function "new, unique" VN's may be annotated with the loop nest where |
7162 | // their definition occurs. |
7163 | BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn); |
7164 | |
7165 | if (vnLoopNum == MAX_LOOP_NUM) |
7166 | { |
7167 | res = false; |
7168 | } |
7169 | else |
7170 | { |
7171 | res = !optLoopContains(lnum, vnLoopNum); |
7172 | } |
7173 | } |
7174 | |
7175 | loopVnInvariantCache->Set(vn, res); |
7176 | return res; |
7177 | } |
7178 | |
7179 | bool Compiler::optTreeIsValidAtLoopHead(GenTree* tree, unsigned lnum) |
7180 | { |
7181 | if (tree->OperIsLocal()) |
7182 | { |
7183 | GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon(); |
7184 | unsigned lclNum = lclVar->gtLclNum; |
7185 | |
        // The lclVar must have an SSA-tracked lifetime
7187 | if (!lvaInSsa(lclNum)) |
7188 | { |
7189 | return false; |
7190 | } |
7191 | |
        // If the loop does not contain the SSA def we can hoist it.
7193 | if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk)) |
7194 | { |
7195 | return true; |
7196 | } |
7197 | } |
7198 | else if (tree->OperIsConst()) |
7199 | { |
7200 | return true; |
7201 | } |
    else // Valid only if every one of the child nodes is valid at this loop's head.
7203 | { |
7204 | unsigned nChildren = tree->NumChildren(); |
7205 | for (unsigned childNum = 0; childNum < nChildren; childNum++) |
7206 | { |
7207 | if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum)) |
7208 | { |
7209 | return false; |
7210 | } |
7211 | } |
7212 | return true; |
7213 | } |
7214 | return false; |
7215 | } |
7216 | |
7217 | /***************************************************************************** |
7218 | * |
 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
 * header block placed immediately before the loop top. The pre-header will
 * replace the current lpHead in the loop table.
7221 | * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead |
7222 | * will also be dominated by the loop-top, lpHead->bbNext. |
7223 | * |
7224 | */ |
7225 | |
void Compiler::fgCreateLoopPreHeader(unsigned lnum)
7227 | { |
7228 | LoopDsc* pLoopDsc = &optLoopTable[lnum]; |
7229 | |
7230 | /* This loop has to be a "do-while" loop */ |
7231 | |
7232 | assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE); |
7233 | |
7234 | /* Have we already created a loop-preheader block? */ |
7235 | |
7236 | if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD) |
7237 | { |
7238 | return; |
7239 | } |
7240 | |
7241 | BasicBlock* head = pLoopDsc->lpHead; |
7242 | BasicBlock* top = pLoopDsc->lpTop; |
7243 | BasicBlock* entry = pLoopDsc->lpEntry; |
7244 | |
7245 | // if 'entry' and 'head' are in different try regions then we won't be able to hoist |
7246 | if (!BasicBlock::sameTryRegion(head, entry)) |
7247 | { |
7248 | return; |
7249 | } |
7250 | |
7251 | // Ensure that lpHead always dominates lpEntry |
7252 | |
7253 | noway_assert(fgDominate(head, entry)); |
7254 | |
7255 | /* Get hold of the first block of the loop body */ |
7256 | |
7257 | assert(top == entry); |
7258 | |
7259 | /* Allocate a new basic block */ |
7260 | |
7261 | BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE); |
7262 | preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER; |
7263 | |
7264 | // Must set IL code offset |
7265 | preHead->bbCodeOffs = top->bbCodeOffs; |
7266 | |
7267 | // Set the default value of the preHead weight in case we don't have |
    // valid profile data and since this block's weight is just an estimate
7269 | // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head. |
7270 | // |
7271 | preHead->inheritWeight(head); |
7272 | preHead->bbFlags &= ~BBF_PROF_WEIGHT; |
7273 | |
7274 | #ifdef DEBUG |
7275 | if (verbose) |
7276 | { |
7277 | printf("\nCreated PreHeader (" FMT_BB ") for loop L%02u (" FMT_BB " - " FMT_BB "), with weight = %s\n" , |
7278 | preHead->bbNum, lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this))); |
7279 | } |
7280 | #endif |
7281 | |
7282 | // The preheader block is part of the containing loop (if any). |
7283 | preHead->bbNatLoopNum = pLoopDsc->lpParent; |
7284 | |
7285 | if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND)) |
7286 | { |
7287 | if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0)) |
7288 | { |
7289 | preHead->bbWeight = 0; |
7290 | preHead->bbFlags |= BBF_RUN_RARELY; |
7291 | } |
7292 | else |
7293 | { |
7294 | bool allValidProfileWeights = |
7295 | (head->hasProfileWeight() && head->bbJumpDest->hasProfileWeight() && head->bbNext->hasProfileWeight()); |
7296 | |
7297 | if (allValidProfileWeights) |
7298 | { |
7299 | double loopEnteredCount; |
7300 | double loopSkippedCount; |
7301 | |
7302 | if (fgHaveValidEdgeWeights) |
7303 | { |
7304 | flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head); |
7305 | flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head); |
7306 | noway_assert(edgeToNext != nullptr); |
7307 | noway_assert(edgeToJump != nullptr); |
7308 | |
7309 | loopEnteredCount = |
7310 | ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0; |
7311 | loopSkippedCount = |
7312 | ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0; |
7313 | } |
7314 | else |
7315 | { |
7316 | loopEnteredCount = (double)head->bbNext->bbWeight; |
7317 | loopSkippedCount = (double)head->bbJumpDest->bbWeight; |
7318 | } |
7319 | |
7320 | double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount); |
7321 | |
7322 | // Calculate a good approximation of the preHead's block weight |
7323 | unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5); |
7324 | preHead->setBBWeight(max(preHeadWeight, 1)); |
7325 | noway_assert(!preHead->isRunRarely()); |
7326 | } |
7327 | } |
7328 | } |
7329 | |
7330 | // Link in the preHead block. |
7331 | fgInsertBBbefore(top, preHead); |
7332 | |
7333 | // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting. |
7334 | // However, that is too expensive at this point. Instead, we update the phi |
7335 | // node block references, if we created pre-header block due to hoisting. |
7336 | // This is sufficient because any definition participating in SSA that flowed |
7337 | // into the phi via the loop header block will now flow through the preheader |
7338 | // block from the header block. |
7339 | |
7340 | for (GenTree* stmt = top->bbTreeList; stmt; stmt = stmt->gtNext) |
7341 | { |
7342 | GenTree* tree = stmt->gtStmt.gtStmtExpr; |
7343 | if (tree->OperGet() != GT_ASG) |
7344 | { |
7345 | break; |
7346 | } |
7347 | GenTree* op2 = tree->gtGetOp2(); |
7348 | if (op2->OperGet() != GT_PHI) |
7349 | { |
7350 | break; |
7351 | } |
7352 | GenTreeArgList* args = op2->gtGetOp1()->AsArgList(); |
7353 | while (args != nullptr) |
7354 | { |
7355 | GenTreePhiArg* phiArg = args->Current()->AsPhiArg(); |
7356 | if (phiArg->gtPredBB == head) |
7357 | { |
7358 | phiArg->gtPredBB = preHead; |
7359 | } |
7360 | args = args->Rest(); |
7361 | } |
7362 | } |
7363 | |
7364 | // The handler can't begin at the top of the loop. If it did, it would be incorrect |
7365 | // to set the handler index on the pre header without updating the exception table. |
7366 | noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top); |
7367 | |
7368 | // Update the EH table to make the hoisted block part of the loop's EH block. |
7369 | fgExtendEHRegionBefore(top); |
7370 | |
7371 | // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them |
7372 | // (e.g: hoisting expression in a loop with the same 'head' as this one) |
7373 | |
7374 | /* Update the loop entry */ |
7375 | |
7376 | pLoopDsc->lpHead = preHead; |
7377 | pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD; |
7378 | |
7379 | /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds |
       All predecessors of 'top' (which is the entry of the loop)
7381 | now have to jump to 'preHead', unless they are dominated by 'head' */ |
7382 | |
7383 | preHead->bbRefs = 0; |
7384 | fgAddRefPred(preHead, head); |
7385 | bool checkNestedLoops = false; |
7386 | |
7387 | for (flowList* pred = top->bbPreds; pred; pred = pred->flNext) |
7388 | { |
7389 | BasicBlock* predBlock = pred->flBlock; |
7390 | |
7391 | if (fgDominate(top, predBlock)) |
7392 | { |
7393 | // note: if 'top' dominates predBlock, 'head' dominates predBlock too |
7394 | // (we know that 'head' dominates 'top'), but using 'top' instead of |
7395 | // 'head' in the test allows us to not enter here if 'predBlock == head' |
7396 | |
7397 | if (predBlock != pLoopDsc->lpBottom) |
7398 | { |
7399 | noway_assert(predBlock != head); |
7400 | checkNestedLoops = true; |
7401 | } |
7402 | continue; |
7403 | } |
7404 | |
7405 | switch (predBlock->bbJumpKind) |
7406 | { |
7407 | case BBJ_NONE: |
7408 | noway_assert(predBlock == head); |
7409 | break; |
7410 | |
7411 | case BBJ_COND: |
7412 | if (predBlock == head) |
7413 | { |
7414 | noway_assert(predBlock->bbJumpDest != top); |
7415 | break; |
7416 | } |
7417 | __fallthrough; |
7418 | |
7419 | case BBJ_ALWAYS: |
7420 | case BBJ_EHCATCHRET: |
7421 | noway_assert(predBlock->bbJumpDest == top); |
7422 | predBlock->bbJumpDest = preHead; |
7423 | preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; |
7424 | |
7425 | if (predBlock == head) |
7426 | { |
7427 | // This is essentially the same case of predBlock being a BBJ_NONE. We may not be |
7428 | // able to make this a BBJ_NONE if it's an internal block (for example, a leave). |
7429 | // Just break, pred will be removed after switch. |
7430 | } |
7431 | else |
7432 | { |
7433 | fgRemoveRefPred(top, predBlock); |
7434 | fgAddRefPred(preHead, predBlock); |
7435 | } |
7436 | break; |
7437 | |
7438 | case BBJ_SWITCH: |
7439 | unsigned jumpCnt; |
7440 | jumpCnt = predBlock->bbJumpSwt->bbsCount; |
7441 | BasicBlock** jumpTab; |
7442 | jumpTab = predBlock->bbJumpSwt->bbsDstTab; |
7443 | |
7444 | do |
7445 | { |
7446 | assert(*jumpTab); |
7447 | if ((*jumpTab) == top) |
7448 | { |
7449 | (*jumpTab) = preHead; |
7450 | |
7451 | fgRemoveRefPred(top, predBlock); |
7452 | fgAddRefPred(preHead, predBlock); |
7453 | preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; |
7454 | } |
                } while (++jumpTab, --jumpCnt);
                break;

7457 | default: |
7458 | noway_assert(!"Unexpected bbJumpKind" ); |
7459 | break; |
7460 | } |
7461 | } |
7462 | |
7463 | noway_assert(!fgGetPredForBlock(top, preHead)); |
7464 | fgRemoveRefPred(top, head); |
7465 | fgAddRefPred(top, preHead); |
7466 | |
7467 | /* |
7468 | If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop |
7469 | (other than the back-edge of the loop we are considering) then we likely have nested |
7470 | do-while loops with the same entry block and inserting the preheader block changes the head |
7471 | of all the nested loops. Now we will update this piece of information in the loop table, and |
7472 | mark all nested loops as having a preheader (the preheader block can be shared among all nested |
7473 | do-while loops with the same entry block). |
7474 | */ |
7475 | if (checkNestedLoops) |
7476 | { |
7477 | for (unsigned l = 0; l < optLoopCount; l++) |
7478 | { |
7479 | if (optLoopTable[l].lpHead == head) |
7480 | { |
7481 | noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead' |
7482 | noway_assert(optLoopTable[l].lpEntry == top); |
7483 | optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead); |
7484 | optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD; |
7485 | #ifdef DEBUG |
7486 | if (verbose) |
7487 | { |
7488 | printf("Same PreHeader (" FMT_BB ") can be used for loop L%02u (" FMT_BB " - " FMT_BB ")\n\n" , |
7489 | preHead->bbNum, l, top->bbNum, optLoopTable[l].lpBottom->bbNum); |
7490 | } |
7491 | #endif |
7492 | } |
7493 | } |
7494 | } |
7495 | } |
7496 | |
7497 | bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum) |
7498 | { |
7499 | for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent) |
7500 | { |
7501 | if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) |
7502 | { |
7503 | continue; |
7504 | } |
7505 | if (optLoopTable[lnum].lpEntry == blk) |
7506 | { |
7507 | *pLnum = lnum; |
7508 | return true; |
7509 | } |
7510 | } |
7511 | return false; |
7512 | } |
7513 | |
7514 | void Compiler::optComputeLoopSideEffects() |
7515 | { |
7516 | unsigned lnum; |
7517 | for (lnum = 0; lnum < optLoopCount; lnum++) |
7518 | { |
7519 | VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this)); |
7520 | VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this)); |
7521 | optLoopTable[lnum].lpContainsCall = false; |
7522 | } |
7523 | |
7524 | for (lnum = 0; lnum < optLoopCount; lnum++) |
7525 | { |
7526 | if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) |
7527 | { |
7528 | continue; |
7529 | } |
7530 | |
7531 | if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) |
7532 | { // Is outermost... |
7533 | optComputeLoopNestSideEffects(lnum); |
7534 | } |
7535 | } |
7536 | |
7537 | VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this)); |
7538 | #ifndef _TARGET_64BIT_ |
7539 | VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this)); |
7540 | #endif |
7541 | |
7542 | for (unsigned i = 0; i < lvaCount; i++) |
7543 | { |
7544 | LclVarDsc* varDsc = &lvaTable[i]; |
7545 | if (varDsc->lvTracked) |
7546 | { |
7547 | if (varTypeIsFloating(varDsc->lvType)) |
7548 | { |
7549 | VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex); |
7550 | } |
7551 | #ifndef _TARGET_64BIT_ |
7552 | else if (varTypeIsLong(varDsc->lvType)) |
7553 | { |
7554 | VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex); |
7555 | } |
7556 | #endif |
7557 | } |
7558 | } |
7559 | } |
7560 | |
7561 | void Compiler::optComputeLoopNestSideEffects(unsigned lnum) |
7562 | { |
7563 | assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost. |
7564 | BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext; |
7565 | for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext) |
7566 | { |
7567 | optComputeLoopSideEffectsOfBlock(bbInLoop); |
7568 | } |
7569 | } |
7570 | |
7571 | void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) |
7572 | { |
7573 | unsigned mostNestedLoop = blk->bbNatLoopNum; |
7574 | assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP); |
7575 | |
7576 | AddVariableLivenessAllContainingLoops(mostNestedLoop, blk); |
7577 | |
7578 | // MemoryKinds for which an in-loop call or store has arbitrary effects. |
7579 | MemoryKindSet memoryHavoc = emptyMemoryKindSet; |
7580 | |
7581 | // Now iterate over the remaining statements, and their trees. |
7582 | for (GenTree* stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext) |
7583 | { |
7584 | for (GenTree* tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext) |
7585 | { |
7586 | genTreeOps oper = tree->OperGet(); |
7587 | |
7588 | // Even after we set memoryHavoc we still may want to know if a loop contains calls |
7589 | if (memoryHavoc == fullMemoryKindSet) |
7590 | { |
7591 | if (oper == GT_CALL) |
7592 | { |
7593 | // Record that this loop contains a call |
7594 | AddContainsCallAllContainingLoops(mostNestedLoop); |
7595 | } |
7596 | |
7597 | // If we just set lpContainsCall or it was previously set |
7598 | if (optLoopTable[mostNestedLoop].lpContainsCall) |
7599 | { |
7600 | // We can early exit after both memoryHavoc and lpContainsCall are both set to true. |
7601 | break; |
7602 | } |
7603 | |
7604 | // We are just looking for GT_CALL nodes after memoryHavoc was set. |
7605 | continue; |
7606 | } |
7607 | |
            // Otherwise memoryHavoc is not set for at least one memory kind
7609 | assert(memoryHavoc != fullMemoryKindSet); |
7610 | |
7611 | // This body is a distillation of the memory side-effect code of value numbering. |
            // We also do a very limited analysis of byref PtrTo values, to cover some cases
7613 | // that the compiler creates. |
7614 | |
7615 | if (oper == GT_ASG) |
7616 | { |
7617 | GenTree* lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true); |
7618 | |
7619 | if (lhs->OperGet() == GT_IND) |
7620 | { |
7621 | GenTree* arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true); |
7622 | FieldSeqNode* fldSeqArrElem = nullptr; |
7623 | |
7624 | if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) |
7625 | { |
7626 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7627 | continue; |
7628 | } |
7629 | |
7630 | ArrayInfo arrInfo; |
7631 | |
7632 | if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR) |
7633 | { |
7634 | // If it's a local byref for which we recorded a value number, use that... |
7635 | GenTreeLclVar* argLcl = arg->AsLclVar(); |
7636 | if (lvaInSsa(argLcl->GetLclNum())) |
7637 | { |
7638 | ValueNum argVN = |
7639 | lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal(); |
7640 | VNFuncApp funcApp; |
7641 | if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) && |
7642 | funcApp.m_func == VNF_PtrToArrElem) |
7643 | { |
7644 | assert(vnStore->IsVNHandle(funcApp.m_args[0])); |
7645 | CORINFO_CLASS_HANDLE elemType = |
7646 | CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0])); |
7647 | AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType); |
7648 | // Don't set memoryHavoc for GcHeap below. Do set memoryHavoc for ByrefExposed |
7649 | // (conservatively assuming that a byref may alias the array element) |
7650 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7651 | continue; |
7652 | } |
7653 | } |
7654 | // Otherwise... |
7655 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7656 | } |
7657 | // Is the LHS an array index expression? |
7658 | else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem)) |
7659 | { |
7660 | // We actually ignore "fldSeq" -- any modification to an S[], at any |
7661 | // field of "S", will lose all information about the array type. |
7662 | CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); |
7663 | AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq); |
7664 | // Conservatively assume byrefs may alias this array element |
7665 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7666 | } |
7667 | else |
7668 | { |
7669 | // We are only interested in IsFieldAddr()'s fldSeq out parameter. |
7670 | // |
7671 | GenTree* obj = nullptr; // unused |
7672 | GenTree* staticOffset = nullptr; // unused |
7673 | FieldSeqNode* fldSeq = nullptr; |
7674 | |
7675 | if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) && |
7676 | (fldSeq != FieldSeqStore::NotAField())) |
7677 | { |
7678 | // Get the first (object) field from field seq. GcHeap[field] will yield the "field map". |
7679 | assert(fldSeq != nullptr); |
7680 | if (fldSeq->IsFirstElemFieldSeq()) |
7681 | { |
7682 | fldSeq = fldSeq->m_next; |
7683 | assert(fldSeq != nullptr); |
7684 | } |
7685 | |
7686 | AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd); |
7687 | // Conservatively assume byrefs may alias this object. |
7688 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7689 | } |
7690 | else |
7691 | { |
7692 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7693 | } |
7694 | } |
7695 | } |
7696 | else if (lhs->OperIsBlk()) |
7697 | { |
7698 | GenTreeLclVarCommon* lclVarTree; |
7699 | bool isEntire; |
7700 | if (!tree->DefinesLocal(this, &lclVarTree, &isEntire)) |
7701 | { |
7702 | // For now, assume arbitrary side effects on GcHeap/ByrefExposed... |
7703 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7704 | } |
7705 | else if (lvaVarAddrExposed(lclVarTree->gtLclNum)) |
7706 | { |
7707 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7708 | } |
7709 | } |
7710 | else if (lhs->OperGet() == GT_CLS_VAR) |
7711 | { |
7712 | AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd); |
7713 | // Conservatively assume byrefs may alias this static field |
7714 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7715 | } |
7716 | // Otherwise, must be local lhs form. I should assert that. |
7717 | else if (lhs->OperGet() == GT_LCL_VAR) |
7718 | { |
7719 | GenTreeLclVar* lhsLcl = lhs->AsLclVar(); |
7720 | GenTree* rhs = tree->gtOp.gtOp2; |
7721 | ValueNum rhsVN = rhs->gtVNPair.GetLiberal(); |
7722 | // If we gave the RHS a value number, propagate it. |
7723 | if (rhsVN != ValueNumStore::NoVN) |
7724 | { |
7725 | rhsVN = vnStore->VNNormalValue(rhsVN); |
7726 | if (lvaInSsa(lhsLcl->GetLclNum())) |
7727 | { |
7728 | lvaTable[lhsLcl->GetLclNum()] |
7729 | .GetPerSsaData(lhsLcl->GetSsaNum()) |
7730 | ->m_vnPair.SetLiberal(rhsVN); |
7731 | } |
7732 | } |
7733 | // If the local is address-exposed, count this as ByrefExposed havoc |
7734 | if (lvaVarAddrExposed(lhsLcl->gtLclNum)) |
7735 | { |
7736 | memoryHavoc |= memoryKindSet(ByrefExposed); |
7737 | } |
7738 | } |
7739 | } |
7740 | else // if (oper != GT_ASG) |
7741 | { |
7742 | switch (oper) |
7743 | { |
7744 | case GT_COMMA: |
7745 | tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair; |
7746 | break; |
7747 | |
7748 | case GT_ADDR: |
                        // Is it an addr of an array index expression?
7750 | { |
7751 | GenTree* addrArg = tree->gtOp.gtOp1; |
7752 | if (addrArg->OperGet() == GT_IND) |
7753 | { |
7754 | // Is the LHS an array index expression? |
7755 | if (addrArg->gtFlags & GTF_IND_ARR_INDEX) |
7756 | { |
7757 | ArrayInfo arrInfo; |
7758 | bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo); |
7759 | assert(b); |
7760 | CORINFO_CLASS_HANDLE elemTypeEq = |
7761 | EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); |
7762 | ValueNum elemTypeEqVN = |
7763 | vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL); |
7764 | ValueNum ptrToArrElemVN = |
7765 | vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem, elemTypeEqVN, |
7766 | // The rest are dummy arguments. |
7767 | vnStore->VNForNull(), vnStore->VNForNull(), |
7768 | vnStore->VNForNull()); |
7769 | tree->gtVNPair.SetBoth(ptrToArrElemVN); |
7770 | } |
7771 | } |
7772 | } |
7773 | break; |
7774 | |
7775 | case GT_LOCKADD: // Binop |
7776 | case GT_XADD: // Binop |
7777 | case GT_XCHG: // Binop |
7778 | case GT_CMPXCHG: // Specialop |
7779 | { |
7780 | assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering" ); |
7781 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7782 | } |
7783 | break; |
7784 | |
7785 | case GT_CALL: |
7786 | { |
7787 | GenTreeCall* call = tree->AsCall(); |
7788 | |
7789 | // Record that this loop contains a call |
7790 | AddContainsCallAllContainingLoops(mostNestedLoop); |
7791 | |
7792 | if (call->gtCallType == CT_HELPER) |
7793 | { |
7794 | CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); |
7795 | if (s_helperCallProperties.MutatesHeap(helpFunc)) |
7796 | { |
7797 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7798 | } |
7799 | else if (s_helperCallProperties.MayRunCctor(helpFunc)) |
7800 | { |
7801 | // If the call is labeled as "Hoistable", then we've checked the |
7802 | // class that would be constructed, and it is not precise-init, so |
7803 | // the cctor will not be run by this call. Otherwise, it might be, |
7804 | // and might have arbitrary side effects. |
7805 | if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0) |
7806 | { |
7807 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7808 | } |
7809 | } |
7810 | } |
7811 | else |
7812 | { |
7813 | memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); |
7814 | } |
7815 | break; |
7816 | } |
7817 | |
7818 | default: |
                        // For all other gtOper node kinds, leave 'memoryHavoc' unchanged
7820 | break; |
7821 | } |
7822 | } |
7823 | } |
7824 | } |
7825 | |
7826 | if (memoryHavoc != emptyMemoryKindSet) |
7827 | { |
7828 | // Record that all loops containing this block have memory havoc effects. |
7829 | unsigned lnum = mostNestedLoop; |
7830 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7831 | { |
7832 | for (MemoryKind memoryKind : allMemoryKinds()) |
7833 | { |
7834 | if ((memoryHavoc & memoryKindSet(memoryKind)) != 0) |
7835 | { |
7836 | optLoopTable[lnum].lpLoopHasMemoryHavoc[memoryKind] = true; |
7837 | } |
7838 | } |
7839 | lnum = optLoopTable[lnum].lpParent; |
7840 | } |
7841 | } |
7842 | } |
7843 | |
// Marks "lnum" and any parent loops as containing a call.
7845 | void Compiler::AddContainsCallAllContainingLoops(unsigned lnum) |
7846 | { |
7847 | assert(0 <= lnum && lnum < optLoopCount); |
7848 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7849 | { |
7850 | optLoopTable[lnum].lpContainsCall = true; |
7851 | lnum = optLoopTable[lnum].lpParent; |
7852 | } |
7853 | } |
7854 | |
7855 | // Adds the variable liveness information for 'blk' to 'this' LoopDsc |
7856 | void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk) |
7857 | { |
7858 | VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn); |
7859 | VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut); |
7860 | |
7861 | VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse); |
7862 | VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef); |
7863 | } |
7864 | |
7865 | // Adds the variable liveness information for 'blk' to "lnum" and any parent loops. |
7866 | void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk) |
7867 | { |
7868 | assert(0 <= lnum && lnum < optLoopCount); |
7869 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7870 | { |
7871 | optLoopTable[lnum].AddVariableLiveness(this, blk); |
7872 | lnum = optLoopTable[lnum].lpParent; |
7873 | } |
7874 | } |
7875 | |
7876 | // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops. |
7877 | void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd) |
7878 | { |
7879 | assert(0 <= lnum && lnum < optLoopCount); |
7880 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7881 | { |
7882 | optLoopTable[lnum].AddModifiedField(this, fldHnd); |
7883 | lnum = optLoopTable[lnum].lpParent; |
7884 | } |
7885 | } |
7886 | |
7887 | // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops. |
7888 | void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd) |
7889 | { |
7890 | assert(0 <= lnum && lnum < optLoopCount); |
7891 | while (lnum != BasicBlock::NOT_IN_LOOP) |
7892 | { |
7893 | optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd); |
7894 | lnum = optLoopTable[lnum].lpParent; |
7895 | } |
7896 | } |
7897 | |
7898 | /***************************************************************************** |
7899 | * |
 * Helper passed to Compiler::fgWalkTreePre() to decrement the LclVar usage counts.
 * The 'keepList' is either a single tree or a list of trees that are formed by
7902 | * one or more GT_COMMA nodes. It is the kept side-effects as returned by the |
7903 | * gtExtractSideEffList method. |
7904 | */ |
7905 | |
7906 | /* static */ |
7907 | Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTree** pTree, fgWalkData* data) |
7908 | { |
7909 | GenTree* tree = *pTree; |
7910 | Compiler* comp = data->compiler; |
7911 | GenTree* keepList = (GenTree*)(data->pCallbackData); |
7912 | |
7913 | // We may have a non-NULL side effect list that is being kept |
7914 | // |
7915 | if (keepList) |
7916 | { |
7917 | GenTree* keptTree = keepList; |
7918 | while (keptTree->OperGet() == GT_COMMA) |
7919 | { |
7920 | assert(keptTree->OperKind() & GTK_SMPOP); |
7921 | GenTree* op1 = keptTree->gtOp.gtOp1; |
7922 | GenTree* op2 = keptTree->gtGetOp2(); |
7923 | |
            // For the GT_COMMA case the op1 is part of the original CSE tree
7925 | // that is being kept because it contains some side-effect |
7926 | // |
7927 | if (tree == op1) |
7928 | { |
7929 | // This tree and all of its sub trees are being kept. |
7930 | return WALK_SKIP_SUBTREES; |
7931 | } |
7932 | |
            // For the GT_COMMA case the op2 is the remaining side-effects of the original CSE tree
7934 | // which can again be another GT_COMMA or the final side-effect part |
7935 | // |
7936 | keptTree = op2; |
7937 | } |
7938 | if (tree == keptTree) |
7939 | { |
7940 | // This tree and all of its sub trees are being kept. |
7941 | return WALK_SKIP_SUBTREES; |
7942 | } |
7943 | } |
7944 | |
7945 | return WALK_CONTINUE; |
7946 | } |
7947 | |
7948 | /***************************************************************************** |
7949 | * |
7950 | * Routine called to decrement the LclVar ref counts when removing a tree |
7951 | * during the remove RangeCheck phase. |
7952 | * This method will decrement the refcounts for any LclVars used below 'deadTree', |
7953 | * unless the node is found in the 'keepList' (which are saved side effects) |
7954 | * The keepList is communicated using the walkData.pCallbackData field |
7955 | * Also the compCurBB must be set to the current BasicBlock which contains |
7956 | * 'deadTree' as we need to fetch the block weight when decrementing the ref counts. |
7957 | */ |
7958 | |
7959 | void Compiler::optRemoveTree(GenTree* deadTree, GenTree* keepList) |
7960 | { |
7961 | // We communicate this value using the walkData.pCallbackData field |
7962 | // |
7963 | fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList); |
7964 | } |
7965 | |
7966 | //------------------------------------------------------------------------------ |
7967 | // optRemoveRangeCheck : Given an array index node, mark it as not needing a range check. |
7968 | // |
7969 | // Arguments: |
7970 | // tree - Range check tree |
7971 | // stmt - Statement the tree belongs to |
7972 | |
7973 | void Compiler::optRemoveRangeCheck(GenTree* tree, GenTree* stmt) |
7974 | { |
7975 | #if !REARRANGE_ADDS |
7976 | noway_assert(!"can't remove range checks without REARRANGE_ADDS right now" ); |
7977 | #endif |
7978 | |
7979 | noway_assert(stmt->gtOper == GT_STMT); |
7980 | noway_assert(tree->gtOper == GT_COMMA); |
7981 | |
7982 | GenTree* bndsChkTree = tree->gtOp.gtOp1; |
7983 | |
7984 | noway_assert(bndsChkTree->OperIsBoundsCheck()); |
7985 | |
7986 | GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk(); |
7987 | |
7988 | #ifdef DEBUG |
7989 | if (verbose) |
7990 | { |
7991 | printf("Before optRemoveRangeCheck:\n" ); |
7992 | gtDispTree(tree); |
7993 | } |
7994 | #endif |
7995 | |
7996 | GenTree* sideEffList = nullptr; |
7997 | |
7998 | gtExtractSideEffList(bndsChkTree, &sideEffList, GTF_ASG); |
7999 | |
8000 | // Decrement the ref counts for any LclVars that are being deleted |
8001 | // |
8002 | optRemoveTree(bndsChkTree, sideEffList); |
8003 | |
8004 | // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects. |
8005 | tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode(); |
8006 | // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA. |
8007 | tree->gtFlags |= GTF_DONT_CSE; |
8008 | |
8009 | gtUpdateSideEffects(stmt, tree); |
8010 | |
8011 | /* Recalculate the gtCostSz, etc... */ |
8012 | gtSetStmtInfo(stmt); |
8013 | |
8014 | /* Re-thread the nodes if necessary */ |
8015 | if (fgStmtListThreaded) |
8016 | { |
8017 | fgSetStmtSeq(stmt); |
8018 | } |
8019 | |
8020 | #ifdef DEBUG |
8021 | if (verbose) |
8022 | { |
8023 | printf("After optRemoveRangeCheck:\n" ); |
8024 | gtDispTree(tree); |
8025 | } |
8026 | #endif |
8027 | } |
8028 | |
8029 | /***************************************************************************** |
8030 | * Return the scale in an array reference, given a pointer to the |
8031 | * multiplication node. |
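 *
 * For example, given a morphed index expression (i * 5) * 4 (a GT_MUL with a
 * constant second operand whose first operand is another GT_MUL by a constant),
 * this returns a scale of 20 and sets "*pIndex" to 'i'.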
8032 | */ |
8033 | |
8034 | ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEBUGARG(bool bRngChk)) |
8035 | { |
8036 | assert(mul); |
8037 | assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH); |
8038 | assert(mul->gtOp.gtOp2->IsCnsIntOrI()); |
8039 | |
8040 | ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue(); |
8041 | |
8042 | if (mul->gtOper == GT_LSH) |
8043 | { |
8044 | scale = ((ssize_t)1) << scale; |
8045 | } |
8046 | |
8047 | GenTree* index = mul->gtOp.gtOp1; |
8048 | |
8049 | if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI()) |
8050 | { |
8051 | // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4): |
8052 | // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5), |
8053 | // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1. |
8054 | // Otherwise, we cannot optimize it. We will simply keep the original scale and index. |
8055 | scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue(); |
8056 | index = index->gtOp.gtOp1; |
8057 | } |
8058 | |
8059 | assert(!bRngChk || index->gtOper != GT_COMMA); |
8060 | |
8061 | if (pIndex) |
8062 | { |
8063 | *pIndex = index; |
8064 | } |
8065 | |
8066 | return scale; |
8067 | } |
8068 | |
8069 | //------------------------------------------------------------------------------ |
8070 | // optObtainLoopCloningOpts: Identify optimization candidates and update |
8071 | // the "context" for array optimizations. |
8072 | // |
8073 | // Arguments: |
8074 | // context - data structure where all loop cloning info is kept. The |
8075 | // optInfo fields of the context are updated with the |
8076 | // identified optimization candidates. |
8077 | // |
8078 | void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context) |
8079 | { |
8080 | for (unsigned i = 0; i < optLoopCount; i++) |
8081 | { |
8082 | JITDUMP("Considering loop %d to clone for optimizations.\n" , i); |
8083 | if (optIsLoopClonable(i)) |
8084 | { |
8085 | if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED)) |
8086 | { |
8087 | optIdentifyLoopOptInfo(i, context); |
8088 | } |
8089 | } |
8090 | JITDUMP("------------------------------------------------------------\n" ); |
8091 | } |
8092 | JITDUMP("\n" ); |
8093 | } |
8094 | |
8095 | //------------------------------------------------------------------------ |
// optIdentifyLoopOptInfo: Identify loop optimization candidates and also
8097 | // check if the loop is suitable for the optimizations performed. |
8098 | // |
8099 | // Arguments: |
8100 | // loopNum - the current loop index for which conditions are derived. |
8101 | // context - data structure where all loop cloning candidates will be |
8102 | // updated. |
8103 | // |
8104 | // Return Value: |
8105 | // If the loop is not suitable for the optimizations, return false - context |
8106 | // should not contain any optimization candidate for the loop if false. |
8107 | // Else return true. |
8108 | // |
8109 | // Operation: |
8110 | // Check if the loop is well formed for this optimization and identify the |
8111 | // optimization candidates and update the "context" parameter with all the |
8112 | // contextual information necessary to perform the optimization later. |
8113 | // |
8114 | bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context) |
8115 | { |
8116 | noway_assert(loopNum < optLoopCount); |
8117 | |
8118 | LoopDsc* pLoop = &optLoopTable[loopNum]; |
8119 | |
8120 | if (!(pLoop->lpFlags & LPFLG_ITER)) |
8121 | { |
8122 | JITDUMP("> No iter flag on loop %d.\n" , loopNum); |
8123 | return false; |
8124 | } |
8125 | |
8126 | unsigned ivLclNum = pLoop->lpIterVar(); |
8127 | if (lvaVarAddrExposed(ivLclNum)) |
8128 | { |
8129 | JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n" , ivLclNum); |
8130 | return false; |
8131 | } |
8132 | |
8133 | BasicBlock* head = pLoop->lpHead; |
8134 | BasicBlock* end = pLoop->lpBottom; |
8135 | BasicBlock* beg = head->bbNext; |
8136 | |
8137 | if (end->bbJumpKind != BBJ_COND) |
8138 | { |
8139 | JITDUMP("> Couldn't find termination test.\n" ); |
8140 | return false; |
8141 | } |
8142 | |
8143 | if (end->bbJumpDest != beg) |
8144 | { |
8145 | JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n" ); |
8146 | return false; |
8147 | } |
8148 | |
8149 | // TODO-CQ: CLONE: Mark increasing or decreasing loops. |
8150 | if ((pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1)) |
8151 | { |
8152 | JITDUMP("> Loop iteration operator not matching\n" ); |
8153 | return false; |
8154 | } |
8155 | |
8156 | if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 && |
8157 | (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0) |
8158 | { |
8159 | JITDUMP("> Loop limit is neither constant, variable or array length\n" ); |
8160 | return false; |
8161 | } |
8162 | |
8163 | if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD)) || |
8164 | ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB)))) |
8165 | { |
8166 | JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n" , |
8167 | GenTree::OpName(pLoop->lpTestOper()), GenTree::OpName(pLoop->lpIterOper())); |
8168 | return false; |
8169 | } |
8170 | |
8171 | if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT)) |
8172 | { |
8173 | JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n" , |
8174 | pLoop->lpTestTree->gtTreeID); |
8175 | return false; |
8176 | } |
8177 | |
8178 | #ifdef DEBUG |
8179 | GenTree* op1 = pLoop->lpIterator(); |
8180 | noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum)); |
8181 | #endif |
8182 | |
8183 | JITDUMP("Checking blocks " FMT_BB ".." FMT_BB " for optimization candidates\n" , beg->bbNum, |
8184 | end->bbNext ? end->bbNext->bbNum : 0); |
8185 | |
8186 | LoopCloneVisitorInfo info(context, loopNum, nullptr); |
8187 | for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext) |
8188 | { |
8189 | compCurBB = block; |
8190 | for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) |
8191 | { |
8192 | info.stmt = stmt; |
8193 | const bool lclVarsOnly = false; |
8194 | const bool computeStack = false; |
8195 | fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, lclVarsOnly, |
8196 | computeStack); |
8197 | } |
8198 | } |
8199 | |
8200 | return true; |
8201 | } |
8202 | |
8203 | //--------------------------------------------------------------------------------------------------------------- |
8204 | // optExtractArrIndex: Try to extract the array index from "tree". |
8205 | // |
8206 | // Arguments: |
8207 | // tree the tree to be checked if it is the array [] operation. |
8208 | // result the extracted GT_INDEX information is updated in result. |
8209 | // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM. |
8210 | // |
8211 | // Return Value: |
8212 | // Returns true if array index can be extracted, else, return false. See assumption about |
8213 | // what will be extracted. The "result" variable's rank parameter is advanced for every |
8214 | // dimension of [] encountered. |
8215 | // |
8216 | // Operation: |
8217 | // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph |
8218 | // we have converted a GT_INDEX tree into a scaled index base offset expression. We need |
8219 | // to reconstruct this to be able to know if this is an array access. |
8220 | // |
8221 | // Assumption: |
8222 | // The method extracts only if the array base and indices are GT_LCL_VAR. |
8223 | // |
8224 | // TODO-CQ: CLONE: After morph make sure this method extracts values before morph. |
8225 | // |
8226 | // [000024] ------------ * STMT void(IL 0x007...0x00C) |
8227 | // [000021] a--XG+------ | /--* IND int |
8228 | // [000045] -----+------ | | | /--* CNS_INT long 16 Fseq[#FirstElem] |
8229 | // [000046] -----+------ | | | /--* ADD long |
8230 | // [000043] -----+-N---- | | | | | /--* CNS_INT long 2 |
8231 | // [000044] -----+------ | | | | \--* LSH long |
8232 | // [000042] -----+------ | | | | \--* CAST long < -int |
8233 | // [000039] i----+------ | | | | \--* LCL_VAR int V04 loc0 |
8234 | // [000047] -----+------ | | \--* ADD byref |
8235 | // [000038] -----+------ | | \--* LCL_VAR ref V00 arg0 |
8236 | // [000048] ---XG+------ | /--* COMMA int |
8237 | // [000041] ---X-+------ | | \--* ARR_BOUNDS_CHECK_Rng void |
8238 | // [000020] -----+------ | | +--* LCL_VAR int V04 loc0 |
8239 | // [000040] ---X-+------ | | \--* ARR_LENGTH int |
8240 | // [000019] -----+------ | | \--* LCL_VAR ref V00 arg0 |
8241 | // [000023] -A-XG+------ \--* ASG int |
8242 | // [000022] D----+-N---- \--* LCL_VAR int V06 tmp1 |
8243 | |
bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum)
8245 | { |
8246 | if (tree->gtOper != GT_COMMA) |
8247 | { |
8248 | return false; |
8249 | } |
8250 | GenTree* before = tree->gtGetOp1(); |
8251 | if (before->gtOper != GT_ARR_BOUNDS_CHECK) |
8252 | { |
8253 | return false; |
8254 | } |
8255 | GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk(); |
8256 | if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR) |
8257 | { |
8258 | return false; |
8259 | } |
8260 | |
    // For spans, gtArrLen may be a local var, a local field, or a constant.
    // We don't try to extract those.
8263 | if (arrBndsChk->gtArrLen->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_CNS_INT)) |
8264 | { |
8265 | return false; |
8266 | } |
8267 | if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR) |
8268 | { |
8269 | return false; |
8270 | } |
8271 | unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum; |
8272 | if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum) |
8273 | { |
8274 | return false; |
8275 | } |
8276 | |
8277 | unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum; |
8278 | |
8279 | GenTree* after = tree->gtGetOp2(); |
8280 | |
8281 | if (after->gtOper != GT_IND) |
8282 | { |
8283 | return false; |
8284 | } |
8285 | // It used to be the case that arrBndsChks for struct types would fail the previous check because |
8286 | // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now |
8287 | // return false if the type of 'after' is a struct type. (This was causing us to clone loops |
8288 | // that we were not previously cloning.) |
8289 | // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct |
8290 | // types. |
8291 | if (varTypeIsStruct(after)) |
8292 | { |
8293 | return false; |
8294 | } |
8295 | |
8296 | GenTree* sibo = after->gtGetOp1(); // sibo = scale*index + base + offset |
8297 | if (sibo->gtOper != GT_ADD) |
8298 | { |
8299 | return false; |
8300 | } |
8301 | GenTree* base = sibo->gtGetOp1(); |
8302 | GenTree* sio = sibo->gtGetOp2(); // sio == scale*index + offset |
8303 | if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl) |
8304 | { |
8305 | return false; |
8306 | } |
8307 | if (sio->gtOper != GT_ADD) |
8308 | { |
8309 | return false; |
8310 | } |
8311 | GenTree* ofs = sio->gtGetOp2(); |
8312 | GenTree* si = sio->gtGetOp1(); // si = scale*index |
8313 | if (ofs->gtOper != GT_CNS_INT) |
8314 | { |
8315 | return false; |
8316 | } |
8317 | if (si->gtOper != GT_LSH) |
8318 | { |
8319 | return false; |
8320 | } |
8321 | GenTree* scale = si->gtGetOp2(); |
8322 | GenTree* index = si->gtGetOp1(); |
8323 | if (scale->gtOper != GT_CNS_INT) |
8324 | { |
8325 | return false; |
8326 | } |
8327 | #ifdef _TARGET_64BIT_ |
8328 | if (index->gtOper != GT_CAST) |
8329 | { |
8330 | return false; |
8331 | } |
8332 | GenTree* indexVar = index->gtGetOp1(); |
8333 | #else |
8334 | GenTree* indexVar = index; |
8335 | #endif |
8336 | if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl) |
8337 | { |
8338 | return false; |
8339 | } |
8340 | if (lhsNum == BAD_VAR_NUM) |
8341 | { |
8342 | result->arrLcl = arrLcl; |
8343 | } |
8344 | result->indLcls.Push(indLcl); |
8345 | result->bndsChks.Push(tree); |
8346 | result->useBlock = compCurBB; |
8347 | result->rank++; |
8348 | |
8349 | return true; |
8350 | } |
8351 | |
8352 | //--------------------------------------------------------------------------------------------------------------- |
8353 | // optReconstructArrIndex: Reconstruct array index. |
8354 | // |
8355 | // Arguments: |
8356 | // tree the tree to be checked if it is an array [][][] operation. |
8357 | // result the extracted GT_INDEX information. |
8358 | // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM. |
8359 | // |
8360 | // Return Value: |
8361 | // Returns true if array index can be extracted, else, return false. "rank" field in |
8362 | // "result" contains the array access depth. The "indLcls" fields contain the indices. |
8363 | // |
8364 | // Operation: |
8365 | // Recursively look for a list of array indices. In the example below, we encounter, |
8366 | // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02] |
8367 | // The return value would then be: |
8368 | // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 } |
8369 | // |
8370 | // V00[V01][V02] would be morphed as: |
8371 | // |
8372 | // [000000001B366848] ---XG------- indir int |
8373 | // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16 |
8374 | // [000000001B36C200] ---XG------- comma int |
8375 | // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02) |
8376 | // [000000001B36C278] -A-XG------- comma int |
8377 | // [000000001B366730] R--XG------- indir ref |
8378 | // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24 |
8379 | // [000000001B36C818] ---XG------- comma ref |
8380 | // [000000001B36C458] ---X-------- arrBndsChk(V00, V01) |
8381 | // [000000001B36BB60] -A-XG------- = ref |
8382 | // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2 |
8383 | // [000000001B36A668] -A-XG------- = int |
8384 | // [000000001B36A5F0] D------N---- lclVar int V03 tmp0 |
8385 | // |
8386 | // Assumption: |
8387 | // The method extracts only if the array base and indices are GT_LCL_VAR. |
8388 | // |
8389 | bool Compiler::optReconstructArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum) |
8390 | { |
8391 | // If we can extract "tree" (which is a top level comma) return. |
8392 | if (optExtractArrIndex(tree, result, lhsNum)) |
8393 | { |
8394 | return true; |
8395 | } |
8396 | // We have a comma (check if array base expr is computed in "before"), descend further. |
8397 | else if (tree->OperGet() == GT_COMMA) |
8398 | { |
8399 | GenTree* before = tree->gtGetOp1(); |
8400 | // "before" should evaluate an array base for the "after" indexing. |
8401 | if (before->OperGet() != GT_ASG) |
8402 | { |
8403 | return false; |
8404 | } |
8405 | GenTree* lhs = before->gtGetOp1(); |
8406 | GenTree* rhs = before->gtGetOp2(); |
8407 | |
8408 | // "rhs" should contain an GT_INDEX |
8409 | if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum)) |
8410 | { |
8411 | return false; |
8412 | } |
8413 | unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum; |
8414 | GenTree* after = tree->gtGetOp2(); |
8415 | // Pass the "lhsNum", so we can verify if indeed it is used as the array base. |
8416 | return optExtractArrIndex(after, result, lhsNum); |
8417 | } |
8418 | return false; |
8419 | } |
8420 | |
8421 | /* static */ |
8422 | Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pTree, Compiler::fgWalkData* data) |
8423 | { |
8424 | return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData); |
8425 | } |
8426 | |
8427 | //------------------------------------------------------------------------- |
8428 | // optIsStackLocalInvariant: Is stack local invariant in loop. |
8429 | // |
8430 | // Arguments: |
8431 | // loopNum The loop in which the variable is tested for invariance. |
8432 | // lclNum The local that is tested for invariance in the loop. |
8433 | // |
8434 | // Return Value: |
8435 | // Returns true if the variable is loop invariant in loopNum. |
8436 | // |
8437 | bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum) |
8438 | { |
8439 | if (lvaVarAddrExposed(lclNum)) |
8440 | { |
8441 | return false; |
8442 | } |
8443 | if (optIsVarAssgLoop(loopNum, lclNum)) |
8444 | { |
8445 | return false; |
8446 | } |
8447 | return true; |
8448 | } |
8449 | |
8450 | //---------------------------------------------------------------------------------------------- |
8451 | // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so, |
8452 | // identify as potential candidate and update the loop context. |
8453 | // |
8454 | // Arguments: |
8455 | // tree The tree encountered during the tree walk. |
8456 | // info Supplies information about the current block or stmt in which the tree is. |
8457 | // Also supplies the "context" pointer for updating with loop cloning |
8458 | // candidates. Also supplies loopNum. |
8459 | // |
8460 | // Operation: |
8461 | // If array index can be reconstructed, check if the iter var of the loop matches the |
8462 | // array index var in some dim. Also ensure other index vars before the identified |
8463 | // dim are loop invariant. |
8464 | // |
8465 | // Return Value: |
8466 | // Skip sub trees if the optimization candidate is identified or else continue walking |
8467 | // |
8468 | Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info) |
8469 | { |
8470 | ArrIndex arrIndex(getAllocator()); |
8471 | |
8472 | // Check if array index can be optimized. |
8473 | if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM)) |
8474 | { |
8475 | assert(tree->gtOper == GT_COMMA); |
8476 | #ifdef DEBUG |
8477 | if (verbose) |
8478 | { |
8479 | JITDUMP("Found ArrIndex at tree " ); |
8480 | printTreeID(tree); |
8481 | printf(" which is equivalent to: " ); |
8482 | arrIndex.Print(); |
8483 | JITDUMP("\n" ); |
8484 | } |
8485 | #endif |
8486 | if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl)) |
8487 | { |
8488 | return WALK_SKIP_SUBTREES; |
8489 | } |
8490 | |
8491 | // Walk the dimensions and see if iterVar of the loop is used as index. |
8492 | for (unsigned dim = 0; dim < arrIndex.rank; ++dim) |
8493 | { |
            // Is the index variable also used as the loop iter var?
8495 | if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar()) |
8496 | { |
8497 | // Check the previous indices are all loop invariant. |
8498 | for (unsigned dim2 = 0; dim2 < dim; ++dim2) |
8499 | { |
8500 | if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2])) |
8501 | { |
8502 | JITDUMP("V%02d is assigned in loop\n" , arrIndex.indLcls[dim2]); |
8503 | return WALK_SKIP_SUBTREES; |
8504 | } |
8505 | } |
8506 | #ifdef DEBUG |
8507 | if (verbose) |
8508 | { |
8509 | JITDUMP("Loop %d can be cloned for ArrIndex " , info->loopNum); |
8510 | arrIndex.Print(); |
8511 | JITDUMP(" on dim %d\n" , dim); |
8512 | } |
8513 | #endif |
8514 | // Update the loop context. |
8515 | info->context->EnsureLoopOptInfo(info->loopNum) |
8516 | ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt)); |
8517 | } |
8518 | else |
8519 | { |
8520 | JITDUMP("Induction V%02d is not used as index on dim %d\n" , optLoopTable[info->loopNum].lpIterVar(), |
8521 | dim); |
8522 | } |
8523 | } |
8524 | return WALK_SKIP_SUBTREES; |
8525 | } |
8526 | else if (tree->gtOper == GT_ARR_ELEM) |
8527 | { |
8528 | // TODO-CQ: CLONE: Implement. |
8529 | return WALK_SKIP_SUBTREES; |
8530 | } |
8531 | return WALK_CONTINUE; |
8532 | } |
8533 | |
8534 | struct optRangeCheckDsc |
8535 | { |
8536 | Compiler* pCompiler; |
8537 | bool bValidIndex; |
8538 | }; |
8539 | /* |
8540 | Walk to make sure that only locals and constants are contained in the index |
8541 | for a range check |
8542 | */ |
8543 | Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTree** pTree, fgWalkData* data) |
8544 | { |
8545 | GenTree* tree = *pTree; |
8546 | optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData; |
8547 | |
8548 | if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD) |
8549 | { |
8550 | pData->bValidIndex = false; |
8551 | return WALK_ABORT; |
8552 | } |
8553 | |
8554 | if (tree->gtOper == GT_LCL_VAR) |
8555 | { |
8556 | if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed) |
8557 | { |
8558 | pData->bValidIndex = false; |
8559 | return WALK_ABORT; |
8560 | } |
8561 | } |
8562 | |
8563 | return WALK_CONTINUE; |
8564 | } |
8565 | |
8566 | /* |
    Returns true if a range check can legally be removed. For the moment it checks
    that the array is a local array (not subject to race conditions) and that the
    index is either a constant or a local.
8570 | */ |
8571 | bool Compiler::optIsRangeCheckRemovable(GenTree* tree) |
8572 | { |
8573 | noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK); |
8574 | GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); |
8575 | GenTree* pArray = bndsChk->GetArray(); |
8576 | if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI()) |
8577 | { |
8578 | return false; |
8579 | } |
8580 | GenTree* pIndex = bndsChk->gtIndex; |
8581 | |
8582 | // The length must be a constant (the pArray == NULL case) or the array reference must be a local. |
8583 | // Otherwise we can be targeted by malicious race-conditions. |
8584 | if (pArray != nullptr) |
8585 | { |
8586 | if (pArray->gtOper != GT_LCL_VAR) |
8587 | { |
8588 | |
8589 | #ifdef DEBUG |
8590 | if (verbose) |
8591 | { |
8592 | printf("Can't remove range check if the array isn't referenced with a local\n" ); |
8593 | gtDispTree(pArray); |
8594 | } |
8595 | #endif |
8596 | return false; |
8597 | } |
8598 | else |
8599 | { |
8600 | noway_assert(pArray->gtType == TYP_REF); |
8601 | noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount); |
8602 | |
8603 | if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed) |
8604 | { |
8605 | // If the array address has been taken, don't do the optimization |
                // (this restriction can be lowered a bit, but I don't think it's worth it)
8607 | CLANG_FORMAT_COMMENT_ANCHOR; |
8608 | #ifdef DEBUG |
8609 | if (verbose) |
8610 | { |
8611 | printf("Can't remove range check if the array has its address taken\n" ); |
8612 | gtDispTree(pArray); |
8613 | } |
8614 | #endif |
8615 | return false; |
8616 | } |
8617 | } |
8618 | } |
8619 | |
8620 | optRangeCheckDsc Data; |
8621 | Data.pCompiler = this; |
8622 | Data.bValidIndex = true; |
8623 | |
8624 | fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data); |
8625 | |
8626 | if (!Data.bValidIndex) |
8627 | { |
8628 | #ifdef DEBUG |
8629 | if (verbose) |
8630 | { |
8631 | printf("Can't remove range check with this index" ); |
8632 | gtDispTree(pIndex); |
8633 | } |
8634 | #endif |
8635 | |
8636 | return false; |
8637 | } |
8638 | |
8639 | return true; |
8640 | } |
8641 | |
8642 | /****************************************************************************** |
8643 | * |
8644 | * Replace x==null with (x|x)==0 if x is a GC-type. |
8645 | * This will stress code-gen and the emitter to make sure they support such trees. |
8646 | */ |
8647 | |
8648 | #ifdef DEBUG |
8649 | |
8650 | void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock) |
8651 | { |
8652 | if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20)) |
8653 | { |
8654 | return; |
8655 | } |
8656 | |
8657 | noway_assert(condBlock->bbJumpKind == BBJ_COND); |
8658 | GenTree* condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr; |
8659 | |
8660 | noway_assert(condStmt->gtOper == GT_JTRUE); |
8661 | |
8662 | bool isBool; |
8663 | GenTree* relop; |
8664 | |
8665 | GenTree* comparand = optIsBoolCond(condStmt, &relop, &isBool); |
8666 | |
8667 | if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet())) |
8668 | { |
8669 | return; |
8670 | } |
8671 | |
8672 | if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF)) |
8673 | { |
8674 | return; |
8675 | } |
8676 | |
8677 | GenTree* comparandClone = gtCloneExpr(comparand); |
8678 | |
8679 | noway_assert(relop->gtOp.gtOp1 == comparand); |
8680 | genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND; |
8681 | relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone); |
8682 | |
    // The comparand type is already checked, and we have a const int; there is no harm
    // in morphing it into a TYP_I_IMPL.
8685 | noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT); |
8686 | relop->gtOp.gtOp2->gtType = TYP_I_IMPL; |
8687 | } |
8688 | |
8689 | #endif |
8690 | |
8691 | /****************************************************************************** |
8692 | * Function used by folding of boolean conditionals |
8693 | * Given a GT_JTRUE node, checks that it is a boolean comparison of the form |
8694 | * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1" |
8695 | * being a boolean lclVar and "op2" the const 0/1. |
 * On success, the comparand (i.e. boolVal) is returned; otherwise NULL.
 * compPtr returns the compare node (i.e. the GT_EQ or GT_NE node).
 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
 * When *boolPtr is true and the comparison was against 1 (i.e. true), we morph
 * the tree by reversing the GT_EQ/GT_NE and changing the 1 to 0.
8701 | */ |
8702 | |
8703 | GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr) |
8704 | { |
8705 | bool isBool = false; |
8706 | |
8707 | noway_assert(condBranch->gtOper == GT_JTRUE); |
8708 | GenTree* cond = condBranch->gtOp.gtOp1; |
8709 | |
8710 | /* The condition must be "!= 0" or "== 0" */ |
8711 | |
8712 | if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE)) |
8713 | { |
8714 | return nullptr; |
8715 | } |
8716 | |
8717 | /* Return the compare node to the caller */ |
8718 | |
8719 | *compPtr = cond; |
8720 | |
8721 | /* Get hold of the comparands */ |
8722 | |
8723 | GenTree* opr1 = cond->gtOp.gtOp1; |
8724 | GenTree* opr2 = cond->gtOp.gtOp2; |
8725 | |
8726 | if (opr2->gtOper != GT_CNS_INT) |
8727 | { |
8728 | return nullptr; |
8729 | } |
8730 | |
8731 | if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1)) |
8732 | { |
8733 | return nullptr; |
8734 | } |
8735 | |
8736 | ssize_t ival2 = opr2->gtIntCon.gtIconVal; |
8737 | |
8738 | /* Is the value a boolean? |
8739 | * We can either have a boolean expression (marked GTF_BOOLEAN) or |
8740 | * a local variable that is marked as being boolean (lvIsBoolean) */ |
8741 | |
8742 | if (opr1->gtFlags & GTF_BOOLEAN) |
8743 | { |
8744 | isBool = true; |
8745 | } |
8746 | else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) |
8747 | { |
8748 | isBool = true; |
8749 | } |
8750 | else if (opr1->gtOper == GT_LCL_VAR) |
8751 | { |
8752 | /* is it a boolean local variable */ |
8753 | |
8754 | unsigned lclNum = opr1->gtLclVarCommon.gtLclNum; |
8755 | noway_assert(lclNum < lvaCount); |
8756 | |
8757 | if (lvaTable[lclNum].lvIsBoolean) |
8758 | { |
8759 | isBool = true; |
8760 | } |
8761 | } |
8762 | |
8763 | /* Was our comparison against the constant 1 (i.e. true) */ |
8764 | if (ival2 == 1) |
8765 | { |
8766 | // If this is a boolean expression tree we can reverse the relop |
8767 | // and change the true to false. |
8768 | if (isBool) |
8769 | { |
8770 | gtReverseCond(cond); |
8771 | opr2->gtIntCon.gtIconVal = 0; |
8772 | } |
8773 | else |
8774 | { |
8775 | return nullptr; |
8776 | } |
8777 | } |
8778 | |
8779 | *boolPtr = isBool; |
8780 | return opr1; |
8781 | } |
8782 | |
8783 | void Compiler::optOptimizeBools() |
8784 | { |
8785 | #ifdef DEBUG |
8786 | if (verbose) |
8787 | { |
8788 | printf("*************** In optOptimizeBools()\n" ); |
8789 | if (verboseTrees) |
8790 | { |
8791 | printf("Blocks/Trees before phase\n" ); |
8792 | fgDispBasicBlocks(true); |
8793 | } |
8794 | } |
8795 | #endif |
8796 | bool change; |
8797 | |
8798 | do |
8799 | { |
8800 | change = false; |
8801 | |
8802 | for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext) |
8803 | { |
8804 | /* We're only interested in conditional jumps here */ |
8805 | |
8806 | if (b1->bbJumpKind != BBJ_COND) |
8807 | { |
8808 | continue; |
8809 | } |
8810 | |
8811 | /* If there is no next block, we're done */ |
8812 | |
8813 | BasicBlock* b2 = b1->bbNext; |
8814 | if (!b2) |
8815 | { |
8816 | break; |
8817 | } |
8818 | |
8819 | /* The next block must not be marked as BBF_DONT_REMOVE */ |
8820 | if (b2->bbFlags & BBF_DONT_REMOVE) |
8821 | { |
8822 | continue; |
8823 | } |
8824 | |
8825 | /* The next block also needs to be a condition */ |
8826 | |
8827 | if (b2->bbJumpKind != BBJ_COND) |
8828 | { |
8829 | #ifdef DEBUG |
8830 | optOptimizeBoolsGcStress(b1); |
8831 | #endif |
8832 | continue; |
8833 | } |
8834 | |
8835 | bool sameTarget; // Do b1 and b2 have the same bbJumpDest? |
8836 | |
8837 | if (b1->bbJumpDest == b2->bbJumpDest) |
8838 | { |
8839 | /* Given the following sequence of blocks : |
8840 | B1: brtrue(t1, BX) |
8841 | B2: brtrue(t2, BX) |
8842 | B3: |
8843 | we will try to fold it to : |
8844 | B1: brtrue(t1|t2, BX) |
8845 | B3: |
8846 | */ |
8847 | |
8848 | sameTarget = true; |
8849 | } |
8850 | else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/ |
8851 | { |
8852 | /* Given the following sequence of blocks : |
8853 | B1: brtrue(t1, B3) |
8854 | B2: brtrue(t2, BX) |
8855 | B3: |
8856 | we will try to fold it to : |
8857 | B1: brtrue((!t1)&&t2, BX) |
8858 | B3: |
8859 | */ |
8860 | |
8861 | sameTarget = false; |
8862 | } |
8863 | else |
8864 | { |
8865 | continue; |
8866 | } |
8867 | |
8868 | /* The second block must contain a single statement */ |
8869 | |
8870 | GenTree* s2 = b2->bbTreeList; |
8871 | if (s2->gtPrev != s2) |
8872 | { |
8873 | continue; |
8874 | } |
8875 | |
8876 | noway_assert(s2->gtOper == GT_STMT); |
8877 | GenTree* t2 = s2->gtStmt.gtStmtExpr; |
8878 | noway_assert(t2->gtOper == GT_JTRUE); |
8879 | |
8880 | /* Find the condition for the first block */ |
8881 | |
8882 | GenTree* s1 = b1->bbTreeList->gtPrev; |
8883 | |
8884 | noway_assert(s1->gtOper == GT_STMT); |
8885 | GenTree* t1 = s1->gtStmt.gtStmtExpr; |
8886 | noway_assert(t1->gtOper == GT_JTRUE); |
8887 | |
8888 | if (b2->countOfInEdges() > 1) |
8889 | { |
8890 | continue; |
8891 | } |
8892 | |
8893 | /* Find the branch conditions of b1 and b2 */ |
8894 | |
8895 | bool bool1, bool2; |
8896 | |
8897 | GenTree* c1 = optIsBoolCond(t1, &t1, &bool1); |
8898 | if (!c1) |
8899 | { |
8900 | continue; |
8901 | } |
8902 | |
8903 | GenTree* c2 = optIsBoolCond(t2, &t2, &bool2); |
8904 | if (!c2) |
8905 | { |
8906 | continue; |
8907 | } |
8908 | |
            noway_assert((t1->gtOper == GT_EQ || t1->gtOper == GT_NE) && (t1->gtOp.gtOp1 == c1));
            noway_assert((t2->gtOper == GT_EQ || t2->gtOper == GT_NE) && (t2->gtOp.gtOp1 == c2));
8911 | |
8912 | // Leave out floats where the bit-representation is more complicated |
8913 | // - there are two representations for 0. |
8914 | // |
8915 | if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet())) |
8916 | { |
8917 | continue; |
8918 | } |
8919 | |
8920 | // Make sure the types involved are of the same sizes |
8921 | if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet())) |
8922 | { |
8923 | continue; |
8924 | } |
8925 | if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet())) |
8926 | { |
8927 | continue; |
8928 | } |
8929 | #ifdef _TARGET_ARMARCH_ |
8930 | // Skip the small operand which we cannot encode. |
8931 | if (varTypeIsSmall(c1->TypeGet())) |
8932 | continue; |
8933 | #endif |
8934 | /* The second condition must not contain side effects */ |
8935 | |
8936 | if (c2->gtFlags & GTF_GLOB_EFFECT) |
8937 | { |
8938 | continue; |
8939 | } |
8940 | |
8941 | /* The second condition must not be too expensive */ |
8942 | |
8943 | gtPrepareCost(c2); |
8944 | |
8945 | if (c2->gtCostEx > 12) |
8946 | { |
8947 | continue; |
8948 | } |
8949 | |
8950 | genTreeOps foldOp; |
8951 | genTreeOps cmpOp; |
8952 | var_types foldType = c1->TypeGet(); |
8953 | if (varTypeIsGC(foldType)) |
8954 | { |
8955 | foldType = TYP_I_IMPL; |
8956 | } |
8957 | |
8958 | if (sameTarget) |
8959 | { |
8960 | /* Both conditions must be the same */ |
8961 | |
8962 | if (t1->gtOper != t2->gtOper) |
8963 | { |
8964 | continue; |
8965 | } |
8966 | |
8967 | if (t1->gtOper == GT_EQ) |
8968 | { |
8969 | /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0 |
8970 | So we will branch to BX if (c1&c2)==0 */ |
8971 | |
8972 | foldOp = GT_AND; |
8973 | cmpOp = GT_EQ; |
8974 | } |
8975 | else |
8976 | { |
8977 | /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0 |
8978 | So we will branch to BX if (c1|c2)!=0 */ |
8979 | |
8980 | foldOp = GT_OR; |
8981 | cmpOp = GT_NE; |
8982 | } |
8983 | } |
8984 | else |
8985 | { |
8986 | /* The b1 condition must be the reverse of the b2 condition */ |
8987 | |
8988 | if (t1->gtOper == t2->gtOper) |
8989 | { |
8990 | continue; |
8991 | } |
8992 | |
8993 | if (t1->gtOper == GT_EQ) |
8994 | { |
8995 | /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0 |
8996 | So we will branch to BX if (c1&c2)!=0 */ |
8997 | |
8998 | foldOp = GT_AND; |
8999 | cmpOp = GT_NE; |
9000 | } |
9001 | else |
9002 | { |
9003 | /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0 |
9004 | So we will branch to BX if (c1|c2)==0 */ |
9005 | |
9006 | foldOp = GT_OR; |
9007 | cmpOp = GT_EQ; |
9008 | } |
9009 | } |
9010 | |
9011 | // Anding requires both values to be 0 or 1 |
9012 | |
9013 | if ((foldOp == GT_AND) && (!bool1 || !bool2)) |
9014 | { |
9015 | continue; |
9016 | } |
9017 | |
9018 | // |
9019 | // Now update the trees |
9020 | // |
9021 | GenTree* cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2); |
9022 | if (bool1 && bool2) |
9023 | { |
9024 | /* When we 'OR'/'AND' two booleans, the result is boolean as well */ |
9025 | cmpOp1->gtFlags |= GTF_BOOLEAN; |
9026 | } |
9027 | |
9028 | t1->SetOper(cmpOp); |
9029 | t1->gtOp.gtOp1 = cmpOp1; |
9030 | t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC() |
9031 | |
9032 | #if FEATURE_SET_FLAGS |
9033 | // For comparisons against zero we will have the GTF_SET_FLAGS set |
9034 | // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree) |
9035 | // during the CSE phase. |
9036 | // |
9037 | // So make sure to clear any GTF_SET_FLAGS bit on these operations |
            // as they are no longer feeding directly into a comparison against zero.
9039 | |
9040 | // Make sure that the GTF_SET_FLAGS bit is cleared. |
9041 | // Fix 388436 ARM JitStress WP7 |
9042 | c1->gtFlags &= ~GTF_SET_FLAGS; |
9043 | c2->gtFlags &= ~GTF_SET_FLAGS; |
9044 | |
9045 | // The new top level node that we just created does feed directly into |
9046 | // a comparison against zero, so set the GTF_SET_FLAGS bit so that |
9047 | // we generate an instruction that sets the flags, which allows us |
9048 | // to omit the cmp with zero instruction. |
9049 | |
9050 | // Request that the codegen for cmpOp1 sets the condition flags |
9051 | // when it generates the code for cmpOp1. |
9052 | // |
9053 | cmpOp1->gtRequestSetFlags(); |
9054 | #endif |
9055 | |
9056 | flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1); |
9057 | flowList* edge2; |
9058 | |
9059 | /* Modify the target of the conditional jump and update bbRefs and bbPreds */ |
9060 | |
9061 | if (sameTarget) |
9062 | { |
9063 | edge2 = fgGetPredForBlock(b2->bbJumpDest, b2); |
9064 | } |
9065 | else |
9066 | { |
9067 | edge2 = fgGetPredForBlock(b2->bbNext, b2); |
9068 | |
9069 | fgRemoveRefPred(b1->bbJumpDest, b1); |
9070 | |
9071 | b1->bbJumpDest = b2->bbJumpDest; |
9072 | |
9073 | fgAddRefPred(b2->bbJumpDest, b1); |
9074 | } |
9075 | |
9076 | noway_assert(edge1 != nullptr); |
9077 | noway_assert(edge2 != nullptr); |
9078 | |
9079 | BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin; |
9080 | BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax; |
9081 | if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax)) |
9082 | { |
9083 | edge1->flEdgeWeightMin = edgeSumMin; |
9084 | edge1->flEdgeWeightMax = edgeSumMax; |
9085 | } |
9086 | else |
9087 | { |
9088 | edge1->flEdgeWeightMin = BB_ZERO_WEIGHT; |
9089 | edge1->flEdgeWeightMax = BB_MAX_WEIGHT; |
9090 | } |
9091 | |
9092 | /* Get rid of the second block (which is a BBJ_COND) */ |
9093 | |
9094 | noway_assert(b1->bbJumpKind == BBJ_COND); |
9095 | noway_assert(b2->bbJumpKind == BBJ_COND); |
9096 | noway_assert(b1->bbJumpDest == b2->bbJumpDest); |
9097 | noway_assert(b1->bbNext == b2); |
9098 | noway_assert(b2->bbNext); |
9099 | |
9100 | fgUnlinkBlock(b2); |
9101 | b2->bbFlags |= BBF_REMOVED; |
9102 | |
9103 | // If b2 was the last block of a try or handler, update the EH table. |
9104 | |
9105 | ehUpdateForDeletedBlock(b2); |
9106 | |
9107 | /* Update bbRefs and bbPreds */ |
9108 | |
9109 | /* Replace pred 'b2' for 'b2->bbNext' with 'b1' |
9110 | * Remove pred 'b2' for 'b2->bbJumpDest' */ |
9111 | |
9112 | fgReplacePred(b2->bbNext, b2, b1); |
9113 | |
9114 | fgRemoveRefPred(b2->bbJumpDest, b2); |
9115 | |
9116 | /* Update the block numbers and try again */ |
9117 | |
9118 | change = true; |
9119 | /* |
9120 | do |
9121 | { |
9122 | b2->bbNum = ++n1; |
9123 | b2 = b2->bbNext; |
9124 | } |
9125 | while (b2); |
9126 | */ |
9127 | |
9128 | // Update loop table |
9129 | fgUpdateLoopsAfterCompacting(b1, b2); |
9130 | |
9131 | #ifdef DEBUG |
9132 | if (verbose) |
9133 | { |
9134 | printf("Folded %sboolean conditions of " FMT_BB " and " FMT_BB " to :\n" , |
9135 | c2->OperIsLeaf() ? "" : "non-leaf " , b1->bbNum, b2->bbNum); |
9136 | gtDispTree(s1); |
9137 | printf("\n" ); |
9138 | } |
9139 | #endif |
9140 | } |
9141 | } while (change); |
9142 | |
9143 | #ifdef DEBUG |
9144 | fgDebugCheckBBlist(); |
9145 | #endif |
9146 | } |
9147 | |