1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                                  OptCSE                                   XX
XX                                                                           XX
10XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12*/
13
14#include "jitpch.h"
15#ifdef _MSC_VER
16#pragma hdrstop
17#endif
18
19/*****************************************************************************/
20#if FEATURE_ANYCSE
21/*****************************************************************************/
22
23/* static */
24const size_t Compiler::s_optCSEhashSize = EXPSET_SZ * 2;
25
26/*****************************************************************************
27 *
28 * We've found all the candidates, build the index for easy access.
29 */
30
31void Compiler::optCSEstop()
32{
33 if (optCSECandidateCount == 0)
34 {
35 return;
36 }
37
38 CSEdsc* dsc;
39 CSEdsc** ptr;
40 unsigned cnt;
41
42 optCSEtab = new (this, CMK_CSE) CSEdsc*[optCSECandidateCount]();
43
44 for (cnt = s_optCSEhashSize, ptr = optCSEhash; cnt; cnt--, ptr++)
45 {
46 for (dsc = *ptr; dsc; dsc = dsc->csdNextInBucket)
47 {
48 if (dsc->csdIndex)
49 {
50 noway_assert((unsigned)dsc->csdIndex <= optCSECandidateCount);
51 if (optCSEtab[dsc->csdIndex - 1] == nullptr)
52 {
53 optCSEtab[dsc->csdIndex - 1] = dsc;
54 }
55 }
56 }
57 }
58
59#ifdef DEBUG
60 for (cnt = 0; cnt < optCSECandidateCount; cnt++)
61 {
62 noway_assert(optCSEtab[cnt] != nullptr);
63 }
64#endif
65}
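// Note: after optCSEstop() completes, optCSEtab[i] holds the descriptor whose
// csdIndex is (i + 1). optCSEfindDsc() below relies on this one-based to
// zero-based mapping for constant-time lookup of a descriptor from a CSE index.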
66
67/*****************************************************************************
68 *
69 * Return the descriptor for the CSE with the given index.
70 */
71
72inline Compiler::CSEdsc* Compiler::optCSEfindDsc(unsigned index)
73{
74 noway_assert(index);
75 noway_assert(index <= optCSECandidateCount);
76 noway_assert(optCSEtab[index - 1]);
77
78 return optCSEtab[index - 1];
79}
80
81//------------------------------------------------------------------------
82// Compiler::optUnmarkCSE
83//
84// Arguments:
85// tree - A sub tree that originally was part of a CSE use
86// that we are currently in the process of removing.
87//
88// Return Value:
89// Returns true if we can safely remove the 'tree' node.
90// Returns false if the node is a CSE def that the caller
91// needs to extract and preserve.
92//
93// Notes:
94// If 'tree' is a CSE use then we perform an unmark CSE operation
95// so that the CSE used counts and weight are updated properly.
// The only caller of this method is optUnmarkCSEs, which is a
// tree walker visitor function. When we return false, that caller
// returns WALK_SKIP_SUBTREES so that we don't visit the remaining
// nodes of the CSE def.
100//
101bool Compiler::optUnmarkCSE(GenTree* tree)
102{
103 if (!IS_CSE_INDEX(tree->gtCSEnum))
104 {
105 // If this node isn't a CSE use or def we can safely remove this node.
106 //
107 return true;
108 }
109
110 // make sure it's been initialized
111 noway_assert(optCSEweight <= BB_MAX_WEIGHT);
112
113 // Is this a CSE use?
114 if (IS_CSE_USE(tree->gtCSEnum))
115 {
116 unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum);
117 CSEdsc* desc = optCSEfindDsc(CSEnum);
118
119#ifdef DEBUG
120 if (verbose)
121 {
122 printf("Unmark CSE use #%02d at ", CSEnum);
123 printTreeID(tree);
124 printf(": %3d -> %3d\n", desc->csdUseCount, desc->csdUseCount - 1);
125 }
126#endif // DEBUG
127
128 // Perform an unmark CSE operation
129
130 // 1. Reduce the nested CSE's 'use' count
131
132 noway_assert(desc->csdUseCount > 0);
133
134 if (desc->csdUseCount > 0)
135 {
136 desc->csdUseCount -= 1;
137
138 if (desc->csdUseWtCnt < optCSEweight)
139 {
140 desc->csdUseWtCnt = 0;
141 }
142 else
143 {
144 desc->csdUseWtCnt -= optCSEweight;
145 }
146 }
147
// 2. Unmark the CSE information in the node
149
150 tree->gtCSEnum = NO_CSE;
151 return true;
152 }
153 else
154 {
155 // It is not safe to remove this node, so we will return false
156 // and the caller must add this node to the side effect list
157 //
158 return false;
159 }
160}
161
162Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTree** pTree, fgWalkData* walkData)
163{
164 GenTree* tree = *pTree;
165 Compiler* comp = walkData->compiler;
166 optCSE_MaskData* pUserData = (optCSE_MaskData*)(walkData->pCallbackData);
167
168 if (IS_CSE_INDEX(tree->gtCSEnum))
169 {
170 unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum);
171 unsigned cseBit = genCSEnum2bit(cseIndex);
172 if (IS_CSE_DEF(tree->gtCSEnum))
173 {
174 BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_defMask, cseBit);
175 }
176 else
177 {
178 BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_useMask, cseBit);
179 }
180 }
181
182 return WALK_CONTINUE;
183}
184
// This function walks all the nodes of a given tree
// and returns the mask of CSE defs and uses for that tree.
187//
188void Compiler::optCSE_GetMaskData(GenTree* tree, optCSE_MaskData* pMaskData)
189{
190 pMaskData->CSE_defMask = BitVecOps::MakeEmpty(cseTraits);
191 pMaskData->CSE_useMask = BitVecOps::MakeEmpty(cseTraits);
192 fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData);
193}
194
195//------------------------------------------------------------------------
196// optCSE_canSwap: Determine if the execution order of two nodes can be swapped.
197//
198// Arguments:
199// op1 - The first node
200// op2 - The second node
201//
202// Return Value:
// Return true iff it is safe to swap the execution order of 'op1' and 'op2',
204// considering only the locations of the CSE defs and uses.
205//
206// Assumptions:
// 'op1' currently occurs before 'op2' in the execution order.
208//
209bool Compiler::optCSE_canSwap(GenTree* op1, GenTree* op2)
210{
211 // op1 and op2 must be non-null.
212 assert(op1 != nullptr);
213 assert(op2 != nullptr);
214
215 bool canSwap = true; // the default result unless proven otherwise.
216
217 optCSE_MaskData op1MaskData;
218 optCSE_MaskData op2MaskData;
219
220 optCSE_GetMaskData(op1, &op1MaskData);
221 optCSE_GetMaskData(op2, &op2MaskData);
222
223 // We cannot swap if op1 contains a CSE def that is used by op2
224 if (!BitVecOps::IsEmptyIntersection(cseTraits, op1MaskData.CSE_defMask, op2MaskData.CSE_useMask))
225 {
226 canSwap = false;
227 }
228 else
229 {
230 // We also cannot swap if op2 contains a CSE def that is used by op1.
231 if (!BitVecOps::IsEmptyIntersection(cseTraits, op2MaskData.CSE_defMask, op1MaskData.CSE_useMask))
232 {
233 canSwap = false;
234 }
235 }
236
237 return canSwap;
238}
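// A hypothetical example: suppose op1 contains the def of CSE #02 and op2
// contains a use of CSE #02. Then op1MaskData.CSE_defMask and
// op2MaskData.CSE_useMask both have bit genCSEnum2bit(2) set, the first
// IsEmptyIntersection check above fails, and optCSE_canSwap returns false,
// since reordering would place the use before its def.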
239
240//------------------------------------------------------------------------
241// optCSE_canSwap: Determine if the execution order of a node's operands can be swapped.
242//
243// Arguments:
244// tree - The node of interest
245//
246// Return Value:
// Return true iff it is safe to swap the execution order of the operands of 'tree',
248// considering only the locations of the CSE defs and uses.
249//
250bool Compiler::optCSE_canSwap(GenTree* tree)
251{
252 // We must have a binary treenode with non-null op1 and op2
253 assert((tree->OperKind() & GTK_SMPOP) != 0);
254
255 GenTree* op1 = tree->gtOp.gtOp1;
256 GenTree* op2 = tree->gtGetOp2();
257
258 return optCSE_canSwap(op1, op2);
259}
260
261/*****************************************************************************
262 *
263 * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
264 * when (CodeOptKind() != Compiler::SMALL_CODE)
265 */
266
267/* static */
268int __cdecl Compiler::optCSEcostCmpEx(const void* op1, const void* op2)
269{
270 CSEdsc* dsc1 = *(CSEdsc**)op1;
271 CSEdsc* dsc2 = *(CSEdsc**)op2;
272
273 GenTree* exp1 = dsc1->csdTree;
274 GenTree* exp2 = dsc2->csdTree;
275
276 int diff;
277
278 diff = (int)(exp2->gtCostEx - exp1->gtCostEx);
279
280 if (diff != 0)
281 {
282 return diff;
283 }
284
285 // Sort the higher Use Counts toward the top
286 diff = (int)(dsc2->csdUseWtCnt - dsc1->csdUseWtCnt);
287
288 if (diff != 0)
289 {
290 return diff;
291 }
292
// With the same use count, sort the lower Def Counts toward the top
294 diff = (int)(dsc1->csdDefWtCnt - dsc2->csdDefWtCnt);
295
296 if (diff != 0)
297 {
298 return diff;
299 }
300
301 // In order to ensure that we have a stable sort, we break ties using the csdIndex
302 return (int)(dsc1->csdIndex - dsc2->csdIndex);
303}
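// Example ordering under optCSEcostCmpEx (illustrative values): a candidate
// with gtCostEx 10 sorts ahead of one with gtCostEx 4; among equal costs the
// larger weighted use count comes first, then the smaller weighted def count,
// and csdIndex is the final tie breaker so that the sort order is stable even
// though qsort() itself is not.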
304
305/*****************************************************************************
306 *
307 * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
308 * when (CodeOptKind() == Compiler::SMALL_CODE)
309 */
310
311/* static */
312int __cdecl Compiler::optCSEcostCmpSz(const void* op1, const void* op2)
313{
314 CSEdsc* dsc1 = *(CSEdsc**)op1;
315 CSEdsc* dsc2 = *(CSEdsc**)op2;
316
317 GenTree* exp1 = dsc1->csdTree;
318 GenTree* exp2 = dsc2->csdTree;
319
320 int diff;
321
322 diff = (int)(exp2->gtCostSz - exp1->gtCostSz);
323
324 if (diff != 0)
325 {
326 return diff;
327 }
328
329 // Sort the higher Use Counts toward the top
330 diff = (int)(dsc2->csdUseCount - dsc1->csdUseCount);
331
332 if (diff != 0)
333 {
334 return diff;
335 }
336
// With the same use count, sort the lower Def Counts toward the top
338 diff = (int)(dsc1->csdDefCount - dsc2->csdDefCount);
339
340 if (diff != 0)
341 {
342 return diff;
343 }
344
345 // In order to ensure that we have a stable sort, we break ties using the csdIndex
346 return (int)(dsc1->csdIndex - dsc2->csdIndex);
347}
348
349/*****************************************************************************/
350#if FEATURE_VALNUM_CSE
351/*****************************************************************************/
352
353/*****************************************************************************
354 *
355 * Initialize the Value Number CSE tracking logic.
356 */
357
358void Compiler::optValnumCSE_Init()
359{
360#ifdef DEBUG
361 optCSEtab = nullptr;
362#endif
363
364 // Init traits and full/empty bitvectors. This will be used to track the
365 // individual cse indexes.
366 cseTraits = new (getAllocator()) BitVecTraits(EXPSET_SZ, this);
367 cseFull = BitVecOps::MakeFull(cseTraits);
368
369 /* Allocate and clear the hash bucket table */
370
371 optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize]();
372
373 optCSECandidateCount = 0;
374 optDoCSE = false; // Stays false until we find duplicate CSE tree
375
376 // optCseCheckedBoundMap is unused in most functions, allocated only when used
377 optCseCheckedBoundMap = nullptr;
378}
379
380//---------------------------------------------------------------------------
381// optValnumCSE_Index:
382// - Returns the CSE index to use for this tree,
383// or zero if this expression is not currently a CSE.
384//
385// Arguments:
386// tree - The current candidate CSE expression
387// stmt - The current statement that contains tree
388//
389//
// Notes: We build a hash table that contains all of the expressions that
// are presented to this method. Whenever we see a duplicate expression
// we have a CSE candidate. The first time we see the duplicate we
// allocate a new CSE index; if we have already allocated a CSE index
// we return that index. There is currently a limit of MAX_CSE_CNT (64)
// on the number of CSEs that we track.
396//
397unsigned Compiler::optValnumCSE_Index(GenTree* tree, GenTree* stmt)
398{
399 unsigned key;
400 unsigned hash;
401 unsigned hval;
402 CSEdsc* hashDsc;
403
// We use the liberal Value numbers when building the set of CSE candidates
405 ValueNum vnLib = tree->GetVN(VNK_Liberal);
406 ValueNum vnLibNorm = vnStore->VNNormalValue(vnLib);
407
408 // We use the normal value number because we want the CSE candidate to
409 // represent all expressions that produce the same normal value number
410 // We will handle the case where we have different exception sets when
411 // promoting the candidates.
412 //
// We do this because a GT_IND will usually have a NullPtrExc entry in its
// exc set, but we may have cleared the GTF_EXCEPT flag and if so, it won't
// have a NullPtrExc, or we may have assigned the value of a GT_IND
// into a LCL_VAR and then read it back later.
//
// When we are promoting the CSE candidates we ensure that any CSE
// uses that we promote have an exc set that is the same as the CSE defs,
// or have an empty set, and that all of the CSE defs produce the required
// set of exceptions for the CSE uses.
422 //
423
424 // We assign either vnLib or vnLibNorm as the hash key
425 //
426 // The only exception to using the normal value is for the GT_COMMA nodes.
427 // Here we check to see if we have a GT_COMMA with a different value number
428 // than the one from its op2. For this case we want to create two different
429 // CSE candidates. This allows us to CSE the GT_COMMA separately from its value.
430 //
431 if (tree->OperGet() == GT_COMMA)
432 {
433 // op2 is the value produced by a GT_COMMA
434 GenTree* op2 = tree->gtOp.gtOp2;
435 ValueNum vnOp2Lib = op2->GetVN(VNK_Liberal);
436
437 // If the value number for op2 and tree are different, then some new
438 // exceptions were produced by op1. For that case we will NOT use the
439 // normal value. This allows us to CSE commas with an op1 that is
440 // an ARR_BOUNDS_CHECK.
441 //
442 if (vnOp2Lib != vnLib)
443 {
444 key = (unsigned)vnLib; // include the exc set in the hash key
445 }
446 else
447 {
448 key = (unsigned)vnLibNorm;
449 }
450
451 // If we didn't do the above we would have op1 as the CSE def
452 // and the parent comma as the CSE use (but with a different exc set)
453 // This would prevent us from making any CSE with the comma
454 //
455 assert(vnLibNorm == vnStore->VNNormalValue(vnOp2Lib));
456 }
457 else // Not a GT_COMMA
458 {
459 key = (unsigned)vnLibNorm;
460 }
461
462 // Compute the hash value for the expression
463
464 hash = key;
465 hash *= (unsigned)(s_optCSEhashSize + 1);
466 hash >>= 7;
467
468 hval = hash % s_optCSEhashSize;
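// A rough sketch of the hashing scheme above, with illustrative values and
// assuming EXPSET_SZ is 64 (so s_optCSEhashSize is 128):
//
//   key  = 0x1234                  // normal liberal value number
//   hash = key * 129    = 601140   // multiply by (s_optCSEhashSize + 1)
//   hash = hash >> 7    = 4696     // discard the low-order bits
//   hval = hash % 128   = 88       // bucket index into optCSEhash[]
//
// Collisions simply chain through csdNextInBucket; only an exact csdHashKey
// match below is treated as the same CSE candidate.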
469
470 /* Look for a matching index in the hash table */
471
472 bool newCSE = false;
473
474 for (hashDsc = optCSEhash[hval]; hashDsc; hashDsc = hashDsc->csdNextInBucket)
475 {
476 if (hashDsc->csdHashKey == key)
477 {
478 treeStmtLst* newElem;
479
480 /* Have we started the list of matching nodes? */
481
482 if (hashDsc->csdTreeList == nullptr)
483 {
484 // Create the new element based upon the matching hashDsc element.
485
486 newElem = new (this, CMK_TreeStatementList) treeStmtLst;
487
488 newElem->tslTree = hashDsc->csdTree;
489 newElem->tslStmt = hashDsc->csdStmt;
490 newElem->tslBlock = hashDsc->csdBlock;
491 newElem->tslNext = nullptr;
492
493 /* Start the list with the first CSE candidate recorded */
494
495 hashDsc->csdTreeList = newElem;
496 hashDsc->csdTreeLast = newElem;
497 }
498
499 noway_assert(hashDsc->csdTreeList);
500
501 /* Append this expression to the end of the list */
502
503 newElem = new (this, CMK_TreeStatementList) treeStmtLst;
504
505 newElem->tslTree = tree;
506 newElem->tslStmt = stmt;
507 newElem->tslBlock = compCurBB;
508 newElem->tslNext = nullptr;
509
510 hashDsc->csdTreeLast->tslNext = newElem;
511 hashDsc->csdTreeLast = newElem;
512
513 optDoCSE = true; // Found a duplicate CSE tree
514
515 /* Have we assigned a CSE index? */
516 if (hashDsc->csdIndex == 0)
517 {
518 newCSE = true;
519 break;
520 }
521
522 assert(FitsIn<signed char>(hashDsc->csdIndex));
523 tree->gtCSEnum = ((signed char)hashDsc->csdIndex);
524 return hashDsc->csdIndex;
525 }
526 }
527
528 if (!newCSE)
529 {
530 /* Not found, create a new entry (unless we have too many already) */
531
532 if (optCSECandidateCount < MAX_CSE_CNT)
533 {
534 hashDsc = new (this, CMK_CSE) CSEdsc;
535
536 hashDsc->csdHashKey = key;
537 hashDsc->csdIndex = 0;
538 hashDsc->csdLiveAcrossCall = 0;
539 hashDsc->csdDefCount = 0;
540 hashDsc->csdUseCount = 0;
541 hashDsc->csdDefWtCnt = 0;
542 hashDsc->csdUseWtCnt = 0;
543 hashDsc->defExcSetPromise = vnStore->VNForEmptyExcSet();
544 hashDsc->defExcSetCurrent = vnStore->VNForNull(); // uninit value
545 hashDsc->defConservNormVN = vnStore->VNForNull(); // uninit value
546
547 hashDsc->csdTree = tree;
548 hashDsc->csdStmt = stmt;
549 hashDsc->csdBlock = compCurBB;
550 hashDsc->csdTreeList = nullptr;
551
552 /* Append the entry to the hash bucket */
553
554 hashDsc->csdNextInBucket = optCSEhash[hval];
555 optCSEhash[hval] = hashDsc;
556 }
557 return 0;
558 }
559 else // newCSE is true
560 {
561 /* We get here only after finding a matching CSE */
562
563 /* Create a new CSE (unless we have the maximum already) */
564
565 if (optCSECandidateCount == MAX_CSE_CNT)
566 {
567 return 0;
568 }
569
570 C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT);
571
572 unsigned CSEindex = ++optCSECandidateCount;
573 // EXPSET_TP CSEmask = genCSEnum2bit(CSEindex);
574
575 /* Record the new CSE index in the hashDsc */
576 hashDsc->csdIndex = CSEindex;
577
578 /* Update the gtCSEnum field in the original tree */
579 noway_assert(hashDsc->csdTreeList->tslTree->gtCSEnum == 0);
580 assert(FitsIn<signed char>(CSEindex));
581
582 hashDsc->csdTreeList->tslTree->gtCSEnum = ((signed char)CSEindex);
583 noway_assert(((unsigned)hashDsc->csdTreeList->tslTree->gtCSEnum) == CSEindex);
584
585 tree->gtCSEnum = ((signed char)CSEindex);
586
587#ifdef DEBUG
588 if (verbose)
589 {
590 EXPSET_TP tempMask = BitVecOps::MakeSingleton(cseTraits, genCSEnum2bit(CSEindex));
591 printf("\nCSE candidate #%02u, vn=", CSEindex);
592 vnPrint(key, 0);
593 printf(" cseMask=%s in " FMT_BB ", [cost=%2u, size=%2u]: \n", genES2str(cseTraits, tempMask),
594 compCurBB->bbNum, tree->gtCostEx, tree->gtCostSz);
595 gtDispTree(tree);
596 }
597#endif // DEBUG
598
599 return CSEindex;
600 }
601}
602
603/*****************************************************************************
604 *
605 * Locate CSE candidates and assign indices to them
606 * return 0 if no CSE candidates were found
607 * Also initialize bbCseIn, bbCseout and bbCseGen sets for all blocks
608 */
609
610unsigned Compiler::optValnumCSE_Locate()
611{
612 // Locate CSE candidates and assign them indices
613
614 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
615 {
616 GenTree* stmt;
617 GenTree* tree;
618
619 /* Make the block publicly available */
620
621 compCurBB = block;
622
/* Ensure that the BBF_VISITED and BBF_MARKED flags are clear */
/* Everyone who uses these flags is required to clear them afterwards */
625 noway_assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
626
627 /* Walk the statement trees in this basic block */
628 for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
629 {
630 noway_assert(stmt->gtOper == GT_STMT);
631
632 /* We walk the tree in the forwards direction (bottom up) */
633 bool stmtHasArrLenCandidate = false;
634 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
635 {
636 if (tree->OperIsCompare() && stmtHasArrLenCandidate)
637 {
638 // Check if this compare is a function of (one of) the checked
// bound candidate(s); we may want to update its value number
// if the array length gets CSEd.
641 optCseUpdateCheckedBoundMap(tree);
642 }
643
644 if (!optIsCSEcandidate(tree))
645 {
646 continue;
647 }
648
649 if (ValueNumStore::isReservedVN(tree->GetVN(VNK_Liberal)))
650 {
651 continue;
652 }
653
654 // Don't CSE constant values, instead let the Value Number
655 // based Assertion Prop phase handle them. Here, unlike
656 // the rest of optCSE, we use the conservative value number
657 // rather than the liberal one, since the conservative one
658 // is what the Value Number based Assertion Prop will use
659 // and the point is to avoid optimizing cases that it will
660 // handle.
661 //
662 if (vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair)))
663 {
664 continue;
665 }
666
667 /* Assign an index to this expression */
668
669 unsigned CSEindex = optValnumCSE_Index(tree, stmt);
670
671 if (CSEindex != 0)
672 {
673 noway_assert(((unsigned)tree->gtCSEnum) == CSEindex);
674 }
675
676 if (IS_CSE_INDEX(CSEindex) && (tree->OperGet() == GT_ARR_LENGTH))
677 {
678 stmtHasArrLenCandidate = true;
679 }
680 }
681 }
682 }
683
684 /* We're done if there were no interesting expressions */
685
686 if (!optDoCSE)
687 {
688 return 0;
689 }
690
691 /* We're finished building the expression lookup table */
692
693 optCSEstop();
694
695 return 1;
696}
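// For orientation, a summary of how the pieces in this file fit together
// (a reading aid, not an authoritative description of the phase ordering):
//   1. optValnumCSE_Locate       - hash expressions by value number and assign CSE indices
//   2. optValnumCSE_InitDataFlow - build the per-block bbCseGen/bbCseIn/bbCseOut sets
//   3. optValnumCSE_DataFlow     - forward "available expressions" dataflow over those sets
//   4. optValnumCSE_Availablity  - label each occurrence as a CSE def or a CSE use
//   5. CSE_Heuristic             - decide which candidates are profitable and rewrite
//                                  them to def/use a new CSE temp LclVar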
697
698//------------------------------------------------------------------------
699// optCseUpdateCheckedBoundMap: Check if this compare is a tractable function of
700// a checked bound that is a CSE candidate, and insert
701// an entry in the optCseCheckedBoundMap if so. This facilitates
702// subsequently updating the compare's value number if
703// the bound gets CSEd.
704//
705// Arguments:
706// compare - The compare node to check
707
708void Compiler::optCseUpdateCheckedBoundMap(GenTree* compare)
709{
710 assert(compare->OperIsCompare());
711
712 ValueNum compareVN = compare->gtVNPair.GetConservative();
713 VNFuncApp cmpVNFuncApp;
714
715 if (!vnStore->GetVNFunc(compareVN, &cmpVNFuncApp) || (cmpVNFuncApp.m_func != GetVNFuncForNode(compare)))
716 {
717 // Value numbering inferred this compare as something other
718 // than its own operator; leave its value number alone.
719 return;
720 }
721
722 // Now look for a checked bound feeding the compare
723 ValueNumStore::CompareCheckedBoundArithInfo info;
724
725 GenTree* boundParent = nullptr;
726
727 if (vnStore->IsVNCompareCheckedBound(compareVN))
728 {
// Simple compare of a bound against something else.
730
731 vnStore->GetCompareCheckedBound(compareVN, &info);
732 boundParent = compare;
733 }
734 else if (vnStore->IsVNCompareCheckedBoundArith(compareVN))
735 {
736 // Compare of a bound +/- some offset to something else.
737
738 GenTree* op1 = compare->gtGetOp1();
739 GenTree* op2 = compare->gtGetOp2();
740
741 vnStore->GetCompareCheckedBoundArithInfo(compareVN, &info);
742 if (GetVNFuncForNode(op1) == (VNFunc)info.arrOper)
743 {
744 // The arithmetic node is the bound's parent.
745 boundParent = op1;
746 }
747 else if (GetVNFuncForNode(op2) == (VNFunc)info.arrOper)
748 {
749 // The arithmetic node is the bound's parent.
750 boundParent = op2;
751 }
752 }
753
754 if (boundParent != nullptr)
755 {
756 GenTree* bound = nullptr;
757
758 // Find which child of boundParent is the bound. Abort if neither
759 // conservative value number matches the one from the compare VN.
760
761 GenTree* child1 = boundParent->gtGetOp1();
762 if ((info.vnBound == child1->gtVNPair.GetConservative()) && IS_CSE_INDEX(child1->gtCSEnum))
763 {
764 bound = child1;
765 }
766 else
767 {
768 GenTree* child2 = boundParent->gtGetOp2();
769 if ((info.vnBound == child2->gtVNPair.GetConservative()) && IS_CSE_INDEX(child2->gtCSEnum))
770 {
771 bound = child2;
772 }
773 }
774
775 if (bound != nullptr)
776 {
777 // Found a checked bound feeding a compare that is a tractable function of it;
778 // record this in the map so we can update the compare VN if the bound
779 // node gets CSEd.
780
781 if (optCseCheckedBoundMap == nullptr)
782 {
783 // Allocate map on first use.
784 optCseCheckedBoundMap = new (getAllocator()) NodeToNodeMap(getAllocator());
785 }
786
787 optCseCheckedBoundMap->Set(bound, compare);
788 }
789 }
790}
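// A hypothetical example: for C# source like "if (i < a.Length) { ... }" the
// compare's conservative VN is recognized as a compare against the checked
// bound a.Length (possibly +/- a constant). If the a.Length node is itself a
// CSE candidate we record bound -> compare in optCseCheckedBoundMap, so that
// the compare's value number can be refreshed if the bound node is later
// replaced by a CSE use.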
791
792/*****************************************************************************
793 *
* Compute each block's bbCseGen
795 * This is the bitset that represents the CSEs that are generated within the block
796 */
797void Compiler::optValnumCSE_InitDataFlow()
798{
799 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
800 {
/* Initialize the block's bbCseIn set */
802
803 bool init_to_zero = false;
804
805 if (block == fgFirstBB)
806 {
807 /* Clear bbCseIn for the entry block */
808 init_to_zero = true;
809 }
810#if !CSE_INTO_HANDLERS
811 else
812 {
813 if (bbIsHandlerBeg(block))
814 {
815 /* Clear everything on entry to filters or handlers */
816 init_to_zero = true;
817 }
818 }
819#endif
820 if (init_to_zero)
821 {
822 /* Initialize to {ZERO} prior to dataflow */
823 block->bbCseIn = BitVecOps::MakeEmpty(cseTraits);
824 }
825 else
826 {
827 /* Initialize to {ALL} prior to dataflow */
828 block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseFull);
829 }
830
831 block->bbCseOut = BitVecOps::MakeCopy(cseTraits, cseFull);
832
833 /* Initialize to {ZERO} prior to locating the CSE candidates */
834 block->bbCseGen = BitVecOps::MakeEmpty(cseTraits);
835 }
836
// We walk the set of CSE candidates and set the bit corresponding to the CSEindex
838 // in the block's bbCseGen bitset
839 //
840 for (unsigned cnt = 0; cnt < optCSECandidateCount; cnt++)
841 {
842 CSEdsc* dsc = optCSEtab[cnt];
843 unsigned CSEindex = dsc->csdIndex;
844 treeStmtLst* lst = dsc->csdTreeList;
845 noway_assert(lst);
846
847 while (lst != nullptr)
848 {
849 BasicBlock* block = lst->tslBlock;
850 BitVecOps::AddElemD(cseTraits, block->bbCseGen, genCSEnum2bit(CSEindex));
851 lst = lst->tslNext;
852 }
853 }
854
855#ifdef DEBUG
856 // Dump out the bbCseGen information that we just created
857 //
858 if (verbose)
859 {
860 bool headerPrinted = false;
861 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
862 {
863 if (block->bbCseGen != nullptr)
864 {
865 if (!headerPrinted)
866 {
867 printf("\nBlocks that generate CSE def/uses\n");
868 headerPrinted = true;
869 }
870 printf(FMT_BB, block->bbNum);
871 printf(" cseGen = %s\n", genES2str(cseTraits, block->bbCseGen));
872 }
873 }
874 }
875
876 fgDebugCheckLinks();
877
878#endif // DEBUG
879}
880
881/*****************************************************************************
882 *
883 * CSE Dataflow, so that all helper methods for dataflow are in a single place
884 *
885 */
886class CSE_DataFlow
887{
888 BitVecTraits* m_pBitVecTraits;
889 EXPSET_TP m_preMergeOut;
890
891public:
892 CSE_DataFlow(Compiler* pCompiler) : m_pBitVecTraits(pCompiler->cseTraits), m_preMergeOut(BitVecOps::UninitVal())
893 {
894 }
895
896 // At the start of the merge function of the dataflow equations, initialize premerge state (to detect changes.)
897 void StartMerge(BasicBlock* block)
898 {
899 BitVecOps::Assign(m_pBitVecTraits, m_preMergeOut, block->bbCseOut);
900 }
901
902 // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags.
903 void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
904 {
905 BitVecOps::IntersectionD(m_pBitVecTraits, block->bbCseIn, predBlock->bbCseOut);
906 }
907
908 // At the end of the merge store results of the dataflow equations, in a postmerge state.
909 bool EndMerge(BasicBlock* block)
910 {
911 BitVecOps::DataFlowD(m_pBitVecTraits, block->bbCseOut, block->bbCseGen, block->bbCseIn);
912 return !BitVecOps::Equal(m_pBitVecTraits, block->bbCseOut, m_preMergeOut);
913 }
914};
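// In effect CSE_DataFlow describes a classic forward "available expressions"
// problem. A sketch of the equations, assuming BitVecOps::DataFlowD computes
// out = out & (gen | in):
//
//   bbCseIn(entry)  = {}                                 // nothing available on entry
//   bbCseIn(block)  = intersection over preds p of bbCseOut(p)
//   bbCseOut(block) = bbCseOut(block) & (bbCseGen(block) | bbCseIn(block))
//
// Because bbCseOut starts at the full set and only ever shrinks, the iteration
// converges to the maximal fixed point.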
915
916/*****************************************************************************
917 *
918 * Perform a DataFlow forward analysis using the block CSE bitsets:
919 * Inputs:
* bbCseGen - Exact CSEs that become available within the block
921 * bbCseIn - Maximal estimate of CSEs that are/could be available at input to the block
922 * bbCseOut - Maximal estimate of CSEs that are/could be available at exit to the block
923 *
924 * Outputs:
925 * bbCseIn - Computed CSEs that are available at input to the block
926 * bbCseOut - Computed CSEs that are available at exit to the block
927 */
928
929void Compiler::optValnumCSE_DataFlow()
930{
931 CSE_DataFlow cse(this);
932
933 // Modified dataflow algorithm for available expressions.
934 DataFlow cse_flow(this);
935
936 cse_flow.ForwardAnalysis(cse);
937
938#ifdef DEBUG
939 if (verbose)
940 {
941 printf("\nAfter performing DataFlow for ValnumCSE's\n");
942
943 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
944 {
945 printf(FMT_BB, block->bbNum);
946 printf(" cseIn = %s", genES2str(cseTraits, block->bbCseIn));
947 printf(" cseOut = %s", genES2str(cseTraits, block->bbCseOut));
948 printf("\n");
949 }
950
951 printf("\n");
952 }
953#endif // DEBUG
954}
955
956//---------------------------------------------------------------------------
957// optValnumCSE_Availablity:
958//
959// Using the information computed by CSE_DataFlow determine for each
960// CSE whether the CSE is a definition (if the CSE was not available)
961// or if the CSE is a use (if the CSE was previously made available)
// The implementation iterates over all blocks, setting 'available_cses'
// to the CSEs that are available at input to the block.
// When a CSE expression is encountered it is classified either
// as a definition (if the CSE is not in the 'available_cses' set) or
// as a use (if the CSE is in the 'available_cses' set). If the CSE
// is a definition then it is added to the 'available_cses' set.
968//
969// This algorithm uncovers the defs and uses gradually and as it does
970// so it also builds the exception set that all defs make: 'defExcSetCurrent'
971// and the exception set that the uses we have seen depend upon: 'defExcSetPromise'
972//
973// Typically expressions with the same normal ValueNum generate exactly the
// same exception sets. There are two ways that we can get different exception
// sets with the same Normal value number.
//
// 1. We used an arithmetic identity:
978// e.g. (p.a + q.b) * 0 :: The normal value for the expression is zero
979// and we have NullPtrExc(p) and NullPtrExc(q)
980// e.g. (p.a - p.a) :: The normal value for the expression is zero
981// and we have NullPtrExc(p)
// 2. We stored an expression into a LclVar or into Memory and read it later
//    e.g. t = p.a;
//    e1 = (t + q.b)   :: e1 has one NullPtrExc and e2 has two,
//    e2 = (p.a + q.b)    but both compute the same normal value.
//    e.g. m.a = p.a;
//    e1 = (m.a + q.b) :: e1 and e2 have different exception sets,
//    e2 = (p.a + q.b)    but both compute the same normal value.
989//
990//
991void Compiler::optValnumCSE_Availablity()
992{
993#ifdef DEBUG
994 if (verbose)
995 {
996 printf("Labeling the CSEs with Use/Def information\n");
997 }
998#endif
999 EXPSET_TP available_cses = BitVecOps::MakeEmpty(cseTraits);
1000
1001 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
1002 {
1003 GenTree* stmt;
1004 GenTree* tree;
1005
1006 // Make the block publicly available
1007
1008 compCurBB = block;
1009
1010 // Retrieve the available CSE's at the start of this block
1011
1012 BitVecOps::Assign(cseTraits, available_cses, block->bbCseIn);
1013
1014 optCSEweight = block->getBBWeight(this);
1015
1016 // Walk the statement trees in this basic block
1017
1018 for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
1019 {
1020 noway_assert(stmt->gtOper == GT_STMT);
1021
1022 // We walk the tree in the forwards direction (bottom up)
1023
1024 for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
1025 {
1026 if (IS_CSE_INDEX(tree->gtCSEnum))
1027 {
1028 unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum);
1029 unsigned int cseBit = genCSEnum2bit(CSEnum);
1030 CSEdsc* desc = optCSEfindDsc(CSEnum);
1031 unsigned stmw = block->getBBWeight(this);
1032 bool isUse = BitVecOps::IsMember(cseTraits, available_cses, cseBit);
1033 bool isDef = !isUse; // If is isn't a CSE use, it is a CSE def
1034#ifdef DEBUG
1035 VNFuncApp excSeq;
1036
1037 if (verbose)
1038 {
1039 printf("BB%02u ", block->bbNum);
1040 printTreeID(tree);
1041
1042 printf(" %s of CSE #%02u [weight=%s]\n", isUse ? "Use" : "Def", CSEnum, refCntWtd2str(stmw));
1043 }
1044#endif
1045 // Have we decided to abandon work on this CSE?
1046 if (desc->defExcSetPromise == ValueNumStore::NoVN)
1047 {
1048 // This candidate had defs with differing liberal exc set VNs
1049 // We have abandoned CSE promotion for this candidate
1050
1051 // Clear the CSE flag
1052 tree->gtCSEnum = NO_CSE;
1053
1054 JITDUMP(" Abandoned - CSE candidate has defs with different exception sets!\n");
1055 continue;
1056 }
1057
1058 // Record the exception set for tree's liberal value number
1059 //
1060 ValueNum theLiberalExcSet = vnStore->VNExceptionSet(tree->gtVNPair.GetLiberal());
1061
1062 // Is this a CSE use or a def?
1063
1064 if (isDef)
1065 {
1066 // @ToDo - Remove this block as it no longer applies
1067 if (tree->gtFlags & GTF_COLON_COND)
1068 {
1069 // We can't create CSE definitions inside QMARK-COLON trees
1070 tree->gtCSEnum = NO_CSE;
1071
1072 JITDUMP(" NO_CSE - This CSE def occurs in a GTF_COLON_COND!\n");
1073 continue;
1074 }
1075
1076 // This is a CSE def
1077
1078 // Is defExcSetCurrent still set to the uninit marker value of VNForNull() ?
1079 if (desc->defExcSetCurrent == vnStore->VNForNull())
1080 {
// This is the first def we have visited, so record its exception set
1082 desc->defExcSetCurrent = theLiberalExcSet;
1083 }
1084
1085 // Have we seen a CSE use and made a promise of an exception set?
1086 //
1087 if (desc->defExcSetPromise != vnStore->VNForEmptyExcSet())
1088 {
// The exception set held in desc->defExcSetPromise must be a subset of theLiberalExcSet
1090 //
1091 if (vnStore->VNExcIsSubset(theLiberalExcSet, desc->defExcSetPromise))
1092 {
1093 // This new def still satisfies any promise made to all the CSE uses that we have
1094 // encountered
1095 //
1096
1097 // no update is needed when these are the same VN
1098 if (desc->defExcSetCurrent != theLiberalExcSet)
1099 {
1100 // We will change the value of desc->defExcSetCurrent to be the intersection of
1101 // these two sets.
// This is the set of exceptions that all CSE defs have (that we have visited so far)
1103 //
1104 ValueNum intersectionExcSet =
1105 vnStore->VNExcSetIntersection(desc->defExcSetCurrent, theLiberalExcSet);
1106#ifdef DEBUG
1107 if (this->verbose)
1108 {
1109 vnStore->GetVNFunc(desc->defExcSetCurrent, &excSeq);
1110 printf(">>> defExcSetCurrent is ");
1111 vnStore->vnDumpExcSeq(this, &excSeq, true);
1112 printf("\n");
1113
1114 vnStore->GetVNFunc(theLiberalExcSet, &excSeq);
1115 printf(">>> theLiberalExcSet is ");
1116 vnStore->vnDumpExcSeq(this, &excSeq, true);
1117 printf("\n");
1118
1119 if (intersectionExcSet == vnStore->VNForEmptyExcSet())
1120 {
1121 printf(">>> the intersectionExcSet is the EmptyExcSet\n");
1122 }
1123 else
1124 {
1125 vnStore->GetVNFunc(intersectionExcSet, &excSeq);
1126 printf(">>> the intersectionExcSet is ");
1127 vnStore->vnDumpExcSeq(this, &excSeq, true);
1128 printf("\n");
1129 }
1130 }
1131#endif // DEBUG
1132 // Change the defExcSetCurrent to be a subset of its prior value
1133 //
1134 assert(vnStore->VNExcIsSubset(desc->defExcSetCurrent, intersectionExcSet));
1135 desc->defExcSetCurrent = intersectionExcSet;
1136 }
1137 }
1138 else // This CSE def doesn't satisfy one of the exceptions already promised to a CSE use
1139 {
1140 // So, we will abandon all CSE promotions for this candidate
1141 //
1142 // We use the marker value of NoVN to indicate that we
1143 // should abandon this CSE candidate
1144 //
1145 desc->defExcSetPromise = ValueNumStore::NoVN;
1146 tree->gtCSEnum = NO_CSE;
1147
1148 JITDUMP(" Abandon - CSE candidate has defs with exception sets that do not satisfy "
1149 "some CSE use\n");
1150 continue;
1151 }
1152 }
1153
1154 // Record or update the value of desc->defConservNormVN
1155 //
1156 ValueNum theConservNormVN = vnStore->VNConservativeNormalValue(tree->gtVNPair);
1157
1158 // Is defConservNormVN still set to the uninit marker value of VNForNull() ?
1159 if (desc->defConservNormVN == vnStore->VNForNull())
1160 {
1161 // This is the first def that we have visited, set defConservNormVN
1162 desc->defConservNormVN = theConservNormVN;
1163 }
1164 else
1165 {
1166 // Check to see if all defs have the same conservative normal VN
1167 if (theConservNormVN != desc->defConservNormVN)
1168 {
1169 // This candidate has defs with differing conservative normal VNs, mark it with NoVN
1170 desc->defConservNormVN = ValueNumStore::NoVN; // record the marker for differing VNs
1171 }
1172 }
1173
1174 // If we get here we have accepted this node as a valid CSE def
1175
1176 desc->csdDefCount += 1;
1177 desc->csdDefWtCnt += stmw;
1178
1179 // Mark the node as a CSE definition
1180
1181 tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum);
1182
1183 // This CSE becomes available after this def
1184 BitVecOps::AddElemD(cseTraits, available_cses, cseBit);
1185 }
1186 else // We are visiting a CSE use
1187 {
1188 assert(isUse);
1189
1190 // If the CSE use has no requirements for an exception set then we don't have to do anything
1191 // here
1192 //
1193 if (theLiberalExcSet != vnStore->VNForEmptyExcSet())
1194 {
1195 // Are we visiting a use first, before visiting any defs of this CSE?
1196 // This is an atypical case that can occur with a bottom tested loop.
1197 //
1198 // Is defExcSetCurrent still set to the uninit marker value of VNForNull() ?
1199 if (desc->defExcSetCurrent == vnStore->VNForNull())
1200 {
1201 // Update defExcSetPromise, this is our required exception set for all CSE defs
1202 // that we encounter later.
1203 //
1204 // We could see multiple uses before a def, so we require the Union of all exception
1205 // sets
1206 //
1207 desc->defExcSetPromise =
1208 vnStore->VNExcSetUnion(desc->defExcSetPromise, theLiberalExcSet);
1209 }
1210 else // we have already seen a def for this CSE and defExcSetCurrent is setup
1211 {
1212 if (vnStore->VNExcIsSubset(desc->defExcSetCurrent, theLiberalExcSet))
1213 {
// The current set of exceptions produced by all CSE defs
// (that we have visited so far) meets our requirement.
1217 //
1218 // Add any exception items to the defExcSetPromise set
1219 //
1220 desc->defExcSetPromise =
1221 vnStore->VNExcSetUnion(desc->defExcSetPromise, theLiberalExcSet);
1222 }
1223 }
1224
1225 // At this point defExcSetPromise contains all of the exception items that we can promise
1226 // here.
1227 //
1228 if (!vnStore->VNExcIsSubset(desc->defExcSetPromise, theLiberalExcSet))
1229 {
1230 // We can't safely make this into a CSE use, because this
// CSE use has an exception set item that is not promised
1232 // by all of our CSE defs.
1233 //
1234 // We will omit this CSE use from the graph and proceed,
1235 // the other uses and defs can still participate in the CSE optimization.
1236
1237 // So this can't be a CSE use
1238 tree->gtCSEnum = NO_CSE;
1239
1240 JITDUMP(
1241 " NO_CSE - This use has an exception set item that isn't contained in the defs!\n");
1242 continue;
1243 }
1244 }
1245
1246 // When we get here we have accepted this node as a valid CSE use
1247
1248 desc->csdUseCount += 1;
1249 desc->csdUseWtCnt += stmw;
1250 }
1251 }
1252 }
1253 }
1254 }
1255}
1256
// The following class handles the CSE heuristics.
// We use a complex set of heuristic rules
// to determine if it is likely to be profitable to perform this CSE.
1260//
1261class CSE_Heuristic
1262{
1263 Compiler* m_pCompiler;
1264 unsigned m_addCSEcount;
1265
1266 unsigned aggressiveRefCnt;
1267 unsigned moderateRefCnt;
1268 unsigned enregCount; // count of the number of enregisterable variables
1269 bool largeFrame;
1270 bool hugeFrame;
1271 Compiler::codeOptimize codeOptKind;
1272 Compiler::CSEdsc** sortTab;
1273 size_t sortSiz;
1274#ifdef DEBUG
1275 CLRRandom m_cseRNG;
1276 unsigned m_bias;
1277#endif
1278
1279public:
1280 CSE_Heuristic(Compiler* pCompiler) : m_pCompiler(pCompiler)
1281 {
1282 codeOptKind = m_pCompiler->compCodeOpt();
1283 }
1284
1285 Compiler::codeOptimize CodeOptKind()
1286 {
1287 return codeOptKind;
1288 }
1289
1290 // Perform the Initialization step for our CSE Heuristics
1291 // determine the various cut off values to use for
1292 // the aggressive, moderate and conservative CSE promotions
1293 // count the number of enregisterable variables
1294 // determine if the method has a large or huge stack frame.
1295 //
1296 void Initialize()
1297 {
1298 m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */
1299
1300 // Record the weighted ref count of the last "for sure" callee saved LclVar
1301 aggressiveRefCnt = 0;
1302 moderateRefCnt = 0;
1303 enregCount = 0;
1304 largeFrame = false;
1305 hugeFrame = false;
1306 sortTab = nullptr;
1307 sortSiz = 0;
1308
1309#ifdef _TARGET_XARCH_
1310 if (m_pCompiler->compLongUsed)
1311 {
1312 enregCount++;
1313 }
1314#endif
1315
1316 unsigned frameSize = 0;
1317 unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1);
1318 unsigned lclNum;
1319 LclVarDsc* varDsc;
1320
1321 for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; lclNum++, varDsc++)
1322 {
1323 if (varDsc->lvRefCnt() == 0)
1324 {
1325 continue;
1326 }
1327
1328#if FEATURE_FIXED_OUT_ARGS
1329 // Skip the OutgoingArgArea in computing frame size, since
1330 // its size is not yet known and it doesn't affect local
1331 // offsets from the frame pointer (though it may affect
1332 // them from the stack pointer).
1333 noway_assert(m_pCompiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
1334 if (lclNum == m_pCompiler->lvaOutgoingArgSpaceVar)
1335 {
1336 continue;
1337 }
1338#endif // FEATURE_FIXED_OUT_ARGS
1339
1340 bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home
1341
1342 // Some LclVars always have stack homes
1343 if ((varDsc->lvDoNotEnregister) || (varDsc->lvType == TYP_LCLBLK))
1344 {
1345 onStack = true;
1346 }
1347
1348#ifdef _TARGET_X86_
1349 // Treat floating point and 64 bit integers as always on the stack
1350 if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsLong(varDsc->TypeGet()))
1351 onStack = true;
1352#endif
1353
1354 if (onStack)
1355 {
1356 frameSize += m_pCompiler->lvaLclSize(lclNum);
1357 }
1358 else
1359 {
1360 // For the purposes of estimating the frameSize we
1361 // will consider this LclVar as being enregistered.
1362 // Now we reduce the remaining regAvailEstimate by
1363 // an appropriate amount.
1364 if (varDsc->lvRefCnt() <= 2)
1365 {
1366 // a single use single def LclVar only uses 1
1367 regAvailEstimate -= 1;
1368 }
1369 else
1370 {
1371 // a LclVar with multiple uses and defs uses 2
1372 if (regAvailEstimate >= 2)
1373 {
1374 regAvailEstimate -= 2;
1375 }
1376 else
1377 {
1378 // Don't try to subtract when regAvailEstimate is 1
1379 regAvailEstimate = 0;
1380 }
1381 }
1382 }
1383#ifdef _TARGET_XARCH_
1384 if (frameSize > 0x080)
1385 {
1386 // We likely have a large stack frame.
1387 // Thus we might need to use large displacements when loading or storing
1388 // to CSE LclVars that are not enregistered
1389 largeFrame = true;
1390 break; // early out, we don't need to keep increasing frameSize
1391 }
1392#else // _TARGET_ARM_
1393 if (frameSize > 0x0400)
1394 {
1395 largeFrame = true;
1396 }
1397 if (frameSize > 0x10000)
1398 {
1399 hugeFrame = true;
1400 break;
1401 }
1402#endif
1403 }
1404
1405 unsigned sortNum = 0;
1406 while (sortNum < m_pCompiler->lvaTrackedCount)
1407 {
1408 LclVarDsc* varDsc = m_pCompiler->lvaRefSorted[sortNum++];
1409 var_types varTyp = varDsc->TypeGet();
1410
1411 if (varDsc->lvDoNotEnregister)
1412 {
1413 continue;
1414 }
1415
1416 if (!varTypeIsFloating(varTyp))
1417 {
1418 // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior.
1419 // Note that this makes genTypeStSz return 1.
1420 if (varTypeIsStruct(varTyp))
1421 {
1422 varTyp = TYP_STRUCT;
1423 }
1424 enregCount += genTypeStSz(varTyp);
1425 }
1426
1427 if ((aggressiveRefCnt == 0) && (enregCount > (CNT_CALLEE_ENREG * 3 / 2)))
1428 {
1429 if (CodeOptKind() == Compiler::SMALL_CODE)
1430 {
1431 aggressiveRefCnt = varDsc->lvRefCnt() + BB_UNITY_WEIGHT;
1432 }
1433 else
1434 {
1435 aggressiveRefCnt = varDsc->lvRefCntWtd() + BB_UNITY_WEIGHT;
1436 }
1437 }
1438 if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2))))
1439 {
1440 if (CodeOptKind() == Compiler::SMALL_CODE)
1441 {
1442 moderateRefCnt = varDsc->lvRefCnt();
1443 }
1444 else
1445 {
1446 moderateRefCnt = varDsc->lvRefCntWtd();
1447 }
1448 }
1449 }
1450 unsigned mult = 3;
1451 // use smaller value for mult when enregCount is in [0..4]
1452 if (enregCount <= 4)
1453 {
1454 mult = (enregCount <= 2) ? 1 : 2;
1455 }
1456
1457 aggressiveRefCnt = max(BB_UNITY_WEIGHT * mult, aggressiveRefCnt);
1458 moderateRefCnt = max((BB_UNITY_WEIGHT * mult) / 2, moderateRefCnt);
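// Worked example of the cutoff floors above (illustrative, assuming
// BB_UNITY_WEIGHT is 100): with more than four enregisterable locals we keep
// mult = 3, giving floors of 300 for aggressiveRefCnt and 150 for
// moderateRefCnt; a method with only one or two enregisterable locals uses
// mult = 1 and gets floors of 100 and 50. Any larger cutoff already derived
// from the sorted ref counts is preserved by the max().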
1459
1460#ifdef DEBUG
1461 if (m_pCompiler->verbose)
1462 {
1463 printf("\n");
1464 printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt);
1465 printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt);
1466 printf("Framesize estimate is 0x%04X\n", frameSize);
1467 printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small"));
1468 }
1469#endif
1470 }
1471
1472 void SortCandidates()
1473 {
1474 /* Create an expression table sorted by decreasing cost */
1475 sortTab = new (m_pCompiler, CMK_CSE) Compiler::CSEdsc*[m_pCompiler->optCSECandidateCount];
1476
1477 sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab);
1478 memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz);
1479
1480 if (CodeOptKind() == Compiler::SMALL_CODE)
1481 {
1482 qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpSz);
1483 }
1484 else
1485 {
1486 qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpEx);
1487 }
1488
1489#ifdef DEBUG
1490 if (m_pCompiler->verbose)
1491 {
1492 printf("\nSorted CSE candidates:\n");
1493 /* Print out the CSE candidates */
1494 EXPSET_TP tempMask;
1495 for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++)
1496 {
1497 Compiler::CSEdsc* dsc = sortTab[cnt];
1498 GenTree* expr = dsc->csdTree;
1499
1500 unsigned def;
1501 unsigned use;
1502
1503 if (CodeOptKind() == Compiler::SMALL_CODE)
1504 {
1505 def = dsc->csdDefCount; // def count
1506 use = dsc->csdUseCount; // use count (excluding the implicit uses at defs)
1507 }
1508 else
1509 {
1510 def = dsc->csdDefWtCnt; // weighted def count
1511 use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
1512 }
1513
1514 tempMask = BitVecOps::MakeSingleton(m_pCompiler->cseTraits, genCSEnum2bit(dsc->csdIndex));
1515 printf("CSE #%02u, {$%-3x, $%-3x} cseMask=%s,useCnt=%d: [def=%3u, use=%3u", dsc->csdIndex,
1516 dsc->csdHashKey, dsc->defExcSetPromise, genES2str(m_pCompiler->cseTraits, tempMask),
1517 dsc->csdUseCount, def, use);
1518 printf("] :: ");
1519 m_pCompiler->gtDispTree(expr, nullptr, nullptr, true);
1520 }
1521 printf("\n");
1522 }
1523#endif // DEBUG
1524 }
1525
1526 // The following class nested within CSE_Heuristic encapsulates the information
1527 // about the current CSE candidate that is under consideration
1528 //
1529 // TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation
1530 // and needs to be reworked for the Value Number based implementation
1531 //
1532 class CSE_Candidate
1533 {
1534 CSE_Heuristic* m_context;
1535 Compiler::CSEdsc* m_CseDsc;
1536
1537 unsigned m_cseIndex;
1538
1539 unsigned m_defCount;
1540 unsigned m_useCount;
1541
1542 unsigned m_Cost;
1543 unsigned m_Size;
1544
1545 public:
1546 CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc) : m_context(context), m_CseDsc(cseDsc)
1547 {
1548 m_cseIndex = m_CseDsc->csdIndex;
1549 }
1550
1551 Compiler::CSEdsc* CseDsc()
1552 {
1553 return m_CseDsc;
1554 }
1555 unsigned CseIndex()
1556 {
1557 return m_cseIndex;
1558 }
1559 unsigned DefCount()
1560 {
1561 return m_defCount;
1562 }
1563 unsigned UseCount()
1564 {
1565 return m_useCount;
1566 }
1567 // TODO-CQ: With ValNum CSE's the Expr and its cost can vary.
1568 GenTree* Expr()
1569 {
1570 return m_CseDsc->csdTree;
1571 }
1572 unsigned Cost()
1573 {
1574 return m_Cost;
1575 }
1576 unsigned Size()
1577 {
1578 return m_Size;
1579 }
1580
1581 bool LiveAcrossCall()
1582 {
1583 return (m_CseDsc->csdLiveAcrossCall != 0);
1584 }
1585
1586 void InitializeCounts()
1587 {
1588 if (m_context->CodeOptKind() == Compiler::SMALL_CODE)
1589 {
1590 m_Cost = Expr()->gtCostSz; // the estimated code size
1591 m_Size = Expr()->gtCostSz; // always the gtCostSz
1592 m_defCount = m_CseDsc->csdDefCount; // def count
1593 m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs)
1594 }
1595 else
1596 {
1597 m_Cost = Expr()->gtCostEx; // the estimated execution cost
1598 m_Size = Expr()->gtCostSz; // always the gtCostSz
1599 m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count
1600 m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
1601 }
1602 }
1603 };
1604
1605#ifdef DEBUG
1606 //------------------------------------------------------------------------
1607 // optConfigBiasedCSE:
1608 // Stress mode to shuffle the decision to CSE or not using environment
1609 // variable COMPlus_JitStressBiasedCSE (= 0 to 100%). When the bias value
1610 // is not specified but COMPlus_JitStress is ON, generate a random bias.
1611 //
1612 // Return Value:
1613 // 0 -- This method is indifferent about this CSE (no bias specified and no stress)
1614 // 1 -- This CSE must be performed to maintain specified/generated bias.
1615 // -1 -- This CSE mustn't be performed to maintain specified/generated bias.
1616 //
1617 // Operation:
1618 // A debug stress only method that returns "1" with probability (P)
1619 // defined by:
1620 //
1621 // P = (COMPlus_JitStressBiasedCSE / 100) (or)
1622 // P = (random(100) / 100) when COMPlus_JitStress is specified and
1623 // COMPlus_JitStressBiasedCSE is unspecified.
1624 //
1625 // When specified, the bias is reinterpreted as a decimal number between 0
1626 // to 100.
1627 // When bias is not specified, a bias is randomly generated if COMPlus_JitStress
1628 // is non-zero.
1629 //
1630 // Callers are supposed to call this method for each CSE promotion decision
1631 // and ignore the call if return value is 0 and honor the 1 with a CSE and
1632 // -1 with a no-CSE to maintain the specified/generated bias.
1633 //
1634 int optConfigBiasedCSE()
1635 {
1636 // Seed the PRNG, if never done before.
1637 if (!m_cseRNG.IsInitialized())
1638 {
1639 m_cseRNG.Init(m_pCompiler->info.compMethodHash());
1640 m_bias = m_cseRNG.Next(100);
1641 }
1642
1643 // Obtain the bias value and reinterpret as decimal.
1644 unsigned bias = ReinterpretHexAsDecimal(JitConfig.JitStressBiasedCSE());
1645
1646 // Invalid value, check if JitStress is ON.
1647 if (bias > 100)
1648 {
1649 if (!m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT))
1650 {
1651 // JitStress is OFF for CSE, nothing to do.
1652 return 0;
1653 }
1654 bias = m_bias;
1655 JITDUMP("JitStressBiasedCSE is OFF, but JitStress is ON: generated bias=%d.\n", bias);
1656 }
1657
1658 // Generate a number between (0, 99) and if the generated
1659 // number is smaller than bias, then perform CSE.
1660 unsigned gen = m_cseRNG.Next(100);
1661 int ret = (gen < bias) ? 1 : -1;
1662
1663 if (m_pCompiler->verbose)
1664 {
1665 if (ret < 0)
1666 {
1667 printf("No CSE because gen=%d >= bias=%d\n", gen, bias);
1668 }
1669 else
1670 {
1671 printf("Promoting CSE because gen=%d < bias=%d\n", gen, bias);
1672 }
1673 }
1674
1675 // Indicate whether to perform CSE or not.
1676 return ret;
1677 }
1678#endif
1679
1680 // Given a CSE candidate decide whether it passes or fails the profitability heuristic
1681 // return true if we believe that it is profitable to promote this candidate to a CSE
1682 //
1683 bool PromotionCheck(CSE_Candidate* candidate)
1684 {
1685 bool result = false;
1686
1687#ifdef DEBUG
1688 int stressResult = optConfigBiasedCSE();
1689 if (stressResult != 0)
1690 {
1691 // Stress is enabled. Check whether to perform CSE or not.
1692 return (stressResult > 0);
1693 }
1694
1695 if (m_pCompiler->optConfigDisableCSE2())
1696 {
1697 return false; // skip this CSE
1698 }
1699#endif
1700
1701 /*
1702 Our calculation is based on the following cost estimate formula
1703
1704 Existing costs are:
1705
1706 (def + use) * cost
1707
If we introduce a CSE temp at each definition and
replace each use with the CSE temp, then our cost is:
1710
1711 (def * (cost + cse-def-cost)) + (use * cse-use-cost)
1712
1713 We must estimate the values to use for cse-def-cost and cse-use-cost
1714
1715 If we are able to enregister the CSE then the cse-use-cost is one
1716 and cse-def-cost is either zero or one. Zero in the case where
1717 we needed to evaluate the def into a register and we can use that
1718 register as the CSE temp as well.
1719
1720 If we are unable to enregister the CSE then the cse-use-cost is IND_COST
1721 and the cse-def-cost is also IND_COST.
1722
If we want to be conservative we use IND_COST as the value
1724 for both cse-def-cost and cse-use-cost and then we never introduce
1725 a CSE that could pessimize the execution time of the method.
1726
1727 If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the
1728 values for both cse-def-cost and cse-use-cost.
1729
1730 If we want to be aggressive we use 1 as the values for both
1731 cse-def-cost and cse-use-cost.
1732
If we believe that the CSE is very valuable in terms of weighted ref counts
1734 such that it would always be enregistered by the register allocator we choose
1735 the aggressive use def costs.
1736
1737 If we believe that the CSE is somewhat valuable in terms of weighted ref counts
such that it is likely to be enregistered by the register allocator we choose
1739 the moderate use def costs.
1740
1741 otherwise we choose the conservative use def costs.
1742
1743 */
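// A worked instance of the estimate above (illustrative numbers): suppose a
// candidate has DefCount = 2, UseCount = 5 and Cost = 3 (gtCostEx), and the
// moderate, not-live-across-call costs are chosen (cse_def_cost = 2,
// cse_use_cost = 1) with no extra costs. As computed further below,
//
//   no_cse_cost  = UseCount * Cost                                   = 5 * 3     = 15
//   yes_cse_cost = DefCount * cse_def_cost + UseCount * cse_use_cost = 2*2 + 5*1 =  9
//
// so the candidate passes the profitability check. Note that the code omits the
// (def * cost) term that appears on both sides of the formula above, since it
// cancels out of the comparison.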
1744
1745 unsigned cse_def_cost;
1746 unsigned cse_use_cost;
1747
1748 unsigned no_cse_cost = 0;
1749 unsigned yes_cse_cost = 0;
1750 unsigned extra_yes_cost = 0;
1751 unsigned extra_no_cost = 0;
1752
1753 // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar
1754 // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar
1755 unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
1756
1757 if (CodeOptKind() == Compiler::SMALL_CODE)
1758 {
1759 if (cseRefCnt >= aggressiveRefCnt)
1760 {
1761#ifdef DEBUG
1762 if (m_pCompiler->verbose)
1763 {
1764 printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
1765 }
1766#endif
1767 cse_def_cost = 1;
1768 cse_use_cost = 1;
1769
1770 if (candidate->LiveAcrossCall() != 0)
1771 {
1772 if (largeFrame)
1773 {
1774 cse_def_cost++;
1775 cse_use_cost++;
1776 }
1777 if (hugeFrame)
1778 {
1779 cse_def_cost++;
1780 cse_use_cost++;
1781 }
1782 }
1783 }
1784 else if (largeFrame)
1785 {
1786#ifdef DEBUG
1787 if (m_pCompiler->verbose)
1788 {
1789 printf("Codesize CSE Promotion (large frame)\n");
1790 }
1791#endif
1792#ifdef _TARGET_XARCH_
/* The following formula is a good choice when optimizing CSE for SMALL_CODE */
1794 cse_def_cost = 6; // mov [EBP-0x00001FC],reg
1795 cse_use_cost = 5; // [EBP-0x00001FC]
1796#else // _TARGET_ARM_
1797 if (hugeFrame)
1798 {
1799 cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10]
1800 cse_use_cost = 12;
1801 }
1802 else
1803 {
1804 cse_def_cost = 8; // movw r10 and str reg,[sp+r10]
1805 cse_use_cost = 8;
1806 }
1807#endif
1808 }
1809 else // small frame
1810 {
1811#ifdef DEBUG
1812 if (m_pCompiler->verbose)
1813 {
1814 printf("Codesize CSE Promotion (small frame)\n");
1815 }
1816#endif
1817#ifdef _TARGET_XARCH_
/* The following formula is a good choice when optimizing CSE for SMALL_CODE */
1819 cse_def_cost = 3; // mov [EBP-1C],reg
1820 cse_use_cost = 2; // [EBP-1C]
1821#else // _TARGET_ARM_
1822 cse_def_cost = 2; // str reg,[sp+0x9c]
1823 cse_use_cost = 2; // ldr reg,[sp+0x9c]
1824#endif
1825 }
1826 }
1827 else // not SMALL_CODE ...
1828 {
1829 if (cseRefCnt >= aggressiveRefCnt)
1830 {
1831#ifdef DEBUG
1832 if (m_pCompiler->verbose)
1833 {
1834 printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
1835 }
1836#endif
1837 cse_def_cost = 1;
1838 cse_use_cost = 1;
1839 }
1840 else if (cseRefCnt >= moderateRefCnt)
1841 {
1842
1843 if (candidate->LiveAcrossCall() == 0)
1844 {
1845#ifdef DEBUG
1846 if (m_pCompiler->verbose)
1847 {
1848 printf("Moderate CSE Promotion (CSE never live at call) (%u >= %u)\n", cseRefCnt,
1849 moderateRefCnt);
1850 }
1851#endif
1852 cse_def_cost = 2;
1853 cse_use_cost = 1;
1854 }
1855 else // candidate is live across call
1856 {
1857#ifdef DEBUG
1858 if (m_pCompiler->verbose)
1859 {
1860 printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt);
1861 }
1862#endif
1863 cse_def_cost = 2;
1864 cse_use_cost = 2;
1865 extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
1866 // saved register
1867 }
1868 }
1869 else // Conservative CSE promotion
1870 {
1871 if (candidate->LiveAcrossCall() == 0)
1872 {
1873#ifdef DEBUG
1874 if (m_pCompiler->verbose)
1875 {
1876 printf("Conservative CSE Promotion (CSE never live at call) (%u < %u)\n", cseRefCnt,
1877 moderateRefCnt);
1878 }
1879#endif
1880 cse_def_cost = 2;
1881 cse_use_cost = 2;
1882 }
1883 else // candidate is live across call
1884 {
1885#ifdef DEBUG
1886 if (m_pCompiler->verbose)
1887 {
1888 printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
1889 }
1890#endif
1891 cse_def_cost = 3;
1892 cse_use_cost = 3;
                extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a
                                                      // caller-saved register
1895 }
1896
1897 // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
1898 if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
1899 {
1900 cse_def_cost++;
1901 cse_use_cost++;
1902 }
1903 }
1904
1905 if (largeFrame)
1906 {
1907 cse_def_cost++;
1908 cse_use_cost++;
1909 }
1910 if (hugeFrame)
1911 {
1912 cse_def_cost++;
1913 cse_use_cost++;
1914 }
1915 }
1916
1917 // estimate the cost from lost codesize reduction if we do not perform the CSE
1918 if (candidate->Size() > cse_use_cost)
1919 {
1920 Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the
1921 // weighted count
1922 extra_no_cost = candidate->Size() - cse_use_cost;
1923 extra_no_cost = extra_no_cost * dsc->csdUseCount * 2;
1924 }
1925
1926 /* no_cse_cost is the cost estimate when we decide not to make a CSE */
1927 /* yes_cse_cost is the cost estimate when we decide to make a CSE */
1928
1929 no_cse_cost = candidate->UseCount() * candidate->Cost();
1930 yes_cse_cost = (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost);
1931
1932 no_cse_cost += extra_no_cost;
1933 yes_cse_cost += extra_yes_cost;
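        // A purely illustrative example with hypothetical counts: DefCount = 2, UseCount = 5,
        // Cost = 3, cse_def_cost = 2, cse_use_cost = 2 and no extra costs gives
        //   no_cse_cost  = 5 * 3 = 15
        //   yes_cse_cost = (2 * 2) + (5 * 2) = 14
        // and since 14 <= 15 this candidate would pass the check below and be promoted.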
1934
1935#ifdef DEBUG
1936 if (m_pCompiler->verbose)
1937 {
            printf("cseRefCnt=%u, aggressiveRefCnt=%u, moderateRefCnt=%u\n", cseRefCnt, aggressiveRefCnt,
                   moderateRefCnt);
            printf("defCnt=%u, useCnt=%u, cost=%u, size=%u\n", candidate->DefCount(), candidate->UseCount(),
                   candidate->Cost(), candidate->Size());
            printf("def_cost=%u, use_cost=%u, extra_no_cost=%u, extra_yes_cost=%u\n", cse_def_cost, cse_use_cost,
                   extra_no_cost, extra_yes_cost);
1944
1945 printf("CSE cost savings check (%u >= %u) %s\n", no_cse_cost, yes_cse_cost,
1946 (no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
1947 }
1948#endif
1949
1950 // Should we make this candidate into a CSE?
1951 // Is the yes cost less than the no cost
1952 //
1953 if (yes_cse_cost <= no_cse_cost)
1954 {
1955 result = true; // Yes make this a CSE
1956 }
1957 else
1958 {
1959 /* In stress mode we will make some extra CSEs */
1960 if (no_cse_cost > 0)
1961 {
1962 int percentage = (no_cse_cost * 100) / yes_cse_cost;
1963
1964 if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage))
1965 {
1966 result = true; // Yes make this a CSE
1967 }
1968 }
1969 }
1970
1971 return result;
1972 }
1973
1974 // PerformCSE() takes a successful candidate and performs the appropriate replacements:
1975 //
1976 // It will replace all of the CSE defs with assignments to a new "cse0" LclVar
1977 // and will replace all of the CSE uses with reads of the "cse0" LclVar
1978 //
1979 void PerformCSE(CSE_Candidate* successfulCandidate)
1980 {
1981 unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
1982
1983 if (successfulCandidate->LiveAcrossCall() != 0)
1984 {
            // As we introduce new LclVars for these CSEs we slightly
            // increase the cutoffs for aggressive and moderate CSEs
1987 //
1988 int incr = BB_UNITY_WEIGHT;
1989
1990 if (cseRefCnt > aggressiveRefCnt)
1991 {
1992 aggressiveRefCnt += incr;
1993 }
1994
1995 if (cseRefCnt > moderateRefCnt)
1996 {
1997 moderateRefCnt += (incr / 2);
1998 }
1999 }
2000
2001 /* Introduce a new temp for the CSE */
2002
        // We will create a long-lifetime temp for the new CSE LclVar
2004 unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG("ValNumCSE"));
2005 var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
2006 if (varTypeIsStruct(cseLclVarTyp))
2007 {
2008 m_pCompiler->lvaSetStruct(cseLclVarNum, m_pCompiler->gtGetStructHandle(successfulCandidate->Expr()), false);
2009 }
2010 m_pCompiler->lvaTable[cseLclVarNum].lvType = cseLclVarTyp;
2011 m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true;
2012
2013 // Record that we created a new LclVar for use as a CSE temp
2014 m_addCSEcount++;
2015 m_pCompiler->optCSEcount++;
2016
2017 // Walk all references to this CSE, adding an assignment
2018 // to the CSE temp to all defs and changing all refs to
2019 // a simple use of the CSE temp.
2020 //
2021 // Later we will unmark any nested CSE's for the CSE uses.
2022 //
2023 Compiler::CSEdsc* dsc = successfulCandidate->CseDsc();
2024 Compiler::treeStmtLst* lst;
2025
2026#ifdef DEBUG
        // Verify that all of the ValueNumbers in this list are still the same,
        // since Morph can change them when it performs a mutating operation.
        //
2030 ValueNum firstVN = ValueNumStore::NoVN;
2031 ValueNum currVN;
2032 bool allSame = true;
2033
2034 lst = dsc->csdTreeList;
2035 while (lst != nullptr)
2036 {
2037 // Ignore this node if the gtCSEnum value has been cleared
2038 if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
2039 {
                // We used the liberal Value numbers when building the set of CSE candidates
2041 currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair);
2042 assert(currVN != ValueNumStore::NoVN);
2043
2044 if (firstVN == ValueNumStore::NoVN)
2045 {
2046 firstVN = currVN;
2047 }
2048 else if (currVN != firstVN)
2049 {
2050 allSame = false;
2051 break;
2052 }
2053 }
2054 lst = lst->tslNext;
2055 }
2056 if (!allSame)
2057 {
2058 lst = dsc->csdTreeList;
2059 GenTree* firstTree = lst->tslTree;
2060 printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", m_pCompiler->info.compFullName,
2061 GenTree::OpName(firstTree->OperGet()), varTypeName(firstTree->TypeGet()));
2062 while (lst != nullptr)
2063 {
2064 if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
2065 {
2066 currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair);
                    Compiler::printTreeID(lst->tslTree);
                    printf(" (%s " FMT_VN ") ", IS_CSE_USE(lst->tslTree->gtCSEnum) ? "use" : "def", currVN);
2069 }
2070 lst = lst->tslNext;
2071 }
2072 printf("\n");
2073 }
2074#endif // DEBUG
2075
2076 // Setup 'lst' to point at the start of this candidate list
2077 lst = dsc->csdTreeList;
2078 noway_assert(lst);
2079
2080 do
2081 {
2082 /* Process the next node in the list */
2083 GenTree* exp = lst->tslTree;
2084 GenTree* stm = lst->tslStmt;
2085 noway_assert(stm->gtOper == GT_STMT);
2086 BasicBlock* blk = lst->tslBlock;
2087
2088 /* Advance to the next node in the list */
2089 lst = lst->tslNext;
2090
            // We may have cleared this CSE in optValnumCSE_Availablity
2092 // due to different exception sets.
2093 //
2094 // Ignore this node if the gtCSEnum value has been cleared
2095 if (!IS_CSE_INDEX(exp->gtCSEnum))
2096 {
2097 continue;
2098 }
2099
2100 // Assert if we used DEBUG_DESTROY_NODE on this CSE exp
2101 assert(exp->gtOper != GT_COUNT);
2102
2103 /* Make sure we update the weighted ref count correctly */
2104 m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler);
2105
2106 /* Figure out the actual type of the value */
2107 var_types expTyp = genActualType(exp->TypeGet());
2108 noway_assert(expTyp == cseLclVarTyp);
2109
2110 // This will contain the replacement tree for exp
2111 // It will either be the CSE def or CSE ref
2112 //
2113 GenTree* cse = nullptr;
2114 bool isDef;
2115 FieldSeqNode* fldSeq = nullptr;
2116 bool hasZeroMapAnnotation = m_pCompiler->GetZeroOffsetFieldMap()->Lookup(exp, &fldSeq);
2117
2118 if (IS_CSE_USE(exp->gtCSEnum))
2119 {
2120 /* This is a use of the CSE */
2121 isDef = false;
2122#ifdef DEBUG
2123 if (m_pCompiler->verbose)
2124 {
2125 printf("\nWorking on the replacement of the CSE #%02u use at ", exp->gtCSEnum);
2126 Compiler::printTreeID(exp);
2127 printf(" in " FMT_BB "\n", blk->bbNum);
2128 }
2129#endif // DEBUG
2130
2131 // We will replace the CSE ref with a new tree
2132 // this is typically just a simple use of the new CSE LclVar
2133 //
2134 ValueNumStore* vnStore = m_pCompiler->vnStore;
2135 cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
2136
                // Assign the proper ValueNumber; a CSE use discards any exceptions
2138 cse->gtVNPair = vnStore->VNPNormalPair(exp->gtVNPair);
2139
2140 ValueNum theConservativeVN = successfulCandidate->CseDsc()->defConservNormVN;
2141
2142 if (theConservativeVN != ValueNumStore::NoVN)
2143 {
2144 // All defs of this CSE share the same normal conservative VN, and we are rewriting this
2145 // use to fetch the same value with no reload, so we can safely propagate that
2146 // conservative VN to this use. This can help range check elimination later on.
2147 cse->gtVNPair.SetConservative(theConservativeVN);
2148
2149 // If the old VN was flagged as a checked bound, propagate that to the new VN
2150 // to make sure assertion prop will pay attention to this VN.
2151 ValueNum oldVN = exp->gtVNPair.GetConservative();
2152 if (!vnStore->IsVNConstant(theConservativeVN) && vnStore->IsVNCheckedBound(oldVN))
2153 {
2154 vnStore->SetVNIsCheckedBound(theConservativeVN);
2155 }
2156
2157 GenTree* cmp;
2158 if ((m_pCompiler->optCseCheckedBoundMap != nullptr) &&
2159 (m_pCompiler->optCseCheckedBoundMap->Lookup(exp, &cmp)))
2160 {
2161 // Propagate the new value number to this compare node as well, since
2162 // subsequent range check elimination will try to correlate it with
2163 // the other appearances that are getting CSEd.
2164
2165 ValueNum oldCmpVN = cmp->gtVNPair.GetConservative();
2166 ValueNum newCmpArgVN;
2167
2168 ValueNumStore::CompareCheckedBoundArithInfo info;
2169 if (vnStore->IsVNCompareCheckedBound(oldCmpVN))
2170 {
2171 // Comparison is against the bound directly.
2172
2173 newCmpArgVN = theConservativeVN;
2174 vnStore->GetCompareCheckedBound(oldCmpVN, &info);
2175 }
2176 else
2177 {
2178 // Comparison is against the bound +/- some offset.
2179
2180 assert(vnStore->IsVNCompareCheckedBoundArith(oldCmpVN));
2181 vnStore->GetCompareCheckedBoundArithInfo(oldCmpVN, &info);
2182 newCmpArgVN = vnStore->VNForFunc(vnStore->TypeOfVN(info.arrOp), (VNFunc)info.arrOper,
2183 info.arrOp, theConservativeVN);
2184 }
2185 ValueNum newCmpVN = vnStore->VNForFunc(vnStore->TypeOfVN(oldCmpVN), (VNFunc)info.cmpOper,
2186 info.cmpOp, newCmpArgVN);
2187 cmp->gtVNPair.SetConservative(newCmpVN);
2188 }
2189 }
2190#ifdef DEBUG
2191 cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF;
2192#endif // DEBUG
2193
                // Now we need to unmark any nested CSE uses that are found in 'exp'.
                // We also extract any nested CSE defs that are found in 'exp' and
                // append them to the sideEffList.

                // Afterwards the nodes in the 'sideEffList' are preserved and
                // all other nodes are removed and have their ref counts decremented.
                //
2201 exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
2202
2203 GenTree* sideEffList = nullptr;
2204 m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS | GTF_IS_IN_CSE);
2205
2206 // If we have any side effects or extracted CSE defs then we need to create a GT_COMMA tree instead
2207 //
2208 if (sideEffList != nullptr)
2209 {
2210#ifdef DEBUG
2211 if (m_pCompiler->verbose)
2212 {
2213 printf("\nThis CSE use has side effects and/or nested CSE defs. The sideEffectList:\n");
2214 m_pCompiler->gtDispTree(sideEffList);
2215 printf("\n");
2216 }
2217#endif
2218
2219 GenTree* cseVal = cse;
2220 GenTree* curSideEff = sideEffList;
2221 ValueNumStore* vnStore = m_pCompiler->vnStore;
2222 ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet();
2223
2224 while ((curSideEff->OperGet() == GT_COMMA) || (curSideEff->OperGet() == GT_ASG))
2225 {
2226 GenTree* op1 = curSideEff->gtOp.gtOp1;
2227 GenTree* op2 = curSideEff->gtOp.gtOp2;
2228
2229 ValueNumPair op1vnp;
2230 ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
2231 vnStore->VNPUnpackExc(op1->gtVNPair, &op1vnp, &op1Xvnp);
2232
2233 exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp);
2234 curSideEff = op2;
2235 }
2236
2237 // We may have inserted a narrowing cast during a previous remorph
2238 // and it will not have a value number.
2239 if ((curSideEff->OperGet() == GT_CAST) && !curSideEff->gtVNPair.BothDefined())
2240 {
2241 // The inserted cast will have no exceptional effects
2242 assert(curSideEff->gtOverflow() == false);
2243 // Process the exception effects from the cast's operand.
2244 curSideEff = curSideEff->gtOp.gtOp1;
2245 }
2246
2247 ValueNumPair op2vnp;
2248 ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
2249 vnStore->VNPUnpackExc(curSideEff->gtVNPair, &op2vnp, &op2Xvnp);
2250 exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
2251
2252 op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
2253 vnStore->VNPUnpackExc(cseVal->gtVNPair, &op2vnp, &op2Xvnp);
2254 exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
2255
2256 // Create a comma node with the sideEffList as op1
2257 cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cseVal);
2258 cse->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp);
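                        // The use is now a tree of the following shape (illustrative sketch),
                        // whose value is the CSE value and whose exception set is the
                        // union computed above:
                        //
                        //         COMMA
                        //         /   \
                        //  sideEffList  LCL_VAR "cse0"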
2259 }
2260 }
2261 else
2262 {
2263 /* This is a def of the CSE */
2264 isDef = true;
2265#ifdef DEBUG
2266 if (m_pCompiler->verbose)
2267 {
2268 printf("\nCSE #%02u def at ", GET_CSE_INDEX(exp->gtCSEnum));
2269 Compiler::printTreeID(exp);
2270 printf(" replaced in " FMT_BB " with def of V%02u\n", blk->bbNum, cseLclVarNum);
2271 }
2272#endif // DEBUG
2273
2274 exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
2275
2276 GenTree* val = exp;
2277
2278 /* Create an assignment of the value to the temp */
2279 GenTree* asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
2280
2281 // assign the proper Value Numbers
2282 asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void
2283 asg->gtOp.gtOp1->gtVNPair = val->gtVNPair; // The dest op is the same as 'val'
2284
2285 noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR);
2286 noway_assert(asg->gtOp.gtOp2 == val);
2287
2288 /* Create a reference to the CSE temp */
2289 GenTree* ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
2290 ref->gtVNPair = val->gtVNPair; // The new 'ref' is the same as 'val'
2291
2292 // If it has a zero-offset field seq, copy annotation to the ref
2293 if (hasZeroMapAnnotation)
2294 {
2295 m_pCompiler->GetZeroOffsetFieldMap()->Set(ref, fldSeq);
2296 }
2297
2298 /* Create a comma node for the CSE assignment */
2299 cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref);
2300 cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val'
2301 // as the assignment to the CSE LclVar
2302 // cannot add any new exceptions
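                    // The replacement tree for a CSE def therefore has this shape (illustrative sketch):
                    //
                    //         COMMA
                    //         /   \
                    //       ASG   LCL_VAR "cse0"
                    //       /  \
                    //  "cse0"   val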
2303 }
2304
            // Walk the statement 'stm' and find the link
            // in the tree that points to 'exp'
2307 //
2308 GenTree** link = m_pCompiler->gtFindLink(stm, exp);
2309
2310#ifdef DEBUG
2311 if (link == nullptr)
2312 {
2313 printf("\ngtFindLink failed: stm=");
2314 Compiler::printTreeID(stm);
2315 printf(", exp=");
2316 Compiler::printTreeID(exp);
2317 printf("\n");
2318 printf("stm =");
2319 m_pCompiler->gtDispTree(stm);
2320 printf("\n");
2321 printf("exp =");
2322 m_pCompiler->gtDispTree(exp);
2323 printf("\n");
2324 }
2325#endif // DEBUG
2326
2327 noway_assert(link);
2328
2329 // Mutate this link, thus replacing the old exp with the new cse representation
2330 //
2331 *link = cse;
2332
2333 // If it has a zero-offset field seq, copy annotation.
2334 if (hasZeroMapAnnotation)
2335 {
2336 m_pCompiler->GetZeroOffsetFieldMap()->Set(cse, fldSeq);
2337 }
2338
2339 assert(m_pCompiler->fgRemoveRestOfBlock == false);
2340
2341 /* re-morph the statement */
2342 m_pCompiler->fgMorphBlockStmt(blk, stm->AsStmt() DEBUGARG("optValnumCSE"));
2343
2344 } while (lst != nullptr);
2345 }
2346
    // Consider each of the CSE candidates, and if a candidate passes
    // the PromotionCheck then transform it by calling PerformCSE
2349 //
2350 void ConsiderCandidates()
2351 {
2352 /* Consider each CSE candidate, in order of decreasing cost */
2353 unsigned cnt = m_pCompiler->optCSECandidateCount;
2354 Compiler::CSEdsc** ptr = sortTab;
2355 for (; (cnt > 0); cnt--, ptr++)
2356 {
2357 Compiler::CSEdsc* dsc = *ptr;
2358 if (dsc->defExcSetPromise == ValueNumStore::NoVN)
2359 {
                JITDUMP("Abandoned CSE #%02u because we had defs with different Exc sets\n", dsc->csdIndex);
2361 continue;
2362 }
2363
2364 CSE_Candidate candidate(this, dsc);
2365
2366 candidate.InitializeCounts();
2367
2368 if (candidate.UseCount() == 0)
2369 {
2370 JITDUMP("Skipped CSE #%02u because use count is 0\n", candidate.CseIndex());
2371 continue;
2372 }
2373
2374#ifdef DEBUG
2375 if (m_pCompiler->verbose)
2376 {
2377 printf("\nConsidering CSE #%02u {$%-3x, $%-3x} [def=%2u, use=%2u, cost=%2u] CSE Expression:\n",
2378 candidate.CseIndex(), dsc->csdHashKey, dsc->defExcSetPromise, candidate.DefCount(),
2379 candidate.UseCount(), candidate.Cost());
2380 m_pCompiler->gtDispTree(candidate.Expr());
2381 printf("\n");
2382 }
2383#endif
2384
2385 if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0))
2386 {
                // If we reach this point, then the CSE def was incorrectly marked or the
                // block with this use is unreachable. So skip it and go to the next CSE.
                // Without the "continue", we'd generate bad code in retail.
                // Commented out a noway_assert(false) here due to bug: 3290124.
                // The problem is that if there is a sub-graph that is not reachable from the
                // entry point, the CSE flags that were propagated into it would be incorrect.
2393 continue;
2394 }
2395
2396 bool doCSE = PromotionCheck(&candidate);
2397
2398#ifdef DEBUG
2399 if (m_pCompiler->verbose)
2400 {
2401 if (doCSE)
2402 {
2403 printf("\nPromoting CSE:\n");
2404 }
2405 else
2406 {
2407 printf("Did Not promote this CSE\n");
2408 }
2409 }
2410#endif // DEBUG
2411
2412 if (doCSE)
2413 {
2414 PerformCSE(&candidate);
2415 }
2416 }
2417 }
2418
2419 // Perform the necessary cleanup after our CSE heuristics have run
2420 //
2421 void Cleanup()
2422 {
2423 // Nothing to do, currently.
2424 }
2425};
2426
2427/*****************************************************************************
2428 *
2429 * Routine for performing the Value Number based CSE using our heuristics
2430 */
2431
2432void Compiler::optValnumCSE_Heuristic()
2433{
2434#ifdef DEBUG
2435 if (verbose)
2436 {
2437 printf("\n************ Trees at start of optValnumCSE_Heuristic()\n");
2438 fgDumpTrees(fgFirstBB, nullptr);
2439 printf("\n");
2440 }
2441#endif // DEBUG
2442
2443 CSE_Heuristic cse_heuristic(this);
2444
2445 cse_heuristic.Initialize();
2446 cse_heuristic.SortCandidates();
2447 cse_heuristic.ConsiderCandidates();
2448 cse_heuristic.Cleanup();
2449}
2450
2451/*****************************************************************************
2452 *
2453 * Perform common sub-expression elimination.
2454 */
2455
2456void Compiler::optOptimizeValnumCSEs()
2457{
2458#ifdef DEBUG
2459 if (verbose)
2460 {
2461 printf("\n*************** In optOptimizeValnumCSEs()\n");
2462 }
2463
2464 if (optConfigDisableCSE())
2465 {
2466 return; // Disabled by JitNoCSE
2467 }
2468#endif
2469
2470 optValnumCSE_phase = true;
2471
2472 /* Initialize the expression tracking logic */
2473
2474 optValnumCSE_Init();
2475
2476 /* Locate interesting expressions and assign indices to them */
2477
2478 if (optValnumCSE_Locate() > 0)
2479 {
2480 optCSECandidateTotal += optCSECandidateCount;
2481
2482 optValnumCSE_InitDataFlow();
2483
2484 optValnumCSE_DataFlow();
2485
2486 optValnumCSE_Availablity();
2487
2488 optValnumCSE_Heuristic();
2489 }
2490
2491 optValnumCSE_phase = false;
2492}
2493
2494#endif // FEATURE_VALNUM_CSE
2495
2496/*****************************************************************************
2497 *
2498 * The following determines whether the given expression is a worthy CSE
2499 * candidate.
2500 */
2501bool Compiler::optIsCSEcandidate(GenTree* tree)
2502{
2503 /* No good if the expression contains side effects or if it was marked as DONT CSE */
2504
2505 if (tree->gtFlags & (GTF_ASG | GTF_DONT_CSE))
2506 {
2507 return false;
2508 }
2509
    /* The only reason a TYP_STRUCT tree might occur is as an argument to
       GT_ADDR. It will never actually be materialized, so ignore such trees,
       as well as TYP_VOID trees. */
2513
2514 var_types type = tree->TypeGet();
2515 genTreeOps oper = tree->OperGet();
2516
    // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming
    // to use regular assignments, or handling copyObj).
2519 if (varTypeIsStruct(type) || type == TYP_VOID)
2520 {
2521 return false;
2522 }
2523
2524#ifdef _TARGET_X86_
2525 if (type == TYP_FLOAT)
2526 {
2527 // TODO-X86-CQ: Revisit this
2528 // Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles
2529 return false;
2530 }
2531#else
2532 if (oper == GT_CNS_DBL)
2533 {
2534 // TODO-CQ: Revisit this
        // Don't try to CSE a GT_CNS_DBL as it can represent both floats and doubles
2536 return false;
2537 }
2538#endif
2539
2540 unsigned cost;
2541 if (compCodeOpt() == SMALL_CODE)
2542 {
2543 cost = tree->gtCostSz;
2544 }
2545 else
2546 {
2547 cost = tree->gtCostEx;
2548 }
2549
2550 /* Don't bother if the potential savings are very low */
2551 if (cost < MIN_CSE_COST)
2552 {
2553 return false;
2554 }
2555
2556#if !CSE_CONSTS
2557 /* Don't bother with constants */
2558 if (tree->OperKind() & GTK_CONST)
2559 return false;
2560#endif
2561
2562 /* Check for some special cases */
2563
2564 switch (oper)
2565 {
2566 case GT_CALL:
2567
2568 GenTreeCall* call;
2569 call = tree->AsCall();
2570
2571 // Don't mark calls to allocation helpers as CSE candidates.
2572 // Marking them as CSE candidates usually blocks CSEs rather than enables them.
2573 // A typical case is:
2574 // [1] GT_IND(x) = GT_CALL ALLOC_HELPER
2575 // ...
2576 // [2] y = GT_IND(x)
2577 // ...
2578 // [3] z = GT_IND(x)
2579 // If we mark CALL ALLOC_HELPER as a CSE candidate, we later discover
2580 // that it can't be a CSE def because GT_INDs in [2] and [3] can cause
2581 // more exceptions (NullRef) so we abandon this CSE.
2582 // If we don't mark CALL ALLOC_HELPER as a CSE candidate, we are able
2583 // to use GT_IND(x) in [2] as a CSE def.
2584 if ((call->gtCallType == CT_HELPER) &&
2585 s_helperCallProperties.IsAllocator(eeGetHelperNum(call->gtCallMethHnd)))
2586 {
2587 return false;
2588 }
2589
2590 // If we have a simple helper call with no other persistent side-effects
2591 // then we allow this tree to be a CSE candidate
2592 //
2593 if (gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS | GTF_IS_IN_CSE) == false)
2594 {
2595 return true;
2596 }
2597 else
2598 {
2599 // Calls generally cannot be CSE-ed
2600 return false;
2601 }
2602
2603 case GT_IND:
2604 // TODO-CQ: Review this...
2605 /* We try to cse GT_ARR_ELEM nodes instead of GT_IND(GT_ARR_ELEM).
2606 Doing the first allows cse to also kick in for code like
2607 "GT_IND(GT_ARR_ELEM) = GT_IND(GT_ARR_ELEM) + xyz", whereas doing
2608 the second would not allow it */
2609
2610 return (tree->gtOp.gtOp1->gtOper != GT_ARR_ELEM);
2611
2612 case GT_CNS_INT:
2613 case GT_CNS_LNG:
2614 case GT_CNS_DBL:
2615 case GT_CNS_STR:
2616 return true; // We reach here only when CSE_CONSTS is enabled
2617
2618 case GT_ARR_ELEM:
2619 case GT_ARR_LENGTH:
2620 case GT_CLS_VAR:
2621 case GT_LCL_FLD:
2622 return true;
2623
2624 case GT_LCL_VAR:
2625 return false; // Can't CSE a volatile LCL_VAR
2626
2627 case GT_NEG:
2628 case GT_NOT:
2629 case GT_BSWAP:
2630 case GT_BSWAP16:
2631 case GT_CAST:
2632 return true; // CSE these Unary Operators
2633
2634 case GT_SUB:
2635 case GT_DIV:
2636 case GT_MOD:
2637 case GT_UDIV:
2638 case GT_UMOD:
2639 case GT_OR:
2640 case GT_AND:
2641 case GT_XOR:
2642 case GT_RSH:
2643 case GT_RSZ:
2644 case GT_ROL:
2645 case GT_ROR:
2646 return true; // CSE these Binary Operators
2647
2648 case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
2649 case GT_MUL:
2650 case GT_LSH:
2651 if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
2652 {
2653 return false;
2654 }
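            // Otherwise fall through: without the GTF_ADDRMODE_NO_CSE flag these
            // operators are CSE candidates, just like the comparisons below.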
2655
2656 case GT_EQ:
2657 case GT_NE:
2658 case GT_LT:
2659 case GT_LE:
2660 case GT_GE:
2661 case GT_GT:
2662 return true; // Also CSE these Comparison Operators
2663
2664 case GT_INTRINSIC:
2665 return true; // Intrinsics
2666
2667 case GT_COMMA:
2668 return true; // Allow GT_COMMA nodes to be CSE-ed.
2669
2670 case GT_COLON:
2671 case GT_QMARK:
2672 case GT_NOP:
2673 case GT_RETURN:
            return false; // These are currently the only special nodes that we hit
                          // and know that we don't want to CSE
2676
2677 default:
2678 break; // Any new nodes that we might add later...
2679 }
2680
2681 return false;
2682}
2683
2684#ifdef DEBUG
2685//
// A debug-only method that allows you to control whether the CSE logic is enabled for this method.
2687//
2688// If this method returns false then the CSE phase should be performed.
2689// If the method returns true then the CSE phase should be skipped.
2690//
2691bool Compiler::optConfigDisableCSE()
2692{
    // Check whether COMPlus_JitNoCSE is set and applies to this method
2694 //
2695 unsigned jitNoCSE = JitConfig.JitNoCSE();
2696
2697 if (jitNoCSE > 0)
2698 {
2699 unsigned methodCount = Compiler::jitTotalMethodCompiled;
2700 if ((jitNoCSE & 0xF000000) == 0xF000000)
2701 {
2702 unsigned methodCountMask = methodCount & 0xFFF;
2703 unsigned bitsZero = (jitNoCSE >> 12) & 0xFFF;
2704 unsigned bitsOne = (jitNoCSE >> 0) & 0xFFF;
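            // Illustrative example (hypothetical setting): COMPlus_JitNoCSE=0xF000001 gives
            // bitsZero = 0x000 and bitsOne = 0x001, so CSE is disabled for every method
            // whose compile-order count has bit 0 set (i.e. every other method compiled).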
2705
2706 if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero))
2707 {
2708 if (verbose)
2709 {
2710 printf(" Disabled by JitNoCSE methodCountMask\n");
2711 }
2712
2713 return true; // The CSE phase for this method is disabled
2714 }
2715 }
2716 else if (jitNoCSE <= (methodCount + 1))
2717 {
2718 if (verbose)
2719 {
2720 printf(" Disabled by JitNoCSE > methodCount\n");
2721 }
2722
2723 return true; // The CSE phase for this method is disabled
2724 }
2725 }
2726
2727 return false;
2728}
2729
2730//
// A debug-only method that allows you to control whether the CSE logic is enabled for
// a particular CSE in a method
2733//
2734// If this method returns false then the CSE should be performed.
2735// If the method returns true then the CSE should be skipped.
2736//
2737bool Compiler::optConfigDisableCSE2()
2738{
2739 static unsigned totalCSEcount = 0;
2740
2741 unsigned jitNoCSE2 = JitConfig.JitNoCSE2();
2742
2743 totalCSEcount++;
2744
2745 if (jitNoCSE2 > 0)
2746 {
2747 if ((jitNoCSE2 & 0xF000000) == 0xF000000)
2748 {
2749 unsigned totalCSEMask = totalCSEcount & 0xFFF;
2750 unsigned bitsZero = (jitNoCSE2 >> 12) & 0xFFF;
2751 unsigned bitsOne = (jitNoCSE2 >> 0) & 0xFFF;
2752
2753 if (((totalCSEMask & bitsOne) == bitsOne) && ((~totalCSEMask & bitsZero) == bitsZero))
2754 {
2755 if (verbose)
2756 {
2757 printf(" Disabled by jitNoCSE2 Ones/Zeros mask\n");
2758 }
2759 return true;
2760 }
2761 }
2762 else if ((jitNoCSE2 & 0xF000000) == 0xE000000)
2763 {
2764 unsigned totalCSEMask = totalCSEcount & 0xFFF;
2765 unsigned disableMask = jitNoCSE2 & 0xFFF;
2766
2767 disableMask >>= (totalCSEMask % 12);
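            // Illustrative example (hypothetical setting): COMPlus_JitNoCSE2=0xE000842 makes the
            // low 12 bits 0x842 (bits 1, 6 and 11 set), so any CSE for which
            // ((totalCSEcount & 0xFFF) % 12) is 1, 6 or 11 gets skipped.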
2768
2769 if (disableMask & 1)
2770 {
2771 if (verbose)
2772 {
2773 printf(" Disabled by jitNoCSE2 rotating disable mask\n");
2774 }
2775 return true;
2776 }
2777 }
2778 else if (jitNoCSE2 <= totalCSEcount)
2779 {
2780 if (verbose)
2781 {
2782 printf(" Disabled by jitNoCSE2 > totalCSEcount\n");
2783 }
2784 return true;
2785 }
2786 }
2787 return false;
2788}
2789#endif
2790
2791void Compiler::optOptimizeCSEs()
2792{
2793#ifdef DEBUG
2794 if (verbose)
2795 {
2796 printf("\n*************** In optOptimizeCSEs()\n");
2797 printf("Blocks/Trees at start of optOptimizeCSE phase\n");
2798 fgDispBasicBlocks(true);
2799 }
2800#endif // DEBUG
2801
2802 optCSECandidateCount = 0;
2803 optCSEstart = lvaCount;
2804
2805#if FEATURE_VALNUM_CSE
2806 INDEBUG(optEnsureClearCSEInfo());
2807 optOptimizeValnumCSEs();
2808 EndPhase(PHASE_OPTIMIZE_VALNUM_CSES);
2809#endif // FEATURE_VALNUM_CSE
2810}
2811
2812/*****************************************************************************
2813 *
2814 * Cleanup after CSE to allow us to run more than once.
2815 */
2816
2817void Compiler::optCleanupCSEs()
2818{
2819 // We must clear the BBF_VISITED and BBF_MARKED flags
2820 //
2821 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
2822 {
2823 // And clear all the "visited" bits on the block
2824 //
2825 block->bbFlags &= ~(BBF_VISITED | BBF_MARKED);
2826
2827 /* Walk the statement trees in this basic block */
2828
2829 GenTree* stmt;
2830
2831 // Initialize 'stmt' to the first non-Phi statement
2832 stmt = block->FirstNonPhiDef();
2833
2834 for (; stmt; stmt = stmt->gtNext)
2835 {
2836 noway_assert(stmt->gtOper == GT_STMT);
2837
2838 /* We must clear the gtCSEnum field */
2839 for (GenTree* tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
2840 {
2841 tree->gtCSEnum = NO_CSE;
2842 }
2843 }
2844 }
2845}
2846
2847#ifdef DEBUG
2848
2849/*****************************************************************************
2850 *
2851 * Ensure that all the CSE information in the IR is initialized the way we expect it,
2852 * before running a CSE phase. This is basically an assert that optCleanupCSEs() is not needed.
2853 */
2854
2855void Compiler::optEnsureClearCSEInfo()
2856{
2857 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
2858 {
2859 assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
2860
2861 /* Walk the statement trees in this basic block */
2862
2863 GenTree* stmt;
2864
2865 // Initialize 'stmt' to the first non-Phi statement
2866 stmt = block->FirstNonPhiDef();
2867
2868 for (; stmt; stmt = stmt->gtNext)
2869 {
2870 assert(stmt->gtOper == GT_STMT);
2871
2872 for (GenTree* tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
2873 {
2874 assert(tree->gtCSEnum == NO_CSE);
2875 }
2876 }
2877 }
2878}
2879
2880#endif // DEBUG
2881
2882/*****************************************************************************/
2883#endif // FEATURE_ANYCSE
2884/*****************************************************************************/
2885