| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 7 | XX XX |
| 8 | XX OptCSE XX |
| 9 | XX XX |
| 10 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 11 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
| 12 | */ |
| 13 | |
| 14 | #include "jitpch.h" |
| 15 | #ifdef _MSC_VER |
| 16 | #pragma hdrstop |
| 17 | #endif |
| 18 | |
| 19 | /*****************************************************************************/ |
| 20 | #if FEATURE_ANYCSE |
| 21 | /*****************************************************************************/ |
| 22 | |
| 23 | /* static */ |
| 24 | const size_t Compiler::s_optCSEhashSize = EXPSET_SZ * 2; |
| 25 | |
| 26 | /***************************************************************************** |
| 27 | * |
| 28 | * We've found all the candidates, build the index for easy access. |
| 29 | */ |
| 30 | |
| 31 | void Compiler::optCSEstop() |
| 32 | { |
| 33 | if (optCSECandidateCount == 0) |
| 34 | { |
| 35 | return; |
| 36 | } |
| 37 | |
| 38 | CSEdsc* dsc; |
| 39 | CSEdsc** ptr; |
| 40 | unsigned cnt; |
| 41 | |
| 42 | optCSEtab = new (this, CMK_CSE) CSEdsc*[optCSECandidateCount](); |
| 43 | |
| 44 | for (cnt = s_optCSEhashSize, ptr = optCSEhash; cnt; cnt--, ptr++) |
| 45 | { |
| 46 | for (dsc = *ptr; dsc; dsc = dsc->csdNextInBucket) |
| 47 | { |
| 48 | if (dsc->csdIndex) |
| 49 | { |
| 50 | noway_assert((unsigned)dsc->csdIndex <= optCSECandidateCount); |
| 51 | if (optCSEtab[dsc->csdIndex - 1] == nullptr) |
| 52 | { |
| 53 | optCSEtab[dsc->csdIndex - 1] = dsc; |
| 54 | } |
| 55 | } |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | #ifdef DEBUG |
| 60 | for (cnt = 0; cnt < optCSECandidateCount; cnt++) |
| 61 | { |
| 62 | noway_assert(optCSEtab[cnt] != nullptr); |
| 63 | } |
| 64 | #endif |
| 65 | } |
| 66 | |
| 67 | /***************************************************************************** |
| 68 | * |
| 69 | * Return the descriptor for the CSE with the given index. |
| 70 | */ |
| 71 | |
| 72 | inline Compiler::CSEdsc* Compiler::optCSEfindDsc(unsigned index) |
| 73 | { |
| 74 | noway_assert(index); |
| 75 | noway_assert(index <= optCSECandidateCount); |
| 76 | noway_assert(optCSEtab[index - 1]); |
| 77 | |
| 78 | return optCSEtab[index - 1]; |
| 79 | } |
| 80 | |
| 81 | //------------------------------------------------------------------------ |
| 82 | // Compiler::optUnmarkCSE |
| 83 | // |
| 84 | // Arguments: |
| 85 | // tree - A sub tree that originally was part of a CSE use |
| 86 | // that we are currently in the process of removing. |
| 87 | // |
| 88 | // Return Value: |
| 89 | // Returns true if we can safely remove the 'tree' node. |
| 90 | // Returns false if the node is a CSE def that the caller |
| 91 | // needs to extract and preserve. |
| 92 | // |
| 93 | // Notes: |
| 94 | // If 'tree' is a CSE use then we perform an unmark CSE operation |
| 95 | // so that the CSE used counts and weight are updated properly. |
| 96 | // The only caller for this method is optUnmarkCSEs which is a |
| 97 | // tree walker vistor function. When we return false this method |
| 98 | // returns WALK_SKIP_SUBTREES so that we don't visit the remaining |
| 99 | // nodes of the CSE def. |
| 100 | // |
| 101 | bool Compiler::optUnmarkCSE(GenTree* tree) |
| 102 | { |
| 103 | if (!IS_CSE_INDEX(tree->gtCSEnum)) |
| 104 | { |
| 105 | // If this node isn't a CSE use or def we can safely remove this node. |
| 106 | // |
| 107 | return true; |
| 108 | } |
| 109 | |
| 110 | // make sure it's been initialized |
| 111 | noway_assert(optCSEweight <= BB_MAX_WEIGHT); |
| 112 | |
| 113 | // Is this a CSE use? |
| 114 | if (IS_CSE_USE(tree->gtCSEnum)) |
| 115 | { |
| 116 | unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum); |
| 117 | CSEdsc* desc = optCSEfindDsc(CSEnum); |
| 118 | |
| 119 | #ifdef DEBUG |
| 120 | if (verbose) |
| 121 | { |
| 122 | printf("Unmark CSE use #%02d at " , CSEnum); |
| 123 | printTreeID(tree); |
| 124 | printf(": %3d -> %3d\n" , desc->csdUseCount, desc->csdUseCount - 1); |
| 125 | } |
| 126 | #endif // DEBUG |
| 127 | |
| 128 | // Perform an unmark CSE operation |
| 129 | |
| 130 | // 1. Reduce the nested CSE's 'use' count |
| 131 | |
| 132 | noway_assert(desc->csdUseCount > 0); |
| 133 | |
| 134 | if (desc->csdUseCount > 0) |
| 135 | { |
| 136 | desc->csdUseCount -= 1; |
| 137 | |
| 138 | if (desc->csdUseWtCnt < optCSEweight) |
| 139 | { |
| 140 | desc->csdUseWtCnt = 0; |
| 141 | } |
| 142 | else |
| 143 | { |
| 144 | desc->csdUseWtCnt -= optCSEweight; |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | // 2. Unmark the CSE infomation in the node |
| 149 | |
| 150 | tree->gtCSEnum = NO_CSE; |
| 151 | return true; |
| 152 | } |
| 153 | else |
| 154 | { |
| 155 | // It is not safe to remove this node, so we will return false |
| 156 | // and the caller must add this node to the side effect list |
| 157 | // |
| 158 | return false; |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTree** pTree, fgWalkData* walkData) |
| 163 | { |
| 164 | GenTree* tree = *pTree; |
| 165 | Compiler* comp = walkData->compiler; |
| 166 | optCSE_MaskData* pUserData = (optCSE_MaskData*)(walkData->pCallbackData); |
| 167 | |
| 168 | if (IS_CSE_INDEX(tree->gtCSEnum)) |
| 169 | { |
| 170 | unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum); |
| 171 | unsigned cseBit = genCSEnum2bit(cseIndex); |
| 172 | if (IS_CSE_DEF(tree->gtCSEnum)) |
| 173 | { |
| 174 | BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_defMask, cseBit); |
| 175 | } |
| 176 | else |
| 177 | { |
| 178 | BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_useMask, cseBit); |
| 179 | } |
| 180 | } |
| 181 | |
| 182 | return WALK_CONTINUE; |
| 183 | } |
| 184 | |
| 185 | // This functions walks all the node for an given tree |
| 186 | // and return the mask of CSE defs and uses for the tree |
| 187 | // |
| 188 | void Compiler::optCSE_GetMaskData(GenTree* tree, optCSE_MaskData* pMaskData) |
| 189 | { |
| 190 | pMaskData->CSE_defMask = BitVecOps::MakeEmpty(cseTraits); |
| 191 | pMaskData->CSE_useMask = BitVecOps::MakeEmpty(cseTraits); |
| 192 | fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData); |
| 193 | } |
| 194 | |
| 195 | //------------------------------------------------------------------------ |
| 196 | // optCSE_canSwap: Determine if the execution order of two nodes can be swapped. |
| 197 | // |
| 198 | // Arguments: |
| 199 | // op1 - The first node |
| 200 | // op2 - The second node |
| 201 | // |
| 202 | // Return Value: |
| 203 | // Return true iff it safe to swap the execution order of 'op1' and 'op2', |
| 204 | // considering only the locations of the CSE defs and uses. |
| 205 | // |
| 206 | // Assumptions: |
| 207 | // 'op1' currently occurse before 'op2' in the execution order. |
| 208 | // |
| 209 | bool Compiler::optCSE_canSwap(GenTree* op1, GenTree* op2) |
| 210 | { |
| 211 | // op1 and op2 must be non-null. |
| 212 | assert(op1 != nullptr); |
| 213 | assert(op2 != nullptr); |
| 214 | |
| 215 | bool canSwap = true; // the default result unless proven otherwise. |
| 216 | |
| 217 | optCSE_MaskData op1MaskData; |
| 218 | optCSE_MaskData op2MaskData; |
| 219 | |
| 220 | optCSE_GetMaskData(op1, &op1MaskData); |
| 221 | optCSE_GetMaskData(op2, &op2MaskData); |
| 222 | |
| 223 | // We cannot swap if op1 contains a CSE def that is used by op2 |
| 224 | if (!BitVecOps::IsEmptyIntersection(cseTraits, op1MaskData.CSE_defMask, op2MaskData.CSE_useMask)) |
| 225 | { |
| 226 | canSwap = false; |
| 227 | } |
| 228 | else |
| 229 | { |
| 230 | // We also cannot swap if op2 contains a CSE def that is used by op1. |
| 231 | if (!BitVecOps::IsEmptyIntersection(cseTraits, op2MaskData.CSE_defMask, op1MaskData.CSE_useMask)) |
| 232 | { |
| 233 | canSwap = false; |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | return canSwap; |
| 238 | } |
| 239 | |
| 240 | //------------------------------------------------------------------------ |
| 241 | // optCSE_canSwap: Determine if the execution order of a node's operands can be swapped. |
| 242 | // |
| 243 | // Arguments: |
| 244 | // tree - The node of interest |
| 245 | // |
| 246 | // Return Value: |
| 247 | // Return true iff it safe to swap the execution order of the operands of 'tree', |
| 248 | // considering only the locations of the CSE defs and uses. |
| 249 | // |
| 250 | bool Compiler::optCSE_canSwap(GenTree* tree) |
| 251 | { |
| 252 | // We must have a binary treenode with non-null op1 and op2 |
| 253 | assert((tree->OperKind() & GTK_SMPOP) != 0); |
| 254 | |
| 255 | GenTree* op1 = tree->gtOp.gtOp1; |
| 256 | GenTree* op2 = tree->gtGetOp2(); |
| 257 | |
| 258 | return optCSE_canSwap(op1, op2); |
| 259 | } |
| 260 | |
| 261 | /***************************************************************************** |
| 262 | * |
| 263 | * Compare function passed to qsort() by CSE_Heuristic::SortCandidates |
| 264 | * when (CodeOptKind() != Compiler::SMALL_CODE) |
| 265 | */ |
| 266 | |
| 267 | /* static */ |
| 268 | int __cdecl Compiler::optCSEcostCmpEx(const void* op1, const void* op2) |
| 269 | { |
| 270 | CSEdsc* dsc1 = *(CSEdsc**)op1; |
| 271 | CSEdsc* dsc2 = *(CSEdsc**)op2; |
| 272 | |
| 273 | GenTree* exp1 = dsc1->csdTree; |
| 274 | GenTree* exp2 = dsc2->csdTree; |
| 275 | |
| 276 | int diff; |
| 277 | |
| 278 | diff = (int)(exp2->gtCostEx - exp1->gtCostEx); |
| 279 | |
| 280 | if (diff != 0) |
| 281 | { |
| 282 | return diff; |
| 283 | } |
| 284 | |
| 285 | // Sort the higher Use Counts toward the top |
| 286 | diff = (int)(dsc2->csdUseWtCnt - dsc1->csdUseWtCnt); |
| 287 | |
| 288 | if (diff != 0) |
| 289 | { |
| 290 | return diff; |
| 291 | } |
| 292 | |
| 293 | // With the same use count, Sort the lower Def Counts toward the top |
| 294 | diff = (int)(dsc1->csdDefWtCnt - dsc2->csdDefWtCnt); |
| 295 | |
| 296 | if (diff != 0) |
| 297 | { |
| 298 | return diff; |
| 299 | } |
| 300 | |
| 301 | // In order to ensure that we have a stable sort, we break ties using the csdIndex |
| 302 | return (int)(dsc1->csdIndex - dsc2->csdIndex); |
| 303 | } |
| 304 | |
| 305 | /***************************************************************************** |
| 306 | * |
| 307 | * Compare function passed to qsort() by CSE_Heuristic::SortCandidates |
| 308 | * when (CodeOptKind() == Compiler::SMALL_CODE) |
| 309 | */ |
| 310 | |
| 311 | /* static */ |
| 312 | int __cdecl Compiler::optCSEcostCmpSz(const void* op1, const void* op2) |
| 313 | { |
| 314 | CSEdsc* dsc1 = *(CSEdsc**)op1; |
| 315 | CSEdsc* dsc2 = *(CSEdsc**)op2; |
| 316 | |
| 317 | GenTree* exp1 = dsc1->csdTree; |
| 318 | GenTree* exp2 = dsc2->csdTree; |
| 319 | |
| 320 | int diff; |
| 321 | |
| 322 | diff = (int)(exp2->gtCostSz - exp1->gtCostSz); |
| 323 | |
| 324 | if (diff != 0) |
| 325 | { |
| 326 | return diff; |
| 327 | } |
| 328 | |
| 329 | // Sort the higher Use Counts toward the top |
| 330 | diff = (int)(dsc2->csdUseCount - dsc1->csdUseCount); |
| 331 | |
| 332 | if (diff != 0) |
| 333 | { |
| 334 | return diff; |
| 335 | } |
| 336 | |
| 337 | // With the same use count, Sort the lower Def Counts toward the top |
| 338 | diff = (int)(dsc1->csdDefCount - dsc2->csdDefCount); |
| 339 | |
| 340 | if (diff != 0) |
| 341 | { |
| 342 | return diff; |
| 343 | } |
| 344 | |
| 345 | // In order to ensure that we have a stable sort, we break ties using the csdIndex |
| 346 | return (int)(dsc1->csdIndex - dsc2->csdIndex); |
| 347 | } |
| 348 | |
| 349 | /*****************************************************************************/ |
| 350 | #if FEATURE_VALNUM_CSE |
| 351 | /*****************************************************************************/ |
| 352 | |
| 353 | /***************************************************************************** |
| 354 | * |
| 355 | * Initialize the Value Number CSE tracking logic. |
| 356 | */ |
| 357 | |
| 358 | void Compiler::optValnumCSE_Init() |
| 359 | { |
| 360 | #ifdef DEBUG |
| 361 | optCSEtab = nullptr; |
| 362 | #endif |
| 363 | |
| 364 | // Init traits and full/empty bitvectors. This will be used to track the |
| 365 | // individual cse indexes. |
| 366 | cseTraits = new (getAllocator()) BitVecTraits(EXPSET_SZ, this); |
| 367 | cseFull = BitVecOps::MakeFull(cseTraits); |
| 368 | |
| 369 | /* Allocate and clear the hash bucket table */ |
| 370 | |
| 371 | optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize](); |
| 372 | |
| 373 | optCSECandidateCount = 0; |
| 374 | optDoCSE = false; // Stays false until we find duplicate CSE tree |
| 375 | |
| 376 | // optCseCheckedBoundMap is unused in most functions, allocated only when used |
| 377 | optCseCheckedBoundMap = nullptr; |
| 378 | } |
| 379 | |
| 380 | //--------------------------------------------------------------------------- |
| 381 | // optValnumCSE_Index: |
| 382 | // - Returns the CSE index to use for this tree, |
| 383 | // or zero if this expression is not currently a CSE. |
| 384 | // |
| 385 | // Arguments: |
| 386 | // tree - The current candidate CSE expression |
| 387 | // stmt - The current statement that contains tree |
| 388 | // |
| 389 | // |
| 390 | // Notes: We build a hash table that contains all of the expressions that |
| 391 | // are presented to this method. Whenever we see a duplicate expression |
| 392 | // we have a CSE candidate. If it is the first time seeing the duplicate |
| 393 | // we allocate a new CSE index. If we have already allocated a CSE index |
| 394 | // we return that index. There currently is a limit on the number of CSEs |
| 395 | // that we can have of MAX_CSE_CNT (64) |
| 396 | // |
| 397 | unsigned Compiler::optValnumCSE_Index(GenTree* tree, GenTree* stmt) |
| 398 | { |
| 399 | unsigned key; |
| 400 | unsigned hash; |
| 401 | unsigned hval; |
| 402 | CSEdsc* hashDsc; |
| 403 | |
| 404 | // We use the liberal Value numbers when building the set of CSE |
| 405 | ValueNum vnLib = tree->GetVN(VNK_Liberal); |
| 406 | ValueNum vnLibNorm = vnStore->VNNormalValue(vnLib); |
| 407 | |
| 408 | // We use the normal value number because we want the CSE candidate to |
| 409 | // represent all expressions that produce the same normal value number |
| 410 | // We will handle the case where we have different exception sets when |
| 411 | // promoting the candidates. |
| 412 | // |
| 413 | // We do this because a GT_IND will usually have a NullPtrExc entry in its |
| 414 | // exc set, but we may have cleared the GTF_EXCEPT flag and if so, it won't |
| 415 | // have an NullPtrExc, or we may have assigned the value of an GT_IND |
| 416 | // into a LCL_VAR and then read it back later. |
| 417 | // |
| 418 | // When we are promoting the CSE candidates we insure that any CSE |
| 419 | // uses that we promote have an exc set that is the same as the CSE defs |
| 420 | // or have an empty set. And that all of the CSE defs produced the required |
| 421 | // set of exceptions for the CSE uses. |
| 422 | // |
| 423 | |
| 424 | // We assign either vnLib or vnLibNorm as the hash key |
| 425 | // |
| 426 | // The only exception to using the normal value is for the GT_COMMA nodes. |
| 427 | // Here we check to see if we have a GT_COMMA with a different value number |
| 428 | // than the one from its op2. For this case we want to create two different |
| 429 | // CSE candidates. This allows us to CSE the GT_COMMA separately from its value. |
| 430 | // |
| 431 | if (tree->OperGet() == GT_COMMA) |
| 432 | { |
| 433 | // op2 is the value produced by a GT_COMMA |
| 434 | GenTree* op2 = tree->gtOp.gtOp2; |
| 435 | ValueNum vnOp2Lib = op2->GetVN(VNK_Liberal); |
| 436 | |
| 437 | // If the value number for op2 and tree are different, then some new |
| 438 | // exceptions were produced by op1. For that case we will NOT use the |
| 439 | // normal value. This allows us to CSE commas with an op1 that is |
| 440 | // an ARR_BOUNDS_CHECK. |
| 441 | // |
| 442 | if (vnOp2Lib != vnLib) |
| 443 | { |
| 444 | key = (unsigned)vnLib; // include the exc set in the hash key |
| 445 | } |
| 446 | else |
| 447 | { |
| 448 | key = (unsigned)vnLibNorm; |
| 449 | } |
| 450 | |
| 451 | // If we didn't do the above we would have op1 as the CSE def |
| 452 | // and the parent comma as the CSE use (but with a different exc set) |
| 453 | // This would prevent us from making any CSE with the comma |
| 454 | // |
| 455 | assert(vnLibNorm == vnStore->VNNormalValue(vnOp2Lib)); |
| 456 | } |
| 457 | else // Not a GT_COMMA |
| 458 | { |
| 459 | key = (unsigned)vnLibNorm; |
| 460 | } |
| 461 | |
| 462 | // Compute the hash value for the expression |
| 463 | |
| 464 | hash = key; |
| 465 | hash *= (unsigned)(s_optCSEhashSize + 1); |
| 466 | hash >>= 7; |
| 467 | |
| 468 | hval = hash % s_optCSEhashSize; |
| 469 | |
| 470 | /* Look for a matching index in the hash table */ |
| 471 | |
| 472 | bool newCSE = false; |
| 473 | |
| 474 | for (hashDsc = optCSEhash[hval]; hashDsc; hashDsc = hashDsc->csdNextInBucket) |
| 475 | { |
| 476 | if (hashDsc->csdHashKey == key) |
| 477 | { |
| 478 | treeStmtLst* newElem; |
| 479 | |
| 480 | /* Have we started the list of matching nodes? */ |
| 481 | |
| 482 | if (hashDsc->csdTreeList == nullptr) |
| 483 | { |
| 484 | // Create the new element based upon the matching hashDsc element. |
| 485 | |
| 486 | newElem = new (this, CMK_TreeStatementList) treeStmtLst; |
| 487 | |
| 488 | newElem->tslTree = hashDsc->csdTree; |
| 489 | newElem->tslStmt = hashDsc->csdStmt; |
| 490 | newElem->tslBlock = hashDsc->csdBlock; |
| 491 | newElem->tslNext = nullptr; |
| 492 | |
| 493 | /* Start the list with the first CSE candidate recorded */ |
| 494 | |
| 495 | hashDsc->csdTreeList = newElem; |
| 496 | hashDsc->csdTreeLast = newElem; |
| 497 | } |
| 498 | |
| 499 | noway_assert(hashDsc->csdTreeList); |
| 500 | |
| 501 | /* Append this expression to the end of the list */ |
| 502 | |
| 503 | newElem = new (this, CMK_TreeStatementList) treeStmtLst; |
| 504 | |
| 505 | newElem->tslTree = tree; |
| 506 | newElem->tslStmt = stmt; |
| 507 | newElem->tslBlock = compCurBB; |
| 508 | newElem->tslNext = nullptr; |
| 509 | |
| 510 | hashDsc->csdTreeLast->tslNext = newElem; |
| 511 | hashDsc->csdTreeLast = newElem; |
| 512 | |
| 513 | optDoCSE = true; // Found a duplicate CSE tree |
| 514 | |
| 515 | /* Have we assigned a CSE index? */ |
| 516 | if (hashDsc->csdIndex == 0) |
| 517 | { |
| 518 | newCSE = true; |
| 519 | break; |
| 520 | } |
| 521 | |
| 522 | assert(FitsIn<signed char>(hashDsc->csdIndex)); |
| 523 | tree->gtCSEnum = ((signed char)hashDsc->csdIndex); |
| 524 | return hashDsc->csdIndex; |
| 525 | } |
| 526 | } |
| 527 | |
| 528 | if (!newCSE) |
| 529 | { |
| 530 | /* Not found, create a new entry (unless we have too many already) */ |
| 531 | |
| 532 | if (optCSECandidateCount < MAX_CSE_CNT) |
| 533 | { |
| 534 | hashDsc = new (this, CMK_CSE) CSEdsc; |
| 535 | |
| 536 | hashDsc->csdHashKey = key; |
| 537 | hashDsc->csdIndex = 0; |
| 538 | hashDsc->csdLiveAcrossCall = 0; |
| 539 | hashDsc->csdDefCount = 0; |
| 540 | hashDsc->csdUseCount = 0; |
| 541 | hashDsc->csdDefWtCnt = 0; |
| 542 | hashDsc->csdUseWtCnt = 0; |
| 543 | hashDsc->defExcSetPromise = vnStore->VNForEmptyExcSet(); |
| 544 | hashDsc->defExcSetCurrent = vnStore->VNForNull(); // uninit value |
| 545 | hashDsc->defConservNormVN = vnStore->VNForNull(); // uninit value |
| 546 | |
| 547 | hashDsc->csdTree = tree; |
| 548 | hashDsc->csdStmt = stmt; |
| 549 | hashDsc->csdBlock = compCurBB; |
| 550 | hashDsc->csdTreeList = nullptr; |
| 551 | |
| 552 | /* Append the entry to the hash bucket */ |
| 553 | |
| 554 | hashDsc->csdNextInBucket = optCSEhash[hval]; |
| 555 | optCSEhash[hval] = hashDsc; |
| 556 | } |
| 557 | return 0; |
| 558 | } |
| 559 | else // newCSE is true |
| 560 | { |
| 561 | /* We get here only after finding a matching CSE */ |
| 562 | |
| 563 | /* Create a new CSE (unless we have the maximum already) */ |
| 564 | |
| 565 | if (optCSECandidateCount == MAX_CSE_CNT) |
| 566 | { |
| 567 | return 0; |
| 568 | } |
| 569 | |
| 570 | C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT); |
| 571 | |
| 572 | unsigned CSEindex = ++optCSECandidateCount; |
| 573 | // EXPSET_TP CSEmask = genCSEnum2bit(CSEindex); |
| 574 | |
| 575 | /* Record the new CSE index in the hashDsc */ |
| 576 | hashDsc->csdIndex = CSEindex; |
| 577 | |
| 578 | /* Update the gtCSEnum field in the original tree */ |
| 579 | noway_assert(hashDsc->csdTreeList->tslTree->gtCSEnum == 0); |
| 580 | assert(FitsIn<signed char>(CSEindex)); |
| 581 | |
| 582 | hashDsc->csdTreeList->tslTree->gtCSEnum = ((signed char)CSEindex); |
| 583 | noway_assert(((unsigned)hashDsc->csdTreeList->tslTree->gtCSEnum) == CSEindex); |
| 584 | |
| 585 | tree->gtCSEnum = ((signed char)CSEindex); |
| 586 | |
| 587 | #ifdef DEBUG |
| 588 | if (verbose) |
| 589 | { |
| 590 | EXPSET_TP tempMask = BitVecOps::MakeSingleton(cseTraits, genCSEnum2bit(CSEindex)); |
| 591 | printf("\nCSE candidate #%02u, vn=" , CSEindex); |
| 592 | vnPrint(key, 0); |
| 593 | printf(" cseMask=%s in " FMT_BB ", [cost=%2u, size=%2u]: \n" , genES2str(cseTraits, tempMask), |
| 594 | compCurBB->bbNum, tree->gtCostEx, tree->gtCostSz); |
| 595 | gtDispTree(tree); |
| 596 | } |
| 597 | #endif // DEBUG |
| 598 | |
| 599 | return CSEindex; |
| 600 | } |
| 601 | } |
| 602 | |
| 603 | /***************************************************************************** |
| 604 | * |
| 605 | * Locate CSE candidates and assign indices to them |
| 606 | * return 0 if no CSE candidates were found |
| 607 | * Also initialize bbCseIn, bbCseout and bbCseGen sets for all blocks |
| 608 | */ |
| 609 | |
| 610 | unsigned Compiler::optValnumCSE_Locate() |
| 611 | { |
| 612 | // Locate CSE candidates and assign them indices |
| 613 | |
| 614 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
| 615 | { |
| 616 | GenTree* stmt; |
| 617 | GenTree* tree; |
| 618 | |
| 619 | /* Make the block publicly available */ |
| 620 | |
| 621 | compCurBB = block; |
| 622 | |
| 623 | /* Ensure that the BBF_VISITED and BBF_MARKED flag are clear */ |
| 624 | /* Everyone who uses these flags are required to clear afterwards */ |
| 625 | noway_assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0); |
| 626 | |
| 627 | /* Walk the statement trees in this basic block */ |
| 628 | for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext) |
| 629 | { |
| 630 | noway_assert(stmt->gtOper == GT_STMT); |
| 631 | |
| 632 | /* We walk the tree in the forwards direction (bottom up) */ |
| 633 | bool stmtHasArrLenCandidate = false; |
| 634 | for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext) |
| 635 | { |
| 636 | if (tree->OperIsCompare() && stmtHasArrLenCandidate) |
| 637 | { |
| 638 | // Check if this compare is a function of (one of) the checked |
| 639 | // bound candidate(s); we may want to update its value number. |
| 640 | // if the array length gets CSEd |
| 641 | optCseUpdateCheckedBoundMap(tree); |
| 642 | } |
| 643 | |
| 644 | if (!optIsCSEcandidate(tree)) |
| 645 | { |
| 646 | continue; |
| 647 | } |
| 648 | |
| 649 | if (ValueNumStore::isReservedVN(tree->GetVN(VNK_Liberal))) |
| 650 | { |
| 651 | continue; |
| 652 | } |
| 653 | |
| 654 | // Don't CSE constant values, instead let the Value Number |
| 655 | // based Assertion Prop phase handle them. Here, unlike |
| 656 | // the rest of optCSE, we use the conservative value number |
| 657 | // rather than the liberal one, since the conservative one |
| 658 | // is what the Value Number based Assertion Prop will use |
| 659 | // and the point is to avoid optimizing cases that it will |
| 660 | // handle. |
| 661 | // |
| 662 | if (vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair))) |
| 663 | { |
| 664 | continue; |
| 665 | } |
| 666 | |
| 667 | /* Assign an index to this expression */ |
| 668 | |
| 669 | unsigned CSEindex = optValnumCSE_Index(tree, stmt); |
| 670 | |
| 671 | if (CSEindex != 0) |
| 672 | { |
| 673 | noway_assert(((unsigned)tree->gtCSEnum) == CSEindex); |
| 674 | } |
| 675 | |
| 676 | if (IS_CSE_INDEX(CSEindex) && (tree->OperGet() == GT_ARR_LENGTH)) |
| 677 | { |
| 678 | stmtHasArrLenCandidate = true; |
| 679 | } |
| 680 | } |
| 681 | } |
| 682 | } |
| 683 | |
| 684 | /* We're done if there were no interesting expressions */ |
| 685 | |
| 686 | if (!optDoCSE) |
| 687 | { |
| 688 | return 0; |
| 689 | } |
| 690 | |
| 691 | /* We're finished building the expression lookup table */ |
| 692 | |
| 693 | optCSEstop(); |
| 694 | |
| 695 | return 1; |
| 696 | } |
| 697 | |
| 698 | //------------------------------------------------------------------------ |
| 699 | // optCseUpdateCheckedBoundMap: Check if this compare is a tractable function of |
| 700 | // a checked bound that is a CSE candidate, and insert |
| 701 | // an entry in the optCseCheckedBoundMap if so. This facilitates |
| 702 | // subsequently updating the compare's value number if |
| 703 | // the bound gets CSEd. |
| 704 | // |
| 705 | // Arguments: |
| 706 | // compare - The compare node to check |
| 707 | |
| 708 | void Compiler::optCseUpdateCheckedBoundMap(GenTree* compare) |
| 709 | { |
| 710 | assert(compare->OperIsCompare()); |
| 711 | |
| 712 | ValueNum compareVN = compare->gtVNPair.GetConservative(); |
| 713 | VNFuncApp cmpVNFuncApp; |
| 714 | |
| 715 | if (!vnStore->GetVNFunc(compareVN, &cmpVNFuncApp) || (cmpVNFuncApp.m_func != GetVNFuncForNode(compare))) |
| 716 | { |
| 717 | // Value numbering inferred this compare as something other |
| 718 | // than its own operator; leave its value number alone. |
| 719 | return; |
| 720 | } |
| 721 | |
| 722 | // Now look for a checked bound feeding the compare |
| 723 | ValueNumStore::CompareCheckedBoundArithInfo info; |
| 724 | |
| 725 | GenTree* boundParent = nullptr; |
| 726 | |
| 727 | if (vnStore->IsVNCompareCheckedBound(compareVN)) |
| 728 | { |
| 729 | // Simple compare of an bound against something else. |
| 730 | |
| 731 | vnStore->GetCompareCheckedBound(compareVN, &info); |
| 732 | boundParent = compare; |
| 733 | } |
| 734 | else if (vnStore->IsVNCompareCheckedBoundArith(compareVN)) |
| 735 | { |
| 736 | // Compare of a bound +/- some offset to something else. |
| 737 | |
| 738 | GenTree* op1 = compare->gtGetOp1(); |
| 739 | GenTree* op2 = compare->gtGetOp2(); |
| 740 | |
| 741 | vnStore->GetCompareCheckedBoundArithInfo(compareVN, &info); |
| 742 | if (GetVNFuncForNode(op1) == (VNFunc)info.arrOper) |
| 743 | { |
| 744 | // The arithmetic node is the bound's parent. |
| 745 | boundParent = op1; |
| 746 | } |
| 747 | else if (GetVNFuncForNode(op2) == (VNFunc)info.arrOper) |
| 748 | { |
| 749 | // The arithmetic node is the bound's parent. |
| 750 | boundParent = op2; |
| 751 | } |
| 752 | } |
| 753 | |
| 754 | if (boundParent != nullptr) |
| 755 | { |
| 756 | GenTree* bound = nullptr; |
| 757 | |
| 758 | // Find which child of boundParent is the bound. Abort if neither |
| 759 | // conservative value number matches the one from the compare VN. |
| 760 | |
| 761 | GenTree* child1 = boundParent->gtGetOp1(); |
| 762 | if ((info.vnBound == child1->gtVNPair.GetConservative()) && IS_CSE_INDEX(child1->gtCSEnum)) |
| 763 | { |
| 764 | bound = child1; |
| 765 | } |
| 766 | else |
| 767 | { |
| 768 | GenTree* child2 = boundParent->gtGetOp2(); |
| 769 | if ((info.vnBound == child2->gtVNPair.GetConservative()) && IS_CSE_INDEX(child2->gtCSEnum)) |
| 770 | { |
| 771 | bound = child2; |
| 772 | } |
| 773 | } |
| 774 | |
| 775 | if (bound != nullptr) |
| 776 | { |
| 777 | // Found a checked bound feeding a compare that is a tractable function of it; |
| 778 | // record this in the map so we can update the compare VN if the bound |
| 779 | // node gets CSEd. |
| 780 | |
| 781 | if (optCseCheckedBoundMap == nullptr) |
| 782 | { |
| 783 | // Allocate map on first use. |
| 784 | optCseCheckedBoundMap = new (getAllocator()) NodeToNodeMap(getAllocator()); |
| 785 | } |
| 786 | |
| 787 | optCseCheckedBoundMap->Set(bound, compare); |
| 788 | } |
| 789 | } |
| 790 | } |
| 791 | |
| 792 | /***************************************************************************** |
| 793 | * |
| 794 | * Compute each blocks bbCseGen |
| 795 | * This is the bitset that represents the CSEs that are generated within the block |
| 796 | */ |
| 797 | void Compiler::optValnumCSE_InitDataFlow() |
| 798 | { |
| 799 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
| 800 | { |
| 801 | /* Initialize the blocks's bbCseIn set */ |
| 802 | |
| 803 | bool init_to_zero = false; |
| 804 | |
| 805 | if (block == fgFirstBB) |
| 806 | { |
| 807 | /* Clear bbCseIn for the entry block */ |
| 808 | init_to_zero = true; |
| 809 | } |
| 810 | #if !CSE_INTO_HANDLERS |
| 811 | else |
| 812 | { |
| 813 | if (bbIsHandlerBeg(block)) |
| 814 | { |
| 815 | /* Clear everything on entry to filters or handlers */ |
| 816 | init_to_zero = true; |
| 817 | } |
| 818 | } |
| 819 | #endif |
| 820 | if (init_to_zero) |
| 821 | { |
| 822 | /* Initialize to {ZERO} prior to dataflow */ |
| 823 | block->bbCseIn = BitVecOps::MakeEmpty(cseTraits); |
| 824 | } |
| 825 | else |
| 826 | { |
| 827 | /* Initialize to {ALL} prior to dataflow */ |
| 828 | block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseFull); |
| 829 | } |
| 830 | |
| 831 | block->bbCseOut = BitVecOps::MakeCopy(cseTraits, cseFull); |
| 832 | |
| 833 | /* Initialize to {ZERO} prior to locating the CSE candidates */ |
| 834 | block->bbCseGen = BitVecOps::MakeEmpty(cseTraits); |
| 835 | } |
| 836 | |
| 837 | // We walk the set of CSE candidates and set the bit corresponsing to the CSEindex |
| 838 | // in the block's bbCseGen bitset |
| 839 | // |
| 840 | for (unsigned cnt = 0; cnt < optCSECandidateCount; cnt++) |
| 841 | { |
| 842 | CSEdsc* dsc = optCSEtab[cnt]; |
| 843 | unsigned CSEindex = dsc->csdIndex; |
| 844 | treeStmtLst* lst = dsc->csdTreeList; |
| 845 | noway_assert(lst); |
| 846 | |
| 847 | while (lst != nullptr) |
| 848 | { |
| 849 | BasicBlock* block = lst->tslBlock; |
| 850 | BitVecOps::AddElemD(cseTraits, block->bbCseGen, genCSEnum2bit(CSEindex)); |
| 851 | lst = lst->tslNext; |
| 852 | } |
| 853 | } |
| 854 | |
| 855 | #ifdef DEBUG |
| 856 | // Dump out the bbCseGen information that we just created |
| 857 | // |
| 858 | if (verbose) |
| 859 | { |
| 860 | bool = false; |
| 861 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
| 862 | { |
| 863 | if (block->bbCseGen != nullptr) |
| 864 | { |
| 865 | if (!headerPrinted) |
| 866 | { |
| 867 | printf("\nBlocks that generate CSE def/uses\n" ); |
| 868 | headerPrinted = true; |
| 869 | } |
| 870 | printf(FMT_BB, block->bbNum); |
| 871 | printf(" cseGen = %s\n" , genES2str(cseTraits, block->bbCseGen)); |
| 872 | } |
| 873 | } |
| 874 | } |
| 875 | |
| 876 | fgDebugCheckLinks(); |
| 877 | |
| 878 | #endif // DEBUG |
| 879 | } |
| 880 | |
| 881 | /***************************************************************************** |
| 882 | * |
| 883 | * CSE Dataflow, so that all helper methods for dataflow are in a single place |
| 884 | * |
| 885 | */ |
| 886 | class CSE_DataFlow |
| 887 | { |
| 888 | BitVecTraits* m_pBitVecTraits; |
| 889 | EXPSET_TP m_preMergeOut; |
| 890 | |
| 891 | public: |
| 892 | CSE_DataFlow(Compiler* pCompiler) : m_pBitVecTraits(pCompiler->cseTraits), m_preMergeOut(BitVecOps::UninitVal()) |
| 893 | { |
| 894 | } |
| 895 | |
| 896 | // At the start of the merge function of the dataflow equations, initialize premerge state (to detect changes.) |
| 897 | void StartMerge(BasicBlock* block) |
| 898 | { |
| 899 | BitVecOps::Assign(m_pBitVecTraits, m_preMergeOut, block->bbCseOut); |
| 900 | } |
| 901 | |
| 902 | // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags. |
| 903 | void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds) |
| 904 | { |
| 905 | BitVecOps::IntersectionD(m_pBitVecTraits, block->bbCseIn, predBlock->bbCseOut); |
| 906 | } |
| 907 | |
| 908 | // At the end of the merge store results of the dataflow equations, in a postmerge state. |
| 909 | bool EndMerge(BasicBlock* block) |
| 910 | { |
| 911 | BitVecOps::DataFlowD(m_pBitVecTraits, block->bbCseOut, block->bbCseGen, block->bbCseIn); |
| 912 | return !BitVecOps::Equal(m_pBitVecTraits, block->bbCseOut, m_preMergeOut); |
| 913 | } |
| 914 | }; |
| 915 | |
| 916 | /***************************************************************************** |
| 917 | * |
| 918 | * Perform a DataFlow forward analysis using the block CSE bitsets: |
| 919 | * Inputs: |
| 920 | * bbCseGen - Exact CSEs that are become available within the block |
| 921 | * bbCseIn - Maximal estimate of CSEs that are/could be available at input to the block |
| 922 | * bbCseOut - Maximal estimate of CSEs that are/could be available at exit to the block |
| 923 | * |
| 924 | * Outputs: |
| 925 | * bbCseIn - Computed CSEs that are available at input to the block |
| 926 | * bbCseOut - Computed CSEs that are available at exit to the block |
| 927 | */ |
| 928 | |
| 929 | void Compiler::optValnumCSE_DataFlow() |
| 930 | { |
| 931 | CSE_DataFlow cse(this); |
| 932 | |
| 933 | // Modified dataflow algorithm for available expressions. |
| 934 | DataFlow cse_flow(this); |
| 935 | |
| 936 | cse_flow.ForwardAnalysis(cse); |
| 937 | |
| 938 | #ifdef DEBUG |
| 939 | if (verbose) |
| 940 | { |
| 941 | printf("\nAfter performing DataFlow for ValnumCSE's\n" ); |
| 942 | |
| 943 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
| 944 | { |
| 945 | printf(FMT_BB, block->bbNum); |
| 946 | printf(" cseIn = %s" , genES2str(cseTraits, block->bbCseIn)); |
| 947 | printf(" cseOut = %s" , genES2str(cseTraits, block->bbCseOut)); |
| 948 | printf("\n" ); |
| 949 | } |
| 950 | |
| 951 | printf("\n" ); |
| 952 | } |
| 953 | #endif // DEBUG |
| 954 | } |
| 955 | |
| 956 | //--------------------------------------------------------------------------- |
| 957 | // optValnumCSE_Availablity: |
| 958 | // |
| 959 | // Using the information computed by CSE_DataFlow determine for each |
| 960 | // CSE whether the CSE is a definition (if the CSE was not available) |
| 961 | // or if the CSE is a use (if the CSE was previously made available) |
| 962 | // The implementation iterates of all blocks setting 'available_cses' |
| 963 | // to the CSEs that are available at input to the block. |
| 964 | // When a CSE expression is encountered it is classified as either |
| 965 | // as a definition (if the CSE is not in the 'available_cses' set) or |
| 966 | // as a use (if the CSE is in the 'available_cses' set). If the CSE |
| 967 | // is a definition then it is added to the 'available_cses' set. |
| 968 | // |
| 969 | // This algorithm uncovers the defs and uses gradually and as it does |
| 970 | // so it also builds the exception set that all defs make: 'defExcSetCurrent' |
| 971 | // and the exception set that the uses we have seen depend upon: 'defExcSetPromise' |
| 972 | // |
| 973 | // Typically expressions with the same normal ValueNum generate exactly the |
| 974 | // same exception sets. There are two way that we can get different exception |
| 975 | // sets with the same Normal value number. |
| 976 | // |
| 977 | // 1. We used an arithmetic identiity: |
| 978 | // e.g. (p.a + q.b) * 0 :: The normal value for the expression is zero |
| 979 | // and we have NullPtrExc(p) and NullPtrExc(q) |
| 980 | // e.g. (p.a - p.a) :: The normal value for the expression is zero |
| 981 | // and we have NullPtrExc(p) |
| 982 | // 2. We stored an expression into a LclVar or into Memory and read it later |
| 983 | // e.g. t = p.a; |
| 984 | // e1 = (t + q.b) :: e1 has one NullPtrExc and e2 has two. |
| 985 | // e2 = (p.a + q.b) but both compute the same normal value// |
| 986 | // e.g. m.a = p.a; |
| 987 | // e1 = (m.a + q.b) :: e1 and e2 have different exception sets. |
| 988 | // e2 = (p.a + q.b) but both compute the same normal value |
| 989 | // |
| 990 | // |
| 991 | void Compiler::optValnumCSE_Availablity() |
| 992 | { |
| 993 | #ifdef DEBUG |
| 994 | if (verbose) |
| 995 | { |
| 996 | printf("Labeling the CSEs with Use/Def information\n" ); |
| 997 | } |
| 998 | #endif |
| 999 | EXPSET_TP available_cses = BitVecOps::MakeEmpty(cseTraits); |
| 1000 | |
| 1001 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
| 1002 | { |
| 1003 | GenTree* stmt; |
| 1004 | GenTree* tree; |
| 1005 | |
| 1006 | // Make the block publicly available |
| 1007 | |
| 1008 | compCurBB = block; |
| 1009 | |
| 1010 | // Retrieve the available CSE's at the start of this block |
| 1011 | |
| 1012 | BitVecOps::Assign(cseTraits, available_cses, block->bbCseIn); |
| 1013 | |
| 1014 | optCSEweight = block->getBBWeight(this); |
| 1015 | |
| 1016 | // Walk the statement trees in this basic block |
| 1017 | |
| 1018 | for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext) |
| 1019 | { |
| 1020 | noway_assert(stmt->gtOper == GT_STMT); |
| 1021 | |
| 1022 | // We walk the tree in the forwards direction (bottom up) |
| 1023 | |
| 1024 | for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext) |
| 1025 | { |
| 1026 | if (IS_CSE_INDEX(tree->gtCSEnum)) |
| 1027 | { |
| 1028 | unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum); |
| 1029 | unsigned int cseBit = genCSEnum2bit(CSEnum); |
| 1030 | CSEdsc* desc = optCSEfindDsc(CSEnum); |
| 1031 | unsigned stmw = block->getBBWeight(this); |
| 1032 | bool isUse = BitVecOps::IsMember(cseTraits, available_cses, cseBit); |
| 1033 | bool isDef = !isUse; // If is isn't a CSE use, it is a CSE def |
| 1034 | #ifdef DEBUG |
| 1035 | VNFuncApp excSeq; |
| 1036 | |
| 1037 | if (verbose) |
| 1038 | { |
| 1039 | printf("BB%02u " , block->bbNum); |
| 1040 | printTreeID(tree); |
| 1041 | |
| 1042 | printf(" %s of CSE #%02u [weight=%s]\n" , isUse ? "Use" : "Def" , CSEnum, refCntWtd2str(stmw)); |
| 1043 | } |
| 1044 | #endif |
| 1045 | // Have we decided to abandon work on this CSE? |
| 1046 | if (desc->defExcSetPromise == ValueNumStore::NoVN) |
| 1047 | { |
| 1048 | // This candidate had defs with differing liberal exc set VNs |
| 1049 | // We have abandoned CSE promotion for this candidate |
| 1050 | |
| 1051 | // Clear the CSE flag |
| 1052 | tree->gtCSEnum = NO_CSE; |
| 1053 | |
| 1054 | JITDUMP(" Abandoned - CSE candidate has defs with different exception sets!\n" ); |
| 1055 | continue; |
| 1056 | } |
| 1057 | |
| 1058 | // Record the exception set for tree's liberal value number |
| 1059 | // |
| 1060 | ValueNum theLiberalExcSet = vnStore->VNExceptionSet(tree->gtVNPair.GetLiberal()); |
| 1061 | |
| 1062 | // Is this a CSE use or a def? |
| 1063 | |
| 1064 | if (isDef) |
| 1065 | { |
| 1066 | // @ToDo - Remove this block as it no longer applies |
| 1067 | if (tree->gtFlags & GTF_COLON_COND) |
| 1068 | { |
| 1069 | // We can't create CSE definitions inside QMARK-COLON trees |
| 1070 | tree->gtCSEnum = NO_CSE; |
| 1071 | |
| 1072 | JITDUMP(" NO_CSE - This CSE def occurs in a GTF_COLON_COND!\n" ); |
| 1073 | continue; |
| 1074 | } |
| 1075 | |
| 1076 | // This is a CSE def |
| 1077 | |
| 1078 | // Is defExcSetCurrent still set to the uninit marker value of VNForNull() ? |
| 1079 | if (desc->defExcSetCurrent == vnStore->VNForNull()) |
| 1080 | { |
| 1081 | // This is the first time visited, so record this defs exeception set |
| 1082 | desc->defExcSetCurrent = theLiberalExcSet; |
| 1083 | } |
| 1084 | |
| 1085 | // Have we seen a CSE use and made a promise of an exception set? |
| 1086 | // |
| 1087 | if (desc->defExcSetPromise != vnStore->VNForEmptyExcSet()) |
| 1088 | { |
| 1089 | // The exeception set held in desc->defExcSetPromise must be a subset of theLiberalExcSet |
| 1090 | // |
| 1091 | if (vnStore->VNExcIsSubset(theLiberalExcSet, desc->defExcSetPromise)) |
| 1092 | { |
| 1093 | // This new def still satisfies any promise made to all the CSE uses that we have |
| 1094 | // encountered |
| 1095 | // |
| 1096 | |
| 1097 | // no update is needed when these are the same VN |
| 1098 | if (desc->defExcSetCurrent != theLiberalExcSet) |
| 1099 | { |
| 1100 | // We will change the value of desc->defExcSetCurrent to be the intersection of |
| 1101 | // these two sets. |
| 1102 | // This is the set of exceptions that all CSE defs have (that we have visted so far) |
| 1103 | // |
| 1104 | ValueNum intersectionExcSet = |
| 1105 | vnStore->VNExcSetIntersection(desc->defExcSetCurrent, theLiberalExcSet); |
| 1106 | #ifdef DEBUG |
| 1107 | if (this->verbose) |
| 1108 | { |
| 1109 | vnStore->GetVNFunc(desc->defExcSetCurrent, &excSeq); |
| 1110 | printf(">>> defExcSetCurrent is " ); |
| 1111 | vnStore->vnDumpExcSeq(this, &excSeq, true); |
| 1112 | printf("\n" ); |
| 1113 | |
| 1114 | vnStore->GetVNFunc(theLiberalExcSet, &excSeq); |
| 1115 | printf(">>> theLiberalExcSet is " ); |
| 1116 | vnStore->vnDumpExcSeq(this, &excSeq, true); |
| 1117 | printf("\n" ); |
| 1118 | |
| 1119 | if (intersectionExcSet == vnStore->VNForEmptyExcSet()) |
| 1120 | { |
| 1121 | printf(">>> the intersectionExcSet is the EmptyExcSet\n" ); |
| 1122 | } |
| 1123 | else |
| 1124 | { |
| 1125 | vnStore->GetVNFunc(intersectionExcSet, &excSeq); |
| 1126 | printf(">>> the intersectionExcSet is " ); |
| 1127 | vnStore->vnDumpExcSeq(this, &excSeq, true); |
| 1128 | printf("\n" ); |
| 1129 | } |
| 1130 | } |
| 1131 | #endif // DEBUG |
| 1132 | // Change the defExcSetCurrent to be a subset of its prior value |
| 1133 | // |
| 1134 | assert(vnStore->VNExcIsSubset(desc->defExcSetCurrent, intersectionExcSet)); |
| 1135 | desc->defExcSetCurrent = intersectionExcSet; |
| 1136 | } |
| 1137 | } |
| 1138 | else // This CSE def doesn't satisfy one of the exceptions already promised to a CSE use |
| 1139 | { |
| 1140 | // So, we will abandon all CSE promotions for this candidate |
| 1141 | // |
| 1142 | // We use the marker value of NoVN to indicate that we |
| 1143 | // should abandon this CSE candidate |
| 1144 | // |
| 1145 | desc->defExcSetPromise = ValueNumStore::NoVN; |
| 1146 | tree->gtCSEnum = NO_CSE; |
| 1147 | |
| 1148 | JITDUMP(" Abandon - CSE candidate has defs with exception sets that do not satisfy " |
| 1149 | "some CSE use\n" ); |
| 1150 | continue; |
| 1151 | } |
| 1152 | } |
| 1153 | |
| 1154 | // Record or update the value of desc->defConservNormVN |
| 1155 | // |
| 1156 | ValueNum theConservNormVN = vnStore->VNConservativeNormalValue(tree->gtVNPair); |
| 1157 | |
| 1158 | // Is defConservNormVN still set to the uninit marker value of VNForNull() ? |
| 1159 | if (desc->defConservNormVN == vnStore->VNForNull()) |
| 1160 | { |
| 1161 | // This is the first def that we have visited, set defConservNormVN |
| 1162 | desc->defConservNormVN = theConservNormVN; |
| 1163 | } |
| 1164 | else |
| 1165 | { |
| 1166 | // Check to see if all defs have the same conservative normal VN |
| 1167 | if (theConservNormVN != desc->defConservNormVN) |
| 1168 | { |
| 1169 | // This candidate has defs with differing conservative normal VNs, mark it with NoVN |
| 1170 | desc->defConservNormVN = ValueNumStore::NoVN; // record the marker for differing VNs |
| 1171 | } |
| 1172 | } |
| 1173 | |
| 1174 | // If we get here we have accepted this node as a valid CSE def |
| 1175 | |
| 1176 | desc->csdDefCount += 1; |
| 1177 | desc->csdDefWtCnt += stmw; |
| 1178 | |
| 1179 | // Mark the node as a CSE definition |
| 1180 | |
| 1181 | tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum); |
| 1182 | |
| 1183 | // This CSE becomes available after this def |
| 1184 | BitVecOps::AddElemD(cseTraits, available_cses, cseBit); |
| 1185 | } |
| 1186 | else // We are visiting a CSE use |
| 1187 | { |
| 1188 | assert(isUse); |
| 1189 | |
| 1190 | // If the CSE use has no requirements for an exception set then we don't have to do anything |
| 1191 | // here |
| 1192 | // |
| 1193 | if (theLiberalExcSet != vnStore->VNForEmptyExcSet()) |
| 1194 | { |
| 1195 | // Are we visiting a use first, before visiting any defs of this CSE? |
| 1196 | // This is an atypical case that can occur with a bottom tested loop. |
| 1197 | // |
| 1198 | // Is defExcSetCurrent still set to the uninit marker value of VNForNull() ? |
| 1199 | if (desc->defExcSetCurrent == vnStore->VNForNull()) |
| 1200 | { |
| 1201 | // Update defExcSetPromise, this is our required exception set for all CSE defs |
| 1202 | // that we encounter later. |
| 1203 | // |
| 1204 | // We could see multiple uses before a def, so we require the Union of all exception |
| 1205 | // sets |
| 1206 | // |
| 1207 | desc->defExcSetPromise = |
| 1208 | vnStore->VNExcSetUnion(desc->defExcSetPromise, theLiberalExcSet); |
| 1209 | } |
| 1210 | else // we have already seen a def for this CSE and defExcSetCurrent is setup |
| 1211 | { |
| 1212 | if (vnStore->VNExcIsSubset(desc->defExcSetCurrent, theLiberalExcSet)) |
| 1213 | { |
| 1214 | // The current set of exceptions produced by all CSE defs have (that we have visted |
| 1215 | // so far) |
| 1216 | // meets our requirement |
| 1217 | // |
| 1218 | // Add any exception items to the defExcSetPromise set |
| 1219 | // |
| 1220 | desc->defExcSetPromise = |
| 1221 | vnStore->VNExcSetUnion(desc->defExcSetPromise, theLiberalExcSet); |
| 1222 | } |
| 1223 | } |
| 1224 | |
| 1225 | // At this point defExcSetPromise contains all of the exception items that we can promise |
| 1226 | // here. |
| 1227 | // |
| 1228 | if (!vnStore->VNExcIsSubset(desc->defExcSetPromise, theLiberalExcSet)) |
| 1229 | { |
| 1230 | // We can't safely make this into a CSE use, because this |
| 1231 | // CSE use has an exeception set item that is not promised |
| 1232 | // by all of our CSE defs. |
| 1233 | // |
| 1234 | // We will omit this CSE use from the graph and proceed, |
| 1235 | // the other uses and defs can still participate in the CSE optimization. |
| 1236 | |
| 1237 | // So this can't be a CSE use |
| 1238 | tree->gtCSEnum = NO_CSE; |
| 1239 | |
| 1240 | JITDUMP( |
| 1241 | " NO_CSE - This use has an exception set item that isn't contained in the defs!\n" ); |
| 1242 | continue; |
| 1243 | } |
| 1244 | } |
| 1245 | |
| 1246 | // When we get here we have accepted this node as a valid CSE use |
| 1247 | |
| 1248 | desc->csdUseCount += 1; |
| 1249 | desc->csdUseWtCnt += stmw; |
| 1250 | } |
| 1251 | } |
| 1252 | } |
| 1253 | } |
| 1254 | } |
| 1255 | } |
| 1256 | |
| 1257 | // The following class handles the CSE heuristics |
| 1258 | // we use a complex set of heuristic rules |
| 1259 | // to determine if it is likely to be profitable to perform this CSE |
| 1260 | // |
| 1261 | class CSE_Heuristic |
| 1262 | { |
| 1263 | Compiler* m_pCompiler; |
| 1264 | unsigned m_addCSEcount; |
| 1265 | |
| 1266 | unsigned aggressiveRefCnt; |
| 1267 | unsigned moderateRefCnt; |
| 1268 | unsigned enregCount; // count of the number of enregisterable variables |
| 1269 | bool largeFrame; |
| 1270 | bool hugeFrame; |
| 1271 | Compiler::codeOptimize codeOptKind; |
| 1272 | Compiler::CSEdsc** sortTab; |
| 1273 | size_t sortSiz; |
| 1274 | #ifdef DEBUG |
| 1275 | CLRRandom m_cseRNG; |
| 1276 | unsigned m_bias; |
| 1277 | #endif |
| 1278 | |
| 1279 | public: |
| 1280 | CSE_Heuristic(Compiler* pCompiler) : m_pCompiler(pCompiler) |
| 1281 | { |
| 1282 | codeOptKind = m_pCompiler->compCodeOpt(); |
| 1283 | } |
| 1284 | |
| 1285 | Compiler::codeOptimize CodeOptKind() |
| 1286 | { |
| 1287 | return codeOptKind; |
| 1288 | } |
| 1289 | |
| 1290 | // Perform the Initialization step for our CSE Heuristics |
| 1291 | // determine the various cut off values to use for |
| 1292 | // the aggressive, moderate and conservative CSE promotions |
| 1293 | // count the number of enregisterable variables |
| 1294 | // determine if the method has a large or huge stack frame. |
| 1295 | // |
| 1296 | void Initialize() |
| 1297 | { |
| 1298 | m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */ |
| 1299 | |
| 1300 | // Record the weighted ref count of the last "for sure" callee saved LclVar |
| 1301 | aggressiveRefCnt = 0; |
| 1302 | moderateRefCnt = 0; |
| 1303 | enregCount = 0; |
| 1304 | largeFrame = false; |
| 1305 | hugeFrame = false; |
| 1306 | sortTab = nullptr; |
| 1307 | sortSiz = 0; |
| 1308 | |
| 1309 | #ifdef _TARGET_XARCH_ |
| 1310 | if (m_pCompiler->compLongUsed) |
| 1311 | { |
| 1312 | enregCount++; |
| 1313 | } |
| 1314 | #endif |
| 1315 | |
| 1316 | unsigned frameSize = 0; |
| 1317 | unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1); |
| 1318 | unsigned lclNum; |
| 1319 | LclVarDsc* varDsc; |
| 1320 | |
| 1321 | for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; lclNum++, varDsc++) |
| 1322 | { |
| 1323 | if (varDsc->lvRefCnt() == 0) |
| 1324 | { |
| 1325 | continue; |
| 1326 | } |
| 1327 | |
| 1328 | #if FEATURE_FIXED_OUT_ARGS |
| 1329 | // Skip the OutgoingArgArea in computing frame size, since |
| 1330 | // its size is not yet known and it doesn't affect local |
| 1331 | // offsets from the frame pointer (though it may affect |
| 1332 | // them from the stack pointer). |
| 1333 | noway_assert(m_pCompiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); |
| 1334 | if (lclNum == m_pCompiler->lvaOutgoingArgSpaceVar) |
| 1335 | { |
| 1336 | continue; |
| 1337 | } |
| 1338 | #endif // FEATURE_FIXED_OUT_ARGS |
| 1339 | |
| 1340 | bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home |
| 1341 | |
| 1342 | // Some LclVars always have stack homes |
| 1343 | if ((varDsc->lvDoNotEnregister) || (varDsc->lvType == TYP_LCLBLK)) |
| 1344 | { |
| 1345 | onStack = true; |
| 1346 | } |
| 1347 | |
| 1348 | #ifdef _TARGET_X86_ |
| 1349 | // Treat floating point and 64 bit integers as always on the stack |
| 1350 | if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsLong(varDsc->TypeGet())) |
| 1351 | onStack = true; |
| 1352 | #endif |
| 1353 | |
| 1354 | if (onStack) |
| 1355 | { |
| 1356 | frameSize += m_pCompiler->lvaLclSize(lclNum); |
| 1357 | } |
| 1358 | else |
| 1359 | { |
| 1360 | // For the purposes of estimating the frameSize we |
| 1361 | // will consider this LclVar as being enregistered. |
| 1362 | // Now we reduce the remaining regAvailEstimate by |
| 1363 | // an appropriate amount. |
| 1364 | if (varDsc->lvRefCnt() <= 2) |
| 1365 | { |
| 1366 | // a single use single def LclVar only uses 1 |
| 1367 | regAvailEstimate -= 1; |
| 1368 | } |
| 1369 | else |
| 1370 | { |
| 1371 | // a LclVar with multiple uses and defs uses 2 |
| 1372 | if (regAvailEstimate >= 2) |
| 1373 | { |
| 1374 | regAvailEstimate -= 2; |
| 1375 | } |
| 1376 | else |
| 1377 | { |
| 1378 | // Don't try to subtract when regAvailEstimate is 1 |
| 1379 | regAvailEstimate = 0; |
| 1380 | } |
| 1381 | } |
| 1382 | } |
| 1383 | #ifdef _TARGET_XARCH_ |
| 1384 | if (frameSize > 0x080) |
| 1385 | { |
| 1386 | // We likely have a large stack frame. |
| 1387 | // Thus we might need to use large displacements when loading or storing |
| 1388 | // to CSE LclVars that are not enregistered |
| 1389 | largeFrame = true; |
| 1390 | break; // early out, we don't need to keep increasing frameSize |
| 1391 | } |
| 1392 | #else // _TARGET_ARM_ |
| 1393 | if (frameSize > 0x0400) |
| 1394 | { |
| 1395 | largeFrame = true; |
| 1396 | } |
| 1397 | if (frameSize > 0x10000) |
| 1398 | { |
| 1399 | hugeFrame = true; |
| 1400 | break; |
| 1401 | } |
| 1402 | #endif |
| 1403 | } |
| 1404 | |
| 1405 | unsigned sortNum = 0; |
| 1406 | while (sortNum < m_pCompiler->lvaTrackedCount) |
| 1407 | { |
| 1408 | LclVarDsc* varDsc = m_pCompiler->lvaRefSorted[sortNum++]; |
| 1409 | var_types varTyp = varDsc->TypeGet(); |
| 1410 | |
| 1411 | if (varDsc->lvDoNotEnregister) |
| 1412 | { |
| 1413 | continue; |
| 1414 | } |
| 1415 | |
| 1416 | if (!varTypeIsFloating(varTyp)) |
| 1417 | { |
| 1418 | // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior. |
| 1419 | // Note that this makes genTypeStSz return 1. |
| 1420 | if (varTypeIsStruct(varTyp)) |
| 1421 | { |
| 1422 | varTyp = TYP_STRUCT; |
| 1423 | } |
| 1424 | enregCount += genTypeStSz(varTyp); |
| 1425 | } |
| 1426 | |
| 1427 | if ((aggressiveRefCnt == 0) && (enregCount > (CNT_CALLEE_ENREG * 3 / 2))) |
| 1428 | { |
| 1429 | if (CodeOptKind() == Compiler::SMALL_CODE) |
| 1430 | { |
| 1431 | aggressiveRefCnt = varDsc->lvRefCnt() + BB_UNITY_WEIGHT; |
| 1432 | } |
| 1433 | else |
| 1434 | { |
| 1435 | aggressiveRefCnt = varDsc->lvRefCntWtd() + BB_UNITY_WEIGHT; |
| 1436 | } |
| 1437 | } |
| 1438 | if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2)))) |
| 1439 | { |
| 1440 | if (CodeOptKind() == Compiler::SMALL_CODE) |
| 1441 | { |
| 1442 | moderateRefCnt = varDsc->lvRefCnt(); |
| 1443 | } |
| 1444 | else |
| 1445 | { |
| 1446 | moderateRefCnt = varDsc->lvRefCntWtd(); |
| 1447 | } |
| 1448 | } |
| 1449 | } |
| 1450 | unsigned mult = 3; |
| 1451 | // use smaller value for mult when enregCount is in [0..4] |
| 1452 | if (enregCount <= 4) |
| 1453 | { |
| 1454 | mult = (enregCount <= 2) ? 1 : 2; |
| 1455 | } |
| 1456 | |
| 1457 | aggressiveRefCnt = max(BB_UNITY_WEIGHT * mult, aggressiveRefCnt); |
| 1458 | moderateRefCnt = max((BB_UNITY_WEIGHT * mult) / 2, moderateRefCnt); |
| 1459 | |
| 1460 | #ifdef DEBUG |
| 1461 | if (m_pCompiler->verbose) |
| 1462 | { |
| 1463 | printf("\n" ); |
| 1464 | printf("Aggressive CSE Promotion cutoff is %u\n" , aggressiveRefCnt); |
| 1465 | printf("Moderate CSE Promotion cutoff is %u\n" , moderateRefCnt); |
| 1466 | printf("Framesize estimate is 0x%04X\n" , frameSize); |
| 1467 | printf("We have a %s frame\n" , hugeFrame ? "huge" : (largeFrame ? "large" : "small" )); |
| 1468 | } |
| 1469 | #endif |
| 1470 | } |
| 1471 | |
| 1472 | void SortCandidates() |
| 1473 | { |
| 1474 | /* Create an expression table sorted by decreasing cost */ |
| 1475 | sortTab = new (m_pCompiler, CMK_CSE) Compiler::CSEdsc*[m_pCompiler->optCSECandidateCount]; |
| 1476 | |
| 1477 | sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab); |
| 1478 | memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz); |
| 1479 | |
| 1480 | if (CodeOptKind() == Compiler::SMALL_CODE) |
| 1481 | { |
| 1482 | qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpSz); |
| 1483 | } |
| 1484 | else |
| 1485 | { |
| 1486 | qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpEx); |
| 1487 | } |
| 1488 | |
| 1489 | #ifdef DEBUG |
| 1490 | if (m_pCompiler->verbose) |
| 1491 | { |
| 1492 | printf("\nSorted CSE candidates:\n" ); |
| 1493 | /* Print out the CSE candidates */ |
| 1494 | EXPSET_TP tempMask; |
| 1495 | for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++) |
| 1496 | { |
| 1497 | Compiler::CSEdsc* dsc = sortTab[cnt]; |
| 1498 | GenTree* expr = dsc->csdTree; |
| 1499 | |
| 1500 | unsigned def; |
| 1501 | unsigned use; |
| 1502 | |
| 1503 | if (CodeOptKind() == Compiler::SMALL_CODE) |
| 1504 | { |
| 1505 | def = dsc->csdDefCount; // def count |
| 1506 | use = dsc->csdUseCount; // use count (excluding the implicit uses at defs) |
| 1507 | } |
| 1508 | else |
| 1509 | { |
| 1510 | def = dsc->csdDefWtCnt; // weighted def count |
| 1511 | use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) |
| 1512 | } |
| 1513 | |
| 1514 | tempMask = BitVecOps::MakeSingleton(m_pCompiler->cseTraits, genCSEnum2bit(dsc->csdIndex)); |
| 1515 | printf("CSE #%02u, {$%-3x, $%-3x} cseMask=%s,useCnt=%d: [def=%3u, use=%3u" , dsc->csdIndex, |
| 1516 | dsc->csdHashKey, dsc->defExcSetPromise, genES2str(m_pCompiler->cseTraits, tempMask), |
| 1517 | dsc->csdUseCount, def, use); |
| 1518 | printf("] :: " ); |
| 1519 | m_pCompiler->gtDispTree(expr, nullptr, nullptr, true); |
| 1520 | } |
| 1521 | printf("\n" ); |
| 1522 | } |
| 1523 | #endif // DEBUG |
| 1524 | } |
| 1525 | |
| 1526 | // The following class nested within CSE_Heuristic encapsulates the information |
| 1527 | // about the current CSE candidate that is under consideration |
| 1528 | // |
| 1529 | // TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation |
| 1530 | // and needs to be reworked for the Value Number based implementation |
| 1531 | // |
| 1532 | class CSE_Candidate |
| 1533 | { |
| 1534 | CSE_Heuristic* m_context; |
| 1535 | Compiler::CSEdsc* m_CseDsc; |
| 1536 | |
| 1537 | unsigned m_cseIndex; |
| 1538 | |
| 1539 | unsigned m_defCount; |
| 1540 | unsigned m_useCount; |
| 1541 | |
| 1542 | unsigned m_Cost; |
| 1543 | unsigned m_Size; |
| 1544 | |
| 1545 | public: |
| 1546 | CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc) : m_context(context), m_CseDsc(cseDsc) |
| 1547 | { |
| 1548 | m_cseIndex = m_CseDsc->csdIndex; |
| 1549 | } |
| 1550 | |
| 1551 | Compiler::CSEdsc* CseDsc() |
| 1552 | { |
| 1553 | return m_CseDsc; |
| 1554 | } |
| 1555 | unsigned CseIndex() |
| 1556 | { |
| 1557 | return m_cseIndex; |
| 1558 | } |
| 1559 | unsigned DefCount() |
| 1560 | { |
| 1561 | return m_defCount; |
| 1562 | } |
| 1563 | unsigned UseCount() |
| 1564 | { |
| 1565 | return m_useCount; |
| 1566 | } |
| 1567 | // TODO-CQ: With ValNum CSE's the Expr and its cost can vary. |
| 1568 | GenTree* Expr() |
| 1569 | { |
| 1570 | return m_CseDsc->csdTree; |
| 1571 | } |
| 1572 | unsigned Cost() |
| 1573 | { |
| 1574 | return m_Cost; |
| 1575 | } |
| 1576 | unsigned Size() |
| 1577 | { |
| 1578 | return m_Size; |
| 1579 | } |
| 1580 | |
| 1581 | bool LiveAcrossCall() |
| 1582 | { |
| 1583 | return (m_CseDsc->csdLiveAcrossCall != 0); |
| 1584 | } |
| 1585 | |
| 1586 | void InitializeCounts() |
| 1587 | { |
| 1588 | if (m_context->CodeOptKind() == Compiler::SMALL_CODE) |
| 1589 | { |
| 1590 | m_Cost = Expr()->gtCostSz; // the estimated code size |
| 1591 | m_Size = Expr()->gtCostSz; // always the gtCostSz |
| 1592 | m_defCount = m_CseDsc->csdDefCount; // def count |
| 1593 | m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs) |
| 1594 | } |
| 1595 | else |
| 1596 | { |
| 1597 | m_Cost = Expr()->gtCostEx; // the estimated execution cost |
| 1598 | m_Size = Expr()->gtCostSz; // always the gtCostSz |
| 1599 | m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count |
| 1600 | m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) |
| 1601 | } |
| 1602 | } |
| 1603 | }; |
| 1604 | |
| 1605 | #ifdef DEBUG |
| 1606 | //------------------------------------------------------------------------ |
| 1607 | // optConfigBiasedCSE: |
| 1608 | // Stress mode to shuffle the decision to CSE or not using environment |
| 1609 | // variable COMPlus_JitStressBiasedCSE (= 0 to 100%). When the bias value |
| 1610 | // is not specified but COMPlus_JitStress is ON, generate a random bias. |
| 1611 | // |
| 1612 | // Return Value: |
| 1613 | // 0 -- This method is indifferent about this CSE (no bias specified and no stress) |
| 1614 | // 1 -- This CSE must be performed to maintain specified/generated bias. |
| 1615 | // -1 -- This CSE mustn't be performed to maintain specified/generated bias. |
| 1616 | // |
| 1617 | // Operation: |
| 1618 | // A debug stress only method that returns "1" with probability (P) |
| 1619 | // defined by: |
| 1620 | // |
| 1621 | // P = (COMPlus_JitStressBiasedCSE / 100) (or) |
| 1622 | // P = (random(100) / 100) when COMPlus_JitStress is specified and |
| 1623 | // COMPlus_JitStressBiasedCSE is unspecified. |
| 1624 | // |
| 1625 | // When specified, the bias is reinterpreted as a decimal number between 0 |
| 1626 | // to 100. |
| 1627 | // When bias is not specified, a bias is randomly generated if COMPlus_JitStress |
| 1628 | // is non-zero. |
| 1629 | // |
| 1630 | // Callers are supposed to call this method for each CSE promotion decision |
| 1631 | // and ignore the call if return value is 0 and honor the 1 with a CSE and |
| 1632 | // -1 with a no-CSE to maintain the specified/generated bias. |
| 1633 | // |
| 1634 | int optConfigBiasedCSE() |
| 1635 | { |
| 1636 | // Seed the PRNG, if never done before. |
| 1637 | if (!m_cseRNG.IsInitialized()) |
| 1638 | { |
| 1639 | m_cseRNG.Init(m_pCompiler->info.compMethodHash()); |
| 1640 | m_bias = m_cseRNG.Next(100); |
| 1641 | } |
| 1642 | |
| 1643 | // Obtain the bias value and reinterpret as decimal. |
| 1644 | unsigned bias = ReinterpretHexAsDecimal(JitConfig.JitStressBiasedCSE()); |
| 1645 | |
| 1646 | // Invalid value, check if JitStress is ON. |
| 1647 | if (bias > 100) |
| 1648 | { |
| 1649 | if (!m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT)) |
| 1650 | { |
| 1651 | // JitStress is OFF for CSE, nothing to do. |
| 1652 | return 0; |
| 1653 | } |
| 1654 | bias = m_bias; |
| 1655 | JITDUMP("JitStressBiasedCSE is OFF, but JitStress is ON: generated bias=%d.\n" , bias); |
| 1656 | } |
| 1657 | |
| 1658 | // Generate a number between (0, 99) and if the generated |
| 1659 | // number is smaller than bias, then perform CSE. |
| 1660 | unsigned gen = m_cseRNG.Next(100); |
| 1661 | int ret = (gen < bias) ? 1 : -1; |
| 1662 | |
| 1663 | if (m_pCompiler->verbose) |
| 1664 | { |
| 1665 | if (ret < 0) |
| 1666 | { |
| 1667 | printf("No CSE because gen=%d >= bias=%d\n" , gen, bias); |
| 1668 | } |
| 1669 | else |
| 1670 | { |
| 1671 | printf("Promoting CSE because gen=%d < bias=%d\n" , gen, bias); |
| 1672 | } |
| 1673 | } |
| 1674 | |
| 1675 | // Indicate whether to perform CSE or not. |
| 1676 | return ret; |
| 1677 | } |
| 1678 | #endif |
| 1679 | |
| 1680 | // Given a CSE candidate decide whether it passes or fails the profitability heuristic |
| 1681 | // return true if we believe that it is profitable to promote this candidate to a CSE |
| 1682 | // |
| 1683 | bool PromotionCheck(CSE_Candidate* candidate) |
| 1684 | { |
| 1685 | bool result = false; |
| 1686 | |
| 1687 | #ifdef DEBUG |
| 1688 | int stressResult = optConfigBiasedCSE(); |
| 1689 | if (stressResult != 0) |
| 1690 | { |
| 1691 | // Stress is enabled. Check whether to perform CSE or not. |
| 1692 | return (stressResult > 0); |
| 1693 | } |
| 1694 | |
| 1695 | if (m_pCompiler->optConfigDisableCSE2()) |
| 1696 | { |
| 1697 | return false; // skip this CSE |
| 1698 | } |
| 1699 | #endif |
| 1700 | |
| 1701 | /* |
| 1702 | Our calculation is based on the following cost estimate formula |
| 1703 | |
| 1704 | Existing costs are: |
| 1705 | |
| 1706 | (def + use) * cost |
| 1707 | |
| 1708 | If we introduce a CSE temp are each definition and |
| 1709 | replace the use with a CSE temp then our cost is: |
| 1710 | |
| 1711 | (def * (cost + cse-def-cost)) + (use * cse-use-cost) |
| 1712 | |
| 1713 | We must estimate the values to use for cse-def-cost and cse-use-cost |
| 1714 | |
| 1715 | If we are able to enregister the CSE then the cse-use-cost is one |
| 1716 | and cse-def-cost is either zero or one. Zero in the case where |
| 1717 | we needed to evaluate the def into a register and we can use that |
| 1718 | register as the CSE temp as well. |
| 1719 | |
| 1720 | If we are unable to enregister the CSE then the cse-use-cost is IND_COST |
| 1721 | and the cse-def-cost is also IND_COST. |
| 1722 | |
| 1723 | If we want to be conservative we use IND_COST as the the value |
| 1724 | for both cse-def-cost and cse-use-cost and then we never introduce |
| 1725 | a CSE that could pessimize the execution time of the method. |
| 1726 | |
| 1727 | If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the |
| 1728 | values for both cse-def-cost and cse-use-cost. |
| 1729 | |
| 1730 | If we want to be aggressive we use 1 as the values for both |
| 1731 | cse-def-cost and cse-use-cost. |
| 1732 | |
| 1733 | If we believe that the CSE very valuable in terms of weighted ref counts |
| 1734 | such that it would always be enregistered by the register allocator we choose |
| 1735 | the aggressive use def costs. |
| 1736 | |
| 1737 | If we believe that the CSE is somewhat valuable in terms of weighted ref counts |
| 1738 | such that it could be likely be enregistered by the register allocator we choose |
| 1739 | the moderate use def costs. |
| 1740 | |
| 1741 | otherwise we choose the conservative use def costs. |
| 1742 | |
| 1743 | */ |
| 1744 | |
| 1745 | unsigned cse_def_cost; |
| 1746 | unsigned cse_use_cost; |
| 1747 | |
| 1748 | unsigned no_cse_cost = 0; |
| 1749 | unsigned yes_cse_cost = 0; |
| 1750 | unsigned = 0; |
| 1751 | unsigned = 0; |
| 1752 | |
| 1753 | // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar |
| 1754 | // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar |
| 1755 | unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount(); |
| 1756 | |
| 1757 | if (CodeOptKind() == Compiler::SMALL_CODE) |
| 1758 | { |
| 1759 | if (cseRefCnt >= aggressiveRefCnt) |
| 1760 | { |
| 1761 | #ifdef DEBUG |
| 1762 | if (m_pCompiler->verbose) |
| 1763 | { |
| 1764 | printf("Aggressive CSE Promotion (%u >= %u)\n" , cseRefCnt, aggressiveRefCnt); |
| 1765 | } |
| 1766 | #endif |
| 1767 | cse_def_cost = 1; |
| 1768 | cse_use_cost = 1; |
| 1769 | |
| 1770 | if (candidate->LiveAcrossCall() != 0) |
| 1771 | { |
| 1772 | if (largeFrame) |
| 1773 | { |
| 1774 | cse_def_cost++; |
| 1775 | cse_use_cost++; |
| 1776 | } |
| 1777 | if (hugeFrame) |
| 1778 | { |
| 1779 | cse_def_cost++; |
| 1780 | cse_use_cost++; |
| 1781 | } |
| 1782 | } |
| 1783 | } |
| 1784 | else if (largeFrame) |
| 1785 | { |
| 1786 | #ifdef DEBUG |
| 1787 | if (m_pCompiler->verbose) |
| 1788 | { |
| 1789 | printf("Codesize CSE Promotion (large frame)\n" ); |
| 1790 | } |
| 1791 | #endif |
| 1792 | #ifdef _TARGET_XARCH_ |
| 1793 | /* The following formula is good choice when optimizing CSE for SMALL_CODE */ |
| 1794 | cse_def_cost = 6; // mov [EBP-0x00001FC],reg |
| 1795 | cse_use_cost = 5; // [EBP-0x00001FC] |
| 1796 | #else // _TARGET_ARM_ |
| 1797 | if (hugeFrame) |
| 1798 | { |
| 1799 | cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10] |
| 1800 | cse_use_cost = 12; |
| 1801 | } |
| 1802 | else |
| 1803 | { |
| 1804 | cse_def_cost = 8; // movw r10 and str reg,[sp+r10] |
| 1805 | cse_use_cost = 8; |
| 1806 | } |
| 1807 | #endif |
| 1808 | } |
| 1809 | else // small frame |
| 1810 | { |
| 1811 | #ifdef DEBUG |
| 1812 | if (m_pCompiler->verbose) |
| 1813 | { |
| 1814 | printf("Codesize CSE Promotion (small frame)\n" ); |
| 1815 | } |
| 1816 | #endif |
| 1817 | #ifdef _TARGET_XARCH_ |
| 1818 | /* The following formula is good choice when optimizing CSE for SMALL_CODE */ |
| 1819 | cse_def_cost = 3; // mov [EBP-1C],reg |
| 1820 | cse_use_cost = 2; // [EBP-1C] |
| 1821 | #else // _TARGET_ARM_ |
| 1822 | cse_def_cost = 2; // str reg,[sp+0x9c] |
| 1823 | cse_use_cost = 2; // ldr reg,[sp+0x9c] |
| 1824 | #endif |
| 1825 | } |
| 1826 | } |
| 1827 | else // not SMALL_CODE ... |
| 1828 | { |
| 1829 | if (cseRefCnt >= aggressiveRefCnt) |
| 1830 | { |
| 1831 | #ifdef DEBUG |
| 1832 | if (m_pCompiler->verbose) |
| 1833 | { |
| 1834 | printf("Aggressive CSE Promotion (%u >= %u)\n" , cseRefCnt, aggressiveRefCnt); |
| 1835 | } |
| 1836 | #endif |
| 1837 | cse_def_cost = 1; |
| 1838 | cse_use_cost = 1; |
| 1839 | } |
| 1840 | else if (cseRefCnt >= moderateRefCnt) |
| 1841 | { |
| 1842 | |
| 1843 | if (candidate->LiveAcrossCall() == 0) |
| 1844 | { |
| 1845 | #ifdef DEBUG |
| 1846 | if (m_pCompiler->verbose) |
| 1847 | { |
| 1848 | printf("Moderate CSE Promotion (CSE never live at call) (%u >= %u)\n" , cseRefCnt, |
| 1849 | moderateRefCnt); |
| 1850 | } |
| 1851 | #endif |
| 1852 | cse_def_cost = 2; |
| 1853 | cse_use_cost = 1; |
| 1854 | } |
| 1855 | else // candidate is live across call |
| 1856 | { |
| 1857 | #ifdef DEBUG |
| 1858 | if (m_pCompiler->verbose) |
| 1859 | { |
| 1860 | printf("Moderate CSE Promotion (%u >= %u)\n" , cseRefCnt, moderateRefCnt); |
| 1861 | } |
| 1862 | #endif |
| 1863 | cse_def_cost = 2; |
| 1864 | cse_use_cost = 2; |
| 1865 | extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller |
| 1866 | // saved register |
| 1867 | } |
| 1868 | } |
| 1869 | else // Conservative CSE promotion |
| 1870 | { |
| 1871 | if (candidate->LiveAcrossCall() == 0) |
| 1872 | { |
| 1873 | #ifdef DEBUG |
| 1874 | if (m_pCompiler->verbose) |
| 1875 | { |
| 1876 | printf("Conservative CSE Promotion (CSE never live at call) (%u < %u)\n" , cseRefCnt, |
| 1877 | moderateRefCnt); |
| 1878 | } |
| 1879 | #endif |
| 1880 | cse_def_cost = 2; |
| 1881 | cse_use_cost = 2; |
| 1882 | } |
| 1883 | else // candidate is live across call |
| 1884 | { |
| 1885 | #ifdef DEBUG |
| 1886 | if (m_pCompiler->verbose) |
| 1887 | { |
| 1888 | printf("Conservative CSE Promotion (%u < %u)\n" , cseRefCnt, moderateRefCnt); |
| 1889 | } |
| 1890 | #endif |
| 1891 | cse_def_cost = 3; |
| 1892 | cse_use_cost = 3; |
| 1893 | extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller |
| 1894 | // saved register |
| 1895 | } |
| 1896 | |
| 1897 | // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable |
| 1898 | if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED) |
| 1899 | { |
| 1900 | cse_def_cost++; |
| 1901 | cse_use_cost++; |
| 1902 | } |
| 1903 | } |
| 1904 | |
| 1905 | if (largeFrame) |
| 1906 | { |
| 1907 | cse_def_cost++; |
| 1908 | cse_use_cost++; |
| 1909 | } |
| 1910 | if (hugeFrame) |
| 1911 | { |
| 1912 | cse_def_cost++; |
| 1913 | cse_use_cost++; |
| 1914 | } |
| 1915 | } |
| 1916 | |
| 1917 | // estimate the cost from lost codesize reduction if we do not perform the CSE |
| 1918 | if (candidate->Size() > cse_use_cost) |
| 1919 | { |
| 1920 | Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the |
| 1921 | // weighted count |
| 1922 | extra_no_cost = candidate->Size() - cse_use_cost; |
| 1923 | extra_no_cost = extra_no_cost * dsc->csdUseCount * 2; |
| 1924 | } |
| 1925 | |
| 1926 | /* no_cse_cost is the cost estimate when we decide not to make a CSE */ |
| 1927 | /* yes_cse_cost is the cost estimate when we decide to make a CSE */ |
| 1928 | |
| 1929 | no_cse_cost = candidate->UseCount() * candidate->Cost(); |
| 1930 | yes_cse_cost = (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost); |
| 1931 | |
| 1932 | no_cse_cost += extra_no_cost; |
| 1933 | yes_cse_cost += extra_yes_cost; |
| 1934 | |
| 1935 | #ifdef DEBUG |
| 1936 | if (m_pCompiler->verbose) |
| 1937 | { |
| 1938 | printf("cseRefCnt=%d, aggressiveRefCnt=%d, moderateRefCnt=%d\n" , cseRefCnt, aggressiveRefCnt, |
| 1939 | moderateRefCnt); |
| 1940 | printf("defCnt=%d, useCnt=%d, cost=%d, size=%d\n" , candidate->DefCount(), candidate->UseCount(), |
| 1941 | candidate->Cost(), candidate->Size()); |
| 1942 | printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n" , cse_def_cost, cse_use_cost, |
| 1943 | extra_no_cost, extra_yes_cost); |
| 1944 | |
| 1945 | printf("CSE cost savings check (%u >= %u) %s\n" , no_cse_cost, yes_cse_cost, |
| 1946 | (no_cse_cost >= yes_cse_cost) ? "passes" : "fails" ); |
| 1947 | } |
| 1948 | #endif |
| 1949 | |
| 1950 | // Should we make this candidate into a CSE? |
| 1951 | // Is the yes cost less than the no cost |
| 1952 | // |
| 1953 | if (yes_cse_cost <= no_cse_cost) |
| 1954 | { |
| 1955 | result = true; // Yes make this a CSE |
| 1956 | } |
| 1957 | else |
| 1958 | { |
| 1959 | /* In stress mode we will make some extra CSEs */ |
| 1960 | if (no_cse_cost > 0) |
| 1961 | { |
| 1962 | int percentage = (no_cse_cost * 100) / yes_cse_cost; |
| 1963 | |
| 1964 | if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage)) |
| 1965 | { |
| 1966 | result = true; // Yes make this a CSE |
| 1967 | } |
| 1968 | } |
| 1969 | } |
| 1970 | |
| 1971 | return result; |
| 1972 | } |
| 1973 | |
| 1974 | // PerformCSE() takes a successful candidate and performs the appropriate replacements: |
| 1975 | // |
| 1976 | // It will replace all of the CSE defs with assignments to a new "cse0" LclVar |
| 1977 | // and will replace all of the CSE uses with reads of the "cse0" LclVar |
| 1978 | // |
| 1979 | void PerformCSE(CSE_Candidate* successfulCandidate) |
| 1980 | { |
| 1981 | unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount(); |
| 1982 | |
| 1983 | if (successfulCandidate->LiveAcrossCall() != 0) |
| 1984 | { |
| 1985 | // As we introduce new LclVars for these CSE we slightly |
| 1986 | // increase the cutoffs for aggressive and moderate CSE's |
| 1987 | // |
| 1988 | int incr = BB_UNITY_WEIGHT; |
| 1989 | |
| 1990 | if (cseRefCnt > aggressiveRefCnt) |
| 1991 | { |
| 1992 | aggressiveRefCnt += incr; |
| 1993 | } |
| 1994 | |
| 1995 | if (cseRefCnt > moderateRefCnt) |
| 1996 | { |
| 1997 | moderateRefCnt += (incr / 2); |
| 1998 | } |
| 1999 | } |
| 2000 | |
| 2001 | /* Introduce a new temp for the CSE */ |
| 2002 | |
| 2003 | // we will create a long lifetime temp for the new cse LclVar |
| 2004 | unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG("ValNumCSE" )); |
| 2005 | var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet()); |
| 2006 | if (varTypeIsStruct(cseLclVarTyp)) |
| 2007 | { |
| 2008 | m_pCompiler->lvaSetStruct(cseLclVarNum, m_pCompiler->gtGetStructHandle(successfulCandidate->Expr()), false); |
| 2009 | } |
| 2010 | m_pCompiler->lvaTable[cseLclVarNum].lvType = cseLclVarTyp; |
| 2011 | m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true; |
| 2012 | |
| 2013 | // Record that we created a new LclVar for use as a CSE temp |
| 2014 | m_addCSEcount++; |
| 2015 | m_pCompiler->optCSEcount++; |
| 2016 | |
| 2017 | // Walk all references to this CSE, adding an assignment |
| 2018 | // to the CSE temp to all defs and changing all refs to |
| 2019 | // a simple use of the CSE temp. |
| 2020 | // |
| 2021 | // Later we will unmark any nested CSE's for the CSE uses. |
| 2022 | // |
| 2023 | Compiler::CSEdsc* dsc = successfulCandidate->CseDsc(); |
| 2024 | Compiler::treeStmtLst* lst; |
| 2025 | |
| 2026 | #ifdef DEBUG |
| 2027 | // Verify that all of the ValueNumbers in this list are correct as |
| 2028 | // Morph will change them when it performs a mutating operation. |
| 2029 | // |
| 2030 | ValueNum firstVN = ValueNumStore::NoVN; |
| 2031 | ValueNum currVN; |
| 2032 | bool allSame = true; |
| 2033 | |
| 2034 | lst = dsc->csdTreeList; |
| 2035 | while (lst != nullptr) |
| 2036 | { |
| 2037 | // Ignore this node if the gtCSEnum value has been cleared |
| 2038 | if (IS_CSE_INDEX(lst->tslTree->gtCSEnum)) |
| 2039 | { |
| 2040 | // We used the liberal Value numbers when building the set of CSE |
| 2041 | currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); |
| 2042 | assert(currVN != ValueNumStore::NoVN); |
| 2043 | |
| 2044 | if (firstVN == ValueNumStore::NoVN) |
| 2045 | { |
| 2046 | firstVN = currVN; |
| 2047 | } |
| 2048 | else if (currVN != firstVN) |
| 2049 | { |
| 2050 | allSame = false; |
| 2051 | break; |
| 2052 | } |
| 2053 | } |
| 2054 | lst = lst->tslNext; |
| 2055 | } |
| 2056 | if (!allSame) |
| 2057 | { |
| 2058 | lst = dsc->csdTreeList; |
| 2059 | GenTree* firstTree = lst->tslTree; |
| 2060 | printf("In %s, CSE (oper = %s, type = %s) has differing VNs: " , m_pCompiler->info.compFullName, |
| 2061 | GenTree::OpName(firstTree->OperGet()), varTypeName(firstTree->TypeGet())); |
| 2062 | while (lst != nullptr) |
| 2063 | { |
| 2064 | if (IS_CSE_INDEX(lst->tslTree->gtCSEnum)) |
| 2065 | { |
| 2066 | currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); |
| 2067 | printf("0x%x(%s " FMT_VN ") " , lst->tslTree, IS_CSE_USE(lst->tslTree->gtCSEnum) ? "use" : "def" , |
| 2068 | currVN); |
| 2069 | } |
| 2070 | lst = lst->tslNext; |
| 2071 | } |
| 2072 | printf("\n" ); |
| 2073 | } |
| 2074 | #endif // DEBUG |
| 2075 | |
| 2076 | // Setup 'lst' to point at the start of this candidate list |
| 2077 | lst = dsc->csdTreeList; |
| 2078 | noway_assert(lst); |
| 2079 | |
| 2080 | do |
| 2081 | { |
| 2082 | /* Process the next node in the list */ |
| 2083 | GenTree* exp = lst->tslTree; |
| 2084 | GenTree* stm = lst->tslStmt; |
| 2085 | noway_assert(stm->gtOper == GT_STMT); |
| 2086 | BasicBlock* blk = lst->tslBlock; |
| 2087 | |
| 2088 | /* Advance to the next node in the list */ |
| 2089 | lst = lst->tslNext; |
| 2090 | |
| 2091 | // We may have cleared this CSE in optValuenumCSE_Availablity |
| 2092 | // due to different exception sets. |
| 2093 | // |
| 2094 | // Ignore this node if the gtCSEnum value has been cleared |
| 2095 | if (!IS_CSE_INDEX(exp->gtCSEnum)) |
| 2096 | { |
| 2097 | continue; |
| 2098 | } |
| 2099 | |
| 2100 | // Assert if we used DEBUG_DESTROY_NODE on this CSE exp |
| 2101 | assert(exp->gtOper != GT_COUNT); |
| 2102 | |
| 2103 | /* Make sure we update the weighted ref count correctly */ |
| 2104 | m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler); |
| 2105 | |
| 2106 | /* Figure out the actual type of the value */ |
| 2107 | var_types expTyp = genActualType(exp->TypeGet()); |
| 2108 | noway_assert(expTyp == cseLclVarTyp); |
| 2109 | |
| 2110 | // This will contain the replacement tree for exp |
| 2111 | // It will either be the CSE def or CSE ref |
| 2112 | // |
| 2113 | GenTree* cse = nullptr; |
| 2114 | bool isDef; |
| 2115 | FieldSeqNode* fldSeq = nullptr; |
| 2116 | bool hasZeroMapAnnotation = m_pCompiler->GetZeroOffsetFieldMap()->Lookup(exp, &fldSeq); |
| 2117 | |
| 2118 | if (IS_CSE_USE(exp->gtCSEnum)) |
| 2119 | { |
| 2120 | /* This is a use of the CSE */ |
| 2121 | isDef = false; |
| 2122 | #ifdef DEBUG |
| 2123 | if (m_pCompiler->verbose) |
| 2124 | { |
| 2125 | printf("\nWorking on the replacement of the CSE #%02u use at " , exp->gtCSEnum); |
| 2126 | Compiler::printTreeID(exp); |
| 2127 | printf(" in " FMT_BB "\n" , blk->bbNum); |
| 2128 | } |
| 2129 | #endif // DEBUG |
| 2130 | |
| 2131 | // We will replace the CSE ref with a new tree |
| 2132 | // this is typically just a simple use of the new CSE LclVar |
| 2133 | // |
| 2134 | ValueNumStore* vnStore = m_pCompiler->vnStore; |
| 2135 | cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); |
| 2136 | |
| 2137 | // assign the proper ValueNumber, A CSE use discards any exceptions |
| 2138 | cse->gtVNPair = vnStore->VNPNormalPair(exp->gtVNPair); |
| 2139 | |
| 2140 | ValueNum theConservativeVN = successfulCandidate->CseDsc()->defConservNormVN; |
| 2141 | |
| 2142 | if (theConservativeVN != ValueNumStore::NoVN) |
| 2143 | { |
| 2144 | // All defs of this CSE share the same normal conservative VN, and we are rewriting this |
| 2145 | // use to fetch the same value with no reload, so we can safely propagate that |
| 2146 | // conservative VN to this use. This can help range check elimination later on. |
| 2147 | cse->gtVNPair.SetConservative(theConservativeVN); |
| 2148 | |
| 2149 | // If the old VN was flagged as a checked bound, propagate that to the new VN |
| 2150 | // to make sure assertion prop will pay attention to this VN. |
| 2151 | ValueNum oldVN = exp->gtVNPair.GetConservative(); |
| 2152 | if (!vnStore->IsVNConstant(theConservativeVN) && vnStore->IsVNCheckedBound(oldVN)) |
| 2153 | { |
| 2154 | vnStore->SetVNIsCheckedBound(theConservativeVN); |
| 2155 | } |
| 2156 | |
| 2157 | GenTree* cmp; |
| 2158 | if ((m_pCompiler->optCseCheckedBoundMap != nullptr) && |
| 2159 | (m_pCompiler->optCseCheckedBoundMap->Lookup(exp, &cmp))) |
| 2160 | { |
| 2161 | // Propagate the new value number to this compare node as well, since |
| 2162 | // subsequent range check elimination will try to correlate it with |
| 2163 | // the other appearances that are getting CSEd. |
| 2164 | |
| 2165 | ValueNum oldCmpVN = cmp->gtVNPair.GetConservative(); |
| 2166 | ValueNum newCmpArgVN; |
| 2167 | |
| 2168 | ValueNumStore::CompareCheckedBoundArithInfo info; |
| 2169 | if (vnStore->IsVNCompareCheckedBound(oldCmpVN)) |
| 2170 | { |
| 2171 | // Comparison is against the bound directly. |
| 2172 | |
| 2173 | newCmpArgVN = theConservativeVN; |
| 2174 | vnStore->GetCompareCheckedBound(oldCmpVN, &info); |
| 2175 | } |
| 2176 | else |
| 2177 | { |
| 2178 | // Comparison is against the bound +/- some offset. |
| 2179 | |
| 2180 | assert(vnStore->IsVNCompareCheckedBoundArith(oldCmpVN)); |
| 2181 | vnStore->GetCompareCheckedBoundArithInfo(oldCmpVN, &info); |
| 2182 | newCmpArgVN = vnStore->VNForFunc(vnStore->TypeOfVN(info.arrOp), (VNFunc)info.arrOper, |
| 2183 | info.arrOp, theConservativeVN); |
| 2184 | } |
| 2185 | ValueNum newCmpVN = vnStore->VNForFunc(vnStore->TypeOfVN(oldCmpVN), (VNFunc)info.cmpOper, |
| 2186 | info.cmpOp, newCmpArgVN); |
| 2187 | cmp->gtVNPair.SetConservative(newCmpVN); |
| 2188 | } |
| 2189 | } |
| 2190 | #ifdef DEBUG |
| 2191 | cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF; |
| 2192 | #endif // DEBUG |
| 2193 | |
| 2194 | // Now we need to unmark any nested CSE's uses that are found in 'exp' |
| 2195 | // As well we extract any nested CSE defs that are found in 'exp' and |
| 2196 | // these are appended to the sideEffList |
| 2197 | |
| 2198 | // Afterwards the set of nodes in the 'sideEffectList' are preserved and |
| 2199 | // all other nodes are removed and have their ref counts decremented |
| 2200 | // |
| 2201 | exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field |
| 2202 | |
| 2203 | GenTree* sideEffList = nullptr; |
| 2204 | m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS | GTF_IS_IN_CSE); |
| 2205 | |
| 2206 | // If we have any side effects or extracted CSE defs then we need to create a GT_COMMA tree instead |
| 2207 | // |
| 2208 | if (sideEffList != nullptr) |
| 2209 | { |
| 2210 | #ifdef DEBUG |
| 2211 | if (m_pCompiler->verbose) |
| 2212 | { |
| 2213 | printf("\nThis CSE use has side effects and/or nested CSE defs. The sideEffectList:\n" ); |
| 2214 | m_pCompiler->gtDispTree(sideEffList); |
| 2215 | printf("\n" ); |
| 2216 | } |
| 2217 | #endif |
| 2218 | |
| 2219 | GenTree* cseVal = cse; |
| 2220 | GenTree* curSideEff = sideEffList; |
| 2221 | ValueNumStore* vnStore = m_pCompiler->vnStore; |
| 2222 | ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet(); |
| 2223 | |
| 2224 | while ((curSideEff->OperGet() == GT_COMMA) || (curSideEff->OperGet() == GT_ASG)) |
| 2225 | { |
| 2226 | GenTree* op1 = curSideEff->gtOp.gtOp1; |
| 2227 | GenTree* op2 = curSideEff->gtOp.gtOp2; |
| 2228 | |
| 2229 | ValueNumPair op1vnp; |
| 2230 | ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet(); |
| 2231 | vnStore->VNPUnpackExc(op1->gtVNPair, &op1vnp, &op1Xvnp); |
| 2232 | |
| 2233 | exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp); |
| 2234 | curSideEff = op2; |
| 2235 | } |
| 2236 | |
| 2237 | // We may have inserted a narrowing cast during a previous remorph |
| 2238 | // and it will not have a value number. |
| 2239 | if ((curSideEff->OperGet() == GT_CAST) && !curSideEff->gtVNPair.BothDefined()) |
| 2240 | { |
| 2241 | // The inserted cast will have no exceptional effects |
| 2242 | assert(curSideEff->gtOverflow() == false); |
| 2243 | // Process the exception effects from the cast's operand. |
| 2244 | curSideEff = curSideEff->gtOp.gtOp1; |
| 2245 | } |
| 2246 | |
| 2247 | ValueNumPair op2vnp; |
| 2248 | ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet(); |
| 2249 | vnStore->VNPUnpackExc(curSideEff->gtVNPair, &op2vnp, &op2Xvnp); |
| 2250 | exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp); |
| 2251 | |
| 2252 | op2Xvnp = ValueNumStore::VNPForEmptyExcSet(); |
| 2253 | vnStore->VNPUnpackExc(cseVal->gtVNPair, &op2vnp, &op2Xvnp); |
| 2254 | exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp); |
| 2255 | |
| 2256 | // Create a comma node with the sideEffList as op1 |
| 2257 | cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cseVal); |
| 2258 | cse->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp); |
| 2259 | } |
| 2260 | } |
| 2261 | else |
| 2262 | { |
| 2263 | /* This is a def of the CSE */ |
| 2264 | isDef = true; |
| 2265 | #ifdef DEBUG |
| 2266 | if (m_pCompiler->verbose) |
| 2267 | { |
| 2268 | printf("\nCSE #%02u def at " , GET_CSE_INDEX(exp->gtCSEnum)); |
| 2269 | Compiler::printTreeID(exp); |
| 2270 | printf(" replaced in " FMT_BB " with def of V%02u\n" , blk->bbNum, cseLclVarNum); |
| 2271 | } |
| 2272 | #endif // DEBUG |
| 2273 | |
| 2274 | exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field |
| 2275 | |
| 2276 | GenTree* val = exp; |
| 2277 | |
| 2278 | /* Create an assignment of the value to the temp */ |
| 2279 | GenTree* asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val); |
| 2280 | |
| 2281 | // assign the proper Value Numbers |
| 2282 | asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void |
| 2283 | asg->gtOp.gtOp1->gtVNPair = val->gtVNPair; // The dest op is the same as 'val' |
| 2284 | |
| 2285 | noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR); |
| 2286 | noway_assert(asg->gtOp.gtOp2 == val); |
| 2287 | |
| 2288 | /* Create a reference to the CSE temp */ |
| 2289 | GenTree* ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); |
| 2290 | ref->gtVNPair = val->gtVNPair; // The new 'ref' is the same as 'val' |
| 2291 | |
| 2292 | // If it has a zero-offset field seq, copy annotation to the ref |
| 2293 | if (hasZeroMapAnnotation) |
| 2294 | { |
| 2295 | m_pCompiler->GetZeroOffsetFieldMap()->Set(ref, fldSeq); |
| 2296 | } |
| 2297 | |
| 2298 | /* Create a comma node for the CSE assignment */ |
| 2299 | cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref); |
| 2300 | cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val' |
| 2301 | // as the assignment to the CSE LclVar |
| 2302 | // cannot add any new exceptions |
| 2303 | } |
| 2304 | |
| 2305 | // Walk the statement 'stm' and find the pointer |
| 2306 | // in the tree is pointing to 'exp' |
| 2307 | // |
| 2308 | GenTree** link = m_pCompiler->gtFindLink(stm, exp); |
| 2309 | |
| 2310 | #ifdef DEBUG |
| 2311 | if (link == nullptr) |
| 2312 | { |
| 2313 | printf("\ngtFindLink failed: stm=" ); |
| 2314 | Compiler::printTreeID(stm); |
| 2315 | printf(", exp=" ); |
| 2316 | Compiler::printTreeID(exp); |
| 2317 | printf("\n" ); |
| 2318 | printf("stm =" ); |
| 2319 | m_pCompiler->gtDispTree(stm); |
| 2320 | printf("\n" ); |
| 2321 | printf("exp =" ); |
| 2322 | m_pCompiler->gtDispTree(exp); |
| 2323 | printf("\n" ); |
| 2324 | } |
| 2325 | #endif // DEBUG |
| 2326 | |
| 2327 | noway_assert(link); |
| 2328 | |
| 2329 | // Mutate this link, thus replacing the old exp with the new cse representation |
| 2330 | // |
| 2331 | *link = cse; |
| 2332 | |
| 2333 | // If it has a zero-offset field seq, copy annotation. |
| 2334 | if (hasZeroMapAnnotation) |
| 2335 | { |
| 2336 | m_pCompiler->GetZeroOffsetFieldMap()->Set(cse, fldSeq); |
| 2337 | } |
| 2338 | |
| 2339 | assert(m_pCompiler->fgRemoveRestOfBlock == false); |
| 2340 | |
| 2341 | /* re-morph the statement */ |
| 2342 | m_pCompiler->fgMorphBlockStmt(blk, stm->AsStmt() DEBUGARG("optValnumCSE" )); |
| 2343 | |
| 2344 | } while (lst != nullptr); |
| 2345 | } |
| 2346 | |
| 2347 | // Consider each of the CSE candidates and if the CSE passes |
| 2348 | // the PromotionCheck then transform the CSE by calling PerformCSE |
| 2349 | // |
| 2350 | void ConsiderCandidates() |
| 2351 | { |
| 2352 | /* Consider each CSE candidate, in order of decreasing cost */ |
| 2353 | unsigned cnt = m_pCompiler->optCSECandidateCount; |
| 2354 | Compiler::CSEdsc** ptr = sortTab; |
| 2355 | for (; (cnt > 0); cnt--, ptr++) |
| 2356 | { |
| 2357 | Compiler::CSEdsc* dsc = *ptr; |
| 2358 | if (dsc->defExcSetPromise == ValueNumStore::NoVN) |
| 2359 | { |
| 2360 | JITDUMP("Abandoned CSE #%02u because we had defs with different Exc sets\n" ); |
| 2361 | continue; |
| 2362 | } |
| 2363 | |
| 2364 | CSE_Candidate candidate(this, dsc); |
| 2365 | |
| 2366 | candidate.InitializeCounts(); |
| 2367 | |
| 2368 | if (candidate.UseCount() == 0) |
| 2369 | { |
| 2370 | JITDUMP("Skipped CSE #%02u because use count is 0\n" , candidate.CseIndex()); |
| 2371 | continue; |
| 2372 | } |
| 2373 | |
| 2374 | #ifdef DEBUG |
| 2375 | if (m_pCompiler->verbose) |
| 2376 | { |
| 2377 | printf("\nConsidering CSE #%02u {$%-3x, $%-3x} [def=%2u, use=%2u, cost=%2u] CSE Expression:\n" , |
| 2378 | candidate.CseIndex(), dsc->csdHashKey, dsc->defExcSetPromise, candidate.DefCount(), |
| 2379 | candidate.UseCount(), candidate.Cost()); |
| 2380 | m_pCompiler->gtDispTree(candidate.Expr()); |
| 2381 | printf("\n" ); |
| 2382 | } |
| 2383 | #endif |
| 2384 | |
| 2385 | if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0)) |
| 2386 | { |
| 2387 | // If we reach this point, then the CSE def was incorrectly marked or the |
| 2388 | // block with this use is unreachable. So skip and go to the next CSE. |
| 2389 | // Without the "continue", we'd generate bad code in retail. |
| 2390 | // Commented out a noway_assert(false) here due to bug: 3290124. |
| 2391 | // The problem is if there is sub-graph that is not reachable from the |
| 2392 | // entry point, the CSE flags propagated, would be incorrect for it. |
| 2393 | continue; |
| 2394 | } |
| 2395 | |
| 2396 | bool doCSE = PromotionCheck(&candidate); |
| 2397 | |
| 2398 | #ifdef DEBUG |
| 2399 | if (m_pCompiler->verbose) |
| 2400 | { |
| 2401 | if (doCSE) |
| 2402 | { |
| 2403 | printf("\nPromoting CSE:\n" ); |
| 2404 | } |
| 2405 | else |
| 2406 | { |
| 2407 | printf("Did Not promote this CSE\n" ); |
| 2408 | } |
| 2409 | } |
| 2410 | #endif // DEBUG |
| 2411 | |
| 2412 | if (doCSE) |
| 2413 | { |
| 2414 | PerformCSE(&candidate); |
| 2415 | } |
| 2416 | } |
| 2417 | } |
| 2418 | |
| 2419 | // Perform the necessary cleanup after our CSE heuristics have run |
| 2420 | // |
| 2421 | void Cleanup() |
| 2422 | { |
| 2423 | // Nothing to do, currently. |
| 2424 | } |
| 2425 | }; |
| 2426 | |
| 2427 | /***************************************************************************** |
| 2428 | * |
| 2429 | * Routine for performing the Value Number based CSE using our heuristics |
| 2430 | */ |
| 2431 | |
| 2432 | void Compiler::optValnumCSE_Heuristic() |
| 2433 | { |
| 2434 | #ifdef DEBUG |
| 2435 | if (verbose) |
| 2436 | { |
| 2437 | printf("\n************ Trees at start of optValnumCSE_Heuristic()\n" ); |
| 2438 | fgDumpTrees(fgFirstBB, nullptr); |
| 2439 | printf("\n" ); |
| 2440 | } |
| 2441 | #endif // DEBUG |
| 2442 | |
| 2443 | CSE_Heuristic cse_heuristic(this); |
| 2444 | |
| 2445 | cse_heuristic.Initialize(); |
| 2446 | cse_heuristic.SortCandidates(); |
| 2447 | cse_heuristic.ConsiderCandidates(); |
| 2448 | cse_heuristic.Cleanup(); |
| 2449 | } |
| 2450 | |
| 2451 | /***************************************************************************** |
| 2452 | * |
| 2453 | * Perform common sub-expression elimination. |
| 2454 | */ |
| 2455 | |
| 2456 | void Compiler::optOptimizeValnumCSEs() |
| 2457 | { |
| 2458 | #ifdef DEBUG |
| 2459 | if (verbose) |
| 2460 | { |
| 2461 | printf("\n*************** In optOptimizeValnumCSEs()\n" ); |
| 2462 | } |
| 2463 | |
| 2464 | if (optConfigDisableCSE()) |
| 2465 | { |
| 2466 | return; // Disabled by JitNoCSE |
| 2467 | } |
| 2468 | #endif |
| 2469 | |
| 2470 | optValnumCSE_phase = true; |
| 2471 | |
| 2472 | /* Initialize the expression tracking logic */ |
| 2473 | |
| 2474 | optValnumCSE_Init(); |
| 2475 | |
| 2476 | /* Locate interesting expressions and assign indices to them */ |
| 2477 | |
| 2478 | if (optValnumCSE_Locate() > 0) |
| 2479 | { |
| 2480 | optCSECandidateTotal += optCSECandidateCount; |
| 2481 | |
| 2482 | optValnumCSE_InitDataFlow(); |
| 2483 | |
| 2484 | optValnumCSE_DataFlow(); |
| 2485 | |
| 2486 | optValnumCSE_Availablity(); |
| 2487 | |
| 2488 | optValnumCSE_Heuristic(); |
| 2489 | } |
| 2490 | |
| 2491 | optValnumCSE_phase = false; |
| 2492 | } |
| 2493 | |
| 2494 | #endif // FEATURE_VALNUM_CSE |
| 2495 | |
| 2496 | /***************************************************************************** |
| 2497 | * |
| 2498 | * The following determines whether the given expression is a worthy CSE |
| 2499 | * candidate. |
| 2500 | */ |
| 2501 | bool Compiler::optIsCSEcandidate(GenTree* tree) |
| 2502 | { |
| 2503 | /* No good if the expression contains side effects or if it was marked as DONT CSE */ |
| 2504 | |
| 2505 | if (tree->gtFlags & (GTF_ASG | GTF_DONT_CSE)) |
| 2506 | { |
| 2507 | return false; |
| 2508 | } |
| 2509 | |
| 2510 | /* The only reason a TYP_STRUCT tree might occur is as an argument to |
| 2511 | GT_ADDR. It will never be actually materialized. So ignore them. |
| 2512 | Also TYP_VOIDs */ |
| 2513 | |
| 2514 | var_types type = tree->TypeGet(); |
| 2515 | genTreeOps oper = tree->OperGet(); |
| 2516 | |
| 2517 | // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming |
| 2518 | // to use regular assignments, or handling copyObj. |
| 2519 | if (varTypeIsStruct(type) || type == TYP_VOID) |
| 2520 | { |
| 2521 | return false; |
| 2522 | } |
| 2523 | |
| 2524 | #ifdef _TARGET_X86_ |
| 2525 | if (type == TYP_FLOAT) |
| 2526 | { |
| 2527 | // TODO-X86-CQ: Revisit this |
| 2528 | // Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles |
| 2529 | return false; |
| 2530 | } |
| 2531 | #else |
| 2532 | if (oper == GT_CNS_DBL) |
| 2533 | { |
| 2534 | // TODO-CQ: Revisit this |
| 2535 | // Don't try to CSE a GT_CNS_DBL as they can represent both float and doubles |
| 2536 | return false; |
| 2537 | } |
| 2538 | #endif |
| 2539 | |
| 2540 | unsigned cost; |
| 2541 | if (compCodeOpt() == SMALL_CODE) |
| 2542 | { |
| 2543 | cost = tree->gtCostSz; |
| 2544 | } |
| 2545 | else |
| 2546 | { |
| 2547 | cost = tree->gtCostEx; |
| 2548 | } |
| 2549 | |
| 2550 | /* Don't bother if the potential savings are very low */ |
| 2551 | if (cost < MIN_CSE_COST) |
| 2552 | { |
| 2553 | return false; |
| 2554 | } |
| 2555 | |
| 2556 | #if !CSE_CONSTS |
| 2557 | /* Don't bother with constants */ |
| 2558 | if (tree->OperKind() & GTK_CONST) |
| 2559 | return false; |
| 2560 | #endif |
| 2561 | |
| 2562 | /* Check for some special cases */ |
| 2563 | |
| 2564 | switch (oper) |
| 2565 | { |
| 2566 | case GT_CALL: |
| 2567 | |
| 2568 | GenTreeCall* call; |
| 2569 | call = tree->AsCall(); |
| 2570 | |
| 2571 | // Don't mark calls to allocation helpers as CSE candidates. |
| 2572 | // Marking them as CSE candidates usually blocks CSEs rather than enables them. |
| 2573 | // A typical case is: |
| 2574 | // [1] GT_IND(x) = GT_CALL ALLOC_HELPER |
| 2575 | // ... |
| 2576 | // [2] y = GT_IND(x) |
| 2577 | // ... |
| 2578 | // [3] z = GT_IND(x) |
| 2579 | // If we mark CALL ALLOC_HELPER as a CSE candidate, we later discover |
| 2580 | // that it can't be a CSE def because GT_INDs in [2] and [3] can cause |
| 2581 | // more exceptions (NullRef) so we abandon this CSE. |
| 2582 | // If we don't mark CALL ALLOC_HELPER as a CSE candidate, we are able |
| 2583 | // to use GT_IND(x) in [2] as a CSE def. |
| 2584 | if ((call->gtCallType == CT_HELPER) && |
| 2585 | s_helperCallProperties.IsAllocator(eeGetHelperNum(call->gtCallMethHnd))) |
| 2586 | { |
| 2587 | return false; |
| 2588 | } |
| 2589 | |
| 2590 | // If we have a simple helper call with no other persistent side-effects |
| 2591 | // then we allow this tree to be a CSE candidate |
| 2592 | // |
| 2593 | if (gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS | GTF_IS_IN_CSE) == false) |
| 2594 | { |
| 2595 | return true; |
| 2596 | } |
| 2597 | else |
| 2598 | { |
| 2599 | // Calls generally cannot be CSE-ed |
| 2600 | return false; |
| 2601 | } |
| 2602 | |
| 2603 | case GT_IND: |
| 2604 | // TODO-CQ: Review this... |
| 2605 | /* We try to cse GT_ARR_ELEM nodes instead of GT_IND(GT_ARR_ELEM). |
| 2606 | Doing the first allows cse to also kick in for code like |
| 2607 | "GT_IND(GT_ARR_ELEM) = GT_IND(GT_ARR_ELEM) + xyz", whereas doing |
| 2608 | the second would not allow it */ |
| 2609 | |
| 2610 | return (tree->gtOp.gtOp1->gtOper != GT_ARR_ELEM); |
| 2611 | |
| 2612 | case GT_CNS_INT: |
| 2613 | case GT_CNS_LNG: |
| 2614 | case GT_CNS_DBL: |
| 2615 | case GT_CNS_STR: |
| 2616 | return true; // We reach here only when CSE_CONSTS is enabled |
| 2617 | |
| 2618 | case GT_ARR_ELEM: |
| 2619 | case GT_ARR_LENGTH: |
| 2620 | case GT_CLS_VAR: |
| 2621 | case GT_LCL_FLD: |
| 2622 | return true; |
| 2623 | |
| 2624 | case GT_LCL_VAR: |
| 2625 | return false; // Can't CSE a volatile LCL_VAR |
| 2626 | |
| 2627 | case GT_NEG: |
| 2628 | case GT_NOT: |
| 2629 | case GT_BSWAP: |
| 2630 | case GT_BSWAP16: |
| 2631 | case GT_CAST: |
| 2632 | return true; // CSE these Unary Operators |
| 2633 | |
| 2634 | case GT_SUB: |
| 2635 | case GT_DIV: |
| 2636 | case GT_MOD: |
| 2637 | case GT_UDIV: |
| 2638 | case GT_UMOD: |
| 2639 | case GT_OR: |
| 2640 | case GT_AND: |
| 2641 | case GT_XOR: |
| 2642 | case GT_RSH: |
| 2643 | case GT_RSZ: |
| 2644 | case GT_ROL: |
| 2645 | case GT_ROR: |
| 2646 | return true; // CSE these Binary Operators |
| 2647 | |
| 2648 | case GT_ADD: // Check for ADDRMODE flag on these Binary Operators |
| 2649 | case GT_MUL: |
| 2650 | case GT_LSH: |
| 2651 | if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0) |
| 2652 | { |
| 2653 | return false; |
| 2654 | } |
| 2655 | |
| 2656 | case GT_EQ: |
| 2657 | case GT_NE: |
| 2658 | case GT_LT: |
| 2659 | case GT_LE: |
| 2660 | case GT_GE: |
| 2661 | case GT_GT: |
| 2662 | return true; // Also CSE these Comparison Operators |
| 2663 | |
| 2664 | case GT_INTRINSIC: |
| 2665 | return true; // Intrinsics |
| 2666 | |
| 2667 | case GT_COMMA: |
| 2668 | return true; // Allow GT_COMMA nodes to be CSE-ed. |
| 2669 | |
| 2670 | case GT_COLON: |
| 2671 | case GT_QMARK: |
| 2672 | case GT_NOP: |
| 2673 | case GT_RETURN: |
| 2674 | return false; // Currently the only special nodes that we hit |
| 2675 | // that we know that we don't want to CSE |
| 2676 | |
| 2677 | default: |
| 2678 | break; // Any new nodes that we might add later... |
| 2679 | } |
| 2680 | |
| 2681 | return false; |
| 2682 | } |
| 2683 | |
| 2684 | #ifdef DEBUG |
| 2685 | // |
| 2686 | // A Debug only method that allows you to control whether the CSE logic is enabled for this method. |
| 2687 | // |
| 2688 | // If this method returns false then the CSE phase should be performed. |
| 2689 | // If the method returns true then the CSE phase should be skipped. |
| 2690 | // |
| 2691 | bool Compiler::optConfigDisableCSE() |
| 2692 | { |
| 2693 | // Next check if COMPlus_JitNoCSE is set and applies to this method |
| 2694 | // |
| 2695 | unsigned jitNoCSE = JitConfig.JitNoCSE(); |
| 2696 | |
| 2697 | if (jitNoCSE > 0) |
| 2698 | { |
| 2699 | unsigned methodCount = Compiler::jitTotalMethodCompiled; |
| 2700 | if ((jitNoCSE & 0xF000000) == 0xF000000) |
| 2701 | { |
| 2702 | unsigned methodCountMask = methodCount & 0xFFF; |
| 2703 | unsigned bitsZero = (jitNoCSE >> 12) & 0xFFF; |
| 2704 | unsigned bitsOne = (jitNoCSE >> 0) & 0xFFF; |
| 2705 | |
| 2706 | if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero)) |
| 2707 | { |
| 2708 | if (verbose) |
| 2709 | { |
| 2710 | printf(" Disabled by JitNoCSE methodCountMask\n" ); |
| 2711 | } |
| 2712 | |
| 2713 | return true; // The CSE phase for this method is disabled |
| 2714 | } |
| 2715 | } |
| 2716 | else if (jitNoCSE <= (methodCount + 1)) |
| 2717 | { |
| 2718 | if (verbose) |
| 2719 | { |
| 2720 | printf(" Disabled by JitNoCSE > methodCount\n" ); |
| 2721 | } |
| 2722 | |
| 2723 | return true; // The CSE phase for this method is disabled |
| 2724 | } |
| 2725 | } |
| 2726 | |
| 2727 | return false; |
| 2728 | } |
| 2729 | |
| 2730 | // |
| 2731 | // A Debug only method that allows you to control whether the CSE logic is enabled for |
| 2732 | // a particular CSE in a method |
| 2733 | // |
| 2734 | // If this method returns false then the CSE should be performed. |
| 2735 | // If the method returns true then the CSE should be skipped. |
| 2736 | // |
| 2737 | bool Compiler::optConfigDisableCSE2() |
| 2738 | { |
| 2739 | static unsigned totalCSEcount = 0; |
| 2740 | |
| 2741 | unsigned jitNoCSE2 = JitConfig.JitNoCSE2(); |
| 2742 | |
| 2743 | totalCSEcount++; |
| 2744 | |
| 2745 | if (jitNoCSE2 > 0) |
| 2746 | { |
| 2747 | if ((jitNoCSE2 & 0xF000000) == 0xF000000) |
| 2748 | { |
| 2749 | unsigned totalCSEMask = totalCSEcount & 0xFFF; |
| 2750 | unsigned bitsZero = (jitNoCSE2 >> 12) & 0xFFF; |
| 2751 | unsigned bitsOne = (jitNoCSE2 >> 0) & 0xFFF; |
| 2752 | |
| 2753 | if (((totalCSEMask & bitsOne) == bitsOne) && ((~totalCSEMask & bitsZero) == bitsZero)) |
| 2754 | { |
| 2755 | if (verbose) |
| 2756 | { |
| 2757 | printf(" Disabled by jitNoCSE2 Ones/Zeros mask\n" ); |
| 2758 | } |
| 2759 | return true; |
| 2760 | } |
| 2761 | } |
| 2762 | else if ((jitNoCSE2 & 0xF000000) == 0xE000000) |
| 2763 | { |
| 2764 | unsigned totalCSEMask = totalCSEcount & 0xFFF; |
| 2765 | unsigned disableMask = jitNoCSE2 & 0xFFF; |
| 2766 | |
| 2767 | disableMask >>= (totalCSEMask % 12); |
| 2768 | |
| 2769 | if (disableMask & 1) |
| 2770 | { |
| 2771 | if (verbose) |
| 2772 | { |
| 2773 | printf(" Disabled by jitNoCSE2 rotating disable mask\n" ); |
| 2774 | } |
| 2775 | return true; |
| 2776 | } |
| 2777 | } |
| 2778 | else if (jitNoCSE2 <= totalCSEcount) |
| 2779 | { |
| 2780 | if (verbose) |
| 2781 | { |
| 2782 | printf(" Disabled by jitNoCSE2 > totalCSEcount\n" ); |
| 2783 | } |
| 2784 | return true; |
| 2785 | } |
| 2786 | } |
| 2787 | return false; |
| 2788 | } |
| 2789 | #endif |
| 2790 | |
| 2791 | void Compiler::optOptimizeCSEs() |
| 2792 | { |
| 2793 | #ifdef DEBUG |
| 2794 | if (verbose) |
| 2795 | { |
| 2796 | printf("\n*************** In optOptimizeCSEs()\n" ); |
| 2797 | printf("Blocks/Trees at start of optOptimizeCSE phase\n" ); |
| 2798 | fgDispBasicBlocks(true); |
| 2799 | } |
| 2800 | #endif // DEBUG |
| 2801 | |
| 2802 | optCSECandidateCount = 0; |
| 2803 | optCSEstart = lvaCount; |
| 2804 | |
| 2805 | #if FEATURE_VALNUM_CSE |
| 2806 | INDEBUG(optEnsureClearCSEInfo()); |
| 2807 | optOptimizeValnumCSEs(); |
| 2808 | EndPhase(PHASE_OPTIMIZE_VALNUM_CSES); |
| 2809 | #endif // FEATURE_VALNUM_CSE |
| 2810 | } |
| 2811 | |
| 2812 | /***************************************************************************** |
| 2813 | * |
| 2814 | * Cleanup after CSE to allow us to run more than once. |
| 2815 | */ |
| 2816 | |
| 2817 | void Compiler::optCleanupCSEs() |
| 2818 | { |
| 2819 | // We must clear the BBF_VISITED and BBF_MARKED flags |
| 2820 | // |
| 2821 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
| 2822 | { |
| 2823 | // And clear all the "visited" bits on the block |
| 2824 | // |
| 2825 | block->bbFlags &= ~(BBF_VISITED | BBF_MARKED); |
| 2826 | |
| 2827 | /* Walk the statement trees in this basic block */ |
| 2828 | |
| 2829 | GenTree* stmt; |
| 2830 | |
| 2831 | // Initialize 'stmt' to the first non-Phi statement |
| 2832 | stmt = block->FirstNonPhiDef(); |
| 2833 | |
| 2834 | for (; stmt; stmt = stmt->gtNext) |
| 2835 | { |
| 2836 | noway_assert(stmt->gtOper == GT_STMT); |
| 2837 | |
| 2838 | /* We must clear the gtCSEnum field */ |
| 2839 | for (GenTree* tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev) |
| 2840 | { |
| 2841 | tree->gtCSEnum = NO_CSE; |
| 2842 | } |
| 2843 | } |
| 2844 | } |
| 2845 | } |
| 2846 | |
| 2847 | #ifdef DEBUG |
| 2848 | |
| 2849 | /***************************************************************************** |
| 2850 | * |
| 2851 | * Ensure that all the CSE information in the IR is initialized the way we expect it, |
| 2852 | * before running a CSE phase. This is basically an assert that optCleanupCSEs() is not needed. |
| 2853 | */ |
| 2854 | |
| 2855 | void Compiler::optEnsureClearCSEInfo() |
| 2856 | { |
| 2857 | for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) |
| 2858 | { |
| 2859 | assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0); |
| 2860 | |
| 2861 | /* Walk the statement trees in this basic block */ |
| 2862 | |
| 2863 | GenTree* stmt; |
| 2864 | |
| 2865 | // Initialize 'stmt' to the first non-Phi statement |
| 2866 | stmt = block->FirstNonPhiDef(); |
| 2867 | |
| 2868 | for (; stmt; stmt = stmt->gtNext) |
| 2869 | { |
| 2870 | assert(stmt->gtOper == GT_STMT); |
| 2871 | |
| 2872 | for (GenTree* tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev) |
| 2873 | { |
| 2874 | assert(tree->gtCSEnum == NO_CSE); |
| 2875 | } |
| 2876 | } |
| 2877 | } |
| 2878 | } |
| 2879 | |
| 2880 | #endif // DEBUG |
| 2881 | |
| 2882 | /*****************************************************************************/ |
| 2883 | #endif // FEATURE_ANYCSE |
| 2884 | /*****************************************************************************/ |
| 2885 | |