/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/resourceArea.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/chaitin.hpp"
#include "runtime/deoptimization.hpp"

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style

// To avoid float value underflow
#define MIN_BLOCK_FREQUENCY 1.e-35f
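// (Illustrative aside, not from the original sources: IEEE-754 single
// precision bottoms out around FLT_MIN ~= 1.2e-38, so clamping block
// frequencies at 1e-35 leaves headroom before repeated multiplication
// of small probabilities would denormalize or flush to zero.)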

//----------------------------schedule_node_into_block-------------------------
// Insert node n into block b. Look for projections of n and make sure they
// are in b also.
void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
  // Set basic block of n, Add n to b,
  map_node_to_block(n, b);
  b->add_inst(n);

  // After Matching, nearly any old Node may have projections trailing it.
  // These are usually machine-dependent flags.  In any case, they might
  // float to another block below this one.  Move them up.
  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node*  use  = n->fast_out(i);
    if (use->is_Proj()) {
      Block* buse = get_block_for_node(use);
      if (buse != b) {              // In wrong block?
        if (buse != NULL) {
          buse->find_remove(use);   // Remove from wrong block
        }
        map_node_to_block(use, b);
        b->add_inst(use);
      }
    }
  }
}

//----------------------------replace_block_proj_ctrl-------------------------
// Nodes that have is_block_proj() nodes as their control need to use
// the appropriate Region for their actual block as their control since
// the projection will be in a predecessor block.
void PhaseCFG::replace_block_proj_ctrl( Node *n ) {
  const Node *in0 = n->in(0);
  assert(in0 != NULL, "Only control-dependent");
  const Node *p = in0->is_block_proj();
  if (p != NULL && p != n) {    // Control from a block projection?
    assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
    // Find trailing Region
    Block *pb = get_block_for_node(in0); // Block-projection already has basic block
    uint j = 0;
    if (pb->_num_succs != 1) {  // More than 1 successor?
      // Search for successor
      uint max = pb->number_of_nodes();
      assert( max > 1, "");
      uint start = max - pb->_num_succs;
      // Find which output path belongs to projection
      for (j = start; j < max; j++) {
        if( pb->get_node(j) == in0 )
          break;
      }
      assert( j < max, "must find");
      // Change control to match head of successor basic block
      j -= start;
    }
    n->set_req(0, pb->_succs[j]->head());
  }
}
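
// Illustrative sketch of the rewiring above (hypothetical graph shape,
// assuming a standard If diamond): a node n control-dependent on an IfTrue
// projection has in(0) pointing at a node that *ends* the If's block.  n
// itself belongs in the successor block selected by that projection, so the
// loop matches in0 against the block's trailing output nodes to pick the
// successor index, and set_req(0, ...) re-points n at that successor's head
// (a Region or equivalent block-start node).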

bool PhaseCFG::is_dominator(Node* dom_node, Node* node) {
  if (dom_node == node) {
    return true;
  }
  Block* d = get_block_for_node(dom_node);
  Block* n = get_block_for_node(node);
  if (d == n) {
    if (dom_node->is_block_start()) {
      return true;
    }
    if (node->is_block_start()) {
      return false;
    }
    if (dom_node->is_block_proj()) {
      return false;
    }
    if (node->is_block_proj()) {
      return true;
    }
#ifdef ASSERT
    node->dump();
    dom_node->dump();
#endif
    fatal( "unhandled");
    return false;
  }
  return d->dom_lca(n) == d;
}

//------------------------------schedule_pinned_nodes--------------------------
// Set the basic block for Nodes pinned into blocks
void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
  // Allocate node stack of size C->live_nodes()+8 to avoid frequent realloc
  GrowableArray <Node *> spstack(C->live_nodes() + 8);
  spstack.push(_root);
  while (spstack.is_nonempty()) {
    Node* node = spstack.pop();
    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
      if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
        assert(node->in(0), "pinned Node must have Control");
        // Before setting block replace block_proj control edge
        replace_block_proj_ctrl(node);
        Node* input = node->in(0);
        while (!input->is_block_start()) {
          input = input->in(0);
        }
        Block* block = get_block_for_node(input); // Basic block of controlling input
        schedule_node_into_block(node, block);
      }

      // If the node has precedence edges (added when CastPP nodes are
      // removed in final_graph_reshaping), fix the control of the
      // node to cover the precedence edges and remove the
      // dependencies.
      Node* n = NULL;
      for (uint i = node->len()-1; i >= node->req(); i--) {
        Node* m = node->in(i);
        if (m == NULL) continue;
        // Skip the precedence edge if the test that guarded a CastPP:
        // - was optimized out during escape analysis
        // (OptimizePtrCompare): the CastPP's control isn't an end of
        // block.
        // - is moved in the branch of a dominating If: the control of
        // the CastPP is then a Region.
        if (m->is_block_proj() || m->is_block_start()) {
          node->rm_prec(i);
          if (n == NULL) {
            n = m;
          } else {
            assert(is_dominator(n, m) || is_dominator(m, n), "one must dominate the other");
            n = is_dominator(n, m) ? m : n;
          }
        }
      }
      if (n != NULL) {
        assert(node->in(0), "control should have been set");
        assert(is_dominator(n, node->in(0)) || is_dominator(node->in(0), n), "one must dominate the other");
        if (!is_dominator(n, node->in(0))) {
          node->set_req(0, n);
        }
      }

      // process all inputs that are non NULL
      for (int i = node->req() - 1; i >= 0; --i) {
        if (node->in(i) != NULL) {
          spstack.push(node->in(i));
        }
      }
    }
  }
}

#ifdef ASSERT
// Assert that new input b2 is dominated by all previous inputs.
// Check this by seeing that it is dominated by b1, the deepest
// input observed until b2.
static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
  if (b1 == NULL)  return;
  assert(b1->_dom_depth < b2->_dom_depth, "sanity");
  Block* tmp = b2;
  while (tmp != b1 && tmp != NULL) {
    tmp = tmp->_idom;
  }
  if (tmp != b1) {
    // Detected an unschedulable graph.  Print some nice stuff and die.
    tty->print_cr( "!!! Unschedulable graph !!!");
    for (uint j=0; j<n->len(); j++) { // For all inputs
      Node* inn = n->in(j); // Get input
      if (inn == NULL)  continue;  // Ignore NULL, missing inputs
      Block* inb = cfg->get_block_for_node(inn);
      tty->print( "B%d idom=B%d depth=%2d ",inb->_pre_order,
                  inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
      inn->dump();
    }
    tty->print( "Failing node: ");
    n->dump();
| 221 | assert(false, "unscheduable graph"); | 
  }
}
#endif

static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
  // Find the last input dominated by all other inputs.
  Block* deepb           = NULL;        // Deepest block so far
  int    deepb_dom_depth = 0;
  for (uint k = 0; k < n->len(); k++) { // For all inputs
    Node* inn = n->in(k);               // Get input
    if (inn == NULL)  continue;         // Ignore NULL, missing inputs
    Block* inb = cfg->get_block_for_node(inn);
    assert(inb != NULL, "must already have scheduled this input");
    if (deepb_dom_depth < (int) inb->_dom_depth) {
      // The new inb must be dominated by the previous deepb.
      // The various inputs must be linearly ordered in the dom
      // tree, or else there will not be a unique deepest block.
      DEBUG_ONLY(assert_dom(deepb, inb, n, cfg));
      deepb = inb;                      // Save deepest block
      deepb_dom_depth = deepb->_dom_depth;
    }
  }
  assert(deepb != NULL, "must be at least one input to n");
  return deepb;
}


//------------------------------schedule_early---------------------------------
// Find the earliest Block any instruction can be placed in.  Some instructions
// are pinned into Blocks.  Unpinned instructions can appear in the last block in
// which all their inputs occur.
bool PhaseCFG::schedule_early(VectorSet &visited, Node_Stack &roots) {
  // Allocate stack with enough space to avoid frequent realloc
  Node_Stack nstack(roots.size() + 8);
  // _root will be processed among C->top() inputs
  roots.push(C->top(), 0);
  visited.set(C->top()->_idx);

  while (roots.size() != 0) {
    // Use local variables nstack_top_n & nstack_top_i to cache values
    // on stack's top.
    Node* parent_node = roots.node();
    uint  input_index = 0;
    roots.pop();

    while (true) {
      if (input_index == 0) {
        // Fixup some control.  Constants without control get attached
        // to root and nodes that use is_block_proj() nodes should be attached
        // to the region that starts their block.
        const Node* control_input = parent_node->in(0);
        if (control_input != NULL) {
          replace_block_proj_ctrl(parent_node);
        } else {
          // Is a constant with NO inputs?
          if (parent_node->req() == 1) {
            parent_node->set_req(0, _root);
          }
        }
      }

      // First, visit all inputs and force them to get a block.  If an
      // input is already in a block we quit following inputs (to avoid
      // cycles). Instead we put that Node on a worklist to be handled
      // later (since its inputs may not have a block yet).

      // Assume all n's inputs will be processed
      bool done = true;

      while (input_index < parent_node->len()) {
        Node* in = parent_node->in(input_index++);
        if (in == NULL) {
          continue;
        }

        int is_visited = visited.test_set(in->_idx);
        if (!has_block(in)) {
          if (is_visited) {
            assert(false, "graph should be schedulable");
            return false;
          }
          // Save parent node and next input's index.
          nstack.push(parent_node, input_index);
          // Process current input now.
          parent_node = in;
          input_index = 0;
          // Not all n's inputs processed.
          done = false;
          break;
        } else if (!is_visited) {
          // Visit this guy later, using worklist
          roots.push(in, 0);
        }
      }

      if (done) {
        // All of n's inputs have been processed, complete post-processing.

        // Some instructions are pinned into a block.  These include Region,
        // Phi, Start, Return, and other control-dependent instructions and
        // any projections which depend on them.
        if (!parent_node->pinned()) {
          // Set earliest legal block.
          Block* earliest_block = find_deepest_input(parent_node, this);
          map_node_to_block(parent_node, earliest_block);
        } else {
          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
        }

        if (nstack.is_empty()) {
          // Finished all nodes on stack.
          // Process next node on the worklist 'roots'.
          break;
        }
        // Get saved parent node and next input's index.
        parent_node = nstack.node();
        input_index = nstack.index();
        nstack.pop();
      }
    }
  }
  return true;
}

//------------------------------dom_lca----------------------------------------
// Find least common ancestor in dominator tree
// LCA is a current notion of LCA, to be raised above 'this'.
// As a convenient boundary condition, return 'this' if LCA is NULL.
// Find the LCA of those two nodes.
Block* Block::dom_lca(Block* LCA) {
  if (LCA == NULL || LCA == this)  return this;

  Block* anc = this;
  while (anc->_dom_depth > LCA->_dom_depth)
    anc = anc->_idom;           // Walk up till anc is as high as LCA

  while (LCA->_dom_depth > anc->_dom_depth)
    LCA = LCA->_idom;           // Walk up till LCA is as high as anc

  while (LCA != anc) {          // Walk both up till they are the same
    LCA = LCA->_idom;
    anc = anc->_idom;
  }

  return LCA;
}
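
// Worked sketch (hypothetical dominator tree, not from the sources above):
//
//         B1            B3->dom_lca(B4): equal depths, so both walk up in
//        /  \           lockstep: B3->B2 and B4->B2, giving LCA == B2.
//      B2    B5
//     /  \              B3->dom_lca(B5): B3 is deeper, so it first climbs
//   B3    B4            to B2; then both climb in lockstep and meet at B1.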

//--------------------------raise_LCA_above_use--------------------------------
// We are placing a definition, and have been given a def->use edge.
// The definition must dominate the use, so move the LCA upward in the
// dominator tree to dominate the use.  If the use is a phi, adjust
// the LCA only with the phi input paths which actually use this def.
static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) {
  Block* buse = cfg->get_block_for_node(use);
  if (buse == NULL)    return LCA;   // Unused killing Projs have no use block
  if (!use->is_Phi())  return buse->dom_lca(LCA);
  uint pmax = use->req();       // Number of Phi inputs
  // Why does not this loop just break after finding the matching input to
  // the Phi?  Well...it's like this.  I do not have true def-use/use-def
  // chains.  Means I cannot distinguish, from the def-use direction, which
  // of many use-defs lead from the same use to the same def.  That is, this
  // Phi might have several uses of the same def.  Each use appears in a
  // different predecessor block.  But when I enter here, I cannot distinguish
  // which use-def edge I should find the predecessor block for.  So I find
  // them all.  Means I do a little extra work if a Phi uses the same value
  // more than once.
  for (uint j=1; j<pmax; j++) { // For all inputs
    if (use->in(j) == def) {    // Found matching input?
      Block* pred = cfg->get_block_for_node(buse->pred(j));
      LCA = pred->dom_lca(LCA);
    }
  }
  return LCA;
}

//----------------------------raise_LCA_above_marks----------------------------
// Return a new LCA that dominates LCA and any of its marked predecessors.
// Search all my parents up to 'early' (exclusive), looking for predecessors
// which are marked with the given index.  Return the LCA (in the dom tree)
// of all marked blocks.  If there are none marked, return the original
// LCA.
static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) {
  Block_List worklist;
  worklist.push(LCA);
  while (worklist.size() > 0) {
    Block* mid = worklist.pop();
    if (mid == early)  continue;  // stop searching here

    // Test and set the visited bit.
    if (mid->raise_LCA_visited() == mark)  continue;  // already visited

    // Don't process the current LCA, otherwise the search may terminate early
    if (mid != LCA && mid->raise_LCA_mark() == mark) {
      // Raise the LCA.
      LCA = mid->dom_lca(LCA);
      if (LCA == early)  break;   // stop searching everywhere
      assert(early->dominates(LCA), "early is high enough");
      // Resume searching at that point, skipping intermediate levels.
      worklist.push(LCA);
      if (LCA == mid)
        continue; // Don't mark as visited to avoid early termination.
    } else {
      // Keep searching through this block's predecessors.
      for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
        Block* mid_parent = cfg->get_block_for_node(mid->pred(j));
        worklist.push(mid_parent);
      }
    }
    mid->set_raise_LCA_visited(mark);
  }
  return LCA;
}
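
// Usage sketch (see insert_anti_dependences below): predecessor blocks that
// hold possibly-interfering stores are tagged via set_raise_LCA_mark(load->_idx);
// this walk then climbs from the current LCA toward 'early', folding every
// marked block it reaches into the LCA with dom_lca, so the result dominates
// all marked predecessors without ever searching above 'early'.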

//--------------------------memory_early_block--------------------------------
// This is a variation of find_deepest_input, the heart of schedule_early.
// Find the "early" block for a load, if we considered only memory and
// address inputs, that is, if other data inputs were ignored.
//
// Because a subset of edges are considered, the resulting block will
// be earlier (at a shallower dom_depth) than the true schedule_early
// point of the node. We compute this earlier block as a more permissive
// site for anti-dependency insertion, but only if subsume_loads is enabled.
static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) {
  Node* base;
  Node* index;
  Node* store = load->in(MemNode::Memory);
  load->as_Mach()->memory_inputs(base, index);

  assert(base != NodeSentinel && index != NodeSentinel,
         "unexpected base/index inputs");

  Node* mem_inputs[4];
  int mem_inputs_length = 0;
  if (base != NULL)  mem_inputs[mem_inputs_length++] = base;
  if (index != NULL) mem_inputs[mem_inputs_length++] = index;
  if (store != NULL) mem_inputs[mem_inputs_length++] = store;

  // In the comparison below, add one to account for the control input,
  // which may be null, but always takes up a spot in the in array.
  if (mem_inputs_length + 1 < (int) load->req()) {
    // This "load" has more inputs than just the memory, base and index inputs.
    // For purposes of checking anti-dependences, we need to start
    // from the early block of only the address portion of the instruction,
    // and ignore other blocks that may have factored into the wider
    // schedule_early calculation.
    if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);

    Block* deepb           = NULL;        // Deepest block so far
    int    deepb_dom_depth = 0;
    for (int i = 0; i < mem_inputs_length; i++) {
      Block* inb = cfg->get_block_for_node(mem_inputs[i]);
      if (deepb_dom_depth < (int) inb->_dom_depth) {
        // The new inb must be dominated by the previous deepb.
        // The various inputs must be linearly ordered in the dom
        // tree, or else there will not be a unique deepest block.
        DEBUG_ONLY(assert_dom(deepb, inb, load, cfg));
        deepb = inb;                      // Save deepest block
        deepb_dom_depth = deepb->_dom_depth;
      }
    }
    early = deepb;
  }

  return early;
}
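
// Illustrative scenario (hypothetical): with subsume_loads, a load can be
// folded into an instruction such as an add-with-memory-operand.  That node's
// non-address data input (the other addend) may be defined deep in the
// dominator tree, so schedule_early reports a deep "early" block, hiding any
// stores between the address computation and that block.  Restricting the
// depth search to the memory, base, index and control inputs recovers the
// shallower block, widening the anti-dependence search accordingly.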

//--------------------------insert_anti_dependences---------------------------
// A load may need to witness memory that nearby stores can overwrite.
// For each nearby store, either insert an "anti-dependence" edge
// from the load to the store, or else move LCA upward to force the
// load to (eventually) be scheduled in a block above the store.
//
// Do not add edges to stores on distinct control-flow paths;
// only add edges to stores which might interfere.
//
// Return the (updated) LCA.  There will not be any possibly interfering
// store between the load's "early block" and the updated LCA.
// Any stores in the updated LCA will have new precedence edges
// back to the load.  The caller is expected to schedule the load
// in the LCA, in which case the precedence edges will make LCM
// preserve anti-dependences.  The caller may also hoist the load
// above the LCA, if it is not the early block.
Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
  assert(load->needs_anti_dependence_check(), "must be a load of some sort");
  assert(LCA != NULL, "");
  DEBUG_ONLY(Block* LCA_orig = LCA);

  // Compute the alias index.  Loads and stores with different alias indices
  // do not need anti-dependence edges.
  int load_alias_idx = C->get_alias_index(load->adr_type());
#ifdef ASSERT
  if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
      (PrintOpto || VerifyAliases ||
       (PrintMiscellaneous && (WizardMode || Verbose)))) {
    // Load nodes should not consume all of memory.
    // Reporting a bottom type indicates a bug in adlc.
    // If some particular type of node validly consumes all of memory,
    // sharpen the preceding "if" to exclude it, so we can catch bugs here.
    tty->print_cr( "*** Possible Anti-Dependence Bug:  Load consumes all of memory.");
    load->dump(2);
    if (VerifyAliases)  assert(load_alias_idx != Compile::AliasIdxBot, "");
  }
#endif
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
| 526 | "String compare is only known 'load' that does not conflict with any stores"); | 
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals),
         "String equals is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf),
         "String indexOf is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOfChar),
         "String indexOfChar is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq),
         "Arrays equals is a 'load' that does not conflict with any stores");
  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_HasNegatives),
         "HasNegatives is a 'load' that does not conflict with any stores");

  if (!C->alias_type(load_alias_idx)->is_rewritable()) {
    // It is impossible to spoil this load by putting stores before it,
    // because we know that the stores will never update the value
    // which 'load' must witness.
    return LCA;
  }

  node_idx_t load_index = load->_idx;

  // Note the earliest legal placement of 'load', as determined by
  // the unique point in the dom tree where all memory effects
  // and other inputs are first available.  (Computed by schedule_early.)
  // For normal loads, 'early' is the shallowest place (dom graph wise)
  // to look for anti-deps between this load and any store.
  Block* early = get_block_for_node(load);

  // If we are subsuming loads, compute an "early" block that only considers
  // memory or address inputs. This block may be different than the
  // schedule_early block in that it could be at an even shallower depth in the
  // dominator tree, and allow for a broader discovery of anti-dependences.
  if (C->subsume_loads()) {
    early = memory_early_block(load, early, this);
  }

  ResourceArea *area = Thread::current()->resource_area();
  Node_List worklist_mem(area);     // prior memory state to store
  Node_List worklist_store(area);   // possible-def to explore
  Node_List worklist_visited(area); // visited mergemem nodes
  Node_List non_early_stores(area); // all relevant stores outside of early
  bool must_raise_LCA = false;

#ifdef TRACK_PHI_INPUTS
  // %%% This extra checking fails because MergeMem nodes are not GVNed.
  // Provide "phi_inputs" to check if every input to a PhiNode is from the
  // original memory state.  This indicates a PhiNode which should not
  // prevent the load from sinking.  For such a block, set_raise_LCA_mark
  // may be overly conservative.
  // Mechanism: count inputs seen for each Phi encountered in worklist_store.
  DEBUG_ONLY(GrowableArray<uint> phi_inputs(area, C->unique(),0,0));
#endif

  // 'load' uses some memory state; look for users of the same state.
  // Recurse through MergeMem nodes to the stores that use them.

  // Each of these stores is a possible definition of memory
  // that 'load' needs to use.  We need to force 'load'
  // to occur before each such store.  When the store is in
  // the same block as 'load', we insert an anti-dependence
  // edge load->store.

  // The relevant stores "nearby" the load consist of a tree rooted
  // at initial_mem, with internal nodes of type MergeMem.
  // Therefore, the branches visited by the worklist are of this form:
  //    initial_mem -> (MergeMem ->)* store
  // The anti-dependence constraints apply only to the fringe of this tree.

  Node* initial_mem = load->in(MemNode::Memory);
  worklist_store.push(initial_mem);
  worklist_visited.push(initial_mem);
  worklist_mem.push(NULL);
  while (worklist_store.size() > 0) {
    // Examine a nearby store to see if it might interfere with our load.
    Node* mem   = worklist_mem.pop();
    Node* store = worklist_store.pop();
    uint op = store->Opcode();

    // MergeMems do not directly have anti-deps.
    // Treat them as internal nodes in a forward tree of memory states,
    // the leaves of which are each a 'possible-def'.
    if (store == initial_mem    // root (exclusive) of tree we are searching
        || op == Op_MergeMem    // internal node of tree we are searching
        ) {
      mem = store;   // It's not a possibly interfering store.
      if (store == initial_mem)
        initial_mem = NULL;  // only process initial memory once

      for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
        store = mem->fast_out(i);
        if (store->is_MergeMem()) {
          // Be sure we don't get into combinatorial problems.
          // (Allow phis to be repeated; they can merge two relevant states.)
          uint j = worklist_visited.size();
          for (; j > 0; j--) {
            if (worklist_visited.at(j-1) == store)  break;
          }
          if (j > 0)  continue; // already on work list; do not repeat
          worklist_visited.push(store);
        }
        worklist_mem.push(mem);
        worklist_store.push(store);
      }
      continue;
    }

    if (op == Op_MachProj || op == Op_Catch)   continue;
    if (store->needs_anti_dependence_check())  continue;  // not really a store

    // Compute the alias index.  Loads and stores with different alias
    // indices do not need anti-dependence edges.  Wide MemBar's are
    // anti-dependent on everything (except immutable memories).
    const TypePtr* adr_type = store->adr_type();
    if (!C->can_alias(adr_type, load_alias_idx))  continue;

    // Most slow-path runtime calls do NOT modify Java memory, but
    // they can block and so write Raw memory.
    if (store->is_Mach()) {
      MachNode* mstore = store->as_Mach();
      if (load_alias_idx != Compile::AliasIdxRaw) {
        // Check for call into the runtime using the Java calling
        // convention (and from there into a wrapper); it has no
        // _method.  Can't do this optimization for Native calls because
        // they CAN write to Java memory.
        if (mstore->ideal_Opcode() == Op_CallStaticJava) {
          assert(mstore->is_MachSafePoint(), "");
          MachSafePointNode* ms = (MachSafePointNode*) mstore;
          assert(ms->is_MachCallJava(), "");
          MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
          if (mcj->_method == NULL) {
            // These runtime calls do not write to Java visible memory
            // (other than Raw) and so do not require anti-dependence edges.
            continue;
          }
        }
        // Same for SafePoints: they read/write Raw but only read otherwise.
        // This is basically a workaround for SafePoints only defining control
        // instead of control + memory.
        if (mstore->ideal_Opcode() == Op_SafePoint)
          continue;
      } else {
        // Some raw memory, such as the load of "top" at an allocation,
        // can be control dependent on the previous safepoint. See
        // comments in GraphKit::allocate_heap() about control input.
        // Inserting an anti-dep between such a safepoint and a use
        // creates a cycle, and will cause a subsequent failure in
        // local scheduling.  (BugId 4919904)
        // (%%% How can a control input be a safepoint and not a projection??)
        if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore)
          continue;
      }
    }

    // Identify a block that the current load must be above,
    // or else observe that 'store' is all the way up in the
    // earliest legal block for 'load'.  In the latter case,
    // immediately insert an anti-dependence edge.
    Block* store_block = get_block_for_node(store);
    assert(store_block != NULL, "unused killing projections skipped above");

    if (store->is_Phi()) {
      // Loop-phis need to raise load before input. (Other phis are treated
      // as store below.)
      //
      // 'load' uses memory which is one (or more) of the Phi's inputs.
      // It must be scheduled not before the Phi, but rather before
      // each of the relevant Phi inputs.
      //
      // Instead of finding the LCA of all inputs to a Phi that match 'mem',
      // we mark each corresponding predecessor block and do a combined
      // hoisting operation later (raise_LCA_above_marks).
      //
      // Do not assert(store_block != early, "Phi merging memory after access")
      // PhiNode may be at start of block 'early' with backedge to 'early'
      DEBUG_ONLY(bool found_match = false);
      for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
        if (store->in(j) == mem) {   // Found matching input?
          DEBUG_ONLY(found_match = true);
          Block* pred_block = get_block_for_node(store_block->pred(j));
          if (pred_block != early) {
            // If any predecessor of the Phi matches the load's "early block",
            // we do not need a precedence edge between the Phi and 'load'
            // since the load will be forced into a block preceding the Phi.
            pred_block->set_raise_LCA_mark(load_index);
            assert(!LCA_orig->dominates(pred_block) ||
                   early->dominates(pred_block), "early is high enough");
            must_raise_LCA = true;
          } else {
            // anti-dependent upon PHI pinned below 'early', no edge needed
            LCA = early;             // but can not schedule below 'early'
          }
        }
      }
      assert(found_match, "no worklist bug");
#ifdef TRACK_PHI_INPUTS
#ifdef ASSERT
      // This assert asks about correct handling of PhiNodes, which may not
      // have all input edges directly from 'mem'. See BugId 4621264
      int num_mem_inputs = phi_inputs.at_grow(store->_idx,0) + 1;
      // Increment by exactly one even if there are multiple copies of 'mem'
      // coming into the phi, because we will run this block several times
      // if there are several copies of 'mem'.  (That's how DU iterators work.)
      phi_inputs.at_put(store->_idx, num_mem_inputs);
      assert(PhiNode::Input + num_mem_inputs < store->req(),
             "Expect at least one phi input will not be from original memory state");
#endif //ASSERT
#endif //TRACK_PHI_INPUTS
    } else if (store_block != early) {
      // 'store' is between the current LCA and earliest possible block.
      // Label its block, and decide later on how to raise the LCA
      // to include the effect on LCA of this store.
      // If this store's block gets chosen as the raised LCA, we
      // will find him on the non_early_stores list and stick him
      // with a precedence edge.
      // (But, don't bother if LCA is already raised all the way.)
      if (LCA != early) {
        store_block->set_raise_LCA_mark(load_index);
        must_raise_LCA = true;
        non_early_stores.push(store);
      }
    } else {
      // Found a possibly-interfering store in the load's 'early' block.
      // This means 'load' cannot sink at all in the dominator tree.
      // Add an anti-dep edge, and squeeze 'load' into the highest block.
      assert(store != load->in(0), "dependence cycle found");
      if (verify) {
        assert(store->find_edge(load) != -1, "missing precedence edge");
      } else {
        store->add_prec(load);
      }
      LCA = early;
      // This turns off the process of gathering non_early_stores.
    }
  }
  // (Worklist is now empty; all nearby stores have been visited.)

  // Finished if 'load' must be scheduled in its 'early' block.
  // If we found any stores there, they have already been given
  // precedence edges.
  if (LCA == early)  return LCA;

  // We get here only if there are no possibly-interfering stores
  // in the load's 'early' block.  Move LCA up above all predecessors
  // which contain stores we have noted.
  //
  // The raised LCA block can be a home to such interfering stores,
  // but its predecessors must not contain any such stores.
  //
  // The raised LCA will be a lower bound for placing the load,
  // preventing the load from sinking past any block containing
  // a store that may invalidate the memory state required by 'load'.
  if (must_raise_LCA)
    LCA = raise_LCA_above_marks(LCA, load->_idx, early, this);
  if (LCA == early)  return LCA;

  // Insert anti-dependence edges from 'load' to each store
  // in the non-early LCA block.
  // Mine the non_early_stores list for such stores.
  if (LCA->raise_LCA_mark() == load_index) {
    while (non_early_stores.size() > 0) {
      Node* store = non_early_stores.pop();
      Block* store_block = get_block_for_node(store);
      if (store_block == LCA) {
        // add anti_dependence from store to load in its own block
        assert(store != load->in(0), "dependence cycle found");
        if (verify) {
          assert(store->find_edge(load) != -1, "missing precedence edge");
        } else {
          store->add_prec(load);
        }
      } else {
        assert(store_block->raise_LCA_mark() == load_index, "block was marked");
        // Any other stores we found must be either inside the new LCA
        // or else outside the original LCA.  In the latter case, they
        // did not interfere with any use of 'load'.
        assert(LCA->dominates(store_block)
               || !LCA_orig->dominates(store_block), "no stray stores");
      }
    }
  }

  // Return the highest block containing stores; any stores
  // within that block have been given anti-dependence edges.
  return LCA;
}
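
// Worked sketch of the two main outcomes above (hypothetical blocks):
// suppose early == B2, the incoming LCA == B5, and a conflicting store S
// sits in B4 on the path between them.
//  - S outside 'early': B4 gets set_raise_LCA_mark(load->_idx), S goes on
//    non_early_stores, and raise_LCA_above_marks raises the LCA until it
//    dominates B4; if the raised LCA turns out to be S's own block, S is
//    fished back out of non_early_stores and pinned with store->add_prec(load).
//  - S inside 'early' (B2): the load cannot sink at all, so it receives the
//    precedence edge immediately and LCA collapses to 'early'.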

// This class is used to iterate backwards over the nodes in the graph.

class Node_Backward_Iterator {

private:
  Node_Backward_Iterator();

public:
  // Constructor for the iterator
  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_Stack &stack, PhaseCFG &cfg);

  // Return the next node in the backward traversal (NULL when exhausted)
  Node *next();

private:
  VectorSet   &_visited;
  Node_Stack  &_stack;
  PhaseCFG &_cfg;
};

// Constructor for the Node_Backward_Iterator
Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_Stack &stack, PhaseCFG &cfg)
  : _visited(visited), _stack(stack), _cfg(cfg) {
  // The stack should contain exactly the root
  stack.clear();
  stack.push(root, root->outcnt());

  // Clear the visited bits
  visited.Clear();
}

// Iterator for the Node_Backward_Iterator
Node *Node_Backward_Iterator::next() {

  // If the _stack is empty, then just return NULL: finished.
  if ( !_stack.size() )
    return NULL;

  // I visit unvisited not-anti-dependence users first, then anti-dependent
  // children next. I iterate backwards to support removal of nodes.
  // The stack holds states consisting of 3 values:
  // current Def node, flag which indicates 1st/2nd pass, index of current out edge
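  // (Implementation note: the 1st/2nd-pass flag is packed into the low bit
  // of the saved Node pointer -- masked off just below and set again at the
  // push further down -- which is safe because Node objects are at least
  // word-aligned, so a real pointer never has its low bit set.)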
  Node *self = (Node*)(((uintptr_t)_stack.node()) & ~1);
  bool iterate_anti_dep = (((uintptr_t)_stack.node()) & 1);
  uint idx = MIN2(_stack.index(), self->outcnt()); // Support removal of nodes.
  _stack.pop();

  // I cycle here when I am entering a deeper level of recursion.
  // The key variable 'self' was set prior to jumping here.
  while( 1 ) {

    _visited.set(self->_idx);

    // Now schedule all uses as late as possible.
    const Node* src = self->is_Proj() ? self->in(0) : self;
    uint src_rpo = _cfg.get_block_for_node(src)->_rpo;

    // Schedule all nodes in a post-order visit
    Node *unvisited = NULL;  // Unvisited anti-dependent Node, if any

    // Scan for unvisited nodes
    while (idx > 0) {
      // For all uses, schedule late
      Node* n = self->raw_out(--idx); // Use

      // Skip already visited children
      if ( _visited.test(n->_idx) )
        continue;

      // do not traverse backward control edges
      Node *use = n->is_Proj() ? n->in(0) : n;
      uint use_rpo = _cfg.get_block_for_node(use)->_rpo;

      if ( use_rpo < src_rpo )
        continue;

      // Phi nodes always precede uses in a basic block
      if ( use_rpo == src_rpo && use->is_Phi() )
        continue;

      unvisited = n;      // Found unvisited

      // Check for possible-anti-dependent
      // 1st pass: No such nodes, 2nd pass: Only such nodes.
      if (n->needs_anti_dependence_check() == iterate_anti_dep) {
        unvisited = n;      // Found unvisited
        break;
      }
    }

    // Did I find an unvisited not-anti-dependent Node?
    if (!unvisited) {
      if (!iterate_anti_dep) {
        // 2nd pass: Iterate over nodes which needs_anti_dependence_check.
        iterate_anti_dep = true;
        idx = self->outcnt();
        continue;
      }
      break;                  // All done with children; post-visit 'self'
    }

    // Visit the unvisited Node.  Contains the obvious push to
    // indicate I'm entering a deeper level of recursion.  I push the
    // old state onto the _stack and set a new state and loop (recurse).
    _stack.push((Node*)((uintptr_t)self | (uintptr_t)iterate_anti_dep), idx);
    self = unvisited;
    iterate_anti_dep = false;
    idx = self->outcnt();
  } // End recursion loop

  return self;
}

//------------------------------ComputeLatenciesBackwards----------------------
// Compute the latency of all the instructions.
void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_Stack &stack) {
#ifndef PRODUCT
  if (trace_opto_pipelining())
    tty->print( "\n#---- ComputeLatenciesBackwards ----\n");
#endif

  Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
  Node *n;

  // Walk over all the nodes from last to first
  while ((n = iter.next())) {
    // Set the latency for the definitions of this instruction
    partial_latency_of_defs(n);
  }
} // end ComputeLatenciesBackwards

//------------------------------partial_latency_of_defs------------------------
// Compute the latency impact of this node on all defs.  This computes
// a number that increases as we approach the beginning of the routine.
void PhaseCFG::partial_latency_of_defs(Node *n) {
  // Set the latency for this instruction
#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print( "# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
    dump();
  }
#endif

  if (n->is_Proj()) {
    n = n->in(0);
  }

  if (n->is_Root()) {
    return;
  }

  uint nlen = n->len();
  uint use_latency = get_latency_for_node(n);
  uint use_pre_order = get_block_for_node(n)->_pre_order;

  for (uint j = 0; j < nlen; j++) {
    Node *def = n->in(j);

    if (!def || def == n) {
      continue;
    }

    // Walk backwards thru projections
    if (def->is_Proj()) {
      def = def->in(0);
    }

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print( "#    in(%2d): ", j);
      def->dump();
    }
#endif

    // If the defining block is not known, assume it is ok
    Block *def_block = get_block_for_node(def);
    uint def_pre_order = def_block ? def_block->_pre_order : 0;

    if ((use_pre_order <  def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
      continue;
    }

    uint delta_latency = n->latency(j);
    uint current_latency = delta_latency + use_latency;

    if (get_latency_for_node(def) < current_latency) {
      set_latency_for_node(def, current_latency);
    }

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr( "#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
    }
#endif
  }
}

//------------------------------latency_from_use-------------------------------
// Compute the latency of a specific use
int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
  // If self-reference, return no latency
  if (use == n || use->is_Root()) {
    return 0;
  }

  uint def_pre_order = get_block_for_node(def)->_pre_order;
  uint latency = 0;

  // If the use is not a projection, then it is simple...
  if (!use->is_Proj()) {
#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print( "#    out(): ");
      use->dump();
    }
#endif

    uint use_pre_order = get_block_for_node(use)->_pre_order;

    if (use_pre_order < def_pre_order)
      return 0;

    if (use_pre_order == def_pre_order && use->is_Phi())
      return 0;

    uint nlen = use->len();
    uint nl = get_latency_for_node(use);

    for ( uint j=0; j<nlen; j++ ) {
      if (use->in(j) == n) {
        // Change this if we want local latencies
        uint ul = use->latency(j);
        uint  l = ul + nl;
        if (latency < l) latency = l;
#ifndef PRODUCT
        if (trace_opto_pipelining()) {
          tty->print_cr( "#      %d + edge_latency(%d) == %d -> %d, latency = %d",
                         nl, j, ul, l, latency);
        }
#endif
      }
    }
  } else {
    // This is a projection, just grab the latency of the use(s)
    for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
      uint l = latency_from_use(use, def, use->fast_out(j));
      if (latency < l) latency = l;
    }
  }

  return latency;
}

//------------------------------latency_from_uses------------------------------
// Compute the latency of this instruction relative to all of its uses.
// This computes a number that increases as we approach the beginning of the
// routine.
void PhaseCFG::latency_from_uses(Node *n) {
  // Set the latency for this instruction
#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print( "# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
    dump();
  }
#endif
  uint latency=0;
  const Node *def = n->is_Proj() ? n->in(0): n;

  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    uint l = latency_from_use(n, def, n->fast_out(i));

    if (latency < l) latency = l;
  }

  set_latency_for_node(n, latency);
}
|---|
| 1088 |  | 
|---|
| 1089 | //------------------------------hoist_to_cheaper_block------------------------- | 
|---|
| 1090 | // Pick a block for node self, between early and LCA, that is a cheaper | 
|---|
| 1091 | // alternative to LCA. | 
|---|
| 1092 | Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { | 
|---|
| 1093 | const double delta = 1+PROB_UNLIKELY_MAG(4); | 
|---|
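|  | // delta lets a candidate block be accepted when its frequency is no worse | 
|---|
|  | // (within a tiny tolerance) than the best seen so far, provided it also | 
|---|
|  | // covers latency (see the "No worse frequency" test below). | 
|---|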
| 1094 | Block* least       = LCA; | 
|---|
| 1095 | double least_freq  = least->_freq; | 
|---|
| 1096 | uint target        = get_latency_for_node(self); | 
|---|
| 1097 | uint start_latency = get_latency_for_node(LCA->head()); | 
|---|
| 1098 | uint end_latency   = get_latency_for_node(LCA->get_node(LCA->end_idx())); | 
|---|
| 1099 | bool in_latency    = (target <= start_latency); | 
|---|
| 1100 | const Block* root_block = get_block_for_node(_root); | 
|---|
| 1101 |  | 
|---|
| 1102 | // Turn off latency scheduling if scheduling is just plain off | 
|---|
| 1103 | if (!C->do_scheduling()) | 
|---|
| 1104 | in_latency = true; | 
|---|
| 1105 |  | 
|---|
| 1106 | // Do not hoist (to cover latency) instructions which target a | 
|---|
| 1107 | // single register.  Hoisting stretches the live range of the | 
|---|
| 1108 | // single register and may force spilling. | 
|---|
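|  | // (E.g. a machine node whose out_RegMask is bound to exactly one register, | 
|---|
|  | // such as a condition-flags register.) | 
|---|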
| 1109 | MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL; | 
|---|
| 1110 | if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty()) | 
|---|
| 1111 | in_latency = true; | 
|---|
| 1112 |  | 
|---|
| 1113 | #ifndef PRODUCT | 
|---|
| 1114 | if (trace_opto_pipelining()) { | 
|---|
| 1115 | tty->print( "# Find cheaper block for latency %d: ", get_latency_for_node(self)); | 
|---|
| 1116 | self->dump(); | 
|---|
| 1117 | tty->print_cr( "#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | 
|---|
| 1118 | LCA->_pre_order, | 
|---|
| 1119 | LCA->head()->_idx, | 
|---|
| 1120 | start_latency, | 
|---|
| 1121 | LCA->get_node(LCA->end_idx())->_idx, | 
|---|
| 1122 | end_latency, | 
|---|
| 1123 | least_freq); | 
|---|
| 1124 | } | 
|---|
| 1125 | #endif | 
|---|
| 1126 |  | 
|---|
| 1127 | int cand_cnt = 0;  // number of candidates tried | 
|---|
| 1128 |  | 
|---|
| 1129 | // Walk up the dominator tree from LCA (Lowest common ancestor) to | 
|---|
| 1130 | // the earliest legal location.  Capture the least execution frequency. | 
|---|
| 1131 | while (LCA != early) { | 
|---|
| 1132 | LCA = LCA->_idom;         // Follow up the dominator tree | 
|---|
| 1133 |  | 
|---|
| 1134 | if (LCA == NULL) { | 
|---|
| 1135 | // Bailout without retry | 
|---|
| 1136 | assert(false, "graph should be schedulable"); | 
|---|
| 1137 | C->record_method_not_compilable( "late schedule failed: LCA == NULL"); | 
|---|
| 1138 | return least; | 
|---|
| 1139 | } | 
|---|
| 1140 |  | 
|---|
| 1141 | // Don't hoist machine instructions to the root basic block | 
|---|
| 1142 | if (mach && LCA == root_block) | 
|---|
| 1143 | break; | 
|---|
| 1144 |  | 
|---|
| 1145 | uint start_lat = get_latency_for_node(LCA->head()); | 
|---|
| 1146 | uint end_idx   = LCA->end_idx(); | 
|---|
| 1147 | uint end_lat   = get_latency_for_node(LCA->get_node(end_idx)); | 
|---|
| 1148 | double LCA_freq = LCA->_freq; | 
|---|
| 1149 | #ifndef PRODUCT | 
|---|
| 1150 | if (trace_opto_pipelining()) { | 
|---|
| 1151 | tty->print_cr( "#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", | 
|---|
| 1152 | LCA->_pre_order, LCA->head()->_idx, start_lat, end_idx, end_lat, LCA_freq); | 
|---|
| 1153 | } | 
|---|
| 1154 | #endif | 
|---|
| 1155 | cand_cnt++; | 
|---|
| 1156 | if (LCA_freq < least_freq              || // Better Frequency | 
|---|
| 1157 | (StressGCM && Compile::randomized_select(cand_cnt)) || // Should be randomly accepted in stress mode | 
|---|
| 1158 | (!StressGCM                    &&    // Otherwise, choose with latency | 
|---|
| 1159 | !in_latency                   &&    // No block containing latency | 
|---|
| 1160 | LCA_freq < least_freq * delta &&    // No worse frequency | 
|---|
| 1161 | target >= end_lat             &&    // within latency range | 
|---|
| 1162 | !self->is_iteratively_computed() )  // But don't hoist IV increments | 
|---|
| 1163 | // because they may end up above other uses of their phi forcing | 
|---|
| 1164 | // their result register to be different from their input. | 
|---|
| 1165 | ) { | 
|---|
| 1166 | least = LCA;            // Found cheaper block | 
|---|
| 1167 | least_freq = LCA_freq; | 
|---|
| 1168 | start_latency = start_lat; | 
|---|
| 1169 | end_latency = end_lat; | 
|---|
| 1170 | if (target <= start_lat) | 
|---|
| 1171 | in_latency = true; | 
|---|
| 1172 | } | 
|---|
| 1173 | } | 
|---|
| 1174 |  | 
|---|
| 1175 | #ifndef PRODUCT | 
|---|
| 1176 | if (trace_opto_pipelining()) { | 
|---|
| 1177 | tty->print_cr( "#  Choose block B%d with start latency=%d and freq=%g", | 
|---|
| 1178 | least->_pre_order, start_latency, least_freq); | 
|---|
| 1179 | } | 
|---|
| 1180 | #endif | 
|---|
| 1181 |  | 
|---|
| 1182 | // See if the latency needs to be updated | 
|---|
| 1183 | if (target < end_latency) { | 
|---|
| 1184 | #ifndef PRODUCT | 
|---|
| 1185 | if (trace_opto_pipelining()) { | 
|---|
| 1186 | tty->print_cr( "#  Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); | 
|---|
| 1187 | } | 
|---|
| 1188 | #endif | 
|---|
| 1189 | set_latency_for_node(self, end_latency); | 
|---|
| 1190 | partial_latency_of_defs(self); | 
|---|
| 1191 | } | 
|---|
| 1192 |  | 
|---|
| 1193 | return least; | 
|---|
| 1194 | } | 
|---|
| 1195 |  | 
|---|
| 1196 |  | 
|---|
| 1197 | //------------------------------schedule_late----------------------------------- | 
|---|
| 1198 | // Now schedule all codes as LATE as possible.  This is the LCA in the | 
|---|
| 1199 | // dominator tree of all USES of a value.  Pick the block with the least | 
|---|
| 1200 | // loop nesting depth that is lowest in the dominator tree. | 
|---|
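|  | // The late placement must still dominate every use, so the candidate | 
|---|
|  | // blocks lie on the dominator-tree path from the earliest legal block | 
|---|
|  | // down to the LCA of the uses; hoist_to_cheaper_block() picks among them. | 
|---|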
| 1201 | extern const char must_clone[]; | 
|---|
| 1202 | void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) { | 
|---|
| 1203 | #ifndef PRODUCT | 
|---|
| 1204 | if (trace_opto_pipelining()) | 
|---|
| 1205 | tty->print( "\n#---- schedule_late ----\n"); | 
|---|
| 1206 | #endif | 
|---|
| 1207 |  | 
|---|
| 1208 | Node_Backward_Iterator iter((Node *)_root, visited, stack, *this); | 
|---|
| 1209 | Node *self; | 
|---|
| 1210 |  | 
|---|
| 1211 | // Walk over all the nodes from last to first | 
|---|
| 1212 | while ((self = iter.next())) { | 
|---|
| 1213 | Block* early = get_block_for_node(self); // Earliest legal placement | 
|---|
| 1214 |  | 
|---|
| 1215 | if (self->is_top()) { | 
|---|
| 1216 | // Top node goes in bb #2 with other constants. | 
|---|
| 1217 | // It must be special-cased, because it has no out edges. | 
|---|
| 1218 | early->add_inst(self); | 
|---|
| 1219 | continue; | 
|---|
| 1220 | } | 
|---|
| 1221 |  | 
|---|
| 1222 | // No uses, just terminate | 
|---|
| 1223 | if (self->outcnt() == 0) { | 
|---|
| 1224 | assert(self->is_MachProj(), "sanity"); | 
|---|
| 1225 | continue;                   // Must be a dead machine projection | 
|---|
| 1226 | } | 
|---|
| 1227 |  | 
|---|
| 1228 | // If node is pinned in the block, then no scheduling can be done. | 
|---|
| 1229 | if( self->pinned() )          // Pinned in block? | 
|---|
| 1230 | continue; | 
|---|
| 1231 |  | 
|---|
| 1232 | MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL; | 
|---|
| 1233 | if (mach) { | 
|---|
| 1234 | switch (mach->ideal_Opcode()) { | 
|---|
| 1235 | case Op_CreateEx: | 
|---|
| 1236 | // Don't move exception creation | 
|---|
| 1237 | early->add_inst(self); | 
|---|
| 1238 | continue; | 
|---|
| 1239 | break; | 
|---|
| 1240 | case Op_CheckCastPP: { | 
|---|
| 1241 | // Don't move CheckCastPP nodes away from their input, if the input | 
|---|
| 1242 | // is a rawptr (5071820). | 
|---|
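|  | // (The input is typically the raw, not-yet-typed result of an allocation; | 
|---|
|  | // keeping the cast next to it keeps the raw value's live range short.) | 
|---|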
| 1243 | Node *def = self->in(1); | 
|---|
| 1244 | if (def != NULL && def->bottom_type()->base() == Type::RawPtr) { | 
|---|
| 1245 | early->add_inst(self); | 
|---|
| 1246 | #ifdef ASSERT | 
|---|
| 1247 | _raw_oops.push(def); | 
|---|
| 1248 | #endif | 
|---|
| 1249 | continue; | 
|---|
| 1250 | } | 
|---|
| 1251 | break; | 
|---|
| 1252 | } | 
|---|
| 1253 | default: | 
|---|
| 1254 | break; | 
|---|
| 1255 | } | 
|---|
| 1256 | } | 
|---|
| 1257 |  | 
|---|
| 1258 | // Gather LCA of all uses | 
|---|
| 1259 | Block *LCA = NULL; | 
|---|
| 1260 | { | 
|---|
| 1261 | for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { | 
|---|
| 1262 | // For all uses, find LCA | 
|---|
| 1263 | Node* use = self->fast_out(i); | 
|---|
| 1264 | LCA = raise_LCA_above_use(LCA, use, self, this); | 
|---|
| 1265 | } | 
|---|
| 1266 | guarantee(LCA != NULL, "There must be a LCA"); | 
|---|
| 1267 | }  // (Hide defs of imax, i from rest of block.) | 
|---|
| 1268 |  | 
|---|
| 1269 | // Place temps in the block of their use.  This isn't a | 
|---|
| 1270 | // requirement for correctness but it reduces useless | 
|---|
| 1271 | // interference between temps and other nodes. | 
|---|
| 1272 | if (mach != NULL && mach->is_MachTemp()) { | 
|---|
| 1273 | map_node_to_block(self, LCA); | 
|---|
| 1274 | LCA->add_inst(self); | 
|---|
| 1275 | continue; | 
|---|
| 1276 | } | 
|---|
| 1277 |  | 
|---|
| 1278 | // Check if 'self' could be anti-dependent on memory | 
|---|
| 1279 | if (self->needs_anti_dependence_check()) { | 
|---|
| 1280 | // Hoist LCA above possible-defs and insert anti-dependences to | 
|---|
| 1281 | // defs in new LCA block. | 
|---|
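|  | // (A load must not be scheduled below a store that could overwrite the | 
|---|
|  | // memory it reads; the precedence edges added here enforce that order.) | 
|---|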
| 1282 | LCA = insert_anti_dependences(LCA, self); | 
|---|
| 1283 | } | 
|---|
| 1284 |  | 
|---|
| 1285 | if (early->_dom_depth > LCA->_dom_depth) { | 
|---|
| 1286 | // Somehow the LCA has moved above the earliest legal point. | 
|---|
| 1287 | // (One way this can happen is via memory_early_block.) | 
|---|
| 1288 | if (C->subsume_loads() == true && !C->failing()) { | 
|---|
| 1289 | // Retry with subsume_loads == false | 
|---|
| 1290 | // If this is the first failure, the sentinel string will "stick" | 
|---|
| 1291 | // to the Compile object, and the C2Compiler will see it and retry. | 
|---|
| 1292 | C->record_failure(C2Compiler::retry_no_subsuming_loads()); | 
|---|
| 1293 | } else { | 
|---|
| 1294 | // Bailout without retry when (early->_dom_depth > LCA->_dom_depth) | 
|---|
| 1295 | assert(false, "graph should be schedulable"); | 
|---|
| 1296 | C->record_method_not_compilable( "late schedule failed: incorrect graph"); | 
|---|
| 1297 | } | 
|---|
| 1298 | return; | 
|---|
| 1299 | } | 
|---|
| 1300 |  | 
|---|
| 1301 | // If there is no opportunity to hoist, then we're done. | 
|---|
| 1302 | // In stress mode, try to hoist even the single operations. | 
|---|
| 1303 | bool try_to_hoist = StressGCM || (LCA != early); | 
|---|
| 1304 |  | 
|---|
| 1305 | // Nodes marked must_clone stay next to their use; no hoisting allowed. | 
|---|
| 1306 | // Also cannot hoist nodes that alter memory or are otherwise not | 
|---|
| 1307 | // allocatable (hoisting can make a value live longer, leading to | 
|---|
| 1308 | // anti- and output-dependency problems which are normally resolved | 
|---|
| 1309 | // by the register allocator giving everyone a different register). | 
|---|
| 1310 | if (mach != NULL && must_clone[mach->ideal_Opcode()]) | 
|---|
| 1311 | try_to_hoist = false; | 
|---|
| 1312 |  | 
|---|
| 1313 | Block* late = NULL; | 
|---|
| 1314 | if (try_to_hoist) { | 
|---|
| 1315 | // Now find the block with the least execution frequency. | 
|---|
| 1316 | // Start at the latest schedule and work up to the earliest schedule | 
|---|
| 1317 | // in the dominator tree.  Thus the Node will dominate all its uses. | 
|---|
| 1318 | late = hoist_to_cheaper_block(LCA, early, self); | 
|---|
| 1319 | } else { | 
|---|
| 1320 | // Just use the LCA of the uses. | 
|---|
| 1321 | late = LCA; | 
|---|
| 1322 | } | 
|---|
| 1323 |  | 
|---|
| 1324 | // Put the node into target block | 
|---|
| 1325 | schedule_node_into_block(self, late); | 
|---|
| 1326 |  | 
|---|
| 1327 | #ifdef ASSERT | 
|---|
| 1328 | if (self->needs_anti_dependence_check()) { | 
|---|
| 1329 | // since precedence edges are only inserted when we're sure they | 
|---|
| 1330 | // are needed make sure that after placement in a block we don't | 
|---|
| 1331 | // need any new precedence edges. | 
|---|
| 1332 | verify_anti_dependences(late, self); | 
|---|
| 1333 | } | 
|---|
| 1334 | #endif | 
|---|
| 1335 | } // Loop until all nodes have been visited | 
|---|
| 1336 |  | 
|---|
| 1337 | } // end ScheduleLate | 
|---|
| 1338 |  | 
|---|
| 1339 | //------------------------------GlobalCodeMotion------------------------------- | 
|---|
| 1340 | void PhaseCFG::global_code_motion() { | 
|---|
| 1341 | ResourceMark rm; | 
|---|
| 1342 |  | 
|---|
| 1343 | #ifndef PRODUCT | 
|---|
| 1344 | if (trace_opto_pipelining()) { | 
|---|
| 1345 | tty->print( "\n---- Start GlobalCodeMotion ----\n"); | 
|---|
| 1346 | } | 
|---|
| 1347 | #endif | 
|---|
| 1348 |  | 
|---|
| 1349 | // Initialize the node to block mapping for things on the proj_list | 
|---|
| 1350 | for (uint i = 0; i < _matcher.number_of_projections(); i++) { | 
|---|
| 1351 | unmap_node_from_block(_matcher.get_projection(i)); | 
|---|
| 1352 | } | 
|---|
| 1353 |  | 
|---|
| 1354 | // Set the basic block for Nodes pinned into blocks | 
|---|
| 1355 | Arena* arena = Thread::current()->resource_area(); | 
|---|
| 1356 | VectorSet visited(arena); | 
|---|
| 1357 | schedule_pinned_nodes(visited); | 
|---|
| 1358 |  | 
|---|
| 1359 | // Find the earliest Block any instruction can be placed in.  Some | 
|---|
| 1360 | // instructions are pinned into Blocks.  Unpinned instructions can | 
|---|
| 1361 | // appear in the last block in which all their inputs occur. | 
|---|
| 1362 | visited.Clear(); | 
|---|
| 1363 | Node_Stack stack(arena, (C->live_nodes() >> 2) + 16); // pre-grow | 
|---|
| 1364 | if (!schedule_early(visited, stack)) { | 
|---|
| 1365 | // Bailout without retry | 
|---|
| 1366 | C->record_method_not_compilable( "early schedule failed"); | 
|---|
| 1367 | return; | 
|---|
| 1368 | } | 
|---|
| 1369 |  | 
|---|
| 1370 | // Build Def-Use edges. | 
|---|
| 1371 | // Compute the latency information (via backwards walk) for all the | 
|---|
| 1372 | // instructions in the graph | 
|---|
| 1373 | _node_latency = new GrowableArray<uint>(); // resource_area allocation | 
|---|
| 1374 |  | 
|---|
| 1375 | if (C->do_scheduling()) { | 
|---|
| 1376 | compute_latencies_backwards(visited, stack); | 
|---|
| 1377 | } | 
|---|
| 1378 |  | 
|---|
| 1379 | // Now schedule all codes as LATE as possible.  This is the LCA in the | 
|---|
| 1380 | // dominator tree of all USES of a value.  Pick the block with the least | 
|---|
| 1381 | // loop nesting depth that is lowest in the dominator tree. | 
|---|
| 1382 | // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) | 
|---|
| 1383 | schedule_late(visited, stack); | 
|---|
| 1384 | if (C->failing()) { | 
|---|
| 1385 | // schedule_late fails only when graph is incorrect. | 
|---|
| 1386 | assert(!VerifyGraphEdges, "verification should have failed"); | 
|---|
| 1387 | return; | 
|---|
| 1388 | } | 
|---|
| 1389 |  | 
|---|
| 1390 | #ifndef PRODUCT | 
|---|
| 1391 | if (trace_opto_pipelining()) { | 
|---|
| 1392 | tty->print( "\n---- Detect implicit null checks ----\n"); | 
|---|
| 1393 | } | 
|---|
| 1394 | #endif | 
|---|
| 1395 |  | 
|---|
| 1396 | // Detect implicit-null-check opportunities.  Basically, find NULL checks | 
|---|
| 1397 | // with suitable memory ops nearby.  Use the memory op to do the NULL check. | 
|---|
| 1398 | // I can generate a memory op if there is not one nearby. | 
|---|
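|  | // (E.g. an explicit null test whose non-null path immediately loads | 
|---|
|  | // through the tested pointer: the load's hardware trap takes over | 
|---|
|  | // the job of the test.) | 
|---|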
| 1399 | if (C->is_method_compilation()) { | 
|---|
| 1400 | // By reversing the loop direction we get a very minor gain on mpegaudio. | 
|---|
| 1401 | // Feel free to revert to a forward loop for clarity. | 
|---|
| 1402 | // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { | 
|---|
| 1403 | for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) { | 
|---|
| 1404 | Node* proj = _matcher._null_check_tests[i]; | 
|---|
| 1405 | Node* val  = _matcher._null_check_tests[i + 1]; | 
|---|
| 1406 | Block* block = get_block_for_node(proj); | 
|---|
| 1407 | implicit_null_check(block, proj, val, C->allowed_deopt_reasons()); | 
|---|
| 1408 | // The implicit_null_check will only perform the transformation | 
|---|
| 1409 | // if the null branch is truly uncommon, *and* it leads to an | 
|---|
| 1410 | // uncommon trap.  Combined with the too_many_traps guards | 
|---|
| 1411 | // above, this prevents SEGV storms reported in 6366351, | 
|---|
| 1412 | // by recompiling offending methods without this optimization. | 
|---|
| 1413 | } | 
|---|
| 1414 | } | 
|---|
| 1415 |  | 
|---|
| 1416 | bool block_size_threshold_ok = false; | 
|---|
| 1417 | intptr_t *recalc_pressure_nodes = NULL; | 
|---|
| 1418 | if (OptoRegScheduling) { | 
|---|
| 1419 | for (uint i = 0; i < number_of_blocks(); i++) { | 
|---|
| 1420 | Block* block = get_block(i); | 
|---|
| 1421 | if (block->number_of_nodes() > 10) { | 
|---|
| 1422 | block_size_threshold_ok = true; | 
|---|
| 1423 | break; | 
|---|
| 1424 | } | 
|---|
| 1425 | } | 
|---|
| 1426 | } | 
|---|
| 1427 |  | 
|---|
| 1428 | // Register-pressure-aware local scheduling is used only when OptoRegScheduling | 
|---|
| 1429 | // is set and at least one block exceeds the size threshold found above. | 
|---|
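|  | // Build just enough register-allocator state (liveness and an empty IFG) | 
|---|
|  | // for schedule_local() to estimate per-block register pressure. | 
|---|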
| 1430 | PhaseChaitin regalloc(C->unique(), *this, _matcher, true); | 
|---|
| 1431 | ResourceArea live_arena(mtCompiler);      // Arena for liveness | 
|---|
| 1432 | ResourceMark rm_live(&live_arena); | 
|---|
| 1433 | PhaseLive live(*this, regalloc._lrg_map.names(), &live_arena, true); | 
|---|
| 1434 | PhaseIFG ifg(&live_arena); | 
|---|
| 1435 | if (OptoRegScheduling && block_size_threshold_ok) { | 
|---|
| 1436 | regalloc.mark_ssa(); | 
|---|
| 1437 | Compile::TracePhase tp( "computeLive", &timers[_t_computeLive]); | 
|---|
| 1438 | rm_live.reset_to_mark();           // Reclaim working storage | 
|---|
| 1439 | IndexSet::reset_memory(C, &live_arena); | 
|---|
| 1440 | uint node_size = regalloc._lrg_map.max_lrg_id(); | 
|---|
| 1441 | ifg.init(node_size); // Empty IFG | 
|---|
| 1442 | regalloc.set_ifg(ifg); | 
|---|
| 1443 | regalloc.set_live(live); | 
|---|
| 1444 | regalloc.gather_lrg_masks(false);    // Collect LRG masks | 
|---|
| 1445 | live.compute(node_size); // Compute liveness | 
|---|
| 1446 |  | 
|---|
| 1447 | recalc_pressure_nodes = NEW_RESOURCE_ARRAY(intptr_t, node_size); | 
|---|
| 1448 | for (uint i = 0; i < node_size; i++) { | 
|---|
| 1449 | recalc_pressure_nodes[i] = 0; | 
|---|
| 1450 | } | 
|---|
| 1451 | } | 
|---|
| 1452 | _regalloc = &regalloc; | 
|---|
| 1453 |  | 
|---|
| 1454 | #ifndef PRODUCT | 
|---|
| 1455 | if (trace_opto_pipelining()) { | 
|---|
| 1456 | tty->print( "\n---- Start Local Scheduling ----\n"); | 
|---|
| 1457 | } | 
|---|
| 1458 | #endif | 
|---|
| 1459 |  | 
|---|
| 1460 | // Schedule locally.  Right now a simple topological sort. | 
|---|
| 1461 | // Later, do a real latency aware scheduler. | 
|---|
| 1462 | GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1); | 
|---|
| 1463 | visited.Clear(); | 
|---|
| 1464 | for (uint i = 0; i < number_of_blocks(); i++) { | 
|---|
| 1465 | Block* block = get_block(i); | 
|---|
| 1466 | if (!schedule_local(block, ready_cnt, visited, recalc_pressure_nodes)) { | 
|---|
| 1467 | if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { | 
|---|
| 1468 | C->record_method_not_compilable( "local schedule failed"); | 
|---|
| 1469 | } | 
|---|
| 1470 | _regalloc = NULL; | 
|---|
| 1471 | return; | 
|---|
| 1472 | } | 
|---|
| 1473 | } | 
|---|
| 1474 | _regalloc = NULL; | 
|---|
| 1475 |  | 
|---|
| 1476 | // If we inserted any instructions between a Call and its CatchNode, | 
|---|
| 1477 | // clone the instructions on all paths below the Catch. | 
|---|
| 1478 | for (uint i = 0; i < number_of_blocks(); i++) { | 
|---|
| 1479 | Block* block = get_block(i); | 
|---|
| 1480 | call_catch_cleanup(block); | 
|---|
| 1481 | } | 
|---|
| 1482 |  | 
|---|
| 1483 | #ifndef PRODUCT | 
|---|
| 1484 | if (trace_opto_pipelining()) { | 
|---|
| 1485 | tty->print( "\n---- After GlobalCodeMotion ----\n"); | 
|---|
| 1486 | for (uint i = 0; i < number_of_blocks(); i++) { | 
|---|
| 1487 | Block* block = get_block(i); | 
|---|
| 1488 | block->dump(); | 
|---|
| 1489 | } | 
|---|
| 1490 | } | 
|---|
| 1491 | #endif | 
|---|
| 1492 | // Dead.  Poison the pointer so any stale use of _node_latency is caught. | 
|---|
| 1493 | _node_latency = (GrowableArray<uint> *)((intptr_t)0xdeadbeef); | 
|---|
| 1494 | } | 
|---|
| 1495 |  | 
|---|
| 1496 | bool PhaseCFG::do_global_code_motion() { | 
|---|
| 1497 |  | 
|---|
| 1498 | build_dominator_tree(); | 
|---|
| 1499 | if (C->failing()) { | 
|---|
| 1500 | return false; | 
|---|
| 1501 | } | 
|---|
| 1502 |  | 
|---|
| 1503 | NOT_PRODUCT( C->verify_graph_edges(); ) | 
|---|
| 1504 |  | 
|---|
| 1505 | estimate_block_frequency(); | 
|---|
| 1506 |  | 
|---|
| 1507 | global_code_motion(); | 
|---|
| 1508 |  | 
|---|
| 1509 | if (C->failing()) { | 
|---|
| 1510 | return false; | 
|---|
| 1511 | } | 
|---|
| 1512 |  | 
|---|
| 1513 | return true; | 
|---|
| 1514 | } | 
|---|
| 1515 |  | 
|---|
| 1516 | //------------------------------Estimate_Block_Frequency----------------------- | 
|---|
| 1517 | // Estimate block frequencies based on IfNode probabilities. | 
|---|
| 1518 | void PhaseCFG::estimate_block_frequency() { | 
|---|
| 1519 |  | 
|---|
| 1520 | // Force conditional branches leading to uncommon traps to be unlikely, | 
|---|
| 1521 | // not because we get to the uncommon_trap with less relative frequency, | 
|---|
| 1522 | // but because an uncommon_trap typically causes a deopt, so we only get | 
|---|
| 1523 | // there once. | 
|---|
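|  | // With freq-based layout the branch probabilities are adjusted up front | 
|---|
|  | // (update_uncommon_branch); otherwise the trap blocks' frequencies are | 
|---|
|  | // clamped to PROB_MIN after propagation (see below). | 
|---|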
| 1524 | if (C->do_freq_based_layout()) { | 
|---|
| 1525 | Block_List worklist; | 
|---|
| 1526 | Block* root_blk = get_block(0); | 
|---|
| 1527 | for (uint i = 1; i < root_blk->num_preds(); i++) { | 
|---|
| 1528 | Block *pb = get_block_for_node(root_blk->pred(i)); | 
|---|
| 1529 | if (pb->has_uncommon_code()) { | 
|---|
| 1530 | worklist.push(pb); | 
|---|
| 1531 | } | 
|---|
| 1532 | } | 
|---|
| 1533 | while (worklist.size() > 0) { | 
|---|
| 1534 | Block* uct = worklist.pop(); | 
|---|
| 1535 | if (uct == get_root_block()) { | 
|---|
| 1536 | continue; | 
|---|
| 1537 | } | 
|---|
| 1538 | for (uint i = 1; i < uct->num_preds(); i++) { | 
|---|
| 1539 | Block *pb = get_block_for_node(uct->pred(i)); | 
|---|
| 1540 | if (pb->_num_succs == 1) { | 
|---|
| 1541 | worklist.push(pb); | 
|---|
| 1542 | } else if (pb->num_fall_throughs() == 2) { | 
|---|
| 1543 | pb->update_uncommon_branch(uct); | 
|---|
| 1544 | } | 
|---|
| 1545 | } | 
|---|
| 1546 | } | 
|---|
| 1547 | } | 
|---|
| 1548 |  | 
|---|
| 1549 | // Create the loop tree and calculate loop depth. | 
|---|
| 1550 | _root_loop = create_loop_tree(); | 
|---|
| 1551 | _root_loop->compute_loop_depth(0); | 
|---|
| 1552 |  | 
|---|
| 1553 | // Compute block frequency of each block, relative to a single loop entry. | 
|---|
| 1554 | _root_loop->compute_freq(); | 
|---|
| 1555 |  | 
|---|
| 1556 | // Adjust all frequencies to be relative to a single method entry | 
|---|
| 1557 | _root_loop->_freq = 1.0; | 
|---|
| 1558 | _root_loop->scale_freq(); | 
|---|
| 1559 |  | 
|---|
| 1560 | // Save outermost loop frequency for LRG frequency threshold | 
|---|
| 1561 | _outer_loop_frequency = _root_loop->outer_loop_freq(); | 
|---|
| 1562 |  | 
|---|
| 1563 | // force paths ending at uncommon traps to be infrequent | 
|---|
| 1564 | if (!C->do_freq_based_layout()) { | 
|---|
| 1565 | Block_List worklist; | 
|---|
| 1566 | Block* root_blk = get_block(0); | 
|---|
| 1567 | for (uint i = 1; i < root_blk->num_preds(); i++) { | 
|---|
| 1568 | Block *pb = get_block_for_node(root_blk->pred(i)); | 
|---|
| 1569 | if (pb->has_uncommon_code()) { | 
|---|
| 1570 | worklist.push(pb); | 
|---|
| 1571 | } | 
|---|
| 1572 | } | 
|---|
| 1573 | while (worklist.size() > 0) { | 
|---|
| 1574 | Block* uct = worklist.pop(); | 
|---|
| 1575 | uct->_freq = PROB_MIN; | 
|---|
| 1576 | for (uint i = 1; i < uct->num_preds(); i++) { | 
|---|
| 1577 | Block *pb = get_block_for_node(uct->pred(i)); | 
|---|
| 1578 | if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { | 
|---|
| 1579 | worklist.push(pb); | 
|---|
| 1580 | } | 
|---|
| 1581 | } | 
|---|
| 1582 | } | 
|---|
| 1583 | } | 
|---|
| 1584 |  | 
|---|
| 1585 | #ifdef ASSERT | 
|---|
| 1586 | for (uint i = 0; i < number_of_blocks(); i++) { | 
|---|
| 1587 | Block* b = get_block(i); | 
|---|
| 1588 | assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); | 
|---|
| 1589 | } | 
|---|
| 1590 | #endif | 
|---|
| 1591 |  | 
|---|
| 1592 | #ifndef PRODUCT | 
|---|
| 1593 | if (PrintCFGBlockFreq) { | 
|---|
| 1594 | tty->print_cr( "CFG Block Frequencies"); | 
|---|
| 1595 | _root_loop->dump_tree(); | 
|---|
| 1596 | if (Verbose) { | 
|---|
| 1597 | tty->print_cr( "PhaseCFG dump"); | 
|---|
| 1598 | dump(); | 
|---|
| 1599 | tty->print_cr( "Node dump"); | 
|---|
| 1600 | _root->dump(99999); | 
|---|
| 1601 | } | 
|---|
| 1602 | } | 
|---|
| 1603 | #endif | 
|---|
| 1604 | } | 
|---|
| 1605 |  | 
|---|
| 1606 | //----------------------------create_loop_tree-------------------------------- | 
|---|
| 1607 | // Create a loop tree from the CFG | 
|---|
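|  | // Two passes: first find LoopNode-headed blocks and flood-fill their | 
|---|
|  | // members backwards from the back edge; then attach every block and | 
|---|
|  | // inner loop to the member list of its enclosing loop. | 
|---|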
| 1608 | CFGLoop* PhaseCFG::create_loop_tree() { | 
|---|
| 1609 |  | 
|---|
| 1610 | #ifdef ASSERT | 
|---|
| 1611 | assert(get_block(0) == get_root_block(), "first block should be root block"); | 
|---|
| 1612 | for (uint i = 0; i < number_of_blocks(); i++) { | 
|---|
| 1613 | Block* block = get_block(i); | 
|---|
| 1614 | // Check that the _loop fields are clear...we could clear them if not. | 
|---|
| 1615 | assert(block->_loop == NULL, "clear _loop expected"); | 
|---|
| 1616 | // Sanity check that the RPO numbering is reflected in the _blocks array. | 
|---|
| 1617 | // It doesn't have to be for the loop tree to be built, but if it is not, | 
|---|
| 1618 | // then the blocks have been reordered since dom graph building...which | 
|---|
| 1619 | // would invalidate the RPO numbering | 
|---|
| 1620 | assert(block->_rpo == i, "unexpected reverse post order number"); | 
|---|
| 1621 | } | 
|---|
| 1622 | #endif | 
|---|
| 1623 |  | 
|---|
| 1624 | int idct = 0; | 
|---|
| 1625 | CFGLoop* root_loop = new CFGLoop(idct++); | 
|---|
| 1626 |  | 
|---|
| 1627 | Block_List worklist; | 
|---|
| 1628 |  | 
|---|
| 1629 | // Assign blocks to loops | 
|---|
| 1630 | for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block | 
|---|
| 1631 | Block* block = get_block(i); | 
|---|
| 1632 |  | 
|---|
| 1633 | if (block->head()->is_Loop()) { | 
|---|
| 1634 | Block* loop_head = block; | 
|---|
| 1635 | assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | 
|---|
| 1636 | Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); | 
|---|
| 1637 | Block* tail = get_block_for_node(tail_n); | 
|---|
| 1638 |  | 
|---|
| 1639 | // Defensively filter out Loop nodes for non-single-entry loops. | 
|---|
| 1640 | // For all reasonable loops, the head occurs before the tail in RPO. | 
|---|
| 1641 | if (i <= tail->_rpo) { | 
|---|
| 1642 |  | 
|---|
| 1643 | // The tail and (recursive) predecessors of the tail | 
|---|
| 1644 | // are made members of a new loop. | 
|---|
| 1645 |  | 
|---|
| 1646 | assert(worklist.size() == 0, "nonempty worklist"); | 
|---|
| 1647 | CFGLoop* nloop = new CFGLoop(idct++); | 
|---|
| 1648 | assert(loop_head->_loop == NULL, "just checking"); | 
|---|
| 1649 | loop_head->_loop = nloop; | 
|---|
| 1650 | // Add to nloop so push_pred() will skip over inner loops | 
|---|
| 1651 | nloop->add_member(loop_head); | 
|---|
| 1652 | nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this); | 
|---|
| 1653 |  | 
|---|
| 1654 | while (worklist.size() > 0) { | 
|---|
| 1655 | Block* member = worklist.pop(); | 
|---|
| 1656 | if (member != loop_head) { | 
|---|
| 1657 | for (uint j = 1; j < member->num_preds(); j++) { | 
|---|
| 1658 | nloop->push_pred(member, j, worklist, this); | 
|---|
| 1659 | } | 
|---|
| 1660 | } | 
|---|
| 1661 | } | 
|---|
| 1662 | } | 
|---|
| 1663 | } | 
|---|
| 1664 | } | 
|---|
| 1665 |  | 
|---|
| 1666 | // Create a member list for each loop consisting | 
|---|
| 1667 | // of both blocks and (immediate child) loops. | 
|---|
| 1668 | for (uint i = 0; i < number_of_blocks(); i++) { | 
|---|
| 1669 | Block* block = get_block(i); | 
|---|
| 1670 | CFGLoop* lp = block->_loop; | 
|---|
| 1671 | if (lp == NULL) { | 
|---|
| 1672 | // Not assigned to a loop. Add it to the method's pseudo loop. | 
|---|
| 1673 | block->_loop = root_loop; | 
|---|
| 1674 | lp = root_loop; | 
|---|
| 1675 | } | 
|---|
| 1676 | if (lp == root_loop || block != lp->head()) { // loop heads are already members | 
|---|
| 1677 | lp->add_member(block); | 
|---|
| 1678 | } | 
|---|
| 1679 | if (lp != root_loop) { | 
|---|
| 1680 | if (lp->parent() == NULL) { | 
|---|
| 1681 | // Not a nested loop. Make it a child of the method's pseudo loop. | 
|---|
| 1682 | root_loop->add_nested_loop(lp); | 
|---|
| 1683 | } | 
|---|
| 1684 | if (block == lp->head()) { | 
|---|
| 1685 | // Add nested loop to member list of parent loop. | 
|---|
| 1686 | lp->parent()->add_member(lp); | 
|---|
| 1687 | } | 
|---|
| 1688 | } | 
|---|
| 1689 | } | 
|---|
| 1690 |  | 
|---|
| 1691 | return root_loop; | 
|---|
| 1692 | } | 
|---|
| 1693 |  | 
|---|
| 1694 | //------------------------------push_pred-------------------------------------- | 
|---|
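|  | // Add the i-th predecessor of blk to this loop; inner loops are skipped | 
|---|
|  | // over by continuing from their entry control. | 
|---|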
| 1695 | void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) { | 
|---|
| 1696 | Node* pred_n = blk->pred(i); | 
|---|
| 1697 | Block* pred = cfg->get_block_for_node(pred_n); | 
|---|
| 1698 | CFGLoop *pred_loop = pred->_loop; | 
|---|
| 1699 | if (pred_loop == NULL) { | 
|---|
| 1700 | // Filter out blocks for non-single-entry loops. | 
|---|
| 1701 | // For all reasonable loops, the head occurs before the tail in RPO. | 
|---|
| 1702 | if (pred->_rpo > head()->_rpo) { | 
|---|
| 1703 | pred->_loop = this; | 
|---|
| 1704 | worklist.push(pred); | 
|---|
| 1705 | } | 
|---|
| 1706 | } else if (pred_loop != this) { | 
|---|
| 1707 | // Nested loop. | 
|---|
| 1708 | while (pred_loop->_parent != NULL && pred_loop->_parent != this) { | 
|---|
| 1709 | pred_loop = pred_loop->_parent; | 
|---|
| 1710 | } | 
|---|
| 1711 | // Make pred's loop be a child | 
|---|
| 1712 | if (pred_loop->_parent == NULL) { | 
|---|
| 1713 | add_nested_loop(pred_loop); | 
|---|
| 1714 | // Continue with loop entry predecessor. | 
|---|
| 1715 | Block* pred_head = pred_loop->head(); | 
|---|
| 1716 | assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); | 
|---|
| 1717 | assert(pred_head != head(), "loop head in only one loop"); | 
|---|
| 1718 | push_pred(pred_head, LoopNode::EntryControl, worklist, cfg); | 
|---|
| 1719 | } else { | 
|---|
| 1720 | assert(pred_loop->_parent == this && _parent == NULL, "just checking"); | 
|---|
| 1721 | } | 
|---|
| 1722 | } | 
|---|
| 1723 | } | 
|---|
| 1724 |  | 
|---|
| 1725 | //------------------------------add_nested_loop-------------------------------- | 
|---|
| 1726 | // Make cl a child of the current loop in the loop tree. | 
|---|
| 1727 | void CFGLoop::add_nested_loop(CFGLoop* cl) { | 
|---|
| 1728 | assert(_parent == NULL, "no parent yet"); | 
|---|
| 1729 | assert(cl != this, "not my own parent"); | 
|---|
| 1730 | cl->_parent = this; | 
|---|
| 1731 | CFGLoop* ch = _child; | 
|---|
| 1732 | if (ch == NULL) { | 
|---|
| 1733 | _child = cl; | 
|---|
| 1734 | } else { | 
|---|
| 1735 | while (ch->_sibling != NULL) { ch = ch->_sibling; } | 
|---|
| 1736 | ch->_sibling = cl; | 
|---|
| 1737 | } | 
|---|
| 1738 | } | 
|---|
| 1739 |  | 
|---|
| 1740 | //------------------------------compute_loop_depth----------------------------- | 
|---|
| 1741 | // Store the loop depth in each CFGLoop object. | 
|---|
| 1742 | // Recursively walk the children to do the same for them. | 
|---|
| 1743 | void CFGLoop::compute_loop_depth(int depth) { | 
|---|
| 1744 | _depth = depth; | 
|---|
| 1745 | CFGLoop* ch = _child; | 
|---|
| 1746 | while (ch != NULL) { | 
|---|
| 1747 | ch->compute_loop_depth(depth + 1); | 
|---|
| 1748 | ch = ch->_sibling; | 
|---|
| 1749 | } | 
|---|
| 1750 | } | 
|---|
| 1751 |  | 
|---|
| 1752 | //------------------------------compute_freq----------------------------------- | 
|---|
| 1753 | // Compute the frequency of each block and loop, relative to a single entry | 
|---|
| 1754 | // into the dominating loop head. | 
|---|
| 1755 | void CFGLoop::compute_freq() { | 
|---|
| 1756 | // Bottom up traversal of loop tree (visit inner loops first.) | 
|---|
| 1757 | // Set loop head frequency to 1.0, then transitively | 
|---|
| 1758 | // compute frequency for all successors in the loop, | 
|---|
| 1759 | // as well as for each exit edge.  Inner loops are | 
|---|
| 1760 | // treated as single blocks with loop exit targets | 
|---|
| 1761 | // as the successor blocks. | 
|---|
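|  | // E.g. a loop whose combined exits are taken with probability 0.1 per | 
|---|
|  | // iteration ends up with _exit_prob 0.1, i.e. roughly 10 iterations | 
|---|
|  | // per entry (see trip_count()). | 
|---|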
| 1762 |  | 
|---|
| 1763 | // Nested loops first | 
|---|
| 1764 | CFGLoop* ch = _child; | 
|---|
| 1765 | while (ch != NULL) { | 
|---|
| 1766 | ch->compute_freq(); | 
|---|
| 1767 | ch = ch->_sibling; | 
|---|
| 1768 | } | 
|---|
| 1769 | assert (_members.length() > 0, "no empty loops"); | 
|---|
| 1770 | Block* hd = head(); | 
|---|
| 1771 | hd->_freq = 1.0; | 
|---|
| 1772 | for (int i = 0; i < _members.length(); i++) { | 
|---|
| 1773 | CFGElement* s = _members.at(i); | 
|---|
| 1774 | double freq = s->_freq; | 
|---|
| 1775 | if (s->is_block()) { | 
|---|
| 1776 | Block* b = s->as_Block(); | 
|---|
| 1777 | for (uint j = 0; j < b->_num_succs; j++) { | 
|---|
| 1778 | Block* sb = b->_succs[j]; | 
|---|
| 1779 | update_succ_freq(sb, freq * b->succ_prob(j)); | 
|---|
| 1780 | } | 
|---|
| 1781 | } else { | 
|---|
| 1782 | CFGLoop* lp = s->as_CFGLoop(); | 
|---|
| 1783 | assert(lp->_parent == this, "immediate child"); | 
|---|
| 1784 | for (int k = 0; k < lp->_exits.length(); k++) { | 
|---|
| 1785 | Block* eb = lp->_exits.at(k).get_target(); | 
|---|
| 1786 | double prob = lp->_exits.at(k).get_prob(); | 
|---|
| 1787 | update_succ_freq(eb, freq * prob); | 
|---|
| 1788 | } | 
|---|
| 1789 | } | 
|---|
| 1790 | } | 
|---|
| 1791 |  | 
|---|
| 1792 | // For all loops other than the outer, "method" loop, | 
|---|
| 1793 | // sum and normalize the exit probability. The "method" loop | 
|---|
| 1794 | // should keep the initial exit probability of 1, so that | 
|---|
| 1795 | // inner blocks do not get erroneously scaled. | 
|---|
| 1796 | if (_depth != 0) { | 
|---|
| 1797 | // Total the exit probabilities for this loop. | 
|---|
| 1798 | double exits_sum = 0.0f; | 
|---|
| 1799 | for (int i = 0; i < _exits.length(); i++) { | 
|---|
| 1800 | exits_sum += _exits.at(i).get_prob(); | 
|---|
| 1801 | } | 
|---|
| 1802 |  | 
|---|
| 1803 | // Normalize the exit probabilities. Until now, the | 
|---|
| 1804 | // probabilities estimate the possibility of exit per | 
|---|
| 1805 | // a single loop iteration; afterward, they estimate | 
|---|
| 1806 | // the probability of exit per loop entry. | 
|---|
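|  | // E.g. exits taken with per-iteration probabilities 0.05 and 0.15 | 
|---|
|  | // normalize to 0.25 and 0.75 of all loop exits (exits_sum == 0.2). | 
|---|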
| 1807 | for (int i = 0; i < _exits.length(); i++) { | 
|---|
| 1808 | Block* et = _exits.at(i).get_target(); | 
|---|
| 1809 | float new_prob = 0.0f; | 
|---|
| 1810 | if (_exits.at(i).get_prob() > 0.0f) { | 
|---|
| 1811 | new_prob = _exits.at(i).get_prob() / exits_sum; | 
|---|
| 1812 | } | 
|---|
| 1813 | BlockProbPair bpp(et, new_prob); | 
|---|
| 1814 | _exits.at_put(i, bpp); | 
|---|
| 1815 | } | 
|---|
| 1816 |  | 
|---|
| 1817 | // Save the total, but guard against unreasonable probability, | 
|---|
| 1818 | // as the value is used to estimate the loop trip count. | 
|---|
| 1819 | // An infinite trip count would blur relative block | 
|---|
| 1820 | // frequencies. | 
|---|
| 1821 | if (exits_sum > 1.0f) exits_sum = 1.0; | 
|---|
| 1822 | if (exits_sum < PROB_MIN) exits_sum = PROB_MIN; | 
|---|
| 1823 | _exit_prob = exits_sum; | 
|---|
| 1824 | } | 
|---|
| 1825 | } | 
|---|
| 1826 |  | 
|---|
| 1827 | //------------------------------succ_prob------------------------------------- | 
|---|
| 1828 | // Determine the probability of reaching successor 'i' from the receiver block. | 
|---|
| 1829 | float Block::succ_prob(uint i) { | 
|---|
| 1830 | int eidx = end_idx(); | 
|---|
| 1831 | Node *n = get_node(eidx);  // Get ending Node | 
|---|
| 1832 |  | 
|---|
| 1833 | int op = n->Opcode(); | 
|---|
| 1834 | if (n->is_Mach()) { | 
|---|
| 1835 | if (n->is_MachNullCheck()) { | 
|---|
| 1836 | // Can only reach here if called after lcm. The original Op_If is gone, | 
|---|
| 1837 | // so we attempt to infer the probability from one or both of the | 
|---|
| 1838 | // successor blocks. | 
|---|
| 1839 | assert(_num_succs == 2, "expecting 2 successors of a null check"); | 
|---|
| 1840 | // If either successor has only one predecessor, then the | 
|---|
| 1841 | // probability estimate can be derived using the | 
|---|
| 1842 | // relative frequency of the successor and this block. | 
|---|
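|  | // (num_preds() counts the head's req(); input 0 is the region itself, | 
|---|
|  | // so num_preds() == 2 means exactly one real predecessor.) | 
|---|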
| 1843 | if (_succs[i]->num_preds() == 2) { | 
|---|
| 1844 | return _succs[i]->_freq / _freq; | 
|---|
| 1845 | } else if (_succs[1-i]->num_preds() == 2) { | 
|---|
| 1846 | return 1 - (_succs[1-i]->_freq / _freq); | 
|---|
| 1847 | } else { | 
|---|
| 1848 | // Estimate using both successor frequencies | 
|---|
| 1849 | float freq = _succs[i]->_freq; | 
|---|
| 1850 | return freq / (freq + _succs[1-i]->_freq); | 
|---|
| 1851 | } | 
|---|
| 1852 | } | 
|---|
| 1853 | op = n->as_Mach()->ideal_Opcode(); | 
|---|
| 1854 | } | 
|---|
| 1855 |  | 
|---|
| 1856 |  | 
|---|
| 1857 | // Switch on branch type | 
|---|
| 1858 | switch( op ) { | 
|---|
| 1859 | case Op_CountedLoopEnd: | 
|---|
| 1860 | case Op_If: { | 
|---|
| 1861 | assert (i < 2, "just checking"); | 
|---|
| 1862 | // Conditionals pass on only part of their frequency | 
|---|
| 1863 | float prob  = n->as_MachIf()->_prob; | 
|---|
| 1864 | assert(prob >= 0.0 && prob <= 1.0, "out of range probability"); | 
|---|
| 1865 | // If succ[i] is the FALSE branch, invert path info | 
|---|
| 1866 | if( get_node(i + eidx + 1)->Opcode() == Op_IfFalse ) { | 
|---|
| 1867 | return 1.0f - prob; // not taken | 
|---|
| 1868 | } else { | 
|---|
| 1869 | return prob; // taken | 
|---|
| 1870 | } | 
|---|
| 1871 | } | 
|---|
| 1872 |  | 
|---|
| 1873 | case Op_Jump: | 
|---|
| 1874 | return n->as_MachJump()->_probs[get_node(i + eidx + 1)->as_JumpProj()->_con]; | 
|---|
| 1875 |  | 
|---|
| 1876 | case Op_Catch: { | 
|---|
| 1877 | const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj(); | 
|---|
| 1878 | if (ci->_con == CatchProjNode::fall_through_index) { | 
|---|
| 1879 | // Fall-thru path gets the lion's share. | 
|---|
| 1880 | return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs; | 
|---|
| 1881 | } else { | 
|---|
| 1882 | // Presume exceptional paths are equally unlikely | 
|---|
| 1883 | return PROB_UNLIKELY_MAG(5); | 
|---|
| 1884 | } | 
|---|
| 1885 | } | 
|---|
| 1886 |  | 
|---|
| 1887 | case Op_Root: | 
|---|
| 1888 | case Op_Goto: | 
|---|
| 1889 | // Pass frequency straight thru to target | 
|---|
| 1890 | return 1.0f; | 
|---|
| 1891 |  | 
|---|
| 1892 | case Op_NeverBranch: | 
|---|
| 1893 | return 0.0f; | 
|---|
| 1894 |  | 
|---|
| 1895 | case Op_TailCall: | 
|---|
| 1896 | case Op_TailJump: | 
|---|
| 1897 | case Op_Return: | 
|---|
| 1898 | case Op_Halt: | 
|---|
| 1899 | case Op_Rethrow: | 
|---|
| 1900 | // Do not push out freq to root block | 
|---|
| 1901 | return 0.0f; | 
|---|
| 1902 |  | 
|---|
| 1903 | default: | 
|---|
| 1904 | ShouldNotReachHere(); | 
|---|
| 1905 | } | 
|---|
| 1906 |  | 
|---|
| 1907 | return 0.0f; | 
|---|
| 1908 | } | 
|---|
| 1909 |  | 
|---|
| 1910 | //------------------------------num_fall_throughs----------------------------- | 
|---|
| 1911 | // Return the number of fall-through candidates for a block | 
|---|
| 1912 | int Block::num_fall_throughs() { | 
|---|
| 1913 | int eidx = end_idx(); | 
|---|
| 1914 | Node *n = get_node(eidx);  // Get ending Node | 
|---|
| 1915 |  | 
|---|
| 1916 | int op = n->Opcode(); | 
|---|
| 1917 | if (n->is_Mach()) { | 
|---|
| 1918 | if (n->is_MachNullCheck()) { | 
|---|
| 1919 | // In theory, either side can fall through; for simplicity's sake, | 
|---|
| 1920 | // let's say only the false branch can now. | 
|---|
| 1921 | return 1; | 
|---|
| 1922 | } | 
|---|
| 1923 | op = n->as_Mach()->ideal_Opcode(); | 
|---|
| 1924 | } | 
|---|
| 1925 |  | 
|---|
| 1926 | // Switch on branch type | 
|---|
| 1927 | switch( op ) { | 
|---|
| 1928 | case Op_CountedLoopEnd: | 
|---|
| 1929 | case Op_If: | 
|---|
| 1930 | return 2; | 
|---|
| 1931 |  | 
|---|
| 1932 | case Op_Root: | 
|---|
| 1933 | case Op_Goto: | 
|---|
| 1934 | return 1; | 
|---|
| 1935 |  | 
|---|
| 1936 | case Op_Catch: { | 
|---|
| 1937 | for (uint i = 0; i < _num_succs; i++) { | 
|---|
| 1938 | const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj(); | 
|---|
| 1939 | if (ci->_con == CatchProjNode::fall_through_index) { | 
|---|
| 1940 | return 1; | 
|---|
| 1941 | } | 
|---|
| 1942 | } | 
|---|
| 1943 | return 0; | 
|---|
| 1944 | } | 
|---|
| 1945 |  | 
|---|
| 1946 | case Op_Jump: | 
|---|
| 1947 | case Op_NeverBranch: | 
|---|
| 1948 | case Op_TailCall: | 
|---|
| 1949 | case Op_TailJump: | 
|---|
| 1950 | case Op_Return: | 
|---|
| 1951 | case Op_Halt: | 
|---|
| 1952 | case Op_Rethrow: | 
|---|
| 1953 | return 0; | 
|---|
| 1954 |  | 
|---|
| 1955 | default: | 
|---|
| 1956 | ShouldNotReachHere(); | 
|---|
| 1957 | } | 
|---|
| 1958 |  | 
|---|
| 1959 | return 0; | 
|---|
| 1960 | } | 
|---|
| 1961 |  | 
|---|
| 1962 | //------------------------------succ_fall_through----------------------------- | 
|---|
| 1963 | // Return true if a specific successor could be fall-through target. | 
|---|
| 1964 | bool Block::succ_fall_through(uint i) { | 
|---|
| 1965 | int eidx = end_idx(); | 
|---|
| 1966 | Node *n = get_node(eidx);  // Get ending Node | 
|---|
| 1967 |  | 
|---|
| 1968 | int op = n->Opcode(); | 
|---|
| 1969 | if (n->is_Mach()) { | 
|---|
| 1970 | if (n->is_MachNullCheck()) { | 
|---|
| 1971 | // In theory, either side can fall through; for simplicity's sake, | 
|---|
| 1972 | // let's say only the false branch can now. | 
|---|
| 1973 | return get_node(i + eidx + 1)->Opcode() == Op_IfFalse; | 
|---|
| 1974 | } | 
|---|
| 1975 | op = n->as_Mach()->ideal_Opcode(); | 
|---|
| 1976 | } | 
|---|
| 1977 |  | 
|---|
| 1978 | // Switch on branch type | 
|---|
| 1979 | switch( op ) { | 
|---|
| 1980 | case Op_CountedLoopEnd: | 
|---|
| 1981 | case Op_If: | 
|---|
| 1982 | case Op_Root: | 
|---|
| 1983 | case Op_Goto: | 
|---|
| 1984 | return true; | 
|---|
| 1985 |  | 
|---|
| 1986 | case Op_Catch: { | 
|---|
| 1987 | const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj(); | 
|---|
| 1988 | return ci->_con == CatchProjNode::fall_through_index; | 
|---|
| 1989 | } | 
|---|
| 1990 |  | 
|---|
| 1991 | case Op_Jump: | 
|---|
| 1992 | case Op_NeverBranch: | 
|---|
| 1993 | case Op_TailCall: | 
|---|
| 1994 | case Op_TailJump: | 
|---|
| 1995 | case Op_Return: | 
|---|
| 1996 | case Op_Halt: | 
|---|
| 1997 | case Op_Rethrow: | 
|---|
| 1998 | return false; | 
|---|
| 1999 |  | 
|---|
| 2000 | default: | 
|---|
| 2001 | ShouldNotReachHere(); | 
|---|
| 2002 | } | 
|---|
| 2003 |  | 
|---|
| 2004 | return false; | 
|---|
| 2005 | } | 
|---|
| 2006 |  | 
|---|
| 2007 | //------------------------------update_uncommon_branch------------------------ | 
|---|
| 2008 | // Update the probability of a two-branch to be uncommon | 
|---|
| 2009 | void Block::update_uncommon_branch(Block* ub) { | 
|---|
| 2010 | int eidx = end_idx(); | 
|---|
| 2011 | Node *n = get_node(eidx);  // Get ending Node | 
|---|
| 2012 |  | 
|---|
| 2013 | int op = n->as_Mach()->ideal_Opcode(); | 
|---|
| 2014 |  | 
|---|
| 2015 | assert(op == Op_CountedLoopEnd || op == Op_If, "must be an If"); | 
|---|
| 2016 | assert(num_fall_throughs() == 2, "must be a two way branch block"); | 
|---|
| 2017 |  | 
|---|
| 2018 | // Which successor is ub? | 
|---|
| 2019 | uint s; | 
|---|
| 2020 | for (s = 0; s <_num_succs; s++) { | 
|---|
| 2021 | if (_succs[s] == ub) break; | 
|---|
| 2022 | } | 
|---|
| 2023 | assert(s < 2, "uncommon successor must be found"); | 
|---|
| 2024 |  | 
|---|
| 2025 | // If ub is the true path, make the probability small, else | 
|---|
| 2026 | // ub is the false path, and make the probability large | 
|---|
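|  | // Mechanically: map p to the probability of ub's path, clamp it to at | 
|---|
|  | // most PROB_MIN, then map back to the true-path probability. | 
|---|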
| 2027 | bool invert = (get_node(s + eidx + 1)->Opcode() == Op_IfFalse); | 
|---|
| 2028 |  | 
|---|
| 2029 | // Get existing probability | 
|---|
| 2030 | float p = n->as_MachIf()->_prob; | 
|---|
| 2031 |  | 
|---|
| 2032 | if (invert) p = 1.0 - p; | 
|---|
| 2033 | if (p > PROB_MIN) { | 
|---|
| 2034 | p = PROB_MIN; | 
|---|
| 2035 | } | 
|---|
| 2036 | if (invert) p = 1.0 - p; | 
|---|
| 2037 |  | 
|---|
| 2038 | n->as_MachIf()->_prob = p; | 
|---|
| 2039 | } | 
|---|
| 2040 |  | 
|---|
| 2041 | //------------------------------update_succ_freq------------------------------- | 
|---|
| 2042 | // Update the appropriate frequency associated with block 'b', a successor of | 
|---|
| 2043 | // a block in this loop. | 
|---|
| 2044 | void CFGLoop::update_succ_freq(Block* b, double freq) { | 
|---|
| 2045 | if (b->_loop == this) { | 
|---|
| 2046 | if (b == head()) { | 
|---|
| 2047 | // back branch within the loop | 
|---|
| 2048 | // Do nothing now; the loop-carried frequency will be | 
|---|
| 2049 | // adjusted later in scale_freq(). | 
|---|
| 2050 | } else { | 
|---|
| 2051 | // simple branch within the loop | 
|---|
| 2052 | b->_freq += freq; | 
|---|
| 2053 | } | 
|---|
| 2054 | } else if (!in_loop_nest(b)) { | 
|---|
| 2055 | // branch is exit from this loop | 
|---|
| 2056 | BlockProbPair bpp(b, freq); | 
|---|
| 2057 | _exits.append(bpp); | 
|---|
| 2058 | } else { | 
|---|
| 2059 | // branch into nested loop | 
|---|
| 2060 | CFGLoop* ch = b->_loop; | 
|---|
| 2061 | ch->_freq += freq; | 
|---|
| 2062 | } | 
|---|
| 2063 | } | 
|---|
| 2064 |  | 
|---|
| 2065 | //------------------------------in_loop_nest----------------------------------- | 
|---|
| 2066 | // Determine if block b is in the receiver's loop nest. | 
|---|
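|  | // b is inside the nest if its loop sits at the same depth as this loop, | 
|---|
|  | // or if walking up from a deeper loop reaches this loop. | 
|---|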
| 2067 | bool CFGLoop::in_loop_nest(Block* b) { | 
|---|
| 2068 | int depth = _depth; | 
|---|
| 2069 | CFGLoop* b_loop = b->_loop; | 
|---|
| 2070 | int b_depth = b_loop->_depth; | 
|---|
| 2071 | if (depth == b_depth) { | 
|---|
| 2072 | return true; | 
|---|
| 2073 | } | 
|---|
| 2074 | while (b_depth > depth) { | 
|---|
| 2075 | b_loop = b_loop->_parent; | 
|---|
| 2076 | b_depth = b_loop->_depth; | 
|---|
| 2077 | } | 
|---|
| 2078 | return b_loop == this; | 
|---|
| 2079 | } | 
|---|
| 2080 |  | 
|---|
| 2081 | //------------------------------scale_freq------------------------------------- | 
|---|
| 2082 | // Scale frequency of loops and blocks by trip counts from outer loops | 
|---|
| 2083 | // Do a top down traversal of loop tree (visit outer loops first.) | 
|---|
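|  | // A block's final frequency is its within-loop frequency times the | 
|---|
|  | // product of the trip counts of all enclosing loops. | 
|---|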
| 2084 | void CFGLoop::scale_freq() { | 
|---|
| 2085 | double loop_freq = _freq * trip_count(); | 
|---|
| 2086 | _freq = loop_freq; | 
|---|
| 2087 | for (int i = 0; i < _members.length(); i++) { | 
|---|
| 2088 | CFGElement* s = _members.at(i); | 
|---|
| 2089 | double block_freq = s->_freq * loop_freq; | 
|---|
| 2090 | if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY) | 
|---|
| 2091 | block_freq = MIN_BLOCK_FREQUENCY; | 
|---|
| 2092 | s->_freq = block_freq; | 
|---|
| 2093 | } | 
|---|
| 2094 | CFGLoop* ch = _child; | 
|---|
| 2095 | while (ch != NULL) { | 
|---|
| 2096 | ch->scale_freq(); | 
|---|
| 2097 | ch = ch->_sibling; | 
|---|
| 2098 | } | 
|---|
| 2099 | } | 
|---|
| 2100 |  | 
|---|
| 2101 | // Frequency of outer loop | 
|---|
| 2102 | double CFGLoop::outer_loop_freq() const { | 
|---|
| 2103 | if (_child != NULL) { | 
|---|
| 2104 | return _child->_freq; | 
|---|
| 2105 | } | 
|---|
| 2106 | return _freq; | 
|---|
| 2107 | } | 
|---|
| 2108 |  | 
|---|
| 2109 | #ifndef PRODUCT | 
|---|
| 2110 | //------------------------------dump_tree-------------------------------------- | 
|---|
| 2111 | void CFGLoop::dump_tree() const { | 
|---|
| 2112 | dump(); | 
|---|
| 2113 | if (_child != NULL)   _child->dump_tree(); | 
|---|
| 2114 | if (_sibling != NULL) _sibling->dump_tree(); | 
|---|
| 2115 | } | 
|---|
| 2116 |  | 
|---|
| 2117 | //------------------------------dump------------------------------------------- | 
|---|
| 2118 | void CFGLoop::dump() const { | 
|---|
| 2119 | for (int i = 0; i < _depth; i++) tty->print( "   "); | 
|---|
| 2120 | tty->print( "%s: %d  trip_count: %6.0f freq: %6.0f\n", | 
|---|
| 2121 | _depth == 0 ? "Method": "Loop", _id, trip_count(), _freq); | 
|---|
| 2122 | for (int i = 0; i < _depth; i++) tty->print( "   "); | 
|---|
| 2123 | tty->print( "         members:"); | 
|---|
| 2124 | int k = 0; | 
|---|
| 2125 | for (int i = 0; i < _members.length(); i++) { | 
|---|
| 2126 | if (k++ >= 6) { | 
|---|
| 2127 | tty->print( "\n              "); | 
|---|
| 2128 | for (int j = 0; j < _depth+1; j++) tty->print( "   "); | 
|---|
| 2129 | k = 0; | 
|---|
| 2130 | } | 
|---|
| 2131 | CFGElement *s = _members.at(i); | 
|---|
| 2132 | if (s->is_block()) { | 
|---|
| 2133 | Block *b = s->as_Block(); | 
|---|
| 2134 | tty->print( " B%d(%6.3f)", b->_pre_order, b->_freq); | 
|---|
| 2135 | } else { | 
|---|
| 2136 | CFGLoop* lp = s->as_CFGLoop(); | 
|---|
| 2137 | tty->print( " L%d(%6.3f)", lp->_id, lp->_freq); | 
|---|
| 2138 | } | 
|---|
| 2139 | } | 
|---|
| 2140 | tty->print( "\n"); | 
|---|
| 2141 | for (int i = 0; i < _depth; i++) tty->print( "   "); | 
|---|
| 2142 | tty->print( "         exits:  "); | 
|---|
| 2143 | k = 0; | 
|---|
| 2144 | for (int i = 0; i < _exits.length(); i++) { | 
|---|
| 2145 | if (k++ >= 7) { | 
|---|
| 2146 | tty->print( "\n              "); | 
|---|
| 2147 | for (int j = 0; j < _depth+1; j++) tty->print( "   "); | 
|---|
| 2148 | k = 0; | 
|---|
| 2149 | } | 
|---|
| 2150 | Block *blk = _exits.at(i).get_target(); | 
|---|
| 2151 | double prob = _exits.at(i).get_prob(); | 
|---|
| 2152 | tty->print( " ->%d@%d%%", blk->_pre_order, (int)(prob*100)); | 
|---|
| 2153 | } | 
|---|
| 2154 | tty->print( "\n"); | 
|---|
| 2155 | } | 
|---|
| 2156 | #endif | 
|---|
| 2157 |  | 
|---|