| 1 | /* |
| 2 | * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #include "precompiled.hpp" |
| 26 | #include "compiler/oopMap.hpp" |
| 27 | #include "memory/allocation.inline.hpp" |
| 28 | #include "memory/resourceArea.hpp" |
| 29 | #include "opto/addnode.hpp" |
| 30 | #include "opto/block.hpp" |
| 31 | #include "opto/callnode.hpp" |
| 32 | #include "opto/cfgnode.hpp" |
| 33 | #include "opto/chaitin.hpp" |
| 34 | #include "opto/coalesce.hpp" |
| 35 | #include "opto/indexSet.hpp" |
| 36 | #include "opto/machnode.hpp" |
| 37 | #include "opto/memnode.hpp" |
| 38 | #include "opto/opcodes.hpp" |
| 39 | |
// Construct an interference-graph phase. All graph storage (adjacency
// lists, live range array, yanked set) is later allocated from '_arena'
// by init(); the constructor only records the arena.
PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
}
| 42 | |
| 43 | void PhaseIFG::init( uint maxlrg ) { |
| 44 | _maxlrg = maxlrg; |
| 45 | _yanked = new (_arena) VectorSet(_arena); |
| 46 | _is_square = false; |
| 47 | // Make uninitialized adjacency lists |
| 48 | _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg); |
| 49 | // Also make empty live range structures |
| 50 | _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) ); |
| 51 | memset((void*)_lrgs,0,sizeof(LRG)*maxlrg); |
| 52 | // Init all to empty |
| 53 | for( uint i = 0; i < maxlrg; i++ ) { |
| 54 | _adjs[i].initialize(maxlrg); |
| 55 | _lrgs[i].Set_All(); |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | // Add edge between vertices a & b. These are sorted (triangular matrix), |
| 60 | // then the smaller number is inserted in the larger numbered array. |
| 61 | int PhaseIFG::add_edge( uint a, uint b ) { |
| 62 | lrgs(a).invalid_degree(); |
| 63 | lrgs(b).invalid_degree(); |
| 64 | // Sort a and b, so that a is bigger |
| 65 | assert( !_is_square, "only on triangular" ); |
| 66 | if( a < b ) { uint tmp = a; a = b; b = tmp; } |
| 67 | return _adjs[a].insert( b ); |
| 68 | } |
| 69 | |
| 70 | // Is there an edge between a and b? |
| 71 | int PhaseIFG::test_edge( uint a, uint b ) const { |
| 72 | // Sort a and b, so that a is larger |
| 73 | assert( !_is_square, "only on triangular" ); |
| 74 | if( a < b ) { uint tmp = a; a = b; b = tmp; } |
| 75 | return _adjs[a].member(b); |
| 76 | } |
| 77 | |
| 78 | // Convert triangular matrix to square matrix |
| 79 | void PhaseIFG::SquareUp() { |
| 80 | assert( !_is_square, "only on triangular" ); |
| 81 | |
| 82 | // Simple transpose |
| 83 | for( uint i = 0; i < _maxlrg; i++ ) { |
| 84 | IndexSetIterator elements(&_adjs[i]); |
| 85 | uint datum; |
| 86 | while ((datum = elements.next()) != 0) { |
| 87 | _adjs[datum].insert( i ); |
| 88 | } |
| 89 | } |
| 90 | _is_square = true; |
| 91 | } |
| 92 | |
| 93 | // Compute effective degree in bulk |
| 94 | void PhaseIFG::Compute_Effective_Degree() { |
| 95 | assert( _is_square, "only on square" ); |
| 96 | |
| 97 | for( uint i = 0; i < _maxlrg; i++ ) |
| 98 | lrgs(i).set_degree(effective_degree(i)); |
| 99 | } |
| 100 | |
| 101 | int PhaseIFG::test_edge_sq( uint a, uint b ) const { |
| 102 | assert( _is_square, "only on square" ); |
| 103 | // Swap, so that 'a' has the lesser count. Then binary search is on |
| 104 | // the smaller of a's list and b's list. |
| 105 | if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; } |
| 106 | //return _adjs[a].unordered_member(b); |
| 107 | return _adjs[a].member(b); |
| 108 | } |
| 109 | |
| 110 | // Union edges of B into A |
| 111 | void PhaseIFG::Union( uint a, uint b ) { |
| 112 | assert( _is_square, "only on square" ); |
| 113 | IndexSet *A = &_adjs[a]; |
| 114 | IndexSetIterator b_elements(&_adjs[b]); |
| 115 | uint datum; |
| 116 | while ((datum = b_elements.next()) != 0) { |
| 117 | if(A->insert(datum)) { |
| 118 | _adjs[datum].insert(a); |
| 119 | lrgs(a).invalid_degree(); |
| 120 | lrgs(datum).invalid_degree(); |
| 121 | } |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | // Yank a Node and all connected edges from the IFG. Return a |
| 126 | // list of neighbors (edges) yanked. |
| 127 | IndexSet *PhaseIFG::remove_node( uint a ) { |
| 128 | assert( _is_square, "only on square" ); |
| 129 | assert( !_yanked->test(a), "" ); |
| 130 | _yanked->set(a); |
| 131 | |
| 132 | // I remove the LRG from all neighbors. |
| 133 | IndexSetIterator elements(&_adjs[a]); |
| 134 | LRG &lrg_a = lrgs(a); |
| 135 | uint datum; |
| 136 | while ((datum = elements.next()) != 0) { |
| 137 | _adjs[datum].remove(a); |
| 138 | lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) ); |
| 139 | } |
| 140 | return neighbors(a); |
| 141 | } |
| 142 | |
| 143 | // Re-insert a yanked Node. |
| 144 | void PhaseIFG::re_insert( uint a ) { |
| 145 | assert( _is_square, "only on square" ); |
| 146 | assert( _yanked->test(a), "" ); |
| 147 | (*_yanked) >>= a; |
| 148 | |
| 149 | IndexSetIterator elements(&_adjs[a]); |
| 150 | uint datum; |
| 151 | while ((datum = elements.next()) != 0) { |
| 152 | _adjs[datum].insert(a); |
| 153 | lrgs(datum).invalid_degree(); |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | // Compute the degree between 2 live ranges. If both live ranges are |
| 158 | // aligned-adjacent powers-of-2 then we use the MAX size. If either is |
| 159 | // mis-aligned (or for Fat-Projections, not-adjacent) then we have to |
| 160 | // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why |
| 161 | // this is so. |
| 162 | int LRG::compute_degree( LRG &l ) const { |
| 163 | int tmp; |
| 164 | int num_regs = _num_regs; |
| 165 | int nregs = l.num_regs(); |
| 166 | tmp = (_fat_proj || l._fat_proj) // either is a fat-proj? |
| 167 | ? (num_regs * nregs) // then use product |
| 168 | : MAX2(num_regs,nregs); // else use max |
| 169 | return tmp; |
| 170 | } |
| 171 | |
| 172 | // Compute effective degree for this live range. If both live ranges are |
| 173 | // aligned-adjacent powers-of-2 then we use the MAX size. If either is |
| 174 | // mis-aligned (or for Fat-Projections, not-adjacent) then we have to |
| 175 | // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why |
| 176 | // this is so. |
| 177 | int PhaseIFG::effective_degree( uint lidx ) const { |
| 178 | int eff = 0; |
| 179 | int num_regs = lrgs(lidx).num_regs(); |
| 180 | int fat_proj = lrgs(lidx)._fat_proj; |
| 181 | IndexSet *s = neighbors(lidx); |
| 182 | IndexSetIterator elements(s); |
| 183 | uint nidx; |
| 184 | while((nidx = elements.next()) != 0) { |
| 185 | LRG &lrgn = lrgs(nidx); |
| 186 | int nregs = lrgn.num_regs(); |
| 187 | eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj? |
| 188 | ? (num_regs * nregs) // then use product |
| 189 | : MAX2(num_regs,nregs); // else use max |
| 190 | } |
| 191 | return eff; |
| 192 | } |
| 193 | |
| 194 | |
| 195 | #ifndef PRODUCT |
// Debug dump of the interference graph to the tty, one line per live
// range. Yanked (removed) live ranges are flagged with "XX". Square
// graphs print each node's complete neighbor list; triangular graphs
// print edges stored on higher-numbered rows first, then a '|', then
// this row's own adjacency list.
void PhaseIFG::dump() const {
  tty->print_cr("-- Interference Graph --%s--",
                _is_square ? "square" : "triangular" );
  if( _is_square ) {
    for( uint i = 0; i < _maxlrg; i++ ) {
      tty->print( (*_yanked)[i] ? "XX " : " " );
      tty->print("L%d: { ",i);
      IndexSetIterator elements(&_adjs[i]);
      uint datum;
      while ((datum = elements.next()) != 0) {
        tty->print("L%d ", datum);
      }
      tty->print_cr("}");

    }
    return;
  }

  // Triangular
  for( uint i = 0; i < _maxlrg; i++ ) {
    uint j;
    tty->print( (*_yanked)[i] ? "XX " : " " );
    tty->print("L%d: { ",i);
    // Edges involving 'i' that are stored on higher-numbered rows.
    for( j = _maxlrg; j > i; j-- )
      if( test_edge(j - 1,i) ) {
        tty->print("L%d ",j - 1);
      }
    tty->print("| ");
    // Edges stored on row 'i' itself (lower-numbered neighbors).
    IndexSetIterator elements(&_adjs[i]);
    uint datum;
    while ((datum = elements.next()) != 0) {
      tty->print("L%d ", datum);
    }
    tty->print("}\n");
  }
  tty->print("\n");
}
| 233 | |
| 234 | void PhaseIFG::stats() const { |
| 235 | ResourceMark rm; |
| 236 | int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2); |
| 237 | memset( h_cnt, 0, sizeof(int)*_maxlrg*2 ); |
| 238 | uint i; |
| 239 | for( i = 0; i < _maxlrg; i++ ) { |
| 240 | h_cnt[neighbor_cnt(i)]++; |
| 241 | } |
| 242 | tty->print_cr("--Histogram of counts--" ); |
| 243 | for( i = 0; i < _maxlrg*2; i++ ) |
| 244 | if( h_cnt[i] ) |
| 245 | tty->print("%d/%d " ,i,h_cnt[i]); |
| 246 | tty->cr(); |
| 247 | } |
| 248 | |
// Assert structural invariants of the square IFG: yanked rows are empty,
// indices need no Union-Find forwarding, adjacency is symmetric, the
// diagonal is empty, lists are strictly increasing, and any valid cached
// degree matches a fresh recount.
void PhaseIFG::verify( const PhaseChaitin *pc ) const {
  // IFG is square, sorted and no need for Find
  for( uint i = 0; i < _maxlrg; i++ ) {
    // A yanked live range must have been fully stripped of edges.
    assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely");
    IndexSet *set = &_adjs[i];
    IndexSetIterator elements(set);
    uint idx;
    uint last = 0;  // previous neighbor, for the sortedness check
    while ((idx = elements.next()) != 0) {
      assert(idx != i, "Must have empty diagonal");
      assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find");
      assert(_adjs[idx].member(i), "IFG not square");
      assert(!(*_yanked)[idx], "No yanked neighbors");
      assert(last < idx, "not sorted increasing");
      last = idx;
    }
    assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong");
  }
}
| 268 | #endif |
| 269 | |
| 270 | /* |
| 271 | * Interfere this register with everything currently live. |
| 272 | * Check for interference by checking overlap of regmasks. |
| 273 | * Only interfere if acceptable register masks overlap. |
| 274 | */ |
| 275 | void PhaseChaitin::interfere_with_live(uint lid, IndexSet* liveout) { |
| 276 | LRG& lrg = lrgs(lid); |
| 277 | const RegMask& rm = lrg.mask(); |
| 278 | IndexSetIterator elements(liveout); |
| 279 | uint interfering_lid = elements.next(); |
| 280 | while (interfering_lid != 0) { |
| 281 | LRG& interfering_lrg = lrgs(interfering_lid); |
| 282 | if (rm.overlap(interfering_lrg.mask())) { |
| 283 | _ifg->add_edge(lid, interfering_lid); |
| 284 | } |
| 285 | interfering_lid = elements.next(); |
| 286 | } |
| 287 | } |
| 288 | |
| 289 | // Actually build the interference graph. Uses virtual registers only, no |
| 290 | // physical register masks. This allows me to be very aggressive when |
| 291 | // coalescing copies. Some of this aggressiveness will have to be undone |
| 292 | // later, but I'd rather get all the copies I can now (since unremoved copies |
| 293 | // at this point can end up in bad places). Copies I re-insert later I have |
| 294 | // more opportunity to insert them in low-frequency locations. |
// Build the IFG over virtual registers only — no physical register masks
// are consulted, which permits maximally aggressive copy coalescing.
void PhaseChaitin::build_ifg_virtual( ) {
  Compile::TracePhase tp("buildIFG_virt", &timers[_t_buildIFGvirtual]);

  // For all blocks (in any order) do...
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);
    IndexSet* liveout = _live->live(block);

    // The IFG is built by a single reverse pass over each basic block.
    // Starting with the known live-out set, we remove things that get
    // defined and add things that become live (essentially executing one
    // pass of a standard LIVE analysis). Just before a Node defines a value
    // (and removes it from the live-ness set) that value is certainly live.
    // The defined value interferes with everything currently live. The
    // value is then removed from the live-ness set and it's inputs are
    // added to the live-ness set.
    for (uint j = block->end_idx() + 1; j > 1; j--) {
      Node* n = block->get_node(j - 1);

      // Get value being defined
      uint r = _lrg_map.live_range_id(n);

      // Some special values do not allocate (live range id of 0)
      if (r) {

        // Remove from live-out set
        liveout->remove(r);

        // Copies do not define a new value and so do not interfere.
        // Remove the copies source from the liveout set before interfering.
        uint idx = n->is_Copy();
        if (idx != 0) {
          liveout->remove(_lrg_map.live_range_id(n->in(idx)));
        }

        // Interfere with everything live
        interfere_with_live(r, liveout);
      }

      // Make all inputs live
      if (!n->is_Phi()) {      // Phi function uses come from prior block
        for(uint k = 1; k < n->req(); k++) {
          liveout->insert(_lrg_map.live_range_id(n->in(k)));
        }
      }

      // 2-address instructions always have the defined value live
      // on entry to the instruction, even though it is being defined
      // by the instruction. We pretend a virtual copy sits just prior
      // to the instruction and kills the src-def'd register.
      // In other words, for 2-address instructions the defined value
      // interferes with all inputs.
      uint idx;
      if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
        const MachNode *mach = n->as_Mach();
        // Sometimes my 2-address ADDs are commuted in a bad way.
        // We generally want the USE-DEF register to refer to the
        // loop-varying quantity, to avoid a copy.
        uint op = mach->ideal_Opcode();
        // Check that mach->num_opnds() == 3 to ensure instruction is
        // not subsuming constants, effectively excludes addI_cin_imm
        // Can NOT swap for instructions like addI_cin_imm since it
        // is adding zero to yhi + carry and the second ideal-input
        // points to the result of adding low-halves.
        // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
        if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
            n->in(1)->bottom_type()->base() == Type::Int &&
            // See if the ADD is involved in a tight data loop the wrong way
            n->in(2)->is_Phi() &&
            n->in(2)->in(2) == n ) {
          // Swap inputs 1 and 2 so the loop-carried Phi feeds the
          // USE-DEF operand slot.
          Node *tmp = n->in(1);
          n->set_req( 1, n->in(2) );
          n->set_req( 2, tmp );
        }
        // Defined value interferes with all inputs (except the USE-DEF
        // input, which deliberately shares the def's register).
        uint lidx = _lrg_map.live_range_id(n->in(idx));
        for (uint k = 1; k < n->req(); k++) {
          uint kidx = _lrg_map.live_range_id(n->in(k));
          if (kidx != lidx) {
            _ifg->add_edge(r, kidx);
          }
        }
      }
    } // End of forall instructions in block
  } // End of forall blocks
}
| 381 | |
| 382 | #ifdef ASSERT |
| 383 | uint PhaseChaitin::count_int_pressure(IndexSet* liveout) { |
| 384 | IndexSetIterator elements(liveout); |
| 385 | uint lidx = elements.next(); |
| 386 | uint cnt = 0; |
| 387 | while (lidx != 0) { |
| 388 | LRG& lrg = lrgs(lidx); |
| 389 | if (lrg.mask_is_nonempty_and_up() && |
| 390 | !lrg.is_float_or_vector() && |
| 391 | lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { |
| 392 | cnt += lrg.reg_pressure(); |
| 393 | } |
| 394 | lidx = elements.next(); |
| 395 | } |
| 396 | return cnt; |
| 397 | } |
| 398 | |
| 399 | uint PhaseChaitin::count_float_pressure(IndexSet* liveout) { |
| 400 | IndexSetIterator elements(liveout); |
| 401 | uint lidx = elements.next(); |
| 402 | uint cnt = 0; |
| 403 | while (lidx != 0) { |
| 404 | LRG& lrg = lrgs(lidx); |
| 405 | if (lrg.mask_is_nonempty_and_up() && lrg.is_float_or_vector()) { |
| 406 | cnt += lrg.reg_pressure(); |
| 407 | } |
| 408 | lidx = elements.next(); |
| 409 | } |
| 410 | return cnt; |
| 411 | } |
| 412 | #endif |
| 413 | |
| 414 | /* |
| 415 | * Adjust register pressure down by 1. Capture last hi-to-low transition, |
| 416 | */ |
| 417 | void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure) { |
| 418 | if (lrg.mask_is_nonempty_and_up()) { |
| 419 | if (lrg.is_float_or_vector()) { |
| 420 | float_pressure.lower(lrg, location); |
| 421 | } else { |
| 422 | // Do not count the SP and flag registers |
| 423 | const RegMask& r = lrg.mask(); |
| 424 | if (r.overlap(*Matcher::idealreg2regmask[Op_RegI])) { |
| 425 | int_pressure.lower(lrg, location); |
| 426 | } |
| 427 | } |
| 428 | } |
| 429 | if (_scheduling_info_generated == false) { |
| 430 | assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect" ); |
| 431 | assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect" ); |
| 432 | } |
| 433 | } |
| 434 | |
| 435 | /* Go to the first non-phi index in a block */ |
| 436 | static uint first_nonphi_index(Block* b) { |
| 437 | uint i; |
| 438 | uint end_idx = b->end_idx(); |
| 439 | for (i = 1; i < end_idx; i++) { |
| 440 | Node* n = b->get_node(i); |
| 441 | if (!n->is_Phi()) { |
| 442 | break; |
| 443 | } |
| 444 | } |
| 445 | return i; |
| 446 | } |
| 447 | |
| 448 | /* |
| 449 | * Spills could be inserted before a CreateEx node which should be the first |
| 450 | * instruction in a block after Phi nodes. If so, move the CreateEx node up. |
| 451 | */ |
static void move_exception_node_up(Block* b, uint first_inst, uint last_inst) {
  for (uint i = first_inst; i < last_inst; i++) {
    Node* ex = b->get_node(i);
    // Skip any spill copies that were inserted ahead of the CreateEx.
    if (ex->is_SpillCopy()) {
      continue;
    }

    // Only relocate when spill copies actually precede the CreateEx
    // (i > first_inst); if it is already first, nothing to do.
    if (i > first_inst &&
        ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
      b->remove_node(i);
      b->insert_node(ex, first_inst);
    }
    // Stop once a CreateEx or any other node is found
    break;
  }
}
| 468 | |
| 469 | /* |
| 470 | * When new live ranges are live, we raise the register pressure |
| 471 | */ |
| 472 | void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure) { |
| 473 | if (lrg.mask_is_nonempty_and_up()) { |
| 474 | if (lrg.is_float_or_vector()) { |
| 475 | float_pressure.raise(lrg); |
| 476 | } else { |
| 477 | // Do not count the SP and flag registers |
| 478 | const RegMask& rm = lrg.mask(); |
| 479 | if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI])) { |
| 480 | int_pressure.raise(lrg); |
| 481 | } |
| 482 | } |
| 483 | } |
| 484 | } |
| 485 | |
| 486 | |
| 487 | /* |
| 488 | * Computes the initial register pressure of a block, looking at all live |
| 489 | * ranges in the liveout. The register pressure is computed for both float |
| 490 | * and int/pointer registers. |
| 491 | * Live ranges in the liveout are presumed live for the whole block. |
| 492 | * We add the cost for the whole block to the area of the live ranges initially. |
| 493 | * If a live range gets killed in the block, we'll subtract the unused part of |
| 494 | * the block from the area. |
| 495 | */ |
| 496 | void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost) { |
| 497 | IndexSetIterator elements(liveout); |
| 498 | uint lid = elements.next(); |
| 499 | while (lid != 0) { |
| 500 | LRG& lrg = lrgs(lid); |
| 501 | lrg._area += cost; |
| 502 | raise_pressure(b, lrg, int_pressure, float_pressure); |
| 503 | lid = elements.next(); |
| 504 | } |
| 505 | assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect" ); |
| 506 | assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect" ); |
| 507 | } |
| 508 | |
| 509 | /* |
| 510 | * Computes the entry register pressure of a block, looking at all live |
| 511 | * ranges in the livein. The register pressure is computed for both float |
| 512 | * and int/pointer registers. |
| 513 | */ |
| 514 | void PhaseChaitin::compute_entry_block_pressure(Block* b) { |
| 515 | IndexSet* livein = _live->livein(b); |
| 516 | IndexSetIterator elements(livein); |
| 517 | uint lid = elements.next(); |
| 518 | while (lid != 0) { |
| 519 | LRG& lrg = lrgs(lid); |
| 520 | raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure); |
| 521 | lid = elements.next(); |
| 522 | } |
| 523 | // Now check phis for locally defined inputs |
| 524 | for (uint j = 0; j < b->number_of_nodes(); j++) { |
| 525 | Node* n = b->get_node(j); |
| 526 | if (n->is_Phi()) { |
| 527 | for (uint k = 1; k < n->req(); k++) { |
| 528 | Node* phi_in = n->in(k); |
| 529 | // Because we are talking about phis, raise register pressure once for each |
| 530 | // instance of a phi to account for a single value |
| 531 | if (_cfg.get_block_for_node(phi_in) == b) { |
| 532 | LRG& lrg = lrgs(phi_in->_idx); |
| 533 | raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure); |
| 534 | break; |
| 535 | } |
| 536 | } |
| 537 | } |
| 538 | } |
| 539 | _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure()); |
| 540 | _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure()); |
| 541 | } |
| 542 | |
| 543 | /* |
| 544 | * Computes the exit register pressure of a block, looking at all live |
| 545 | * ranges in the liveout. The register pressure is computed for both float |
| 546 | * and int/pointer registers. |
| 547 | */ |
| 548 | void PhaseChaitin::compute_exit_block_pressure(Block* b) { |
| 549 | IndexSet* livein = _live->live(b); |
| 550 | IndexSetIterator elements(livein); |
| 551 | _sched_int_pressure.set_current_pressure(0); |
| 552 | _sched_float_pressure.set_current_pressure(0); |
| 553 | uint lid = elements.next(); |
| 554 | while (lid != 0) { |
| 555 | LRG& lrg = lrgs(lid); |
| 556 | raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure); |
| 557 | lid = elements.next(); |
| 558 | } |
| 559 | } |
| 560 | |
| 561 | /* |
| 562 | * Remove dead node if it's not used. |
| 563 | * We only remove projection nodes if the node "defining" the projection is |
| 564 | * dead, for example on x86, if we have a dead Add node we remove its |
| 565 | * RFLAGS node. |
| 566 | */ |
bool PhaseChaitin::remove_node_if_not_used(Block* b, uint location, Node* n, uint lid, IndexSet* liveout) {
  Node* def = n->in(0);
  // Delete 'n' when it is a plain (non-projection) node, or a projection
  // whose "defining" node has a live range that is itself dead (i.e. not
  // in the current liveout set).
  if (!n->is_Proj() ||
      (_lrg_map.live_range_id(def) && !liveout->member(_lrg_map.live_range_id(def)))) {
    if (n->is_MachProj()) {
      // Don't remove KILL projections if their "defining" nodes have
      // memory effects (have SCMemProj projection node) -
      // they are not dead even when their result is not used.
      // For example, compareAndSwapL (and other CAS) and EncodeISOArray nodes.
      // The method add_input_to_liveout() keeps such nodes alive (put them on liveout list)
      // when it sees SCMemProj node in a block. Unfortunately SCMemProj node could be placed
      // in block in such order that KILL MachProj nodes are processed first.
      if (def->has_out_with(Op_SCMemProj)) {
        return false;
      }
    }
    b->remove_node(location);
    LRG& lrg = lrgs(lid);
    // If this node was the live range's recorded def, it has no def now.
    if (lrg._def == n) {
      lrg._def = 0;
    }
    n->disconnect_inputs(NULL, C);
    _cfg.unmap_node_from_block(n);
    n->replace_by(C->top());
    return true;
  }
  return false;
}
| 595 | |
| 596 | /* |
| 597 | * When encountering a fat projection, we might go from a low to high to low |
| 598 | * (since the fat proj only lives at this instruction) going backwards in the |
| 599 | * block. If we find a low to high transition, we record it. |
| 600 | */ |
| 601 | void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) { |
| 602 | RegMask mask_tmp = lrg.mask(); |
| 603 | mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]); |
| 604 | pressure.check_pressure_at_fatproj(location, mask_tmp); |
| 605 | } |
| 606 | |
| 607 | /* |
| 608 | * Insure high score for immediate-use spill copies so they get a color. |
| 609 | * All single-use MachSpillCopy(s) that immediately precede their |
| 610 | * use must color early. If a longer live range steals their |
| 611 | * color, the spill copy will split and may push another spill copy |
| 612 | * further away resulting in an infinite spill-split-retry cycle. |
| 613 | * Assigning a zero area results in a high score() and a good |
| 614 | * location in the simplify list. |
| 615 | */ |
void PhaseChaitin::assign_high_score_to_immediate_copies(Block* b, Node* n, LRG& lrg, uint next_inst, uint last_inst) {
  // Candidate: a single-def spill copy whose one use is in this block.
  if (n->is_SpillCopy() &&
      lrg.is_singledef() && // A multi defined live range can still split
      n->outcnt() == 1 &&   // and use must be in this block
      _cfg.get_block_for_node(n->unique_out()) == b) {

    Node* single_use = n->unique_out();
    assert(b->find_node(single_use) >= next_inst, "Use must be later in block");
    // Use can be earlier in block if it is a Phi, but then I should be a MultiDef

    // Find first non SpillCopy 'm' that follows the current instruction
    // (current_inst - 1) is index for current instruction 'n'
    Node* m = n;
    for (uint i = next_inst; i <= last_inst && m->is_SpillCopy(); ++i) {
      m = b->get_node(i);
    }
    // If the first non-spill-copy node is the copy's sole use, the use is
    // "immediate": zero the area so score() ranks this live range high in
    // the simplify list and it colors early.
    if (m == single_use) {
      lrg._area = 0.0;
    }
  }
}
| 637 | |
| 638 | /* |
| 639 | * Copies do not define a new value and so do not interfere. |
| 640 | * Remove the copies source from the liveout set before interfering. |
| 641 | */ |
| 642 | void PhaseChaitin::remove_interference_from_copy(Block* b, uint location, uint lid_copy, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) { |
| 643 | if (liveout->remove(lid_copy)) { |
| 644 | LRG& lrg_copy = lrgs(lid_copy); |
| 645 | lrg_copy._area -= cost; |
| 646 | |
| 647 | // Lower register pressure since copy and definition can share the same register |
| 648 | lower_pressure(b, location, lrg_copy, liveout, int_pressure, float_pressure); |
| 649 | } |
| 650 | } |
| 651 | |
| 652 | /* |
| 653 | * The defined value must go in a particular register. Remove that register from |
| 654 | * all conflicting parties and avoid the interference. |
| 655 | */ |
void PhaseChaitin::remove_bound_register_from_interfering_live_ranges(LRG& lrg, IndexSet* liveout, uint& must_spill) {
  // Check for common case
  const RegMask& rm = lrg.mask();
  int r_size = lrg.num_regs();
  // Smear odd bits
  IndexSetIterator elements(liveout);
  uint l = elements.next();
  while (l != 0) {
    LRG& interfering_lrg = lrgs(l);
    // If 'l' must spill already, do not further hack his bits.
    // He'll get some interferences and be forced to spill later.
    if (interfering_lrg._must_spill) {
      l = elements.next();
      continue;
    }

    // Remove bound register(s) from 'l's choices
    RegMask old = interfering_lrg.mask();
    uint old_size = interfering_lrg.mask_size();

    // Remove the bits from LRG 'rm' from LRG 'l' so 'l' no
    // longer interferes with 'rm'. If 'l' requires aligned
    // adjacent pairs, subtract out bit pairs.
    assert(!interfering_lrg._is_vector || !interfering_lrg._fat_proj, "sanity");

    if (interfering_lrg.num_regs() > 1 && !interfering_lrg._fat_proj) {
      // Multi-register (or vector) neighbor: subtract whole aligned sets,
      // not just the individual bits, so remaining choices stay aligned.
      RegMask r2mask = rm;
      // Leave only aligned set of bits.
      r2mask.smear_to_sets(interfering_lrg.num_regs());
      // It includes vector case.
      interfering_lrg.SUBTRACT(r2mask);
      interfering_lrg.compute_set_mask_size();
    } else if (r_size != 1) {
      // fat proj
      interfering_lrg.SUBTRACT(rm);
      interfering_lrg.compute_set_mask_size();
    } else {
      // Common case: size 1 bound removal
      OptoReg::Name r_reg = rm.find_first_elem();
      if (interfering_lrg.mask().Member(r_reg)) {
        interfering_lrg.Remove(r_reg);
        // Update the cached size without a full recount; an AllStack mask
        // keeps its sentinel size.
        interfering_lrg.set_mask_size(interfering_lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
      }
    }

    // If 'l' goes completely dry, it must spill.
    if (interfering_lrg.not_free()) {
      // Give 'l' some kind of reasonable mask, so it picks up
      // interferences (and will spill later).
      interfering_lrg.set_mask(old);
      interfering_lrg.set_mask_size(old_size);
      must_spill++;
      interfering_lrg._must_spill = 1;
      interfering_lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
    }
    l = elements.next();
  }
}
| 714 | |
| 715 | /* |
| 716 | * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the |
| 717 | * sole use of a StoreLConditional. While StoreLConditionals set memory (the |
| 718 | * SCMemProj use) they also def flags; if that flag def is unused the allocator |
| 719 | * sees a flag-setting instruction with no use of the flags and assumes it's |
| 720 | * dead. This keeps the (useless) flag-setting behavior alive while also |
| 721 | * keeping the (useful) memory update effect. |
| 722 | */ |
void PhaseChaitin::add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
  JVMState* jvms = n->jvms();
  // Inputs at or past debug_start carry debug info only; sentinel 999999
  // means "no JVMState, so every input is a real use".
  uint debug_start = jvms ? jvms->debug_start() : 999999;

  // Start at input 0 (not 1) only for SCMemProj, to keep the flag-setting
  // half of a StoreLConditional-style node alive (see header comment).
  for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
    Node* def = n->in(k);
    uint lid = _lrg_map.live_range_id(def);
    if (!lid) {
      continue;
    }
    LRG& lrg = lrgs(lid);

    // No use-side cost for spilling debug info
    if (k < debug_start) {
      // A USE costs twice block frequency (once for the Load, once
      // for a Load-delay). Rematerialized uses only cost once.
      lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq * 2));
    }

    if (liveout->insert(lid)) {
      // Newly live things assumed live from here to top of block
      lrg._area += cost;
      raise_pressure(b, lrg, int_pressure, float_pressure);
      assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
      assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
    }
    assert(lrg._area >= 0.0, "negative spill area");
  }
}
| 752 | |
| 753 | /* |
| 754 | * If we run off the top of the block with high pressure just record that the |
| 755 | * whole block is high pressure. (Even though we might have a transition |
| 756 | * later down in the block) |
| 757 | */ |
| 758 | void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) { |
| 759 | // current pressure now means the pressure before the first instruction in the block |
| 760 | // (since we have stepped through all instructions backwards) |
| 761 | if (pressure.current_pressure() > pressure.high_pressure_limit()) { |
| 762 | pressure.set_high_pressure_index_to_block_start(); |
| 763 | } |
| 764 | } |
| 765 | |
| 766 | /* |
| 767 | * Compute high pressure indice; avoid landing in the middle of projnodes |
| 768 | * and set the high pressure index for the block |
| 769 | */ |
| 770 | void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) { |
| 771 | uint i = pressure.high_pressure_index(); |
| 772 | if (i < b->number_of_nodes() && i < b->end_idx() + 1) { |
| 773 | Node* cur = b->get_node(i); |
| 774 | while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) { |
| 775 | cur = b->get_node(--i); |
| 776 | } |
| 777 | } |
| 778 | block_hrp_index = i; |
| 779 | } |
| 780 | |
| 781 | void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) { |
| 782 | if (str != NULL) { |
| 783 | tty->print_cr("# *** %s ***" , str); |
| 784 | } |
| 785 | tty->print_cr("# start pressure is = %d" , pressure.start_pressure()); |
| 786 | tty->print_cr("# max pressure is = %d" , pressure.final_pressure()); |
| 787 | tty->print_cr("# end pressure is = %d" , pressure.current_pressure()); |
| 788 | tty->print_cr("#" ); |
| 789 | } |
| 790 | |
| 791 | /* Build an interference graph: |
| 792 | * That is, if 2 live ranges are simultaneously alive but in their acceptable |
| 793 | * register sets do not overlap, then they do not interfere. The IFG is built |
| 794 | * by a single reverse pass over each basic block. Starting with the known |
| 795 | * live-out set, we remove things that get defined and add things that become |
| 796 | * live (essentially executing one pass of a standard LIVE analysis). Just |
| 797 | * before a Node defines a value (and removes it from the live-ness set) that |
| 798 | * value is certainly live. The defined value interferes with everything |
 * currently live. The value is then removed from the live-ness set and its
| 800 | * inputs are added to the live-ness set. |
| 801 | * Compute register pressure for each block: |
| 802 | * We store the biggest register pressure for each block and also the first |
| 803 | * low to high register pressure transition within the block (if any). |
| 804 | */ |
uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
  Compile::TracePhase tp("buildIFG" , &timers[_t_buildIFGphysical]);

  // Count of live ranges discovered to require spilling; updated by
  // remove_bound_register_from_interfering_live_ranges below.
  uint must_spill = 0;
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);

    // Clone (rather than smash in place) the liveout info, so it is alive
    // for the "collect_gc_info" phase later.
    IndexSet liveout(_live->live(block));

    uint first_inst = first_nonphi_index(block);
    uint last_inst = block->end_idx();

    move_exception_node_up(block, first_inst, last_inst);

    // One pressure tracker per register class, seeded with the block-end
    // position and that class's high-pressure limit.
    Pressure int_pressure(last_inst + 1, INTPRESSURE);
    Pressure float_pressure(last_inst + 1, FLOATPRESSURE);
    block->_reg_pressure = 0;
    block->_freg_pressure = 0;

    // "cost" is the spill-area weight of being live across the part of the
    // block not yet visited: block frequency times remaining instruction count.
    int inst_count = last_inst - first_inst;
    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
    assert(cost >= 0.0, "negative spill cost" );

    compute_initial_block_pressure(block, &liveout, int_pressure, float_pressure, cost);

    // Single reverse pass: at each instruction, remove its def from liveout,
    // record interferences, then add its inputs to liveout.
    for (uint location = last_inst; location > 0; location--) {
      Node* n = block->get_node(location);
      uint lid = _lrg_map.live_range_id(n);

      if(lid) {
        LRG& lrg = lrgs(lid);

        // A DEF normally costs block frequency; rematerialized values are
        // removed from the DEF sight, so LOWER costs here.
        lrg._cost += n->rematerialize() ? 0 : block->_freq;

        // Def is not in liveout => the value is dead here. SafePoints are
        // excluded from this dead-code path even if their lid is not live.
        if (!liveout.member(lid) && n->Opcode() != Op_SafePoint) {
          if (remove_node_if_not_used(block, location, n, lid, &liveout)) {
            // Node was deleted; retract any high-pressure transition that was
            // recorded at or after this (now removed) position.
            float_pressure.lower_high_pressure_index();
            int_pressure.lower_high_pressure_index();
            continue;
          }
          // A fat-proj kills many registers at once and can push either
          // register class over its limit even though its own def is dead.
          if (lrg._fat_proj) {
            check_for_high_pressure_transition_at_fatproj(block->_reg_pressure, location, lrg, int_pressure, Op_RegI);
            check_for_high_pressure_transition_at_fatproj(block->_freg_pressure, location, lrg, float_pressure, Op_RegD);
          }
        } else {
          // A live range ends at its definition, remove the remaining area.
          // If the cost is +Inf (which might happen in extreme cases), the lrg area will also be +Inf,
          // and +Inf - +Inf = NaN. So let's not do that subtraction.
          if (g_isfinite(cost)) {
            lrg._area -= cost;
          }
          assert(lrg._area >= 0.0, "negative spill area" );

          assign_high_score_to_immediate_copies(block, n, lrg, location + 1, last_inst);

          // The def ends this live range on the backward walk: drop it from
          // liveout and lower the pressure accordingly.
          if (liveout.remove(lid)) {
            lower_pressure(block, location, lrg, &liveout, int_pressure, float_pressure);
          }
          // For copies, the destination does not interfere with the source.
          uint copy_idx = n->is_Copy();
          if (copy_idx) {
            uint lid_copy = _lrg_map.live_range_id(n->in(copy_idx));
            remove_interference_from_copy(block, location, lid_copy, &liveout, cost, int_pressure, float_pressure);
          }
        }

        // Since rematerializable DEFs are not bound but the live range is,
        // some uses must be bound. If we spill live range 'r', it can
        // rematerialize at each use site according to its bindings.
        if (lrg.is_bound() && !n->rematerialize() && lrg.mask().is_NotEmpty()) {
          remove_bound_register_from_interfering_live_ranges(lrg, &liveout, must_spill);
        }
        // The defined value interferes with everything still live here.
        interfere_with_live(lid, &liveout);
      }

      // Area remaining in the block
      inst_count--;
      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);

      // Phi inputs are not uses within this block (they belong to the
      // predecessor blocks), so only non-Phi inputs extend liveness here.
      if (!n->is_Phi()) {
        add_input_to_liveout(block, n, &liveout, cost, int_pressure, float_pressure);
      }
    }

    check_for_high_pressure_block(int_pressure);
    check_for_high_pressure_block(float_pressure);
    adjust_high_pressure_index(block, block->_ihrp_index, int_pressure);
    adjust_high_pressure_index(block, block->_fhrp_index, float_pressure);
    // set the final_pressure as the register pressure for the block
    block->_reg_pressure = int_pressure.final_pressure();
    block->_freg_pressure = float_pressure.final_pressure();

#ifndef PRODUCT
    // Gather Register Pressure Statistics
    if (PrintOptoStatistics) {
      if (block->_reg_pressure > int_pressure.high_pressure_limit() || block->_freg_pressure > float_pressure.high_pressure_limit()) {
        _high_pressure++;
      } else {
        _low_pressure++;
      }
    }
#endif
  }

  return must_spill;
}
| 914 | |