| 1 | /* | 
|---|
| 2 | * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. | 
|---|
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 
|---|
| 4 | * | 
|---|
| 5 | * This code is free software; you can redistribute it and/or modify it | 
|---|
| 6 | * under the terms of the GNU General Public License version 2 only, as | 
|---|
| 7 | * published by the Free Software Foundation. | 
|---|
| 8 | * | 
|---|
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT | 
|---|
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|---|
| 11 | * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|---|
| 12 | * version 2 for more details (a copy is included in the LICENSE file that | 
|---|
| 13 | * accompanied this code). | 
|---|
| 14 | * | 
|---|
| 15 | * You should have received a copy of the GNU General Public License version | 
|---|
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, | 
|---|
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 
|---|
| 18 | * | 
|---|
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | 
|---|
| 20 | * or visit www.oracle.com if you need additional information or have any | 
|---|
| 21 | * questions. | 
|---|
| 22 | * | 
|---|
| 23 | */ | 
|---|
| 24 |  | 
|---|
| 25 | #include "precompiled.hpp" | 
|---|
| 26 | #include "compiler/oopMap.hpp" | 
|---|
| 27 | #include "memory/allocation.inline.hpp" | 
|---|
| 28 | #include "memory/resourceArea.hpp" | 
|---|
| 29 | #include "opto/addnode.hpp" | 
|---|
| 30 | #include "opto/block.hpp" | 
|---|
| 31 | #include "opto/callnode.hpp" | 
|---|
| 32 | #include "opto/cfgnode.hpp" | 
|---|
| 33 | #include "opto/chaitin.hpp" | 
|---|
| 34 | #include "opto/coalesce.hpp" | 
|---|
| 35 | #include "opto/indexSet.hpp" | 
|---|
| 36 | #include "opto/machnode.hpp" | 
|---|
| 37 | #include "opto/memnode.hpp" | 
|---|
| 38 | #include "opto/opcodes.hpp" | 
|---|
| 39 |  | 
|---|
| 40 | PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) { | 
|---|
| 41 | } | 
|---|
| 42 |  | 
|---|
| 43 | void PhaseIFG::init( uint maxlrg ) { | 
|---|
| 44 | _maxlrg = maxlrg; | 
|---|
| 45 | _yanked = new (_arena) VectorSet(_arena); | 
|---|
| 46 | _is_square = false; | 
|---|
| 47 | // Make uninitialized adjacency lists | 
|---|
| 48 | _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg); | 
|---|
| 49 | // Also make empty live range structures | 
|---|
| 50 | _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) ); | 
|---|
| 51 | memset((void*)_lrgs,0,sizeof(LRG)*maxlrg); | 
|---|
| 52 | // Init all to empty | 
|---|
| 53 | for( uint i = 0; i < maxlrg; i++ ) { | 
|---|
| 54 | _adjs[i].initialize(maxlrg); | 
|---|
| 55 | _lrgs[i].Set_All(); | 
|---|
| 56 | } | 
|---|
| 57 | } | 
|---|
| 58 |  | 
|---|
| 59 | // Add edge between vertices a & b.  These are sorted (triangular matrix), | 
|---|
| 60 | // then the smaller number is inserted in the larger numbered array. | 
|---|
| 61 | int PhaseIFG::add_edge( uint a, uint b ) { | 
|---|
| 62 | lrgs(a).invalid_degree(); | 
|---|
| 63 | lrgs(b).invalid_degree(); | 
|---|
| 64 | // Sort a and b, so that a is bigger | 
|---|
| 65 | assert( !_is_square, "only on triangular"); | 
|---|
| 66 | if( a < b ) { uint tmp = a; a = b; b = tmp; } | 
|---|
| 67 | return _adjs[a].insert( b ); | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 | // Is there an edge between a and b? | 
|---|
| 71 | int PhaseIFG::test_edge( uint a, uint b ) const { | 
|---|
| 72 | // Sort a and b, so that a is larger | 
|---|
| 73 | assert( !_is_square, "only on triangular"); | 
|---|
| 74 | if( a < b ) { uint tmp = a; a = b; b = tmp; } | 
|---|
| 75 | return _adjs[a].member(b); | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 | // Convert triangular matrix to square matrix | 
|---|
| 79 | void PhaseIFG::SquareUp() { | 
|---|
| 80 | assert( !_is_square, "only on triangular"); | 
|---|
| 81 |  | 
|---|
| 82 | // Simple transpose | 
|---|
| 83 | for( uint i = 0; i < _maxlrg; i++ ) { | 
|---|
| 84 | IndexSetIterator elements(&_adjs[i]); | 
|---|
| 85 | uint datum; | 
|---|
| 86 | while ((datum = elements.next()) != 0) { | 
|---|
| 87 | _adjs[datum].insert( i ); | 
|---|
| 88 | } | 
|---|
| 89 | } | 
|---|
| 90 | _is_square = true; | 
|---|
| 91 | } | 
|---|
| 92 |  | 
|---|
| 93 | // Compute effective degree in bulk | 
|---|
| 94 | void PhaseIFG::Compute_Effective_Degree() { | 
|---|
| 95 | assert( _is_square, "only on square"); | 
|---|
| 96 |  | 
|---|
| 97 | for( uint i = 0; i < _maxlrg; i++ ) | 
|---|
| 98 | lrgs(i).set_degree(effective_degree(i)); | 
|---|
| 99 | } | 
|---|
| 100 |  | 
|---|
| 101 | int PhaseIFG::test_edge_sq( uint a, uint b ) const { | 
|---|
| 102 | assert( _is_square, "only on square"); | 
|---|
| 103 | // Swap, so that 'a' has the lesser count.  Then binary search is on | 
|---|
| 104 | // the smaller of a's list and b's list. | 
|---|
| 105 | if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; } | 
|---|
| 106 | //return _adjs[a].unordered_member(b); | 
|---|
| 107 | return _adjs[a].member(b); | 
|---|
| 108 | } | 
|---|
| 109 |  | 
|---|
| 110 | // Union edges of B into A | 
|---|
| 111 | void PhaseIFG::Union( uint a, uint b ) { | 
|---|
| 112 | assert( _is_square, "only on square"); | 
|---|
| 113 | IndexSet *A = &_adjs[a]; | 
|---|
| 114 | IndexSetIterator b_elements(&_adjs[b]); | 
|---|
| 115 | uint datum; | 
|---|
| 116 | while ((datum = b_elements.next()) != 0) { | 
|---|
| 117 | if(A->insert(datum)) { | 
|---|
| 118 | _adjs[datum].insert(a); | 
|---|
| 119 | lrgs(a).invalid_degree(); | 
|---|
| 120 | lrgs(datum).invalid_degree(); | 
|---|
| 121 | } | 
|---|
| 122 | } | 
|---|
| 123 | } | 
|---|
| 124 |  | 
|---|
| 125 | // Yank a Node and all connected edges from the IFG.  Return a | 
|---|
| 126 | // list of neighbors (edges) yanked. | 
|---|
| 127 | IndexSet *PhaseIFG::remove_node( uint a ) { | 
|---|
| 128 | assert( _is_square, "only on square"); | 
|---|
| 129 | assert( !_yanked->test(a), ""); | 
|---|
| 130 | _yanked->set(a); | 
|---|
| 131 |  | 
|---|
| 132 | // I remove the LRG from all neighbors. | 
|---|
| 133 | IndexSetIterator elements(&_adjs[a]); | 
|---|
| 134 | LRG &lrg_a = lrgs(a); | 
|---|
| 135 | uint datum; | 
|---|
| 136 | while ((datum = elements.next()) != 0) { | 
|---|
| 137 | _adjs[datum].remove(a); | 
|---|
| 138 | lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) ); | 
|---|
| 139 | } | 
|---|
| 140 | return neighbors(a); | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | // Re-insert a yanked Node. | 
|---|
| 144 | void PhaseIFG::re_insert( uint a ) { | 
|---|
| 145 | assert( _is_square, "only on square"); | 
|---|
| 146 | assert( _yanked->test(a), ""); | 
|---|
| 147 | (*_yanked) >>= a; | 
|---|
| 148 |  | 
|---|
| 149 | IndexSetIterator elements(&_adjs[a]); | 
|---|
| 150 | uint datum; | 
|---|
| 151 | while ((datum = elements.next()) != 0) { | 
|---|
| 152 | _adjs[datum].insert(a); | 
|---|
| 153 | lrgs(datum).invalid_degree(); | 
|---|
| 154 | } | 
|---|
| 155 | } | 
|---|
| 156 |  | 
|---|
| 157 | // Compute the degree between 2 live ranges.  If both live ranges are | 
|---|
| 158 | // aligned-adjacent powers-of-2 then we use the MAX size.  If either is | 
|---|
| 159 | // mis-aligned (or for Fat-Projections, not-adjacent) then we have to | 
|---|
| 160 | // MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why | 
|---|
| 161 | // this is so. | 
|---|
| 162 | int LRG::compute_degree( LRG &l ) const { | 
|---|
| 163 | int tmp; | 
|---|
| 164 | int num_regs = _num_regs; | 
|---|
| 165 | int nregs = l.num_regs(); | 
|---|
| 166 | tmp =  (_fat_proj || l._fat_proj)     // either is a fat-proj? | 
|---|
| 167 | ? (num_regs * nregs)                // then use product | 
|---|
| 168 | : MAX2(num_regs,nregs);             // else use max | 
|---|
| 169 | return tmp; | 
|---|
| 170 | } | 
|---|
| 171 |  | 
|---|
| 172 | // Compute effective degree for this live range.  If both live ranges are | 
|---|
| 173 | // aligned-adjacent powers-of-2 then we use the MAX size.  If either is | 
|---|
| 174 | // mis-aligned (or for Fat-Projections, not-adjacent) then we have to | 
|---|
| 175 | // MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why | 
|---|
| 176 | // this is so. | 
|---|
| 177 | int PhaseIFG::effective_degree( uint lidx ) const { | 
|---|
| 178 | int eff = 0; | 
|---|
| 179 | int num_regs = lrgs(lidx).num_regs(); | 
|---|
| 180 | int fat_proj = lrgs(lidx)._fat_proj; | 
|---|
| 181 | IndexSet *s = neighbors(lidx); | 
|---|
| 182 | IndexSetIterator elements(s); | 
|---|
| 183 | uint nidx; | 
|---|
| 184 | while((nidx = elements.next()) != 0) { | 
|---|
| 185 | LRG &lrgn = lrgs(nidx); | 
|---|
| 186 | int nregs = lrgn.num_regs(); | 
|---|
| 187 | eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj? | 
|---|
| 188 | ? (num_regs * nregs)              // then use product | 
|---|
| 189 | : MAX2(num_regs,nregs);           // else use max | 
|---|
| 190 | } | 
|---|
| 191 | return eff; | 
|---|
| 192 | } | 
|---|
| 193 |  | 
|---|
| 194 |  | 
|---|
| 195 | #ifndef PRODUCT | 
|---|
| 196 | void PhaseIFG::dump() const { | 
|---|
| 197 | tty->print_cr( "-- Interference Graph --%s--", | 
|---|
| 198 | _is_square ? "square": "triangular"); | 
|---|
| 199 | if( _is_square ) { | 
|---|
| 200 | for( uint i = 0; i < _maxlrg; i++ ) { | 
|---|
| 201 | tty->print( (*_yanked)[i] ? "XX ": "  "); | 
|---|
| 202 | tty->print( "L%d: { ",i); | 
|---|
| 203 | IndexSetIterator elements(&_adjs[i]); | 
|---|
| 204 | uint datum; | 
|---|
| 205 | while ((datum = elements.next()) != 0) { | 
|---|
| 206 | tty->print( "L%d ", datum); | 
|---|
| 207 | } | 
|---|
| 208 | tty->print_cr( "}"); | 
|---|
| 209 |  | 
|---|
| 210 | } | 
|---|
| 211 | return; | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 | // Triangular | 
|---|
| 215 | for( uint i = 0; i < _maxlrg; i++ ) { | 
|---|
| 216 | uint j; | 
|---|
| 217 | tty->print( (*_yanked)[i] ? "XX ": "  "); | 
|---|
| 218 | tty->print( "L%d: { ",i); | 
|---|
| 219 | for( j = _maxlrg; j > i; j-- ) | 
|---|
| 220 | if( test_edge(j - 1,i) ) { | 
|---|
| 221 | tty->print( "L%d ",j - 1); | 
|---|
| 222 | } | 
|---|
| 223 | tty->print( "| "); | 
|---|
| 224 | IndexSetIterator elements(&_adjs[i]); | 
|---|
| 225 | uint datum; | 
|---|
| 226 | while ((datum = elements.next()) != 0) { | 
|---|
| 227 | tty->print( "L%d ", datum); | 
|---|
| 228 | } | 
|---|
| 229 | tty->print( "}\n"); | 
|---|
| 230 | } | 
|---|
| 231 | tty->print( "\n"); | 
|---|
| 232 | } | 
|---|
| 233 |  | 
|---|
| 234 | void PhaseIFG::stats() const { | 
|---|
| 235 | ResourceMark rm; | 
|---|
| 236 | int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2); | 
|---|
| 237 | memset( h_cnt, 0, sizeof(int)*_maxlrg*2 ); | 
|---|
| 238 | uint i; | 
|---|
| 239 | for( i = 0; i < _maxlrg; i++ ) { | 
|---|
| 240 | h_cnt[neighbor_cnt(i)]++; | 
|---|
| 241 | } | 
|---|
| 242 | tty->print_cr( "--Histogram of counts--"); | 
|---|
| 243 | for( i = 0; i < _maxlrg*2; i++ ) | 
|---|
| 244 | if( h_cnt[i] ) | 
|---|
| 245 | tty->print( "%d/%d ",i,h_cnt[i]); | 
|---|
| 246 | tty->cr(); | 
|---|
| 247 | } | 
|---|
| 248 |  | 
|---|
// Self-check the squared-up interference graph: symmetry, sortedness,
// empty diagonal, no edges to yanked nodes, and cached-degree accuracy.
void PhaseIFG::verify( const PhaseChaitin *pc ) const {
  // IFG is square, sorted and no need for Find
  for( uint i = 0; i < _maxlrg; i++ ) {
    // A yanked live range must have an empty neighbor list
    assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely");
    IndexSet *set = &_adjs[i];
    IndexSetIterator elements(set);
    uint idx;
    uint last = 0;  // previously seen element, for the sortedness check
    while ((idx = elements.next()) != 0) {
      assert(idx != i, "Must have empty diagonal");
      // Neighbor indices must already be fully resolved live range ids
      assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find");
      // Edge must be recorded in both directions (square symmetry)
      assert(_adjs[idx].member(i), "IFG not square");
      assert(!(*_yanked)[idx], "No yanked neighbors");
      assert(last < idx, "not sorted increasing");
      last = idx;
    }
    // A valid cached degree must match a fresh recomputation
    assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong");
  }
}
|---|
| 268 | #endif | 
|---|
| 269 |  | 
|---|
| 270 | /* | 
|---|
| 271 | * Interfere this register with everything currently live. | 
|---|
| 272 | * Check for interference by checking overlap of regmasks. | 
|---|
| 273 | * Only interfere if acceptable register masks overlap. | 
|---|
| 274 | */ | 
|---|
| 275 | void PhaseChaitin::interfere_with_live(uint lid, IndexSet* liveout) { | 
|---|
| 276 | LRG& lrg = lrgs(lid); | 
|---|
| 277 | const RegMask& rm = lrg.mask(); | 
|---|
| 278 | IndexSetIterator elements(liveout); | 
|---|
| 279 | uint interfering_lid = elements.next(); | 
|---|
| 280 | while (interfering_lid != 0) { | 
|---|
| 281 | LRG& interfering_lrg = lrgs(interfering_lid); | 
|---|
| 282 | if (rm.overlap(interfering_lrg.mask())) { | 
|---|
| 283 | _ifg->add_edge(lid, interfering_lid); | 
|---|
| 284 | } | 
|---|
| 285 | interfering_lid = elements.next(); | 
|---|
| 286 | } | 
|---|
| 287 | } | 
|---|
| 288 |  | 
|---|
// Actually build the interference graph.  Uses virtual registers only, no
// physical register masks.  This allows me to be very aggressive when
// coalescing copies.  Some of this aggressiveness will have to be undone
// later, but I'd rather get all the copies I can now (since unremoved copies
// at this point can end up in bad places).  Copies I re-insert later I have
// more opportunity to insert them in low-frequency locations.
void PhaseChaitin::build_ifg_virtual( ) {
  Compile::TracePhase tp( "buildIFG_virt", &timers[_t_buildIFGvirtual]);

  // For all blocks (in any order) do...
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);
    IndexSet* liveout = _live->live(block);

    // The IFG is built by a single reverse pass over each basic block.
    // Starting with the known live-out set, we remove things that get
    // defined and add things that become live (essentially executing one
    // pass of a standard LIVE analysis). Just before a Node defines a value
    // (and removes it from the live-ness set) that value is certainly live.
    // The defined value interferes with everything currently live.  The
    // value is then removed from the live-ness set and it's inputs are
    // added to the live-ness set.
    for (uint j = block->end_idx() + 1; j > 1; j--) {
      Node* n = block->get_node(j - 1);

      // Get value being defined
      uint r = _lrg_map.live_range_id(n);

      // Some special values do not allocate
      if (r) {

        // Remove from live-out set
        liveout->remove(r);

        // Copies do not define a new value and so do not interfere.
        // Remove the copies source from the liveout set before interfering.
        uint idx = n->is_Copy();
        if (idx != 0) {
          liveout->remove(_lrg_map.live_range_id(n->in(idx)));
        }

        // Interfere with everything live
        interfere_with_live(r, liveout);
      }

      // Make all inputs live
      if (!n->is_Phi()) {      // Phi function uses come from prior block
        for(uint k = 1; k < n->req(); k++) {
          liveout->insert(_lrg_map.live_range_id(n->in(k)));
        }
      }

      // 2-address instructions always have the defined value live
      // on entry to the instruction, even though it is being defined
      // by the instruction.  We pretend a virtual copy sits just prior
      // to the instruction and kills the src-def'd register.
      // In other words, for 2-address instructions the defined value
      // interferes with all inputs.
      uint idx;
      if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
        const MachNode *mach = n->as_Mach();
        // Sometimes my 2-address ADDs are commuted in a bad way.
        // We generally want the USE-DEF register to refer to the
        // loop-varying quantity, to avoid a copy.
        uint op = mach->ideal_Opcode();
        // Check that mach->num_opnds() == 3 to ensure instruction is
        // not subsuming constants, effectively excludes addI_cin_imm
        // Can NOT swap for instructions like addI_cin_imm since it
        // is adding zero to yhi + carry and the second ideal-input
        // points to the result of adding low-halves.
        // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
        if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
            n->in(1)->bottom_type()->base() == Type::Int &&
            // See if the ADD is involved in a tight data loop the wrong way
            n->in(2)->is_Phi() &&
            n->in(2)->in(2) == n ) {
          // Commute the inputs so the USE-DEF side follows the Phi
          Node *tmp = n->in(1);
          n->set_req( 1, n->in(2) );
          n->set_req( 2, tmp );
        }
        // Defined value interferes with all inputs
        uint lidx = _lrg_map.live_range_id(n->in(idx));
        for (uint k = 1; k < n->req(); k++) {
          uint kidx = _lrg_map.live_range_id(n->in(k));
          if (kidx != lidx) {
            // The USE-DEF input itself is exempt: it shares the register
            _ifg->add_edge(r, kidx);
          }
        }
      }
    } // End of forall instructions in block
  } // End of forall blocks
}
|---|
| 381 |  | 
|---|
| 382 | #ifdef ASSERT | 
|---|
| 383 | uint PhaseChaitin::count_int_pressure(IndexSet* liveout) { | 
|---|
| 384 | IndexSetIterator elements(liveout); | 
|---|
| 385 | uint lidx = elements.next(); | 
|---|
| 386 | uint cnt = 0; | 
|---|
| 387 | while (lidx != 0) { | 
|---|
| 388 | LRG& lrg = lrgs(lidx); | 
|---|
| 389 | if (lrg.mask_is_nonempty_and_up() && | 
|---|
| 390 | !lrg.is_float_or_vector() && | 
|---|
| 391 | lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { | 
|---|
| 392 | cnt += lrg.reg_pressure(); | 
|---|
| 393 | } | 
|---|
| 394 | lidx = elements.next(); | 
|---|
| 395 | } | 
|---|
| 396 | return cnt; | 
|---|
| 397 | } | 
|---|
| 398 |  | 
|---|
| 399 | uint PhaseChaitin::count_float_pressure(IndexSet* liveout) { | 
|---|
| 400 | IndexSetIterator elements(liveout); | 
|---|
| 401 | uint lidx = elements.next(); | 
|---|
| 402 | uint cnt = 0; | 
|---|
| 403 | while (lidx != 0) { | 
|---|
| 404 | LRG& lrg = lrgs(lidx); | 
|---|
| 405 | if (lrg.mask_is_nonempty_and_up() && lrg.is_float_or_vector()) { | 
|---|
| 406 | cnt += lrg.reg_pressure(); | 
|---|
| 407 | } | 
|---|
| 408 | lidx = elements.next(); | 
|---|
| 409 | } | 
|---|
| 410 | return cnt; | 
|---|
| 411 | } | 
|---|
| 412 | #endif | 
|---|
| 413 |  | 
|---|
| 414 | /* | 
|---|
| 415 | * Adjust register pressure down by 1.  Capture last hi-to-low transition, | 
|---|
| 416 | */ | 
|---|
| 417 | void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure) { | 
|---|
| 418 | if (lrg.mask_is_nonempty_and_up()) { | 
|---|
| 419 | if (lrg.is_float_or_vector()) { | 
|---|
| 420 | float_pressure.lower(lrg, location); | 
|---|
| 421 | } else { | 
|---|
| 422 | // Do not count the SP and flag registers | 
|---|
| 423 | const RegMask& r = lrg.mask(); | 
|---|
| 424 | if (r.overlap(*Matcher::idealreg2regmask[Op_RegI])) { | 
|---|
| 425 | int_pressure.lower(lrg, location); | 
|---|
| 426 | } | 
|---|
| 427 | } | 
|---|
| 428 | } | 
|---|
| 429 | if (_scheduling_info_generated == false) { | 
|---|
| 430 | assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect"); | 
|---|
| 431 | assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect"); | 
|---|
| 432 | } | 
|---|
| 433 | } | 
|---|
| 434 |  | 
|---|
| 435 | /* Go to the first non-phi index in a block */ | 
|---|
| 436 | static uint first_nonphi_index(Block* b) { | 
|---|
| 437 | uint i; | 
|---|
| 438 | uint end_idx = b->end_idx(); | 
|---|
| 439 | for (i = 1; i < end_idx; i++) { | 
|---|
| 440 | Node* n = b->get_node(i); | 
|---|
| 441 | if (!n->is_Phi()) { | 
|---|
| 442 | break; | 
|---|
| 443 | } | 
|---|
| 444 | } | 
|---|
| 445 | return i; | 
|---|
| 446 | } | 
|---|
| 447 |  | 
|---|
/*
 * Spills could be inserted before a CreateEx node which should be the first
 * instruction in a block after Phi nodes. If so, move the CreateEx node up.
 */
static void move_exception_node_up(Block* b, uint first_inst, uint last_inst) {
  // Scan forward from first_inst, skipping over any SpillCopy nodes that
  // were inserted ahead of the real first instruction.
  for (uint i = first_inst; i < last_inst; i++) {
    Node* ex = b->get_node(i);
    if (ex->is_SpillCopy()) {
      continue;
    }

    // 'ex' is the first non-SpillCopy node.  If it is a CreateEx and is
    // not already at first_inst (i.e. spills were inserted before it),
    // hoist it back to the front.
    if (i > first_inst &&
        ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
      b->remove_node(i);
      b->insert_node(ex, first_inst);
    }
    // Stop once a CreateEx or any other node is found
    break;
  }
}
|---|
| 468 |  | 
|---|
| 469 | /* | 
|---|
| 470 | * When new live ranges are live, we raise the register pressure | 
|---|
| 471 | */ | 
|---|
| 472 | void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure) { | 
|---|
| 473 | if (lrg.mask_is_nonempty_and_up()) { | 
|---|
| 474 | if (lrg.is_float_or_vector()) { | 
|---|
| 475 | float_pressure.raise(lrg); | 
|---|
| 476 | } else { | 
|---|
| 477 | // Do not count the SP and flag registers | 
|---|
| 478 | const RegMask& rm = lrg.mask(); | 
|---|
| 479 | if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI])) { | 
|---|
| 480 | int_pressure.raise(lrg); | 
|---|
| 481 | } | 
|---|
| 482 | } | 
|---|
| 483 | } | 
|---|
| 484 | } | 
|---|
| 485 |  | 
|---|
| 486 |  | 
|---|
| 487 | /* | 
|---|
| 488 | * Computes the initial register pressure of a block, looking at all live | 
|---|
| 489 | * ranges in the liveout. The register pressure is computed for both float | 
|---|
| 490 | * and int/pointer registers. | 
|---|
| 491 | * Live ranges in the liveout are presumed live for the whole block. | 
|---|
| 492 | * We add the cost for the whole block to the area of the live ranges initially. | 
|---|
| 493 | * If a live range gets killed in the block, we'll subtract the unused part of | 
|---|
| 494 | * the block from the area. | 
|---|
| 495 | */ | 
|---|
| 496 | void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost) { | 
|---|
| 497 | IndexSetIterator elements(liveout); | 
|---|
| 498 | uint lid = elements.next(); | 
|---|
| 499 | while (lid != 0) { | 
|---|
| 500 | LRG& lrg = lrgs(lid); | 
|---|
| 501 | lrg._area += cost; | 
|---|
| 502 | raise_pressure(b, lrg, int_pressure, float_pressure); | 
|---|
| 503 | lid = elements.next(); | 
|---|
| 504 | } | 
|---|
| 505 | assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect"); | 
|---|
| 506 | assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect"); | 
|---|
| 507 | } | 
|---|
| 508 |  | 
|---|
| 509 | /* | 
|---|
| 510 | * Computes the entry register pressure of a block, looking at all live | 
|---|
| 511 | * ranges in the livein. The register pressure is computed for both float | 
|---|
| 512 | * and int/pointer registers. | 
|---|
| 513 | */ | 
|---|
| 514 | void PhaseChaitin::compute_entry_block_pressure(Block* b) { | 
|---|
| 515 | IndexSet* livein = _live->livein(b); | 
|---|
| 516 | IndexSetIterator elements(livein); | 
|---|
| 517 | uint lid = elements.next(); | 
|---|
| 518 | while (lid != 0) { | 
|---|
| 519 | LRG& lrg = lrgs(lid); | 
|---|
| 520 | raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure); | 
|---|
| 521 | lid = elements.next(); | 
|---|
| 522 | } | 
|---|
| 523 | // Now check phis for locally defined inputs | 
|---|
| 524 | for (uint j = 0; j < b->number_of_nodes(); j++) { | 
|---|
| 525 | Node* n = b->get_node(j); | 
|---|
| 526 | if (n->is_Phi()) { | 
|---|
| 527 | for (uint k = 1; k < n->req(); k++) { | 
|---|
| 528 | Node* phi_in = n->in(k); | 
|---|
| 529 | // Because we are talking about phis, raise register pressure once for each | 
|---|
| 530 | // instance of a phi to account for a single value | 
|---|
| 531 | if (_cfg.get_block_for_node(phi_in) == b) { | 
|---|
| 532 | LRG& lrg = lrgs(phi_in->_idx); | 
|---|
| 533 | raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure); | 
|---|
| 534 | break; | 
|---|
| 535 | } | 
|---|
| 536 | } | 
|---|
| 537 | } | 
|---|
| 538 | } | 
|---|
| 539 | _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure()); | 
|---|
| 540 | _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure()); | 
|---|
| 541 | } | 
|---|
| 542 |  | 
|---|
| 543 | /* | 
|---|
| 544 | * Computes the exit register pressure of a block, looking at all live | 
|---|
| 545 | * ranges in the liveout. The register pressure is computed for both float | 
|---|
| 546 | * and int/pointer registers. | 
|---|
| 547 | */ | 
|---|
| 548 | void PhaseChaitin::compute_exit_block_pressure(Block* b) { | 
|---|
| 549 | IndexSet* livein = _live->live(b); | 
|---|
| 550 | IndexSetIterator elements(livein); | 
|---|
| 551 | _sched_int_pressure.set_current_pressure(0); | 
|---|
| 552 | _sched_float_pressure.set_current_pressure(0); | 
|---|
| 553 | uint lid = elements.next(); | 
|---|
| 554 | while (lid != 0) { | 
|---|
| 555 | LRG& lrg = lrgs(lid); | 
|---|
| 556 | raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure); | 
|---|
| 557 | lid = elements.next(); | 
|---|
| 558 | } | 
|---|
| 559 | } | 
|---|
| 560 |  | 
|---|
/*
 * Remove dead node if it's not used.
 * We only remove projection nodes if the node "defining" the projection is
 * dead, for example on x86, if we have a dead Add node we remove its
 * RFLAGS node.
 */
bool PhaseChaitin::remove_node_if_not_used(Block* b, uint location, Node* n, uint lid, IndexSet* liveout) {
  Node* def = n->in(0);
  // Non-projections are removable outright; a projection is removable
  // only when its defining node has a live range and that live range is
  // no longer in the liveout set.
  if (!n->is_Proj() ||
      (_lrg_map.live_range_id(def) && !liveout->member(_lrg_map.live_range_id(def)))) {
    if (n->is_MachProj()) {
      // Don't remove KILL projections if their "defining" nodes have
      // memory effects (have SCMemProj projection node) -
      // they are not dead even when their result is not used.
      // For example, compareAndSwapL (and other CAS) and EncodeISOArray nodes.
      // The method add_input_to_liveout() keeps such nodes alive (put them on liveout list)
      // when it sees SCMemProj node in a block. Unfortunately SCMemProj node could be placed
      // in block in such order that KILL MachProj nodes are processed first.
      if (def->has_out_with(Op_SCMemProj)) {
        return false;
      }
    }
    // Unlink the node from block, live range, CFG and graph
    b->remove_node(location);
    LRG& lrg = lrgs(lid);
    if (lrg._def == n) {
      lrg._def = 0;  // clear the now-stale definition pointer
    }
    n->disconnect_inputs(NULL, C);
    _cfg.unmap_node_from_block(n);
    n->replace_by(C->top());
    return true;
  }
  return false;
}
|---|
| 595 |  | 
|---|
| 596 | /* | 
|---|
| 597 | * When encountering a fat projection, we might go from a low to high to low | 
|---|
| 598 | * (since the fat proj only lives at this instruction) going backwards in the | 
|---|
| 599 | * block. If we find a low to high transition, we record it. | 
|---|
| 600 | */ | 
|---|
| 601 | void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) { | 
|---|
| 602 | RegMask mask_tmp = lrg.mask(); | 
|---|
| 603 | mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]); | 
|---|
| 604 | pressure.check_pressure_at_fatproj(location, mask_tmp); | 
|---|
| 605 | } | 
|---|
| 606 |  | 
|---|
/*
 * Insure high score for immediate-use spill copies so they get a color.
 * All single-use MachSpillCopy(s) that immediately precede their
 * use must color early.  If a longer live range steals their
 * color, the spill copy will split and may push another spill copy
 * further away resulting in an infinite spill-split-retry cycle.
 * Assigning a zero area results in a high score() and a good
 * location in the simplify list.
 */
void PhaseChaitin::assign_high_score_to_immediate_copies(Block* b, Node* n, LRG& lrg, uint next_inst, uint last_inst) {
  // Only single-def spill copies whose one use lives in this same block
  // are candidates.
  if (n->is_SpillCopy() &&
      lrg.is_singledef() && // A multi defined live range can still split
      n->outcnt() == 1 &&   // and use must be in this block
      _cfg.get_block_for_node(n->unique_out()) == b) {

    Node* single_use = n->unique_out();
    assert(b->find_node(single_use) >= next_inst, "Use must be later in block");
    // Use can be earlier in block if it is a Phi, but then I should be a MultiDef

    // Find first non SpillCopy 'm' that follows the current instruction
    // (current_inst - 1) is index for current instruction 'n'
    Node* m = n;
    for (uint i = next_inst; i <= last_inst && m->is_SpillCopy(); ++i) {
      m = b->get_node(i);
    }
    // If the first non-spill-copy node is exactly our single use, the
    // copy immediately precedes its use: zero the area to boost score()
    if (m == single_use) {
      lrg._area = 0.0;
    }
  }
}
|---|
| 637 |  | 
|---|
| 638 | /* | 
|---|
| 639 | * Copies do not define a new value and so do not interfere. | 
|---|
| 640 | * Remove the copies source from the liveout set before interfering. | 
|---|
| 641 | */ | 
|---|
| 642 | void PhaseChaitin::remove_interference_from_copy(Block* b, uint location, uint lid_copy, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) { | 
|---|
| 643 | if (liveout->remove(lid_copy)) { | 
|---|
| 644 | LRG& lrg_copy = lrgs(lid_copy); | 
|---|
| 645 | lrg_copy._area -= cost; | 
|---|
| 646 |  | 
|---|
| 647 | // Lower register pressure since copy and definition can share the same register | 
|---|
| 648 | lower_pressure(b, location, lrg_copy, liveout, int_pressure, float_pressure); | 
|---|
| 649 | } | 
|---|
| 650 | } | 
|---|
| 651 |  | 
|---|
/*
 * The defined value must go in a particular register. Remove that register from
 * all conflicting parties and avoid the interference.
 *
 *  lrg        the bound live range whose register(s) must be reserved
 *  liveout    live ranges currently live (the conflicting parties)
 *  must_spill incremented for every live range driven to an empty mask
 */
void PhaseChaitin::remove_bound_register_from_interfering_live_ranges(LRG& lrg, IndexSet* liveout, uint& must_spill) {
  // Check for common case
  const RegMask& rm = lrg.mask();
  int r_size = lrg.num_regs();
  // Smear odd bits
  // IndexSetIterator returns 0 when exhausted (live range ids start at 1).
  IndexSetIterator elements(liveout);
  uint l = elements.next();
  while (l != 0) {
    LRG& interfering_lrg = lrgs(l);
    // If 'l' must spill already, do not further hack his bits.
    // He'll get some interferences and be forced to spill later.
    if (interfering_lrg._must_spill) {
      l = elements.next();
      continue;
    }

    // Remove bound register(s) from 'l's choices
    // Save the full mask so it can be restored if removal empties it.
    RegMask old = interfering_lrg.mask();
    uint old_size = interfering_lrg.mask_size();

    // Remove the bits from LRG 'rm' from LRG 'l' so 'l' no
    // longer interferes with 'rm'.  If 'l' requires aligned
    // adjacent pairs, subtract out bit pairs.
    assert(!interfering_lrg._is_vector || !interfering_lrg._fat_proj, "sanity");

    if (interfering_lrg.num_regs() > 1 && !interfering_lrg._fat_proj) {
      RegMask r2mask = rm;
      // Leave only aligned set of bits.
      r2mask.smear_to_sets(interfering_lrg.num_regs());
      // It includes vector case.
      interfering_lrg.SUBTRACT(r2mask);
      interfering_lrg.compute_set_mask_size();
    } else if (r_size != 1) {
      // fat proj
      interfering_lrg.SUBTRACT(rm);
      interfering_lrg.compute_set_mask_size();
    } else {
      // Common case: size 1 bound removal
      OptoReg::Name r_reg = rm.find_first_elem();
      if (interfering_lrg.mask().Member(r_reg)) {
        interfering_lrg.Remove(r_reg);
        // Removing one register shrinks the cached size by one, unless the
        // mask spills onto the (unbounded) stack area.
        interfering_lrg.set_mask_size(interfering_lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
      }
    }

    // If 'l' goes completely dry, it must spill.
    if (interfering_lrg.not_free()) {
      // Give 'l' some kind of reasonable mask, so it picks up
      // interferences (and will spill later).
      interfering_lrg.set_mask(old);
      interfering_lrg.set_mask_size(old_size);
      must_spill++;
      interfering_lrg._must_spill = 1;
      interfering_lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
    }
    l = elements.next();
  }
}
|---|
| 714 |  | 
|---|
/*
 * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the
 * sole use of a StoreLConditional. While StoreLConditionals set memory (the
 * SCMemProj use) they also def flags; if that flag def is unused the allocator
 * sees a flag-setting instruction with no use of the flags and assumes it's
 * dead.  This keeps the (useless) flag-setting behavior alive while also
 * keeping the (useful) memory update effect.
 *
 * For every input of 'n' with a live range: charge the use cost, and if the
 * input was not yet live, add it to the liveout set (live from here to the
 * top of the block), add the remaining area, and raise register pressure.
 */
void PhaseChaitin::add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
  // Inputs at or beyond debug_start are debug info on a safepoint; they can
  // be spilled for free, so they accrue area but no use-side cost.
  JVMState* jvms = n->jvms();
  uint debug_start = jvms ? jvms->debug_start() : 999999;

  for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
    Node* def = n->in(k);
    uint lid = _lrg_map.live_range_id(def);
    // Skip inputs with no live range (e.g. control, memory).
    if (!lid) {
      continue;
    }
    LRG& lrg = lrgs(lid);

    // No use-side cost for spilling debug info
    if (k < debug_start) {
      // A USE costs twice block frequency (once for the Load, once
      // for a Load-delay).  Rematerialized uses only cost once.
      lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq * 2));
    }

    if (liveout->insert(lid)) {
      // Newly live things assumed live from here to top of block
      lrg._area += cost;
      raise_pressure(b, lrg, int_pressure, float_pressure);
      assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
      assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
    }
    assert(lrg._area >= 0.0, "negative spill area");
  }
}
|---|
| 752 |  | 
|---|
| 753 | /* | 
|---|
| 754 | * If we run off the top of the block with high pressure just record that the | 
|---|
| 755 | * whole block is high pressure. (Even though we might have a transition | 
|---|
| 756 | * later down in the block) | 
|---|
| 757 | */ | 
|---|
| 758 | void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) { | 
|---|
| 759 | // current pressure now means the pressure before the first instruction in the block | 
|---|
| 760 | // (since we have stepped through all instructions backwards) | 
|---|
| 761 | if (pressure.current_pressure() > pressure.high_pressure_limit()) { | 
|---|
| 762 | pressure.set_high_pressure_index_to_block_start(); | 
|---|
| 763 | } | 
|---|
| 764 | } | 
|---|
| 765 |  | 
|---|
| 766 | /* | 
|---|
| 767 | * Compute high pressure indice; avoid landing in the middle of projnodes | 
|---|
| 768 | * and set the high pressure index for the block | 
|---|
| 769 | */ | 
|---|
| 770 | void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) { | 
|---|
| 771 | uint i = pressure.high_pressure_index(); | 
|---|
| 772 | if (i < b->number_of_nodes() && i < b->end_idx() + 1) { | 
|---|
| 773 | Node* cur = b->get_node(i); | 
|---|
| 774 | while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) { | 
|---|
| 775 | cur = b->get_node(--i); | 
|---|
| 776 | } | 
|---|
| 777 | } | 
|---|
| 778 | block_hrp_index = i; | 
|---|
| 779 | } | 
|---|
| 780 |  | 
|---|
// Debug print of one Pressure record, optionally tagged with 'str':
// start pressure, maximum (final) pressure, and the pressure at the end of
// the backward walk (current_pressure()).
void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) {
  if (str != NULL) {
    tty->print_cr( "#  *** %s ***", str);
  }
  tty->print_cr( "#     start pressure is = %d", pressure.start_pressure());
  tty->print_cr( "#     max pressure is = %d", pressure.final_pressure());
  tty->print_cr( "#     end pressure is = %d", pressure.current_pressure());
  tty->print_cr( "#");
}
|---|
| 790 |  | 
|---|
/* Build an interference graph:
 *   That is, if 2 live ranges are simultaneously alive but in their acceptable
 *   register sets do not overlap, then they do not interfere. The IFG is built
 *   by a single reverse pass over each basic block. Starting with the known
 *   live-out set, we remove things that get defined and add things that become
 *   live (essentially executing one pass of a standard LIVE analysis). Just
 *   before a Node defines a value (and removes it from the live-ness set) that
 *   value is certainly live. The defined value interferes with everything
 *   currently live. The value is then removed from the live-ness set and it's
 *   inputs are added to the live-ness set.
 * Compute register pressure for each block:
 *   We store the biggest register pressure for each block and also the first
 *   low to high register pressure transition within the block (if any).
 *
 * Returns the number of live ranges forced to spill because a bound
 * definition left them with no free registers.
 */
uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
  Compile::TracePhase tp( "buildIFG", &timers[_t_buildIFGphysical]);

  uint must_spill = 0;
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);

    // Clone (rather than smash in place) the liveout info, so it is alive
    // for the "collect_gc_info" phase later.
    IndexSet liveout(_live->live(block));

    uint first_inst = first_nonphi_index(block);
    uint last_inst = block->end_idx();

    move_exception_node_up(block, first_inst, last_inst);

    Pressure int_pressure(last_inst + 1, INTPRESSURE);
    Pressure float_pressure(last_inst + 1, FLOATPRESSURE);
    block->_reg_pressure = 0;
    block->_freg_pressure = 0;

    // Spill "area" contributed at this point: block frequency scaled by the
    // number of instructions still above the current position.
    int inst_count = last_inst - first_inst;
    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
    assert(cost >= 0.0, "negative spill cost");

    compute_initial_block_pressure(block, &liveout, int_pressure, float_pressure, cost);

    // Reverse walk: from the block-ending instruction up to, but not
    // including, index 0 (the block head).
    for (uint location = last_inst; location > 0; location--) {
      Node* n = block->get_node(location);
      uint lid = _lrg_map.live_range_id(n);

      if(lid) {
        LRG& lrg = lrgs(lid);

        // A DEF normally costs block frequency; rematerialized values are
        // removed from the DEF sight, so LOWER costs here.
        lrg._cost += n->rematerialize() ? 0 : block->_freq;

        if (!liveout.member(lid) && n->Opcode() != Op_SafePoint) {
          // Defined but not live-out: either dead (and removable), or a
          // fat-proj whose many registers still stress the allocator.
          if (remove_node_if_not_used(block, location, n, lid, &liveout)) {
            float_pressure.lower_high_pressure_index();
            int_pressure.lower_high_pressure_index();
            continue;
          }
          if (lrg._fat_proj) {
            check_for_high_pressure_transition_at_fatproj(block->_reg_pressure, location, lrg, int_pressure, Op_RegI);
            check_for_high_pressure_transition_at_fatproj(block->_freg_pressure, location, lrg, float_pressure, Op_RegD);
          }
        } else {
          // A live range ends at its definition, remove the remaining area.
          // If the cost is +Inf (which might happen in extreme cases), the lrg area will also be +Inf,
          // and +Inf - +Inf = NaN. So let's not do that subtraction.
          if (g_isfinite(cost)) {
            lrg._area -= cost;
          }
          assert(lrg._area >= 0.0, "negative spill area");

          assign_high_score_to_immediate_copies(block, n, lrg, location + 1, last_inst);

          // The defined value stops being live above this point.
          if (liveout.remove(lid)) {
            lower_pressure(block, location, lrg, &liveout, int_pressure, float_pressure);
          }
          // is_Copy() returns the input index of the copied value (0 if not
          // a copy); a copy must not interfere with its own source.
          uint copy_idx = n->is_Copy();
          if (copy_idx) {
            uint lid_copy = _lrg_map.live_range_id(n->in(copy_idx));
            remove_interference_from_copy(block, location, lid_copy, &liveout, cost, int_pressure, float_pressure);
          }
        }

        // Since rematerializable DEFs are not bound but the live range is,
        // some uses must be bound. If we spill live range 'r', it can
        // rematerialize at each use site according to its bindings.
        if (lrg.is_bound() && !n->rematerialize() && lrg.mask().is_NotEmpty()) {
          remove_bound_register_from_interfering_live_ranges(lrg, &liveout, must_spill);
        }
        // The defined value interferes with everything still live.
        interfere_with_live(lid, &liveout);
      }

      // Area remaining in the block
      inst_count--;
      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);

      // Phi inputs are live only along their respective CFG edges, so they
      // do not become live inside this block.
      if (!n->is_Phi()) {
        add_input_to_liveout(block, n, &liveout, cost, int_pressure, float_pressure);
      }
    }

    check_for_high_pressure_block(int_pressure);
    check_for_high_pressure_block(float_pressure);
    adjust_high_pressure_index(block, block->_ihrp_index, int_pressure);
    adjust_high_pressure_index(block, block->_fhrp_index, float_pressure);
    // set the final_pressure as the register pressure for the block
    block->_reg_pressure = int_pressure.final_pressure();
    block->_freg_pressure = float_pressure.final_pressure();

#ifndef PRODUCT
    // Gather Register Pressure Statistics
    if (PrintOptoStatistics) {
      if (block->_reg_pressure > int_pressure.high_pressure_limit() || block->_freg_pressure > float_pressure.high_pressure_limit()) {
        _high_pressure++;
      } else {
        _low_pressure++;
      }
    }
#endif
  }

  return must_spill;
}
|---|
| 914 |  | 
|---|