1 | /* |
2 | * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #include "precompiled.hpp" |
26 | #include "compiler/oopMap.hpp" |
27 | #include "memory/allocation.inline.hpp" |
28 | #include "memory/resourceArea.hpp" |
29 | #include "opto/addnode.hpp" |
30 | #include "opto/block.hpp" |
31 | #include "opto/callnode.hpp" |
32 | #include "opto/cfgnode.hpp" |
33 | #include "opto/chaitin.hpp" |
34 | #include "opto/coalesce.hpp" |
35 | #include "opto/indexSet.hpp" |
36 | #include "opto/machnode.hpp" |
37 | #include "opto/memnode.hpp" |
38 | #include "opto/opcodes.hpp" |
39 | |
40 | PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) { |
41 | } |
42 | |
43 | void PhaseIFG::init( uint maxlrg ) { |
44 | _maxlrg = maxlrg; |
45 | _yanked = new (_arena) VectorSet(_arena); |
46 | _is_square = false; |
47 | // Make uninitialized adjacency lists |
48 | _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg); |
49 | // Also make empty live range structures |
50 | _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) ); |
51 | memset((void*)_lrgs,0,sizeof(LRG)*maxlrg); |
52 | // Init all to empty |
53 | for( uint i = 0; i < maxlrg; i++ ) { |
54 | _adjs[i].initialize(maxlrg); |
55 | _lrgs[i].Set_All(); |
56 | } |
57 | } |
58 | |
59 | // Add edge between vertices a & b. These are sorted (triangular matrix), |
60 | // then the smaller number is inserted in the larger numbered array. |
61 | int PhaseIFG::add_edge( uint a, uint b ) { |
62 | lrgs(a).invalid_degree(); |
63 | lrgs(b).invalid_degree(); |
64 | // Sort a and b, so that a is bigger |
65 | assert( !_is_square, "only on triangular" ); |
66 | if( a < b ) { uint tmp = a; a = b; b = tmp; } |
67 | return _adjs[a].insert( b ); |
68 | } |
69 | |
70 | // Is there an edge between a and b? |
71 | int PhaseIFG::test_edge( uint a, uint b ) const { |
72 | // Sort a and b, so that a is larger |
73 | assert( !_is_square, "only on triangular" ); |
74 | if( a < b ) { uint tmp = a; a = b; b = tmp; } |
75 | return _adjs[a].member(b); |
76 | } |
77 | |
78 | // Convert triangular matrix to square matrix |
79 | void PhaseIFG::SquareUp() { |
80 | assert( !_is_square, "only on triangular" ); |
81 | |
82 | // Simple transpose |
83 | for( uint i = 0; i < _maxlrg; i++ ) { |
84 | IndexSetIterator elements(&_adjs[i]); |
85 | uint datum; |
86 | while ((datum = elements.next()) != 0) { |
87 | _adjs[datum].insert( i ); |
88 | } |
89 | } |
90 | _is_square = true; |
91 | } |
92 | |
93 | // Compute effective degree in bulk |
94 | void PhaseIFG::Compute_Effective_Degree() { |
95 | assert( _is_square, "only on square" ); |
96 | |
97 | for( uint i = 0; i < _maxlrg; i++ ) |
98 | lrgs(i).set_degree(effective_degree(i)); |
99 | } |
100 | |
101 | int PhaseIFG::test_edge_sq( uint a, uint b ) const { |
102 | assert( _is_square, "only on square" ); |
103 | // Swap, so that 'a' has the lesser count. Then binary search is on |
104 | // the smaller of a's list and b's list. |
105 | if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; } |
106 | //return _adjs[a].unordered_member(b); |
107 | return _adjs[a].member(b); |
108 | } |
109 | |
110 | // Union edges of B into A |
111 | void PhaseIFG::Union( uint a, uint b ) { |
112 | assert( _is_square, "only on square" ); |
113 | IndexSet *A = &_adjs[a]; |
114 | IndexSetIterator b_elements(&_adjs[b]); |
115 | uint datum; |
116 | while ((datum = b_elements.next()) != 0) { |
117 | if(A->insert(datum)) { |
118 | _adjs[datum].insert(a); |
119 | lrgs(a).invalid_degree(); |
120 | lrgs(datum).invalid_degree(); |
121 | } |
122 | } |
123 | } |
124 | |
125 | // Yank a Node and all connected edges from the IFG. Return a |
126 | // list of neighbors (edges) yanked. |
127 | IndexSet *PhaseIFG::remove_node( uint a ) { |
128 | assert( _is_square, "only on square" ); |
129 | assert( !_yanked->test(a), "" ); |
130 | _yanked->set(a); |
131 | |
132 | // I remove the LRG from all neighbors. |
133 | IndexSetIterator elements(&_adjs[a]); |
134 | LRG &lrg_a = lrgs(a); |
135 | uint datum; |
136 | while ((datum = elements.next()) != 0) { |
137 | _adjs[datum].remove(a); |
138 | lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) ); |
139 | } |
140 | return neighbors(a); |
141 | } |
142 | |
143 | // Re-insert a yanked Node. |
144 | void PhaseIFG::re_insert( uint a ) { |
145 | assert( _is_square, "only on square" ); |
146 | assert( _yanked->test(a), "" ); |
147 | (*_yanked) >>= a; |
148 | |
149 | IndexSetIterator elements(&_adjs[a]); |
150 | uint datum; |
151 | while ((datum = elements.next()) != 0) { |
152 | _adjs[datum].insert(a); |
153 | lrgs(datum).invalid_degree(); |
154 | } |
155 | } |
156 | |
157 | // Compute the degree between 2 live ranges. If both live ranges are |
158 | // aligned-adjacent powers-of-2 then we use the MAX size. If either is |
159 | // mis-aligned (or for Fat-Projections, not-adjacent) then we have to |
160 | // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why |
161 | // this is so. |
162 | int LRG::compute_degree( LRG &l ) const { |
163 | int tmp; |
164 | int num_regs = _num_regs; |
165 | int nregs = l.num_regs(); |
166 | tmp = (_fat_proj || l._fat_proj) // either is a fat-proj? |
167 | ? (num_regs * nregs) // then use product |
168 | : MAX2(num_regs,nregs); // else use max |
169 | return tmp; |
170 | } |
171 | |
172 | // Compute effective degree for this live range. If both live ranges are |
173 | // aligned-adjacent powers-of-2 then we use the MAX size. If either is |
174 | // mis-aligned (or for Fat-Projections, not-adjacent) then we have to |
175 | // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why |
176 | // this is so. |
177 | int PhaseIFG::effective_degree( uint lidx ) const { |
178 | int eff = 0; |
179 | int num_regs = lrgs(lidx).num_regs(); |
180 | int fat_proj = lrgs(lidx)._fat_proj; |
181 | IndexSet *s = neighbors(lidx); |
182 | IndexSetIterator elements(s); |
183 | uint nidx; |
184 | while((nidx = elements.next()) != 0) { |
185 | LRG &lrgn = lrgs(nidx); |
186 | int nregs = lrgn.num_regs(); |
187 | eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj? |
188 | ? (num_regs * nregs) // then use product |
189 | : MAX2(num_regs,nregs); // else use max |
190 | } |
191 | return eff; |
192 | } |
193 | |
194 | |
195 | #ifndef PRODUCT |
196 | void PhaseIFG::dump() const { |
197 | tty->print_cr("-- Interference Graph --%s--" , |
198 | _is_square ? "square" : "triangular" ); |
199 | if( _is_square ) { |
200 | for( uint i = 0; i < _maxlrg; i++ ) { |
201 | tty->print( (*_yanked)[i] ? "XX " : " " ); |
202 | tty->print("L%d: { " ,i); |
203 | IndexSetIterator elements(&_adjs[i]); |
204 | uint datum; |
205 | while ((datum = elements.next()) != 0) { |
206 | tty->print("L%d " , datum); |
207 | } |
208 | tty->print_cr("}" ); |
209 | |
210 | } |
211 | return; |
212 | } |
213 | |
214 | // Triangular |
215 | for( uint i = 0; i < _maxlrg; i++ ) { |
216 | uint j; |
217 | tty->print( (*_yanked)[i] ? "XX " : " " ); |
218 | tty->print("L%d: { " ,i); |
219 | for( j = _maxlrg; j > i; j-- ) |
220 | if( test_edge(j - 1,i) ) { |
221 | tty->print("L%d " ,j - 1); |
222 | } |
223 | tty->print("| " ); |
224 | IndexSetIterator elements(&_adjs[i]); |
225 | uint datum; |
226 | while ((datum = elements.next()) != 0) { |
227 | tty->print("L%d " , datum); |
228 | } |
229 | tty->print("}\n" ); |
230 | } |
231 | tty->print("\n" ); |
232 | } |
233 | |
234 | void PhaseIFG::stats() const { |
235 | ResourceMark rm; |
236 | int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2); |
237 | memset( h_cnt, 0, sizeof(int)*_maxlrg*2 ); |
238 | uint i; |
239 | for( i = 0; i < _maxlrg; i++ ) { |
240 | h_cnt[neighbor_cnt(i)]++; |
241 | } |
242 | tty->print_cr("--Histogram of counts--" ); |
243 | for( i = 0; i < _maxlrg*2; i++ ) |
244 | if( h_cnt[i] ) |
245 | tty->print("%d/%d " ,i,h_cnt[i]); |
246 | tty->cr(); |
247 | } |
248 | |
249 | void PhaseIFG::verify( const PhaseChaitin *pc ) const { |
250 | // IFG is square, sorted and no need for Find |
251 | for( uint i = 0; i < _maxlrg; i++ ) { |
252 | assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely" ); |
253 | IndexSet *set = &_adjs[i]; |
254 | IndexSetIterator elements(set); |
255 | uint idx; |
256 | uint last = 0; |
257 | while ((idx = elements.next()) != 0) { |
258 | assert(idx != i, "Must have empty diagonal" ); |
259 | assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find" ); |
260 | assert(_adjs[idx].member(i), "IFG not square" ); |
261 | assert(!(*_yanked)[idx], "No yanked neighbors" ); |
262 | assert(last < idx, "not sorted increasing" ); |
263 | last = idx; |
264 | } |
265 | assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong" ); |
266 | } |
267 | } |
268 | #endif |
269 | |
270 | /* |
271 | * Interfere this register with everything currently live. |
272 | * Check for interference by checking overlap of regmasks. |
273 | * Only interfere if acceptable register masks overlap. |
274 | */ |
275 | void PhaseChaitin::interfere_with_live(uint lid, IndexSet* liveout) { |
276 | LRG& lrg = lrgs(lid); |
277 | const RegMask& rm = lrg.mask(); |
278 | IndexSetIterator elements(liveout); |
279 | uint interfering_lid = elements.next(); |
280 | while (interfering_lid != 0) { |
281 | LRG& interfering_lrg = lrgs(interfering_lid); |
282 | if (rm.overlap(interfering_lrg.mask())) { |
283 | _ifg->add_edge(lid, interfering_lid); |
284 | } |
285 | interfering_lid = elements.next(); |
286 | } |
287 | } |
288 | |
289 | // Actually build the interference graph. Uses virtual registers only, no |
290 | // physical register masks. This allows me to be very aggressive when |
291 | // coalescing copies. Some of this aggressiveness will have to be undone |
292 | // later, but I'd rather get all the copies I can now (since unremoved copies |
293 | // at this point can end up in bad places). Copies I re-insert later I have |
294 | // more opportunity to insert them in low-frequency locations. |
295 | void PhaseChaitin::build_ifg_virtual( ) { |
296 | Compile::TracePhase tp("buildIFG_virt" , &timers[_t_buildIFGvirtual]); |
297 | |
298 | // For all blocks (in any order) do... |
299 | for (uint i = 0; i < _cfg.number_of_blocks(); i++) { |
300 | Block* block = _cfg.get_block(i); |
301 | IndexSet* liveout = _live->live(block); |
302 | |
303 | // The IFG is built by a single reverse pass over each basic block. |
304 | // Starting with the known live-out set, we remove things that get |
305 | // defined and add things that become live (essentially executing one |
306 | // pass of a standard LIVE analysis). Just before a Node defines a value |
307 | // (and removes it from the live-ness set) that value is certainly live. |
308 | // The defined value interferes with everything currently live. The |
309 | // value is then removed from the live-ness set and it's inputs are |
310 | // added to the live-ness set. |
311 | for (uint j = block->end_idx() + 1; j > 1; j--) { |
312 | Node* n = block->get_node(j - 1); |
313 | |
314 | // Get value being defined |
315 | uint r = _lrg_map.live_range_id(n); |
316 | |
317 | // Some special values do not allocate |
318 | if (r) { |
319 | |
320 | // Remove from live-out set |
321 | liveout->remove(r); |
322 | |
323 | // Copies do not define a new value and so do not interfere. |
324 | // Remove the copies source from the liveout set before interfering. |
325 | uint idx = n->is_Copy(); |
326 | if (idx != 0) { |
327 | liveout->remove(_lrg_map.live_range_id(n->in(idx))); |
328 | } |
329 | |
330 | // Interfere with everything live |
331 | interfere_with_live(r, liveout); |
332 | } |
333 | |
334 | // Make all inputs live |
335 | if (!n->is_Phi()) { // Phi function uses come from prior block |
336 | for(uint k = 1; k < n->req(); k++) { |
337 | liveout->insert(_lrg_map.live_range_id(n->in(k))); |
338 | } |
339 | } |
340 | |
341 | // 2-address instructions always have the defined value live |
342 | // on entry to the instruction, even though it is being defined |
343 | // by the instruction. We pretend a virtual copy sits just prior |
344 | // to the instruction and kills the src-def'd register. |
345 | // In other words, for 2-address instructions the defined value |
346 | // interferes with all inputs. |
347 | uint idx; |
348 | if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) { |
349 | const MachNode *mach = n->as_Mach(); |
350 | // Sometimes my 2-address ADDs are commuted in a bad way. |
351 | // We generally want the USE-DEF register to refer to the |
352 | // loop-varying quantity, to avoid a copy. |
353 | uint op = mach->ideal_Opcode(); |
354 | // Check that mach->num_opnds() == 3 to ensure instruction is |
355 | // not subsuming constants, effectively excludes addI_cin_imm |
356 | // Can NOT swap for instructions like addI_cin_imm since it |
357 | // is adding zero to yhi + carry and the second ideal-input |
358 | // points to the result of adding low-halves. |
359 | // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm |
360 | if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) && |
361 | n->in(1)->bottom_type()->base() == Type::Int && |
362 | // See if the ADD is involved in a tight data loop the wrong way |
363 | n->in(2)->is_Phi() && |
364 | n->in(2)->in(2) == n ) { |
365 | Node *tmp = n->in(1); |
366 | n->set_req( 1, n->in(2) ); |
367 | n->set_req( 2, tmp ); |
368 | } |
369 | // Defined value interferes with all inputs |
370 | uint lidx = _lrg_map.live_range_id(n->in(idx)); |
371 | for (uint k = 1; k < n->req(); k++) { |
372 | uint kidx = _lrg_map.live_range_id(n->in(k)); |
373 | if (kidx != lidx) { |
374 | _ifg->add_edge(r, kidx); |
375 | } |
376 | } |
377 | } |
378 | } // End of forall instructions in block |
379 | } // End of forall blocks |
380 | } |
381 | |
382 | #ifdef ASSERT |
383 | uint PhaseChaitin::count_int_pressure(IndexSet* liveout) { |
384 | IndexSetIterator elements(liveout); |
385 | uint lidx = elements.next(); |
386 | uint cnt = 0; |
387 | while (lidx != 0) { |
388 | LRG& lrg = lrgs(lidx); |
389 | if (lrg.mask_is_nonempty_and_up() && |
390 | !lrg.is_float_or_vector() && |
391 | lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { |
392 | cnt += lrg.reg_pressure(); |
393 | } |
394 | lidx = elements.next(); |
395 | } |
396 | return cnt; |
397 | } |
398 | |
399 | uint PhaseChaitin::count_float_pressure(IndexSet* liveout) { |
400 | IndexSetIterator elements(liveout); |
401 | uint lidx = elements.next(); |
402 | uint cnt = 0; |
403 | while (lidx != 0) { |
404 | LRG& lrg = lrgs(lidx); |
405 | if (lrg.mask_is_nonempty_and_up() && lrg.is_float_or_vector()) { |
406 | cnt += lrg.reg_pressure(); |
407 | } |
408 | lidx = elements.next(); |
409 | } |
410 | return cnt; |
411 | } |
412 | #endif |
413 | |
414 | /* |
415 | * Adjust register pressure down by 1. Capture last hi-to-low transition, |
416 | */ |
417 | void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure) { |
418 | if (lrg.mask_is_nonempty_and_up()) { |
419 | if (lrg.is_float_or_vector()) { |
420 | float_pressure.lower(lrg, location); |
421 | } else { |
422 | // Do not count the SP and flag registers |
423 | const RegMask& r = lrg.mask(); |
424 | if (r.overlap(*Matcher::idealreg2regmask[Op_RegI])) { |
425 | int_pressure.lower(lrg, location); |
426 | } |
427 | } |
428 | } |
429 | if (_scheduling_info_generated == false) { |
430 | assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect" ); |
431 | assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect" ); |
432 | } |
433 | } |
434 | |
435 | /* Go to the first non-phi index in a block */ |
436 | static uint first_nonphi_index(Block* b) { |
437 | uint i; |
438 | uint end_idx = b->end_idx(); |
439 | for (i = 1; i < end_idx; i++) { |
440 | Node* n = b->get_node(i); |
441 | if (!n->is_Phi()) { |
442 | break; |
443 | } |
444 | } |
445 | return i; |
446 | } |
447 | |
448 | /* |
449 | * Spills could be inserted before a CreateEx node which should be the first |
450 | * instruction in a block after Phi nodes. If so, move the CreateEx node up. |
451 | */ |
452 | static void move_exception_node_up(Block* b, uint first_inst, uint last_inst) { |
453 | for (uint i = first_inst; i < last_inst; i++) { |
454 | Node* ex = b->get_node(i); |
455 | if (ex->is_SpillCopy()) { |
456 | continue; |
457 | } |
458 | |
459 | if (i > first_inst && |
460 | ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) { |
461 | b->remove_node(i); |
462 | b->insert_node(ex, first_inst); |
463 | } |
464 | // Stop once a CreateEx or any other node is found |
465 | break; |
466 | } |
467 | } |
468 | |
469 | /* |
470 | * When new live ranges are live, we raise the register pressure |
471 | */ |
472 | void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure) { |
473 | if (lrg.mask_is_nonempty_and_up()) { |
474 | if (lrg.is_float_or_vector()) { |
475 | float_pressure.raise(lrg); |
476 | } else { |
477 | // Do not count the SP and flag registers |
478 | const RegMask& rm = lrg.mask(); |
479 | if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI])) { |
480 | int_pressure.raise(lrg); |
481 | } |
482 | } |
483 | } |
484 | } |
485 | |
486 | |
487 | /* |
488 | * Computes the initial register pressure of a block, looking at all live |
489 | * ranges in the liveout. The register pressure is computed for both float |
490 | * and int/pointer registers. |
491 | * Live ranges in the liveout are presumed live for the whole block. |
492 | * We add the cost for the whole block to the area of the live ranges initially. |
493 | * If a live range gets killed in the block, we'll subtract the unused part of |
494 | * the block from the area. |
495 | */ |
496 | void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost) { |
497 | IndexSetIterator elements(liveout); |
498 | uint lid = elements.next(); |
499 | while (lid != 0) { |
500 | LRG& lrg = lrgs(lid); |
501 | lrg._area += cost; |
502 | raise_pressure(b, lrg, int_pressure, float_pressure); |
503 | lid = elements.next(); |
504 | } |
505 | assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect" ); |
506 | assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect" ); |
507 | } |
508 | |
509 | /* |
510 | * Computes the entry register pressure of a block, looking at all live |
511 | * ranges in the livein. The register pressure is computed for both float |
512 | * and int/pointer registers. |
513 | */ |
514 | void PhaseChaitin::compute_entry_block_pressure(Block* b) { |
515 | IndexSet* livein = _live->livein(b); |
516 | IndexSetIterator elements(livein); |
517 | uint lid = elements.next(); |
518 | while (lid != 0) { |
519 | LRG& lrg = lrgs(lid); |
520 | raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure); |
521 | lid = elements.next(); |
522 | } |
523 | // Now check phis for locally defined inputs |
524 | for (uint j = 0; j < b->number_of_nodes(); j++) { |
525 | Node* n = b->get_node(j); |
526 | if (n->is_Phi()) { |
527 | for (uint k = 1; k < n->req(); k++) { |
528 | Node* phi_in = n->in(k); |
529 | // Because we are talking about phis, raise register pressure once for each |
530 | // instance of a phi to account for a single value |
531 | if (_cfg.get_block_for_node(phi_in) == b) { |
532 | LRG& lrg = lrgs(phi_in->_idx); |
533 | raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure); |
534 | break; |
535 | } |
536 | } |
537 | } |
538 | } |
539 | _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure()); |
540 | _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure()); |
541 | } |
542 | |
543 | /* |
544 | * Computes the exit register pressure of a block, looking at all live |
545 | * ranges in the liveout. The register pressure is computed for both float |
546 | * and int/pointer registers. |
547 | */ |
548 | void PhaseChaitin::compute_exit_block_pressure(Block* b) { |
549 | IndexSet* livein = _live->live(b); |
550 | IndexSetIterator elements(livein); |
551 | _sched_int_pressure.set_current_pressure(0); |
552 | _sched_float_pressure.set_current_pressure(0); |
553 | uint lid = elements.next(); |
554 | while (lid != 0) { |
555 | LRG& lrg = lrgs(lid); |
556 | raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure); |
557 | lid = elements.next(); |
558 | } |
559 | } |
560 | |
561 | /* |
562 | * Remove dead node if it's not used. |
563 | * We only remove projection nodes if the node "defining" the projection is |
564 | * dead, for example on x86, if we have a dead Add node we remove its |
565 | * RFLAGS node. |
566 | */ |
567 | bool PhaseChaitin::remove_node_if_not_used(Block* b, uint location, Node* n, uint lid, IndexSet* liveout) { |
568 | Node* def = n->in(0); |
569 | if (!n->is_Proj() || |
570 | (_lrg_map.live_range_id(def) && !liveout->member(_lrg_map.live_range_id(def)))) { |
571 | if (n->is_MachProj()) { |
572 | // Don't remove KILL projections if their "defining" nodes have |
573 | // memory effects (have SCMemProj projection node) - |
574 | // they are not dead even when their result is not used. |
575 | // For example, compareAndSwapL (and other CAS) and EncodeISOArray nodes. |
576 | // The method add_input_to_liveout() keeps such nodes alive (put them on liveout list) |
577 | // when it sees SCMemProj node in a block. Unfortunately SCMemProj node could be placed |
578 | // in block in such order that KILL MachProj nodes are processed first. |
579 | if (def->has_out_with(Op_SCMemProj)) { |
580 | return false; |
581 | } |
582 | } |
583 | b->remove_node(location); |
584 | LRG& lrg = lrgs(lid); |
585 | if (lrg._def == n) { |
586 | lrg._def = 0; |
587 | } |
588 | n->disconnect_inputs(NULL, C); |
589 | _cfg.unmap_node_from_block(n); |
590 | n->replace_by(C->top()); |
591 | return true; |
592 | } |
593 | return false; |
594 | } |
595 | |
596 | /* |
597 | * When encountering a fat projection, we might go from a low to high to low |
598 | * (since the fat proj only lives at this instruction) going backwards in the |
599 | * block. If we find a low to high transition, we record it. |
600 | */ |
601 | void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) { |
602 | RegMask mask_tmp = lrg.mask(); |
603 | mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]); |
604 | pressure.check_pressure_at_fatproj(location, mask_tmp); |
605 | } |
606 | |
607 | /* |
608 | * Insure high score for immediate-use spill copies so they get a color. |
609 | * All single-use MachSpillCopy(s) that immediately precede their |
610 | * use must color early. If a longer live range steals their |
611 | * color, the spill copy will split and may push another spill copy |
612 | * further away resulting in an infinite spill-split-retry cycle. |
613 | * Assigning a zero area results in a high score() and a good |
614 | * location in the simplify list. |
615 | */ |
616 | void PhaseChaitin::assign_high_score_to_immediate_copies(Block* b, Node* n, LRG& lrg, uint next_inst, uint last_inst) { |
617 | if (n->is_SpillCopy() && |
618 | lrg.is_singledef() && // A multi defined live range can still split |
619 | n->outcnt() == 1 && // and use must be in this block |
620 | _cfg.get_block_for_node(n->unique_out()) == b) { |
621 | |
622 | Node* single_use = n->unique_out(); |
623 | assert(b->find_node(single_use) >= next_inst, "Use must be later in block" ); |
624 | // Use can be earlier in block if it is a Phi, but then I should be a MultiDef |
625 | |
626 | // Find first non SpillCopy 'm' that follows the current instruction |
627 | // (current_inst - 1) is index for current instruction 'n' |
628 | Node* m = n; |
629 | for (uint i = next_inst; i <= last_inst && m->is_SpillCopy(); ++i) { |
630 | m = b->get_node(i); |
631 | } |
632 | if (m == single_use) { |
633 | lrg._area = 0.0; |
634 | } |
635 | } |
636 | } |
637 | |
638 | /* |
639 | * Copies do not define a new value and so do not interfere. |
640 | * Remove the copies source from the liveout set before interfering. |
641 | */ |
642 | void PhaseChaitin::remove_interference_from_copy(Block* b, uint location, uint lid_copy, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) { |
643 | if (liveout->remove(lid_copy)) { |
644 | LRG& lrg_copy = lrgs(lid_copy); |
645 | lrg_copy._area -= cost; |
646 | |
647 | // Lower register pressure since copy and definition can share the same register |
648 | lower_pressure(b, location, lrg_copy, liveout, int_pressure, float_pressure); |
649 | } |
650 | } |
651 | |
652 | /* |
653 | * The defined value must go in a particular register. Remove that register from |
654 | * all conflicting parties and avoid the interference. |
655 | */ |
656 | void PhaseChaitin::remove_bound_register_from_interfering_live_ranges(LRG& lrg, IndexSet* liveout, uint& must_spill) { |
657 | // Check for common case |
658 | const RegMask& rm = lrg.mask(); |
659 | int r_size = lrg.num_regs(); |
660 | // Smear odd bits |
661 | IndexSetIterator elements(liveout); |
662 | uint l = elements.next(); |
663 | while (l != 0) { |
664 | LRG& interfering_lrg = lrgs(l); |
665 | // If 'l' must spill already, do not further hack his bits. |
666 | // He'll get some interferences and be forced to spill later. |
667 | if (interfering_lrg._must_spill) { |
668 | l = elements.next(); |
669 | continue; |
670 | } |
671 | |
672 | // Remove bound register(s) from 'l's choices |
673 | RegMask old = interfering_lrg.mask(); |
674 | uint old_size = interfering_lrg.mask_size(); |
675 | |
676 | // Remove the bits from LRG 'rm' from LRG 'l' so 'l' no |
677 | // longer interferes with 'rm'. If 'l' requires aligned |
678 | // adjacent pairs, subtract out bit pairs. |
679 | assert(!interfering_lrg._is_vector || !interfering_lrg._fat_proj, "sanity" ); |
680 | |
681 | if (interfering_lrg.num_regs() > 1 && !interfering_lrg._fat_proj) { |
682 | RegMask r2mask = rm; |
683 | // Leave only aligned set of bits. |
684 | r2mask.smear_to_sets(interfering_lrg.num_regs()); |
685 | // It includes vector case. |
686 | interfering_lrg.SUBTRACT(r2mask); |
687 | interfering_lrg.compute_set_mask_size(); |
688 | } else if (r_size != 1) { |
689 | // fat proj |
690 | interfering_lrg.SUBTRACT(rm); |
691 | interfering_lrg.compute_set_mask_size(); |
692 | } else { |
693 | // Common case: size 1 bound removal |
694 | OptoReg::Name r_reg = rm.find_first_elem(); |
695 | if (interfering_lrg.mask().Member(r_reg)) { |
696 | interfering_lrg.Remove(r_reg); |
697 | interfering_lrg.set_mask_size(interfering_lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1); |
698 | } |
699 | } |
700 | |
701 | // If 'l' goes completely dry, it must spill. |
702 | if (interfering_lrg.not_free()) { |
703 | // Give 'l' some kind of reasonable mask, so it picks up |
704 | // interferences (and will spill later). |
705 | interfering_lrg.set_mask(old); |
706 | interfering_lrg.set_mask_size(old_size); |
707 | must_spill++; |
708 | interfering_lrg._must_spill = 1; |
709 | interfering_lrg.set_reg(OptoReg::Name(LRG::SPILL_REG)); |
710 | } |
711 | l = elements.next(); |
712 | } |
713 | } |
714 | |
715 | /* |
716 | * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the |
717 | * sole use of a StoreLConditional. While StoreLConditionals set memory (the |
718 | * SCMemProj use) they also def flags; if that flag def is unused the allocator |
719 | * sees a flag-setting instruction with no use of the flags and assumes it's |
720 | * dead. This keeps the (useless) flag-setting behavior alive while also |
721 | * keeping the (useful) memory update effect. |
722 | */ |
723 | void PhaseChaitin::add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) { |
724 | JVMState* jvms = n->jvms(); |
725 | uint debug_start = jvms ? jvms->debug_start() : 999999; |
726 | |
727 | for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) { |
728 | Node* def = n->in(k); |
729 | uint lid = _lrg_map.live_range_id(def); |
730 | if (!lid) { |
731 | continue; |
732 | } |
733 | LRG& lrg = lrgs(lid); |
734 | |
735 | // No use-side cost for spilling debug info |
736 | if (k < debug_start) { |
737 | // A USE costs twice block frequency (once for the Load, once |
738 | // for a Load-delay). Rematerialized uses only cost once. |
739 | lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq * 2)); |
740 | } |
741 | |
742 | if (liveout->insert(lid)) { |
743 | // Newly live things assumed live from here to top of block |
744 | lrg._area += cost; |
745 | raise_pressure(b, lrg, int_pressure, float_pressure); |
746 | assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect" ); |
747 | assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect" ); |
748 | } |
749 | assert(lrg._area >= 0.0, "negative spill area" ); |
750 | } |
751 | } |
752 | |
753 | /* |
754 | * If we run off the top of the block with high pressure just record that the |
755 | * whole block is high pressure. (Even though we might have a transition |
756 | * later down in the block) |
757 | */ |
758 | void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) { |
759 | // current pressure now means the pressure before the first instruction in the block |
760 | // (since we have stepped through all instructions backwards) |
761 | if (pressure.current_pressure() > pressure.high_pressure_limit()) { |
762 | pressure.set_high_pressure_index_to_block_start(); |
763 | } |
764 | } |
765 | |
766 | /* |
767 | * Compute high pressure indice; avoid landing in the middle of projnodes |
768 | * and set the high pressure index for the block |
769 | */ |
770 | void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) { |
771 | uint i = pressure.high_pressure_index(); |
772 | if (i < b->number_of_nodes() && i < b->end_idx() + 1) { |
773 | Node* cur = b->get_node(i); |
774 | while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) { |
775 | cur = b->get_node(--i); |
776 | } |
777 | } |
778 | block_hrp_index = i; |
779 | } |
780 | |
781 | void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) { |
782 | if (str != NULL) { |
783 | tty->print_cr("# *** %s ***" , str); |
784 | } |
785 | tty->print_cr("# start pressure is = %d" , pressure.start_pressure()); |
786 | tty->print_cr("# max pressure is = %d" , pressure.final_pressure()); |
787 | tty->print_cr("# end pressure is = %d" , pressure.current_pressure()); |
788 | tty->print_cr("#" ); |
789 | } |
790 | |
791 | /* Build an interference graph: |
792 | * That is, if 2 live ranges are simultaneously alive but in their acceptable |
793 | * register sets do not overlap, then they do not interfere. The IFG is built |
794 | * by a single reverse pass over each basic block. Starting with the known |
795 | * live-out set, we remove things that get defined and add things that become |
796 | * live (essentially executing one pass of a standard LIVE analysis). Just |
797 | * before a Node defines a value (and removes it from the live-ness set) that |
798 | * value is certainly live. The defined value interferes with everything |
799 | * currently live. The value is then removed from the live-ness set and it's |
800 | * inputs are added to the live-ness set. |
801 | * Compute register pressure for each block: |
802 | * We store the biggest register pressure for each block and also the first |
803 | * low to high register pressure transition within the block (if any). |
804 | */ |
805 | uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { |
806 | Compile::TracePhase tp("buildIFG" , &timers[_t_buildIFGphysical]); |
807 | |
808 | uint must_spill = 0; |
809 | for (uint i = 0; i < _cfg.number_of_blocks(); i++) { |
810 | Block* block = _cfg.get_block(i); |
811 | |
812 | // Clone (rather than smash in place) the liveout info, so it is alive |
813 | // for the "collect_gc_info" phase later. |
814 | IndexSet liveout(_live->live(block)); |
815 | |
816 | uint first_inst = first_nonphi_index(block); |
817 | uint last_inst = block->end_idx(); |
818 | |
819 | move_exception_node_up(block, first_inst, last_inst); |
820 | |
821 | Pressure int_pressure(last_inst + 1, INTPRESSURE); |
822 | Pressure float_pressure(last_inst + 1, FLOATPRESSURE); |
823 | block->_reg_pressure = 0; |
824 | block->_freg_pressure = 0; |
825 | |
826 | int inst_count = last_inst - first_inst; |
827 | double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count); |
828 | assert(cost >= 0.0, "negative spill cost" ); |
829 | |
830 | compute_initial_block_pressure(block, &liveout, int_pressure, float_pressure, cost); |
831 | |
832 | for (uint location = last_inst; location > 0; location--) { |
833 | Node* n = block->get_node(location); |
834 | uint lid = _lrg_map.live_range_id(n); |
835 | |
836 | if(lid) { |
837 | LRG& lrg = lrgs(lid); |
838 | |
839 | // A DEF normally costs block frequency; rematerialized values are |
840 | // removed from the DEF sight, so LOWER costs here. |
841 | lrg._cost += n->rematerialize() ? 0 : block->_freq; |
842 | |
843 | if (!liveout.member(lid) && n->Opcode() != Op_SafePoint) { |
844 | if (remove_node_if_not_used(block, location, n, lid, &liveout)) { |
845 | float_pressure.lower_high_pressure_index(); |
846 | int_pressure.lower_high_pressure_index(); |
847 | continue; |
848 | } |
849 | if (lrg._fat_proj) { |
850 | check_for_high_pressure_transition_at_fatproj(block->_reg_pressure, location, lrg, int_pressure, Op_RegI); |
851 | check_for_high_pressure_transition_at_fatproj(block->_freg_pressure, location, lrg, float_pressure, Op_RegD); |
852 | } |
853 | } else { |
854 | // A live range ends at its definition, remove the remaining area. |
855 | // If the cost is +Inf (which might happen in extreme cases), the lrg area will also be +Inf, |
856 | // and +Inf - +Inf = NaN. So let's not do that subtraction. |
857 | if (g_isfinite(cost)) { |
858 | lrg._area -= cost; |
859 | } |
860 | assert(lrg._area >= 0.0, "negative spill area" ); |
861 | |
862 | assign_high_score_to_immediate_copies(block, n, lrg, location + 1, last_inst); |
863 | |
864 | if (liveout.remove(lid)) { |
865 | lower_pressure(block, location, lrg, &liveout, int_pressure, float_pressure); |
866 | } |
867 | uint copy_idx = n->is_Copy(); |
868 | if (copy_idx) { |
869 | uint lid_copy = _lrg_map.live_range_id(n->in(copy_idx)); |
870 | remove_interference_from_copy(block, location, lid_copy, &liveout, cost, int_pressure, float_pressure); |
871 | } |
872 | } |
873 | |
874 | // Since rematerializable DEFs are not bound but the live range is, |
875 | // some uses must be bound. If we spill live range 'r', it can |
876 | // rematerialize at each use site according to its bindings. |
877 | if (lrg.is_bound() && !n->rematerialize() && lrg.mask().is_NotEmpty()) { |
878 | remove_bound_register_from_interfering_live_ranges(lrg, &liveout, must_spill); |
879 | } |
880 | interfere_with_live(lid, &liveout); |
881 | } |
882 | |
883 | // Area remaining in the block |
884 | inst_count--; |
885 | cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count); |
886 | |
887 | if (!n->is_Phi()) { |
888 | add_input_to_liveout(block, n, &liveout, cost, int_pressure, float_pressure); |
889 | } |
890 | } |
891 | |
892 | check_for_high_pressure_block(int_pressure); |
893 | check_for_high_pressure_block(float_pressure); |
894 | adjust_high_pressure_index(block, block->_ihrp_index, int_pressure); |
895 | adjust_high_pressure_index(block, block->_fhrp_index, float_pressure); |
896 | // set the final_pressure as the register pressure for the block |
897 | block->_reg_pressure = int_pressure.final_pressure(); |
898 | block->_freg_pressure = float_pressure.final_pressure(); |
899 | |
900 | #ifndef PRODUCT |
901 | // Gather Register Pressure Statistics |
902 | if (PrintOptoStatistics) { |
903 | if (block->_reg_pressure > int_pressure.high_pressure_limit() || block->_freg_pressure > float_pressure.high_pressure_limit()) { |
904 | _high_pressure++; |
905 | } else { |
906 | _low_pressure++; |
907 | } |
908 | } |
909 | #endif |
910 | } |
911 | |
912 | return must_spill; |
913 | } |
914 | |