1/*
2 * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "gc/shared/barrierSet.hpp"
27#include "gc/shared/c2/barrierSetC2.hpp"
28#include "memory/allocation.inline.hpp"
29#include "memory/resourceArea.hpp"
30#include "opto/addnode.hpp"
31#include "opto/callnode.hpp"
32#include "opto/castnode.hpp"
33#include "opto/connode.hpp"
34#include "opto/castnode.hpp"
35#include "opto/divnode.hpp"
36#include "opto/loopnode.hpp"
37#include "opto/matcher.hpp"
38#include "opto/mulnode.hpp"
39#include "opto/movenode.hpp"
40#include "opto/opaquenode.hpp"
41#include "opto/rootnode.hpp"
42#include "opto/subnode.hpp"
43#include "utilities/macros.hpp"
44#if INCLUDE_ZGC
45#include "gc/z/c2/zBarrierSetC2.hpp"
46#endif
47
48//=============================================================================
49//------------------------------split_thru_phi---------------------------------
50// Split Node 'n' through merge point if there is enough win.
51Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
52 if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
53 // ConvI2L may have type information on it which is unsafe to push up
54 // so disable this for now
55 return NULL;
56 }
57
58 // Splitting range check CastIIs through a loop induction Phi can
59 // cause new Phis to be created that are left unrelated to the loop
60 // induction Phi and prevent optimizations (vectorization)
61 if (n->Opcode() == Op_CastII && n->as_CastII()->has_range_check() &&
62 region->is_CountedLoop() && n->in(1) == region->as_CountedLoop()->phi()) {
63 return NULL;
64 }
65
66 int wins = 0;
67 assert(!n->is_CFG(), "");
68 assert(region->is_Region(), "");
69
70 const Type* type = n->bottom_type();
71 const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
72 Node *phi;
73 if (t_oop != NULL && t_oop->is_known_instance_field()) {
74 int iid = t_oop->instance_id();
75 int index = C->get_alias_index(t_oop);
76 int offset = t_oop->offset();
77 phi = new PhiNode(region, type, NULL, iid, index, offset);
78 } else {
79 phi = PhiNode::make_blank(region, n);
80 }
81 uint old_unique = C->unique();
82 for (uint i = 1; i < region->req(); i++) {
83 Node *x;
84 Node* the_clone = NULL;
85 if (region->in(i) == C->top()) {
86 x = C->top(); // Dead path? Use a dead data op
87 } else {
88 x = n->clone(); // Else clone up the data op
89 the_clone = x; // Remember for possible deletion.
90 // Alter data node to use pre-phi inputs
91 if (n->in(0) == region)
92 x->set_req( 0, region->in(i) );
93 for (uint j = 1; j < n->req(); j++) {
94 Node *in = n->in(j);
95 if (in->is_Phi() && in->in(0) == region)
96 x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
97 }
98 }
99 // Check for a 'win' on some paths
100 const Type *t = x->Value(&_igvn);
101
102 bool singleton = t->singleton();
103
104 // A TOP singleton indicates that there are no possible values incoming
105 // along a particular edge. In most cases, this is OK, and the Phi will
106 // be eliminated later in an Ideal call. However, we can't allow this to
107 // happen if the singleton occurs on loop entry, as the elimination of
108 // the PhiNode may cause the resulting node to migrate back to a previous
109 // loop iteration.
110 if (singleton && t == Type::TOP) {
111 // Is_Loop() == false does not confirm the absence of a loop (e.g., an
112 // irreducible loop may not be indicated by an affirmative is_Loop());
113 // therefore, the only top we can split thru a phi is on a backedge of
114 // a loop.
115 singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
116 }
117
118 if (singleton) {
119 wins++;
120 x = ((PhaseGVN&)_igvn).makecon(t);
121 } else {
122 // We now call Identity to try to simplify the cloned node.
123 // Note that some Identity methods call phase->type(this).
124 // Make sure that the type array is big enough for
125 // our new node, even though we may throw the node away.
126 // (Note: This tweaking with igvn only works because x is a new node.)
127 _igvn.set_type(x, t);
128 // If x is a TypeNode, capture any more-precise type permanently into Node
129 // otherwise it will be not updated during igvn->transform since
130 // igvn->type(x) is set to x->Value() already.
131 x->raise_bottom_type(t);
132 Node *y = _igvn.apply_identity(x);
133 if (y != x) {
134 wins++;
135 x = y;
136 } else {
137 y = _igvn.hash_find(x);
138 if (y) {
139 wins++;
140 x = y;
141 } else {
142 // Else x is a new node we are keeping
143 // We do not need register_new_node_with_optimizer
144 // because set_type has already been called.
145 _igvn._worklist.push(x);
146 }
147 }
148 }
149 if (x != the_clone && the_clone != NULL)
150 _igvn.remove_dead_node(the_clone);
151 phi->set_req( i, x );
152 }
153 // Too few wins?
154 if (wins <= policy) {
155 _igvn.remove_dead_node(phi);
156 return NULL;
157 }
158
159 // Record Phi
160 register_new_node( phi, region );
161
162 for (uint i2 = 1; i2 < phi->req(); i2++) {
163 Node *x = phi->in(i2);
164 // If we commoned up the cloned 'x' with another existing Node,
165 // the existing Node picks up a new use. We need to make the
166 // existing Node occur higher up so it dominates its uses.
167 Node *old_ctrl;
168 IdealLoopTree *old_loop;
169
170 if (x->is_Con()) {
171 // Constant's control is always root.
172 set_ctrl(x, C->root());
173 continue;
174 }
175 // The occasional new node
176 if (x->_idx >= old_unique) { // Found a new, unplaced node?
177 old_ctrl = NULL;
178 old_loop = NULL; // Not in any prior loop
179 } else {
180 old_ctrl = get_ctrl(x);
181 old_loop = get_loop(old_ctrl); // Get prior loop
182 }
183 // New late point must dominate new use
184 Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
185 if (new_ctrl == old_ctrl) // Nothing is changed
186 continue;
187
188 IdealLoopTree *new_loop = get_loop(new_ctrl);
189
190 // Don't move x into a loop if its uses are
191 // outside of loop. Otherwise x will be cloned
192 // for each use outside of this loop.
193 IdealLoopTree *use_loop = get_loop(region);
194 if (!new_loop->is_member(use_loop) &&
195 (old_loop == NULL || !new_loop->is_member(old_loop))) {
196 // Take early control, later control will be recalculated
197 // during next iteration of loop optimizations.
198 new_ctrl = get_early_ctrl(x);
199 new_loop = get_loop(new_ctrl);
200 }
201 // Set new location
202 set_ctrl(x, new_ctrl);
203 // If changing loop bodies, see if we need to collect into new body
204 if (old_loop != new_loop) {
205 if (old_loop && !old_loop->_child)
206 old_loop->_body.yank(x);
207 if (!new_loop->_child)
208 new_loop->_body.push(x); // Collect body info
209 }
210 }
211
212 return phi;
213}
214
215//------------------------------dominated_by------------------------------------
216// Replace the dominated test with an obvious true or false. Place it on the
217// IGVN worklist for later cleanup. Move control-dependent data Nodes on the
218// live path up to the dominating control.
219void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
220 if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }
221
222 // prevdom is the dominating projection of the dominating test.
223 assert( iff->is_If(), "" );
224 assert(iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd || iff->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
225 int pop = prevdom->Opcode();
226 assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
227 if (flip) {
228 if (pop == Op_IfTrue)
229 pop = Op_IfFalse;
230 else
231 pop = Op_IfTrue;
232 }
233 // 'con' is set to true or false to kill the dominated test.
234 Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
235 set_ctrl(con, C->root()); // Constant gets a new use
236 // Hack the dominated test
237 _igvn.replace_input_of(iff, 1, con);
238
239 // If I dont have a reachable TRUE and FALSE path following the IfNode then
240 // I can assume this path reaches an infinite loop. In this case it's not
241 // important to optimize the data Nodes - either the whole compilation will
242 // be tossed or this path (and all data Nodes) will go dead.
243 if (iff->outcnt() != 2) return;
244
245 // Make control-dependent data Nodes on the live path (path that will remain
246 // once the dominated IF is removed) become control-dependent on the
247 // dominating projection.
248 Node* dp = iff->as_If()->proj_out_or_null(pop == Op_IfTrue);
249
250 // Loop predicates may have depending checks which should not
251 // be skipped. For example, range check predicate has two checks
252 // for lower and upper bounds.
253 if (dp == NULL)
254 return;
255
256 ProjNode* dp_proj = dp->as_Proj();
257 ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
258 if (exclude_loop_predicate &&
259 (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
260 unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_profile_predicate) != NULL ||
261 unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
262 // If this is a range check (IfNode::is_range_check), do not
263 // reorder because Compile::allow_range_check_smearing might have
264 // changed the check.
265 return; // Let IGVN transformation change control dependence.
266 }
267
268 IdealLoopTree *old_loop = get_loop(dp);
269
270 for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
271 Node* cd = dp->fast_out(i); // Control-dependent node
272 if (cd->depends_only_on_test()) {
273 assert(cd->in(0) == dp, "");
274 _igvn.replace_input_of(cd, 0, prevdom);
275 set_early_ctrl(cd);
276 IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
277 if (old_loop != new_loop) {
278 if (!old_loop->_child) old_loop->_body.yank(cd);
279 if (!new_loop->_child) new_loop->_body.push(cd);
280 }
281 --i;
282 --imax;
283 }
284 }
285}
286
287//------------------------------has_local_phi_input----------------------------
288// Return TRUE if 'n' has Phi inputs from its local block and no other
289// block-local inputs (all non-local-phi inputs come from earlier blocks)
290Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
291 Node *n_ctrl = get_ctrl(n);
292 // See if some inputs come from a Phi in this block, or from before
293 // this block.
294 uint i;
295 for( i = 1; i < n->req(); i++ ) {
296 Node *phi = n->in(i);
297 if( phi->is_Phi() && phi->in(0) == n_ctrl )
298 break;
299 }
300 if( i >= n->req() )
301 return NULL; // No Phi inputs; nowhere to clone thru
302
303 // Check for inputs created between 'n' and the Phi input. These
304 // must split as well; they have already been given the chance
305 // (courtesy of a post-order visit) and since they did not we must
306 // recover the 'cost' of splitting them by being very profitable
307 // when splitting 'n'. Since this is unlikely we simply give up.
308 for( i = 1; i < n->req(); i++ ) {
309 Node *m = n->in(i);
310 if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
311 // We allow the special case of AddP's with no local inputs.
312 // This allows us to split-up address expressions.
313 if (m->is_AddP() &&
314 get_ctrl(m->in(2)) != n_ctrl &&
315 get_ctrl(m->in(3)) != n_ctrl) {
316 // Move the AddP up to dominating point
317 Node* c = find_non_split_ctrl(idom(n_ctrl));
318 if (c->is_OuterStripMinedLoop()) {
319 c->as_Loop()->verify_strip_mined(1);
320 c = c->in(LoopNode::EntryControl);
321 }
322 set_ctrl_and_loop(m, c);
323 continue;
324 }
325 return NULL;
326 }
327 assert(m->is_Phi() || is_dominator(get_ctrl(m), n_ctrl), "m has strange control");
328 }
329
330 return n_ctrl;
331}
332
333//------------------------------remix_address_expressions----------------------
334// Rework addressing expressions to get the most loop-invariant stuff
335// moved out. We'd like to do all associative operators, but it's especially
336// important (common) to do address expressions.
337Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
338 if (!has_ctrl(n)) return NULL;
339 Node *n_ctrl = get_ctrl(n);
340 IdealLoopTree *n_loop = get_loop(n_ctrl);
341
342 // See if 'n' mixes loop-varying and loop-invariant inputs and
343 // itself is loop-varying.
344
345 // Only interested in binary ops (and AddP)
346 if( n->req() < 3 || n->req() > 4 ) return NULL;
347
348 Node *n1_ctrl = get_ctrl(n->in( 1));
349 Node *n2_ctrl = get_ctrl(n->in( 2));
350 Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
351 IdealLoopTree *n1_loop = get_loop( n1_ctrl );
352 IdealLoopTree *n2_loop = get_loop( n2_ctrl );
353 IdealLoopTree *n3_loop = get_loop( n3_ctrl );
354
355 // Does one of my inputs spin in a tighter loop than self?
356 if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
357 (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
358 (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
359 return NULL; // Leave well enough alone
360
361 // Is at least one of my inputs loop-invariant?
362 if( n1_loop == n_loop &&
363 n2_loop == n_loop &&
364 n3_loop == n_loop )
365 return NULL; // No loop-invariant inputs
366
367
368 int n_op = n->Opcode();
369
370 // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
371 if( n_op == Op_LShiftI ) {
372 // Scale is loop invariant
373 Node *scale = n->in(2);
374 Node *scale_ctrl = get_ctrl(scale);
375 IdealLoopTree *scale_loop = get_loop(scale_ctrl );
376 if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
377 return NULL;
378 const TypeInt *scale_t = scale->bottom_type()->isa_int();
379 if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
380 return NULL; // Dont bother with byte/short masking
381 // Add must vary with loop (else shift would be loop-invariant)
382 Node *add = n->in(1);
383 Node *add_ctrl = get_ctrl(add);
384 IdealLoopTree *add_loop = get_loop(add_ctrl);
385 //assert( n_loop == add_loop, "" );
386 if( n_loop != add_loop ) return NULL; // happens w/ evil ZKM loops
387
388 // Convert I-V into I+ (0-V); same for V-I
389 if( add->Opcode() == Op_SubI &&
390 _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
391 Node *zero = _igvn.intcon(0);
392 set_ctrl(zero, C->root());
393 Node *neg = new SubINode( _igvn.intcon(0), add->in(2) );
394 register_new_node( neg, get_ctrl(add->in(2) ) );
395 add = new AddINode( add->in(1), neg );
396 register_new_node( add, add_ctrl );
397 }
398 if( add->Opcode() != Op_AddI ) return NULL;
399 // See if one add input is loop invariant
400 Node *add_var = add->in(1);
401 Node *add_var_ctrl = get_ctrl(add_var);
402 IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
403 Node *add_invar = add->in(2);
404 Node *add_invar_ctrl = get_ctrl(add_invar);
405 IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
406 if( add_var_loop == n_loop ) {
407 } else if( add_invar_loop == n_loop ) {
408 // Swap to find the invariant part
409 add_invar = add_var;
410 add_invar_ctrl = add_var_ctrl;
411 add_invar_loop = add_var_loop;
412 add_var = add->in(2);
413 Node *add_var_ctrl = get_ctrl(add_var);
414 IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
415 } else // Else neither input is loop invariant
416 return NULL;
417 if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
418 return NULL; // No invariant part of the add?
419
420 // Yes! Reshape address expression!
421 Node *inv_scale = new LShiftINode( add_invar, scale );
422 Node *inv_scale_ctrl =
423 dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
424 add_invar_ctrl : scale_ctrl;
425 register_new_node( inv_scale, inv_scale_ctrl );
426 Node *var_scale = new LShiftINode( add_var, scale );
427 register_new_node( var_scale, n_ctrl );
428 Node *var_add = new AddINode( var_scale, inv_scale );
429 register_new_node( var_add, n_ctrl );
430 _igvn.replace_node( n, var_add );
431 return var_add;
432 }
433
434 // Replace (I+V) with (V+I)
435 if( n_op == Op_AddI ||
436 n_op == Op_AddL ||
437 n_op == Op_AddF ||
438 n_op == Op_AddD ||
439 n_op == Op_MulI ||
440 n_op == Op_MulL ||
441 n_op == Op_MulF ||
442 n_op == Op_MulD ) {
443 if( n2_loop == n_loop ) {
444 assert( n1_loop != n_loop, "" );
445 n->swap_edges(1, 2);
446 }
447 }
448
449 // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
450 // but not if I2 is a constant.
451 if( n_op == Op_AddP ) {
452 if( n2_loop == n_loop && n3_loop != n_loop ) {
453 if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
454 Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
455 Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
456 IdealLoopTree *n22loop = get_loop( n22_ctrl );
457 IdealLoopTree *n23_loop = get_loop( n23_ctrl );
458 if( n22loop != n_loop && n22loop->is_member(n_loop) &&
459 n23_loop == n_loop ) {
460 Node *add1 = new AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
461 // Stuff new AddP in the loop preheader
462 register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
463 Node *add2 = new AddPNode( n->in(1), add1, n->in(2)->in(3) );
464 register_new_node( add2, n_ctrl );
465 _igvn.replace_node( n, add2 );
466 return add2;
467 }
468 }
469 }
470
471 // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
472 if (n2_loop != n_loop && n3_loop == n_loop) {
473 if (n->in(3)->Opcode() == Op_AddX) {
474 Node *V = n->in(3)->in(1);
475 Node *I = n->in(3)->in(2);
476 if (is_member(n_loop,get_ctrl(V))) {
477 } else {
478 Node *tmp = V; V = I; I = tmp;
479 }
480 if (!is_member(n_loop,get_ctrl(I))) {
481 Node *add1 = new AddPNode(n->in(1), n->in(2), I);
482 // Stuff new AddP in the loop preheader
483 register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
484 Node *add2 = new AddPNode(n->in(1), add1, V);
485 register_new_node(add2, n_ctrl);
486 _igvn.replace_node(n, add2);
487 return add2;
488 }
489 }
490 }
491 }
492
493 return NULL;
494}
495
496// Optimize ((in1[2*i] * in2[2*i]) + (in1[2*i+1] * in2[2*i+1]))
497Node *PhaseIdealLoop::convert_add_to_muladd(Node* n) {
498 assert(n->Opcode() == Op_AddI, "sanity");
499 Node * nn = NULL;
500 Node * in1 = n->in(1);
501 Node * in2 = n->in(2);
502 if (in1->Opcode() == Op_MulI && in2->Opcode() == Op_MulI) {
503 IdealLoopTree* loop_n = get_loop(get_ctrl(n));
504 if (loop_n->_head->as_Loop()->is_valid_counted_loop() &&
505 Matcher::match_rule_supported(Op_MulAddS2I) &&
506 Matcher::match_rule_supported(Op_MulAddVS2VI)) {
507 Node* mul_in1 = in1->in(1);
508 Node* mul_in2 = in1->in(2);
509 Node* mul_in3 = in2->in(1);
510 Node* mul_in4 = in2->in(2);
511 if (mul_in1->Opcode() == Op_LoadS &&
512 mul_in2->Opcode() == Op_LoadS &&
513 mul_in3->Opcode() == Op_LoadS &&
514 mul_in4->Opcode() == Op_LoadS) {
515 IdealLoopTree* loop1 = get_loop(get_ctrl(mul_in1));
516 IdealLoopTree* loop2 = get_loop(get_ctrl(mul_in2));
517 IdealLoopTree* loop3 = get_loop(get_ctrl(mul_in3));
518 IdealLoopTree* loop4 = get_loop(get_ctrl(mul_in4));
519 IdealLoopTree* loop5 = get_loop(get_ctrl(in1));
520 IdealLoopTree* loop6 = get_loop(get_ctrl(in2));
521 // All nodes should be in the same counted loop.
522 if (loop_n == loop1 && loop_n == loop2 && loop_n == loop3 &&
523 loop_n == loop4 && loop_n == loop5 && loop_n == loop6) {
524 Node* adr1 = mul_in1->in(MemNode::Address);
525 Node* adr2 = mul_in2->in(MemNode::Address);
526 Node* adr3 = mul_in3->in(MemNode::Address);
527 Node* adr4 = mul_in4->in(MemNode::Address);
528 if (adr1->is_AddP() && adr2->is_AddP() && adr3->is_AddP() && adr4->is_AddP()) {
529 if ((adr1->in(AddPNode::Base) == adr3->in(AddPNode::Base)) &&
530 (adr2->in(AddPNode::Base) == adr4->in(AddPNode::Base))) {
531 nn = new MulAddS2INode(mul_in1, mul_in2, mul_in3, mul_in4);
532 register_new_node(nn, get_ctrl(n));
533 _igvn.replace_node(n, nn);
534 return nn;
535 } else if ((adr1->in(AddPNode::Base) == adr4->in(AddPNode::Base)) &&
536 (adr2->in(AddPNode::Base) == adr3->in(AddPNode::Base))) {
537 nn = new MulAddS2INode(mul_in1, mul_in2, mul_in4, mul_in3);
538 register_new_node(nn, get_ctrl(n));
539 _igvn.replace_node(n, nn);
540 return nn;
541 }
542 }
543 }
544 }
545 }
546 }
547 return nn;
548}
549
550//------------------------------conditional_move-------------------------------
551// Attempt to replace a Phi with a conditional move. We have some pretty
552// strict profitability requirements. All Phis at the merge point must
553// be converted, so we can remove the control flow. We need to limit the
554// number of c-moves to a small handful. All code that was in the side-arms
555// of the CFG diamond is now speculatively executed. This code has to be
556// "cheap enough". We are pretty much limited to CFG diamonds that merge
557// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
558Node *PhaseIdealLoop::conditional_move( Node *region ) {
559
560 assert(region->is_Region(), "sanity check");
561 if (region->req() != 3) return NULL;
562
563 // Check for CFG diamond
564 Node *lp = region->in(1);
565 Node *rp = region->in(2);
566 if (!lp || !rp) return NULL;
567 Node *lp_c = lp->in(0);
568 if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
569 IfNode *iff = lp_c->as_If();
570
571 // Check for ops pinned in an arm of the diamond.
572 // Can't remove the control flow in this case
573 if (lp->outcnt() > 1) return NULL;
574 if (rp->outcnt() > 1) return NULL;
575
576 IdealLoopTree* r_loop = get_loop(region);
577 assert(r_loop == get_loop(iff), "sanity");
578 // Always convert to CMOVE if all results are used only outside this loop.
579 bool used_inside_loop = (r_loop == _ltree_root);
580
581 // Check profitability
582 int cost = 0;
583 int phis = 0;
584 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
585 Node *out = region->fast_out(i);
586 if (!out->is_Phi()) continue; // Ignore other control edges, etc
587 phis++;
588 PhiNode* phi = out->as_Phi();
589 BasicType bt = phi->type()->basic_type();
590 switch (bt) {
591 case T_DOUBLE:
592 case T_FLOAT:
593 if (C->use_cmove()) {
594 continue; //TODO: maybe we want to add some cost
595 }
596 cost += Matcher::float_cmove_cost(); // Could be very expensive
597 break;
598 case T_LONG: {
599 cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
600 }
601 case T_INT: // These all CMOV fine
602 case T_ADDRESS: { // (RawPtr)
603 cost++;
604 break;
605 }
606 case T_NARROWOOP: // Fall through
607 case T_OBJECT: { // Base oops are OK, but not derived oops
608 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
609 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
610 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus
611 // CMOVE'ing a derived pointer requires we also CMOVE the base. If we
612 // have a Phi for the base here that we convert to a CMOVE all is well
613 // and good. But if the base is dead, we'll not make a CMOVE. Later
614 // the allocator will have to produce a base by creating a CMOVE of the
615 // relevant bases. This puts the allocator in the business of
616 // manufacturing expensive instructions, generally a bad plan.
617 // Just Say No to Conditionally-Moved Derived Pointers.
618 if (tp && tp->offset() != 0)
619 return NULL;
620 cost++;
621 break;
622 }
623 default:
624 return NULL; // In particular, can't do memory or I/O
625 }
626 // Add in cost any speculative ops
627 for (uint j = 1; j < region->req(); j++) {
628 Node *proj = region->in(j);
629 Node *inp = phi->in(j);
630 if (get_ctrl(inp) == proj) { // Found local op
631 cost++;
632 // Check for a chain of dependent ops; these will all become
633 // speculative in a CMOV.
634 for (uint k = 1; k < inp->req(); k++)
635 if (get_ctrl(inp->in(k)) == proj)
636 cost += ConditionalMoveLimit; // Too much speculative goo
637 }
638 }
639 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
640 // This will likely Split-If, a higher-payoff operation.
641 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
642 Node* use = phi->fast_out(k);
643 if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
644 cost += ConditionalMoveLimit;
645 // Is there a use inside the loop?
646 // Note: check only basic types since CMoveP is pinned.
647 if (!used_inside_loop && is_java_primitive(bt)) {
648 IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
649 if (r_loop == u_loop || r_loop->is_member(u_loop)) {
650 used_inside_loop = true;
651 }
652 }
653 }
654 }//for
655 Node* bol = iff->in(1);
656 assert(bol->Opcode() == Op_Bool, "");
657 int cmp_op = bol->in(1)->Opcode();
658 // It is expensive to generate flags from a float compare.
659 // Avoid duplicated float compare.
660 if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
661
662 float infrequent_prob = PROB_UNLIKELY_MAG(3);
663 // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
664 if (used_inside_loop) {
665 if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
666
667 // BlockLayoutByFrequency optimization moves infrequent branch
668 // from hot path. No point in CMOV'ing in such case (110 is used
669 // instead of 100 to take into account not exactness of float value).
670 if (BlockLayoutByFrequency) {
671 infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
672 }
673 }
674 // Check for highly predictable branch. No point in CMOV'ing if
675 // we are going to predict accurately all the time.
676 if (C->use_cmove() && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) {
677 //keep going
678 } else if (iff->_prob < infrequent_prob ||
679 iff->_prob > (1.0f - infrequent_prob))
680 return NULL;
681
682 // --------------
683 // Now replace all Phis with CMOV's
684 Node *cmov_ctrl = iff->in(0);
685 uint flip = (lp->Opcode() == Op_IfTrue);
686 Node_List wq;
687 while (1) {
688 PhiNode* phi = NULL;
689 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
690 Node *out = region->fast_out(i);
691 if (out->is_Phi()) {
692 phi = out->as_Phi();
693 break;
694 }
695 }
696 if (phi == NULL) break;
697 if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
698 // Move speculative ops
699 wq.push(phi);
700 while (wq.size() > 0) {
701 Node *n = wq.pop();
702 for (uint j = 1; j < n->req(); j++) {
703 Node* m = n->in(j);
704 if (m != NULL && !is_dominator(get_ctrl(m), cmov_ctrl)) {
705#ifndef PRODUCT
706 if (PrintOpto && VerifyLoopOptimizations) {
707 tty->print(" speculate: ");
708 m->dump();
709 }
710#endif
711 set_ctrl(m, cmov_ctrl);
712 wq.push(m);
713 }
714 }
715 }
716 Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
717 register_new_node( cmov, cmov_ctrl );
718 _igvn.replace_node( phi, cmov );
719#ifndef PRODUCT
720 if (TraceLoopOpts) {
721 tty->print("CMOV ");
722 r_loop->dump_head();
723 if (Verbose) {
724 bol->in(1)->dump(1);
725 cmov->dump(1);
726 }
727 }
728 if (VerifyLoopOptimizations) verify();
729#endif
730 }
731
732 // The useless CFG diamond will fold up later; see the optimization in
733 // RegionNode::Ideal.
734 _igvn._worklist.push(region);
735
736 return iff->in(1);
737}
738
739static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
740 for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
741 Node* u = m->fast_out(i);
742 if (u->is_CFG()) {
743 if (u->Opcode() == Op_NeverBranch) {
744 u = ((NeverBranchNode*)u)->proj_out(0);
745 enqueue_cfg_uses(u, wq);
746 } else {
747 wq.push(u);
748 }
749 }
750 }
751}
752
753// Try moving a store out of a loop, right before the loop
754Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
755 // Store has to be first in the loop body
756 IdealLoopTree *n_loop = get_loop(n_ctrl);
757 if (n->is_Store() && n_loop != _ltree_root &&
758 n_loop->is_loop() && n_loop->_head->is_Loop() &&
759 n->in(0) != NULL) {
760 Node* address = n->in(MemNode::Address);
761 Node* value = n->in(MemNode::ValueIn);
762 Node* mem = n->in(MemNode::Memory);
763 IdealLoopTree* address_loop = get_loop(get_ctrl(address));
764 IdealLoopTree* value_loop = get_loop(get_ctrl(value));
765
766 // - address and value must be loop invariant
767 // - memory must be a memory Phi for the loop
768 // - Store must be the only store on this memory slice in the
769 // loop: if there's another store following this one then value
770 // written at iteration i by the second store could be overwritten
771 // at iteration i+n by the first store: it's not safe to move the
772 // first store out of the loop
773 // - nothing must observe the memory Phi: it guarantees no read
774 // before the store, we are also guaranteed the store post
775 // dominates the loop head (ignoring a possible early
776 // exit). Otherwise there would be extra Phi involved between the
777 // loop's Phi and the store.
778 // - there must be no early exit from the loop before the Store
779 // (such an exit most of the time would be an extra use of the
780 // memory Phi but sometimes is a bottom memory Phi that takes the
781 // store as input).
782
783 if (!n_loop->is_member(address_loop) &&
784 !n_loop->is_member(value_loop) &&
785 mem->is_Phi() && mem->in(0) == n_loop->_head &&
786 mem->outcnt() == 1 &&
787 mem->in(LoopNode::LoopBackControl) == n) {
788
789 assert(n_loop->_tail != NULL, "need a tail");
790 assert(is_dominator(n_ctrl, n_loop->_tail), "store control must not be in a branch in the loop");
791
792 // Verify that there's no early exit of the loop before the store.
793 bool ctrl_ok = false;
794 {
795 // Follow control from loop head until n, we exit the loop or
796 // we reach the tail
797 ResourceMark rm;
798 Unique_Node_List wq;
799 wq.push(n_loop->_head);
800
801 for (uint next = 0; next < wq.size(); ++next) {
802 Node *m = wq.at(next);
803 if (m == n->in(0)) {
804 ctrl_ok = true;
805 continue;
806 }
807 assert(!has_ctrl(m), "should be CFG");
808 if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
809 ctrl_ok = false;
810 break;
811 }
812 enqueue_cfg_uses(m, wq);
813 if (wq.size() > 10) {
814 ctrl_ok = false;
815 break;
816 }
817 }
818 }
819 if (ctrl_ok) {
820 // move the Store
821 _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
822 _igvn.replace_input_of(n, 0, n_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl));
823 _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
824 // Disconnect the phi now. An empty phi can confuse other
825 // optimizations in this pass of loop opts.
826 _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
827 n_loop->_body.yank(mem);
828
829 set_ctrl_and_loop(n, n->in(0));
830
831 return n;
832 }
833 }
834 }
835 return NULL;
836}
837
838// Try moving a store out of a loop, right after the loop
839void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
840 if (n->is_Store() && n->in(0) != NULL) {
841 Node *n_ctrl = get_ctrl(n);
842 IdealLoopTree *n_loop = get_loop(n_ctrl);
843 // Store must be in a loop
844 if (n_loop != _ltree_root && !n_loop->_irreducible) {
845 Node* address = n->in(MemNode::Address);
846 Node* value = n->in(MemNode::ValueIn);
847 IdealLoopTree* address_loop = get_loop(get_ctrl(address));
848 // address must be loop invariant
849 if (!n_loop->is_member(address_loop)) {
850 // Store must be last on this memory slice in the loop and
851 // nothing in the loop must observe it
852 Node* phi = NULL;
853 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
854 Node* u = n->fast_out(i);
855 if (has_ctrl(u)) { // control use?
856 IdealLoopTree *u_loop = get_loop(get_ctrl(u));
857 if (!n_loop->is_member(u_loop)) {
858 continue;
859 }
860 if (u->is_Phi() && u->in(0) == n_loop->_head) {
861 assert(_igvn.type(u) == Type::MEMORY, "bad phi");
862 // multiple phis on the same slice are possible
863 if (phi != NULL) {
864 return;
865 }
866 phi = u;
867 continue;
868 }
869 }
870 return;
871 }
872 if (phi != NULL) {
873 // Nothing in the loop before the store (next iteration)
874 // must observe the stored value
875 bool mem_ok = true;
876 {
877 ResourceMark rm;
878 Unique_Node_List wq;
879 wq.push(phi);
880 for (uint next = 0; next < wq.size() && mem_ok; ++next) {
881 Node *m = wq.at(next);
882 for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
883 Node* u = m->fast_out(i);
884 if (u->is_Store() || u->is_Phi()) {
885 if (u != n) {
886 wq.push(u);
887 mem_ok = (wq.size() <= 10);
888 }
889 } else {
890 mem_ok = false;
891 break;
892 }
893 }
894 }
895 }
896 if (mem_ok) {
897 // Move the store out of the loop if the LCA of all
898 // users (except for the phi) is outside the loop.
899 Node* hook = new Node(1);
900 _igvn.rehash_node_delayed(phi);
901 int count = phi->replace_edge(n, hook);
902 assert(count > 0, "inconsistent phi");
903
904 // Compute latest point this store can go
905 Node* lca = get_late_ctrl(n, get_ctrl(n));
906 if (n_loop->is_member(get_loop(lca))) {
907 // LCA is in the loop - bail out
908 _igvn.replace_node(hook, n);
909 return;
910 }
911#ifdef ASSERT
912 if (n_loop->_head->is_Loop() && n_loop->_head->as_Loop()->is_strip_mined()) {
913 assert(n_loop->_head->Opcode() == Op_CountedLoop, "outer loop is a strip mined");
914 n_loop->_head->as_Loop()->verify_strip_mined(1);
915 Node* outer = n_loop->_head->as_CountedLoop()->outer_loop();
916 IdealLoopTree* outer_loop = get_loop(outer);
917 assert(n_loop->_parent == outer_loop, "broken loop tree");
918 assert(get_loop(lca) == outer_loop, "safepoint in outer loop consume all memory state");
919 }
920#endif
921
922 // Move store out of the loop
923 _igvn.replace_node(hook, n->in(MemNode::Memory));
924 _igvn.replace_input_of(n, 0, lca);
925 set_ctrl_and_loop(n, lca);
926
927 // Disconnect the phi now. An empty phi can confuse other
928 // optimizations in this pass of loop opts..
929 if (phi->in(LoopNode::LoopBackControl) == phi) {
930 _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
931 n_loop->_body.yank(phi);
932 }
933 }
934 }
935 }
936 }
937 }
938}
939
940//------------------------------split_if_with_blocks_pre-----------------------
941// Do the real work in a non-recursive function. Data nodes want to be
942// cloned in the pre-order so they can feed each other nicely.
943Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
944 BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
945 Node* bs_res = bs->split_if_pre(this, n);
946 if (bs_res != NULL) {
947 return bs_res;
948 }
949 // Cloning these guys is unlikely to win
950 int n_op = n->Opcode();
951 if( n_op == Op_MergeMem ) return n;
952 if( n->is_Proj() ) return n;
953 // Do not clone-up CmpFXXX variations, as these are always
954 // followed by a CmpI
955 if( n->is_Cmp() ) return n;
956 // Attempt to use a conditional move instead of a phi/branch
957 if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
958 Node *cmov = conditional_move( n );
959 if( cmov ) return cmov;
960 }
961 if( n->is_CFG() || n->is_LoadStore() )
962 return n;
963 if( n_op == Op_Opaque1 || // Opaque nodes cannot be mod'd
964 n_op == Op_Opaque2 ) {
965 if( !C->major_progress() ) // If chance of no more loop opts...
966 _igvn._worklist.push(n); // maybe we'll remove them
967 return n;
968 }
969
970 if( n->is_Con() ) return n; // No cloning for Con nodes
971
972 Node *n_ctrl = get_ctrl(n);
973 if( !n_ctrl ) return n; // Dead node
974
975 Node* res = try_move_store_before_loop(n, n_ctrl);
976 if (res != NULL) {
977 return n;
978 }
979
980 // Attempt to remix address expressions for loop invariants
981 Node *m = remix_address_expressions( n );
982 if( m ) return m;
983
984 if (n_op == Op_AddI) {
985 Node *nn = convert_add_to_muladd( n );
986 if ( nn ) return nn;
987 }
988
989 if (n->is_ConstraintCast()) {
990 Node* dom_cast = n->as_ConstraintCast()->dominating_cast(&_igvn, this);
991 // ConstraintCastNode::dominating_cast() uses node control input to determine domination.
992 // Node control inputs don't necessarily agree with loop control info (due to
993 // transformations happened in between), thus additional dominance check is needed
994 // to keep loop info valid.
995 if (dom_cast != NULL && is_dominator(get_ctrl(dom_cast), get_ctrl(n))) {
996 _igvn.replace_node(n, dom_cast);
997 return dom_cast;
998 }
999 }
1000
1001 // Determine if the Node has inputs from some local Phi.
1002 // Returns the block to clone thru.
1003 Node *n_blk = has_local_phi_input( n );
1004 if( !n_blk ) return n;
1005
1006 // Do not clone the trip counter through on a CountedLoop
1007 // (messes up the canonical shape).
1008 if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
1009
1010 // Check for having no control input; not pinned. Allow
1011 // dominating control.
1012 if (n->in(0)) {
1013 Node *dom = idom(n_blk);
1014 if (dom_lca(n->in(0), dom) != n->in(0)) {
1015 return n;
1016 }
1017 }
1018 // Policy: when is it profitable. You must get more wins than
1019 // policy before it is considered profitable. Policy is usually 0,
1020 // so 1 win is considered profitable. Big merges will require big
1021 // cloning, so get a larger policy.
1022 int policy = n_blk->req() >> 2;
1023
1024 // If the loop is a candidate for range check elimination,
1025 // delay splitting through it's phi until a later loop optimization
1026 if (n_blk->is_CountedLoop()) {
1027 IdealLoopTree *lp = get_loop(n_blk);
1028 if (lp && lp->_rce_candidate) {
1029 return n;
1030 }
1031 }
1032
1033 if (must_throttle_split_if()) return n;
1034
1035 // Split 'n' through the merge point if it is profitable
1036 Node *phi = split_thru_phi( n, n_blk, policy );
1037 if (!phi) return n;
1038
1039 // Found a Phi to split thru!
1040 // Replace 'n' with the new phi
1041 _igvn.replace_node( n, phi );
1042 // Moved a load around the loop, 'en-registering' something.
1043 if (n_blk->is_Loop() && n->is_Load() &&
1044 !phi->in(LoopNode::LoopBackControl)->is_Load())
1045 C->set_major_progress();
1046
1047 return phi;
1048}
1049
1050static bool merge_point_too_heavy(Compile* C, Node* region) {
1051 // Bail out if the region and its phis have too many users.
1052 int weight = 0;
1053 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1054 weight += region->fast_out(i)->outcnt();
1055 }
1056 int nodes_left = C->max_node_limit() - C->live_nodes();
1057 if (weight * 8 > nodes_left) {
1058 if (PrintOpto) {
1059 tty->print_cr("*** Split-if bails out: %d nodes, region weight %d", C->unique(), weight);
1060 }
1061 return true;
1062 } else {
1063 return false;
1064 }
1065}
1066
1067static bool merge_point_safe(Node* region) {
1068 // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
1069 // having a PhiNode input. This sidesteps the dangerous case where the split
1070 // ConvI2LNode may become TOP if the input Value() does not
1071 // overlap the ConvI2L range, leaving a node which may not dominate its
1072 // uses.
1073 // A better fix for this problem can be found in the BugTraq entry, but
1074 // expediency for Mantis demands this hack.
1075 // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
1076 // split_if_with_blocks from splitting a block because we could not move around
1077 // the FastLockNode.
1078 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1079 Node* n = region->fast_out(i);
1080 if (n->is_Phi()) {
1081 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1082 Node* m = n->fast_out(j);
1083 if (m->is_FastLock())
1084 return false;
1085#ifdef _LP64
1086 if (m->Opcode() == Op_ConvI2L)
1087 return false;
1088 if (m->is_CastII() && m->isa_CastII()->has_range_check()) {
1089 return false;
1090 }
1091#endif
1092 }
1093 }
1094 }
1095 return true;
1096}
1097
1098
1099//------------------------------place_near_use---------------------------------
1100// Place some computation next to use but not inside inner loops.
1101// For inner loop uses move it to the preheader area.
1102Node *PhaseIdealLoop::place_near_use(Node *useblock) const {
1103 IdealLoopTree *u_loop = get_loop( useblock );
1104 if (u_loop->_irreducible) {
1105 return useblock;
1106 }
1107 if (u_loop->_child) {
1108 if (useblock == u_loop->_head && u_loop->_head->is_OuterStripMinedLoop()) {
1109 return u_loop->_head->in(LoopNode::EntryControl);
1110 }
1111 return useblock;
1112 }
1113 return u_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
1114}
1115
1116
1117bool PhaseIdealLoop::identical_backtoback_ifs(Node *n) {
1118 if (!n->is_If() || n->is_CountedLoopEnd()) {
1119 return false;
1120 }
1121 if (!n->in(0)->is_Region()) {
1122 return false;
1123 }
1124 Node* region = n->in(0);
1125 Node* dom = idom(region);
1126 if (!dom->is_If() || dom->in(1) != n->in(1)) {
1127 return false;
1128 }
1129 IfNode* dom_if = dom->as_If();
1130 Node* proj_true = dom_if->proj_out(1);
1131 Node* proj_false = dom_if->proj_out(0);
1132
1133 for (uint i = 1; i < region->req(); i++) {
1134 if (is_dominator(proj_true, region->in(i))) {
1135 continue;
1136 }
1137 if (is_dominator(proj_false, region->in(i))) {
1138 continue;
1139 }
1140 return false;
1141 }
1142
1143 return true;
1144}
1145
1146
1147bool PhaseIdealLoop::can_split_if(Node* n_ctrl) {
1148 if (must_throttle_split_if()) {
1149 return false;
1150 }
1151
1152 // Do not do 'split-if' if irreducible loops are present.
1153 if (_has_irreducible_loops) {
1154 return false;
1155 }
1156
1157 if (merge_point_too_heavy(C, n_ctrl)) {
1158 return false;
1159 }
1160
1161 // Do not do 'split-if' if some paths are dead. First do dead code
1162 // elimination and then see if its still profitable.
1163 for (uint i = 1; i < n_ctrl->req(); i++) {
1164 if (n_ctrl->in(i) == C->top()) {
1165 return false;
1166 }
1167 }
1168
1169 // If trying to do a 'Split-If' at the loop head, it is only
1170 // profitable if the cmp folds up on BOTH paths. Otherwise we
1171 // risk peeling a loop forever.
1172
1173 // CNC - Disabled for now. Requires careful handling of loop
1174 // body selection for the cloned code. Also, make sure we check
1175 // for any input path not being in the same loop as n_ctrl. For
1176 // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
1177 // because the alternative loop entry points won't be converted
1178 // into LoopNodes.
1179 IdealLoopTree *n_loop = get_loop(n_ctrl);
1180 for (uint j = 1; j < n_ctrl->req(); j++) {
1181 if (get_loop(n_ctrl->in(j)) != n_loop) {
1182 return false;
1183 }
1184 }
1185
1186 // Check for safety of the merge point.
1187 if (!merge_point_safe(n_ctrl)) {
1188 return false;
1189 }
1190
1191 return true;
1192}
1193
1194//------------------------------split_if_with_blocks_post----------------------
1195// Do the real work in a non-recursive function. CFG hackery wants to be
1196// in the post-order, so it can dirty the I-DOM info and not use the dirtied
1197// info.
1198void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
1199
1200 // Cloning Cmp through Phi's involves the split-if transform.
1201 // FastLock is not used by an If
1202 if (n->is_Cmp() && !n->is_FastLock()) {
1203 Node *n_ctrl = get_ctrl(n);
1204 // Determine if the Node has inputs from some local Phi.
1205 // Returns the block to clone thru.
1206 Node *n_blk = has_local_phi_input(n);
1207 if (n_blk != n_ctrl) {
1208 return;
1209 }
1210
1211 if (!can_split_if(n_ctrl)) {
1212 return;
1213 }
1214
1215 if (n->outcnt() != 1) {
1216 return; // Multiple bool's from 1 compare?
1217 }
1218 Node *bol = n->unique_out();
1219 assert(bol->is_Bool(), "expect a bool here");
1220 if (bol->outcnt() != 1) {
1221 return;// Multiple branches from 1 compare?
1222 }
1223 Node *iff = bol->unique_out();
1224
1225 // Check some safety conditions
1226 if (iff->is_If()) { // Classic split-if?
1227 if (iff->in(0) != n_ctrl) {
1228 return; // Compare must be in same blk as if
1229 }
1230 } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
1231 // Can't split CMove with different control edge.
1232 if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) {
1233 return;
1234 }
1235 if (get_ctrl(iff->in(2)) == n_ctrl ||
1236 get_ctrl(iff->in(3)) == n_ctrl) {
1237 return; // Inputs not yet split-up
1238 }
1239 if (get_loop(n_ctrl) != get_loop(get_ctrl(iff))) {
1240 return; // Loop-invar test gates loop-varying CMOVE
1241 }
1242 } else {
1243 return; // some other kind of node, such as an Allocate
1244 }
1245
1246 // When is split-if profitable? Every 'win' on means some control flow
1247 // goes dead, so it's almost always a win.
1248 int policy = 0;
1249 // Split compare 'n' through the merge point if it is profitable
1250 Node *phi = split_thru_phi( n, n_ctrl, policy);
1251 if (!phi) {
1252 return;
1253 }
1254
1255 // Found a Phi to split thru!
1256 // Replace 'n' with the new phi
1257 _igvn.replace_node(n, phi);
1258
1259 // Now split the bool up thru the phi
1260 Node *bolphi = split_thru_phi(bol, n_ctrl, -1);
1261 guarantee(bolphi != NULL, "null boolean phi node");
1262
1263 _igvn.replace_node(bol, bolphi);
1264 assert(iff->in(1) == bolphi, "");
1265
1266 if (bolphi->Value(&_igvn)->singleton()) {
1267 return;
1268 }
1269
1270 // Conditional-move? Must split up now
1271 if (!iff->is_If()) {
1272 Node *cmovphi = split_thru_phi(iff, n_ctrl, -1);
1273 _igvn.replace_node(iff, cmovphi);
1274 return;
1275 }
1276
1277 // Now split the IF
1278 do_split_if(iff);
1279 return;
1280 }
1281
1282 // Two identical ifs back to back can be merged
1283 if (identical_backtoback_ifs(n) && can_split_if(n->in(0))) {
1284 Node *n_ctrl = n->in(0);
1285 PhiNode* bolphi = PhiNode::make_blank(n_ctrl, n->in(1));
1286 IfNode* dom_if = idom(n_ctrl)->as_If();
1287 Node* proj_true = dom_if->proj_out(1);
1288 Node* proj_false = dom_if->proj_out(0);
1289 Node* con_true = _igvn.makecon(TypeInt::ONE);
1290 Node* con_false = _igvn.makecon(TypeInt::ZERO);
1291
1292 for (uint i = 1; i < n_ctrl->req(); i++) {
1293 if (is_dominator(proj_true, n_ctrl->in(i))) {
1294 bolphi->init_req(i, con_true);
1295 } else {
1296 assert(is_dominator(proj_false, n_ctrl->in(i)), "bad if");
1297 bolphi->init_req(i, con_false);
1298 }
1299 }
1300 register_new_node(bolphi, n_ctrl);
1301 _igvn.replace_input_of(n, 1, bolphi);
1302
1303 // Now split the IF
1304 do_split_if(n);
1305 return;
1306 }
1307
1308 // Check for an IF ready to split; one that has its
1309 // condition codes input coming from a Phi at the block start.
1310 int n_op = n->Opcode();
1311
1312 // Check for an IF being dominated by another IF same test
1313 if (n_op == Op_If ||
1314 n_op == Op_RangeCheck) {
1315 Node *bol = n->in(1);
1316 uint max = bol->outcnt();
1317 // Check for same test used more than once?
1318 if (max > 1 && bol->is_Bool()) {
1319 // Search up IDOMs to see if this IF is dominated.
1320 Node *cutoff = get_ctrl(bol);
1321
1322 // Now search up IDOMs till cutoff, looking for a dominating test
1323 Node *prevdom = n;
1324 Node *dom = idom(prevdom);
1325 while (dom != cutoff) {
1326 if (dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom) {
1327 // Replace the dominated test with an obvious true or false.
1328 // Place it on the IGVN worklist for later cleanup.
1329 C->set_major_progress();
1330 dominated_by(prevdom, n, false, true);
1331#ifndef PRODUCT
1332 if( VerifyLoopOptimizations ) verify();
1333#endif
1334 return;
1335 }
1336 prevdom = dom;
1337 dom = idom(prevdom);
1338 }
1339 }
1340 }
1341
1342 // See if a shared loop-varying computation has no loop-varying uses.
1343 // Happens if something is only used for JVM state in uncommon trap exits,
1344 // like various versions of induction variable+offset. Clone the
1345 // computation per usage to allow it to sink out of the loop.
1346 if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
1347 Node *n_ctrl = get_ctrl(n);
1348 IdealLoopTree *n_loop = get_loop(n_ctrl);
1349 if( n_loop != _ltree_root ) {
1350 DUIterator_Fast imax, i = n->fast_outs(imax);
1351 for (; i < imax; i++) {
1352 Node* u = n->fast_out(i);
1353 if( !has_ctrl(u) ) break; // Found control user
1354 IdealLoopTree *u_loop = get_loop(get_ctrl(u));
1355 if( u_loop == n_loop ) break; // Found loop-varying use
1356 if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
1357 if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
1358 }
1359 bool did_break = (i < imax); // Did we break out of the previous loop?
1360 if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
1361 Node *late_load_ctrl = NULL;
1362 if (n->is_Load()) {
1363 // If n is a load, get and save the result from get_late_ctrl(),
1364 // to be later used in calculating the control for n's clones.
1365 clear_dom_lca_tags();
1366 late_load_ctrl = get_late_ctrl(n, n_ctrl);
1367 }
1368 // If n is a load, and the late control is the same as the current
1369 // control, then the cloning of n is a pointless exercise, because
1370 // GVN will ensure that we end up where we started.
1371 if (!n->is_Load() || late_load_ctrl != n_ctrl) {
1372 BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
1373 for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
1374 Node *u = n->last_out(j); // Clone private computation per use
1375 _igvn.rehash_node_delayed(u);
1376 Node *x = n->clone(); // Clone computation
1377 Node *x_ctrl = NULL;
1378 if( u->is_Phi() ) {
1379 // Replace all uses of normal nodes. Replace Phi uses
1380 // individually, so the separate Nodes can sink down
1381 // different paths.
1382 uint k = 1;
1383 while( u->in(k) != n ) k++;
1384 u->set_req( k, x );
1385 // x goes next to Phi input path
1386 x_ctrl = u->in(0)->in(k);
1387 --j;
1388 } else { // Normal use
1389 // Replace all uses
1390 for( uint k = 0; k < u->req(); k++ ) {
1391 if( u->in(k) == n ) {
1392 u->set_req( k, x );
1393 --j;
1394 }
1395 }
1396 x_ctrl = get_ctrl(u);
1397 }
1398
1399 // Find control for 'x' next to use but not inside inner loops.
1400 // For inner loop uses get the preheader area.
1401 x_ctrl = place_near_use(x_ctrl);
1402
1403 if (bs->sink_node(this, n, x, x_ctrl, n_ctrl)) {
1404 continue;
1405 }
1406
1407 if (n->is_Load()) {
1408 // For loads, add a control edge to a CFG node outside of the loop
1409 // to force them to not combine and return back inside the loop
1410 // during GVN optimization (4641526).
1411 //
1412 // Because we are setting the actual control input, factor in
1413 // the result from get_late_ctrl() so we respect any
1414 // anti-dependences. (6233005).
1415 x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
1416
1417 // Don't allow the control input to be a CFG splitting node.
1418 // Such nodes should only have ProjNodes as outs, e.g. IfNode
1419 // should only have IfTrueNode and IfFalseNode (4985384).
1420 x_ctrl = find_non_split_ctrl(x_ctrl);
1421 assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
1422
1423 x->set_req(0, x_ctrl);
1424 }
1425 register_new_node(x, x_ctrl);
1426
1427 // Some institutional knowledge is needed here: 'x' is
1428 // yanked because if the optimizer runs GVN on it all the
1429 // cloned x's will common up and undo this optimization and
1430 // be forced back in the loop. This is annoying because it
1431 // makes +VerifyOpto report false-positives on progress. I
1432 // tried setting control edges on the x's to force them to
1433 // not combine, but the matching gets worried when it tries
1434 // to fold a StoreP and an AddP together (as part of an
1435 // address expression) and the AddP and StoreP have
1436 // different controls.
1437 if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1438 }
1439 _igvn.remove_dead_node(n);
1440 }
1441 }
1442 }
1443 }
1444
1445 try_move_store_after_loop(n);
1446
1447 // Check for Opaque2's who's loop has disappeared - who's input is in the
1448 // same loop nest as their output. Remove 'em, they are no longer useful.
1449 if( n_op == Op_Opaque2 &&
1450 n->in(1) != NULL &&
1451 get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1452 _igvn.replace_node( n, n->in(1) );
1453 }
1454}
1455
1456//------------------------------split_if_with_blocks---------------------------
1457// Check for aggressive application of 'split-if' optimization,
1458// using basic block level info.
1459void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack) {
1460 Node* root = C->root();
1461 visited.set(root->_idx); // first, mark root as visited
1462 // Do pre-visit work for root
1463 Node* n = split_if_with_blocks_pre(root);
1464 uint cnt = n->outcnt();
1465 uint i = 0;
1466
1467 while (true) {
1468 // Visit all children
1469 if (i < cnt) {
1470 Node* use = n->raw_out(i);
1471 ++i;
1472 if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1473 // Now do pre-visit work for this use
1474 use = split_if_with_blocks_pre(use);
1475 nstack.push(n, i); // Save parent and next use's index.
1476 n = use; // Process all children of current use.
1477 cnt = use->outcnt();
1478 i = 0;
1479 }
1480 }
1481 else {
1482 // All of n's children have been processed, complete post-processing.
1483 if (cnt != 0 && !n->is_Con()) {
1484 assert(has_node(n), "no dead nodes");
1485 split_if_with_blocks_post(n);
1486 }
1487 if (must_throttle_split_if()) {
1488 nstack.clear();
1489 }
1490 if (nstack.is_empty()) {
1491 // Finished all nodes on stack.
1492 break;
1493 }
1494 // Get saved parent node and next use's index. Visit the rest of uses.
1495 n = nstack.node();
1496 cnt = n->outcnt();
1497 i = nstack.index();
1498 nstack.pop();
1499 }
1500 }
1501}
1502
1503
1504//=============================================================================
1505//
1506// C L O N E A L O O P B O D Y
1507//
1508
1509//------------------------------clone_iff--------------------------------------
1510// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1511// "Nearly" because all Nodes have been cloned from the original in the loop,
1512// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
1513// through the Phi recursively, and return a Bool.
1514Node* PhaseIdealLoop::clone_iff(PhiNode *phi, IdealLoopTree *loop) {
1515
1516 // Convert this Phi into a Phi merging Bools
1517 uint i;
1518 for (i = 1; i < phi->req(); i++) {
1519 Node *b = phi->in(i);
1520 if (b->is_Phi()) {
1521 _igvn.replace_input_of(phi, i, clone_iff(b->as_Phi(), loop));
1522 } else {
1523 assert(b->is_Bool() || b->Opcode() == Op_Opaque4, "");
1524 }
1525 }
1526
1527 Node* n = phi->in(1);
1528 Node* sample_opaque = NULL;
1529 Node *sample_bool = NULL;
1530 if (n->Opcode() == Op_Opaque4) {
1531 sample_opaque = n;
1532 sample_bool = n->in(1);
1533 assert(sample_bool->is_Bool(), "wrong type");
1534 } else {
1535 sample_bool = n;
1536 }
1537 Node *sample_cmp = sample_bool->in(1);
1538
1539 // Make Phis to merge the Cmp's inputs.
1540 PhiNode *phi1 = new PhiNode(phi->in(0), Type::TOP);
1541 PhiNode *phi2 = new PhiNode(phi->in(0), Type::TOP);
1542 for (i = 1; i < phi->req(); i++) {
1543 Node *n1 = sample_opaque == NULL ? phi->in(i)->in(1)->in(1) : phi->in(i)->in(1)->in(1)->in(1);
1544 Node *n2 = sample_opaque == NULL ? phi->in(i)->in(1)->in(2) : phi->in(i)->in(1)->in(1)->in(2);
1545 phi1->set_req(i, n1);
1546 phi2->set_req(i, n2);
1547 phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1548 phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1549 }
1550 // See if these Phis have been made before.
1551 // Register with optimizer
1552 Node *hit1 = _igvn.hash_find_insert(phi1);
1553 if (hit1) { // Hit, toss just made Phi
1554 _igvn.remove_dead_node(phi1); // Remove new phi
1555 assert(hit1->is_Phi(), "" );
1556 phi1 = (PhiNode*)hit1; // Use existing phi
1557 } else { // Miss
1558 _igvn.register_new_node_with_optimizer(phi1);
1559 }
1560 Node *hit2 = _igvn.hash_find_insert(phi2);
1561 if (hit2) { // Hit, toss just made Phi
1562 _igvn.remove_dead_node(phi2); // Remove new phi
1563 assert(hit2->is_Phi(), "" );
1564 phi2 = (PhiNode*)hit2; // Use existing phi
1565 } else { // Miss
1566 _igvn.register_new_node_with_optimizer(phi2);
1567 }
1568 // Register Phis with loop/block info
1569 set_ctrl(phi1, phi->in(0));
1570 set_ctrl(phi2, phi->in(0));
1571 // Make a new Cmp
1572 Node *cmp = sample_cmp->clone();
1573 cmp->set_req(1, phi1);
1574 cmp->set_req(2, phi2);
1575 _igvn.register_new_node_with_optimizer(cmp);
1576 set_ctrl(cmp, phi->in(0));
1577
1578 // Make a new Bool
1579 Node *b = sample_bool->clone();
1580 b->set_req(1,cmp);
1581 _igvn.register_new_node_with_optimizer(b);
1582 set_ctrl(b, phi->in(0));
1583
1584 if (sample_opaque != NULL) {
1585 Node* opaque = sample_opaque->clone();
1586 opaque->set_req(1, b);
1587 _igvn.register_new_node_with_optimizer(opaque);
1588 set_ctrl(opaque, phi->in(0));
1589 return opaque;
1590 }
1591
1592 assert(b->is_Bool(), "");
1593 return b;
1594}
1595
1596//------------------------------clone_bool-------------------------------------
1597// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1598// "Nearly" because all Nodes have been cloned from the original in the loop,
1599// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
1600// through the Phi recursively, and return a Bool.
1601CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1602 uint i;
1603 // Convert this Phi into a Phi merging Bools
1604 for( i = 1; i < phi->req(); i++ ) {
1605 Node *b = phi->in(i);
1606 if( b->is_Phi() ) {
1607 _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1608 } else {
1609 assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1610 }
1611 }
1612
1613 Node *sample_cmp = phi->in(1);
1614
1615 // Make Phis to merge the Cmp's inputs.
1616 PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1617 PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1618 for( uint j = 1; j < phi->req(); j++ ) {
1619 Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1620 Node *n1, *n2;
1621 if( cmp_top->is_Cmp() ) {
1622 n1 = cmp_top->in(1);
1623 n2 = cmp_top->in(2);
1624 } else {
1625 n1 = n2 = cmp_top;
1626 }
1627 phi1->set_req( j, n1 );
1628 phi2->set_req( j, n2 );
1629 phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1630 phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1631 }
1632
1633 // See if these Phis have been made before.
1634 // Register with optimizer
1635 Node *hit1 = _igvn.hash_find_insert(phi1);
1636 if( hit1 ) { // Hit, toss just made Phi
1637 _igvn.remove_dead_node(phi1); // Remove new phi
1638 assert( hit1->is_Phi(), "" );
1639 phi1 = (PhiNode*)hit1; // Use existing phi
1640 } else { // Miss
1641 _igvn.register_new_node_with_optimizer(phi1);
1642 }
1643 Node *hit2 = _igvn.hash_find_insert(phi2);
1644 if( hit2 ) { // Hit, toss just made Phi
1645 _igvn.remove_dead_node(phi2); // Remove new phi
1646 assert( hit2->is_Phi(), "" );
1647 phi2 = (PhiNode*)hit2; // Use existing phi
1648 } else { // Miss
1649 _igvn.register_new_node_with_optimizer(phi2);
1650 }
1651 // Register Phis with loop/block info
1652 set_ctrl(phi1, phi->in(0));
1653 set_ctrl(phi2, phi->in(0));
1654 // Make a new Cmp
1655 Node *cmp = sample_cmp->clone();
1656 cmp->set_req( 1, phi1 );
1657 cmp->set_req( 2, phi2 );
1658 _igvn.register_new_node_with_optimizer(cmp);
1659 set_ctrl(cmp, phi->in(0));
1660
1661 assert( cmp->is_Cmp(), "" );
1662 return (CmpNode*)cmp;
1663}
1664
1665//------------------------------sink_use---------------------------------------
1666// If 'use' was in the loop-exit block, it now needs to be sunk
1667// below the post-loop merge point.
1668void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1669 if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1670 set_ctrl(use, post_loop);
1671 for (DUIterator j = use->outs(); use->has_out(j); j++)
1672 sink_use(use->out(j), post_loop);
1673 }
1674}
1675
1676void PhaseIdealLoop::clone_loop_handle_data_uses(Node* old, Node_List &old_new,
1677 IdealLoopTree* loop, IdealLoopTree* outer_loop,
1678 Node_List*& split_if_set, Node_List*& split_bool_set,
1679 Node_List*& split_cex_set, Node_List& worklist,
1680 uint new_counter, CloneLoopMode mode) {
1681 Node* nnn = old_new[old->_idx];
1682 // Copy uses to a worklist, so I can munge the def-use info
1683 // with impunity.
1684 for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1685 worklist.push(old->fast_out(j));
1686
1687 while( worklist.size() ) {
1688 Node *use = worklist.pop();
1689 if (!has_node(use)) continue; // Ignore dead nodes
1690 if (use->in(0) == C->top()) continue;
1691 IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1692 // Check for data-use outside of loop - at least one of OLD or USE
1693 // must not be a CFG node.
1694#ifdef ASSERT
1695 if (loop->_head->as_Loop()->is_strip_mined() && outer_loop->is_member(use_loop) && !loop->is_member(use_loop) && old_new[use->_idx] == NULL) {
1696 Node* sfpt = loop->_head->as_CountedLoop()->outer_safepoint();
1697 assert(mode == ControlAroundStripMined && use == sfpt, "missed a node");
1698 }
1699#endif
1700 if (!loop->is_member(use_loop) && !outer_loop->is_member(use_loop) && (!old->is_CFG() || !use->is_CFG())) {
1701
1702 // If the Data use is an IF, that means we have an IF outside of the
1703 // loop that is switching on a condition that is set inside of the
1704 // loop. Happens if people set a loop-exit flag; then test the flag
1705 // in the loop to break the loop, then test is again outside of the
1706 // loop to determine which way the loop exited.
1707 // Loop predicate If node connects to Bool node through Opaque1 node.
1708 if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4) {
1709 // Since this code is highly unlikely, we lazily build the worklist
1710 // of such Nodes to go split.
1711 if (!split_if_set) {
1712 ResourceArea *area = Thread::current()->resource_area();
1713 split_if_set = new Node_List(area);
1714 }
1715 split_if_set->push(use);
1716 }
1717 if (use->is_Bool()) {
1718 if (!split_bool_set) {
1719 ResourceArea *area = Thread::current()->resource_area();
1720 split_bool_set = new Node_List(area);
1721 }
1722 split_bool_set->push(use);
1723 }
1724 if (use->Opcode() == Op_CreateEx) {
1725 if (!split_cex_set) {
1726 ResourceArea *area = Thread::current()->resource_area();
1727 split_cex_set = new Node_List(area);
1728 }
1729 split_cex_set->push(use);
1730 }
1731
1732
1733 // Get "block" use is in
1734 uint idx = 0;
1735 while( use->in(idx) != old ) idx++;
1736 Node *prev = use->is_CFG() ? use : get_ctrl(use);
1737 assert(!loop->is_member(get_loop(prev)) && !outer_loop->is_member(get_loop(prev)), "" );
1738 Node *cfg = prev->_idx >= new_counter
1739 ? prev->in(2)
1740 : idom(prev);
1741 if( use->is_Phi() ) // Phi use is in prior block
1742 cfg = prev->in(idx); // NOT in block of Phi itself
1743 if (cfg->is_top()) { // Use is dead?
1744 _igvn.replace_input_of(use, idx, C->top());
1745 continue;
1746 }
1747
1748 // If use is referenced through control edge... (idx == 0)
1749 if (mode == IgnoreStripMined && idx == 0) {
1750 LoopNode *head = loop->_head->as_Loop();
1751 if (head->is_strip_mined() && is_dominator(head->outer_loop_exit(), prev)) {
1752 // That node is outside the inner loop, leave it outside the
1753 // outer loop as well to not confuse verification code.
1754 assert(!loop->_parent->is_member(use_loop), "should be out of the outer loop");
1755 _igvn.replace_input_of(use, 0, head->outer_loop_exit());
1756 continue;
1757 }
1758 }
1759
1760 while(!outer_loop->is_member(get_loop(cfg))) {
1761 prev = cfg;
1762 cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1763 }
1764 // If the use occurs after merging several exits from the loop, then
1765 // old value must have dominated all those exits. Since the same old
1766 // value was used on all those exits we did not need a Phi at this
1767 // merge point. NOW we do need a Phi here. Each loop exit value
1768 // is now merged with the peeled body exit; each exit gets its own
1769 // private Phi and those Phis need to be merged here.
1770 Node *phi;
1771 if( prev->is_Region() ) {
1772 if( idx == 0 ) { // Updating control edge?
1773 phi = prev; // Just use existing control
1774 } else { // Else need a new Phi
1775 phi = PhiNode::make( prev, old );
1776 // Now recursively fix up the new uses of old!
1777 for( uint i = 1; i < prev->req(); i++ ) {
1778 worklist.push(phi); // Onto worklist once for each 'old' input
1779 }
1780 }
1781 } else {
1782 // Get new RegionNode merging old and new loop exits
1783 prev = old_new[prev->_idx];
1784 assert( prev, "just made this in step 7" );
1785 if( idx == 0) { // Updating control edge?
1786 phi = prev; // Just use existing control
1787 } else { // Else need a new Phi
1788 // Make a new Phi merging data values properly
1789 phi = PhiNode::make( prev, old );
1790 phi->set_req( 1, nnn );
1791 }
1792 }
1793 // If inserting a new Phi, check for prior hits
1794 if( idx != 0 ) {
1795 Node *hit = _igvn.hash_find_insert(phi);
1796 if( hit == NULL ) {
1797 _igvn.register_new_node_with_optimizer(phi); // Register new phi
1798 } else { // or
1799 // Remove the new phi from the graph and use the hit
1800 _igvn.remove_dead_node(phi);
1801 phi = hit; // Use existing phi
1802 }
1803 set_ctrl(phi, prev);
1804 }
1805 // Make 'use' use the Phi instead of the old loop body exit value
1806 _igvn.replace_input_of(use, idx, phi);
1807 if( use->_idx >= new_counter ) { // If updating new phis
1808 // Not needed for correctness, but prevents a weak assert
1809 // in AddPNode from tripping (when we end up with different
1810 // base & derived Phis that will become the same after
1811 // IGVN does CSE).
1812 Node *hit = _igvn.hash_find_insert(use);
1813 if( hit ) // Go ahead and re-hash for hits.
1814 _igvn.replace_node( use, hit );
1815 }
1816
1817 // If 'use' was in the loop-exit block, it now needs to be sunk
1818 // below the post-loop merge point.
1819 sink_use( use, prev );
1820 }
1821 }
1822}
1823
1824static void clone_outer_loop_helper(Node* n, const IdealLoopTree *loop, const IdealLoopTree* outer_loop,
1825 const Node_List &old_new, Unique_Node_List& wq, PhaseIdealLoop* phase,
1826 bool check_old_new) {
1827 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1828 Node* u = n->fast_out(j);
1829 assert(check_old_new || old_new[u->_idx] == NULL, "shouldn't have been cloned");
1830 if (!u->is_CFG() && (!check_old_new || old_new[u->_idx] == NULL)) {
1831 Node* c = phase->get_ctrl(u);
1832 IdealLoopTree* u_loop = phase->get_loop(c);
1833 assert(!loop->is_member(u_loop), "can be in outer loop or out of both loops only");
1834 if (outer_loop->is_member(u_loop)) {
1835 wq.push(u);
1836 }
1837 }
1838 }
1839}
1840
1841void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
1842 IdealLoopTree* outer_loop, int dd, Node_List &old_new,
1843 Node_List& extra_data_nodes) {
1844 if (head->is_strip_mined() && mode != IgnoreStripMined) {
1845 CountedLoopNode* cl = head->as_CountedLoop();
1846 Node* l = cl->outer_loop();
1847 Node* tail = cl->outer_loop_tail();
1848 IfNode* le = cl->outer_loop_end();
1849 Node* sfpt = cl->outer_safepoint();
1850 CountedLoopEndNode* cle = cl->loopexit();
1851 CountedLoopNode* new_cl = old_new[cl->_idx]->as_CountedLoop();
1852 CountedLoopEndNode* new_cle = new_cl->as_CountedLoop()->loopexit_or_null();
1853 Node* cle_out = cle->proj_out(false);
1854
1855 Node* new_sfpt = NULL;
1856 Node* new_cle_out = cle_out->clone();
1857 old_new.map(cle_out->_idx, new_cle_out);
1858 if (mode == CloneIncludesStripMined) {
1859 // clone outer loop body
1860 Node* new_l = l->clone();
1861 Node* new_tail = tail->clone();
1862 IfNode* new_le = le->clone()->as_If();
1863 new_sfpt = sfpt->clone();
1864
1865 set_loop(new_l, outer_loop->_parent);
1866 set_idom(new_l, new_l->in(LoopNode::EntryControl), dd);
1867 set_loop(new_cle_out, outer_loop->_parent);
1868 set_idom(new_cle_out, new_cle, dd);
1869 set_loop(new_sfpt, outer_loop->_parent);
1870 set_idom(new_sfpt, new_cle_out, dd);
1871 set_loop(new_le, outer_loop->_parent);
1872 set_idom(new_le, new_sfpt, dd);
1873 set_loop(new_tail, outer_loop->_parent);
1874 set_idom(new_tail, new_le, dd);
1875 set_idom(new_cl, new_l, dd);
1876
1877 old_new.map(l->_idx, new_l);
1878 old_new.map(tail->_idx, new_tail);
1879 old_new.map(le->_idx, new_le);
1880 old_new.map(sfpt->_idx, new_sfpt);
1881
1882 new_l->set_req(LoopNode::LoopBackControl, new_tail);
1883 new_l->set_req(0, new_l);
1884 new_tail->set_req(0, new_le);
1885 new_le->set_req(0, new_sfpt);
1886 new_sfpt->set_req(0, new_cle_out);
1887 new_cle_out->set_req(0, new_cle);
1888 new_cl->set_req(LoopNode::EntryControl, new_l);
1889
1890 _igvn.register_new_node_with_optimizer(new_l);
1891 _igvn.register_new_node_with_optimizer(new_tail);
1892 _igvn.register_new_node_with_optimizer(new_le);
1893 } else {
1894 Node *newhead = old_new[loop->_head->_idx];
1895 newhead->as_Loop()->clear_strip_mined();
1896 _igvn.replace_input_of(newhead, LoopNode::EntryControl, newhead->in(LoopNode::EntryControl)->in(LoopNode::EntryControl));
1897 set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1898 }
1899 // Look at data node that were assigned a control in the outer
1900 // loop: they are kept in the outer loop by the safepoint so start
1901 // from the safepoint node's inputs.
1902 IdealLoopTree* outer_loop = get_loop(l);
1903 Node_Stack stack(2);
1904 stack.push(sfpt, 1);
1905 uint new_counter = C->unique();
1906 while (stack.size() > 0) {
1907 Node* n = stack.node();
1908 uint i = stack.index();
1909 while (i < n->req() &&
1910 (n->in(i) == NULL ||
1911 !has_ctrl(n->in(i)) ||
1912 get_loop(get_ctrl(n->in(i))) != outer_loop ||
1913 (old_new[n->in(i)->_idx] != NULL && old_new[n->in(i)->_idx]->_idx >= new_counter))) {
1914 i++;
1915 }
1916 if (i < n->req()) {
1917 stack.set_index(i+1);
1918 stack.push(n->in(i), 0);
1919 } else {
1920 assert(old_new[n->_idx] == NULL || n == sfpt || old_new[n->_idx]->_idx < new_counter, "no clone yet");
1921 Node* m = n == sfpt ? new_sfpt : n->clone();
1922 if (m != NULL) {
1923 for (uint i = 0; i < n->req(); i++) {
1924 if (m->in(i) != NULL && old_new[m->in(i)->_idx] != NULL) {
1925 m->set_req(i, old_new[m->in(i)->_idx]);
1926 }
1927 }
1928 } else {
1929 assert(n == sfpt && mode != CloneIncludesStripMined, "where's the safepoint clone?");
1930 }
1931 if (n != sfpt) {
1932 extra_data_nodes.push(n);
1933 _igvn.register_new_node_with_optimizer(m);
1934 assert(get_ctrl(n) == cle_out, "what other control?");
1935 set_ctrl(m, new_cle_out);
1936 old_new.map(n->_idx, m);
1937 }
1938 stack.pop();
1939 }
1940 }
1941 if (mode == CloneIncludesStripMined) {
1942 _igvn.register_new_node_with_optimizer(new_sfpt);
1943 _igvn.register_new_node_with_optimizer(new_cle_out);
1944 }
1945 // Some other transformation may have pessimistically assign some
1946 // data nodes to the outer loop. Set their control so they are out
1947 // of the outer loop.
1948 ResourceMark rm;
1949 Unique_Node_List wq;
1950 for (uint i = 0; i < extra_data_nodes.size(); i++) {
1951 Node* old = extra_data_nodes.at(i);
1952 clone_outer_loop_helper(old, loop, outer_loop, old_new, wq, this, true);
1953 }
1954 Node* new_ctrl = cl->outer_loop_exit();
1955 assert(get_loop(new_ctrl) != outer_loop, "must be out of the loop nest");
1956 for (uint i = 0; i < wq.size(); i++) {
1957 Node* n = wq.at(i);
1958 set_ctrl(n, new_ctrl);
1959 clone_outer_loop_helper(n, loop, outer_loop, old_new, wq, this, false);
1960 }
1961 } else {
1962 Node *newhead = old_new[loop->_head->_idx];
1963 set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1964 }
1965}
1966
1967//------------------------------clone_loop-------------------------------------
1968//
1969// C L O N E A L O O P B O D Y
1970//
1971// This is the basic building block of the loop optimizations. It clones an
1972// entire loop body. It makes an old_new loop body mapping; with this mapping
1973// you can find the new-loop equivalent to an old-loop node. All new-loop
1974// nodes are exactly equal to their old-loop counterparts, all edges are the
1975// same. All exits from the old-loop now have a RegionNode that merges the
1976// equivalent new-loop path. This is true even for the normal "loop-exit"
1977// condition. All uses of loop-invariant old-loop values now come from (one
1978// or more) Phis that merge their new-loop equivalents.
1979//
1980// This operation leaves the graph in an illegal state: there are two valid
1981// control edges coming from the loop pre-header to both loop bodies. I'll
1982// definitely have to hack the graph after running this transform.
1983//
1984// From this building block I will further edit edges to perform loop peeling
1985// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
1986//
1987// Parameter side_by_size_idom:
1988// When side_by_size_idom is NULL, the dominator tree is constructed for
1989// the clone loop to dominate the original. Used in construction of
1990// pre-main-post loop sequence.
1991// When nonnull, the clone and original are side-by-side, both are
1992// dominated by the side_by_side_idom node. Used in construction of
1993// unswitched loops.
1994void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
1995 CloneLoopMode mode, Node* side_by_side_idom) {
1996
1997 LoopNode* head = loop->_head->as_Loop();
1998 head->verify_strip_mined(1);
1999
2000 if (C->do_vector_loop() && PrintOpto) {
2001 const char* mname = C->method()->name()->as_quoted_ascii();
2002 if (mname != NULL) {
2003 tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
2004 }
2005 }
2006
2007 CloneMap& cm = C->clone_map();
2008 Dict* dict = cm.dict();
2009 if (C->do_vector_loop()) {
2010 cm.set_clone_idx(cm.max_gen()+1);
2011#ifndef PRODUCT
2012 if (PrintOpto) {
2013 tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
2014 loop->dump_head();
2015 }
2016#endif
2017 }
2018
2019 // Step 1: Clone the loop body. Make the old->new mapping.
2020 uint i;
2021 for( i = 0; i < loop->_body.size(); i++ ) {
2022 Node *old = loop->_body.at(i);
2023 Node *nnn = old->clone();
2024 old_new.map( old->_idx, nnn );
2025 if (C->do_vector_loop()) {
2026 cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
2027 }
2028 _igvn.register_new_node_with_optimizer(nnn);
2029 }
2030
2031 IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;
2032
2033 // Step 2: Fix the edges in the new body. If the old input is outside the
2034 // loop use it. If the old input is INside the loop, use the corresponding
2035 // new node instead.
2036 for( i = 0; i < loop->_body.size(); i++ ) {
2037 Node *old = loop->_body.at(i);
2038 Node *nnn = old_new[old->_idx];
2039 // Fix CFG/Loop controlling the new node
2040 if (has_ctrl(old)) {
2041 set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
2042 } else {
2043 set_loop(nnn, outer_loop->_parent);
2044 if (old->outcnt() > 0) {
2045 set_idom( nnn, old_new[idom(old)->_idx], dd );
2046 }
2047 }
2048 // Correct edges to the new node
2049 for( uint j = 0; j < nnn->req(); j++ ) {
2050 Node *n = nnn->in(j);
2051 if( n ) {
2052 IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
2053 if( loop->is_member( old_in_loop ) )
2054 nnn->set_req(j, old_new[n->_idx]);
2055 }
2056 }
2057 _igvn.hash_find_insert(nnn);
2058 }
2059
2060 ResourceArea *area = Thread::current()->resource_area();
2061 Node_List extra_data_nodes(area); // data nodes in the outer strip mined loop
2062 clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);
2063
2064 // Step 3: Now fix control uses. Loop varying control uses have already
2065 // been fixed up (as part of all input edges in Step 2). Loop invariant
2066 // control uses must be either an IfFalse or an IfTrue. Make a merge
2067 // point to merge the old and new IfFalse/IfTrue nodes; make the use
2068 // refer to this.
2069 Node_List worklist(area);
2070 uint new_counter = C->unique();
2071 for( i = 0; i < loop->_body.size(); i++ ) {
2072 Node* old = loop->_body.at(i);
2073 if( !old->is_CFG() ) continue;
2074
2075 // Copy uses to a worklist, so I can munge the def-use info
2076 // with impunity.
2077 for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
2078 worklist.push(old->fast_out(j));
2079
2080 while( worklist.size() ) { // Visit all uses
2081 Node *use = worklist.pop();
2082 if (!has_node(use)) continue; // Ignore dead nodes
2083 IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
2084 if( !loop->is_member( use_loop ) && use->is_CFG() ) {
2085 // Both OLD and USE are CFG nodes here.
2086 assert( use->is_Proj(), "" );
2087 Node* nnn = old_new[old->_idx];
2088
2089 Node* newuse = NULL;
2090 if (head->is_strip_mined() && mode != IgnoreStripMined) {
2091 CountedLoopNode* cl = head->as_CountedLoop();
2092 CountedLoopEndNode* cle = cl->loopexit();
2093 Node* cle_out = cle->proj_out_or_null(false);
2094 if (use == cle_out) {
2095 IfNode* le = cl->outer_loop_end();
2096 use = le->proj_out(false);
2097 use_loop = get_loop(use);
2098 if (mode == CloneIncludesStripMined) {
2099 nnn = old_new[le->_idx];
2100 } else {
2101 newuse = old_new[cle_out->_idx];
2102 }
2103 }
2104 }
2105 if (newuse == NULL) {
2106 newuse = use->clone();
2107 }
2108
2109 // Clone the loop exit control projection
2110 if (C->do_vector_loop()) {
2111 cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
2112 }
2113 newuse->set_req(0,nnn);
2114 _igvn.register_new_node_with_optimizer(newuse);
2115 set_loop(newuse, use_loop);
2116 set_idom(newuse, nnn, dom_depth(nnn) + 1 );
2117
2118 // We need a Region to merge the exit from the peeled body and the
2119 // exit from the old loop body.
2120 RegionNode *r = new RegionNode(3);
2121 // Map the old use to the new merge point
2122 old_new.map( use->_idx, r );
2123 uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
2124 assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
2125
2126 // The original user of 'use' uses 'r' instead.
2127 for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
2128 Node* useuse = use->last_out(l);
2129 _igvn.rehash_node_delayed(useuse);
2130 uint uses_found = 0;
2131 if( useuse->in(0) == use ) {
2132 useuse->set_req(0, r);
2133 uses_found++;
2134 if( useuse->is_CFG() ) {
2135 assert( dom_depth(useuse) > dd_r, "" );
2136 set_idom(useuse, r, dom_depth(useuse));
2137 }
2138 }
2139 for( uint k = 1; k < useuse->req(); k++ ) {
2140 if( useuse->in(k) == use ) {
2141 useuse->set_req(k, r);
2142 uses_found++;
2143 if (useuse->is_Loop() && k == LoopNode::EntryControl) {
2144 assert(dom_depth(useuse) > dd_r , "");
2145 set_idom(useuse, r, dom_depth(useuse));
2146 }
2147 }
2148 }
2149 l -= uses_found; // we deleted 1 or more copies of this edge
2150 }
2151
2152 // Now finish up 'r'
2153 r->set_req( 1, newuse );
2154 r->set_req( 2, use );
2155 _igvn.register_new_node_with_optimizer(r);
2156 set_loop(r, use_loop);
2157 set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
2158 } // End of if a loop-exit test
2159 }
2160 }
2161
2162 // Step 4: If loop-invariant use is not control, it must be dominated by a
2163 // loop exit IfFalse/IfTrue. Find "proper" loop exit. Make a Region
2164 // there if needed. Make a Phi there merging old and new used values.
2165 Node_List *split_if_set = NULL;
2166 Node_List *split_bool_set = NULL;
2167 Node_List *split_cex_set = NULL;
2168 for( i = 0; i < loop->_body.size(); i++ ) {
2169 Node* old = loop->_body.at(i);
2170 clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
2171 split_bool_set, split_cex_set, worklist, new_counter,
2172 mode);
2173 }
2174
2175 for (i = 0; i < extra_data_nodes.size(); i++) {
2176 Node* old = extra_data_nodes.at(i);
2177 clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
2178 split_bool_set, split_cex_set, worklist, new_counter,
2179 mode);
2180 }
2181
2182 // Check for IFs that need splitting/cloning. Happens if an IF outside of
2183 // the loop uses a condition set in the loop. The original IF probably
2184 // takes control from one or more OLD Regions (which in turn get from NEW
2185 // Regions). In any case, there will be a set of Phis for each merge point
2186 // from the IF up to where the original BOOL def exists the loop.
2187 if (split_if_set) {
2188 while (split_if_set->size()) {
2189 Node *iff = split_if_set->pop();
2190 if (iff->in(1)->is_Phi()) {
2191 Node *b = clone_iff(iff->in(1)->as_Phi(), loop);
2192 _igvn.replace_input_of(iff, 1, b);
2193 }
2194 }
2195 }
2196 if (split_bool_set) {
2197 while (split_bool_set->size()) {
2198 Node *b = split_bool_set->pop();
2199 Node *phi = b->in(1);
2200 assert(phi->is_Phi(), "");
2201 CmpNode *cmp = clone_bool((PhiNode*)phi, loop);
2202 _igvn.replace_input_of(b, 1, cmp);
2203 }
2204 }
2205 if (split_cex_set) {
2206 while (split_cex_set->size()) {
2207 Node *b = split_cex_set->pop();
2208 assert(b->in(0)->is_Region(), "");
2209 assert(b->in(1)->is_Phi(), "");
2210 assert(b->in(0)->in(0) == b->in(1)->in(0), "");
2211 split_up(b, b->in(0), NULL);
2212 }
2213 }
2214
2215}
2216
2217
2218//---------------------- stride_of_possible_iv -------------------------------------
2219// Looks for an iff/bool/comp with one operand of the compare
2220// being a cycle involving an add and a phi,
2221// with an optional truncation (left-shift followed by a right-shift)
2222// of the add. Returns zero if not an iv.
2223int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
2224 Node* trunc1 = NULL;
2225 Node* trunc2 = NULL;
2226 const TypeInt* ttype = NULL;
2227 if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
2228 return 0;
2229 }
2230 BoolNode* bl = iff->in(1)->as_Bool();
2231 Node* cmp = bl->in(1);
2232 if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) {
2233 return 0;
2234 }
2235 // Must have an invariant operand
2236 if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
2237 return 0;
2238 }
2239 Node* add2 = NULL;
2240 Node* cmp1 = cmp->in(1);
2241 if (cmp1->is_Phi()) {
2242 // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
2243 Node* phi = cmp1;
2244 for (uint i = 1; i < phi->req(); i++) {
2245 Node* in = phi->in(i);
2246 Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
2247 &trunc1, &trunc2, &ttype);
2248 if (add && add->in(1) == phi) {
2249 add2 = add->in(2);
2250 break;
2251 }
2252 }
2253 } else {
2254 // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
2255 Node* addtrunc = cmp1;
2256 Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
2257 &trunc1, &trunc2, &ttype);
2258 if (add && add->in(1)->is_Phi()) {
2259 Node* phi = add->in(1);
2260 for (uint i = 1; i < phi->req(); i++) {
2261 if (phi->in(i) == addtrunc) {
2262 add2 = add->in(2);
2263 break;
2264 }
2265 }
2266 }
2267 }
2268 if (add2 != NULL) {
2269 const TypeInt* add2t = _igvn.type(add2)->is_int();
2270 if (add2t->is_con()) {
2271 return add2t->get_con();
2272 }
2273 }
2274 return 0;
2275}
2276
2277
2278//---------------------- stay_in_loop -------------------------------------
2279// Return the (unique) control output node that's in the loop (if it exists.)
2280Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
2281 Node* unique = NULL;
2282 if (!n) return NULL;
2283 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2284 Node* use = n->fast_out(i);
2285 if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
2286 if (unique != NULL) {
2287 return NULL;
2288 }
2289 unique = use;
2290 }
2291 }
2292 return unique;
2293}
2294
2295//------------------------------ register_node -------------------------------------
2296// Utility to register node "n" with PhaseIdealLoop
2297void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
2298 _igvn.register_new_node_with_optimizer(n);
2299 loop->_body.push(n);
2300 if (n->is_CFG()) {
2301 set_loop(n, loop);
2302 set_idom(n, pred, ddepth);
2303 } else {
2304 set_ctrl(n, pred);
2305 }
2306}
2307
2308//------------------------------ proj_clone -------------------------------------
2309// Utility to create an if-projection
2310ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
2311 ProjNode* c = p->clone()->as_Proj();
2312 c->set_req(0, iff);
2313 return c;
2314}
2315
2316//------------------------------ short_circuit_if -------------------------------------
2317// Force the iff control output to be the live_proj
2318Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
2319 guarantee(live_proj != NULL, "null projection");
2320 int proj_con = live_proj->_con;
2321 assert(proj_con == 0 || proj_con == 1, "false or true projection");
2322 Node *con = _igvn.intcon(proj_con);
2323 set_ctrl(con, C->root());
2324 if (iff) {
2325 iff->set_req(1, con);
2326 }
2327 return con;
2328}
2329
2330//------------------------------ insert_if_before_proj -------------------------------------
2331// Insert a new if before an if projection (* - new node)
2332//
2333// before
2334// if(test)
2335// / \
2336// v v
2337// other-proj proj (arg)
2338//
2339// after
2340// if(test)
2341// / \
2342// / v
2343// | * proj-clone
2344// v |
2345// other-proj v
2346// * new_if(relop(cmp[IU](left,right)))
2347// / \
2348// v v
2349// * new-proj proj
2350// (returned)
2351//
2352ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
2353 IfNode* iff = proj->in(0)->as_If();
2354 IdealLoopTree *loop = get_loop(proj);
2355 ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
2356 int ddepth = dom_depth(proj);
2357
2358 _igvn.rehash_node_delayed(iff);
2359 _igvn.rehash_node_delayed(proj);
2360
2361 proj->set_req(0, NULL); // temporary disconnect
2362 ProjNode* proj2 = proj_clone(proj, iff);
2363 register_node(proj2, loop, iff, ddepth);
2364
2365 Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right);
2366 register_node(cmp, loop, proj2, ddepth);
2367
2368 BoolNode* bol = new BoolNode(cmp, relop);
2369 register_node(bol, loop, proj2, ddepth);
2370
2371 int opcode = iff->Opcode();
2372 assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode");
2373 IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt):
2374 new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt);
2375 register_node(new_if, loop, proj2, ddepth);
2376
2377 proj->set_req(0, new_if); // reattach
2378 set_idom(proj, new_if, ddepth);
2379
2380 ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
2381 guarantee(new_exit != NULL, "null exit node");
2382 register_node(new_exit, get_loop(other_proj), new_if, ddepth);
2383
2384 return new_exit;
2385}
2386
2387//------------------------------ insert_region_before_proj -------------------------------------
2388// Insert a region before an if projection (* - new node)
2389//
2390// before
2391// if(test)
2392// / |
2393// v |
2394// proj v
2395// other-proj
2396//
2397// after
2398// if(test)
2399// / |
2400// v |
2401// * proj-clone v
2402// | other-proj
2403// v
2404// * new-region
2405// |
2406// v
2407// * dum_if
2408// / \
2409// v \
2410// * dum-proj v
2411// proj
2412//
2413RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
2414 IfNode* iff = proj->in(0)->as_If();
2415 IdealLoopTree *loop = get_loop(proj);
2416 ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
2417 int ddepth = dom_depth(proj);
2418
2419 _igvn.rehash_node_delayed(iff);
2420 _igvn.rehash_node_delayed(proj);
2421
2422 proj->set_req(0, NULL); // temporary disconnect
2423 ProjNode* proj2 = proj_clone(proj, iff);
2424 register_node(proj2, loop, iff, ddepth);
2425
2426 RegionNode* reg = new RegionNode(2);
2427 reg->set_req(1, proj2);
2428 register_node(reg, loop, iff, ddepth);
2429
2430 IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
2431 register_node(dum_if, loop, reg, ddepth);
2432
2433 proj->set_req(0, dum_if); // reattach
2434 set_idom(proj, dum_if, ddepth);
2435
2436 ProjNode* dum_proj = proj_clone(other_proj, dum_if);
2437 register_node(dum_proj, loop, dum_if, ddepth);
2438
2439 return reg;
2440}
2441
2442//------------------------------ insert_cmpi_loop_exit -------------------------------------
2443// Clone a signed compare loop exit from an unsigned compare and
2444// insert it before the unsigned cmp on the stay-in-loop path.
2445// All new nodes inserted in the dominator tree between the original
2446// if and it's projections. The original if test is replaced with
2447// a constant to force the stay-in-loop path.
2448//
2449// This is done to make sure that the original if and it's projections
2450// still dominate the same set of control nodes, that the ctrl() relation
2451// from data nodes to them is preserved, and that their loop nesting is
2452// preserved.
2453//
2454// before
2455// if(i <u limit) unsigned compare loop exit
2456// / |
2457// v v
2458// exit-proj stay-in-loop-proj
2459//
2460// after
2461// if(stay-in-loop-const) original if
2462// / |
2463// / v
2464// / if(i < limit) new signed test
2465// / / |
2466// / / v
2467// / / if(i <u limit) new cloned unsigned test
2468// / / / |
2469// v v v |
2470// region |
2471// | |
2472// dum-if |
2473// / | |
2474// ether | |
2475// v v
2476// exit-proj stay-in-loop-proj
2477//
2478IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
2479 const bool Signed = true;
2480 const bool Unsigned = false;
2481
2482 BoolNode* bol = if_cmpu->in(1)->as_Bool();
2483 if (bol->_test._test != BoolTest::lt) return NULL;
2484 CmpNode* cmpu = bol->in(1)->as_Cmp();
2485 if (cmpu->Opcode() != Op_CmpU) return NULL;
2486 int stride = stride_of_possible_iv(if_cmpu);
2487 if (stride == 0) return NULL;
2488
2489 Node* lp_proj = stay_in_loop(if_cmpu, loop);
2490 guarantee(lp_proj != NULL, "null loop node");
2491
2492 ProjNode* lp_continue = lp_proj->as_Proj();
2493 ProjNode* lp_exit = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
2494
2495 Node* limit = NULL;
2496 if (stride > 0) {
2497 limit = cmpu->in(2);
2498 } else {
2499 limit = _igvn.makecon(TypeInt::ZERO);
2500 set_ctrl(limit, C->root());
2501 }
2502 // Create a new region on the exit path
2503 RegionNode* reg = insert_region_before_proj(lp_exit);
2504 guarantee(reg != NULL, "null region node");
2505
2506 // Clone the if-cmpu-true-false using a signed compare
2507 BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
2508 ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
2509 reg->add_req(cmpi_exit);
2510
2511 // Clone the if-cmpu-true-false
2512 BoolTest::mask rel_u = bol->_test._test;
2513 ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
2514 reg->add_req(cmpu_exit);
2515
2516 // Force original if to stay in loop.
2517 short_circuit_if(if_cmpu, lp_continue);
2518
2519 return cmpi_exit->in(0)->as_If();
2520}
2521
2522//------------------------------ remove_cmpi_loop_exit -------------------------------------
2523// Remove a previously inserted signed compare loop exit.
2524void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
2525 Node* lp_proj = stay_in_loop(if_cmp, loop);
2526 assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
2527 stay_in_loop(lp_proj, loop)->is_If() &&
2528 stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
2529 Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
2530 set_ctrl(con, C->root());
2531 if_cmp->set_req(1, con);
2532}
2533
2534//------------------------------ scheduled_nodelist -------------------------------------
2535// Create a post order schedule of nodes that are in the
2536// "member" set. The list is returned in "sched".
2537// The first node in "sched" is the loop head, followed by
2538// nodes which have no inputs in the "member" set, and then
2539// followed by the nodes that have an immediate input dependence
2540// on a node in "sched".
2541void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
2542
2543 assert(member.test(loop->_head->_idx), "loop head must be in member set");
2544 Arena *a = Thread::current()->resource_area();
2545 VectorSet visited(a);
2546 Node_Stack nstack(a, loop->_body.size());
2547
2548 Node* n = loop->_head; // top of stack is cached in "n"
2549 uint idx = 0;
2550 visited.set(n->_idx);
2551
2552 // Initially push all with no inputs from within member set
2553 for(uint i = 0; i < loop->_body.size(); i++ ) {
2554 Node *elt = loop->_body.at(i);
2555 if (member.test(elt->_idx)) {
2556 bool found = false;
2557 for (uint j = 0; j < elt->req(); j++) {
2558 Node* def = elt->in(j);
2559 if (def && member.test(def->_idx) && def != elt) {
2560 found = true;
2561 break;
2562 }
2563 }
2564 if (!found && elt != loop->_head) {
2565 nstack.push(n, idx);
2566 n = elt;
2567 assert(!visited.test(n->_idx), "not seen yet");
2568 visited.set(n->_idx);
2569 }
2570 }
2571 }
2572
2573 // traverse out's that are in the member set
2574 while (true) {
2575 if (idx < n->outcnt()) {
2576 Node* use = n->raw_out(idx);
2577 idx++;
2578 if (!visited.test_set(use->_idx)) {
2579 if (member.test(use->_idx)) {
2580 nstack.push(n, idx);
2581 n = use;
2582 idx = 0;
2583 }
2584 }
2585 } else {
2586 // All outputs processed
2587 sched.push(n);
2588 if (nstack.is_empty()) break;
2589 n = nstack.node();
2590 idx = nstack.index();
2591 nstack.pop();
2592 }
2593 }
2594}
2595
2596
2597//------------------------------ has_use_in_set -------------------------------------
2598// Has a use in the vector set
2599bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
2600 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2601 Node* use = n->fast_out(j);
2602 if (vset.test(use->_idx)) {
2603 return true;
2604 }
2605 }
2606 return false;
2607}
2608
2609
2610//------------------------------ has_use_internal_to_set -------------------------------------
2611// Has use internal to the vector set (ie. not in a phi at the loop head)
2612bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
2613 Node* head = loop->_head;
2614 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2615 Node* use = n->fast_out(j);
2616 if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
2617 return true;
2618 }
2619 }
2620 return false;
2621}
2622
2623
2624//------------------------------ clone_for_use_outside_loop -------------------------------------
2625// clone "n" for uses that are outside of loop
2626int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
2627 int cloned = 0;
2628 assert(worklist.size() == 0, "should be empty");
2629 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2630 Node* use = n->fast_out(j);
2631 if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
2632 worklist.push(use);
2633 }
2634 }
2635 while( worklist.size() ) {
2636 Node *use = worklist.pop();
2637 if (!has_node(use) || use->in(0) == C->top()) continue;
2638 uint j;
2639 for (j = 0; j < use->req(); j++) {
2640 if (use->in(j) == n) break;
2641 }
2642 assert(j < use->req(), "must be there");
2643
2644 // clone "n" and insert it between the inputs of "n" and the use outside the loop
2645 Node* n_clone = n->clone();
2646 _igvn.replace_input_of(use, j, n_clone);
2647 cloned++;
2648 Node* use_c;
2649 if (!use->is_Phi()) {
2650 use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
2651 } else {
2652 // Use in a phi is considered a use in the associated predecessor block
2653 use_c = use->in(0)->in(j);
2654 }
2655 set_ctrl(n_clone, use_c);
2656 assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
2657 get_loop(use_c)->_body.push(n_clone);
2658 _igvn.register_new_node_with_optimizer(n_clone);
2659#ifndef PRODUCT
2660 if (TracePartialPeeling) {
2661 tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
2662 }
2663#endif
2664 }
2665 return cloned;
2666}
2667
2668
2669//------------------------------ clone_for_special_use_inside_loop -------------------------------------
2670// clone "n" for special uses that are in the not_peeled region.
2671// If these def-uses occur in separate blocks, the code generator
2672// marks the method as not compilable. For example, if a "BoolNode"
2673// is in a different basic block than the "IfNode" that uses it, then
2674// the compilation is aborted in the code generator.
2675void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
2676 VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
2677 if (n->is_Phi() || n->is_Load()) {
2678 return;
2679 }
2680 assert(worklist.size() == 0, "should be empty");
2681 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2682 Node* use = n->fast_out(j);
2683 if ( not_peel.test(use->_idx) &&
2684 (use->is_If() || use->is_CMove() || use->is_Bool()) &&
2685 use->in(1) == n) {
2686 worklist.push(use);
2687 }
2688 }
2689 if (worklist.size() > 0) {
2690 // clone "n" and insert it between inputs of "n" and the use
2691 Node* n_clone = n->clone();
2692 loop->_body.push(n_clone);
2693 _igvn.register_new_node_with_optimizer(n_clone);
2694 set_ctrl(n_clone, get_ctrl(n));
2695 sink_list.push(n_clone);
2696 not_peel <<= n_clone->_idx; // add n_clone to not_peel set.
2697#ifndef PRODUCT
2698 if (TracePartialPeeling) {
2699 tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2700 }
2701#endif
2702 while( worklist.size() ) {
2703 Node *use = worklist.pop();
2704 _igvn.rehash_node_delayed(use);
2705 for (uint j = 1; j < use->req(); j++) {
2706 if (use->in(j) == n) {
2707 use->set_req(j, n_clone);
2708 }
2709 }
2710 }
2711 }
2712}
2713
2714
2715//------------------------------ insert_phi_for_loop -------------------------------------
2716// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
2717void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2718 Node *phi = PhiNode::make(lp, back_edge_val);
2719 phi->set_req(LoopNode::EntryControl, lp_entry_val);
2720 // Use existing phi if it already exists
2721 Node *hit = _igvn.hash_find_insert(phi);
2722 if( hit == NULL ) {
2723 _igvn.register_new_node_with_optimizer(phi);
2724 set_ctrl(phi, lp);
2725 } else {
2726 // Remove the new phi from the graph and use the hit
2727 _igvn.remove_dead_node(phi);
2728 phi = hit;
2729 }
2730 _igvn.replace_input_of(use, idx, phi);
2731}
2732
2733#ifdef ASSERT
2734//------------------------------ is_valid_loop_partition -------------------------------------
2735// Validate the loop partition sets: peel and not_peel
2736bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2737 VectorSet& not_peel ) {
2738 uint i;
2739 // Check that peel_list entries are in the peel set
2740 for (i = 0; i < peel_list.size(); i++) {
2741 if (!peel.test(peel_list.at(i)->_idx)) {
2742 return false;
2743 }
2744 }
2745 // Check at loop members are in one of peel set or not_peel set
2746 for (i = 0; i < loop->_body.size(); i++ ) {
2747 Node *def = loop->_body.at(i);
2748 uint di = def->_idx;
2749 // Check that peel set elements are in peel_list
2750 if (peel.test(di)) {
2751 if (not_peel.test(di)) {
2752 return false;
2753 }
2754 // Must be in peel_list also
2755 bool found = false;
2756 for (uint j = 0; j < peel_list.size(); j++) {
2757 if (peel_list.at(j)->_idx == di) {
2758 found = true;
2759 break;
2760 }
2761 }
2762 if (!found) {
2763 return false;
2764 }
2765 } else if (not_peel.test(di)) {
2766 if (peel.test(di)) {
2767 return false;
2768 }
2769 } else {
2770 return false;
2771 }
2772 }
2773 return true;
2774}
2775
2776//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2777// Ensure a use outside of loop is of the right form
2778bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2779 Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2780 return (use->is_Phi() &&
2781 use_c->is_Region() && use_c->req() == 3 &&
2782 (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2783 use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2784 use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2785 loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2786}
2787
2788//------------------------------ is_valid_clone_loop_form -------------------------------------
2789// Ensure that all uses outside of loop are of the right form
2790bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2791 uint orig_exit_idx, uint clone_exit_idx) {
2792 uint len = peel_list.size();
2793 for (uint i = 0; i < len; i++) {
2794 Node *def = peel_list.at(i);
2795
2796 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2797 Node *use = def->fast_out(j);
2798 Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2799 if (!loop->is_member(get_loop(use_c))) {
2800 // use is not in the loop, check for correct structure
2801 if (use->in(0) == def) {
2802 // Okay
2803 } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2804 return false;
2805 }
2806 }
2807 }
2808 }
2809 return true;
2810}
2811#endif
2812
2813//------------------------------ partial_peel -------------------------------------
2814// Partially peel (aka loop rotation) the top portion of a loop (called
2815// the peel section below) by cloning it and placing one copy just before
2816// the new loop head and the other copy at the bottom of the new loop.
2817//
2818// before after where it came from
2819//
2820// stmt1 stmt1
2821// loop: stmt2 clone
2822// stmt2 if condA goto exitA clone
2823// if condA goto exitA new_loop: new
2824// stmt3 stmt3 clone
2825// if !condB goto loop if condB goto exitB clone
2826// exitB: stmt2 orig
2827// stmt4 if !condA goto new_loop orig
2828// exitA: goto exitA
2829// exitB:
2830// stmt4
2831// exitA:
2832//
2833// Step 1: find the cut point: an exit test on probable
2834// induction variable.
2835// Step 2: schedule (with cloning) operations in the peel
2836// section that can be executed after the cut into
2837// the section that is not peeled. This may need
2838// to clone operations into exit blocks. For
2839// instance, a reference to A[i] in the not-peel
2840// section and a reference to B[i] in an exit block
2841// may cause a left-shift of i by 2 to be placed
2842// in the peel block. This step will clone the left
2843// shift into the exit block and sink the left shift
2844// from the peel to the not-peel section.
2845// Step 3: clone the loop, retarget the control, and insert
2846// phis for values that are live across the new loop
2847// head. This is very dependent on the graph structure
2848// from clone_loop. It creates region nodes for
2849// exit control and associated phi nodes for values
2850// flow out of the loop through that exit. The region
2851// node is dominated by the clone's control projection.
2852// So the clone's peel section is placed before the
2853// new loop head, and the clone's not-peel section is
2854// forms the top part of the new loop. The original
2855// peel section forms the tail of the new loop.
2856// Step 4: update the dominator tree and recompute the
2857// dominator depth.
2858//
2859// orig
2860//
2861// stmt1
2862// |
2863// v
2864// loop predicate
2865// |
2866// v
2867// loop<----+
2868// | |
2869// stmt2 |
2870// | |
2871// v |
2872// ifA |
2873// / | |
2874// v v |
2875// false true ^ <-- last_peel
2876// / | |
2877// / ===|==cut |
2878// / stmt3 | <-- first_not_peel
2879// / | |
2880// | v |
2881// v ifB |
2882// exitA: / \ |
2883// / \ |
2884// v v |
2885// false true |
2886// / \ |
2887// / ----+
2888// |
2889// v
2890// exitB:
2891// stmt4
2892//
2893//
2894// after clone loop
2895//
2896// stmt1
2897// |
2898// v
2899// loop predicate
2900// / \
2901// clone / \ orig
2902// / \
2903// / \
2904// v v
2905// +---->loop loop<----+
2906// | | | |
2907// | stmt2 stmt2 |
2908// | | | |
2909// | v v |
2910// | ifA ifA |
2911// | | \ / | |
2912// | v v v v |
2913// ^ true false false true ^ <-- last_peel
2914// | | ^ \ / | |
2915// | cut==|== \ \ / ===|==cut |
2916// | stmt3 \ \ / stmt3 | <-- first_not_peel
2917// | | dom | | | |
2918// | v \ 1v v2 v |
2919// | ifB regionA ifB |
2920// | / \ | / \ |
2921// | / \ v / \ |
2922// | v v exitA: v v |
2923// | true false false true |
2924// | / ^ \ / \ |
2925// +---- \ \ / ----+
2926// dom \ /
2927// \ 1v v2
2928// regionB
2929// |
2930// v
2931// exitB:
2932// stmt4
2933//
2934//
2935// after partial peel
2936//
2937// stmt1
2938// |
2939// v
2940// loop predicate
2941// /
2942// clone / orig
2943// / TOP
2944// / \
2945// v v
2946// TOP->loop loop----+
2947// | | |
2948// stmt2 stmt2 |
2949// | | |
2950// v v |
2951// ifA ifA |
2952// | \ / | |
2953// v v v v |
2954// true false false true | <-- last_peel
2955// | ^ \ / +------|---+
2956// +->newloop \ \ / === ==cut | |
2957// | stmt3 \ \ / TOP | |
2958// | | dom | | stmt3 | | <-- first_not_peel
2959// | v \ 1v v2 v | |
2960// | ifB regionA ifB ^ v
2961// | / \ | / \ | |
2962// | / \ v / \ | |
2963// | v v exitA: v v | |
2964// | true false false true | |
2965// | / ^ \ / \ | |
2966// | | \ \ / v | |
2967// | | dom \ / TOP | |
2968// | | \ 1v v2 | |
2969// ^ v regionB | |
2970// | | | | |
2971// | | v ^ v
2972// | | exitB: | |
2973// | | stmt4 | |
2974// | +------------>-----------------+ |
2975// | |
2976// +-----------------<---------------------+
2977//
2978//
2979// final graph
2980//
2981// stmt1
2982// |
2983// v
2984// loop predicate
2985// |
2986// v
2987// stmt2 clone
2988// |
2989// v
2990// ........> ifA clone
2991// : / |
2992// dom / |
2993// : v v
2994// : false true
2995// : | |
2996// : | v
2997// : | newloop<-----+
2998// : | | |
2999// : | stmt3 clone |
3000// : | | |
3001// : | v |
3002// : | ifB |
3003// : | / \ |
3004// : | v v |
3005// : | false true |
3006// : | | | |
3007// : | v stmt2 |
3008// : | exitB: | |
3009// : | stmt4 v |
3010// : | ifA orig |
3011// : | / \ |
3012// : | / \ |
3013// : | v v |
3014// : | false true |
3015// : | / \ |
3016// : v v -----+
3017// RegionA
3018// |
3019// v
3020// exitA
3021//
3022bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
3023
3024 assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
3025 if (!loop->_head->is_Loop()) {
3026 return false;
3027 }
3028 LoopNode *head = loop->_head->as_Loop();
3029
3030 if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
3031 return false;
3032 }
3033
3034 // Check for complex exit control
3035 for (uint ii = 0; ii < loop->_body.size(); ii++) {
3036 Node *n = loop->_body.at(ii);
3037 int opc = n->Opcode();
3038 if (n->is_Call() ||
3039 opc == Op_Catch ||
3040 opc == Op_CatchProj ||
3041 opc == Op_Jump ||
3042 opc == Op_JumpProj) {
3043#ifndef PRODUCT
3044 if (TracePartialPeeling) {
3045 tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
3046 }
3047#endif
3048 return false;
3049 }
3050 }
3051
3052 int dd = dom_depth(head);
3053
3054 // Step 1: find cut point
3055
3056 // Walk up dominators to loop head looking for first loop exit
3057 // which is executed on every path thru loop.
3058 IfNode *peel_if = NULL;
3059 IfNode *peel_if_cmpu = NULL;
3060
3061 Node *iff = loop->tail();
3062 while (iff != head) {
3063 if (iff->is_If()) {
3064 Node *ctrl = get_ctrl(iff->in(1));
3065 if (ctrl->is_top()) return false; // Dead test on live IF.
3066 // If loop-varying exit-test, check for induction variable
3067 if (loop->is_member(get_loop(ctrl)) &&
3068 loop->is_loop_exit(iff) &&
3069 is_possible_iv_test(iff)) {
3070 Node* cmp = iff->in(1)->in(1);
3071 if (cmp->Opcode() == Op_CmpI) {
3072 peel_if = iff->as_If();
3073 } else {
3074 assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
3075 peel_if_cmpu = iff->as_If();
3076 }
3077 }
3078 }
3079 iff = idom(iff);
3080 }
3081
3082 // Prefer signed compare over unsigned compare.
3083 IfNode* new_peel_if = NULL;
3084 if (peel_if == NULL) {
3085 if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
3086 return false; // No peel point found
3087 }
3088 new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
3089 if (new_peel_if == NULL) {
3090 return false; // No peel point found
3091 }
3092 peel_if = new_peel_if;
3093 }
3094 Node* last_peel = stay_in_loop(peel_if, loop);
3095 Node* first_not_peeled = stay_in_loop(last_peel, loop);
3096 if (first_not_peeled == NULL || first_not_peeled == head) {
3097 return false;
3098 }
3099
3100#ifndef PRODUCT
3101 if (TraceLoopOpts) {
3102 tty->print("PartialPeel ");
3103 loop->dump_head();
3104 }
3105
3106 if (TracePartialPeeling) {
3107 tty->print_cr("before partial peel one iteration");
3108 Node_List wl;
3109 Node* t = head->in(2);
3110 while (true) {
3111 wl.push(t);
3112 if (t == head) break;
3113 t = idom(t);
3114 }
3115 while (wl.size() > 0) {
3116 Node* tt = wl.pop();
3117 tt->dump();
3118 if (tt == last_peel) tty->print_cr("-- cut --");
3119 }
3120 }
3121#endif
3122 ResourceArea *area = Thread::current()->resource_area();
3123 VectorSet peel(area);
3124 VectorSet not_peel(area);
3125 Node_List peel_list(area);
3126 Node_List worklist(area);
3127 Node_List sink_list(area);
3128
3129 if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
3130 return false;
3131 }
3132
3133 // Set of cfg nodes to peel are those that are executable from
3134 // the head through last_peel.
3135 assert(worklist.size() == 0, "should be empty");
3136 worklist.push(head);
3137 peel.set(head->_idx);
3138 while (worklist.size() > 0) {
3139 Node *n = worklist.pop();
3140 if (n != last_peel) {
3141 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
3142 Node* use = n->fast_out(j);
3143 if (use->is_CFG() &&
3144 loop->is_member(get_loop(use)) &&
3145 !peel.test_set(use->_idx)) {
3146 worklist.push(use);
3147 }
3148 }
3149 }
3150 }
3151
3152 // Set of non-cfg nodes to peel are those that are control
3153 // dependent on the cfg nodes.
3154 uint i;
3155 for(i = 0; i < loop->_body.size(); i++ ) {
3156 Node *n = loop->_body.at(i);
3157 Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
3158 if (peel.test(n_c->_idx)) {
3159 peel.set(n->_idx);
3160 } else {
3161 not_peel.set(n->_idx);
3162 }
3163 }
3164
3165 // Step 2: move operations from the peeled section down into the
3166 // not-peeled section
3167
3168 // Get a post order schedule of nodes in the peel region
3169 // Result in right-most operand.
3170 scheduled_nodelist(loop, peel, peel_list );
3171
3172 assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3173
3174 // For future check for too many new phis
3175 uint old_phi_cnt = 0;
3176 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
3177 Node* use = head->fast_out(j);
3178 if (use->is_Phi()) old_phi_cnt++;
3179 }
3180
3181#ifndef PRODUCT
3182 if (TracePartialPeeling) {
3183 tty->print_cr("\npeeled list");
3184 }
3185#endif
3186
3187 // Evacuate nodes in peel region into the not_peeled region if possible
3188 uint new_phi_cnt = 0;
3189 uint cloned_for_outside_use = 0;
3190 for (i = 0; i < peel_list.size();) {
3191 Node* n = peel_list.at(i);
3192#ifndef PRODUCT
3193 if (TracePartialPeeling) n->dump();
3194#endif
3195 bool incr = true;
3196 if ( !n->is_CFG() ) {
3197
3198 if ( has_use_in_set(n, not_peel) ) {
3199
3200 // If not used internal to the peeled region,
3201 // move "n" from peeled to not_peeled region.
3202
3203 if ( !has_use_internal_to_set(n, peel, loop) ) {
3204
3205 // if not pinned and not a load (which maybe anti-dependent on a store)
3206 // and not a CMove (Matcher expects only bool->cmove).
3207 if (n->in(0) == NULL && !n->is_Load() && !n->is_CMove()) {
3208 cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
3209 sink_list.push(n);
3210 peel >>= n->_idx; // delete n from peel set.
3211 not_peel <<= n->_idx; // add n to not_peel set.
3212 peel_list.remove(i);
3213 incr = false;
3214#ifndef PRODUCT
3215 if (TracePartialPeeling) {
3216 tty->print_cr("sink to not_peeled region: %d newbb: %d",
3217 n->_idx, get_ctrl(n)->_idx);
3218 }
3219#endif
3220 }
3221 } else {
3222 // Otherwise check for special def-use cases that span
3223 // the peel/not_peel boundary such as bool->if
3224 clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
3225 new_phi_cnt++;
3226 }
3227 }
3228 }
3229 if (incr) i++;
3230 }
3231
3232 if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
3233#ifndef PRODUCT
3234 if (TracePartialPeeling) {
3235 tty->print_cr("\nToo many new phis: %d old %d new cmpi: %c",
3236 new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
3237 }
3238#endif
3239 if (new_peel_if != NULL) {
3240 remove_cmpi_loop_exit(new_peel_if, loop);
3241 }
3242 // Inhibit more partial peeling on this loop
3243 assert(!head->is_partial_peel_loop(), "not partial peeled");
3244 head->mark_partial_peel_failed();
3245 if (cloned_for_outside_use > 0) {
3246 // Terminate this round of loop opts because
3247 // the graph outside this loop was changed.
3248 C->set_major_progress();
3249 return true;
3250 }
3251 return false;
3252 }
3253
3254 // Step 3: clone loop, retarget control, and insert new phis
3255
3256 // Create new loop head for new phis and to hang
3257 // the nodes being moved (sinked) from the peel region.
3258 LoopNode* new_head = new LoopNode(last_peel, last_peel);
3259 new_head->set_unswitch_count(head->unswitch_count()); // Preserve
3260 _igvn.register_new_node_with_optimizer(new_head);
3261 assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
3262 _igvn.replace_input_of(first_not_peeled, 0, new_head);
3263 set_loop(new_head, loop);
3264 loop->_body.push(new_head);
3265 not_peel.set(new_head->_idx);
3266 set_idom(new_head, last_peel, dom_depth(first_not_peeled));
3267 set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
3268
3269 while (sink_list.size() > 0) {
3270 Node* n = sink_list.pop();
3271 set_ctrl(n, new_head);
3272 }
3273
3274 assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3275
3276 clone_loop(loop, old_new, dd, IgnoreStripMined);
3277
3278 const uint clone_exit_idx = 1;
3279 const uint orig_exit_idx = 2;
3280 assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
3281
3282 Node* head_clone = old_new[head->_idx];
3283 LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop();
3284 Node* orig_tail_clone = head_clone->in(2);
3285
3286 // Add phi if "def" node is in peel set and "use" is not
3287
3288 for(i = 0; i < peel_list.size(); i++ ) {
3289 Node *def = peel_list.at(i);
3290 if (!def->is_CFG()) {
3291 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
3292 Node *use = def->fast_out(j);
3293 if (has_node(use) && use->in(0) != C->top() &&
3294 (!peel.test(use->_idx) ||
3295 (use->is_Phi() && use->in(0) == head)) ) {
3296 worklist.push(use);
3297 }
3298 }
3299 while( worklist.size() ) {
3300 Node *use = worklist.pop();
3301 for (uint j = 1; j < use->req(); j++) {
3302 Node* n = use->in(j);
3303 if (n == def) {
3304
3305 // "def" is in peel set, "use" is not in peel set
3306 // or "use" is in the entry boundary (a phi) of the peel set
3307
3308 Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
3309
3310 if ( loop->is_member(get_loop( use_c )) ) {
3311 // use is in loop
3312 if (old_new[use->_idx] != NULL) { // null for dead code
3313 Node* use_clone = old_new[use->_idx];
3314 _igvn.replace_input_of(use, j, C->top());
3315 insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
3316 }
3317 } else {
3318 assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
3319 // use is not in the loop, check if the live range includes the cut
3320 Node* lp_if = use_c->in(orig_exit_idx)->in(0);
3321 if (not_peel.test(lp_if->_idx)) {
3322 assert(j == orig_exit_idx, "use from original loop");
3323 insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
3324 }
3325 }
3326 }
3327 }
3328 }
3329 }
3330 }
3331
3332 // Step 3b: retarget control
3333
3334 // Redirect control to the new loop head if a cloned node in
3335 // the not_peeled region has control that points into the peeled region.
3336 // This necessary because the cloned peeled region will be outside
3337 // the loop.
3338 // from to
3339 // cloned-peeled <---+
3340 // new_head_clone: | <--+
3341 // cloned-not_peeled in(0) in(0)
3342 // orig-peeled
3343
3344 for(i = 0; i < loop->_body.size(); i++ ) {
3345 Node *n = loop->_body.at(i);
3346 if (!n->is_CFG() && n->in(0) != NULL &&
3347 not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
3348 Node* n_clone = old_new[n->_idx];
3349 _igvn.replace_input_of(n_clone, 0, new_head_clone);
3350 }
3351 }
3352
3353 // Backedge of the surviving new_head (the clone) is original last_peel
3354 _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
3355
3356 // Cut first node in original not_peel set
3357 _igvn.rehash_node_delayed(new_head); // Multiple edge updates:
3358 new_head->set_req(LoopNode::EntryControl, C->top()); // use rehash_node_delayed / set_req instead of
3359 new_head->set_req(LoopNode::LoopBackControl, C->top()); // multiple replace_input_of calls
3360
3361 // Copy head_clone back-branch info to original head
3362 // and remove original head's loop entry and
3363 // clone head's back-branch
3364 _igvn.rehash_node_delayed(head); // Multiple edge updates
3365 head->set_req(LoopNode::EntryControl, head_clone->in(LoopNode::LoopBackControl));
3366 head->set_req(LoopNode::LoopBackControl, C->top());
3367 _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
3368
3369 // Similarly modify the phis
3370 for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
3371 Node* use = head->fast_out(k);
3372 if (use->is_Phi() && use->outcnt() > 0) {
3373 Node* use_clone = old_new[use->_idx];
3374 _igvn.rehash_node_delayed(use); // Multiple edge updates
3375 use->set_req(LoopNode::EntryControl, use_clone->in(LoopNode::LoopBackControl));
3376 use->set_req(LoopNode::LoopBackControl, C->top());
3377 _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
3378 }
3379 }
3380
3381 // Step 4: update dominator tree and dominator depth
3382
3383 set_idom(head, orig_tail_clone, dd);
3384 recompute_dom_depth();
3385
3386 // Inhibit more partial peeling on this loop
3387 new_head_clone->set_partial_peel_loop();
3388 C->set_major_progress();
3389 loop->record_for_igvn();
3390
3391#ifndef PRODUCT
3392 if (TracePartialPeeling) {
3393 tty->print_cr("\nafter partial peel one iteration");
3394 Node_List wl(area);
3395 Node* t = last_peel;
3396 while (true) {
3397 wl.push(t);
3398 if (t == head_clone) break;
3399 t = idom(t);
3400 }
3401 while (wl.size() > 0) {
3402 Node* tt = wl.pop();
3403 if (tt == head) tty->print_cr("orig head");
3404 else if (tt == new_head_clone) tty->print_cr("new head");
3405 else if (tt == head_clone) tty->print_cr("clone head");
3406 tt->dump();
3407 }
3408 }
3409#endif
3410 return true;
3411}
3412
3413//------------------------------reorg_offsets----------------------------------
3414// Reorganize offset computations to lower register pressure. Mostly
3415// prevent loop-fallout uses of the pre-incremented trip counter (which are
3416// then alive with the post-incremented trip counter forcing an extra
3417// register move)
3418void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
3419 // Perform it only for canonical counted loops.
3420 // Loop's shape could be messed up by iteration_split_impl.
3421 if (!loop->_head->is_CountedLoop())
3422 return;
3423 if (!loop->_head->as_Loop()->is_valid_counted_loop())
3424 return;
3425
3426 CountedLoopNode *cl = loop->_head->as_CountedLoop();
3427 CountedLoopEndNode *cle = cl->loopexit();
3428 Node *exit = cle->proj_out(false);
3429 Node *phi = cl->phi();
3430
3431 // Check for the special case when using the pre-incremented trip-counter on
3432 // the fall-out path (forces the pre-incremented and post-incremented trip
3433 // counter to be live at the same time). Fix this by adjusting to use the
3434 // post-increment trip counter.
3435
3436 bool progress = true;
3437 while (progress) {
3438 progress = false;
3439 for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
3440 Node* use = phi->fast_out(i); // User of trip-counter
3441 if (!has_ctrl(use)) continue;
3442 Node *u_ctrl = get_ctrl(use);
3443 if (use->is_Phi()) {
3444 u_ctrl = NULL;
3445 for (uint j = 1; j < use->req(); j++)
3446 if (use->in(j) == phi)
3447 u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
3448 }
3449 IdealLoopTree *u_loop = get_loop(u_ctrl);
3450 // Look for loop-invariant use
3451 if (u_loop == loop) continue;
3452 if (loop->is_member(u_loop)) continue;
3453 // Check that use is live out the bottom. Assuming the trip-counter
3454 // update is right at the bottom, uses of of the loop middle are ok.
3455 if (dom_lca(exit, u_ctrl) != exit) continue;
3456 // Hit! Refactor use to use the post-incremented tripcounter.
3457 // Compute a post-increment tripcounter.
3458 Node* c = exit;
3459 if (cl->is_strip_mined()) {
3460 IdealLoopTree* outer_loop = get_loop(cl->outer_loop());
3461 if (!outer_loop->is_member(u_loop)) {
3462 c = cl->outer_loop_exit();
3463 }
3464 }
3465 Node *opaq = new Opaque2Node(C, cle->incr());
3466 register_new_node(opaq, c);
3467 Node *neg_stride = _igvn.intcon(-cle->stride_con());
3468 set_ctrl(neg_stride, C->root());
3469 Node *post = new AddINode(opaq, neg_stride);
3470 register_new_node(post, c);
3471 _igvn.rehash_node_delayed(use);
3472 for (uint j = 1; j < use->req(); j++) {
3473 if (use->in(j) == phi)
3474 use->set_req(j, post);
3475 }
3476 // Since DU info changed, rerun loop
3477 progress = true;
3478 break;
3479 }
3480 }
3481
3482}
3483