loopTransform.cpp source code [OpenJDK/src/hotspot/share/opto/loopTransform.cpp]

1	/*
2	* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4	*
5	* This code is free software; you can redistribute it and/or modify it
6	* under the terms of the GNU General Public License version 2 only, as
7	* published by the Free Software Foundation.
8	*
9	* This code is distributed in the hope that it will be useful, but WITHOUT
10	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12	* version 2 for more details (a copy is included in the LICENSE file that
13	* accompanied this code).
14	*
15	* You should have received a copy of the GNU General Public License version
16	* 2 along with this work; if not, write to the Free Software Foundation,
17	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18	*
19	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20	* or visit www.oracle.com if you need additional information or have any
21	* questions.
22	*
23	*/
24
25	#include "precompiled.hpp"
26	#include "compiler/compileLog.hpp"
27	#include "memory/allocation.inline.hpp"
28	#include "opto/addnode.hpp"
29	#include "opto/callnode.hpp"
30	#include "opto/castnode.hpp"
31	#include "opto/connode.hpp"
32	#include "opto/convertnode.hpp"
33	#include "opto/divnode.hpp"
34	#include "opto/loopnode.hpp"
35	#include "opto/mulnode.hpp"
36	#include "opto/movenode.hpp"
37	#include "opto/opaquenode.hpp"
38	#include "opto/rootnode.hpp"
39	#include "opto/runtime.hpp"
40	#include "opto/subnode.hpp"
41	#include "opto/superword.hpp"
42	#include "opto/vectornode.hpp"
43
44	//------------------------------is_loop_exit-----------------------------------
45	// Given an IfNode, return the loop-exiting projection or NULL if both
46	// arms remain in the loop.
47	Node IdealLoopTree::is_loop_exit(Node iff) const {
48	if (iff->outcnt() != `2`) return NULL; // Ignore partially dead tests
49	PhaseIdealLoop *phase = _phase;
50	// Test is an IfNode, has 2 projections. If BOTH are in the loop
51	// we need loop unswitching instead of peeling.
52	if (!is_member(phase->get_loop(iff->raw_out(`0`))))
53	return iff->raw_out(`0`);
54	if (!is_member(phase->get_loop(iff->raw_out(`1`))))
55	return iff->raw_out(`1`);
56	return NULL;
57	}
58
59
60	//=============================================================================
61
62
63	//------------------------------record_for_igvn----------------------------
64	// Put loop body on igvn work list
65	void IdealLoopTree::record_for_igvn() {
66	for (uint i = `0`; i < _body.size(); i++) {
67	Node *n = _body.at(i);
68	_phase->_igvn._worklist.push(n);
69	}
70	// put body of outer strip mined loop on igvn work list as well
71	if (_head->is_CountedLoop() && _head->as_Loop()->is_strip_mined()) {
72	CountedLoopNode* l = _head->as_CountedLoop();
73	Node* outer_loop = l->outer_loop();
74	assert(outer_loop != NULL, "missing piece of strip mined loop");
75	_phase->_igvn._worklist.push(outer_loop);
76	Node* outer_loop_tail = l->outer_loop_tail();
77	assert(outer_loop_tail != NULL, "missing piece of strip mined loop");
78	_phase->_igvn._worklist.push(outer_loop_tail);
79	Node* outer_loop_end = l->outer_loop_end();
80	assert(outer_loop_end != NULL, "missing piece of strip mined loop");
81	_phase->_igvn._worklist.push(outer_loop_end);
82	Node* outer_safepoint = l->outer_safepoint();
83	assert(outer_safepoint != NULL, "missing piece of strip mined loop");
84	_phase->_igvn._worklist.push(outer_safepoint);
85	Node* cle_out = _head->as_CountedLoop()->loopexit()->proj_out(false);
86	assert(cle_out != NULL, "missing piece of strip mined loop");
87	_phase->_igvn._worklist.push(cle_out);
88	}
89	}
90
91	//------------------------------compute_exact_trip_count-----------------------
92	// Compute loop trip count if possible. Do not recalculate trip count for
93	// split loops (pre-main-post) which have their limits and inits behind Opaque node.
94	void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
95	if (!_head->as_Loop()->is_valid_counted_loop()) {
96	return;
97	}
98	CountedLoopNode* cl = _head->as_CountedLoop();
99	// Trip count may become nonexact for iteration split loops since
100	// RCE modifies limits. Note, _trip_count value is not reset since
101	// it is used to limit unrolling of main loop.
102	cl->set_nonexact_trip_count();
103
104	// Loop's test should be part of loop.
105	if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
106	return; // Infinite loop
107
108	#ifdef ASSERT
109	BoolTest::mask bt = cl->loopexit()->test_trip();
110	assert(bt == BoolTest::lt \|\| bt == BoolTest::gt \|\|
111	bt == BoolTest::ne, "canonical test is expected");
112	#endif
113
114	Node* init_n = cl->init_trip();
115	Node* limit_n = cl->limit();
116	if (init_n != NULL && limit_n != NULL) {
117	// Use longs to avoid integer overflow.
118	int stride_con = cl->stride_con();
119	const TypeInt* init_type = phase->_igvn.type(init_n)->is_int();
120	const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
121	jlong init_con = (stride_con > `0`) ? init_type->_lo : init_type->_hi;
122	jlong limit_con = (stride_con > `0`) ? limit_type->_hi : limit_type->_lo;
123	int stride_m = stride_con - (stride_con > `0` ? `1` : -`1`);
124	jlong trip_count = (limit_con - init_con + stride_m)/stride_con;
125	if (trip_count > `0` && (julong)trip_count < (julong)max_juint) {
126	if (init_n->is_Con() && limit_n->is_Con()) {
127	// Set exact trip count.
128	cl->set_exact_trip_count((uint)trip_count);
129	} else if (cl->unrolled_count() == `1`) {
130	// Set maximum trip count before unrolling.
131	cl->set_trip_count((uint)trip_count);
132	}
133	}
134	}
135	}
136
137	//------------------------------compute_profile_trip_cnt----------------------------
138	// Compute loop trip count from profile data as
139	// (backedge_count + loop_exit_count) / loop_exit_count
140
141	float IdealLoopTree::compute_profile_trip_cnt_helper(Node* n) {
142	if (n->is_If()) {
143	IfNode *iff = n->as_If();
144	if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {
145	Node *exit = is_loop_exit(iff);
146	if (exit) {
147	float exit_prob = iff->_prob;
148	if (exit->Opcode() == Op_IfFalse) {
149	exit_prob = `1.0` - exit_prob;
150	}
151	if (exit_prob > PROB_MIN) {
152	float exit_cnt = iff->_fcnt * exit_prob;
153	return exit_cnt;
154	}
155	}
156	}
157	}
158	if (n->is_Jump()) {
159	JumpNode *jmp = n->as_Jump();
160	if (jmp->_fcnt != COUNT_UNKNOWN) {
161	float* probs = jmp->_probs;
162	float exit_prob = `0`;
163	PhaseIdealLoop *phase = _phase;
164	for (DUIterator_Fast imax, i = jmp->fast_outs(imax); i < imax; i++) {
165	JumpProjNode* u = jmp->fast_out(i)->as_JumpProj();
166	if (!is_member(_phase->get_loop(u))) {
167	exit_prob += probs[u->_con];
168	}
169	}
170	return exit_prob * jmp->_fcnt;
171	}
172	}
173	return `0`;
174	}
175
176	void IdealLoopTree::compute_profile_trip_cnt(PhaseIdealLoop *phase) {
177	if (!_head->is_Loop()) {
178	return;
179	}
180	LoopNode* head = _head->as_Loop();
181	if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
182	return; // Already computed
183	}
184	float trip_cnt = (float)max_jint; // default is big
185
186	Node* back = head->in(LoopNode::LoopBackControl);
187	while (back != head) {
188	if ((back->Opcode() == Op_IfTrue \|\| back->Opcode() == Op_IfFalse) &&
189	back->in(`0`) &&
190	back->in(`0`)->is_If() &&
191	back->in(`0`)->as_If()->_fcnt != COUNT_UNKNOWN &&
192	back->in(`0`)->as_If()->_prob != PROB_UNKNOWN &&
193	(back->Opcode() == Op_IfTrue ? `1`-back->in(`0`)->as_If()->_prob : back->in(`0`)->as_If()->_prob) > PROB_MIN) {
194	break;
195	}
196	back = phase->idom(back);
197	}
198	if (back != head) {
199	assert((back->Opcode() == Op_IfTrue \|\| back->Opcode() == Op_IfFalse) &&
200	back->in(`0`), "if-projection exists");
201	IfNode* back_if = back->in(`0`)->as_If();
202	float loop_back_cnt = back_if->_fcnt * (back->Opcode() == Op_IfTrue ? back_if->_prob : (`1` - back_if->_prob));
203
204	// Now compute a loop exit count
205	float loop_exit_cnt = `0.0f`;
206	if (_child == NULL) {
207	for (uint i = `0`; i < _body.size(); i++) {
208	Node *n = _body [i];
209	loop_exit_cnt += compute_profile_trip_cnt_helper(n);
210	}
211	} else {
212	ResourceMark rm;
213	Unique_Node_List wq;
214	wq.push(back);
215	for (uint i = `0`; i < wq.size(); i++) {
216	Node *n = wq.at(i);
217	assert(n->is_CFG(), "only control nodes");
218	if (n != head) {
219	if (n->is_Region()) {
220	for (uint j = `1`; j < n->req(); j++) {
221	wq.push(n->in(j));
222	}
223	} else {
224	loop_exit_cnt += compute_profile_trip_cnt_helper(n);
225	wq.push(n->in(`0`));
226	}
227	}
228	}
229
230	}
231	if (loop_exit_cnt > `0.0f`) {
232	trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
233	} else {
234	// No exit count so use
235	trip_cnt = loop_back_cnt;
236	}
237	} else {
238	head->mark_profile_trip_failed();
239	}
240	#ifndef PRODUCT
241	if (TraceProfileTripCount) {
242	tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt);
243	}
244	#endif
245	head->set_profile_trip_cnt(trip_cnt);
246	}
247
248	//---------------------is_invariant_addition-----------------------------
249	// Return nonzero index of invariant operand for an Add or Sub
250	// of (nonconstant) invariant and variant values. Helper for reassociate_invariants.
251	int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) {
252	int op = n->Opcode();
253	if (op == Op_AddI \|\| op == Op_SubI) {
254	bool in1_invar = this->is_invariant(n->in(`1`));
255	bool in2_invar = this->is_invariant(n->in(`2`));
256	if (in1_invar && !in2_invar) return `1`;
257	if (!in1_invar && in2_invar) return `2`;
258	}
259	return `0`;
260	}
261
262	//---------------------reassociate_add_sub-----------------------------
263	// Reassociate invariant add and subtract expressions:
264	//
265	// inv1 + (x + inv2) => ( inv1 + inv2) + x
266	// (x + inv2) + inv1 => ( inv1 + inv2) + x
267	// inv1 + (x - inv2) => ( inv1 - inv2) + x
268	// inv1 - (inv2 - x) => ( inv1 - inv2) + x
269	// (x + inv2) - inv1 => (-inv1 + inv2) + x
270	// (x - inv2) + inv1 => ( inv1 - inv2) + x
271	// (x - inv2) - inv1 => (-inv1 - inv2) + x
272	// inv1 + (inv2 - x) => ( inv1 + inv2) - x
273	// inv1 - (x - inv2) => ( inv1 + inv2) - x
274	// (inv2 - x) + inv1 => ( inv1 + inv2) - x
275	// (inv2 - x) - inv1 => (-inv1 + inv2) - x
276	// inv1 - (x + inv2) => ( inv1 - inv2) - x
277	//
278	Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) {
279	if ((!n1->is_Add() && !n1->is_Sub()) \|\| n1->outcnt() == `0`) return NULL;
280	if (is_invariant(n1)) return NULL;
281	int inv1_idx = is_invariant_addition(n1, phase);
282	if (!inv1_idx) return NULL;
283	// Don't mess with add of constant (igvn moves them to expression tree root.)
284	if (n1->is_Add() && n1->in(`2`)->is_Con()) return NULL;
285	Node* inv1 = n1->in(inv1_idx);
286	Node* n2 = n1->in(`3` - inv1_idx);
287	int inv2_idx = is_invariant_addition(n2, phase);
288	if (!inv2_idx) return NULL;
289
290	if (!phase->may_require_nodes(`10`, `10`)) return NULL;
291
292	Node* x = n2->in(`3` - inv2_idx);
293	Node* inv2 = n2->in(inv2_idx);
294
295	bool neg_x = n2->is_Sub() && inv2_idx == `1`;
296	bool neg_inv2 = n2->is_Sub() && inv2_idx == `2`;
297	bool neg_inv1 = n1->is_Sub() && inv1_idx == `2`;
298	if (n1->is_Sub() && inv1_idx == `1`) {
299	neg_x = !neg_x;
300	neg_inv2 = !neg_inv2;
301	}
302	Node* inv1_c = phase->get_ctrl(inv1);
303	Node* inv2_c = phase->get_ctrl(inv2);
304	Node* n_inv1;
305	if (neg_inv1) {
306	Node *zero = phase->_igvn.intcon(`0`);
307	phase->set_ctrl(zero, phase->C->root());
308	n_inv1 = new SubINode (zero, inv1);
309	phase->register_new_node(n_inv1, inv1_c);
310	} else {
311	n_inv1 = inv1;
312	}
313	Node* inv;
314	if (neg_inv2) {
315	inv = new SubINode (n_inv1, inv2);
316	} else {
317	inv = new AddINode (n_inv1, inv2);
318	}
319	phase->register_new_node(inv, phase->get_early_ctrl(inv));
320
321	Node* addx;
322	if (neg_x) {
323	addx = new SubINode (inv, x);
324	} else {
325	addx = new AddINode (x, inv);
326	}
327	phase->register_new_node(addx, phase->get_ctrl(x));
328	phase->_igvn.replace_node(n1, addx);
329	assert(phase->get_loop(phase->get_ctrl(n1)) == this, "");
330	_body.yank(n1);
331	return addx;
332	}
333
334	//---------------------reassociate_invariants-----------------------------
335	// Reassociate invariant expressions:
336	void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
337	for (int i = _body.size() - `1`; i >= `0`; i--) {
338	Node *n = _body.at(i);
339	for (int j = `0`; j < `5`; j++) {
340	Node* nn = reassociate_add_sub(n, phase);
341	if (nn == NULL) break;
342	n = nn; // again
343	}
344	}
345	}
346
347	//------------------------------policy_peeling---------------------------------
348	// Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling
349	// is applicable if we can make a loop-invariant test (usually a null-check)
350	// execute before we enter the loop. When TRUE, the estimated node budget is
351	// also requested.
352	bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) {
353	uint estimate = estimate_peeling(phase);
354
355	return estimate == `0` ? false : phase->may_require_nodes(estimate);
356	}
357
358	// Perform actual policy and size estimate for the loop peeling transform, and
359	// return the estimated loop size if peeling is applicable, otherwise return
360	// zero. No node budget is allocated.
361	uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) {
362
363	// If nodes are depleted, some transform has miscalculated its needs.
364	assert(!phase->exceeding_node_budget(), "sanity");
365
366	// Peeling does loop cloning which can result in O(N^2) node construction.
367	if (_body.size() > `255`) {
368	return `0`; // Suppress too large body size.
369	}
370	// Optimistic estimate that approximates loop body complexity via data and
371	// control flow fan-out (instead of using the more pessimistic: BodySize^2).
372	uint estimate = est_loop_clone_sz(`2`);
373
374	if (phase->exceeding_node_budget(estimate)) {
375	return `0`; // Too large to safely clone.
376	}
377
378	// Check for vectorized loops, any peeling done was already applied.
379	if (_head->is_CountedLoop()) {
380	CountedLoopNode* cl = _head->as_CountedLoop();
381	if (cl->is_unroll_only() \|\| cl->trip_count() == `1`) {
382	return `0`;
383	}
384	}
385
386	Node* test = tail();
387
388	while (test != _head) { // Scan till run off top of loop
389	if (test->is_If()) { // Test?
390	Node *ctrl = phase->get_ctrl(test->in(`1`));
391	if (ctrl->is_top()) {
392	return `0`; // Found dead test on live IF? No peeling!
393	}
394	// Standard IF only has one input value to check for loop invariance.
395	assert(test->Opcode() == Op_If \|\|
396	test->Opcode() == Op_CountedLoopEnd \|\|
397	test->Opcode() == Op_RangeCheck,
398	"Check this code when new subtype is added");
399	// Condition is not a member of this loop?
400	if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
401	return estimate; // Found reason to peel!
402	}
403	}
404	// Walk up dominators to loop _head looking for test which is executed on
405	// every path through the loop.
406	test = phase->idom(test);
407	}
408	return `0`;
409	}
410
411	//------------------------------peeled_dom_test_elim---------------------------
412	// If we got the effect of peeling, either by actually peeling or by making
413	// a pre-loop which must execute at least once, we can remove all
414	// loop-invariant dominated tests in the main body.
415	void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree *loop, Node_List &old_new) {
416	bool progress = true;
417	while (progress) {
418	progress = false; // Reset for next iteration
419	Node prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();*
420	Node *test = prev->in(`0`);
421	while (test != loop->_head) { // Scan till run off top of loop
422
423	int p_op = prev->Opcode();
424	if ((p_op == Op_IfFalse \|\| p_op == Op_IfTrue) &&
425	test->is_If() && // Test?
426	!test->in(`1`)->is_Con() && // And not already obvious?
427	// Condition is not a member of this loop?
428	!loop->is_member(get_loop(get_ctrl(test->in(`1`))))){
429	// Walk loop body looking for instances of this test
430	for (uint i = `0`; i < loop->_body.size(); i++) {
431	Node *n = loop->_body.at(i);
432	if (n->is_If() && n->in(`1`) == test->in(`1`) /&& n != loop->tail()->in(0)/) {
433	// IfNode was dominated by version in peeled loop body
434	progress = true;
435	dominated_by(old_new [prev->_idx], n);
436	}
437	}
438	}
439	prev = test;
440	test = idom(test);
441	} // End of scan tests in loop
442
443	} // End of while (progress)
444	}
445
446	//------------------------------do_peeling-------------------------------------
447	// Peel the first iteration of the given loop.
448	// Step 1: Clone the loop body. The clone becomes the peeled iteration.
449	// The pre-loop illegally has 2 control users (old & new loops).
450	// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
451	// Do this by making the old-loop fall-in edges act as if they came
452	// around the loopback from the prior iteration (follow the old-loop
453	// backedges) and then map to the new peeled iteration. This leaves
454	// the pre-loop with only 1 user (the new peeled iteration), but the
455	// peeled-loop backedge has 2 users.
456	// Step 3: Cut the backedge on the clone (so its not a loop) and remove the
457	// extra backedge user.
458	//
459	// orig
460	//
461	// stmt1
462	// \|
463	// v
464	// loop predicate
465	// \|
466	// v
467	// loop<----+
468	// \| \|
469	// stmt2 \|
470	// \| \|
471	// v \|
472	// if ^
473	// / \ \|
474	// / \ \|
475	// v v \|
476	// false true \|
477	// / \ \|
478	// / ----+
479	// \|
480	// v
481	// exit
482	//
483	//
484	// after clone loop
485	//
486	// stmt1
487	// \|
488	// v
489	// loop predicate
490	// / \
491	// clone / \ orig
492	// / \
493	// / \
494	// v v
495	// +---->loop clone loop<----+
496	// \| \| \| \|
497	// \| stmt2 clone stmt2 \|
498	// \| \| \| \|
499	// \| v v \|
500	// ^ if clone If ^
501	// \| / \ / \ \|
502	// \| / \ / \ \|
503	// \| v v v v \|
504	// \| true false false true \|
505	// \| / \ / \ \|
506	// +---- \ / ----+
507	// \ /
508	// 1v v2
509	// region
510	// \|
511	// v
512	// exit
513	//
514	//
515	// after peel and predicate move
516	//
517	// stmt1
518	// /
519	// /
520	// clone / orig
521	// /
522	// / +----------+
523	// / \| \|
524	// / loop predicate \|
525	// / \| \|
526	// v v \|
527	// TOP-->loop clone loop<----+ \|
528	// \| \| \| \|
529	// stmt2 clone stmt2 \| \|
530	// \| \| \| ^
531	// v v \| \|
532	// if clone If ^ \|
533	// / \ / \ \| \|
534	// / \ / \ \| \|
535	// v v v v \| \|
536	// true false false true \| \|
537	// \| \ / \ \| \|
538	// \| \ / ----+ ^
539	// \| \ / \|
540	// \| 1v v2 \|
541	// v region \|
542	// \| \| \|
543	// \| v \|
544	// \| exit \|
545	// \| \|
546	// +--------------->-----------------+
547	//
548	//
549	// final graph
550	//
551	// stmt1
552	// \|
553	// v
554	// stmt2 clone
555	// \|
556	// v
557	// if clone
558	// / \|
559	// / \|
560	// v v
561	// false true
562	// \| \|
563	// \| v
564	// \| loop predicate
565	// \| \|
566	// \| v
567	// \| loop<----+
568	// \| \| \|
569	// \| stmt2 \|
570	// \| \| \|
571	// \| v \|
572	// v if ^
573	// \| / \ \|
574	// \| / \ \|
575	// \| v v \|
576	// \| false true \|
577	// \| \| \ \|
578	// v v --+
579	// region
580	// \|
581	// v
582	// exit
583	//
584	void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
585
586	C->set_major_progress();
587	// Peeling a 'main' loop in a pre/main/post situation obfuscates the
588	// 'pre' loop from the main and the 'pre' can no longer have its
589	// iterations adjusted. Therefore, we need to declare this loop as
590	// no longer a 'main' loop; it will need new pre and post loops before
591	// we can do further RCE.
592	#ifndef PRODUCT
593	if (TraceLoopOpts) {
594	tty->print("Peel ");
595	loop->dump_head();
596	}
597	#endif
598	LoopNode* head = loop->_head->as_Loop();
599	bool counted_loop = head->is_CountedLoop();
600	if (counted_loop) {
601	CountedLoopNode *cl = head->as_CountedLoop();
602	assert(cl->trip_count() > `0`, "peeling a fully unrolled loop");
603	cl->set_trip_count(cl->trip_count() - `1`);
604	if (cl->is_main_loop()) {
605	cl->set_normal_loop();
606	#ifndef PRODUCT
607	if (PrintOpto && VerifyLoopOptimizations) {
608	tty->print("Peeling a 'main' loop; resetting to 'normal' ");
609	loop->dump_head();
610	}
611	#endif
612	}
613	}
614	Node* entry = head->in(LoopNode::EntryControl);
615
616	// Step 1: Clone the loop body. The clone becomes the peeled iteration.
617	// The pre-loop illegally has 2 control users (old & new loops).
618	clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), ControlAroundStripMined);
619
620	// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
621	// Do this by making the old-loop fall-in edges act as if they came
622	// around the loopback from the prior iteration (follow the old-loop
623	// backedges) and then map to the new peeled iteration. This leaves
624	// the pre-loop with only 1 user (the new peeled iteration), but the
625	// peeled-loop backedge has 2 users.
626	Node* new_entry = old_new [head->in(LoopNode::LoopBackControl)->_idx];
627	_igvn.hash_delete(head->skip_strip_mined());
628	head->skip_strip_mined()->set_req(LoopNode::EntryControl, new_entry);
629	for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
630	Node* old = head->fast_out(j);
631	if (old->in(`0`) == loop->_head && old->req() == `3` && old->is_Phi()) {
632	Node* new_exit_value = old_new [old->in(LoopNode::LoopBackControl)->_idx];
633	if (!new_exit_value) // Backedge value is ALSO loop invariant?
634	// Then loop body backedge value remains the same.
635	new_exit_value = old->in(LoopNode::LoopBackControl);
636	_igvn.hash_delete(old);
637	old->set_req(LoopNode::EntryControl, new_exit_value);
638	}
639	}
640
641
642	// Step 3: Cut the backedge on the clone (so its not a loop) and remove the
643	// extra backedge user.
644	Node* new_head = old_new [head->_idx];
645	_igvn.hash_delete(new_head);
646	new_head->set_req(LoopNode::LoopBackControl, C->top());
647	for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) {
648	Node* use = new_head->fast_out(j2);
649	if (use->in(`0`) == new_head && use->req() == `3` && use->is_Phi()) {
650	_igvn.hash_delete(use);
651	use->set_req(LoopNode::LoopBackControl, C->top());
652	}
653	}
654
655	// Step 4: Correct dom-depth info. Set to loop-head depth.
656
657	int dd = dom_depth(head);
658	set_idom(head, head->in(`1`), dd);
659	for (uint j3 = `0`; j3 < loop->_body.size(); j3++) {
660	Node *old = loop->_body.at(j3);
661	Node *nnn = old_new [old->_idx];
662	if (!has_ctrl(nnn)) {
663	set_idom(nnn, idom(nnn), dd-`1`);
664	}
665	}
666
667	// Now force out all loop-invariant dominating tests. The optimizer
668	// finds some, but we _know_ they are all useless.
669	peeled_dom_test_elim(loop,old_new);
670
671	loop->record_for_igvn();
672	}
673
674	// The Estimated Loop Unroll Size: UnrollFactor (106% * BodySize + BC) + CC,*
675	// where BC and CC are (totally) ad-hoc/magic "body" and "clone" constants,
676	// respectively, used to ensure that node usage estimates made are on the safe
677	// side, for the most part. This is a simplified version of the loop clone
678	// size calculation in est_loop_clone_sz(), defined for unroll factors larger
679	// than one (>1), performing an overflow check and returning 'UINT_MAX' in
680	// case of an overflow.
681	static uint est_loop_unroll_sz(uint factor, uint size) {
682	precond(`0` < factor);
683
684	uint const bc = `5`;
685	uint const cc = `7`;
686	uint const sz = size + (size + `15`) / `16`;
687	uint estimate = factor * (sz + bc) + cc;
688
689	return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX;
690	}
691
692	#define EMPTY_LOOP_SIZE 7 // Number of nodes in an empty loop.
693
694	//------------------------------policy_maximally_unroll------------------------
695	// Calculate the exact loop trip-count and return TRUE if loop can be fully,
696	// i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
697	// node budget is also requested.
698	bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop phase) const* {
699	CountedLoopNode *cl = _head->as_CountedLoop();
700	assert(cl->is_normal_loop(), "");
701	if (!cl->is_valid_counted_loop()) {
702	return false; // Malformed counted loop
703	}
704	if (!cl->has_exact_trip_count()) {
705	// Trip count is not exact.
706	return false;
707	}
708
709	uint trip_count = cl->trip_count();
710	// Note, max_juint is used to indicate unknown trip count.
711	assert(trip_count > `1`, "one iteration loop should be optimized out already");
712	assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
713
714	// If nodes are depleted, some transform has miscalculated its needs.
715	assert(!phase->exceeding_node_budget(), "sanity");
716
717	// Real policy: if we maximally unroll, does it get too big?
718	// Allow the unrolled mess to get larger than standard loop
719	// size. After all, it will no longer be a loop.
720	uint body_size = _body.size();
721	uint unroll_limit = (uint)LoopUnrollLimit * `4`;
722	assert((intx)unroll_limit == LoopUnrollLimit * `4`, "LoopUnrollLimit must fit in 32bits");
723	if (trip_count > unroll_limit \|\| body_size > unroll_limit) {
724	return false;
725	}
726
727	// Take into account that after unroll conjoined heads and tails will fold,
728	// otherwise policy_unroll() may allow more unrolling than max unrolling.
729	uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
730
731	if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
732	return false;
733	}
734
735	// Fully unroll a loop with few iterations regardless next conditions since
736	// following loop optimizations will split such loop anyway (pre-main-post).
737	if (trip_count <= `3`) {
738	return phase->may_require_nodes(new_body_size);
739	}
740
741	if (new_body_size > unroll_limit \|\|
742	// Unrolling can result in a large amount of node construction
743	phase->exceeding_node_budget(new_body_size)) {
744	return false;
745	}
746
747	// Do not unroll a loop with String intrinsics code.
748	// String intrinsics are large and have loops.
749	for (uint k = `0`; k < _body.size(); k++) {
750	Node* n = _body.at(k);
751	switch (n->Opcode()) {
752	case Op_StrComp:
753	case Op_StrEquals:
754	case Op_StrIndexOf:
755	case Op_StrIndexOfChar:
756	case Op_EncodeISOArray:
757	case Op_AryEq:
758	case Op_HasNegatives: {
759	return false;
760	}
761	#if INCLUDE_RTM_OPT
762	case Op_FastLock:
763	case Op_FastUnlock: {
764	// Don't unroll RTM locking code because it is large.
765	if (UseRTMLocking) {
766	return false;
767	}
768	}
769	#endif
770	} // switch
771	}
772
773	return phase->may_require_nodes(new_body_size);
774	}
775
776
777	//------------------------------policy_unroll----------------------------------
778	// Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if
779	// the loop is a counted loop and the loop body is small enough. When TRUE,
780	// the estimated node budget is also requested.
781	bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
782
783	CountedLoopNode *cl = _head->as_CountedLoop();
784	assert(cl->is_normal_loop() \|\| cl->is_main_loop(), "");
785
786	if (!cl->is_valid_counted_loop()) {
787	return false; // Malformed counted loop
788	}
789
790	// If nodes are depleted, some transform has miscalculated its needs.
791	assert(!phase->exceeding_node_budget(), "sanity");
792
793	// Protect against over-unrolling.
794	// After split at least one iteration will be executed in pre-loop.
795	if (cl->trip_count() <= (cl->is_normal_loop() ? `2u` : `1u`)) {
796	return false;
797	}
798	_local_loop_unroll_limit = LoopUnrollLimit;
799	_local_loop_unroll_factor = `4`;
800	int future_unroll_cnt = cl->unrolled_count() * `2`;
801	if (!cl->is_vectorized_loop()) {
802	if (future_unroll_cnt > LoopMaxUnroll) return false;
803	} else {
804	// obey user constraints on vector mapped loops with additional unrolling applied
805	int unroll_constraint = (cl->slp_max_unroll()) ? cl->slp_max_unroll() : `1`;
806	if ((future_unroll_cnt / unroll_constraint) > LoopMaxUnroll) return false;
807	}
808
809	// Check for initial stride being a small enough constant
810	if (abs(cl->stride_con()) > (`1`<<`2`)future_unroll_cnt) return* false;
811
812	// Don't unroll if the next round of unrolling would push us
813	// over the expected trip count of the loop. One is subtracted
814	// from the expected trip count because the pre-loop normally
815	// executes 1 iteration.
816	if (UnrollLimitForProfileCheck > `0` &&
817	cl->profile_trip_cnt() != COUNT_UNKNOWN &&
818	future_unroll_cnt > UnrollLimitForProfileCheck &&
819	(float)future_unroll_cnt > cl->profile_trip_cnt() - `1.0`) {
820	return false;
821	}
822
823	// When unroll count is greater than LoopUnrollMin, don't unroll if:
824	// the residual iterations are more than 10% of the trip count
825	// and rounds of "unroll,optimize" are not making significant progress
826	// Progress defined as current size less than 20% larger than previous size.
827	if (UseSuperWord && cl->node_count_before_unroll() > `0` &&
828	future_unroll_cnt > LoopUnrollMin &&
829	(future_unroll_cnt - `1`) * (`100` / LoopPercentProfileLimit) > cl->profile_trip_cnt() &&
830	`1.2` * cl->node_count_before_unroll() < (double)_body.size()) {
831	return false;
832	}
833
834	Node *init_n = cl->init_trip();
835	Node *limit_n = cl->limit();
836	int stride_con = cl->stride_con();
837	if (limit_n == NULL) return false; // We will dereference it below.
838
839	// Non-constant bounds.
840	// Protect against over-unrolling when init or/and limit are not constant
841	// (so that trip_count's init value is maxint) but iv range is known.
842	if (init_n == NULL \|\| !init_n->is_Con() \|\| !limit_n->is_Con()) {
843	Node* phi = cl->phi();
844	if (phi != NULL) {
845	assert(phi->is_Phi() && phi->in(`0`) == _head, "Counted loop should have iv phi.");
846	const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
847	int next_stride = stride_con * `2`; // stride after this unroll
848	if (next_stride > `0`) {
849	if (iv_type->_lo + next_stride <= iv_type->_lo \|\| // overflow
850	iv_type->_lo + next_stride > iv_type->_hi) {
851	return false; // over-unrolling
852	}
853	} else if (next_stride < `0`) {
854	if (iv_type->_hi + next_stride >= iv_type->_hi \|\| // overflow
855	iv_type->_hi + next_stride < iv_type->_lo) {
856	return false; // over-unrolling
857	}
858	}
859	}
860	}
861
862	// After unroll limit will be adjusted: new_limit = limit-stride.
863	// Bailout if adjustment overflow.
864	const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
865	if ((stride_con > `0` && ((limit_type->_hi - stride_con) >= limit_type->_hi)) \|\|
866	(stride_con < `0` && ((limit_type->_lo - stride_con) <= limit_type->_lo)))
867	return false; // overflow
868
869	// Adjust body_size to determine if we unroll or not
870	uint body_size = _body.size();
871	// Key test to unroll loop in CRC32 java code
872	int xors_in_loop = `0`;
873	// Also count ModL, DivL and MulL which expand mightly
874	for (uint k = `0`; k < _body.size(); k++) {
875	Node* n = _body.at(k);
876	switch (n->Opcode()) {
877	case Op_XorI: xors_in_loop++; break; // CRC32 java code
878	case Op_ModL: body_size += `30`; break;
879	case Op_DivL: body_size += `30`; break;
880	case Op_MulL: body_size += `10`; break;
881	case Op_StrComp:
882	case Op_StrEquals:
883	case Op_StrIndexOf:
884	case Op_StrIndexOfChar:
885	case Op_EncodeISOArray:
886	case Op_AryEq:
887	case Op_HasNegatives: {
888	// Do not unroll a loop with String intrinsics code.
889	// String intrinsics are large and have loops.
890	return false;
891	}
892	#if INCLUDE_RTM_OPT
893	case Op_FastLock:
894	case Op_FastUnlock: {
895	// Don't unroll RTM locking code because it is large.
896	if (UseRTMLocking) {
897	return false;
898	}
899	}
900	#endif
901	} // switch
902	}
903
904	if (UseSuperWord) {
905	if (!cl->is_reduction_loop()) {
906	phase->mark_reductions(this);
907	}
908
909	// Only attempt slp analysis when user controls do not prohibit it
910	if (LoopMaxUnroll > _local_loop_unroll_factor) {
911	// Once policy_slp_analysis succeeds, mark the loop with the
912	// maximal unroll factor so that we minimize analysis passes
913	if (future_unroll_cnt >= _local_loop_unroll_factor) {
914	policy_unroll_slp_analysis(cl, phase, future_unroll_cnt);
915	}
916	}
917	}
918
919	int slp_max_unroll_factor = cl->slp_max_unroll();
920	if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
921	LoopMaxUnroll = slp_max_unroll_factor;
922	}
923
924	uint estimate = est_loop_clone_sz(`2`);
925
926	if (cl->has_passed_slp()) {
927	if (slp_max_unroll_factor >= future_unroll_cnt) {
928	return phase->may_require_nodes(estimate);
929	}
930	return false; // Loop too big.
931	}
932
933	// Check for being too big
934	if (body_size > (uint)_local_loop_unroll_limit) {
935	if ((cl->is_subword_loop() \|\| xors_in_loop >= `4`) && body_size < `4u` * LoopUnrollLimit) {
936	return phase->may_require_nodes(estimate);
937	}
938	return false; // Loop too big.
939	}
940
941	if (cl->is_unroll_only()) {
942	if (TraceSuperWordLoopUnrollAnalysis) {
943	tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n",
944	slp_max_unroll_factor, future_unroll_cnt);
945	}
946	}
947
948	// Unroll once! (Each trip will soon do double iterations)
949	return phase->may_require_nodes(estimate);
950	}
951
952	void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode cl, PhaseIdealLoop phase, int future_unroll_cnt) {
953
954	// If nodes are depleted, some transform has miscalculated its needs.
955	assert(!phase->exceeding_node_budget(), "sanity");
956
957	// Enable this functionality target by target as needed
958	if (SuperWordLoopUnrollAnalysis) {
959	if (!cl->was_slp_analyzed()) {
960	SuperWord sw(phase);
961	sw.transform_loop(this, false);
962
963	// If the loop is slp canonical analyze it
964	if (sw.early_return() == false) {
965	sw.unrolling_analysis(_local_loop_unroll_factor);
966	}
967	}
968
969	if (cl->has_passed_slp()) {
970	int slp_max_unroll_factor = cl->slp_max_unroll();
971	if (slp_max_unroll_factor >= future_unroll_cnt) {
972	int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
973	if (new_limit > LoopUnrollLimit) {
974	if (TraceSuperWordLoopUnrollAnalysis) {
975	tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit);
976	}
977	_local_loop_unroll_limit = new_limit;
978	}
979	}
980	}
981	}
982	}
983
984	//------------------------------policy_align-----------------------------------
985	// Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
986	// expression that does the alignment. Note that only one array base can be
987	// aligned in a loop (unless the VM guarantees mutual alignment). Note that
988	// if we vectorize short memory ops into longer memory ops, we may want to
989	// increase alignment.
990	bool IdealLoopTree::policy_align(PhaseIdealLoop phase) const* {
991	return false;
992	}
993
994	//------------------------------policy_range_check-----------------------------
995	// Return TRUE or FALSE if the loop should be range-check-eliminated or not.
996	// When TRUE, the estimated node budget is also requested.
997	//
998	// We will actually perform iteration-splitting, a more powerful form of RCE.
999	bool IdealLoopTree::policy_range_check(PhaseIdealLoop phase) const* {
1000	if (!RangeCheckElimination) return false;
1001
1002	// If nodes are depleted, some transform has miscalculated its needs.
1003	assert(!phase->exceeding_node_budget(), "sanity");
1004
1005	CountedLoopNode *cl = _head->as_CountedLoop();
1006	// If we unrolled with no intention of doing RCE and we later changed our
1007	// minds, we got no pre-loop. Either we need to make a new pre-loop, or we
1008	// have to disallow RCE.
1009	if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
1010	Node *trip_counter = cl->phi();
1011
1012	// check for vectorized loops, some opts are no longer needed
1013	if (cl->is_unroll_only()) return false;
1014
1015	// Check loop body for tests of trip-counter plus loop-invariant vs
1016	// loop-invariant.
1017	for (uint i = `0`; i < _body.size(); i++) {
1018	Node *iff = _body [i];
1019	if (iff->Opcode() == Op_If \|\|
1020	iff->Opcode() == Op_RangeCheck) { // Test?
1021
1022	// Comparing trip+off vs limit
1023	Node *bol = iff->in(`1`);
1024	if (bol->req() != `2`) {
1025	continue; // dead constant test
1026	}
1027	if (!bol->is_Bool()) {
1028	assert(bol->Opcode() == Op_Conv2B, "predicate check only");
1029	continue;
1030	}
1031	if (bol->as_Bool()->_test._test == BoolTest::ne) {
1032	continue; // not RC
1033	}
1034	Node *cmp = bol->in(`1`);
1035	Node *rc_exp = cmp->in(`1`);
1036	Node *limit = cmp->in(`2`);
1037
1038	Node *limit_c = phase->get_ctrl(limit);
1039	if (limit_c == phase->C->top()) {
1040	return false; // Found dead test on live IF? No RCE!
1041	}
1042	if (is_member(phase->get_loop(limit_c))) {
1043	// Compare might have operands swapped; commute them
1044	rc_exp = cmp->in(`2`);
1045	limit = cmp->in(`1`);
1046	limit_c = phase->get_ctrl(limit);
1047	if (is_member(phase->get_loop(limit_c))) {
1048	continue; // Both inputs are loop varying; cannot RCE
1049	}
1050	}
1051
1052	if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
1053	continue;
1054	}
1055	// Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
1056	// projections. If BOTH are in the loop we need loop unswitching instead
1057	// of iteration splitting.
1058	if (is_loop_exit(iff)) {
1059	// Found valid reason to split iterations (if there is room).
1060	// NOTE: Usually a gross overestimate.
1061	return phase->may_require_nodes(est_loop_clone_sz(`2`));
1062	}
1063	} // End of is IF
1064	}
1065
1066	return false;
1067	}
1068
1069	//------------------------------policy_peel_only-------------------------------
1070	// Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
1071	// for unrolling loops with NO array accesses.
1072	bool IdealLoopTree::policy_peel_only(PhaseIdealLoop phase) const* {
1073
1074	// If nodes are depleted, some transform has miscalculated its needs.
1075	assert(!phase->exceeding_node_budget(), "sanity");
1076
1077	// check for vectorized loops, any peeling done was already applied
1078	if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
1079	return false;
1080	}
1081
1082	for (uint i = `0`; i < _body.size(); i++) {
1083	if (_body [i]->is_Mem()) {
1084	return false;
1085	}
1086	}
1087	// No memory accesses at all!
1088	return true;
1089	}
1090
1091	//------------------------------clone_up_backedge_goo--------------------------
1092	// If Node n lives in the back_ctrl block and cannot float, we clone a private
1093	// version of n in preheader_ctrl block and return that, otherwise return n.
1094	Node PhaseIdealLoop::clone_up_backedge_goo(Node back_ctrl, Node preheader_ctrl, Node n, VectorSet &visited, Node_Stack &clones) {
1095	if (get_ctrl(n) != back_ctrl) return n;
1096
1097	// Only visit once
1098	if (visited.test_set(n->_idx)) {
1099	Node *x = clones.find(n->_idx);
1100	return (x != NULL) ? x : n;
1101	}
1102
1103	Node x = NULL; // If required, a clone of 'n'*
1104	// Check for 'n' being pinned in the backedge.
1105	if (n->in(`0`) && n->in(`0`) == back_ctrl) {
1106	assert(clones.find(n->_idx) == NULL, "dead loop");
1107	x = n->clone(); // Clone a copy of 'n' to preheader
1108	clones.push(x, n->_idx);
1109	x->set_req(`0`, preheader_ctrl); // Fix x's control input to preheader
1110	}
1111
1112	// Recursive fixup any other input edges into x.
1113	// If there are no changes we can just return 'n', otherwise
1114	// we need to clone a private copy and change it.
1115	for (uint i = `1`; i < n->req(); i++) {
1116	Node *g = clone_up_backedge_goo(back_ctrl, preheader_ctrl, n->in(i), visited, clones);
1117	if (g != n->in(i)) {
1118	if (!x) {
1119	assert(clones.find(n->_idx) == NULL, "dead loop");
1120	x = n->clone();
1121	clones.push(x, n->_idx);
1122	}
1123	x->set_req(i, g);
1124	}
1125	}
1126	if (x) { // x can legally float to pre-header location
1127	register_new_node(x, preheader_ctrl);
1128	return x;
1129	} else { // raise n to cover LCA of uses
1130	set_ctrl(n, find_non_split_ctrl(back_ctrl->in(`0`)));
1131	}
1132	return n;
1133	}
1134
1135	Node* PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) {
1136	Node* castii = new CastIINode (incr, TypeInt::INT, true);
1137	castii->set_req(`0`, ctrl);
1138	register_new_node(castii, ctrl);
1139	for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
1140	Node* n = incr->fast_out(i);
1141	if (n->is_Phi() && n->in(`0`) == loop) {
1142	int nrep = n->replace_edge(incr, castii);
1143	return castii;
1144	}
1145	}
1146	return NULL;
1147	}
1148
1149	// Make a copy of the skeleton range check predicates before the main
1150	// loop and set the initial value of loop as input. After unrolling,
1151	// the range of values for the induction variable in the main loop can
1152	// fall outside the allowed range of values by the array access (main
1153	// loop is never executed). When that happens, range check
1154	// CastII/ConvI2L nodes cause some data paths to die. For consistency,
1155	// the control paths must die too but the range checks were removed by
1156	// predication. The range checks that we add here guarantee that they do.
1157	void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* start, Node* end,
1158	IdealLoopTree* outer_loop, LoopNode* outer_main_head,
1159	uint dd_main_head) {
1160	if (predicate != NULL) {
1161	IfNode* iff = predicate->in(`0`)->as_If();
1162	ProjNode* uncommon_proj = iff->proj_out(`1` - predicate->as_Proj()->_con);
1163	Node* rgn = uncommon_proj->unique_ctrl_out();
1164	assert(rgn->is_Region() \|\| rgn->is_Call(), "must be a region or call uct");
1165	assert(iff->in(`1`)->in(`1`)->Opcode() == Op_Opaque1, "unexpected predicate shape");
1166	predicate = iff->in(`0`);
1167	Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
1168	Node* prev_proj = current_proj;
1169	while (predicate != NULL && predicate->is_Proj() && predicate->in(`0`)->is_If()) {
1170	iff = predicate->in(`0`)->as_If();
1171	uncommon_proj = iff->proj_out(`1` - predicate->as_Proj()->_con);
1172	if (uncommon_proj->unique_ctrl_out() != rgn)
1173	break;
1174	if (iff->in(`1`)->Opcode() == Op_Opaque4) {
1175	assert(skeleton_predicate_has_opaque(iff), "unexpected");
1176	// Clone the predicate twice and initialize one with the initial
1177	// value of the loop induction variable. Leave the other predicate
1178	// to be initialized when increasing the stride during loop unrolling.
1179	prev_proj = clone_skeleton_predicate(iff, start, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1180	assert(skeleton_predicate_has_opaque(prev_proj->in(`0`)->as_If()) == (start->Opcode() == Op_Opaque1), "");
1181	prev_proj = clone_skeleton_predicate(iff, end, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
1182	assert(skeleton_predicate_has_opaque(prev_proj->in(`0`)->as_If()) == (end->Opcode() == Op_Opaque1), "");
1183	// Remove the skeleton predicate from the pre-loop
1184	_igvn.replace_input_of(iff, `1`, _igvn.intcon(`1`));
1185	}
1186	predicate = predicate->in(`0`)->in(`0`);
1187	}
1188	_igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
1189	set_idom(outer_main_head, prev_proj, dd_main_head);
1190	}
1191	}
1192
1193	static bool skeleton_follow_inputs(Node* n, int op) {
1194	return (n->is_Bool() \|\|
1195	n->is_Cmp() \|\|
1196	op == Op_AndL \|\|
1197	op == Op_OrL \|\|
1198	op == Op_RShiftL \|\|
1199	op == Op_LShiftL \|\|
1200	op == Op_AddL \|\|
1201	op == Op_AddI \|\|
1202	op == Op_MulL \|\|
1203	op == Op_MulI \|\|
1204	op == Op_SubL \|\|
1205	op == Op_SubI \|\|
1206	op == Op_ConvI2L);
1207	}
1208
1209	bool PhaseIdealLoop::skeleton_predicate_has_opaque(IfNode* iff) {
1210	ResourceMark rm;
1211	Unique_Node_List wq;
1212	wq.push(iff->in(`1`)->in(`1`));
1213	for (uint i = `0`; i < wq.size(); i++) {
1214	Node* n = wq.at(i);
1215	int op = n->Opcode();
1216	if (skeleton_follow_inputs(n, op)) {
1217	for (uint j = `1`; j < n->req(); j++) {
1218	Node* m = n->in(j);
1219	if (m != NULL) {
1220	wq.push(m);
1221	}
1222	}
1223	continue;
1224	}
1225	if (op == Op_Opaque1) {
1226	return true;
1227	}
1228	}
1229	return false;
1230	}
1231
1232	Node* PhaseIdealLoop::clone_skeleton_predicate(Node* iff, Node* value, Node* predicate, Node* uncommon_proj,
1233	Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj) {
1234	Node_Stack to_clone(`2`);
1235	to_clone.push(iff->in(`1`), `1`);
1236	uint current = C->unique();
1237	Node* result = NULL;
1238	// Look for the opaque node to replace with the new value
1239	// and clone everything in between. We keep the Opaque4 node
1240	// so the duplicated predicates are eliminated once loop
1241	// opts are over: they are here only to keep the IR graph
1242	// consistent.
1243	do {
1244	Node* n = to_clone.node();
1245	uint i = to_clone.index();
1246	Node* m = n->in(i);
1247	int op = m->Opcode();
1248	if (skeleton_follow_inputs(m, op)) {
1249	to_clone.push(m, `1`);
1250	continue;
1251	}
1252	if (op == Op_Opaque1) {
1253	if (n->_idx < current) {
1254	n = n->clone();
1255	}
1256	n->set_req(i, value);
1257	register_new_node(n, current_proj);
1258	to_clone.set_node(n);
1259	}
1260	for (;;) {
1261	Node* cur = to_clone.node();
1262	uint j = to_clone.index();
1263	if (j+`1` < cur->req()) {
1264	to_clone.set_index(j+`1`);
1265	break;
1266	}
1267	to_clone.pop();
1268	if (to_clone.size() == `0`) {
1269	result = cur;
1270	break;
1271	}
1272	Node* next = to_clone.node();
1273	j = to_clone.index();
1274	if (next->in(j) != cur) {
1275	assert(cur->_idx >= current \|\| next->in(j)->Opcode() == Op_Opaque1, "new node or Opaque1 being replaced");
1276	if (next->_idx < current) {
1277	next = next->clone();
1278	register_new_node(next, current_proj);
1279	to_clone.set_node(next);
1280	}
1281	next->set_req(j, cur);
1282	}
1283	}
1284	} while (result == NULL);
1285	assert(result->_idx >= current, "new node expected");
1286
1287	Node* proj = predicate->clone();
1288	Node* other_proj = uncommon_proj->clone();
1289	Node* new_iff = iff->clone();
1290	new_iff->set_req(`1`, result);
1291	proj->set_req(`0`, new_iff);
1292	other_proj->set_req(`0`, new_iff);
1293	Node frame = new* ParmNode (C->start(), TypeFunc::FramePtr);
1294	register_new_node(frame, C->start());
1295	// It's impossible for the predicate to fail at runtime. Use an Halt node.
1296	Node* halt = new HaltNode (other_proj, frame);
1297	C->root()->add_req(halt);
1298	new_iff->set_req(`0`, prev_proj);
1299
1300	register_control(new_iff, outer_loop->_parent, prev_proj);
1301	register_control(proj, outer_loop->_parent, new_iff);
1302	register_control(other_proj, _ltree_root, new_iff);
1303	register_control(halt, _ltree_root, other_proj);
1304	return proj;
1305	}
1306
1307	void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* start, Node* end,
1308	IdealLoopTree* outer_loop, LoopNode* outer_main_head,
1309	uint dd_main_head) {
1310	if (UseLoopPredicate) {
1311	Node* entry = pre_head->in(LoopNode::EntryControl);
1312	Node* predicate = NULL;
1313	predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
1314	if (predicate != NULL) {
1315	entry = skip_loop_predicates(entry);
1316	}
1317	Node* profile_predicate = NULL;
1318	if (UseProfiledLoopPredicate) {
1319	profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
1320	if (profile_predicate != NULL) {
1321	entry = skip_loop_predicates(entry);
1322	}
1323	}
1324	predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
1325	duplicate_predicates_helper(predicate, start, end, outer_loop, outer_main_head, dd_main_head);
1326	duplicate_predicates_helper(profile_predicate, start, end, outer_loop, outer_main_head, dd_main_head);
1327	}
1328	}
1329
1330	//------------------------------insert_pre_post_loops--------------------------
1331	// Insert pre and post loops. If peel_only is set, the pre-loop can not have
1332	// more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
1333	// alignment. Useful to unroll loops that do no array accesses.
1334	void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree loop, Node_List &old_new, bool* peel_only) {
1335
1336	#ifndef PRODUCT
1337	if (TraceLoopOpts) {
1338	if (peel_only)
1339	tty->print("PeelMainPost ");
1340	else
1341	tty->print("PreMainPost ");
1342	loop->dump_head();
1343	}
1344	#endif
1345	C->set_major_progress();
1346
1347	// Find common pieces of the loop being guarded with pre & post loops
1348	CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1349	assert(main_head->is_normal_loop(), "");
1350	CountedLoopEndNode *main_end = main_head->loopexit();
1351	assert(main_end->outcnt() == `2`, "1 true, 1 false path only");
1352
1353	Node *pre_header= main_head->in(LoopNode::EntryControl);
1354	Node *init = main_head->init_trip();
1355	Node *incr = main_end ->incr();
1356	Node *limit = main_end ->limit();
1357	Node *stride = main_end ->stride();
1358	Node *cmp = main_end ->cmp_node();
1359	BoolTest::mask b_test = main_end->test_trip();
1360
1361	// Need only 1 user of 'bol' because I will be hacking the loop bounds.
1362	Node *bol = main_end->in(CountedLoopEndNode::TestValue);
1363	if (bol->outcnt() != `1`) {
1364	bol = bol->clone();
1365	register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
1366	_igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
1367	}
1368	// Need only 1 user of 'cmp' because I will be hacking the loop bounds.
1369	if (cmp->outcnt() != `1`) {
1370	cmp = cmp->clone();
1371	register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
1372	_igvn.replace_input_of(bol, `1`, cmp);
1373	}
1374
1375	// Add the post loop
1376	CountedLoopNode *post_head = NULL;
1377	Node *main_exit = insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1378
1379	//------------------------------
1380	// Step B: Create Pre-Loop.
1381
1382	// Step B1: Clone the loop body. The clone becomes the pre-loop. The main
1383	// loop pre-header illegally has 2 control users (old & new loops).
1384	LoopNode* outer_main_head = main_head;
1385	IdealLoopTree* outer_loop = loop;
1386	if (main_head->is_strip_mined()) {
1387	main_head->verify_strip_mined(`1`);
1388	outer_main_head = main_head->outer_loop();
1389	outer_loop = loop->_parent;
1390	assert(outer_loop->_head == outer_main_head, "broken loop tree");
1391	}
1392	uint dd_main_head = dom_depth(outer_main_head);
1393	clone_loop(loop, old_new, dd_main_head, ControlAroundStripMined);
1394	CountedLoopNode* pre_head = old_new [main_head->_idx]->as_CountedLoop();
1395	CountedLoopEndNode* pre_end = old_new [main_end ->_idx]->as_CountedLoopEnd();
1396	pre_head->set_pre_loop(main_head);
1397	Node *pre_incr = old_new [incr->_idx];
1398
1399	// Reduce the pre-loop trip count.
1400	pre_end->_prob = PROB_FAIR;
1401
1402	// Find the pre-loop normal exit.
1403	Node* pre_exit = pre_end->proj_out(false);
1404	assert(pre_exit->Opcode() == Op_IfFalse, "");
1405	IfFalseNode new_pre_exit = new* IfFalseNode (pre_end);
1406	_igvn.register_new_node_with_optimizer(new_pre_exit);
1407	set_idom(new_pre_exit, pre_end, dd_main_head);
1408	set_loop(new_pre_exit, outer_loop->_parent);
1409
1410	// Step B2: Build a zero-trip guard for the main-loop. After leaving the
1411	// pre-loop, the main-loop may not execute at all. Later in life this
1412	// zero-trip guard will become the minimum-trip guard when we unroll
1413	// the main-loop.
1414	Node min_opaq = new* Opaque1Node (C, limit);
1415	Node min_cmp = new* CmpINode (pre_incr, min_opaq);
1416	Node min_bol = new* BoolNode (min_cmp, b_test);
1417	register_new_node(min_opaq, new_pre_exit);
1418	register_new_node(min_cmp , new_pre_exit);
1419	register_new_node(min_bol , new_pre_exit);
1420
1421	// Build the IfNode (assume the main-loop is executed always).
1422	IfNode min_iff = new* IfNode (new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN);
1423	_igvn.register_new_node_with_optimizer(min_iff);
1424	set_idom(min_iff, new_pre_exit, dd_main_head);
1425	set_loop(min_iff, outer_loop->_parent);
1426
1427	// Plug in the false-path, taken if we need to skip main-loop
1428	_igvn.hash_delete(pre_exit);
1429	pre_exit->set_req(`0`, min_iff);
1430	set_idom(pre_exit, min_iff, dd_main_head);
1431	set_idom(pre_exit->unique_ctrl_out(), min_iff, dd_main_head);
1432	// Make the true-path, must enter the main loop
1433	Node min_taken = new* IfTrueNode (min_iff);
1434	_igvn.register_new_node_with_optimizer(min_taken);
1435	set_idom(min_taken, min_iff, dd_main_head);
1436	set_loop(min_taken, outer_loop->_parent);
1437	// Plug in the true path
1438	_igvn.hash_delete(outer_main_head);
1439	outer_main_head->set_req(LoopNode::EntryControl, min_taken);
1440	set_idom(outer_main_head, min_taken, dd_main_head);
1441
1442	Arena *a = Thread::current()->resource_area();
1443	VectorSet visited(a);
1444	Node_Stack clones(a, main_head->back_control()->outcnt());
1445	// Step B3: Make the fall-in values to the main-loop come from the
1446	// fall-out values of the pre-loop.
1447	for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
1448	Node* main_phi = main_head->fast_out(i2);
1449	if (main_phi->is_Phi() && main_phi->in(`0`) == main_head && main_phi->outcnt() > `0`) {
1450	Node *pre_phi = old_new [main_phi->_idx];
1451	Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
1452	main_head->skip_strip_mined()->in(LoopNode::EntryControl),
1453	pre_phi->in(LoopNode::LoopBackControl),
1454	visited, clones);
1455	_igvn.hash_delete(main_phi);
1456	main_phi->set_req(LoopNode::EntryControl, fallpre);
1457	}
1458	}
1459
1460	// Nodes inside the loop may be control dependent on a predicate
1461	// that was moved before the preloop. If the back branch of the main
1462	// or post loops becomes dead, those nodes won't be dependent on the
1463	// test that guards that loop nest anymore which could lead to an
1464	// incorrect array access because it executes independently of the
1465	// test that was guarding the loop nest. We add a special CastII on
1466	// the if branch that enters the loop, between the input induction
1467	// variable value and the induction variable Phi to preserve correct
1468	// dependencies.
1469
1470	// CastII for the main loop:
1471	Node* castii = cast_incr_before_loop(pre_incr, min_taken, main_head);
1472	assert(castii != NULL, "no castII inserted");
1473	Node* opaque_castii = new Opaque1Node (C, castii);
1474	register_new_node(opaque_castii, outer_main_head->in(LoopNode::EntryControl));
1475	duplicate_predicates(pre_head, castii, opaque_castii, outer_loop, outer_main_head, dd_main_head);
1476
1477	// Step B4: Shorten the pre-loop to run only 1 iteration (for now).
1478	// RCE and alignment may change this later.
1479	Node *cmp_end = pre_end->cmp_node();
1480	assert(cmp_end->in(`2`) == limit, "");
1481	Node pre_limit = new* AddINode (init, stride);
1482
1483	// Save the original loop limit in this Opaque1 node for
1484	// use by range check elimination.
1485	Node pre_opaq = new* Opaque1Node (C, pre_limit, limit);
1486
1487	register_new_node(pre_limit, pre_head->in(`0`));
1488	register_new_node(pre_opaq , pre_head->in(`0`));
1489
1490	// Since no other users of pre-loop compare, I can hack limit directly
1491	assert(cmp_end->outcnt() == `1`, "no other users");
1492	_igvn.hash_delete(cmp_end);
1493	cmp_end->set_req(`2`, peel_only ? pre_limit : pre_opaq);
1494
1495	// Special case for not-equal loop bounds:
1496	// Change pre loop test, main loop test, and the
1497	// main loop guard test to use lt or gt depending on stride
1498	// direction:
1499	// positive stride use <
1500	// negative stride use >
1501	//
1502	// not-equal test is kept for post loop to handle case
1503	// when init > limit when stride > 0 (and reverse).
1504
1505	if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
1506
1507	BoolTest::mask new_test = (main_end->stride_con() > `0`) ? BoolTest::lt : BoolTest::gt;
1508	// Modify pre loop end condition
1509	Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1510	BoolNode* new_bol0 = new BoolNode (pre_bol->in(`1`), new_test);
1511	register_new_node(new_bol0, pre_head->in(`0`));
1512	_igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
1513	// Modify main loop guard condition
1514	assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
1515	BoolNode* new_bol1 = new BoolNode (min_bol->in(`1`), new_test);
1516	register_new_node(new_bol1, new_pre_exit);
1517	_igvn.hash_delete(min_iff);
1518	min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
1519	// Modify main loop end condition
1520	BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
1521	BoolNode* new_bol2 = new BoolNode (main_bol->in(`1`), new_test);
1522	register_new_node(new_bol2, main_end->in(CountedLoopEndNode::TestControl));
1523	_igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
1524	}
1525
1526	// Flag main loop
1527	main_head->set_main_loop();
1528	if (peel_only) {
1529	main_head->set_main_no_pre_loop();
1530	}
1531
1532	// Subtract a trip count for the pre-loop.
1533	main_head->set_trip_count(main_head->trip_count() - `1`);
1534
1535	// It's difficult to be precise about the trip-counts
1536	// for the pre/post loops. They are usually very short,
1537	// so guess that 4 trips is a reasonable value.
1538	post_head->set_profile_trip_cnt(`4.0`);
1539	pre_head->set_profile_trip_cnt(`4.0`);
1540
1541	// Now force out all loop-invariant dominating tests. The optimizer
1542	// finds some, but we _know_ they are all useless.
1543	peeled_dom_test_elim(loop,old_new);
1544	loop->record_for_igvn();
1545	}
1546
1547	//------------------------------insert_vector_post_loop------------------------
1548	// Insert a copy of the atomic unrolled vectorized main loop as a post loop,
1549	// unroll_policy has already informed us that more unrolling is about to
1550	// happen to the main loop. The resultant post loop will serve as a
1551	// vectorized drain loop.
1552	void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1553	if (!loop->_head->is_CountedLoop()) return;
1554
1555	CountedLoopNode *cl = loop->_head->as_CountedLoop();
1556
1557	// only process vectorized main loops
1558	if (!cl->is_vectorized_loop() \|\| !cl->is_main_loop()) return;
1559
1560	int slp_max_unroll_factor = cl->slp_max_unroll();
1561	int cur_unroll = cl->unrolled_count();
1562
1563	if (slp_max_unroll_factor == `0`) return;
1564
1565	// only process atomic unroll vector loops (not super unrolled after vectorization)
1566	if (cur_unroll != slp_max_unroll_factor) return;
1567
1568	// we only ever process this one time
1569	if (cl->has_atomic_post_loop()) return;
1570
1571	if (!may_require_nodes(loop->est_loop_clone_sz(`2`))) {
1572	return;
1573	}
1574
1575	#ifndef PRODUCT
1576	if (TraceLoopOpts) {
1577	tty->print("PostVector ");
1578	loop->dump_head();
1579	}
1580	#endif
1581	C->set_major_progress();
1582
1583	// Find common pieces of the loop being guarded with pre & post loops
1584	CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1585	CountedLoopEndNode *main_end = main_head->loopexit();
1586	// diagnostic to show loop end is not properly formed
1587	assert(main_end->outcnt() == `2`, "1 true, 1 false path only");
1588
1589	// mark this loop as processed
1590	main_head->mark_has_atomic_post_loop();
1591
1592	Node *incr = main_end->incr();
1593	Node *limit = main_end->limit();
1594
1595	// In this case we throw away the result as we are not using it to connect anything else.
1596	CountedLoopNode *post_head = NULL;
1597	insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1598
1599	// It's difficult to be precise about the trip-counts
1600	// for post loops. They are usually very short,
1601	// so guess that unit vector trips is a reasonable value.
1602	post_head->set_profile_trip_cnt(cur_unroll);
1603
1604	// Now force out all loop-invariant dominating tests. The optimizer
1605	// finds some, but we _know_ they are all useless.
1606	peeled_dom_test_elim(loop, old_new);
1607	loop->record_for_igvn();
1608	}
1609
1610
1611	//-------------------------insert_scalar_rced_post_loop------------------------
1612	// Insert a copy of the rce'd main loop as a post loop,
1613	// We have not unrolled the main loop, so this is the right time to inject this.
1614	// Later we will examine the partner of this post loop pair which still has range checks
1615	// to see inject code which tests at runtime if the range checks are applicable.
1616	void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1617	if (!loop->_head->is_CountedLoop()) return;
1618
1619	CountedLoopNode *cl = loop->_head->as_CountedLoop();
1620
1621	// only process RCE'd main loops
1622	if (!cl->is_main_loop() \|\| cl->range_checks_present()) return;
1623
1624	#ifndef PRODUCT
1625	if (TraceLoopOpts) {
1626	tty->print("PostScalarRce ");
1627	loop->dump_head();
1628	}
1629	#endif
1630	C->set_major_progress();
1631
1632	// Find common pieces of the loop being guarded with pre & post loops
1633	CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1634	CountedLoopEndNode *main_end = main_head->loopexit();
1635	// diagnostic to show loop end is not properly formed
1636	assert(main_end->outcnt() == `2`, "1 true, 1 false path only");
1637
1638	Node *incr = main_end->incr();
1639	Node *limit = main_end->limit();
1640
1641	// In this case we throw away the result as we are not using it to connect anything else.
1642	CountedLoopNode *post_head = NULL;
1643	insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
1644
1645	// It's difficult to be precise about the trip-counts
1646	// for post loops. They are usually very short,
1647	// so guess that unit vector trips is a reasonable value.
1648	post_head->set_profile_trip_cnt(`4.0`);
1649	post_head->set_is_rce_post_loop();
1650
1651	// Now force out all loop-invariant dominating tests. The optimizer
1652	// finds some, but we _know_ they are all useless.
1653	peeled_dom_test_elim(loop, old_new);
1654	loop->record_for_igvn();
1655	}
1656
1657
1658	//------------------------------insert_post_loop-------------------------------
1659	// Insert post loops. Add a post loop to the given loop passed.
1660	Node PhaseIdealLoop::insert_post_loop(IdealLoopTree loop, Node_List &old_new,
1661	CountedLoopNode main_head, CountedLoopEndNode main_end,
1662	Node incr, Node limit, CountedLoopNode *&post_head) {
1663	IfNode* outer_main_end = main_end;
1664	IdealLoopTree* outer_loop = loop;
1665	if (main_head->is_strip_mined()) {
1666	main_head->verify_strip_mined(`1`);
1667	outer_main_end = main_head->outer_loop_end();
1668	outer_loop = loop->_parent;
1669	assert(outer_loop->_head == main_head->in(LoopNode::EntryControl), "broken loop tree");
1670	}
1671
1672	//------------------------------
1673	// Step A: Create a new post-Loop.
1674	Node* main_exit = outer_main_end->proj_out(false);
1675	assert(main_exit->Opcode() == Op_IfFalse, "");
1676	int dd_main_exit = dom_depth(main_exit);
1677
1678	// Step A1: Clone the loop body of main. The clone becomes the post-loop.
1679	// The main loop pre-header illegally has 2 control users (old & new loops).
1680	clone_loop(loop, old_new, dd_main_exit, ControlAroundStripMined);
1681	assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
1682	post_head = old_new [main_head->_idx]->as_CountedLoop();
1683	post_head->set_normal_loop();
1684	post_head->set_post_loop(main_head);
1685
1686	// Reduce the post-loop trip count.
1687	CountedLoopEndNode* post_end = old_new [main_end->_idx]->as_CountedLoopEnd();
1688	post_end->_prob = PROB_FAIR;
1689
1690	// Build the main-loop normal exit.
1691	IfFalseNode new_main_exit = new* IfFalseNode (outer_main_end);
1692	_igvn.register_new_node_with_optimizer(new_main_exit);
1693	set_idom(new_main_exit, outer_main_end, dd_main_exit);
1694	set_loop(new_main_exit, outer_loop->_parent);
1695
1696	// Step A2: Build a zero-trip guard for the post-loop. After leaving the
1697	// main-loop, the post-loop may not execute at all. We 'opaque' the incr
1698	// (the previous loop trip-counter exit value) because we will be changing
1699	// the exit value (via additional unrolling) so we cannot constant-fold away the zero
1700	// trip guard until all unrolling is done.
1701	Node zer_opaq = new* Opaque1Node (C, incr);
1702	Node zer_cmp = new* CmpINode (zer_opaq, limit);
1703	Node zer_bol = new* BoolNode (zer_cmp, main_end->test_trip());
1704	register_new_node(zer_opaq, new_main_exit);
1705	register_new_node(zer_cmp, new_main_exit);
1706	register_new_node(zer_bol, new_main_exit);
1707
1708	// Build the IfNode
1709	IfNode zer_iff = new* IfNode (new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
1710	_igvn.register_new_node_with_optimizer(zer_iff);
1711	set_idom(zer_iff, new_main_exit, dd_main_exit);
1712	set_loop(zer_iff, outer_loop->_parent);
1713
1714	// Plug in the false-path, taken if we need to skip this post-loop
1715	_igvn.replace_input_of(main_exit, `0`, zer_iff);
1716	set_idom(main_exit, zer_iff, dd_main_exit);
1717	set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
1718	// Make the true-path, must enter this post loop
1719	Node zer_taken = new* IfTrueNode (zer_iff);
1720	_igvn.register_new_node_with_optimizer(zer_taken);
1721	set_idom(zer_taken, zer_iff, dd_main_exit);
1722	set_loop(zer_taken, outer_loop->_parent);
1723	// Plug in the true path
1724	_igvn.hash_delete(post_head);
1725	post_head->set_req(LoopNode::EntryControl, zer_taken);
1726	set_idom(post_head, zer_taken, dd_main_exit);
1727
1728	Arena *a = Thread::current()->resource_area();
1729	VectorSet visited(a);
1730	Node_Stack clones(a, main_head->back_control()->outcnt());
1731	// Step A3: Make the fall-in values to the post-loop come from the
1732	// fall-out values of the main-loop.
1733	for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
1734	Node* main_phi = main_head->fast_out(i);
1735	if (main_phi->is_Phi() && main_phi->in(`0`) == main_head && main_phi->outcnt() > `0`) {
1736	Node *cur_phi = old_new [main_phi->_idx];
1737	Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
1738	post_head->init_control(),
1739	main_phi->in(LoopNode::LoopBackControl),
1740	visited, clones);
1741	_igvn.hash_delete(cur_phi);
1742	cur_phi->set_req(LoopNode::EntryControl, fallnew);
1743	}
1744	}
1745
1746	// CastII for the new post loop:
1747	Node* castii = cast_incr_before_loop(zer_opaq->in(`1`), zer_taken, post_head);
1748	assert(castii != NULL, "no castII inserted");
1749
1750	return new_main_exit;
1751	}
1752
1753	//------------------------------is_invariant-----------------------------
1754	// Return true if n is invariant
1755	bool IdealLoopTree::is_invariant(Node* n) const {
1756	Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
1757	if (n_c->is_top()) return false;
1758	return !is_member(_phase->get_loop(n_c));
1759	}
1760
1761	void PhaseIdealLoop::update_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con) {
1762	// Search for skeleton predicates and update them according to the new stride
1763	Node* entry = ctrl;
1764	Node* prev_proj = ctrl;
1765	LoopNode* outer_loop_head = loop_head->skip_strip_mined();
1766	IdealLoopTree* outer_loop = get_loop(outer_loop_head);
1767	while (entry != NULL && entry->is_Proj() && entry->in(`0`)->is_If()) {
1768	IfNode* iff = entry->in(`0`)->as_If();
1769	ProjNode* proj = iff->proj_out(`1` - entry->as_Proj()->_con);
1770	if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
1771	break;
1772	}
1773	if (iff->in(`1`)->Opcode() == Op_Opaque4) {
1774	// Look for predicate with an Opaque1 node that can be used as a template
1775	if (!skeleton_predicate_has_opaque(iff)) {
1776	// No Opaque1 node? It's either the check for the first value
1777	// of the first iteration or the check for the last value of
1778	// the first iteration of an unrolled loop. We can't
1779	// tell. Kill it in any case.
1780	_igvn.replace_input_of(iff, `1`, iff->in(`1`)->in(`2`));
1781	} else {
1782	// Add back the predicate for the value at the beginning of the first entry
1783	prev_proj = clone_skeleton_predicate(iff, init, entry, proj, ctrl, outer_loop, prev_proj);
1784	assert(!skeleton_predicate_has_opaque(prev_proj->in(`0`)->as_If()), "unexpected");
1785	// Compute the value of the loop induction variable at the end of the
1786	// first iteration of the unrolled loop: init + new_stride_con - init_inc
1787	int init_inc = stride_con/loop_head->unrolled_count();
1788	assert(init_inc != `0`, "invalid loop increment");
1789	int new_stride_con = stride_con * `2`;
1790	Node* max_value = _igvn.intcon(new_stride_con - init_inc);
1791	max_value = new AddINode (init, max_value);
1792	register_new_node(max_value, get_ctrl(iff->in(`1`)));
1793	prev_proj = clone_skeleton_predicate(iff, max_value, entry, proj, ctrl, outer_loop, prev_proj);
1794	assert(!skeleton_predicate_has_opaque(prev_proj->in(`0`)->as_If()), "unexpected");
1795	}
1796	}
1797	entry = entry->in(`0`)->in(`0`);
1798	}
1799	if (prev_proj != ctrl) {
1800	_igvn.replace_input_of(outer_loop_head, LoopNode::EntryControl, prev_proj);
1801	set_idom(outer_loop_head, prev_proj, dom_depth(outer_loop_head));
1802	}
1803	}
1804
1805	//------------------------------do_unroll--------------------------------------
1806	// Unroll the loop body one step - make each trip do 2 iterations.
1807	void PhaseIdealLoop::do_unroll(IdealLoopTree loop, Node_List &old_new, bool* adjust_min_trip) {
1808	assert(LoopUnrollLimit, "");
1809	CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
1810	CountedLoopEndNode *loop_end = loop_head->loopexit();
1811	#ifndef PRODUCT
1812	if (PrintOpto && VerifyLoopOptimizations) {
1813	tty->print("Unrolling ");
1814	loop->dump_head();
1815	} else if (TraceLoopOpts) {
1816	if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
1817	tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*`2`, loop_head->trip_count());
1818	} else {
1819	tty->print("Unroll %d ", loop_head->unrolled_count()*`2`);
1820	}
1821	loop->dump_head();
1822	}
1823
1824	if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations \|\| TraceLoopOpts))) {
1825	Arena* arena = Thread::current()->resource_area();
1826	Node_Stack stack(arena, C->live_nodes() >> `2`);
1827	Node_List rpo_list;
1828	VectorSet visited(arena);
1829	visited.set(loop_head->_idx);
1830	rpo(loop_head, stack, visited, rpo_list);
1831	dump(loop, rpo_list.size(), rpo_list);
1832	}
1833	#endif
1834
1835	// Remember loop node count before unrolling to detect
1836	// if rounds of unroll,optimize are making progress
1837	loop_head->set_node_count_before_unroll(loop->_body.size());
1838
1839	Node *ctrl = loop_head->skip_strip_mined()->in(LoopNode::EntryControl);
1840	Node *limit = loop_head->limit();
1841	Node *init = loop_head->init_trip();
1842	Node *stride = loop_head->stride();
1843
1844	Node *opaq = NULL;
1845	if (adjust_min_trip) { // If not maximally unrolling, need adjustment
1846	// Search for zero-trip guard.
1847
1848	// Check the shape of the graph at the loop entry. If an inappropriate
1849	// graph shape is encountered, the compiler bails out loop unrolling;
1850	// compilation of the method will still succeed.
1851	if (!is_canonical_loop_entry(loop_head)) {
1852	return;
1853	}
1854	opaq = loop_head->skip_predicates()->in(`0`)->in(`1`)->in(`1`)->in(`2`);
1855	// Zero-trip test uses an 'opaque' node which is not shared.
1856	assert(opaq->outcnt() == `1` && opaq->in(`1`) == limit, "");
1857	}
1858
1859	C->set_major_progress();
1860
1861	Node* new_limit = NULL;
1862	int stride_con = stride->get_int();
1863	int stride_p = (stride_con > `0`) ? stride_con : -stride_con;
1864	uint old_trip_count = loop_head->trip_count();
1865	// Verify that unroll policy result is still valid.
1866	assert(old_trip_count > `1` &&
1867	(!adjust_min_trip \|\| stride_p <= (`1`<<`3`)*loop_head->unrolled_count()), "sanity");
1868
1869	update_skeleton_predicates(ctrl, loop_head, init, stride_con);
1870
1871	// Adjust loop limit to keep valid iterations number after unroll.
1872	// Use (limit - stride) instead of (((limit - init)/stride) & (-2))stride*
1873	// which may overflow.
1874	if (!adjust_min_trip) {
1875	assert(old_trip_count > `1` && (old_trip_count & `1`) == `0`,
1876	"odd trip count for maximally unroll");
1877	// Don't need to adjust limit for maximally unroll since trip count is even.
1878	} else if (loop_head->has_exact_trip_count() && init->is_Con()) {
1879	// Loop's limit is constant. Loop's init could be constant when pre-loop
1880	// become peeled iteration.
1881	jlong init_con = init->get_int();
1882	// We can keep old loop limit if iterations count stays the same:
1883	// old_trip_count == new_trip_count 2*
1884	// Note: since old_trip_count >= 2 then new_trip_count >= 1
1885	// so we also don't need to adjust zero trip test.
1886	jlong limit_con = limit->get_int();
1887	// (stride_con2) not overflow since stride_con <= 8.*
1888	int new_stride_con = stride_con * `2`;
1889	int stride_m = new_stride_con - (stride_con > `0` ? `1` : -`1`);
1890	jlong trip_count = (limit_con - init_con + stride_m)/new_stride_con;
1891	// New trip count should satisfy next conditions.
1892	assert(trip_count > `0` && (julong)trip_count < (julong)max_juint/`2`, "sanity");
1893	uint new_trip_count = (uint)trip_count;
1894	adjust_min_trip = (old_trip_count != new_trip_count*`2`);
1895	}
1896
1897	if (adjust_min_trip) {
1898	// Step 2: Adjust the trip limit if it is called for.
1899	// The adjustment amount is -stride. Need to make sure if the
1900	// adjustment underflows or overflows, then the main loop is skipped.
1901	Node* cmp = loop_end->cmp_node();
1902	assert(cmp->in(`2`) == limit, "sanity");
1903	assert(opaq != NULL && opaq->in(`1`) == limit, "sanity");
1904
1905	// Verify that policy_unroll result is still valid.
1906	const TypeInt* limit_type = _igvn.type(limit)->is_int();
1907	assert(stride_con > `0` && ((limit_type->_hi - stride_con) < limit_type->_hi) \|\|
1908	stride_con < `0` && ((limit_type->_lo - stride_con) > limit_type->_lo),
1909	"sanity");
1910
1911	if (limit->is_Con()) {
1912	// The check in policy_unroll and the assert above guarantee
1913	// no underflow if limit is constant.
1914	new_limit = _igvn.intcon(limit->get_int() - stride_con);
1915	set_ctrl(new_limit, C->root());
1916	} else {
1917	// Limit is not constant.
1918	if (loop_head->unrolled_count() == `1`) { // only for first unroll
1919	// Separate limit by Opaque node in case it is an incremented
1920	// variable from previous loop to avoid using pre-incremented
1921	// value which could increase register pressure.
1922	// Otherwise reorg_offsets() optimization will create a separate
1923	// Opaque node for each use of trip-counter and as result
1924	// zero trip guard limit will be different from loop limit.
1925	assert(has_ctrl(opaq), "should have it");
1926	Node* opaq_ctrl = get_ctrl(opaq);
1927	limit = new Opaque2Node (C, limit);
1928	register_new_node(limit, opaq_ctrl);
1929	}
1930	if ((stride_con > `0` && (java_subtract(limit_type->_lo, stride_con) < limit_type->_lo)) \|\|
1931	(stride_con < `0` && (java_subtract(limit_type->_hi, stride_con) > limit_type->_hi))) {
1932	// No underflow.
1933	new_limit = new SubINode (limit, stride);
1934	} else {
1935	// (limit - stride) may underflow.
1936	// Clamp the adjustment value with MININT or MAXINT:
1937	//
1938	// new_limit = limit-stride
1939	// if (stride > 0)
1940	// new_limit = (limit < new_limit) ? MININT : new_limit;
1941	// else
1942	// new_limit = (limit > new_limit) ? MAXINT : new_limit;
1943	//
1944	BoolTest::mask bt = loop_end->test_trip();
1945	assert(bt == BoolTest::lt \|\| bt == BoolTest::gt, "canonical test is expected");
1946	Node* adj_max = _igvn.intcon((stride_con > `0`) ? min_jint : max_jint);
1947	set_ctrl(adj_max, C->root());
1948	Node* old_limit = NULL;
1949	Node* adj_limit = NULL;
1950	Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
1951	if (loop_head->unrolled_count() > `1` &&
1952	limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
1953	limit->in(CMoveNode::IfTrue) == adj_max &&
1954	bol->as_Bool()->_test._test == bt &&
1955	bol->in(`1`)->Opcode() == Op_CmpI &&
1956	bol->in(`1`)->in(`2`) == limit->in(CMoveNode::IfFalse)) {
1957	// Loop was unrolled before.
1958	// Optimize the limit to avoid nested CMove:
1959	// use original limit as old limit.
1960	old_limit = bol->in(`1`)->in(`1`);
1961	// Adjust previous adjusted limit.
1962	adj_limit = limit->in(CMoveNode::IfFalse);
1963	adj_limit = new SubINode (adj_limit, stride);
1964	} else {
1965	old_limit = limit;
1966	adj_limit = new SubINode (limit, stride);
1967	}
1968	assert(old_limit != NULL && adj_limit != NULL, "");
1969	register_new_node(adj_limit, ctrl); // adjust amount
1970	Node* adj_cmp = new CmpINode (old_limit, adj_limit);
1971	register_new_node(adj_cmp, ctrl);
1972	Node* adj_bool = new BoolNode (adj_cmp, bt);
1973	register_new_node(adj_bool, ctrl);
1974	new_limit = new CMoveINode (adj_bool, adj_limit, adj_max, TypeInt::INT);
1975	}
1976	register_new_node(new_limit, ctrl);
1977	}
1978
1979	assert(new_limit != NULL, "");
1980	// Replace in loop test.
1981	assert(loop_end->in(`1`)->in(`1`) == cmp, "sanity");
1982	if (cmp->outcnt() == `1` && loop_end->in(`1`)->outcnt() == `1`) {
1983	// Don't need to create new test since only one user.
1984	_igvn.hash_delete(cmp);
1985	cmp->set_req(`2`, new_limit);
1986	} else {
1987	// Create new test since it is shared.
1988	Node* ctrl2 = loop_end->in(`0`);
1989	Node* cmp2 = cmp->clone();
1990	cmp2->set_req(`2`, new_limit);
1991	register_new_node(cmp2, ctrl2);
1992	Node* bol2 = loop_end->in(`1`)->clone();
1993	bol2->set_req(`1`, cmp2);
1994	register_new_node(bol2, ctrl2);
1995	_igvn.replace_input_of(loop_end, `1`, bol2);
1996	}
1997	// Step 3: Find the min-trip test guaranteed before a 'main' loop.
1998	// Make it a 1-trip test (means at least 2 trips).
1999
2000	// Guard test uses an 'opaque' node which is not shared. Hence I
2001	// can edit it's inputs directly. Hammer in the new limit for the
2002	// minimum-trip guard.
2003	assert(opaq->outcnt() == `1`, "");
2004	_igvn.replace_input_of(opaq, `1`, new_limit);
2005	}
2006
2007	// Adjust max trip count. The trip count is intentionally rounded
2008	// down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
2009	// the main, unrolled, part of the loop will never execute as it is protected
2010	// by the min-trip test. See bug 4834191 for a case where we over-unrolled
2011	// and later determined that part of the unrolled loop was dead.
2012	loop_head->set_trip_count(old_trip_count / `2`);
2013
2014	// Double the count of original iterations in the unrolled loop body.
2015	loop_head->double_unrolled_count();
2016
2017	// ---------
2018	// Step 4: Clone the loop body. Move it inside the loop. This loop body
2019	// represents the odd iterations; since the loop trips an even number of
2020	// times its backedge is never taken. Kill the backedge.
2021	uint dd = dom_depth(loop_head);
2022	clone_loop(loop, old_new, dd, IgnoreStripMined);
2023
2024	// Make backedges of the clone equal to backedges of the original.
2025	// Make the fall-in from the original come from the fall-out of the clone.
2026	for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
2027	Node* phi = loop_head->fast_out(j);
2028	if (phi->is_Phi() && phi->in(`0`) == loop_head && phi->outcnt() > `0`) {
2029	Node *newphi = old_new [phi->_idx];
2030	_igvn.hash_delete(phi);
2031	_igvn.hash_delete(newphi);
2032
2033	phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl));
2034	newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl));
2035	phi ->set_req(LoopNode::LoopBackControl, C->top());
2036	}
2037	}
2038	Node *clone_head = old_new [loop_head->_idx];
2039	_igvn.hash_delete(clone_head);
2040	loop_head ->set_req(LoopNode:: EntryControl, clone_head->in(LoopNode::LoopBackControl));
2041	clone_head->set_req(LoopNode::LoopBackControl, loop_head ->in(LoopNode::LoopBackControl));
2042	loop_head ->set_req(LoopNode::LoopBackControl, C->top());
2043	loop->_head = clone_head; // New loop header
2044
2045	set_idom(loop_head, loop_head ->in(LoopNode::EntryControl), dd);
2046	set_idom(clone_head, clone_head->in(LoopNode::EntryControl), dd);
2047
2048	// Kill the clone's backedge
2049	Node *newcle = old_new [loop_end->_idx];
2050	_igvn.hash_delete(newcle);
2051	Node *one = _igvn.intcon(`1`);
2052	set_ctrl(one, C->root());
2053	newcle->set_req(`1`, one);
2054	// Force clone into same loop body
2055	uint max = loop->_body.size();
2056	for (uint k = `0`; k < max; k++) {
2057	Node *old = loop->_body.at(k);
2058	Node *nnn = old_new [old->_idx];
2059	loop->_body.push(nnn);
2060	if (!has_ctrl(old)) {
2061	set_loop(nnn, loop);
2062	}
2063	}
2064
2065	loop->record_for_igvn();
2066	loop_head->clear_strip_mined();
2067
2068	#ifndef PRODUCT
2069	if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations \|\| TraceLoopOpts))) {
2070	tty->print("\nnew loop after unroll\n"); loop->dump_head();
2071	for (uint i = `0`; i < loop->_body.size(); i++) {
2072	loop->_body.at(i)->dump();
2073	}
2074	if (C->clone_map().is_debug()) {
2075	tty->print("\nCloneMap\n");
2076	Dict* dict = C->clone_map().dict();
2077	DictI i(dict);
2078	tty->print_cr("Dict@%p[%d] = ", dict, dict->Size());
2079	for (int ii = `0`; i.test(); ++i, ++ii) {
2080	NodeCloneInfo cl((uint64_t)dict->operator[]((void*)i._key));
2081	tty->print("%d->%d:%d,", (int)(intptr_t)i._key, cl.idx(), cl.gen());
2082	if (ii % `10` == `9`) {
2083	tty->print_cr(" ");
2084	}
2085	}
2086	tty->print_cr(" ");
2087	}
2088	}
2089	#endif
2090	}
2091
2092	//------------------------------do_maximally_unroll----------------------------
2093
2094	void PhaseIdealLoop::do_maximally_unroll(IdealLoopTree *loop, Node_List &old_new) {
2095	CountedLoopNode *cl = loop->_head->as_CountedLoop();
2096	assert(cl->has_exact_trip_count(), "trip count is not exact");
2097	assert(cl->trip_count() > `0`, "");
2098	#ifndef PRODUCT
2099	if (TraceLoopOpts) {
2100	tty->print("MaxUnroll %d ", cl->trip_count());
2101	loop->dump_head();
2102	}
2103	#endif
2104
2105	// If loop is tripping an odd number of times, peel odd iteration
2106	if ((cl->trip_count() & `1`) == `1`) {
2107	do_peeling(loop, old_new);
2108	}
2109
2110	// Now its tripping an even number of times remaining. Double loop body.
2111	// Do not adjust pre-guards; they are not needed and do not exist.
2112	if (cl->trip_count() > `0`) {
2113	assert((cl->trip_count() & `1`) == `0`, "missed peeling");
2114	do_unroll(loop, old_new, false);
2115	}
2116	}
2117
2118	void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
2119	if (SuperWordReductions == false) return;
2120
2121	CountedLoopNode* loop_head = loop->_head->as_CountedLoop();
2122	if (loop_head->unrolled_count() > `1`) {
2123	return;
2124	}
2125
2126	Node* trip_phi = loop_head->phi();
2127	for (DUIterator_Fast imax, i = loop_head->fast_outs(imax); i < imax; i++) {
2128	Node* phi = loop_head->fast_out(i);
2129	if (phi->is_Phi() && phi->outcnt() > `0` && phi != trip_phi) {
2130	// For definitions which are loop inclusive and not tripcounts.
2131	Node* def_node = phi->in(LoopNode::LoopBackControl);
2132
2133	if (def_node != NULL) {
2134	Node* n_ctrl = get_ctrl(def_node);
2135	if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
2136	// Now test it to see if it fits the standard pattern for a reduction operator.
2137	int opc = def_node->Opcode();
2138	if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())
2139	\|\| opc == Op_MinD \|\| opc == Op_MinF \|\| opc == Op_MaxD \|\| opc == Op_MaxF) {
2140	if (!def_node->is_reduction()) { // Not marked yet
2141	// To be a reduction, the arithmetic node must have the phi as input and provide a def to it
2142	bool ok = false;
2143	for (unsigned j = `1`; j < def_node->req(); j++) {
2144	Node* in = def_node->in(j);
2145	if (in == phi) {
2146	ok = true;
2147	break;
2148	}
2149	}
2150
2151	// do nothing if we did not match the initial criteria
2152	if (ok == false) {
2153	continue;
2154	}
2155
2156	// The result of the reduction must not be used in the loop
2157	for (DUIterator_Fast imax, i = def_node->fast_outs(imax); i < imax && ok; i++) {
2158	Node* u = def_node->fast_out(i);
2159	if (!loop->is_member(get_loop(ctrl_or_self(u)))) {
2160	continue;
2161	}
2162	if (u == phi) {
2163	continue;
2164	}
2165	ok = false;
2166	}
2167
2168	// iff the uses conform
2169	if (ok) {
2170	def_node->add_flag(Node::Flag_is_reduction);
2171	loop_head->mark_has_reductions();
2172	}
2173	}
2174	}
2175	}
2176	}
2177	}
2178	}
2179	}
2180
2181	//------------------------------adjust_limit-----------------------------------
2182	// Helper function for add_constraint().
2183	Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node offset, Node rc_limit, Node loop_limit, Node pre_ctrl, bool round_up) {
2184	// Compute "I :: (limit-offset)/scale"
2185	Node con = new* SubINode (rc_limit, offset);
2186	register_new_node(con, pre_ctrl);
2187	Node X = new* DivINode (`0`, con, scale);
2188	register_new_node(X, pre_ctrl);
2189
2190	// When the absolute value of scale is greater than one, the integer
2191	// division may round limit down so add one to the limit.
2192	if (round_up) {
2193	X = new AddINode (X, _igvn.intcon(`1`));
2194	register_new_node(X, pre_ctrl);
2195	}
2196
2197	// Adjust loop limit
2198	loop_limit = (stride_con > `0`)
2199	? (Node)(new* MinINode (loop_limit, X))
2200	: (Node)(new* MaxINode (loop_limit, X));
2201	register_new_node(loop_limit, pre_ctrl);
2202	return loop_limit;
2203	}
2204
2205	//------------------------------add_constraint---------------------------------
2206	// Constrain the main loop iterations so the conditions:
2207	// low_limit <= scale_con I + offset < upper_limit*
2208	// always holds true. That is, either increase the number of iterations in
2209	// the pre-loop or the post-loop until the condition holds true in the main
2210	// loop. Stride, scale, offset and limit are all loop invariant. Further,
2211	// stride and scale are constants (offset and limit often are).
2212	void PhaseIdealLoop::add_constraint(int stride_con, int scale_con, Node offset, Node low_limit, Node upper_limit, Node pre_ctrl, Node pre_limit, Node main_limit) {
2213	// For positive stride, the pre-loop limit always uses a MAX function
2214	// and the main loop a MIN function. For negative stride these are
2215	// reversed.
2216
2217	// Also for positive stridescale the affine function is increasing, so the*
2218	// pre-loop must check for underflow and the post-loop for overflow.
2219	// Negative stridescale reverses this; pre-loop checks for overflow and*
2220	// post-loop for underflow.
2221
2222	Node *scale = _igvn.intcon(scale_con);
2223	set_ctrl(scale, C->root());
2224
2225	if ((stride_con^scale_con) >= `0`) { // Use XOR to avoid overflow
2226	// The overflow limit: scaleI+offset < upper_limit*
2227	// For main-loop compute
2228	// ( if (scale > 0) / and stride > 0 /
2229	// I < (upper_limit-offset)/scale
2230	// else / scale < 0 and stride < 0 /
2231	// I > (upper_limit-offset)/scale
2232	// )
2233	//
2234	// (upper_limit-offset) may overflow or underflow.
2235	// But it is fine since main loop will either have
2236	// less iterations or will be skipped in such case.
2237	main_limit = adjust_limit(stride_con, scale, offset, upper_limit, main_limit, pre_ctrl, false);
2238
2239	// The underflow limit: low_limit <= scaleI+offset.*
2240	// For pre-loop compute
2241	// NOT(scaleI+offset >= low_limit)*
2242	// scaleI+offset < low_limit*
2243	// ( if (scale > 0) / and stride > 0 /
2244	// I < (low_limit-offset)/scale
2245	// else / scale < 0 and stride < 0 /
2246	// I > (low_limit-offset)/scale
2247	// )
2248
2249	if (low_limit->get_int() == -max_jint) {
2250	// We need this guard when scalepre_limit+offset >= limit*
2251	// due to underflow. So we need execute pre-loop until
2252	// scaleI+offset >= min_int. But (min_int-offset) will*
2253	// underflow when offset > 0 and X will be > original_limit
2254	// when stride > 0. To avoid it we replace positive offset with 0.
2255	//
2256	// Also (min_int+1 == -max_int) is used instead of min_int here
2257	// to avoid problem with scale == -1 (min_int/(-1) == min_int).
2258	Node* shift = _igvn.intcon(`31`);
2259	set_ctrl(shift, C->root());
2260	Node* sign = new RShiftINode (offset, shift);
2261	register_new_node(sign, pre_ctrl);
2262	offset = new AndINode (offset, sign);
2263	register_new_node(offset, pre_ctrl);
2264	} else {
2265	assert(low_limit->get_int() == `0`, "wrong low limit for range check");
2266	// The only problem we have here when offset == min_int
2267	// since (0-min_int) == min_int. It may be fine for stride > 0
2268	// but for stride < 0 X will be < original_limit. To avoid it
2269	// max(pre_limit, original_limit) is used in do_range_check().
2270	}
2271	// Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
2272	pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, pre_limit, pre_ctrl,
2273	scale_con > `1` && stride_con > `0`);
2274
2275	} else { // stride_conscale_con < 0*
2276	// For negative stridescale pre-loop checks for overflow and*
2277	// post-loop for underflow.
2278	//
2279	// The overflow limit: scaleI+offset < upper_limit*
2280	// For pre-loop compute
2281	// NOT(scaleI+offset < upper_limit)*
2282	// scaleI+offset >= upper_limit*
2283	// scaleI+offset+1 > upper_limit*
2284	// ( if (scale < 0) / and stride > 0 /
2285	// I < (upper_limit-(offset+1))/scale
2286	// else / scale > 0 and stride < 0 /
2287	// I > (upper_limit-(offset+1))/scale
2288	// )
2289	//
2290	// (upper_limit-offset-1) may underflow or overflow.
2291	// To avoid it min(pre_limit, original_limit) is used
2292	// in do_range_check() for stride > 0 and max() for < 0.
2293	Node *one = _igvn.intcon(`1`);
2294	set_ctrl(one, C->root());
2295
2296	Node plus_one = new* AddINode (offset, one);
2297	register_new_node(plus_one, pre_ctrl);
2298	// Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
2299	pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, pre_limit, pre_ctrl,
2300	scale_con < -`1` && stride_con > `0`);
2301
2302	if (low_limit->get_int() == -max_jint) {
2303	// We need this guard when scalemain_limit+offset >= limit*
2304	// due to underflow. So we need execute main-loop while
2305	// scaleI+offset+1 > min_int. But (min_int-offset-1) will*
2306	// underflow when (offset+1) > 0 and X will be < main_limit
2307	// when scale < 0 (and stride > 0). To avoid it we replace
2308	// positive (offset+1) with 0.
2309	//
2310	// Also (min_int+1 == -max_int) is used instead of min_int here
2311	// to avoid problem with scale == -1 (min_int/(-1) == min_int).
2312	Node* shift = _igvn.intcon(`31`);
2313	set_ctrl(shift, C->root());
2314	Node* sign = new RShiftINode (plus_one, shift);
2315	register_new_node(sign, pre_ctrl);
2316	plus_one = new AndINode (plus_one, sign);
2317	register_new_node(plus_one, pre_ctrl);
2318	} else {
2319	assert(low_limit->get_int() == `0`, "wrong low limit for range check");
2320	// The only problem we have here when offset == max_int
2321	// since (max_int+1) == min_int and (0-min_int) == min_int.
2322	// But it is fine since main loop will either have
2323	// less iterations or will be skipped in such case.
2324	}
2325	// The underflow limit: low_limit <= scaleI+offset.*
2326	// For main-loop compute
2327	// scaleI+offset+1 > low_limit*
2328	// ( if (scale < 0) / and stride > 0 /
2329	// I < (low_limit-(offset+1))/scale
2330	// else / scale > 0 and stride < 0 /
2331	// I > (low_limit-(offset+1))/scale
2332	// )
2333
2334	main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, main_limit, pre_ctrl,
2335	false);
2336	}
2337	}
2338
2339
2340	//------------------------------is_scaled_iv---------------------------------
2341	// Return true if exp is a constant times an induction var
2342	bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
2343	if (exp == iv) {
2344	if (p_scale != NULL) {
2345	*p_scale = `1`;
2346	}
2347	return true;
2348	}
2349	int opc = exp->Opcode();
2350	if (opc == Op_MulI) {
2351	if (exp->in(`1`) == iv && exp->in(`2`)->is_Con()) {
2352	if (p_scale != NULL) {
2353	*p_scale = exp->in(`2`)->get_int();
2354	}
2355	return true;
2356	}
2357	if (exp->in(`2`) == iv && exp->in(`1`)->is_Con()) {
2358	if (p_scale != NULL) {
2359	*p_scale = exp->in(`1`)->get_int();
2360	}
2361	return true;
2362	}
2363	} else if (opc == Op_LShiftI) {
2364	if (exp->in(`1`) == iv && exp->in(`2`)->is_Con()) {
2365	if (p_scale != NULL) {
2366	*p_scale = `1` << exp->in(`2`)->get_int();
2367	}
2368	return true;
2369	}
2370	}
2371	return false;
2372	}
2373
2374	//-----------------------------is_scaled_iv_plus_offset------------------------------
2375	// Return true if exp is a simple induction variable expression: k1iv + (invar + k2)*
2376	bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) {
2377	if (is_scaled_iv(exp, iv, p_scale)) {
2378	if (p_offset != NULL) {
2379	Node *zero = _igvn.intcon(`0`);
2380	set_ctrl(zero, C->root());
2381	*p_offset = zero;
2382	}
2383	return true;
2384	}
2385	int opc = exp->Opcode();
2386	if (opc == Op_AddI) {
2387	if (is_scaled_iv(exp->in(`1`), iv, p_scale)) {
2388	if (p_offset != NULL) {
2389	*p_offset = exp->in(`2`);
2390	}
2391	return true;
2392	}
2393	if (is_scaled_iv(exp->in(`2`), iv, p_scale)) {
2394	if (p_offset != NULL) {
2395	*p_offset = exp->in(`1`);
2396	}
2397	return true;
2398	}
2399	if (exp->in(`2`)->is_Con()) {
2400	Node* offset2 = NULL;
2401	if (depth < `2` &&
2402	is_scaled_iv_plus_offset(exp->in(`1`), iv, p_scale,
2403	p_offset != NULL ? &offset2 : NULL, depth+`1`)) {
2404	if (p_offset != NULL) {
2405	Node *ctrl_off2 = get_ctrl(offset2);
2406	Node* offset = new AddINode (offset2, exp->in(`2`));
2407	register_new_node(offset, ctrl_off2);
2408	*p_offset = offset;
2409	}
2410	return true;
2411	}
2412	}
2413	} else if (opc == Op_SubI) {
2414	if (is_scaled_iv(exp->in(`1`), iv, p_scale)) {
2415	if (p_offset != NULL) {
2416	Node *zero = _igvn.intcon(`0`);
2417	set_ctrl(zero, C->root());
2418	Node *ctrl_off = get_ctrl(exp->in(`2`));
2419	Node* offset = new SubINode (zero, exp->in(`2`));
2420	register_new_node(offset, ctrl_off);
2421	*p_offset = offset;
2422	}
2423	return true;
2424	}
2425	if (is_scaled_iv(exp->in(`2`), iv, p_scale)) {
2426	if (p_offset != NULL) {
2427	p_scale = -`1`;
2428	*p_offset = exp->in(`1`);
2429	}
2430	return true;
2431	}
2432	}
2433	return false;
2434	}
2435
2436	// Same as PhaseIdealLoop::duplicate_predicates() but for range checks
2437	// eliminated by iteration splitting.
2438	Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
2439	Node* predicate_proj, int scale_con, Node* offset,
2440	Node* limit, jint stride_con, Node* value) {
2441	bool overflow = false;
2442	BoolNode* bol = rc_predicate(loop, predicate_proj, scale_con, offset, value, NULL, stride_con, limit, (stride_con > `0`) != (scale_con > `0`), overflow);
2443	Node* opaque_bol = new Opaque4Node (C, bol, _igvn.intcon(`1`));
2444	register_new_node(opaque_bol, predicate_proj);
2445	IfNode* new_iff = NULL;
2446	if (overflow) {
2447	new_iff = new IfNode (predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2448	} else {
2449	new_iff = new RangeCheckNode (predicate_proj, opaque_bol, PROB_MAX, COUNT_UNKNOWN);
2450	}
2451	register_control(new_iff, loop->_parent, predicate_proj);
2452	Node* iffalse = new IfFalseNode (new_iff);
2453	register_control(iffalse, _ltree_root, new_iff);
2454	ProjNode* iftrue = new IfTrueNode (new_iff);
2455	register_control(iftrue, loop->_parent, new_iff);
2456	Node frame = new* ParmNode (C->start(), TypeFunc::FramePtr);
2457	register_new_node(frame, C->start());
2458	Node* halt = new HaltNode (iffalse, frame);
2459	register_control(halt, _ltree_root, iffalse);
2460	C->root()->add_req(halt);
2461	return iftrue;
2462	}
2463
2464	//------------------------------do_range_check---------------------------------
2465	// Eliminate range-checks and other trip-counter vs loop-invariant tests.
2466	int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
2467	#ifndef PRODUCT
2468	if (PrintOpto && VerifyLoopOptimizations) {
2469	tty->print("Range Check Elimination ");
2470	loop->dump_head();
2471	} else if (TraceLoopOpts) {
2472	tty->print("RangeCheck ");
2473	loop->dump_head();
2474	}
2475	#endif
2476
2477	assert(RangeCheckElimination, "");
2478	CountedLoopNode *cl = loop->_head->as_CountedLoop();
2479	// If we fail before trying to eliminate range checks, set multiversion state
2480	int closed_range_checks = `1`;
2481
2482	// protect against stride not being a constant
2483	if (!cl->stride_is_con()) {
2484	return closed_range_checks;
2485	}
2486	// Find the trip counter; we are iteration splitting based on it
2487	Node *trip_counter = cl->phi();
2488	// Find the main loop limit; we will trim it's iterations
2489	// to not ever trip end tests
2490	Node *main_limit = cl->limit();
2491
2492	// Check graph shape. Cannot optimize a loop if zero-trip
2493	// Opaque1 node is optimized away and then another round
2494	// of loop opts attempted.
2495	if (!is_canonical_loop_entry(cl)) {
2496	return closed_range_checks;
2497	}
2498
2499	// Need to find the main-loop zero-trip guard
2500	Node *ctrl = cl->skip_predicates();
2501	Node *iffm = ctrl->in(`0`);
2502	Node *opqzm = iffm->in(`1`)->in(`1`)->in(`2`);
2503	assert(opqzm->in(`1`) == main_limit, "do not understand situation");
2504
2505	// Find the pre-loop limit; we will expand its iterations to
2506	// not ever trip low tests.
2507	Node *p_f = iffm->in(`0`);
2508	// pre loop may have been optimized out
2509	if (p_f->Opcode() != Op_IfFalse) {
2510	return closed_range_checks;
2511	}
2512	CountedLoopEndNode *pre_end = p_f->in(`0`)->as_CountedLoopEnd();
2513	assert(pre_end->loopnode()->is_pre_loop(), "");
2514	Node *pre_opaq1 = pre_end->limit();
2515	// Occasionally it's possible for a pre-loop Opaque1 node to be
2516	// optimized away and then another round of loop opts attempted.
2517	// We can not optimize this particular loop in that case.
2518	if (pre_opaq1->Opcode() != Op_Opaque1) {
2519	return closed_range_checks;
2520	}
2521	Opaque1Node pre_opaq = (Opaque1Node)pre_opaq1;
2522	Node *pre_limit = pre_opaq->in(`1`);
2523
2524	// Where do we put new limit calculations
2525	Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
2526
2527	// Ensure the original loop limit is available from the
2528	// pre-loop Opaque1 node.
2529	Node *orig_limit = pre_opaq->original_loop_limit();
2530	if (orig_limit == NULL \|\| _igvn.type(orig_limit) == Type::TOP) {
2531	return closed_range_checks;
2532	}
2533	// Must know if its a count-up or count-down loop
2534
2535	int stride_con = cl->stride_con();
2536	Node *zero = _igvn.intcon(`0`);
2537	Node *one = _igvn.intcon(`1`);
2538	// Use symmetrical int range [-max_jint,max_jint]
2539	Node *mini = _igvn.intcon(-max_jint);
2540	set_ctrl(zero, C->root());
2541	set_ctrl(one, C->root());
2542	set_ctrl(mini, C->root());
2543
2544	// Range checks that do not dominate the loop backedge (ie.
2545	// conditionally executed) can lengthen the pre loop limit beyond
2546	// the original loop limit. To prevent this, the pre limit is
2547	// (for stride > 0) MINed with the original loop limit (MAXed
2548	// stride < 0) when some range_check (rc) is conditionally
2549	// executed.
2550	bool conditional_rc = false;
2551
2552	// Count number of range checks and reduce by load range limits, if zero,
2553	// the loop is in canonical form to multiversion.
2554	closed_range_checks = `0`;
2555
2556	Node* predicate_proj = cl->skip_strip_mined()->in(LoopNode::EntryControl);
2557	assert(predicate_proj->is_Proj() && predicate_proj->in(`0`)->is_If(), "if projection only");
2558
2559	// Check loop body for tests of trip-counter plus loop-invariant vs loop-variant.
2560	for (uint i = `0`; i < loop->_body.size(); i++) {
2561	Node *iff = loop->_body [i];
2562	if (iff->Opcode() == Op_If \|\|
2563	iff->Opcode() == Op_RangeCheck) { // Test?
2564	// Test is an IfNode, has 2 projections. If BOTH are in the loop
2565	// we need loop unswitching instead of iteration splitting.
2566	closed_range_checks++;
2567	Node *exit = loop->is_loop_exit(iff);
2568	if (!exit) continue;
2569	int flip = (exit->Opcode() == Op_IfTrue) ? `1` : `0`;
2570
2571	// Get boolean condition to test
2572	Node *i1 = iff->in(`1`);
2573	if (!i1->is_Bool()) continue;
2574	BoolNode *bol = i1->as_Bool();
2575	BoolTest b_test = bol->_test;
2576	// Flip sense of test if exit condition is flipped
2577	if (flip) {
2578	b_test = b_test.negate();
2579	}
2580	// Get compare
2581	Node *cmp = bol->in(`1`);
2582
2583	// Look for trip_counter + offset vs limit
2584	Node *rc_exp = cmp->in(`1`);
2585	Node *limit = cmp->in(`2`);
2586	int scale_con= `1`; // Assume trip counter not scaled
2587
2588	Node *limit_c = get_ctrl(limit);
2589	if (loop->is_member(get_loop(limit_c))) {
2590	// Compare might have operands swapped; commute them
2591	b_test = b_test.commute();
2592	rc_exp = cmp->in(`2`);
2593	limit = cmp->in(`1`);
2594	limit_c = get_ctrl(limit);
2595	if (loop->is_member(get_loop(limit_c))) {
2596	continue; // Both inputs are loop varying; cannot RCE
2597	}
2598	}
2599	// Here we know 'limit' is loop invariant
2600
2601	// 'limit' maybe pinned below the zero trip test (probably from a
2602	// previous round of rce), in which case, it can't be used in the
2603	// zero trip test expression which must occur before the zero test's if.
2604	if (is_dominator(ctrl, limit_c)) {
2605	continue; // Don't rce this check but continue looking for other candidates.
2606	}
2607
2608	// Check for scaled induction variable plus an offset
2609	Node *offset = NULL;
2610
2611	if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) {
2612	continue;
2613	}
2614
2615	Node *offset_c = get_ctrl(offset);
2616	if (loop->is_member(get_loop(offset_c))) {
2617	continue; // Offset is not really loop invariant
2618	}
2619	// Here we know 'offset' is loop invariant.
2620
2621	// As above for the 'limit', the 'offset' maybe pinned below the
2622	// zero trip test.
2623	if (is_dominator(ctrl, offset_c)) {
2624	continue; // Don't rce this check but continue looking for other candidates.
2625	}
2626	#ifdef ASSERT
2627	if (TraceRangeLimitCheck) {
2628	tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
2629	bol->dump(`2`);
2630	}
2631	#endif
2632	// At this point we have the expression as:
2633	// scale_con trip_counter + offset :: limit*
2634	// where scale_con, offset and limit are loop invariant. Trip_counter
2635	// monotonically increases by stride_con, a constant. Both (or either)
2636	// stride_con and scale_con can be negative which will flip about the
2637	// sense of the test.
2638
2639	// Adjust pre and main loop limits to guard the correct iteration set
2640	if (cmp->Opcode() == Op_CmpU) { // Unsigned compare is really 2 tests
2641	if (b_test._test == BoolTest::lt) { // Range checks always use lt
2642	// The underflow and overflow limits: 0 <= scaleI+offset < limit*
2643	add_constraint(stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit);
2644	// (0-offset)/scale could be outside of loop iterations range.
2645	conditional_rc = true;
2646	Node* init = cl->init_trip();
2647	Node* opaque_init = new Opaque1Node (C, init);
2648	register_new_node(opaque_init, predicate_proj);
2649	// template predicate so it can be updated on next unrolling
2650	predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, opaque_init);
2651	assert(skeleton_predicate_has_opaque(predicate_proj->in(`0`)->as_If()), "unexpected");
2652	// predicate on first value of first iteration
2653	predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, init);
2654	assert(!skeleton_predicate_has_opaque(predicate_proj->in(`0`)->as_If()), "unexpected");
2655	int init_inc = stride_con/cl->unrolled_count();
2656	assert(init_inc != `0`, "invalid loop increment");
2657	Node* max_value = _igvn.intcon(stride_con - init_inc);
2658	max_value = new AddINode (init, max_value);
2659	register_new_node(max_value, predicate_proj);
2660	// predicate on last value of first iteration (in case unrolling has already happened)
2661	predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con, max_value);
2662	assert(!skeleton_predicate_has_opaque(predicate_proj->in(`0`)->as_If()), "unexpected");
2663	} else {
2664	if (PrintOpto) {
2665	tty->print_cr("missed RCE opportunity");
2666	}
2667	continue; // In release mode, ignore it
2668	}
2669	} else { // Otherwise work on normal compares
2670	switch(b_test._test) {
2671	case BoolTest::gt:
2672	// Fall into GE case
2673	case BoolTest::ge:
2674	// Convert (Iscale+offset) >= Limit to (I(-scale)+(-offset)) <= -Limit
2675	scale_con = -scale_con;
2676	offset = new SubINode (zero, offset);
2677	register_new_node(offset, pre_ctrl);
2678	limit = new SubINode (zero, limit);
2679	register_new_node(limit, pre_ctrl);
2680	// Fall into LE case
2681	case BoolTest::le:
2682	if (b_test._test != BoolTest::gt) {
2683	// Convert X <= Y to X < Y+1
2684	limit = new AddINode (limit, one);
2685	register_new_node(limit, pre_ctrl);
2686	}
2687	// Fall into LT case
2688	case BoolTest::lt:
2689	// The underflow and overflow limits: MIN_INT <= scaleI+offset < limit*
2690	// Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
2691	// to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
2692	add_constraint(stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
2693	// ((MIN_INT+1)-offset)/scale could be outside of loop iterations range.
2694	// Note: negative offset is replaced with 0 but (MIN_INT+1)/scale could
2695	// still be outside of loop range.
2696	conditional_rc = true;
2697	break;
2698	default:
2699	if (PrintOpto) {
2700	tty->print_cr("missed RCE opportunity");
2701	}
2702	continue; // Unhandled case
2703	}
2704	}
2705
2706	// Kill the eliminated test
2707	C->set_major_progress();
2708	Node *kill_con = _igvn.intcon(`1`-flip);
2709	set_ctrl(kill_con, C->root());
2710	_igvn.replace_input_of(iff, `1`, kill_con);
2711	// Find surviving projection
2712	assert(iff->is_If(), "");
2713	ProjNode* dp = ((IfNode*)iff)->proj_out(`1`-flip);
2714	// Find loads off the surviving projection; remove their control edge
2715	for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
2716	Node* cd = dp->fast_out(i); // Control-dependent node
2717	if (cd->is_Load() && cd->depends_only_on_test()) { // Loads can now float around in the loop
2718	// Allow the load to float around in the loop, or before it
2719	// but NOT before the pre-loop.
2720	_igvn.replace_input_of(cd, `0`, ctrl); // ctrl, not NULL
2721	--i;
2722	--imax;
2723	}
2724	}
2725	if (limit->Opcode() == Op_LoadRange) {
2726	closed_range_checks--;
2727	}
2728	} // End of is IF
2729	}
2730	if (predicate_proj != cl->skip_strip_mined()->in(LoopNode::EntryControl)) {
2731	_igvn.replace_input_of(cl->skip_strip_mined(), LoopNode::EntryControl, predicate_proj);
2732	set_idom(cl->skip_strip_mined(), predicate_proj, dom_depth(cl->skip_strip_mined()));
2733	}
2734
2735	// Update loop limits
2736	if (conditional_rc) {
2737	pre_limit = (stride_con > `0`) ? (Node)new* MinINode (pre_limit, orig_limit)
2738	: (Node)new* MaxINode (pre_limit, orig_limit);
2739	register_new_node(pre_limit, pre_ctrl);
2740	}
2741	_igvn.replace_input_of(pre_opaq, `1`, pre_limit);
2742
2743	// Note:: we are making the main loop limit no longer precise;
2744	// need to round up based on stride.
2745	cl->set_nonexact_trip_count();
2746	Node *main_cle = cl->loopexit();
2747	Node *main_bol = main_cle->in(`1`);
2748	// Hacking loop bounds; need private copies of exit test
2749	if (main_bol->outcnt() > `1`) { // BoolNode shared?
2750	main_bol = main_bol->clone(); // Clone a private BoolNode
2751	register_new_node(main_bol, main_cle->in(`0`));
2752	_igvn.replace_input_of(main_cle, `1`, main_bol);
2753	}
2754	Node *main_cmp = main_bol->in(`1`);
2755	if (main_cmp->outcnt() > `1`) { // CmpNode shared?
2756	main_cmp = main_cmp->clone(); // Clone a private CmpNode
2757	register_new_node(main_cmp, main_cle->in(`0`));
2758	_igvn.replace_input_of(main_bol, `1`, main_cmp);
2759	}
2760	// Hack the now-private loop bounds
2761	_igvn.replace_input_of(main_cmp, `2`, main_limit);
2762	// The OpaqueNode is unshared by design
2763	assert(opqzm->outcnt() == `1`, "cannot hack shared node");
2764	_igvn.replace_input_of(opqzm, `1`, main_limit);
2765
2766	return closed_range_checks;
2767	}
2768
2769	//------------------------------has_range_checks-------------------------------
2770	// Check to see if RCE cleaned the current loop of range-checks.
2771	void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) {
2772	assert(RangeCheckElimination, "");
2773
2774	// skip if not a counted loop
2775	if (!loop->is_counted()) return;
2776
2777	CountedLoopNode *cl = loop->_head->as_CountedLoop();
2778
2779	// skip this loop if it is already checked
2780	if (cl->has_been_range_checked()) return;
2781
2782	// Now check for existence of range checks
2783	for (uint i = `0`; i < loop->_body.size(); i++) {
2784	Node *iff = loop->_body [i];
2785	int iff_opc = iff->Opcode();
2786	if (iff_opc == Op_If \|\| iff_opc == Op_RangeCheck) {
2787	cl->mark_has_range_checks();
2788	break;
2789	}
2790	}
2791	cl->set_has_been_range_checked();
2792	}
2793
2794	//-------------------------multi_version_post_loops----------------------------
2795	// Check the range checks that remain, if simple, use the bounds to guard
2796	// which version to a post loop we execute, one with range checks or one without
2797	bool PhaseIdealLoop::multi_version_post_loops(IdealLoopTree rce_loop, IdealLoopTree legacy_loop) {
2798	bool multi_version_succeeded = false;
2799	assert(RangeCheckElimination, "");
2800	CountedLoopNode *legacy_cl = legacy_loop->_head->as_CountedLoop();
2801	assert(legacy_cl->is_post_loop(), "");
2802
2803	// Check for existence of range checks using the unique instance to make a guard with
2804	Unique_Node_List worklist;
2805	for (uint i = `0`; i < legacy_loop->_body.size(); i++) {
2806	Node *iff = legacy_loop->_body [i];
2807	int iff_opc = iff->Opcode();
2808	if (iff_opc == Op_If \|\| iff_opc == Op_RangeCheck) {
2809	worklist.push(iff);
2810	}
2811	}
2812
2813	// Find RCE'd post loop so that we can stage its guard.
2814	if (!is_canonical_loop_entry(legacy_cl)) return multi_version_succeeded;
2815	Node* ctrl = legacy_cl->in(LoopNode::EntryControl);
2816	Node* iffm = ctrl->in(`0`);
2817
2818	// Now we test that both the post loops are connected
2819	Node* post_loop_region = iffm->in(`0`);
2820	if (post_loop_region == NULL) return multi_version_succeeded;
2821	if (!post_loop_region->is_Region()) return multi_version_succeeded;
2822	Node* covering_region = post_loop_region->in(RegionNode::Control+`1`);
2823	if (covering_region == NULL) return multi_version_succeeded;
2824	if (!covering_region->is_Region()) return multi_version_succeeded;
2825	Node* p_f = covering_region->in(RegionNode::Control);
2826	if (p_f == NULL) return multi_version_succeeded;
2827	if (!p_f->is_IfFalse()) return multi_version_succeeded;
2828	if (!p_f->in(`0`)->is_CountedLoopEnd()) return multi_version_succeeded;
2829	CountedLoopEndNode* rce_loop_end = p_f->in(`0`)->as_CountedLoopEnd();
2830	if (rce_loop_end == NULL) return multi_version_succeeded;
2831	CountedLoopNode* rce_cl = rce_loop_end->loopnode();
2832	if (rce_cl == NULL \|\| !rce_cl->is_post_loop()) return multi_version_succeeded;
2833	CountedLoopNode *known_rce_cl = rce_loop->_head->as_CountedLoop();
2834	if (rce_cl != known_rce_cl) return multi_version_succeeded;
2835
2836	// Then we fetch the cover entry test
2837	ctrl = rce_cl->in(LoopNode::EntryControl);
2838	if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return multi_version_succeeded;
2839
2840	#ifndef PRODUCT
2841	if (TraceLoopOpts) {
2842	tty->print("PostMultiVersion\n");
2843	rce_loop->dump_head();
2844	legacy_loop->dump_head();
2845	}
2846	#endif
2847
2848	// Now fetch the limit we want to compare against
2849	Node *limit = rce_cl->limit();
2850	bool first_time = true;
2851
2852	// If we got this far, we identified the post loop which has been RCE'd and
2853	// we have a work list. Now we will try to transform the if guard to cause
2854	// the loop pair to be multi version executed with the determination left to runtime
2855	// or the optimizer if full information is known about the given arrays at compile time.
2856	Node *last_min = NULL;
2857	multi_version_succeeded = true;
2858	while (worklist.size()) {
2859	Node* rc_iffm = worklist.pop();
2860	if (rc_iffm->is_If()) {
2861	Node *rc_bolzm = rc_iffm->in(`1`);
2862	if (rc_bolzm->is_Bool()) {
2863	Node *rc_cmpzm = rc_bolzm->in(`1`);
2864	if (rc_cmpzm->is_Cmp()) {
2865	Node *rc_left = rc_cmpzm->in(`2`);
2866	if (rc_left->Opcode() != Op_LoadRange) {
2867	multi_version_succeeded = false;
2868	break;
2869	}
2870	if (first_time) {
2871	last_min = rc_left;
2872	first_time = false;
2873	} else {
2874	Node cur_min = new* MinINode (last_min, rc_left);
2875	last_min = cur_min;
2876	_igvn.register_new_node_with_optimizer(last_min);
2877	}
2878	}
2879	}
2880	}
2881	}
2882
2883	// All we have to do is update the limit of the rce loop
2884	// with the min of our expression and the current limit.
2885	// We will use this expression to replace the current limit.
2886	if (last_min && multi_version_succeeded) {
2887	Node cur_min = new* MinINode (last_min, limit);
2888	_igvn.register_new_node_with_optimizer(cur_min);
2889	Node *cmp_node = rce_loop_end->cmp_node();
2890	_igvn.replace_input_of(cmp_node, `2`, cur_min);
2891	set_ctrl(cur_min, ctrl);
2892	set_loop(cur_min, rce_loop->_parent);
2893
2894	legacy_cl->mark_is_multiversioned();
2895	rce_cl->mark_is_multiversioned();
2896	multi_version_succeeded = true;
2897
2898	C->set_major_progress();
2899	}
2900
2901	return multi_version_succeeded;
2902	}
2903
2904	//-------------------------poison_rce_post_loop--------------------------------
2905	// Causes the rce'd post loop to be optimized away if multiversioning fails
2906	void PhaseIdealLoop::poison_rce_post_loop(IdealLoopTree *rce_loop) {
2907	CountedLoopNode *rce_cl = rce_loop->_head->as_CountedLoop();
2908	Node* ctrl = rce_cl->in(LoopNode::EntryControl);
2909	if (ctrl->is_IfTrue() \|\| ctrl->is_IfFalse()) {
2910	Node* iffm = ctrl->in(`0`);
2911	if (iffm->is_If()) {
2912	Node* cur_bool = iffm->in(`1`);
2913	if (cur_bool->is_Bool()) {
2914	Node* cur_cmp = cur_bool->in(`1`);
2915	if (cur_cmp->is_Cmp()) {
2916	BoolTest::mask new_test = BoolTest::gt;
2917	BoolNode new_bool = new* BoolNode (cur_cmp, new_test);
2918	_igvn.replace_node(cur_bool, new_bool);
2919	_igvn._worklist.push(new_bool);
2920	Node* left_op = cur_cmp->in(`1`);
2921	_igvn.replace_input_of(cur_cmp, `2`, left_op);
2922	C->set_major_progress();
2923	}
2924	}
2925	}
2926	}
2927	}
2928
2929	//------------------------------DCE_loop_body----------------------------------
2930	// Remove simplistic dead code from loop body
2931	void IdealLoopTree::DCE_loop_body() {
2932	for (uint i = `0`; i < _body.size(); i++) {
2933	if (_body.at(i)->outcnt() == `0`) {
2934	_body.map(i, _body.pop());
2935	i--; // Ensure we revisit the updated index.
2936	}
2937	}
2938	}
2939
2940
2941	//------------------------------adjust_loop_exit_prob--------------------------
2942	// Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
2943	// Replace with a 1-in-10 exit guess.
2944	void IdealLoopTree::adjust_loop_exit_prob(PhaseIdealLoop *phase) {
2945	Node *test = tail();
2946	while (test != _head) {
2947	uint top = test->Opcode();
2948	if (top == Op_IfTrue \|\| top == Op_IfFalse) {
2949	int test_con = ((ProjNode*)test)->_con;
2950	assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity");
2951	IfNode *iff = test->in(`0`)->as_If();
2952	if (iff->outcnt() == `2`) { // Ignore dead tests
2953	Node *bol = iff->in(`1`);
2954	if (bol && bol->req() > `1` && bol->in(`1`) &&
2955	((bol->in(`1`)->Opcode() == Op_StorePConditional) \|\|
2956	(bol->in(`1`)->Opcode() == Op_StoreIConditional) \|\|
2957	(bol->in(`1`)->Opcode() == Op_StoreLConditional) \|\|
2958	(bol->in(`1`)->Opcode() == Op_CompareAndExchangeB) \|\|
2959	(bol->in(`1`)->Opcode() == Op_CompareAndExchangeS) \|\|
2960	(bol->in(`1`)->Opcode() == Op_CompareAndExchangeI) \|\|
2961	(bol->in(`1`)->Opcode() == Op_CompareAndExchangeL) \|\|
2962	(bol->in(`1`)->Opcode() == Op_CompareAndExchangeP) \|\|
2963	(bol->in(`1`)->Opcode() == Op_CompareAndExchangeN) \|\|
2964	(bol->in(`1`)->Opcode() == Op_WeakCompareAndSwapB) \|\|
2965	(bol->in(`1`)->Opcode() == Op_WeakCompareAndSwapS) \|\|
2966	(bol->in(`1`)->Opcode() == Op_WeakCompareAndSwapI) \|\|
2967	(bol->in(`1`)->Opcode() == Op_WeakCompareAndSwapL) \|\|
2968	(bol->in(`1`)->Opcode() == Op_WeakCompareAndSwapP) \|\|
2969	(bol->in(`1`)->Opcode() == Op_WeakCompareAndSwapN) \|\|
2970	(bol->in(`1`)->Opcode() == Op_CompareAndSwapB) \|\|
2971	(bol->in(`1`)->Opcode() == Op_CompareAndSwapS) \|\|
2972	(bol->in(`1`)->Opcode() == Op_CompareAndSwapI) \|\|
2973	(bol->in(`1`)->Opcode() == Op_CompareAndSwapL) \|\|
2974	(bol->in(`1`)->Opcode() == Op_CompareAndSwapP) \|\|
2975	(bol->in(`1`)->Opcode() == Op_CompareAndSwapN) \|\|
2976	(bol->in(`1`)->Opcode() == Op_ShenandoahCompareAndExchangeP) \|\|
2977	(bol->in(`1`)->Opcode() == Op_ShenandoahCompareAndExchangeN) \|\|
2978	(bol->in(`1`)->Opcode() == Op_ShenandoahWeakCompareAndSwapP) \|\|
2979	(bol->in(`1`)->Opcode() == Op_ShenandoahWeakCompareAndSwapN) \|\|
2980	(bol->in(`1`)->Opcode() == Op_ShenandoahCompareAndSwapP) \|\|
2981	(bol->in(`1`)->Opcode() == Op_ShenandoahCompareAndSwapN)))
2982	return; // Allocation loops RARELY take backedge
2983	// Find the OTHER exit path from the IF
2984	Node* ex = iff->proj_out(`1`-test_con);
2985	float p = iff->_prob;
2986	if (!phase->is_member(this, ex) && iff->_fcnt == COUNT_UNKNOWN) {
2987	if (top == Op_IfTrue) {
2988	if (p < (PROB_FAIR + PROB_UNLIKELY_MAG(`3`))) {
2989	iff->_prob = PROB_STATIC_FREQUENT;
2990	}
2991	} else {
2992	if (p > (PROB_FAIR - PROB_UNLIKELY_MAG(`3`))) {
2993	iff->_prob = PROB_STATIC_INFREQUENT;
2994	}
2995	}
2996	}
2997	}
2998	}
2999	test = phase->idom(test);
3000	}
3001	}
3002
3003	#ifdef ASSERT
3004	static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
3005	Node *ctrl = cl->skip_predicates();
3006	assert(ctrl->Opcode() == Op_IfTrue \|\| ctrl->Opcode() == Op_IfFalse, "");
3007	Node *iffm = ctrl->in(`0`);
3008	assert(iffm->Opcode() == Op_If, "");
3009	Node *p_f = iffm->in(`0`);
3010	assert(p_f->Opcode() == Op_IfFalse, "");
3011	CountedLoopEndNode *pre_end = p_f->in(`0`)->as_CountedLoopEnd();
3012	assert(pre_end->loopnode()->is_pre_loop(), "");
3013	return pre_end->loopnode();
3014	}
3015	#endif
3016
3017	// Remove the main and post loops and make the pre loop execute all
3018	// iterations. Useful when the pre loop is found empty.
3019	void IdealLoopTree::remove_main_post_loops(CountedLoopNode cl, PhaseIdealLoop phase) {
3020	CountedLoopEndNode* pre_end = cl->loopexit();
3021	Node* pre_cmp = pre_end->cmp_node();
3022	if (pre_cmp->in(`2`)->Opcode() != Op_Opaque1) {
3023	// Only safe to remove the main loop if the compiler optimized it
3024	// out based on an unknown number of iterations
3025	return;
3026	}
3027
3028	// Can we find the main loop?
3029	if (_next == NULL) {
3030	return;
3031	}
3032
3033	Node* next_head = _next->_head;
3034	if (!next_head->is_CountedLoop()) {
3035	return;
3036	}
3037
3038	CountedLoopNode* main_head = next_head->as_CountedLoop();
3039	if (!main_head->is_main_loop()) {
3040	return;
3041	}
3042
3043	assert(locate_pre_from_main(main_head) == cl, "bad main loop");
3044	Node* main_iff = main_head->skip_predicates()->in(`0`);
3045
3046	// Remove the Opaque1Node of the pre loop and make it execute all iterations
3047	phase->_igvn.replace_input_of(pre_cmp, `2`, pre_cmp->in(`2`)->in(`2`));
3048	// Remove the Opaque1Node of the main loop so it can be optimized out
3049	Node* main_cmp = main_iff->in(`1`)->in(`1`);
3050	assert(main_cmp->in(`2`)->Opcode() == Op_Opaque1, "main loop has no opaque node?");
3051	phase->_igvn.replace_input_of(main_cmp, `2`, main_cmp->in(`2`)->in(`1`));
3052	}
3053
3054	//------------------------------do_remove_empty_loop---------------------------
3055	// We always attempt remove empty loops. The approach is to replace the trip
3056	// counter with the value it will have on the last iteration. This will break
3057	// the loop.
3058	bool IdealLoopTree::do_remove_empty_loop(PhaseIdealLoop *phase) {
3059	// Minimum size must be empty loop
3060	if (_body.size() > EMPTY_LOOP_SIZE) {
3061	return false;
3062	}
3063	if (!_head->is_CountedLoop()) {
3064	return false; // Dead loop
3065	}
3066	CountedLoopNode *cl = _head->as_CountedLoop();
3067	if (!cl->is_valid_counted_loop()) {
3068	return false; // Malformed loop
3069	}
3070	if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)))) {
3071	return false; // Infinite loop
3072	}
3073	if (cl->is_pre_loop()) {
3074	// If the loop we are removing is a pre-loop then the main and post loop
3075	// can be removed as well.
3076	remove_main_post_loops(cl, phase);
3077	}
3078
3079	#ifdef ASSERT
3080	// Ensure only one phi which is the iv.
3081	Node* iv = NULL;
3082	for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) {
3083	Node* n = cl->fast_out(i);
3084	if (n->Opcode() == Op_Phi) {
3085	assert(iv == NULL, "Too many phis");
3086	iv = n;
3087	}
3088	}
3089	assert(iv == cl->phi(), "Wrong phi");
3090	#endif
3091
3092	// main and post loops have explicitly created zero trip guard
3093	bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
3094	if (needs_guard) {
3095	// Skip guard if values not overlap.
3096	const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
3097	const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
3098	int stride_con = cl->stride_con();
3099	if (stride_con > `0`) {
3100	needs_guard = (init_t->_hi >= limit_t->_lo);
3101	} else {
3102	needs_guard = (init_t->_lo <= limit_t->_hi);
3103	}
3104	}
3105	if (needs_guard) {
3106	// Check for an obvious zero trip guard.
3107	Node* inctrl = PhaseIdealLoop::skip_all_loop_predicates(cl->skip_predicates());
3108	if (inctrl->Opcode() == Op_IfTrue \|\| inctrl->Opcode() == Op_IfFalse) {
3109	bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
3110	// The test should look like just the backedge of a CountedLoop
3111	Node* iff = inctrl->in(`0`);
3112	if (iff->is_If()) {
3113	Node* bol = iff->in(`1`);
3114	if (bol->is_Bool()) {
3115	BoolTest test = bol->as_Bool()->_test;
3116	if (maybe_swapped) {
3117	test._test = test.commute();
3118	test._test = test.negate();
3119	}
3120	if (test._test == cl->loopexit()->test_trip()) {
3121	Node* cmp = bol->in(`1`);
3122	int init_idx = maybe_swapped ? `2` : `1`;
3123	int limit_idx = maybe_swapped ? `1` : `2`;
3124	if (cmp->is_Cmp() && cmp->in(init_idx) == cl->init_trip() && cmp->in(limit_idx) == cl->limit()) {
3125	needs_guard = false;
3126	}
3127	}
3128	}
3129	}
3130	}
3131	}
3132
3133	#ifndef PRODUCT
3134	if (PrintOpto) {
3135	tty->print("Removing empty loop with%s zero trip guard", needs_guard ? "out" : "");
3136	this->dump_head();
3137	} else if (TraceLoopOpts) {
3138	tty->print("Empty with%s zero trip guard ", needs_guard ? "out" : "");
3139	this->dump_head();
3140	}
3141	#endif
3142
3143	if (needs_guard) {
3144	// Peel the loop to ensure there's a zero trip guard
3145	Node_List old_new;
3146	phase->do_peeling(this, old_new);
3147	}
3148
3149	// Replace the phi at loop head with the final value of the last
3150	// iteration. Then the CountedLoopEnd will collapse (backedge never
3151	// taken) and all loop-invariant uses of the exit values will be correct.
3152	Node *phi = cl->phi();
3153	Node exact_limit = phase->exact_limit(this*);
3154	if (exact_limit != cl->limit()) {
3155	// We also need to replace the original limit to collapse loop exit.
3156	Node* cmp = cl->loopexit()->cmp_node();
3157	assert(cl->limit() == cmp->in(`2`), "sanity");
3158	phase->_igvn._worklist.push(cmp->in(`2`)); // put limit on worklist
3159	phase->_igvn.replace_input_of(cmp, `2`, exact_limit); // put cmp on worklist
3160	}
3161	// Note: the final value after increment should not overflow since
3162	// counted loop has limit check predicate.
3163	Node final = new* SubINode (exact_limit, cl->stride());
3164	phase->register_new_node(final,cl->in(LoopNode::EntryControl));
3165	phase->_igvn.replace_node(phi,final);
3166	phase->C->set_major_progress();
3167	return true;
3168	}
3169
3170	//------------------------------do_one_iteration_loop--------------------------
3171	// Convert one iteration loop into normal code.
3172	bool IdealLoopTree::do_one_iteration_loop(PhaseIdealLoop *phase) {
3173	if (!_head->as_Loop()->is_valid_counted_loop()) {
3174	return false; // Only for counted loop
3175	}
3176	CountedLoopNode *cl = _head->as_CountedLoop();
3177	if (!cl->has_exact_trip_count() \|\| cl->trip_count() != `1`) {
3178	return false;
3179	}
3180
3181	#ifndef PRODUCT
3182	if (TraceLoopOpts) {
3183	tty->print("OneIteration ");
3184	this->dump_head();
3185	}
3186	#endif
3187
3188	Node *init_n = cl->init_trip();
3189	#ifdef ASSERT
3190	// Loop boundaries should be constant since trip count is exact.
3191	assert(init_n->get_int() + cl->stride_con() >= cl->limit()->get_int(), "should be one iteration");
3192	#endif
3193	// Replace the phi at loop head with the value of the init_trip.
3194	// Then the CountedLoopEnd will collapse (backedge will not be taken)
3195	// and all loop-invariant uses of the exit values will be correct.
3196	phase->_igvn.replace_node(cl->phi(), cl->init_trip());
3197	phase->C->set_major_progress();
3198	return true;
3199	}
3200
3201	//=============================================================================
3202	//------------------------------iteration_split_impl---------------------------
3203	bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_new) {
3204	// Compute loop trip count if possible.
3205	compute_trip_count(phase);
3206
3207	// Convert one iteration loop into normal code.
3208	if (do_one_iteration_loop(phase)) {
3209	return true;
3210	}
3211	// Check and remove empty loops (spam micro-benchmarks)
3212	if (do_remove_empty_loop(phase)) {
3213	return true; // Here we removed an empty loop
3214	}
3215
3216	AutoNodeBudget node_budget(phase);
3217
3218	// Non-counted loops may be peeled; exactly 1 iteration is peeled.
3219	// This removes loop-invariant tests (usually null checks).
3220	if (!_head->is_CountedLoop()) { // Non-counted loop
3221	if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
3222	// Partial peel succeeded so terminate this round of loop opts
3223	return false;
3224	}
3225	if (policy_peeling(phase)) { // Should we peel?
3226	if (PrintOpto) { tty->print_cr("should_peel"); }
3227	phase->do_peeling(this, old_new);
3228	} else if (policy_unswitching(phase)) {
3229	phase->do_unswitching(this, old_new);
3230	}
3231	return true;
3232	}
3233	CountedLoopNode *cl = _head->as_CountedLoop();
3234
3235	if (!cl->is_valid_counted_loop()) return true; // Ignore various kinds of broken loops
3236
3237	// Do nothing special to pre- and post- loops
3238	if (cl->is_pre_loop() \|\| cl->is_post_loop()) return true;
3239
3240	// Compute loop trip count from profile data
3241	compute_profile_trip_cnt(phase);
3242
3243	// Before attempting fancy unrolling, RCE or alignment, see if we want
3244	// to completely unroll this loop or do loop unswitching.
3245	if (cl->is_normal_loop()) {
3246	if (policy_unswitching(phase)) {
3247	phase->do_unswitching(this, old_new);
3248	return true;
3249	}
3250	if (policy_maximally_unroll(phase)) {
3251	// Here we did some unrolling and peeling. Eventually we will
3252	// completely unroll this loop and it will no longer be a loop.
3253	phase->do_maximally_unroll(this, old_new);
3254	return true;
3255	}
3256	}
3257
3258	uint est_peeling = estimate_peeling(phase);
3259	bool should_peel = `0` < est_peeling;
3260
3261	// Counted loops may be peeled, may need some iterations run up
3262	// front for RCE, and may want to align loop refs to a cache
3263	// line. Thus we clone a full loop up front whose trip count is
3264	// at least 1 (if peeling), but may be several more.
3265
3266	// The main loop will start cache-line aligned with at least 1
3267	// iteration of the unrolled body (zero-trip test required) and
3268	// will have some range checks removed.
3269
3270	// A post-loop will finish any odd iterations (leftover after
3271	// unrolling), plus any needed for RCE purposes.
3272
3273	bool should_unroll = policy_unroll(phase);
3274	bool should_rce = policy_range_check(phase);
3275	// TODO: Remove align -- not used.
3276	bool should_align = policy_align(phase);
3277
3278	// If not RCE'ing (iteration splitting) or Aligning, then we do not need a
3279	// pre-loop. We may still need to peel an initial iteration but we will not
3280	// be needing an unknown number of pre-iterations.
3281	//
3282	// Basically, if may_rce_align reports FALSE first time through, we will not
3283	// be able to later do RCE or Aligning on this loop.
3284	bool may_rce_align = !policy_peel_only(phase) \|\| should_rce \|\| should_align;
3285
3286	// If we have any of these conditions (RCE, alignment, unrolling) met, then
3287	// we switch to the pre-/main-/post-loop model. This model also covers
3288	// peeling.
3289	if (should_rce \|\| should_align \|\| should_unroll) {
3290	if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
3291	uint estimate = est_loop_clone_sz(`3`);
3292	if (!phase->may_require_nodes(estimate)) {
3293	return false;
3294	}
3295	phase->insert_pre_post_loops(this, old_new, !may_rce_align);
3296	}
3297	// Adjust the pre- and main-loop limits to let the pre and post loops run
3298	// with full checks, but the main-loop with no checks. Remove said checks
3299	// from the main body.
3300	if (should_rce) {
3301	if (phase->do_range_check(this, old_new) != `0`) {
3302	cl->mark_has_range_checks();
3303	}
3304	} else if (PostLoopMultiversioning) {
3305	phase->has_range_checks(this);
3306	}
3307
3308	if (should_unroll && !should_peel && PostLoopMultiversioning) {
3309	// Try to setup multiversioning on main loops before they are unrolled
3310	if (cl->is_main_loop() && (cl->unrolled_count() == `1`)) {
3311	phase->insert_scalar_rced_post_loop(this, old_new);
3312	}
3313	}
3314
3315	// Double loop body for unrolling. Adjust the minimum-trip test (will do
3316	// twice as many iterations as before) and the main body limit (only do
3317	// an even number of trips). If we are peeling, we might enable some RCE
3318	// and we'd rather unroll the post-RCE'd loop SO... do not unroll if
3319	// peeling.
3320	if (should_unroll && !should_peel) {
3321	if (SuperWordLoopUnrollAnalysis) {
3322	phase->insert_vector_post_loop(this, old_new);
3323	}
3324	phase->do_unroll(this, old_new, true);
3325	}
3326
3327	// Adjust the pre-loop limits to align the main body iterations.
3328	if (should_align) {
3329	Unimplemented();
3330	}
3331	} else { // Else we have an unchanged counted loop
3332	if (should_peel) { // Might want to peel but do nothing else
3333	if (phase->may_require_nodes(est_peeling)) {
3334	phase->do_peeling(this, old_new);
3335	}
3336	}
3337	}
3338	return true;
3339	}
3340
3341
3342	//=============================================================================
3343	//------------------------------iteration_split--------------------------------
3344	bool IdealLoopTree::iteration_split(PhaseIdealLoop* phase, Node_List &old_new) {
3345	// Recursively iteration split nested loops
3346	if (_child && !_child->iteration_split(phase, old_new)) {
3347	return false;
3348	}
3349
3350	// Clean out prior deadwood
3351	DCE_loop_body();
3352
3353	// Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
3354	// Replace with a 1-in-10 exit guess.
3355	if (!is_root() && is_loop()) {
3356	adjust_loop_exit_prob(phase);
3357	}
3358
3359	// Unrolling, RCE and peeling efforts, iff innermost loop.
3360	if (_allow_optimizations && is_innermost()) {
3361	if (!_has_call) {
3362	if (!iteration_split_impl(phase, old_new)) {
3363	return false;
3364	}
3365	} else {
3366	AutoNodeBudget node_budget(phase);
3367	if (policy_unswitching(phase)) {
3368	phase->do_unswitching(this, old_new);
3369	}
3370	}
3371	}
3372
3373	// Minor offset re-organization to remove loop-fallout uses of
3374	// trip counter when there was no major reshaping.
3375	phase->reorg_offsets(this);
3376
3377	if (_next && !_next->iteration_split(phase, old_new)) {
3378	return false;
3379	}
3380	return true;
3381	}
3382
3383
3384	//=============================================================================
3385	// Process all the loops in the loop tree and replace any fill
3386	// patterns with an intrinsic version.
3387	bool PhaseIdealLoop::do_intrinsify_fill() {
3388	bool changed = false;
3389	for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
3390	IdealLoopTree* lpt = iter.current();
3391	changed \|= intrinsify_fill(lpt);
3392	}
3393	return changed;
3394	}
3395
3396
3397	// Examine an inner loop looking for a a single store of an invariant
3398	// value in a unit stride loop,
3399	bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node& store, Node& store_value,
3400	Node& shift, Node& con) {
3401	const char* msg = NULL;
3402	Node* msg_node = NULL;
3403
3404	store_value = NULL;
3405	con = NULL;
3406	shift = NULL;
3407
3408	// Process the loop looking for stores. If there are multiple
3409	// stores or extra control flow give at this point.
3410	CountedLoopNode* head = lpt->_head->as_CountedLoop();
3411	for (uint i = `0`; msg == NULL && i < lpt->_body.size(); i++) {
3412	Node* n = lpt->_body.at(i);
3413	if (n->outcnt() == `0`) continue; // Ignore dead
3414	if (n->is_Store()) {
3415	if (store != NULL) {
3416	msg = "multiple stores";
3417	break;
3418	}
3419	int opc = n->Opcode();
3420	if (opc == Op_StoreP \|\| opc == Op_StoreN \|\| opc == Op_StoreNKlass \|\| opc == Op_StoreCM) {
3421	msg = "oop fills not handled";
3422	break;
3423	}
3424	Node* value = n->in(MemNode::ValueIn);
3425	if (!lpt->is_invariant(value)) {
3426	msg = "variant store value";
3427	} else if (!_igvn.type(n->in(MemNode::Address))->isa_aryptr()) {
3428	msg = "not array address";
3429	}
3430	store = n;
3431	store_value = value;
3432	} else if (n->is_If() && n != head->loopexit_or_null()) {
3433	msg = "extra control flow";
3434	msg_node = n;
3435	}
3436	}
3437
3438	if (store == NULL) {
3439	// No store in loop
3440	return false;
3441	}
3442
3443	if (msg == NULL && head->stride_con() != `1`) {
3444	// could handle negative strides too
3445	if (head->stride_con() < `0`) {
3446	msg = "negative stride";
3447	} else {
3448	msg = "non-unit stride";
3449	}
3450	}
3451
3452	if (msg == NULL && !store->in(MemNode::Address)->is_AddP()) {
3453	msg = "can't handle store address";
3454	msg_node = store->in(MemNode::Address);
3455	}
3456
3457	if (msg == NULL &&
3458	(!store->in(MemNode::Memory)->is_Phi() \|\|
3459	store->in(MemNode::Memory)->in(LoopNode::LoopBackControl) != store)) {
3460	msg = "store memory isn't proper phi";
3461	msg_node = store->in(MemNode::Memory);
3462	}
3463
3464	// Make sure there is an appropriate fill routine
3465	BasicType t = store->as_Mem()->memory_type();
3466	const char* fill_name;
3467	if (msg == NULL &&
3468	StubRoutines::select_fill_function(t, false, fill_name) == NULL) {
3469	msg = "unsupported store";
3470	msg_node = store;
3471	}
3472
3473	if (msg != NULL) {
3474	#ifndef PRODUCT
3475	if (TraceOptimizeFill) {
3476	tty->print_cr("not fill intrinsic candidate: %s", msg);
3477	if (msg_node != NULL) msg_node->dump();
3478	}
3479	#endif
3480	return false;
3481	}
3482
3483	// Make sure the address expression can be handled. It should be
3484	// head->phi elsize + con. head->phi might have a ConvI2L(CastII()).*
3485	Node* elements[`4`];
3486	Node* cast = NULL;
3487	Node* conv = NULL;
3488	bool found_index = false;
3489	int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
3490	for (int e = `0`; e < count; e++) {
3491	Node* n = elements[e];
3492	if (n->is_Con() && con == NULL) {
3493	con = n;
3494	} else if (n->Opcode() == Op_LShiftX && shift == NULL) {
3495	Node* value = n->in(`1`);
3496	#ifdef _LP64
3497	if (value->Opcode() == Op_ConvI2L) {
3498	conv = value;
3499	value = value->in(`1`);
3500	}
3501	if (value->Opcode() == Op_CastII &&
3502	value->as_CastII()->has_range_check()) {
3503	// Skip range check dependent CastII nodes
3504	cast = value;
3505	value = value->in(`1`);
3506	}
3507	#endif
3508	if (value != head->phi()) {
3509	msg = "unhandled shift in address";
3510	} else {
3511	if (type2aelembytes(store->as_Mem()->memory_type(), true) != (`1` << n->in(`2`)->get_int())) {
3512	msg = "scale doesn't match";
3513	} else {
3514	found_index = true;
3515	shift = n;
3516	}
3517	}
3518	} else if (n->Opcode() == Op_ConvI2L && conv == NULL) {
3519	conv = n;
3520	n = n->in(`1`);
3521	if (n->Opcode() == Op_CastII &&
3522	n->as_CastII()->has_range_check()) {
3523	// Skip range check dependent CastII nodes
3524	cast = n;
3525	n = n->in(`1`);
3526	}
3527	if (n == head->phi()) {
3528	found_index = true;
3529	} else {
3530	msg = "unhandled input to ConvI2L";
3531	}
3532	} else if (n == head->phi()) {
3533	// no shift, check below for allowed cases
3534	found_index = true;
3535	} else {
3536	msg = "unhandled node in address";
3537	msg_node = n;
3538	}
3539	}
3540
3541	if (count == -`1`) {
3542	msg = "malformed address expression";
3543	msg_node = store;
3544	}
3545
3546	if (!found_index) {
3547	msg = "missing use of index";
3548	}
3549
3550	// byte sized items won't have a shift
3551	if (msg == NULL && shift == NULL && t != T_BYTE && t != T_BOOLEAN) {
3552	msg = "can't find shift";
3553	msg_node = store;
3554	}
3555
3556	if (msg != NULL) {
3557	#ifndef PRODUCT
3558	if (TraceOptimizeFill) {
3559	tty->print_cr("not fill intrinsic: %s", msg);
3560	if (msg_node != NULL) msg_node->dump();
3561	}
3562	#endif
3563	return false;
3564	}
3565
3566	// No make sure all the other nodes in the loop can be handled
3567	VectorSet ok(Thread::current()->resource_area());
3568
3569	// store related values are ok
3570	ok.set(store->_idx);
3571	ok.set(store->in(MemNode::Memory)->_idx);
3572
3573	CountedLoopEndNode* loop_exit = head->loopexit();
3574
3575	// Loop structure is ok
3576	ok.set(head->_idx);
3577	ok.set(loop_exit->_idx);
3578	ok.set(head->phi()->_idx);
3579	ok.set(head->incr()->_idx);
3580	ok.set(loop_exit->cmp_node()->_idx);
3581	ok.set(loop_exit->in(`1`)->_idx);
3582
3583	// Address elements are ok
3584	if (con) ok.set(con->_idx);
3585	if (shift) ok.set(shift->_idx);
3586	if (cast) ok.set(cast->_idx);
3587	if (conv) ok.set(conv->_idx);
3588
3589	for (uint i = `0`; msg == NULL && i < lpt->_body.size(); i++) {
3590	Node* n = lpt->_body.at(i);
3591	if (n->outcnt() == `0`) continue; // Ignore dead
3592	if (ok.test(n->_idx)) continue;
3593	// Backedge projection is ok
3594	if (n->is_IfTrue() && n->in(`0`) == loop_exit) continue;
3595	if (!n->is_AddP()) {
3596	msg = "unhandled node";
3597	msg_node = n;
3598	break;
3599	}
3600	}
3601
3602	// Make sure no unexpected values are used outside the loop
3603	for (uint i = `0`; msg == NULL && i < lpt->_body.size(); i++) {
3604	Node* n = lpt->_body.at(i);
3605	// These values can be replaced with other nodes if they are used
3606	// outside the loop.
3607	if (n == store \|\| n == loop_exit \|\| n == head->incr() \|\| n == store->in(MemNode::Memory)) continue;
3608	for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) {
3609	Node* use = iter.get();
3610	if (!lpt->_body.contains(use)) {
3611	msg = "node is used outside loop";
3612	// lpt->_body.dump();
3613	msg_node = n;
3614	break;
3615	}
3616	}
3617	}
3618
3619	#ifdef ASSERT
3620	if (TraceOptimizeFill) {
3621	if (msg != NULL) {
3622	tty->print_cr("no fill intrinsic: %s", msg);
3623	if (msg_node != NULL) msg_node->dump();
3624	} else {
3625	tty->print_cr("fill intrinsic for:");
3626	}
3627	store->dump();
3628	if (Verbose) {
3629	lpt->_body.dump();
3630	}
3631	}
3632	#endif
3633
3634	return msg == NULL;
3635	}
3636
3637
3638
3639	bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
3640	// Only for counted inner loops
3641	if (!lpt->is_counted() \|\| !lpt->is_innermost()) {
3642	return false;
3643	}
3644
3645	// Must have constant stride
3646	CountedLoopNode* head = lpt->_head->as_CountedLoop();
3647	if (!head->is_valid_counted_loop() \|\| !head->is_normal_loop()) {
3648	return false;
3649	}
3650
3651	head->verify_strip_mined(`1`);
3652
3653	// Check that the body only contains a store of a loop invariant
3654	// value that is indexed by the loop phi.
3655	Node* store = NULL;
3656	Node* store_value = NULL;
3657	Node* shift = NULL;
3658	Node* offset = NULL;
3659	if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
3660	return false;
3661	}
3662
3663	Node* exit = head->loopexit()->proj_out_or_null(`0`);
3664	if (exit == NULL) {
3665	return false;
3666	}
3667
3668	#ifndef PRODUCT
3669	if (TraceLoopOpts) {
3670	tty->print("ArrayFill ");
3671	lpt->dump_head();
3672	}
3673	#endif
3674
3675	// Now replace the whole loop body by a call to a fill routine that
3676	// covers the same region as the loop.
3677	Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);
3678
3679	// Build an expression for the beginning of the copy region
3680	Node* index = head->init_trip();
3681	#ifdef _LP64
3682	index = new ConvI2LNode (index);
3683	_igvn.register_new_node_with_optimizer(index);
3684	#endif
3685	if (shift != NULL) {
3686	// byte arrays don't require a shift but others do.
3687	index = new LShiftXNode (index, shift->in(`2`));
3688	_igvn.register_new_node_with_optimizer(index);
3689	}
3690	index = new AddPNode (base, base, index);
3691	_igvn.register_new_node_with_optimizer(index);
3692	Node* from = new AddPNode (base, index, offset);
3693	_igvn.register_new_node_with_optimizer(from);
3694	// Compute the number of elements to copy
3695	Node* len = new SubINode (head->limit(), head->init_trip());
3696	_igvn.register_new_node_with_optimizer(len);
3697
3698	BasicType t = store->as_Mem()->memory_type();
3699	bool aligned = false;
3700	if (offset != NULL && head->init_trip()->is_Con()) {
3701	int element_size = type2aelembytes(t);
3702	aligned = (offset->find_intptr_t_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == `0`;
3703	}
3704
3705	// Build a call to the fill routine
3706	const char* fill_name;
3707	address fill = StubRoutines::select_fill_function(t, aligned, fill_name);
3708	assert(fill != NULL, "what?");
3709
3710	// Convert float/double to int/long for fill routines
3711	if (t == T_FLOAT) {
3712	store_value = new MoveF2INode (store_value);
3713	_igvn.register_new_node_with_optimizer(store_value);
3714	} else if (t == T_DOUBLE) {
3715	store_value = new MoveD2LNode (store_value);
3716	_igvn.register_new_node_with_optimizer(store_value);
3717	}
3718
3719	Node* mem_phi = store->in(MemNode::Memory);
3720	Node* result_ctrl;
3721	Node* result_mem;
3722	const TypeFunc* call_type = OptoRuntime::array_fill_Type();
3723	CallLeafNode call = new* CallLeafNoFPNode (call_type, fill,
3724	fill_name, TypeAryPtr::get_array_body_type(t));
3725	uint cnt = `0`;
3726	call->init_req(TypeFunc::Parms + cnt++, from);
3727	call->init_req(TypeFunc::Parms + cnt++, store_value);
3728	#ifdef _LP64
3729	len = new ConvI2LNode (len);
3730	_igvn.register_new_node_with_optimizer(len);
3731	#endif
3732	call->init_req(TypeFunc::Parms + cnt++, len);
3733	#ifdef _LP64
3734	call->init_req(TypeFunc::Parms + cnt++, C->top());
3735	#endif
3736	call->init_req(TypeFunc::Control, head->init_control());
3737	call->init_req(TypeFunc::I_O, C->top()); // Does no I/O.
3738	call->init_req(TypeFunc::Memory, mem_phi->in(LoopNode::EntryControl));
3739	call->init_req(TypeFunc::ReturnAdr, C->start()->proj_out_or_null(TypeFunc::ReturnAdr));
3740	call->init_req(TypeFunc::FramePtr, C->start()->proj_out_or_null(TypeFunc::FramePtr));
3741	_igvn.register_new_node_with_optimizer(call);
3742	result_ctrl = new ProjNode (call,TypeFunc::Control);
3743	_igvn.register_new_node_with_optimizer(result_ctrl);
3744	result_mem = new ProjNode (call,TypeFunc::Memory);
3745	_igvn.register_new_node_with_optimizer(result_mem);
3746
3747	/ Disable following optimization until proper fix (add missing checks).*
3748
3749	// If this fill is tightly coupled to an allocation and overwrites
3750	// the whole body, allow it to take over the zeroing.
3751	AllocateNode alloc = AllocateNode::Ideal_allocation(base, this);*
3752	if (alloc != NULL && alloc->is_AllocateArray()) {
3753	Node length = alloc->as_AllocateArray()->Ideal_length();*
3754	if (head->limit() == length &&
3755	head->init_trip() == _igvn.intcon(0)) {
3756	if (TraceOptimizeFill) {
3757	tty->print_cr("Eliminated zeroing in allocation");
3758	}
3759	alloc->maybe_set_complete(&_igvn);
3760	} else {
3761	#ifdef ASSERT
3762	if (TraceOptimizeFill) {
3763	tty->print_cr("filling array but bounds don't match");
3764	alloc->dump();
3765	head->init_trip()->dump();
3766	head->limit()->dump();
3767	length->dump();
3768	}
3769	#endif
3770	}
3771	}
3772	*/
3773
3774	if (head->is_strip_mined()) {
3775	// Inner strip mined loop goes away so get rid of outer strip
3776	// mined loop
3777	Node* outer_sfpt = head->outer_safepoint();
3778	Node* in = outer_sfpt->in(`0`);
3779	Node* outer_out = head->outer_loop_exit();
3780	lazy_replace(outer_out, in);
3781	_igvn.replace_input_of(outer_sfpt, `0`, C->top());
3782	}
3783
3784	// Redirect the old control and memory edges that are outside the loop.
3785	// Sometimes the memory phi of the head is used as the outgoing
3786	// state of the loop. It's safe in this case to replace it with the
3787	// result_mem.
3788	_igvn.replace_node(store->in(MemNode::Memory), result_mem);
3789	lazy_replace(exit, result_ctrl);
3790	_igvn.replace_node(store, result_mem);
3791	// Any uses the increment outside of the loop become the loop limit.
3792	_igvn.replace_node(head->incr(), head->limit());
3793
3794	// Disconnect the head from the loop.
3795	for (uint i = `0`; i < lpt->_body.size(); i++) {
3796	Node* n = lpt->_body.at(i);
3797	_igvn.replace_node(n, C->top());
3798	}
3799
3800	return true;
3801	}
3802

Browse the source code of OpenJDK/src/hotspot/share/opto/loopTransform.cpp