/*
 * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/compiledIC.hpp"
#include "code/debugInfo.hpp"
#include "code/debugInfoRec.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/compilerDirectives.hpp"
#include "compiler/oopMap.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/ad.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/locknode.hpp"
#include "opto/machnode.hpp"
#include "opto/optoreg.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "opto/type.hpp"
#include "runtime/handles.inline.hpp"
#include "utilities/xmlstream.hpp"

#ifndef PRODUCT
#define DEBUG_ARG(x) , x
#else
#define DEBUG_ARG(x)
#endif
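// DEBUG_ARG(x) expands to ", x" only in non-product builds, so a debug-only
// trailing argument can be appended to a parameter list without wrapping
// every call site in #ifndef PRODUCT.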

// Convert Nodes to instruction bits and pass off to the VM
void Compile::Output() {
  // The root block should contain no instructions at this point.
  assert( _cfg->get_root_block()->number_of_nodes() == 0, "" );

  // The number of new nodes (mostly MachNop) is proportional to
  // the number of java calls and inner loops which are aligned.
  if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
                            C->inner_loops()*(OptoLoopAlignment-1)),
                           "out of nodes before code generation" ) ) {
    return;
  }
  // Make sure I can find the Start Node
  Block *entry = _cfg->get_block(1);
  Block *broot = _cfg->get_root_block();

  const StartNode *start = entry->head()->as_Start();

  // Replace StartNode with prolog
  MachPrologNode *prolog = new MachPrologNode();
  entry->map_node(prolog, 0);
  _cfg->map_node_to_block(prolog, entry);
  _cfg->unmap_node_from_block(start); // start is no longer in any block

  // Virtual methods need an unverified entry point

  if( is_osr_compilation() ) {
    if( PoisonOSREntry ) {
      // TODO: Should use a ShouldNotReachHereNode...
      _cfg->insert( broot, 0, new MachBreakpointNode() );
    }
  } else {
    if( _method && !_method->flags().is_static() ) {
      // Insert unvalidated entry point
      _cfg->insert( broot, 0, new MachUEPNode() );
    }

  }

  // Break before main entry point
  if ((_method && C->directive()->BreakAtExecuteOption) ||
      (OptoBreakpoint && is_method_compilation())       ||
      (OptoBreakpointOSR && is_osr_compilation())       ||
      (OptoBreakpointC2R && !_method)                   ) {
    // checking for _method means that OptoBreakpoint does not apply to
    // runtime stubs or frame converters
    _cfg->insert( entry, 1, new MachBreakpointNode() );
  }

  // Insert epilogs before every return
  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
    Block* block = _cfg->get_block(i);
    if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point?
      Node* m = block->end();
      if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) {
        MachEpilogNode* epilog = new MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
        block->add_inst(epilog);
        _cfg->map_node_to_block(epilog, block);
      }
    }
  }

  uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
  blk_starts[0] = 0;

  // Initialize code buffer and process short branches.
  CodeBuffer* cb = init_buffer(blk_starts);

  if (cb == NULL || failing()) {
    return;
  }

  ScheduleAndBundle();

#ifndef PRODUCT
  if (trace_opto_output()) {
    tty->print("\n---- After ScheduleAndBundle ----\n");
    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
      tty->print("\nBB#%03d:\n", i);
      Block* block = _cfg->get_block(i);
      for (uint j = 0; j < block->number_of_nodes(); j++) {
        Node* n = block->get_node(j);
        OptoReg::Name reg = _regalloc->get_reg_first(n);
        tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
        n->dump();
      }
    }
  }
#endif

  if (failing()) {
    return;
  }

  BuildOopMaps();

  if (failing()) {
    return;
  }

  fill_buffer(cb, blk_starts);
}

bool Compile::need_stack_bang(int frame_size_in_bytes) const {
  // Determine if we need to generate a stack overflow check.
  // Do it if the method is not a stub function and
  // has java calls or has frame size > vm_page_size/8.
  // The debug VM checks that deoptimization doesn't trigger an
  // unexpected stack overflow (compiled method stack banging should
  // guarantee it doesn't happen) so we always need the stack bang in
  // a debug VM.
  return (UseStackBanging && stub_function() == NULL &&
          (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3
           DEBUG_ONLY(|| true)));
}

bool Compile::need_register_stack_bang() const {
  // Determine if we need to generate a register stack overflow check.
  // This is only used on architectures which have split register
  // and memory stacks (i.e. IA64).
  // Bang if the method is not a stub function and has java calls.
  return (stub_function() == NULL && has_java_calls());
}


// Compute the size of first NumberOfLoopInstrToAlign instructions at the top
// of a loop. When aligning a loop we need to provide enough instructions
// in cpu's fetch buffer to feed decoders. The loop alignment could be
// avoided if we have enough instructions in fetch buffer at the head of a loop.
// By default, the size is set to 999999 by Block's constructor so that
// a loop will be aligned if the size is not reset here.
//
// Note: Mach instructions could contain several HW instructions
// so the size is estimated only.
//
void Compile::compute_loop_first_inst_sizes() {
  // The next condition is used to gate the loop alignment optimization.
  // Don't align a loop if there are enough instructions at the head of a loop
  // or alignment padding is larger than MaxLoopPad. By default, MaxLoopPad
  // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
  // equal to 11 bytes which is the largest address NOP instruction.
  if (MaxLoopPad < OptoLoopAlignment - 1) {
    uint last_block = _cfg->number_of_blocks() - 1;
    for (uint i = 1; i <= last_block; i++) {
      Block* block = _cfg->get_block(i);
      // Check the first loop's block which requires an alignment.
      if (block->loop_alignment() > (uint)relocInfo::addr_unit()) {
        uint sum_size = 0;
        uint inst_cnt = NumberOfLoopInstrToAlign;
        inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc);

        // Check subsequent fallthrough blocks if the loop's first
        // block(s) does not have enough instructions.
        Block *nb = block;
        while(inst_cnt > 0 &&
              i < last_block &&
              !_cfg->get_block(i + 1)->has_loop_alignment() &&
              !nb->has_successor(block)) {
          i++;
          nb = _cfg->get_block(i);
          inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
        } // while( inst_cnt > 0 && i < last_block )

        block->set_first_inst_size(sum_size);
      } // if( b->head()->is_Loop() )
    } // for( i <= last_block )
  } // if( MaxLoopPad < OptoLoopAlignment-1 )
}

// The architecture description provides short branch variants for some long
// branch instructions. Replace eligible long branches with short branches.
void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
  // Compute size of each block, method size, and relocation information size
  uint nblocks = _cfg->number_of_blocks();

  uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
  uint* jmp_size   = NEW_RESOURCE_ARRAY(uint,nblocks);
  int*  jmp_nidx   = NEW_RESOURCE_ARRAY(int ,nblocks);

  // Collect worst case block paddings
  int* block_worst_case_pad = NEW_RESOURCE_ARRAY(int, nblocks);
  memset(block_worst_case_pad, 0, nblocks * sizeof(int));

  DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); )
  DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); )

  bool has_short_branch_candidate = false;

  // Initialize the sizes to 0
  code_size  = 0;          // Size in bytes of generated code
  stub_size  = 0;          // Size in bytes of all stub entries
  // Size in bytes of all relocation entries, including those in local stubs.
  // Start with 2-bytes of reloc info for the unvalidated entry point
  reloc_size = 1;          // Number of relocation entries

  // Make three passes.  The first computes pessimistic blk_starts,
  // relative jmp_offset and reloc_size information.  The second performs
  // short branch substitution using the pessimistic sizing.  The
  // third inserts nops where needed.
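  // For example, a branch at the end of one block whose target lies just
  // beyond the short-branch range can become eligible once an unrelated
  // branch later in the method is shrunk, because everything after that
  // branch moves closer; that is why the substitution pass below repeats
  // until it makes no further progress.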

  // Step one, perform a pessimistic sizing pass.
  uint last_call_adr = max_juint;
  uint last_avoid_back_to_back_adr = max_juint;
  uint nop_size = (new MachNopNode())->size(_regalloc);
  for (uint i = 0; i < nblocks; i++) { // For all blocks
    Block* block = _cfg->get_block(i);

    // During short branch replacement, we store the relative (to blk_starts)
    // offset of jump in jmp_offset, rather than the absolute offset of jump.
    // This is so that we do not need to recompute sizes of all nodes when
    // we compute correct blk_starts in our next sizing pass.
    jmp_offset[i] = 0;
    jmp_size[i]   = 0;
    jmp_nidx[i]   = -1;
    DEBUG_ONLY( jmp_target[i] = 0; )
    DEBUG_ONLY( jmp_rule[i]   = 0; )

    // Sum all instruction sizes to compute block size
    uint last_inst = block->number_of_nodes();
    uint blk_size = 0;
    for (uint j = 0; j < last_inst; j++) {
      Node* nj = block->get_node(j);
      // Handle machine instruction nodes
      if (nj->is_Mach()) {
        MachNode *mach = nj->as_Mach();
        blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
        reloc_size += mach->reloc();
        if (mach->is_MachCall()) {
          // add size information for trampoline stub
          // class CallStubImpl is platform-specific and defined in the *.ad files.
          stub_size  += CallStubImpl::size_call_trampoline();
          reloc_size += CallStubImpl::reloc_call_trampoline();

          MachCallNode *mcall = mach->as_MachCall();
          // This destination address is NOT PC-relative

          mcall->method_set((intptr_t)mcall->entry_point());

          if (mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method) {
            stub_size  += CompiledStaticCall::to_interp_stub_size();
            reloc_size += CompiledStaticCall::reloc_to_interp_stub();
#if INCLUDE_AOT
            stub_size  += CompiledStaticCall::to_aot_stub_size();
            reloc_size += CompiledStaticCall::reloc_to_aot_stub();
#endif
          }
        } else if (mach->is_MachSafePoint()) {
          // If call/safepoint are adjacent, account for possible
          // nop to disambiguate the two safepoints.
          // ScheduleAndBundle() can rearrange nodes in a block,
          // check for all offsets inside this block.
          if (last_call_adr >= blk_starts[i]) {
            blk_size += nop_size;
          }
        }
        if (mach->avoid_back_to_back(MachNode::AVOID_BEFORE)) {
          // Nop is inserted between "avoid back to back" instructions.
          // ScheduleAndBundle() can rearrange nodes in a block,
          // check for all offsets inside this block.
          if (last_avoid_back_to_back_adr >= blk_starts[i]) {
            blk_size += nop_size;
          }
        }
        if (mach->may_be_short_branch()) {
          if (!nj->is_MachBranch()) {
#ifndef PRODUCT
            nj->dump(3);
#endif
            Unimplemented();
          }
          assert(jmp_nidx[i] == -1, "block should have only one branch");
          jmp_offset[i] = blk_size;
          jmp_size[i]   = nj->size(_regalloc);
          jmp_nidx[i]   = j;
          has_short_branch_candidate = true;
        }
      }
      blk_size += nj->size(_regalloc);
      // Remember end of call offset
      if (nj->is_MachCall() && !nj->is_MachCallLeaf()) {
        last_call_adr = blk_starts[i]+blk_size;
      }
      // Remember end of avoid_back_to_back offset
      if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back(MachNode::AVOID_AFTER)) {
        last_avoid_back_to_back_adr = blk_starts[i]+blk_size;
      }
    }

    // When the next block starts a loop, we may insert pad NOP
    // instructions.  Since we cannot know our future alignment,
    // assume the worst.
    if (i < nblocks - 1) {
      Block* nb = _cfg->get_block(i + 1);
      int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
      if (max_loop_pad > 0) {
        assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
        // Adjust last_call_adr and/or last_avoid_back_to_back_adr.
        // If either is the last instruction in this block, bump by
        // max_loop_pad in lock-step with blk_size, so sizing
        // calculations in subsequent blocks still can conservatively
        // detect that it may be the last instruction in this block.
        if (last_call_adr == blk_starts[i]+blk_size) {
          last_call_adr += max_loop_pad;
        }
        if (last_avoid_back_to_back_adr == blk_starts[i]+blk_size) {
          last_avoid_back_to_back_adr += max_loop_pad;
        }
        blk_size += max_loop_pad;
        block_worst_case_pad[i + 1] = max_loop_pad;
      }
    }

    // Save block size; update total method size
    blk_starts[i+1] = blk_starts[i]+blk_size;
  }

  // Step two, replace eligible long jumps.
  bool progress = true;
  uint last_may_be_short_branch_adr = max_juint;
  while (has_short_branch_candidate && progress) {
    progress = false;
    has_short_branch_candidate = false;
    int adjust_block_start = 0;
    for (uint i = 0; i < nblocks; i++) {
      Block* block = _cfg->get_block(i);
      int idx = jmp_nidx[i];
      MachNode* mach = (idx == -1) ? NULL: block->get_node(idx)->as_Mach();
      if (mach != NULL && mach->may_be_short_branch()) {
#ifdef ASSERT
        assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity");
        int j;
        // Find the branch; ignore trailing NOPs.
        for (j = block->number_of_nodes()-1; j>=0; j--) {
          Node* n = block->get_node(j);
          if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con)
            break;
        }
        assert(j >= 0 && j == idx && block->get_node(j) == (Node*)mach, "sanity");
#endif
        int br_size = jmp_size[i];
        int br_offs = blk_starts[i] + jmp_offset[i];

        // This requires the TRUE branch target be in succs[0]
        uint bnum = block->non_connector_successor(0)->_pre_order;
        int offset = blk_starts[bnum] - br_offs;
        if (bnum > i) { // adjust following block's offset
          offset -= adjust_block_start;
        }

        // This block can be a loop header, account for the padding
        // in the previous block.
        int block_padding = block_worst_case_pad[i];
        assert(i == 0 || block_padding == 0 || br_offs >= block_padding, "Should have at least a padding on top");
        // In the following code a nop could be inserted before
        // the branch which will increase the backward distance.
        bool needs_padding = ((uint)(br_offs - block_padding) == last_may_be_short_branch_adr);
        assert(!needs_padding || jmp_offset[i] == 0, "padding only branches at the beginning of block");

        if (needs_padding && offset <= 0)
          offset -= nop_size;

        if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
          // We've got a winner.  Replace this branch.
          MachNode* replacement = mach->as_MachBranch()->short_branch_version();

          // Update the jmp_size.
          int new_size = replacement->size(_regalloc);
          int diff     = br_size - new_size;
          assert(diff >= (int)nop_size, "short_branch size should be smaller");
          // Conservatively take into account padding between
          // avoid_back_to_back branches. Previous branch could be
          // converted into avoid_back_to_back branch during next
          // rounds.
          if (needs_padding && replacement->avoid_back_to_back(MachNode::AVOID_BEFORE)) {
            jmp_offset[i] += nop_size;
            diff -= nop_size;
          }
          adjust_block_start += diff;
          block->map_node(replacement, idx);
          mach->subsume_by(replacement, C);
          mach = replacement;
          progress = true;

          jmp_size[i] = new_size;
          DEBUG_ONLY( jmp_target[i] = bnum; );
          DEBUG_ONLY( jmp_rule[i] = mach->rule(); );
        } else {
          // The jump distance is not short, try again during next iteration.
          has_short_branch_candidate = true;
        }
      } // (mach->may_be_short_branch())
      if (mach != NULL && (mach->may_be_short_branch() ||
                           mach->avoid_back_to_back(MachNode::AVOID_AFTER))) {
        last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i];
      }
      blk_starts[i+1] -= adjust_block_start;
    }
  }

#ifdef ASSERT
  for (uint i = 0; i < nblocks; i++) { // For all blocks
    if (jmp_target[i] != 0) {
      int br_size = jmp_size[i];
      int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
      if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
        tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
      }
      assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp");
    }
  }
#endif

  // Step 3, compute the offsets of all blocks, will be done in fill_buffer()
  // after ScheduleAndBundle().

  // ------------------
  // Compute size for code buffer
  code_size = blk_starts[nblocks];

  // Relocation records
  reloc_size += 1;              // Relo entry for exception handler

  // Adjust reloc_size to number of record of relocation info
  // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
  // a relocation index.
  // The CodeBuffer will expand the locs array if this estimate is too low.
  reloc_size *= 10 / sizeof(relocInfo);
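  // Note: with the usual 2-byte relocInfo element, 10 / sizeof(relocInfo)
  // evaluates to 5, so each counted entry above is budgeted five relocInfo
  // units -- consistent with the "6 or 8 bytes plus up to 25% index tax"
  // estimate, and still only an estimate that the CodeBuffer may outgrow.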
}

//------------------------------FillLocArray-----------------------------------
// Create a bit of debug info and append it to the array.  The mapping is from
// Java local or expression stack to constant, register or stack-slot.  For
// doubles, insert 2 mappings and return 1 (to tell the caller that the next
// entry has been taken care of and caller should skip it).
static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
  // This should never have accepted Bad before
  assert(OptoReg::is_valid(regnum), "location must be valid");
  return (OptoReg::is_reg(regnum))
         ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
         : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
}


ObjectValue*
Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
  for (int i = 0; i < objs->length(); i++) {
    assert(objs->at(i)->is_object(), "corrupt object cache");
    ObjectValue* sv = (ObjectValue*) objs->at(i);
    if (sv->id() == id) {
      return sv;
    }
  }
  // Otherwise..
  return NULL;
}

void Compile::set_sv_for_object_node(GrowableArray<ScopeValue*> *objs,
                                     ObjectValue* sv ) {
  assert(sv_for_node_id(objs, sv->id()) == NULL, "Precondition");
  objs->append(sv);
}


void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
                            GrowableArray<ScopeValue*> *array,
                            GrowableArray<ScopeValue*> *objs ) {
  assert( local, "use _top instead of null" );
  if (array->length() != idx) {
    assert(array->length() == idx + 1, "Unexpected array count");
    // Old functionality:
    //   return
    // New functionality:
    //   Assert if the local is not top. In product mode let the new node
    //   override the old entry.
    assert(local == top(), "LocArray collision");
    if (local == top()) {
      return;
    }
    array->pop();
  }
  const Type *t = local->bottom_type();

  // Is it a safepoint scalar object node?
  if (local->is_SafePointScalarObject()) {
    SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject();

    ObjectValue* sv = Compile::sv_for_node_id(objs, spobj->_idx);
    if (sv == NULL) {
      ciKlass* cik = t->is_oopptr()->klass();
      assert(cik->is_instance_klass() ||
             cik->is_array_klass(), "Not supported allocation.");
      sv = new ObjectValue(spobj->_idx,
                           new ConstantOopWriteValue(cik->java_mirror()->constant_encoding()));
      Compile::set_sv_for_object_node(objs, sv);

      uint first_ind = spobj->first_index(sfpt->jvms());
      for (uint i = 0; i < spobj->n_fields(); i++) {
        Node* fld_node = sfpt->in(first_ind+i);
        (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs);
      }
    }
    array->append(sv);
    return;
  }

  // Grab the register number for the local
  OptoReg::Name regnum = _regalloc->get_reg_first(local);
  if( OptoReg::is_valid(regnum) ) {// Got a register/stack?
    // Record the double as two float registers.
    // The register mask for such a value always specifies two adjacent
    // float registers, with the lower register number even.
    // Normally, the allocation of high and low words to these registers
    // is irrelevant, because nearly all operations on register pairs
    // (e.g., StoreD) treat them as a single unit.
    // Here, we assume in addition that the words in these two registers
    // stored "naturally" (by operations like StoreD and double stores
    // within the interpreter) such that the lower-numbered register
    // is written to the lower memory address.  This may seem like
    // a machine dependency, but it is not--it is a requirement on
    // the author of the <arch>.ad file to ensure that, for every
    // even/odd double-register pair to which a double may be allocated,
    // the word in the even single-register is stored to the first
    // memory word.  (Note that register numbers are completely
    // arbitrary, and are not tied to any machine-level encodings.)
#ifdef _LP64
    if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) {
      array->append(new ConstantIntValue((jint)0));
      array->append(new_loc_value( _regalloc, regnum, Location::dbl ));
    } else if ( t->base() == Type::Long ) {
      array->append(new ConstantIntValue((jint)0));
      array->append(new_loc_value( _regalloc, regnum, Location::lng ));
    } else if ( t->base() == Type::RawPtr ) {
      // jsr/ret return address which must be restored into the full-width
      // 64-bit stack slot.
      array->append(new_loc_value( _regalloc, regnum, Location::lng ));
    }
#else //_LP64
#ifdef SPARC
    if (t->base() == Type::Long && OptoReg::is_reg(regnum)) {
      // For SPARC we have to swap high and low words for
      // long values stored in a single-register (g0-g7).
      array->append(new_loc_value( _regalloc,              regnum   , Location::normal ));
      array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
    } else
#endif //SPARC
    if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) {
      // Repack the double/long as two jints.
      // The convention the interpreter uses is that the second local
      // holds the first raw word of the native double representation.
      // This is actually reasonable, since locals and stack arrays
      // grow downwards in all implementations.
      // (If, on some machine, the interpreter's Java locals or stack
      // were to grow upwards, the embedded doubles would be word-swapped.)
      array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
      array->append(new_loc_value( _regalloc,              regnum   , Location::normal ));
    }
#endif //_LP64
    else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
             OptoReg::is_reg(regnum) ) {
      array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
                                                      ? Location::float_in_dbl : Location::normal ));
    } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
      array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
                                                      ? Location::int_in_long : Location::normal ));
    } else if( t->base() == Type::NarrowOop ) {
      array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
    } else {
      array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
    }
    return;
  }

  // No register.  It must be constant data.
  switch (t->base()) {
  case Type::Half:              // Second half of a double
    ShouldNotReachHere();       // Caller should skip 2nd halves
    break;
  case Type::AnyPtr:
    array->append(new ConstantOopWriteValue(NULL));
    break;
  case Type::AryPtr:
  case Type::InstPtr:           // fall through
    array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
    break;
  case Type::NarrowOop:
    if (t == TypeNarrowOop::NULL_PTR) {
      array->append(new ConstantOopWriteValue(NULL));
    } else {
      array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
    }
    break;
  case Type::Int:
    array->append(new ConstantIntValue(t->is_int()->get_con()));
    break;
  case Type::RawPtr:
    // A return address (T_ADDRESS).
    assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
#ifdef _LP64
    // Must be restored to the full-width 64-bit stack slot.
    array->append(new ConstantLongValue(t->is_ptr()->get_con()));
#else
    array->append(new ConstantIntValue(t->is_ptr()->get_con()));
#endif
    break;
  case Type::FloatCon: {
    float f = t->is_float_constant()->getf();
    array->append(new ConstantIntValue(jint_cast(f)));
    break;
  }
  case Type::DoubleCon: {
    jdouble d = t->is_double_constant()->getd();
#ifdef _LP64
    array->append(new ConstantIntValue((jint)0));
    array->append(new ConstantDoubleValue(d));
#else
    // Repack the double as two jints.
    // The convention the interpreter uses is that the second local
    // holds the first raw word of the native double representation.
    // This is actually reasonable, since locals and stack arrays
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded doubles would be word-swapped.)
    jlong_accessor acc;
    acc.long_value = jlong_cast(d);
    array->append(new ConstantIntValue(acc.words[1]));
    array->append(new ConstantIntValue(acc.words[0]));
#endif
    break;
  }
  case Type::Long: {
    jlong d = t->is_long()->get_con();
#ifdef _LP64
    array->append(new ConstantIntValue((jint)0));
    array->append(new ConstantLongValue(d));
#else
    // Repack the long as two jints.
    // The convention the interpreter uses is that the second local
    // holds the first raw word of the native double representation.
    // This is actually reasonable, since locals and stack arrays
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded doubles would be word-swapped.)
    jlong_accessor acc;
    acc.long_value = d;
    array->append(new ConstantIntValue(acc.words[1]));
    array->append(new ConstantIntValue(acc.words[0]));
#endif
    break;
  }
  case Type::Top:               // Add an illegal value here
    array->append(new LocationValue(Location()));
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Determine if this node starts a bundle
bool Compile::starts_bundle(const Node *n) const {
  return (_node_bundling_limit > n->_idx &&
          _node_bundling_base[n->_idx].starts_bundle());
}

//--------------------------Process_OopMap_Node--------------------------------
void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {

  // Handle special safepoint nodes for synchronization
  MachSafePointNode *sfn   = mach->as_MachSafePoint();
  MachCallNode      *mcall;

  int safepoint_pc_offset = current_offset;
  bool is_method_handle_invoke = false;
  bool return_oop = false;

  // Add the safepoint in the DebugInfoRecorder
  if( !mach->is_MachCall() ) {
    mcall = NULL;
    debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map);
  } else {
    mcall = mach->as_MachCall();

    // Is the call a MethodHandle call?
    if (mcall->is_MachCallJava()) {
      if (mcall->as_MachCallJava()->_method_handle_invoke) {
        assert(has_method_handle_invokes(), "must have been set during call generation");
        is_method_handle_invoke = true;
      }
    }

    // Check if a call returns an object.
    if (mcall->returns_pointer()) {
      return_oop = true;
    }
    safepoint_pc_offset += mcall->ret_addr_offset();
    debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map);
  }

  // Loop over the JVMState list to add scope information
  // Do not skip safepoints with a NULL method, they need monitor info
  JVMState* youngest_jvms = sfn->jvms();
  int max_depth = youngest_jvms->depth();

  // Allocate the object pool for scalar-replaced objects -- the map from
  // small-integer keys (which can be recorded in the local and ostack
  // arrays) to descriptions of the object state.
  GrowableArray<ScopeValue*> *objs = new GrowableArray<ScopeValue*>();

  // Visit scopes from oldest to youngest.
  for (int depth = 1; depth <= max_depth; depth++) {
    JVMState* jvms = youngest_jvms->of_depth(depth);
    int idx;
    ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
    // Safepoints that do not have method() set only provide oop-map and monitor info
    // to support GC; these do not support deoptimization.
    int num_locs = (method == NULL) ? 0 : jvms->loc_size();
    int num_exps = (method == NULL) ? 0 : jvms->stk_size();
    int num_mon  = jvms->nof_monitors();
    assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(),
           "JVMS local count must match that of the method");

    // Add Local and Expression Stack Information

    // Insert locals into the locarray
    GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs);
    for( idx = 0; idx < num_locs; idx++ ) {
      FillLocArray( idx, sfn, sfn->local(jvms, idx), locarray, objs );
    }

    // Insert expression stack entries into the exparray
    GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps);
    for( idx = 0; idx < num_exps; idx++ ) {
      FillLocArray( idx, sfn, sfn->stack(jvms, idx), exparray, objs );
    }

    // Add in mappings of the monitors
    assert( !method ||
            !method->is_synchronized() ||
            method->is_native() ||
            num_mon > 0 ||
            !GenerateSynchronizationCode,
            "monitors must always exist for synchronized methods");

    // Build the growable array of ScopeValues for exp stack
    GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon);

    // Loop over monitors and insert into array
    for (idx = 0; idx < num_mon; idx++) {
      // Grab the node that defines this monitor
      Node* box_node = sfn->monitor_box(jvms, idx);
      Node* obj_node = sfn->monitor_obj(jvms, idx);

      // Create ScopeValue for object
      ScopeValue *scval = NULL;

      if (obj_node->is_SafePointScalarObject()) {
        SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject();
        scval = Compile::sv_for_node_id(objs, spobj->_idx);
        if (scval == NULL) {
          const Type *t = spobj->bottom_type();
          ciKlass* cik = t->is_oopptr()->klass();
          assert(cik->is_instance_klass() ||
                 cik->is_array_klass(), "Not supported allocation.");
          ObjectValue* sv = new ObjectValue(spobj->_idx,
                                            new ConstantOopWriteValue(cik->java_mirror()->constant_encoding()));
          Compile::set_sv_for_object_node(objs, sv);

          uint first_ind = spobj->first_index(youngest_jvms);
          for (uint i = 0; i < spobj->n_fields(); i++) {
            Node* fld_node = sfn->in(first_ind+i);
            (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs);
          }
          scval = sv;
        }
      } else if (!obj_node->is_Con()) {
        OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node);
        if( obj_node->bottom_type()->base() == Type::NarrowOop ) {
          scval = new_loc_value( _regalloc, obj_reg, Location::narrowoop );
        } else {
          scval = new_loc_value( _regalloc, obj_reg, Location::oop );
        }
      } else {
        const TypePtr *tp = obj_node->get_ptr_type();
        scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding());
      }

      OptoReg::Name box_reg = BoxLockNode::reg(box_node);
      Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg));
      bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated());
      monarray->append(new MonitorValue(scval, basic_lock, eliminated));
    }

    // We dump the object pool first, since deoptimization reads it in first.
    debug_info()->dump_object_pool(objs);

    // Build first class objects to pass to scope
    DebugToken *locvals = debug_info()->create_scope_values(locarray);
    DebugToken *expvals = debug_info()->create_scope_values(exparray);
    DebugToken *monvals = debug_info()->create_monitor_values(monarray);

    // Make method available for all Safepoints
    ciMethod* scope_method = method ? method : _method;
    // Describe the scope here
    assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI");
    assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest");
    // Now we can describe the scope.
    methodHandle null_mh;
    bool rethrow_exception = false;
    debug_info()->describe_scope(safepoint_pc_offset, null_mh, scope_method, jvms->bci(), jvms->should_reexecute(), rethrow_exception, is_method_handle_invoke, return_oop, locvals, expvals, monvals);
  } // End jvms loop

  // Mark the end of the scope set.
  debug_info()->end_safepoint(safepoint_pc_offset);
}


// A simplified version of Process_OopMap_Node, to handle non-safepoints.
class NonSafepointEmitter {
  Compile*  C;
  JVMState* _pending_jvms;
  int       _pending_offset;

  void emit_non_safepoint();

 public:
  NonSafepointEmitter(Compile* compile) {
    this->C = compile;
    _pending_jvms = NULL;
    _pending_offset = 0;
  }

  void observe_instruction(Node* n, int pc_offset) {
    if (!C->debug_info()->recording_non_safepoints())  return;

    Node_Notes* nn = C->node_notes_at(n->_idx);
    if (nn == NULL || nn->jvms() == NULL)  return;
    if (_pending_jvms != NULL &&
        _pending_jvms->same_calls_as(nn->jvms())) {
      // Repeated JVMS?  Stretch it up here.
      _pending_offset = pc_offset;
    } else {
      if (_pending_jvms != NULL &&
          _pending_offset < pc_offset) {
        emit_non_safepoint();
      }
      _pending_jvms = NULL;
      if (pc_offset > C->debug_info()->last_pc_offset()) {
        // This is the only way _pending_jvms can become non-NULL:
        _pending_jvms = nn->jvms();
        _pending_offset = pc_offset;
      }
    }
  }

  // Stay out of the way of real safepoints:
  void observe_safepoint(JVMState* jvms, int pc_offset) {
    if (_pending_jvms != NULL &&
        !_pending_jvms->same_calls_as(jvms) &&
        _pending_offset < pc_offset) {
      emit_non_safepoint();
    }
    _pending_jvms = NULL;
  }

  void flush_at_end() {
    if (_pending_jvms != NULL) {
      emit_non_safepoint();
    }
    _pending_jvms = NULL;
  }
};
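// Note: fill_buffer() drives this emitter -- observe_instruction() is called
// after each node is emitted, observe_safepoint() just before a real
// safepoint is recorded, and flush_at_end() once all blocks have been filled.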

void NonSafepointEmitter::emit_non_safepoint() {
  JVMState* youngest_jvms = _pending_jvms;
  int       pc_offset     = _pending_offset;

  // Clear it now:
  _pending_jvms = NULL;

  DebugInformationRecorder* debug_info = C->debug_info();
  assert(debug_info->recording_non_safepoints(), "sanity");

  debug_info->add_non_safepoint(pc_offset);
  int max_depth = youngest_jvms->depth();

  // Visit scopes from oldest to youngest.
  for (int depth = 1; depth <= max_depth; depth++) {
    JVMState* jvms = youngest_jvms->of_depth(depth);
    ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
    assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest");
    methodHandle null_mh;
    debug_info->describe_scope(pc_offset, null_mh, method, jvms->bci(), jvms->should_reexecute());
  }

  // Mark the end of the scope set.
  debug_info->end_non_safepoint(pc_offset);
}

//------------------------------init_buffer------------------------------------
CodeBuffer* Compile::init_buffer(uint* blk_starts) {

  // Set the initially allocated size
  int  code_req   = initial_code_capacity;
  int  locs_req   = initial_locs_capacity;
  int  stub_req   = initial_stub_capacity;
  int  const_req  = initial_const_capacity;

  int  pad_req    = NativeCall::instruction_size;
  // The extra spacing after the code is necessary on some platforms.
  // Sometimes we need to patch in a jump after the last instruction,
  // if the nmethod has been deoptimized.  (See 4932387, 4894843.)

  // Compute the byte offset where we can store the deopt pc.
  if (fixed_slots() != 0) {
    _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
  }

  // Compute prolog code size
  _method_size = 0;
  _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
#if defined(IA64) && !defined(AIX)
  if (save_argument_registers()) {
    // 4815101: this is a stub with implicit and unknown precision fp args.
    // The usual spill mechanism can only generate stfd's in this case, which
    // doesn't work if the fp reg to spill contains a single-precision denorm.
    // Instead, we hack around the normal spill mechanism using stfspill's and
    // ldffill's in the MachProlog and MachEpilog emit methods.  We allocate
    // space here for the fp arg regs (f8-f15) we're going to thusly spill.
    //
    // If we ever implement 16-byte 'registers' == stack slots, we can
    // get rid of this hack and have SpillCopy generate stfspill/ldffill
    // instead of stfd/stfs/ldfd/ldfs.
    _frame_slots += 8*(16/BytesPerInt);
  }
#endif
  assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check");

  if (has_mach_constant_base_node()) {
    uint add_size = 0;
    // Fill the constant table.
    // Note:  This must happen before shorten_branches.
    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
      Block* b = _cfg->get_block(i);

      for (uint j = 0; j < b->number_of_nodes(); j++) {
        Node* n = b->get_node(j);

        // If the node is a MachConstantNode evaluate the constant
        // value section.
        if (n->is_MachConstant()) {
          MachConstantNode* machcon = n->as_MachConstant();
          machcon->eval_constant(C);
        } else if (n->is_Mach()) {
          // On Power there are more nodes that issue constants.
          add_size += (n->as_Mach()->ins_num_consts() * 8);
        }
      }
    }

    // Calculate the offsets of the constants and the size of the
    // constant table (including the padding to the next section).
    constant_table().calculate_offsets_and_size();
    const_req = constant_table().size() + add_size;
  }

  // Initialize the space for the BufferBlob used to find and verify
  // instruction size in MachNode::emit_size()
  init_scratch_buffer_blob(const_req);
  if (failing())  return NULL; // Out of memory

  // Pre-compute the length of blocks and replace
  // long branches with short if machine supports it.
  shorten_branches(blk_starts, code_req, locs_req, stub_req);

  // nmethod and CodeBuffer count stubs & constants as part of method's code.
  // class HandlerImpl is platform-specific and defined in the *.ad files.
  int exception_handler_req = HandlerImpl::size_exception_handler() + MAX_stubs_size; // add marginal slop for handler
  int deopt_handler_req     = HandlerImpl::size_deopt_handler()     + MAX_stubs_size; // add marginal slop for handler
  stub_req += MAX_stubs_size;   // ensure per-stub margin
  code_req += MAX_inst_size;    // ensure per-instruction margin

  if (StressCodeBuffers)
    code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10;  // force expansion

  int total_req =
    const_req +
    code_req +
    pad_req +
    stub_req +
    exception_handler_req +
    deopt_handler_req;               // deopt handler

  if (has_method_handle_invokes())
    total_req += deopt_handler_req;  // deopt MH handler

  CodeBuffer* cb = code_buffer();
  cb->initialize(total_req, locs_req);

  // Have we run out of code space?
  if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
    C->record_failure("CodeCache is full");
    return NULL;
  }
  // Configure the code buffer.
  cb->initialize_consts_size(const_req);
  cb->initialize_stubs_size(stub_req);
  cb->initialize_oop_recorder(env()->oop_recorder());

  // fill in the nop array for bundling computations
  MachNode *_nop_list[Bundle::_nop_count];
  Bundle::initialize_nops(_nop_list);

  return cb;
}
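// (The buffer set up above is partitioned into consts, insts and stubs
// sections; fill_buffer() below writes into the insts section, while call
// stubs and the exception/deopt handlers are emitted into the stubs section.)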

//------------------------------fill_buffer------------------------------------
void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
  // blk_starts[] contains offsets calculated during short branches processing,
  // offsets should not be increased during following steps.

  // Compute the size of first NumberOfLoopInstrToAlign instructions at head
  // of a loop. It is used to determine the padding for loop alignment.
  compute_loop_first_inst_sizes();

  // Create oopmap set.
  _oop_map_set = new OopMapSet();

  // !!!!! This preserves old handling of oopmaps for now
  debug_info()->set_oopmaps(_oop_map_set);

  uint nblocks  = _cfg->number_of_blocks();
  // Count and start of implicit null check instructions
  uint inct_cnt = 0;
  uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1);

  // Count and start of calls
  uint *call_returns = NEW_RESOURCE_ARRAY(uint, nblocks+1);

  uint  return_offset = 0;
  int nop_size = (new MachNopNode())->size(_regalloc);

  int previous_offset = 0;
  int current_offset  = 0;
  int last_call_offset = -1;
  int last_avoid_back_to_back_offset = -1;
#ifdef ASSERT
  uint* jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks);
  uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
  uint* jmp_size   = NEW_RESOURCE_ARRAY(uint,nblocks);
  uint* jmp_rule   = NEW_RESOURCE_ARRAY(uint,nblocks);
#endif
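  // These debug-only arrays mirror the bookkeeping kept by shorten_branches();
  // they feed the short-branch displacement re-check performed after all
  // blocks have been emitted (see the ASSERT block near the end of this method).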

  // Create an array of unused labels, one for each basic block, if printing is enabled
#if defined(SUPPORT_OPTO_ASSEMBLY)
  int *node_offsets      = NULL;
  uint node_offset_limit = unique();

  if (print_assembly()) {
    node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit);
  }
  if (node_offsets != NULL) {
    // We need to initialize. Unused array elements may contain garbage and mess up PrintOptoAssembly.
    memset(node_offsets, 0, node_offset_limit*sizeof(int));
  }
#endif

  NonSafepointEmitter non_safepoints(this);  // emit non-safepoints lazily

  // Emit the constant table.
  if (has_mach_constant_base_node()) {
    constant_table().emit(*cb);
  }

  // Create an array of labels, one for each basic block
  Label *blk_labels = NEW_RESOURCE_ARRAY(Label, nblocks+1);
  for (uint i=0; i <= nblocks; i++) {
    blk_labels[i].init();
  }

  // ------------------
  // Now fill in the code buffer
  Node *delay_slot = NULL;

  for (uint i = 0; i < nblocks; i++) {
    Block* block = _cfg->get_block(i);
    Node* head = block->head();

    // If this block needs to start aligned (i.e, can be reached other
    // than by falling-thru from the previous block), then force the
    // start of a new bundle.
    if (Pipeline::requires_bundling() && starts_bundle(head)) {
      cb->flush_bundle(true);
    }

#ifdef ASSERT
    if (!block->is_connector()) {
      stringStream st;
      block->dump_head(_cfg, &st);
      MacroAssembler(cb).block_comment(st.as_string());
    }
    jmp_target[i] = 0;
    jmp_offset[i] = 0;
    jmp_size[i]   = 0;
    jmp_rule[i]   = 0;
#endif
    int blk_offset = current_offset;

    // Define the label at the beginning of the basic block
    MacroAssembler(cb).bind(blk_labels[block->_pre_order]);

    uint last_inst = block->number_of_nodes();

    // Emit block normally, except for last instruction.
    // Emit means "dump code bits into code buffer".
    for (uint j = 0; j<last_inst; j++) {

      // Get the node
      Node* n = block->get_node(j);

      // See if delay slots are supported
      if (valid_bundle_info(n) &&
          node_bundling(n)->used_in_unconditional_delay()) {
        assert(delay_slot == NULL, "no use of delay slot node");
        assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");

        delay_slot = n;
        continue;
      }

      // If this starts a new instruction group, then flush the current one
      // (but allow split bundles)
      if (Pipeline::requires_bundling() && starts_bundle(n))
        cb->flush_bundle(false);

      // Special handling for SafePoint/Call Nodes
      bool is_mcall = false;
      if (n->is_Mach()) {
        MachNode *mach = n->as_Mach();
        is_mcall = n->is_MachCall();
        bool is_sfn = n->is_MachSafePoint();

        // If this requires all previous instructions be flushed, then do so
        if (is_sfn || is_mcall || mach->alignment_required() != 1) {
          cb->flush_bundle(true);
          current_offset = cb->insts_size();
        }

        // Padding may be needed again, since a previous instruction
        // could have been moved into the delay slot.

        // align the instruction if necessary
        int padding = mach->compute_padding(current_offset);
        // Make sure safepoint node for polling is distinct from a call's
        // return by adding a nop if needed.
        if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) {
          padding = nop_size;
        }
        if (padding == 0 && mach->avoid_back_to_back(MachNode::AVOID_BEFORE) &&
            current_offset == last_avoid_back_to_back_offset) {
          // Avoid back-to-back placement of these instructions.
          padding = nop_size;
        }

        if (padding > 0) {
          assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
          int nops_cnt = padding / nop_size;
          MachNode *nop = new MachNopNode(nops_cnt);
          block->insert_node(nop, j++);
          last_inst++;
          _cfg->map_node_to_block(nop, block);
          // Ensure enough space.
          cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
          if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
            C->record_failure("CodeCache is full");
            return;
          }
          nop->emit(*cb, _regalloc);
          cb->flush_bundle(true);
          current_offset = cb->insts_size();
        }

        // Remember the start of the last call in a basic block
        if (is_mcall) {
          MachCallNode *mcall = mach->as_MachCall();

          // This destination address is NOT PC-relative
          mcall->method_set((intptr_t)mcall->entry_point());

          // Save the return address
          call_returns[block->_pre_order] = current_offset + mcall->ret_addr_offset();

          if (mcall->is_MachCallLeaf()) {
            is_mcall = false;
            is_sfn = false;
          }
        }

        // sfn will be valid whenever mcall is valid now because of inheritance
        if (is_sfn || is_mcall) {

          // Handle special safepoint nodes for synchronization
          if (!is_mcall) {
            MachSafePointNode *sfn = mach->as_MachSafePoint();
            // !!!!! Stubs only need an oopmap right now, so bail out
            if (sfn->jvms()->method() == NULL) {
              // Write the oopmap directly to the code blob??!!
              continue;
            }
          } // End synchronization

          non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
                                           current_offset);
          Process_OopMap_Node(mach, current_offset);
        } // End if safepoint

        // If this is a null check, then add the start of the previous instruction to the list
        else if( mach->is_MachNullCheck() ) {
          inct_starts[inct_cnt++] = previous_offset;
        }

        // If this is a branch, then fill in the label with the target BB's label
        else if (mach->is_MachBranch()) {
          // This requires the TRUE branch target be in succs[0]
          uint block_num = block->non_connector_successor(0)->_pre_order;

          // Try to replace long branch if delay slot is not used,
          // it is mostly for back branches since forward branch's
          // distance is not updated yet.
          bool delay_slot_is_used = valid_bundle_info(n) &&
                                    node_bundling(n)->use_unconditional_delay();
          if (!delay_slot_is_used && mach->may_be_short_branch()) {
            assert(delay_slot == NULL, "not expecting delay slot node");
            int br_size = n->size(_regalloc);
            int offset = blk_starts[block_num] - current_offset;
            if (block_num >= i) {
              // Current and following block's offset are not
              // finalized yet, adjust distance by the difference
              // between calculated and final offsets of current block.
              offset -= (blk_starts[i] - blk_offset);
            }
            // In the following code a nop could be inserted before
            // the branch which will increase the backward distance.
            bool needs_padding = (current_offset == last_avoid_back_to_back_offset);
            if (needs_padding && offset <= 0)
              offset -= nop_size;

            if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
              // We've got a winner.  Replace this branch.
              MachNode* replacement = mach->as_MachBranch()->short_branch_version();

              // Update the jmp_size.
              int new_size = replacement->size(_regalloc);
              assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller");
              // Insert padding between avoid_back_to_back branches.
              if (needs_padding && replacement->avoid_back_to_back(MachNode::AVOID_BEFORE)) {
                MachNode *nop = new MachNopNode();
                block->insert_node(nop, j++);
                _cfg->map_node_to_block(nop, block);
                last_inst++;
                nop->emit(*cb, _regalloc);
                cb->flush_bundle(true);
                current_offset = cb->insts_size();
              }
#ifdef ASSERT
              jmp_target[i] = block_num;
              jmp_offset[i] = current_offset - blk_offset;
              jmp_size[i]   = new_size;
              jmp_rule[i]   = mach->rule();
#endif
              block->map_node(replacement, j);
              mach->subsume_by(replacement, C);
              n    = replacement;
              mach = replacement;
            }
          }
          mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num );
        } else if (mach->ideal_Opcode() == Op_Jump) {
          for (uint h = 0; h < block->_num_succs; h++) {
            Block* succs_block = block->_succs[h];
            for (uint j = 1; j < succs_block->num_preds(); j++) {
              Node* jpn = succs_block->pred(j);
              if (jpn->is_JumpProj() && jpn->in(0) == mach) {
                uint block_num = succs_block->non_connector()->_pre_order;
                Label *blkLabel = &blk_labels[block_num];
                mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel);
              }
            }
          }
        }
#ifdef ASSERT
        // Check that oop-store precedes the card-mark
        else if (mach->ideal_Opcode() == Op_StoreCM) {
          uint storeCM_idx = j;
          int count = 0;
          for (uint prec = mach->req(); prec < mach->len(); prec++) {
            Node *oop_store = mach->in(prec);  // Precedence edge
            if (oop_store == NULL) continue;
            count++;
            uint i4;
            for (i4 = 0; i4 < last_inst; ++i4) {
              if (block->get_node(i4) == oop_store) {
                break;
              }
            }
            // Note: This test can provide a false failure if other precedence
            // edges have been added to the storeCMNode.
            assert(i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
          }
          assert(count > 0, "storeCM expects at least one precedence edge");
        }
#endif
        else if (!n->is_Proj()) {
          // Remember the beginning of the previous instruction, in case
          // it's followed by a flag-kill and a null-check.  Happens on
          // Intel all the time, with add-to-memory kind of opcodes.
          previous_offset = current_offset;
        }

        // Not an else-if!
        // If this is a trap based cmp then add its offset to the list.
        if (mach->is_TrapBasedCheckNode()) {
          inct_starts[inct_cnt++] = current_offset;
        }
      }

      // Verify that there is sufficient space remaining
      cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
      if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
        C->record_failure("CodeCache is full");
        return;
      }

      // Save the offset for the listing
#if defined(SUPPORT_OPTO_ASSEMBLY)
      if ((node_offsets != NULL) && (n->_idx < node_offset_limit)) {
        node_offsets[n->_idx] = cb->insts_size();
      }
#endif

      // "Normal" instruction case
      DEBUG_ONLY( uint instr_offset = cb->insts_size(); )
      n->emit(*cb, _regalloc);
      current_offset = cb->insts_size();

      // Above we only verified that there is enough space in the instruction section.
      // However, the instruction may emit stubs that cause code buffer expansion.
      // Bail out here if expansion failed due to a lack of code cache space.
      if (failing()) {
        return;
      }

#ifdef ASSERT
      if (n->size(_regalloc) < (current_offset-instr_offset)) {
        n->dump();
        assert(false, "wrong size of mach node");
      }
#endif
      non_safepoints.observe_instruction(n, current_offset);

      // mcall is last "call" that can be a safepoint
      // record it so we can see if a poll will directly follow it
      // in which case we'll need a pad to make the PcDesc sites unique
      // see 5010568. This can be slightly inaccurate but conservative
      // in the case that return address is not actually at current_offset.
      // This is a small price to pay.

      if (is_mcall) {
        last_call_offset = current_offset;
      }

      if (n->is_Mach() && n->as_Mach()->avoid_back_to_back(MachNode::AVOID_AFTER)) {
        // Avoid back-to-back placement of these instructions.
        last_avoid_back_to_back_offset = current_offset;
      }

      // See if this instruction has a delay slot
      if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
        guarantee(delay_slot != NULL, "expecting delay slot node");

        // Back up 1 instruction
        cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size());

        // Save the offset for the listing
#if defined(SUPPORT_OPTO_ASSEMBLY)
        if ((node_offsets != NULL) && (delay_slot->_idx < node_offset_limit)) {
          node_offsets[delay_slot->_idx] = cb->insts_size();
        }
#endif

        // Support a SafePoint in the delay slot
        if (delay_slot->is_MachSafePoint()) {
          MachNode *mach = delay_slot->as_Mach();
          // !!!!! Stubs only need an oopmap right now, so bail out
          if (!mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL) {
            // Write the oopmap directly to the code blob??!!
            delay_slot = NULL;
            continue;
          }

          int adjusted_offset = current_offset - Pipeline::instr_unit_size();
          non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
                                           adjusted_offset);
          // Generate an OopMap entry
          Process_OopMap_Node(mach, adjusted_offset);
        }

        // Insert the delay slot instruction
        delay_slot->emit(*cb, _regalloc);

        // Don't reuse it
        delay_slot = NULL;
      }

    } // End for all instructions in block

1471 // If the next block is the top of a loop, pad this block out to align
1472 // the loop top a little. Helps prevent pipe stalls at loop back branches.
1473 if (i < nblocks-1) {
1474 Block *nb = _cfg->get_block(i + 1);
1475 int padding = nb->alignment_padding(current_offset);
1476 if( padding > 0 ) {
1477 MachNode *nop = new MachNopNode(padding / nop_size);
1478 block->insert_node(nop, block->number_of_nodes());
1479 _cfg->map_node_to_block(nop, block);
1480 nop->emit(*cb, _regalloc);
1481 current_offset = cb->insts_size();
1482 }
1483 }
1484    // Verify that the distances computed earlier for forward
1485    // short branches are still valid.
1486 guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size");
1487
1488 // Save new block start offset
1489 blk_starts[i] = blk_offset;
1490 } // End of for all blocks
1491 blk_starts[nblocks] = current_offset;
1492
1493 non_safepoints.flush_at_end();
1494
1495 // Offset too large?
1496 if (failing()) return;
1497
1498 // Define a pseudo-label at the end of the code
1499 MacroAssembler(cb).bind( blk_labels[nblocks] );
1500
1501 // Compute the size of the first block
1502 _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();
1503
1504#ifdef ASSERT
1505 for (uint i = 0; i < nblocks; i++) { // For all blocks
1506 if (jmp_target[i] != 0) {
1507 int br_size = jmp_size[i];
1508 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
1509 if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
1510 tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
1511 assert(false, "Displacement too large for short jmp");
1512 }
1513 }
1514 }
1515#endif
1516
1517#ifndef PRODUCT
1518 // Information on the size of the method, without the extraneous code
1519 Scheduling::increment_method_size(cb->insts_size());
1520#endif
1521
1522 // ------------------
1523 // Fill in exception table entries.
1524 FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels);
1525
1526 // Only java methods have exception handlers and deopt handlers
1527 // class HandlerImpl is platform-specific and defined in the *.ad files.
1528 if (_method) {
1529 // Emit the exception handler code.
1530 _code_offsets.set_value(CodeOffsets::Exceptions, HandlerImpl::emit_exception_handler(*cb));
1531 if (failing()) {
1532 return; // CodeBuffer::expand failed
1533 }
1534 // Emit the deopt handler code.
1535 _code_offsets.set_value(CodeOffsets::Deopt, HandlerImpl::emit_deopt_handler(*cb));
1536
1537 // Emit the MethodHandle deopt handler code (if required).
1538 if (has_method_handle_invokes() && !failing()) {
1539      // We can use the same code as for the normal deopt handler; we
1540 // just need a different entry point address.
1541 _code_offsets.set_value(CodeOffsets::DeoptMH, HandlerImpl::emit_deopt_handler(*cb));
1542 }
1543 }
1544
1545 // One last check for failed CodeBuffer::expand:
1546 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1547 C->record_failure("CodeCache is full");
1548 return;
1549 }
1550
1551#if defined(SUPPORT_ABSTRACT_ASSEMBLY) || defined(SUPPORT_ASSEMBLY) || defined(SUPPORT_OPTO_ASSEMBLY)
1552 if (print_assembly()) {
1553 tty->cr();
1554 tty->print_cr("============================= C2-compiled nmethod ==============================");
1555 }
1556#endif
1557
1558#if defined(SUPPORT_OPTO_ASSEMBLY)
1559 // Dump the assembly code, including basic-block numbers
1560 if (print_assembly()) {
1561 ttyLocker ttyl; // keep the following output all in one block
1562 if (!VMThread::should_terminate()) { // test this under the tty lock
1563 // This output goes directly to the tty, not the compiler log.
1564 // To enable tools to match it up with the compilation activity,
1565 // be sure to tag this tty output with the compile ID.
1566 if (xtty != NULL) {
1567 xtty->head("opto_assembly compile_id='%d'%s", compile_id(),
1568 is_osr_compilation() ? " compile_kind='osr'" :
1569 "");
1570 }
1571 if (method() != NULL) {
1572 tty->print_cr("----------------------------------- MetaData -----------------------------------");
1573 method()->print_metadata();
1574 } else if (stub_name() != NULL) {
1575 tty->print_cr("----------------------------- RuntimeStub %s -------------------------------", stub_name());
1576 }
1577 tty->cr();
1578 tty->print_cr("--------------------------------- OptoAssembly ---------------------------------");
1579 dump_asm(node_offsets, node_offset_limit);
1580 tty->print_cr("--------------------------------------------------------------------------------");
1581 if (xtty != NULL) {
1582        // print_metadata and dump_asm above may safepoint, which makes us lose the ttylock.
1583        // Retake the lock to make sure the end tag is coherent and that xmlStream->pop_tag is
1584        // done in a thread-safe way.
1585 ttyLocker ttyl2;
1586 xtty->tail("opto_assembly");
1587 }
1588 }
1589 }
1590#endif
1591}
1592
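// Fill in the exception-handler and implicit-exception tables using the
// offsets recorded during code emission: Catch nodes contribute handler
// subtables, while MachNullCheck and trap-based check nodes contribute
// implicit-exception entries.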
1593void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) {
1594 _inc_table.set_size(cnt);
1595
1596 uint inct_cnt = 0;
1597 for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
1598 Block* block = _cfg->get_block(i);
1599 Node *n = NULL;
1600 int j;
1601
1602 // Find the branch; ignore trailing NOPs.
1603 for (j = block->number_of_nodes() - 1; j >= 0; j--) {
1604 n = block->get_node(j);
1605 if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) {
1606 break;
1607 }
1608 }
1609
1610 // If we didn't find anything, continue
1611 if (j < 0) {
1612 continue;
1613 }
1614
1615 // Compute ExceptionHandlerTable subtable entry and add it
1616 // (skip empty blocks)
1617 if (n->is_Catch()) {
1618
1619 // Get the offset of the return from the call
1620 uint call_return = call_returns[block->_pre_order];
1621#ifdef ASSERT
1622 assert( call_return > 0, "no call seen for this basic block" );
1623 while (block->get_node(--j)->is_MachProj()) ;
1624 assert(block->get_node(j)->is_MachCall(), "CatchProj must follow call");
1625#endif
1626      // The last instruction is a CatchNode; find its CatchProjNodes
1627 int nof_succs = block->_num_succs;
1628 // allocate space
1629 GrowableArray<intptr_t> handler_bcis(nof_succs);
1630 GrowableArray<intptr_t> handler_pcos(nof_succs);
1631 // iterate through all successors
1632 for (int j = 0; j < nof_succs; j++) {
1633 Block* s = block->_succs[j];
1634 bool found_p = false;
1635 for (uint k = 1; k < s->num_preds(); k++) {
1636 Node* pk = s->pred(k);
1637 if (pk->is_CatchProj() && pk->in(0) == n) {
1638 const CatchProjNode* p = pk->as_CatchProj();
1639 found_p = true;
1640 // add the corresponding handler bci & pco information
1641 if (p->_con != CatchProjNode::fall_through_index) {
1642 // p leads to an exception handler (and is not fall through)
1643 assert(s == _cfg->get_block(s->_pre_order), "bad numbering");
1644 // no duplicates, please
1645 if (!handler_bcis.contains(p->handler_bci())) {
1646 uint block_num = s->non_connector()->_pre_order;
1647 handler_bcis.append(p->handler_bci());
1648 handler_pcos.append(blk_labels[block_num].loc_pos());
1649 }
1650 }
1651 }
1652 }
1653 assert(found_p, "no matching predecessor found");
1654 // Note: Due to empty block removal, one block may have
1655 // several CatchProj inputs, from the same Catch.
1656 }
1657
1658 // Set the offset of the return from the call
1659 assert(handler_bcis.find(-1) != -1, "must have default handler");
1660 _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos);
1661 continue;
1662 }
1663
1664 // Handle implicit null exception table updates
1665 if (n->is_MachNullCheck()) {
1666 uint block_num = block->non_connector_successor(0)->_pre_order;
1667 _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
1668 continue;
1669 }
1670 // Handle implicit exception table updates: trap instructions.
1671 if (n->is_Mach() && n->as_Mach()->is_TrapBasedCheckNode()) {
1672 uint block_num = block->non_connector_successor(0)->_pre_order;
1673 _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
1674 continue;
1675 }
1676 } // End of for all blocks fill in exception table entries
1677}
1678
1679// Static Variables
1680#ifndef PRODUCT
1681uint Scheduling::_total_nop_size = 0;
1682uint Scheduling::_total_method_size = 0;
1683uint Scheduling::_total_branches = 0;
1684uint Scheduling::_total_unconditional_delays = 0;
1685uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
1686#endif
1687
1688// Initializer for class Scheduling
1689
1690Scheduling::Scheduling(Arena *arena, Compile &compile)
1691 : _arena(arena),
1692 _cfg(compile.cfg()),
1693 _regalloc(compile.regalloc()),
1694 _scheduled(arena),
1695 _available(arena),
1696 _reg_node(arena),
1697 _pinch_free_list(arena),
1698 _next_node(NULL),
1699 _bundle_instr_count(0),
1700 _bundle_cycle_number(0),
1701 _bundle_use(0, 0, resource_count, &_bundle_use_elements[0])
1702#ifndef PRODUCT
1703 , _branches(0)
1704 , _unconditional_delays(0)
1705#endif
1706{
1707 // Create a MachNopNode
1708 _nop = new MachNopNode();
1709
1710 // Now that the nops are in the array, save the count
1711 // (but allow entries for the nops)
1712 _node_bundling_limit = compile.unique();
1713 uint node_max = _regalloc->node_regs_max_index();
1714
1715 compile.set_node_bundling_limit(_node_bundling_limit);
1716
1717 // This one is persistent within the Compile class
1718 _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max);
1719
1720 // Allocate space for fixed-size arrays
1721 _node_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
1722 _uses = NEW_ARENA_ARRAY(arena, short, node_max);
1723 _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
1724
1725 // Clear the arrays
1726 for (uint i = 0; i < node_max; i++) {
1727 ::new (&_node_bundling_base[i]) Bundle();
1728 }
1729 memset(_node_latency, 0, node_max * sizeof(unsigned short));
1730 memset(_uses, 0, node_max * sizeof(short));
1731 memset(_current_latency, 0, node_max * sizeof(unsigned short));
1732
1733 // Clear the bundling information
1734 memcpy(_bundle_use_elements, Pipeline_Use::elaborated_elements, sizeof(Pipeline_Use::elaborated_elements));
1735
1736 // Get the last node
1737 Block* block = _cfg->get_block(_cfg->number_of_blocks() - 1);
1738
1739 _next_node = block->get_node(block->number_of_nodes() - 1);
1740}
1741
1742#ifndef PRODUCT
1743// Scheduling destructor
1744Scheduling::~Scheduling() {
1745 _total_branches += _branches;
1746 _total_unconditional_delays += _unconditional_delays;
1747}
1748#endif
1749
1750// Step ahead "i" cycles
1751void Scheduling::step(uint i) {
1752
1753 Bundle *bundle = node_bundling(_next_node);
1754 bundle->set_starts_bundle();
1755
1756 // Update the bundle record, but leave the flags information alone
1757 if (_bundle_instr_count > 0) {
1758 bundle->set_instr_count(_bundle_instr_count);
1759 bundle->set_resources_used(_bundle_use.resourcesUsed());
1760 }
1761
1762 // Update the state information
1763 _bundle_instr_count = 0;
1764 _bundle_cycle_number += i;
1765 _bundle_use.step(i);
1766}
1767
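// Close out the current bundle and completely reset the pipeline resource
// state. Used when bundling cannot be carried over from the previously
// processed (successor) block.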
1768void Scheduling::step_and_clear() {
1769 Bundle *bundle = node_bundling(_next_node);
1770 bundle->set_starts_bundle();
1771
1772 // Update the bundle record
1773 if (_bundle_instr_count > 0) {
1774 bundle->set_instr_count(_bundle_instr_count);
1775 bundle->set_resources_used(_bundle_use.resourcesUsed());
1776
1777 _bundle_cycle_number += 1;
1778 }
1779
1780 // Clear the bundling information
1781 _bundle_instr_count = 0;
1782 _bundle_use.reset();
1783
1784 memcpy(_bundle_use_elements,
1785 Pipeline_Use::elaborated_elements,
1786 sizeof(Pipeline_Use::elaborated_elements));
1787}
1788
1789// Perform instruction scheduling and bundling over the sequence of
1790// instructions in backwards order.
1791void Compile::ScheduleAndBundle() {
1792
1793 // Don't optimize this if it isn't a method
1794 if (!_method)
1795 return;
1796
1797 // Don't optimize this if scheduling is disabled
1798 if (!do_scheduling())
1799 return;
1800
1801 // Scheduling code works only with pairs (16 bytes) maximum.
1802 if (max_vector_size() > 16)
1803 return;
1804
1805 TracePhase tp("isched", &timers[_t_instrSched]);
1806
1807 // Create a data structure for all the scheduling information
1808 Scheduling scheduling(Thread::current()->resource_area(), *this);
1809
1810  // Walk over all the basic blocks in reverse order, scheduling and
1811  // bundling the instructions in each
1812 scheduling.DoScheduling();
1813}
1814
1815// Compute the latency of all the instructions. This is fairly simple,
1816// because we already have a legal ordering. Walk over the instructions
1817// from first to last, and compute the latency of the instruction based
1818// on the latency of the preceding instruction(s).
1819void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
1820#ifndef PRODUCT
1821 if (_cfg->C->trace_opto_output())
1822 tty->print("# -> ComputeLocalLatenciesForward\n");
1823#endif
1824
1825 // Walk over all the schedulable instructions
1826 for( uint j=_bb_start; j < _bb_end; j++ ) {
1827
1828 // This is a kludge, forcing all latency calculations to start at 1.
1829 // Used to allow latency 0 to force an instruction to the beginning
1830 // of the bb
1831 uint latency = 1;
1832 Node *use = bb->get_node(j);
1833 uint nlen = use->len();
1834
1835 // Walk over all the inputs
1836 for ( uint k=0; k < nlen; k++ ) {
1837 Node *def = use->in(k);
1838 if (!def)
1839 continue;
1840
1841 uint l = _node_latency[def->_idx] + use->latency(k);
1842 if (latency < l)
1843 latency = l;
1844 }
1845
1846 _node_latency[use->_idx] = latency;
1847
1848#ifndef PRODUCT
1849 if (_cfg->C->trace_opto_output()) {
1850 tty->print("# latency %4d: ", latency);
1851 use->dump();
1852 }
1853#endif
1854 }
1855
1856#ifndef PRODUCT
1857 if (_cfg->C->trace_opto_output())
1858 tty->print("# <- ComputeLocalLatenciesForward\n");
1859#endif
1860
1861} // end ComputeLocalLatenciesForward
1862
1863// See if this node fits into the present instruction bundle
1864bool Scheduling::NodeFitsInBundle(Node *n) {
1865 uint n_idx = n->_idx;
1866
1867 // If this is the unconditional delay instruction, then it fits
1868 if (n == _unconditional_delay_slot) {
1869#ifndef PRODUCT
1870 if (_cfg->C->trace_opto_output())
1871 tty->print("# NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx);
1872#endif
1873 return (true);
1874 }
1875
1876 // If the node cannot be scheduled this cycle, skip it
1877 if (_current_latency[n_idx] > _bundle_cycle_number) {
1878#ifndef PRODUCT
1879 if (_cfg->C->trace_opto_output())
1880 tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
1881 n->_idx, _current_latency[n_idx], _bundle_cycle_number);
1882#endif
1883 return (false);
1884 }
1885
1886 const Pipeline *node_pipeline = n->pipeline();
1887
1888 uint instruction_count = node_pipeline->instructionCount();
1889 if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
1890 instruction_count = 0;
1891 else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
1892 instruction_count++;
1893
1894 if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) {
1895#ifndef PRODUCT
1896 if (_cfg->C->trace_opto_output())
1897 tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
1898 n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
1899#endif
1900 return (false);
1901 }
1902
1903 // Don't allow non-machine nodes to be handled this way
1904 if (!n->is_Mach() && instruction_count == 0)
1905 return (false);
1906
1907 // See if there is any overlap
1908 uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse());
1909
1910 if (delay > 0) {
1911#ifndef PRODUCT
1912 if (_cfg->C->trace_opto_output())
1913 tty->print("# NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx);
1914#endif
1915 return false;
1916 }
1917
1918#ifndef PRODUCT
1919 if (_cfg->C->trace_opto_output())
1920 tty->print("# NodeFitsInBundle [%4d]: TRUE\n", n_idx);
1921#endif
1922
1923 return true;
1924}
1925
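// Pick the next instruction to bundle: prefer the first available node
// (skipping projections) that fits in the current bundle; otherwise fall
// back to the lowest-latency node at the head of the list.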
1926Node * Scheduling::ChooseNodeToBundle() {
1927 uint siz = _available.size();
1928
1929 if (siz == 0) {
1930
1931#ifndef PRODUCT
1932 if (_cfg->C->trace_opto_output())
1933 tty->print("# ChooseNodeToBundle: NULL\n");
1934#endif
1935 return (NULL);
1936 }
1937
1938 // Fast path, if only 1 instruction in the bundle
1939 if (siz == 1) {
1940#ifndef PRODUCT
1941 if (_cfg->C->trace_opto_output()) {
1942 tty->print("# ChooseNodeToBundle (only 1): ");
1943 _available[0]->dump();
1944 }
1945#endif
1946 return (_available[0]);
1947 }
1948
1949 // Don't bother, if the bundle is already full
1950 if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) {
1951 for ( uint i = 0; i < siz; i++ ) {
1952 Node *n = _available[i];
1953
1954 // Skip projections, we'll handle them another way
1955 if (n->is_Proj())
1956 continue;
1957
1958      // This presupposes that instructions are inserted into the
1959      // available list in legality order; i.e., instructions that
1960      // must be inserted first are at the head of the list
1961 if (NodeFitsInBundle(n)) {
1962#ifndef PRODUCT
1963 if (_cfg->C->trace_opto_output()) {
1964 tty->print("# ChooseNodeToBundle: ");
1965 n->dump();
1966 }
1967#endif
1968 return (n);
1969 }
1970 }
1971 }
1972
1973 // Nothing fits in this bundle, choose the highest priority
1974#ifndef PRODUCT
1975 if (_cfg->C->trace_opto_output()) {
1976 tty->print("# ChooseNodeToBundle: ");
1977 _available[0]->dump();
1978 }
1979#endif
1980
1981 return _available[0];
1982}
1983
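// Make a node available for scheduling, inserting it into the _available
// list in order of increasing latency.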
1984void Scheduling::AddNodeToAvailableList(Node *n) {
1985 assert( !n->is_Proj(), "projections never directly made available" );
1986#ifndef PRODUCT
1987 if (_cfg->C->trace_opto_output()) {
1988 tty->print("# AddNodeToAvailableList: ");
1989 n->dump();
1990 }
1991#endif
1992
1993 int latency = _current_latency[n->_idx];
1994
1995 // Insert in latency order (insertion sort)
1996 uint i;
1997 for ( i=0; i < _available.size(); i++ )
1998 if (_current_latency[_available[i]->_idx] > latency)
1999 break;
2000
2001 // Special Check for compares following branches
2002 if( n->is_Mach() && _scheduled.size() > 0 ) {
2003 int op = n->as_Mach()->ideal_Opcode();
2004 Node *last = _scheduled[0];
2005 if( last->is_MachIf() && last->in(1) == n &&
2006 ( op == Op_CmpI ||
2007 op == Op_CmpU ||
2008 op == Op_CmpUL ||
2009 op == Op_CmpP ||
2010 op == Op_CmpF ||
2011 op == Op_CmpD ||
2012 op == Op_CmpL ) ) {
2013
2014 // Recalculate position, moving to front of same latency
2015 for ( i=0 ; i < _available.size(); i++ )
2016 if (_current_latency[_available[i]->_idx] >= latency)
2017 break;
2018 }
2019 }
2020
2021 // Insert the node in the available list
2022 _available.insert(i, n);
2023
2024#ifndef PRODUCT
2025 if (_cfg->C->trace_opto_output())
2026 dump_available();
2027#endif
2028}
2029
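// After bundling 'n', decrement the block-local use counts of its inputs
// (looking through projections), update each input's latency requirement,
// and make an input available once its last block-local use has been scheduled.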
2030void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
2031 for ( uint i=0; i < n->len(); i++ ) {
2032 Node *def = n->in(i);
2033 if (!def) continue;
2034 if( def->is_Proj() ) // If this is a machine projection, then
2035 def = def->in(0); // propagate usage thru to the base instruction
2036
2037 if(_cfg->get_block_for_node(def) != bb) { // Ignore if not block-local
2038 continue;
2039 }
2040
2041 // Compute the latency
2042 uint l = _bundle_cycle_number + n->latency(i);
2043 if (_current_latency[def->_idx] < l)
2044 _current_latency[def->_idx] = l;
2045
2046    // If the def now has no remaining block-local uses, make it available
2047 if ((--_uses[def->_idx]) == 0)
2048 AddNodeToAvailableList(def);
2049 }
2050}
2051
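// Take 'n' off the available list and place it into the current bundle,
// stepping the pipeline state when it does not fit. On machines with
// branch delay slots this also tries to fill the delay slot of a
// MachBranch. The node (plus any trailing projections) is then pushed
// onto _scheduled and the use counts of its inputs are decremented.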
2052void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
2053#ifndef PRODUCT
2054 if (_cfg->C->trace_opto_output()) {
2055 tty->print("# AddNodeToBundle: ");
2056 n->dump();
2057 }
2058#endif
2059
2060 // Remove this from the available list
2061 uint i;
2062 for (i = 0; i < _available.size(); i++)
2063 if (_available[i] == n)
2064 break;
2065 assert(i < _available.size(), "entry in _available list not found");
2066 _available.remove(i);
2067
2068 // See if this fits in the current bundle
2069 const Pipeline *node_pipeline = n->pipeline();
2070 const Pipeline_Use& node_usage = node_pipeline->resourceUse();
2071
2072 // Check for instructions to be placed in the delay slot. We
2073 // do this before we actually schedule the current instruction,
2074 // because the delay slot follows the current instruction.
2075 if (Pipeline::_branch_has_delay_slot &&
2076 node_pipeline->hasBranchDelay() &&
2077 !_unconditional_delay_slot) {
2078
2079 uint siz = _available.size();
2080
2081 // Conditional branches can support an instruction that
2082    // is unconditionally executed and not dependent on the
2083 // branch, OR a conditionally executed instruction if
2084 // the branch is taken. In practice, this means that
2085 // the first instruction at the branch target is
2086 // copied to the delay slot, and the branch goes to
2087 // the instruction after that at the branch target
2088 if ( n->is_MachBranch() ) {
2089
2090 assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" );
2091 assert( !n->is_Catch(), "should not look for delay slot for Catch" );
2092
2093#ifndef PRODUCT
2094 _branches++;
2095#endif
2096
2097 // At least 1 instruction is on the available list
2098 // that is not dependent on the branch
2099 for (uint i = 0; i < siz; i++) {
2100 Node *d = _available[i];
2101 const Pipeline *avail_pipeline = d->pipeline();
2102
2103        // Don't allow safepoints in the branch shadow; that will
2104 // cause a number of difficulties
2105 if ( avail_pipeline->instructionCount() == 1 &&
2106 !avail_pipeline->hasMultipleBundles() &&
2107 !avail_pipeline->hasBranchDelay() &&
2108 Pipeline::instr_has_unit_size() &&
2109 d->size(_regalloc) == Pipeline::instr_unit_size() &&
2110 NodeFitsInBundle(d) &&
2111 !node_bundling(d)->used_in_delay()) {
2112
2113 if (d->is_Mach() && !d->is_MachSafePoint()) {
2114 // A node that fits in the delay slot was found, so we need to
2115 // set the appropriate bits in the bundle pipeline information so
2116 // that it correctly indicates resource usage. Later, when we
2117 // attempt to add this instruction to the bundle, we will skip
2118 // setting the resource usage.
2119 _unconditional_delay_slot = d;
2120 node_bundling(n)->set_use_unconditional_delay();
2121 node_bundling(d)->set_used_in_unconditional_delay();
2122 _bundle_use.add_usage(avail_pipeline->resourceUse());
2123 _current_latency[d->_idx] = _bundle_cycle_number;
2124 _next_node = d;
2125 ++_bundle_instr_count;
2126#ifndef PRODUCT
2127 _unconditional_delays++;
2128#endif
2129 break;
2130 }
2131 }
2132 }
2133 }
2134
2135 // No delay slot, add a nop to the usage
2136 if (!_unconditional_delay_slot) {
2137 // See if adding an instruction in the delay slot will overflow
2138 // the bundle.
2139 if (!NodeFitsInBundle(_nop)) {
2140#ifndef PRODUCT
2141 if (_cfg->C->trace_opto_output())
2142 tty->print("# *** STEP(1 instruction for delay slot) ***\n");
2143#endif
2144 step(1);
2145 }
2146
2147 _bundle_use.add_usage(_nop->pipeline()->resourceUse());
2148 _next_node = _nop;
2149 ++_bundle_instr_count;
2150 }
2151
2152 // See if the instruction in the delay slot requires a
2153 // step of the bundles
2154 if (!NodeFitsInBundle(n)) {
2155#ifndef PRODUCT
2156 if (_cfg->C->trace_opto_output())
2157 tty->print("# *** STEP(branch won't fit) ***\n");
2158#endif
2159 // Update the state information
2160 _bundle_instr_count = 0;
2161 _bundle_cycle_number += 1;
2162 _bundle_use.step(1);
2163 }
2164 }
2165
2166 // Get the number of instructions
2167 uint instruction_count = node_pipeline->instructionCount();
2168 if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
2169 instruction_count = 0;
2170
2171 // Compute the latency information
2172 uint delay = 0;
2173
2174 if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) {
2175 int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number;
2176 if (relative_latency < 0)
2177 relative_latency = 0;
2178
2179 delay = _bundle_use.full_latency(relative_latency, node_usage);
2180
2181 // Does not fit in this bundle, start a new one
2182 if (delay > 0) {
2183 step(delay);
2184
2185#ifndef PRODUCT
2186 if (_cfg->C->trace_opto_output())
2187 tty->print("# *** STEP(%d) ***\n", delay);
2188#endif
2189 }
2190 }
2191
2192 // If this was placed in the delay slot, ignore it
2193 if (n != _unconditional_delay_slot) {
2194
2195 if (delay == 0) {
2196 if (node_pipeline->hasMultipleBundles()) {
2197#ifndef PRODUCT
2198 if (_cfg->C->trace_opto_output())
2199 tty->print("# *** STEP(multiple instructions) ***\n");
2200#endif
2201 step(1);
2202 }
2203
2204 else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) {
2205#ifndef PRODUCT
2206 if (_cfg->C->trace_opto_output())
2207 tty->print("# *** STEP(%d >= %d instructions) ***\n",
2208 instruction_count + _bundle_instr_count,
2209 Pipeline::_max_instrs_per_cycle);
2210#endif
2211 step(1);
2212 }
2213 }
2214
2215 if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
2216 _bundle_instr_count++;
2217
2218 // Set the node's latency
2219 _current_latency[n->_idx] = _bundle_cycle_number;
2220
2221 // Now merge the functional unit information
2222 if (instruction_count > 0 || !node_pipeline->mayHaveNoCode())
2223 _bundle_use.add_usage(node_usage);
2224
2225 // Increment the number of instructions in this bundle
2226 _bundle_instr_count += instruction_count;
2227
2228 // Remember this node for later
2229 if (n->is_Mach())
2230 _next_node = n;
2231 }
2232
2233 // It's possible to have a BoxLock in the graph and in the _bbs mapping but
2234 // not in the bb->_nodes array. This happens for debug-info-only BoxLocks.
2235 // 'Schedule' them (basically ignore in the schedule) but do not insert them
2236 // into the block. All other scheduled nodes get put in the schedule here.
2237 int op = n->Opcode();
2238 if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR
2239      (op != Op_Node &&         // Not an unused antidependence node and
2240 // not an unallocated boxlock
2241 (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) {
2242
2243 // Push any trailing projections
2244 if( bb->get_node(bb->number_of_nodes()-1) != n ) {
2245 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2246 Node *foi = n->fast_out(i);
2247 if( foi->is_Proj() )
2248 _scheduled.push(foi);
2249 }
2250 }
2251
2252 // Put the instruction in the schedule list
2253 _scheduled.push(n);
2254 }
2255
2256#ifndef PRODUCT
2257 if (_cfg->C->trace_opto_output())
2258 dump_available();
2259#endif
2260
2261 // Walk all the definitions, decrementing use counts, and
2262 // if a definition has a 0 use count, place it in the available list.
2263 DecrementUseCounts(n,bb);
2264}
2265
2266// This method sets the use count within a basic block. We will ignore all
2267// uses outside the current basic block. As we are doing a backwards walk,
2268// any node we reach that has a use count of 0 may be scheduled. This also
2269// avoids the problem of cyclic references from phi nodes, as long as phi
2270// nodes are at the front of the basic block. This method also initializes
2271// the available list to the set of instructions that have no uses within this
2272// basic block.
2273void Scheduling::ComputeUseCount(const Block *bb) {
2274#ifndef PRODUCT
2275 if (_cfg->C->trace_opto_output())
2276 tty->print("# -> ComputeUseCount\n");
2277#endif
2278
2279 // Clear the list of available and scheduled instructions, just in case
2280 _available.clear();
2281 _scheduled.clear();
2282
2283 // No delay slot specified
2284 _unconditional_delay_slot = NULL;
2285
2286#ifdef ASSERT
2287 for( uint i=0; i < bb->number_of_nodes(); i++ )
2288 assert( _uses[bb->get_node(i)->_idx] == 0, "_use array not clean" );
2289#endif
2290
2291  // Force the _uses count to never go to zero for unschedulable pieces
2292 // of the block
2293 for( uint k = 0; k < _bb_start; k++ )
2294 _uses[bb->get_node(k)->_idx] = 1;
2295 for( uint l = _bb_end; l < bb->number_of_nodes(); l++ )
2296 _uses[bb->get_node(l)->_idx] = 1;
2297
2298 // Iterate backwards over the instructions in the block. Don't count the
2299 // branch projections at end or the block header instructions.
2300 for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
2301 Node *n = bb->get_node(j);
2302 if( n->is_Proj() ) continue; // Projections handled another way
2303
2304 // Account for all uses
2305 for ( uint k = 0; k < n->len(); k++ ) {
2306 Node *inp = n->in(k);
2307 if (!inp) continue;
2308 assert(inp != n, "no cycles allowed" );
2309 if (_cfg->get_block_for_node(inp) == bb) { // Block-local use?
2310 if (inp->is_Proj()) { // Skip through Proj's
2311 inp = inp->in(0);
2312 }
2313 ++_uses[inp->_idx]; // Count 1 block-local use
2314 }
2315 }
2316
2317 // If this instruction has a 0 use count, then it is available
2318 if (!_uses[n->_idx]) {
2319 _current_latency[n->_idx] = _bundle_cycle_number;
2320 AddNodeToAvailableList(n);
2321 }
2322
2323#ifndef PRODUCT
2324 if (_cfg->C->trace_opto_output()) {
2325 tty->print("# uses: %3d: ", _uses[n->_idx]);
2326 n->dump();
2327 }
2328#endif
2329 }
2330
2331#ifndef PRODUCT
2332 if (_cfg->C->trace_opto_output())
2333 tty->print("# <- ComputeUseCount\n");
2334#endif
2335}
2336
2337// This routine performs scheduling on each basic block in reverse order,
2338// using instruction latencies and taking into account function unit
2339// availability.
2340void Scheduling::DoScheduling() {
2341#ifndef PRODUCT
2342 if (_cfg->C->trace_opto_output())
2343 tty->print("# -> DoScheduling\n");
2344#endif
2345
2346 Block *succ_bb = NULL;
2347 Block *bb;
2348
2349 // Walk over all the basic blocks in reverse order
2350 for (int i = _cfg->number_of_blocks() - 1; i >= 0; succ_bb = bb, i--) {
2351 bb = _cfg->get_block(i);
2352
2353#ifndef PRODUCT
2354 if (_cfg->C->trace_opto_output()) {
2355 tty->print("# Schedule BB#%03d (initial)\n", i);
2356 for (uint j = 0; j < bb->number_of_nodes(); j++) {
2357 bb->get_node(j)->dump();
2358 }
2359 }
2360#endif
2361
2362 // On the head node, skip processing
2363 if (bb == _cfg->get_root_block()) {
2364 continue;
2365 }
2366
2367 // Skip empty, connector blocks
2368 if (bb->is_connector())
2369 continue;
2370
2371 // If the following block is not the sole successor of
2372 // this one, then reset the pipeline information
2373 if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) {
2374#ifndef PRODUCT
2375 if (_cfg->C->trace_opto_output()) {
2376 tty->print("*** bundle start of next BB, node %d, for %d instructions\n",
2377 _next_node->_idx, _bundle_instr_count);
2378 }
2379#endif
2380 step_and_clear();
2381 }
2382
2383 // Leave untouched the starting instruction, any Phis, a CreateEx node
2384 // or Top. bb->get_node(_bb_start) is the first schedulable instruction.
2385 _bb_end = bb->number_of_nodes()-1;
2386 for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) {
2387 Node *n = bb->get_node(_bb_start);
2388      // Things not matched, like PhiNodes and ProjNodes, don't get scheduled.
2389 // Also, MachIdealNodes do not get scheduled
2390 if( !n->is_Mach() ) continue; // Skip non-machine nodes
2391 MachNode *mach = n->as_Mach();
2392 int iop = mach->ideal_Opcode();
2393 if( iop == Op_CreateEx ) continue; // CreateEx is pinned
2394 if( iop == Op_Con ) continue; // Do not schedule Top
2395 if( iop == Op_Node && // Do not schedule PhiNodes, ProjNodes
2396 mach->pipeline() == MachNode::pipeline_class() &&
2397 !n->is_SpillCopy() && !n->is_MachMerge() ) // Breakpoints, Prolog, etc
2398 continue;
2399 break; // Funny loop structure to be sure...
2400 }
2401 // Compute last "interesting" instruction in block - last instruction we
2402 // might schedule. _bb_end points just after last schedulable inst. We
2403 // normally schedule conditional branches (despite them being forced last
2404 // in the block), because they have delay slots we can fill. Calls all
2405 // have their delay slots filled in the template expansions, so we don't
2406 // bother scheduling them.
2407 Node *last = bb->get_node(_bb_end);
2408 // Ignore trailing NOPs.
2409 while (_bb_end > 0 && last->is_Mach() &&
2410 last->as_Mach()->ideal_Opcode() == Op_Con) {
2411 last = bb->get_node(--_bb_end);
2412 }
2413 assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
2414 if( last->is_Catch() ||
2415 (last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
2416 // There might be a prior call. Skip it.
2417 while (_bb_start < _bb_end && bb->get_node(--_bb_end)->is_MachProj());
2418 } else if( last->is_MachNullCheck() ) {
2419      // Back up so the last null-checked memory instruction is
2420 // outside the schedulable range. Skip over the nullcheck,
2421 // projection, and the memory nodes.
2422 Node *mem = last->in(1);
2423 do {
2424 _bb_end--;
2425 } while (mem != bb->get_node(_bb_end));
2426 } else {
2427 // Set _bb_end to point after last schedulable inst.
2428 _bb_end++;
2429 }
2430
2431 assert( _bb_start <= _bb_end, "inverted block ends" );
2432
2433 // Compute the register antidependencies for the basic block
2434 ComputeRegisterAntidependencies(bb);
2435 if (_cfg->C->failing()) return; // too many D-U pinch points
2436
2437 // Compute intra-bb latencies for the nodes
2438 ComputeLocalLatenciesForward(bb);
2439
2440 // Compute the usage within the block, and set the list of all nodes
2441 // in the block that have no uses within the block.
2442 ComputeUseCount(bb);
2443
2444 // Schedule the remaining instructions in the block
2445 while ( _available.size() > 0 ) {
2446 Node *n = ChooseNodeToBundle();
2447 guarantee(n != NULL, "no nodes available");
2448 AddNodeToBundle(n,bb);
2449 }
2450
2451 assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" );
2452#ifdef ASSERT
2453 for( uint l = _bb_start; l < _bb_end; l++ ) {
2454 Node *n = bb->get_node(l);
2455 uint m;
2456 for( m = 0; m < _bb_end-_bb_start; m++ )
2457 if( _scheduled[m] == n )
2458 break;
2459 assert( m < _bb_end-_bb_start, "instruction missing in schedule" );
2460 }
2461#endif
2462
2463 // Now copy the instructions (in reverse order) back to the block
2464 for ( uint k = _bb_start; k < _bb_end; k++ )
2465 bb->map_node(_scheduled[_bb_end-k-1], k);
2466
2467#ifndef PRODUCT
2468 if (_cfg->C->trace_opto_output()) {
2469 tty->print("# Schedule BB#%03d (final)\n", i);
2470 uint current = 0;
2471 for (uint j = 0; j < bb->number_of_nodes(); j++) {
2472 Node *n = bb->get_node(j);
2473 if( valid_bundle_info(n) ) {
2474 Bundle *bundle = node_bundling(n);
2475 if (bundle->instr_count() > 0 || bundle->flags() > 0) {
2476 tty->print("*** Bundle: ");
2477 bundle->dump();
2478 }
2479 n->dump();
2480 }
2481 }
2482 }
2483#endif
2484#ifdef ASSERT
2485 verify_good_schedule(bb,"after block local scheduling");
2486#endif
2487 }
2488
2489#ifndef PRODUCT
2490 if (_cfg->C->trace_opto_output())
2491 tty->print("# <- DoScheduling\n");
2492#endif
2493
2494 // Record final node-bundling array location
2495 _regalloc->C->set_node_bundling_base(_node_bundling_base);
2496
2497} // end DoScheduling
2498
2499// Verify that no live-range used in the block is killed in the block by a
2500// wrong DEF. This doesn't verify live-ranges that span blocks.
2501
2502// Check for edge existence. Used to avoid adding redundant precedence edges.
2503static bool edge_from_to( Node *from, Node *to ) {
2504 for( uint i=0; i<from->len(); i++ )
2505 if( from->in(i) == to )
2506 return true;
2507 return false;
2508}
2509
2510#ifdef ASSERT
2511void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
2512 // Check for bad kills
2513 if( OptoReg::is_valid(def) ) { // Ignore stores & control flow
2514 Node *prior_use = _reg_node[def];
2515 if( prior_use && !edge_from_to(prior_use,n) ) {
2516 tty->print("%s = ",OptoReg::as_VMReg(def)->name());
2517 n->dump();
2518 tty->print_cr("...");
2519 prior_use->dump();
2520 assert(edge_from_to(prior_use,n), "%s", msg);
2521 }
2522 _reg_node.map(def,NULL); // Kill live USEs
2523 }
2524}
2525
2526void Scheduling::verify_good_schedule( Block *b, const char *msg ) {
2527
2528 // Zap to something reasonable for the verify code
2529 _reg_node.clear();
2530
2531 // Walk over the block backwards. Check to make sure each DEF doesn't
2532 // kill a live value (other than the one it's supposed to). Add each
2533 // USE to the live set.
2534 for( uint i = b->number_of_nodes()-1; i >= _bb_start; i-- ) {
2535 Node *n = b->get_node(i);
2536 int n_op = n->Opcode();
2537 if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
2538 // Fat-proj kills a slew of registers
2539 RegMask rm = n->out_RegMask();// Make local copy
2540 while( rm.is_NotEmpty() ) {
2541 OptoReg::Name kill = rm.find_first_elem();
2542 rm.Remove(kill);
2543 verify_do_def( n, kill, msg );
2544 }
2545 } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes
2546 // Get DEF'd registers the normal way
2547 verify_do_def( n, _regalloc->get_reg_first(n), msg );
2548 verify_do_def( n, _regalloc->get_reg_second(n), msg );
2549 }
2550
2551 // Now make all USEs live
2552 for( uint i=1; i<n->req(); i++ ) {
2553 Node *def = n->in(i);
2554 assert(def != 0, "input edge required");
2555 OptoReg::Name reg_lo = _regalloc->get_reg_first(def);
2556 OptoReg::Name reg_hi = _regalloc->get_reg_second(def);
2557 if( OptoReg::is_valid(reg_lo) ) {
2558 assert(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), "%s", msg);
2559 _reg_node.map(reg_lo,n);
2560 }
2561 if( OptoReg::is_valid(reg_hi) ) {
2562 assert(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), "%s", msg);
2563 _reg_node.map(reg_hi,n);
2564 }
2565 }
2566
2567 }
2568
2569 // Zap to something reasonable for the Antidependence code
2570 _reg_node.clear();
2571}
2572#endif
2573
2574// Conditionally add precedence edges. Avoid putting edges on Projs.
2575static void add_prec_edge_from_to( Node *from, Node *to ) {
2576 if( from->is_Proj() ) { // Put precedence edge on Proj's input
2577 assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" );
2578 from = from->in(0);
2579 }
2580 if( from != to && // No cycles (for things like LD L0,[L0+4] )
2581 !edge_from_to( from, to ) ) // Avoid duplicate edge
2582 from->add_prec(to);
2583}
2584
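// Record a DEF or KILL of def_reg by 'def'. If there is no pinch point
// for this register in the current block (or this is a true def), the
// node itself becomes the optimistic pinch point. Otherwise a real pinch
// Node is materialized (new or from the free list) and precedence edges
// are added so that earlier uses of the register stay ahead of the kill.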
2585void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
2586 if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
2587 return;
2588
2589 Node *pinch = _reg_node[def_reg]; // Get pinch point
2590 if ((pinch == NULL) || _cfg->get_block_for_node(pinch) != b || // No pinch-point yet?
2591 is_def ) { // Check for a true def (not a kill)
2592 _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
2593 return;
2594 }
2595
2596 Node *kill = def; // Rename 'def' to more descriptive 'kill'
2597 debug_only( def = (Node*)((intptr_t)0xdeadbeef); )
2598
2599 // After some number of kills there _may_ be a later def
2600 Node *later_def = NULL;
2601
2602 // Finding a kill requires a real pinch-point.
2603 // Check for not already having a pinch-point.
2604 // Pinch points are Op_Node's.
2605 if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point?
2606 later_def = pinch; // Must be def/kill as optimistic pinch-point
2607 if ( _pinch_free_list.size() > 0) {
2608 pinch = _pinch_free_list.pop();
2609 } else {
2610 pinch = new Node(1); // Pinch point to-be
2611 }
2612 if (pinch->_idx >= _regalloc->node_regs_max_index()) {
2613 _cfg->C->record_method_not_compilable("too many D-U pinch points");
2614 return;
2615 }
2616 _cfg->map_node_to_block(pinch, b); // Pretend it's valid in this block (lazy init)
2617 _reg_node.map(def_reg,pinch); // Record pinch-point
2618 //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
2619 if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
2620 pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call
2621 add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch
2622 later_def = NULL; // and no later def
2623 }
2624 pinch->set_req(0,later_def); // Hook later def so we can find it
2625 } else { // Else have valid pinch point
2626 if( pinch->in(0) ) // If there is a later-def
2627 later_def = pinch->in(0); // Get it
2628 }
2629
2630 // Add output-dependence edge from later def to kill
2631 if( later_def ) // If there is some original def
2632 add_prec_edge_from_to(later_def,kill); // Add edge from def to kill
2633
2634 // See if current kill is also a use, and so is forced to be the pinch-point.
2635 if( pinch->Opcode() == Op_Node ) {
2636 Node *uses = kill->is_Proj() ? kill->in(0) : kill;
2637 for( uint i=1; i<uses->req(); i++ ) {
2638 if( _regalloc->get_reg_first(uses->in(i)) == def_reg ||
2639 _regalloc->get_reg_second(uses->in(i)) == def_reg ) {
2640 // Yes, found a use/kill pinch-point
2641 pinch->set_req(0,NULL); //
2642 pinch->replace_by(kill); // Move anti-dep edges up
2643 pinch = kill;
2644 _reg_node.map(def_reg,pinch);
2645 return;
2646 }
2647 }
2648 }
2649
2650 // Add edge from kill to pinch-point
2651 add_prec_edge_from_to(kill,pinch);
2652}
2653
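// Record a USE of use_reg by 'use'. If the register has a pinch point (or
// a later DEF/KILL) in this block, add a precedence edge from the pinch
// point to the use so the use cannot slide past the following kill.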
2654void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
2655 if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
2656 return;
2657 Node *pinch = _reg_node[use_reg]; // Get pinch point
2658 // Check for no later def_reg/kill in block
2659 if ((pinch != NULL) && _cfg->get_block_for_node(pinch) == b &&
2660 // Use has to be block-local as well
2661 _cfg->get_block_for_node(use) == b) {
2662 if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?)
2663 pinch->req() == 1 ) { // pinch not yet in block?
2664 pinch->del_req(0); // yank pointer to later-def, also set flag
2665 // Insert the pinch-point in the block just after the last use
2666 b->insert_node(pinch, b->find_node(use) + 1);
2667 _bb_end++; // Increase size scheduled region in block
2668 }
2669
2670 add_prec_edge_from_to(pinch,use);
2671 }
2672}
2673
2674// We insert antidependences between the reads and following write of
2675// allocated registers to prevent illegal code motion. Hopefully, the
2676// number of added references should be fairly small, especially as we
2677// are only adding references within the current basic block.
2678void Scheduling::ComputeRegisterAntidependencies(Block *b) {
2679
2680#ifdef ASSERT
2681 verify_good_schedule(b,"before block local scheduling");
2682#endif
2683
2684 // A valid schedule, for each register independently, is an endless cycle
2685 // of: a def, then some uses (connected to the def by true dependencies),
2686 // then some kills (defs with no uses), finally the cycle repeats with a new
2687 // def. The uses are allowed to float relative to each other, as are the
2688 // kills. No use is allowed to slide past a kill (or def). This requires
2689 // antidependencies between all uses of a single def and all kills that
2690 // follow, up to the next def. More edges are redundant, because later defs
2691 // & kills are already serialized with true or antidependencies. To keep
2692 // the edge count down, we add a 'pinch point' node if there's more than
2693 // one use or more than one kill/def.
2694
2695 // We add dependencies in one bottom-up pass.
2696
2697  // For each instruction we handle its DEFs/KILLs, then its USEs.
2698
2699 // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this
2700 // register. If not, we record the DEF/KILL in _reg_node, the
2701 // register-to-def mapping. If there is a prior DEF/KILL, we insert a
2702 // "pinch point", a new Node that's in the graph but not in the block.
2703 // We put edges from the prior and current DEF/KILLs to the pinch point.
2704 // We put the pinch point in _reg_node. If there's already a pinch point
2705 // we merely add an edge from the current DEF/KILL to the pinch point.
2706
2707 // After doing the DEF/KILLs, we handle USEs. For each used register, we
2708 // put an edge from the pinch point to the USE.
2709
2710 // To be expedient, the _reg_node array is pre-allocated for the whole
2711 // compilation. _reg_node is lazily initialized; it either contains a NULL,
2712 // or a valid def/kill/pinch-point, or a leftover node from some prior
2713 // block. Leftover node from some prior block is treated like a NULL (no
2714 // prior def, so no anti-dependence needed). Valid def is distinguished by
2715 // it being in the current block.
2716 bool fat_proj_seen = false;
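  // Track the most recent safepoint seen while walking backwards, so that
  // derived-pointer computations below can be ordered relative to it with
  // precedence edges.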
2717 uint last_safept = _bb_end-1;
2718 Node* end_node = (_bb_end-1 >= _bb_start) ? b->get_node(last_safept) : NULL;
2719 Node* last_safept_node = end_node;
2720 for( uint i = _bb_end-1; i >= _bb_start; i-- ) {
2721 Node *n = b->get_node(i);
2722 int is_def = n->outcnt(); // def if some uses prior to adding precedence edges
2723 if( n->is_MachProj() && n->ideal_reg() == MachProjNode::fat_proj ) {
2724 // Fat-proj kills a slew of registers
2725 // This can add edges to 'n' and obscure whether or not it was a def,
2726 // hence the is_def flag.
2727 fat_proj_seen = true;
2728 RegMask rm = n->out_RegMask();// Make local copy
2729 while( rm.is_NotEmpty() ) {
2730 OptoReg::Name kill = rm.find_first_elem();
2731 rm.Remove(kill);
2732 anti_do_def( b, n, kill, is_def );
2733 }
2734 } else {
2735 // Get DEF'd registers the normal way
2736 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def );
2737 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def );
2738 }
2739
2740 // Kill projections on a branch should appear to occur on the
2741 // branch, not afterwards, so grab the masks from the projections
2742 // and process them.
2743 if (n->is_MachBranch() || (n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_Jump)) {
2744 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2745 Node* use = n->fast_out(i);
2746 if (use->is_Proj()) {
2747 RegMask rm = use->out_RegMask();// Make local copy
2748 while( rm.is_NotEmpty() ) {
2749 OptoReg::Name kill = rm.find_first_elem();
2750 rm.Remove(kill);
2751 anti_do_def( b, n, kill, false );
2752 }
2753 }
2754 }
2755 }
2756
2757 // Check each register used by this instruction for a following DEF/KILL
2758 // that must occur afterward and requires an anti-dependence edge.
2759 for( uint j=0; j<n->req(); j++ ) {
2760 Node *def = n->in(j);
2761 if( def ) {
2762 assert( !def->is_MachProj() || def->ideal_reg() != MachProjNode::fat_proj, "" );
2763 anti_do_use( b, n, _regalloc->get_reg_first(def) );
2764 anti_do_use( b, n, _regalloc->get_reg_second(def) );
2765 }
2766 }
2767 // Do not allow defs of new derived values to float above GC
2768 // points unless the base is definitely available at the GC point.
2769
2770 Node *m = b->get_node(i);
2771
2772 // Add precedence edge from following safepoint to use of derived pointer
2773 if( last_safept_node != end_node &&
2774 m != last_safept_node) {
2775 for (uint k = 1; k < m->req(); k++) {
2776 const Type *t = m->in(k)->bottom_type();
2777 if( t->isa_oop_ptr() &&
2778 t->is_ptr()->offset() != 0 ) {
2779 last_safept_node->add_prec( m );
2780 break;
2781 }
2782 }
2783 }
2784
2785 if( n->jvms() ) { // Precedence edge from derived to safept
2786 // Check if last_safept_node was moved by pinch-point insertion in anti_do_use()
2787 if( b->get_node(last_safept) != last_safept_node ) {
2788 last_safept = b->find_node(last_safept_node);
2789 }
2790 for( uint j=last_safept; j > i; j-- ) {
2791 Node *mach = b->get_node(j);
2792 if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP )
2793 mach->add_prec( n );
2794 }
2795 last_safept = i;
2796 last_safept_node = m;
2797 }
2798 }
2799
2800 if (fat_proj_seen) {
2801 // Garbage collect pinch nodes that were not consumed.
2802 // They are usually created by a fat kill MachProj for a call.
2803 garbage_collect_pinch_nodes();
2804 }
2805}
2806
2807// Garbage collect pinch nodes for reuse by other blocks.
2808//
2809// The block scheduler's insertion of anti-dependence
2810// edges creates many pinch nodes when the block contains
2811// 2 or more Calls. A pinch node is used to prevent a
2812// combinatorial explosion of edges. If a set of kills for a
2813// register is anti-dependent on a set of uses (or defs), rather
2814// than adding an edge in the graph between each pair of kill
2815// and use (or def), a pinch is inserted between them:
2816//
2817// use1 use2 use3
2818// \ | /
2819// \ | /
2820// pinch
2821// / | \
2822// / | \
2823// kill1 kill2 kill3
2824//
2825// One pinch node is created per register killed when
2826// the second call is encountered during a backwards pass
2827// over the block. Most of these pinch nodes are never
2828// wired into the graph because the register is never
2829// used or def'ed in the block.
2830//
2831void Scheduling::garbage_collect_pinch_nodes() {
2832#ifndef PRODUCT
2833 if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
2834#endif
2835 int trace_cnt = 0;
2836 for (uint k = 0; k < _reg_node.Size(); k++) {
2837 Node* pinch = _reg_node[k];
2838 if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
2839        // no precedence input edges
2840 (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
2841 cleanup_pinch(pinch);
2842 _pinch_free_list.push(pinch);
2843 _reg_node.map(k, NULL);
2844#ifndef PRODUCT
2845 if (_cfg->C->trace_opto_output()) {
2846 trace_cnt++;
2847 if (trace_cnt > 40) {
2848 tty->print("\n");
2849 trace_cnt = 0;
2850 }
2851 tty->print(" %d", pinch->_idx);
2852 }
2853#endif
2854 }
2855 }
2856#ifndef PRODUCT
2857 if (_cfg->C->trace_opto_output()) tty->print("\n");
2858#endif
2859}
2860
2861// Clean up a pinch node for reuse.
2862void Scheduling::cleanup_pinch( Node *pinch ) {
2863 assert (pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking");
2864
2865 for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) {
2866 Node* use = pinch->last_out(i);
2867 uint uses_found = 0;
2868 for (uint j = use->req(); j < use->len(); j++) {
2869 if (use->in(j) == pinch) {
2870 use->rm_prec(j);
2871 uses_found++;
2872 }
2873 }
2874 assert(uses_found > 0, "must be a precedence edge");
2875 i -= uses_found; // we deleted 1 or more copies of this edge
2876 }
2877 // May have a later_def entry
2878 pinch->set_req(0, NULL);
2879}
2880
2881#ifndef PRODUCT
2882
2883void Scheduling::dump_available() const {
2884 tty->print("#Availist ");
2885 for (uint i = 0; i < _available.size(); i++)
2886 tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]);
2887 tty->cr();
2888}
2889
2890// Print Scheduling Statistics
2891void Scheduling::print_statistics() {
2892 // Print the size added by nops for bundling
2893 tty->print("Nops added %d bytes to total of %d bytes",
2894 _total_nop_size, _total_method_size);
2895 if (_total_method_size > 0)
2896 tty->print(", for %.2f%%",
2897 ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
2898 tty->print("\n");
2899
2900 // Print the number of branch shadows filled
2901 if (Pipeline::_branch_has_delay_slot) {
2902 tty->print("Of %d branches, %d had unconditional delay slots filled",
2903 _total_branches, _total_unconditional_delays);
2904 if (_total_branches > 0)
2905 tty->print(", for %.2f%%",
2906 ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
2907 tty->print("\n");
2908 }
2909
2910 uint total_instructions = 0, total_bundles = 0;
2911
2912 for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) {
2913 uint bundle_count = _total_instructions_per_bundle[i];
2914 total_instructions += bundle_count * i;
2915 total_bundles += bundle_count;
2916 }
2917
2918 if (total_bundles > 0)
2919 tty->print("Average ILP (excluding nops) is %.2f\n",
2920 ((double)total_instructions) / ((double)total_bundles));
2921}
2922#endif
2923