1 | /* |
2 | * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #include "precompiled.hpp" |
26 | #include "classfile/systemDictionary.hpp" |
27 | #include "compiler/compileLog.hpp" |
28 | #include "gc/shared/barrierSet.hpp" |
29 | #include "gc/shared/c2/barrierSetC2.hpp" |
30 | #include "memory/allocation.inline.hpp" |
31 | #include "memory/resourceArea.hpp" |
32 | #include "oops/objArrayKlass.hpp" |
33 | #include "opto/addnode.hpp" |
34 | #include "opto/arraycopynode.hpp" |
35 | #include "opto/cfgnode.hpp" |
36 | #include "opto/compile.hpp" |
37 | #include "opto/connode.hpp" |
38 | #include "opto/convertnode.hpp" |
39 | #include "opto/loopnode.hpp" |
40 | #include "opto/machnode.hpp" |
41 | #include "opto/matcher.hpp" |
42 | #include "opto/memnode.hpp" |
43 | #include "opto/mulnode.hpp" |
44 | #include "opto/narrowptrnode.hpp" |
45 | #include "opto/phaseX.hpp" |
46 | #include "opto/regmask.hpp" |
47 | #include "opto/rootnode.hpp" |
48 | #include "utilities/align.hpp" |
49 | #include "utilities/copy.hpp" |
50 | #include "utilities/macros.hpp" |
51 | #include "utilities/vmError.hpp" |
52 | #if INCLUDE_ZGC |
53 | #include "gc/z/c2/zBarrierSetC2.hpp" |
54 | #endif |
55 | |
56 | // Portions of code courtesy of Clifford Click |
57 | |
58 | // Optimization - Graph Style |
59 | |
60 | static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st); |
61 | |
62 | //============================================================================= |
63 | uint MemNode::size_of() const { return sizeof(*this); } |
64 | |
65 | const TypePtr *MemNode::adr_type() const { |
66 | Node* adr = in(Address); |
67 | if (adr == NULL) return NULL; // node is dead |
68 | const TypePtr* cross_check = NULL; |
69 | DEBUG_ONLY(cross_check = _adr_type); |
70 | return calculate_adr_type(adr->bottom_type(), cross_check); |
71 | } |
72 | |
73 | bool MemNode::check_if_adr_maybe_raw(Node* adr) { |
74 | if (adr != NULL) { |
75 | if (adr->bottom_type()->base() == Type::RawPtr || adr->bottom_type()->base() == Type::AnyPtr) { |
76 | return true; |
77 | } |
78 | } |
79 | return false; |
80 | } |
81 | |
82 | #ifndef PRODUCT |
83 | void MemNode::dump_spec(outputStream *st) const { |
84 | if (in(Address) == NULL) return; // node is dead |
85 | #ifndef ASSERT |
86 | // fake the missing field |
87 | const TypePtr* _adr_type = NULL; |
88 | if (in(Address) != NULL) |
89 | _adr_type = in(Address)->bottom_type()->isa_ptr(); |
90 | #endif |
91 | dump_adr_type(this, _adr_type, st); |
92 | |
93 | Compile* C = Compile::current(); |
94 | if (C->alias_type(_adr_type)->is_volatile()) { |
95 | st->print(" Volatile!" ); |
96 | } |
97 | if (_unaligned_access) { |
98 | st->print(" unaligned" ); |
99 | } |
100 | if (_mismatched_access) { |
101 | st->print(" mismatched" ); |
102 | } |
103 | if (_unsafe_access) { |
104 | st->print(" unsafe" ); |
105 | } |
106 | } |
107 | |
108 | void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) { |
109 | st->print(" @" ); |
110 | if (adr_type == NULL) { |
111 | st->print("NULL" ); |
112 | } else { |
113 | adr_type->dump_on(st); |
114 | Compile* C = Compile::current(); |
115 | Compile::AliasType* atp = NULL; |
116 | if (C->have_alias_type(adr_type)) atp = C->alias_type(adr_type); |
117 | if (atp == NULL) |
118 | st->print(", idx=?\?;" ); |
119 | else if (atp->index() == Compile::AliasIdxBot) |
120 | st->print(", idx=Bot;" ); |
121 | else if (atp->index() == Compile::AliasIdxTop) |
122 | st->print(", idx=Top;" ); |
123 | else if (atp->index() == Compile::AliasIdxRaw) |
124 | st->print(", idx=Raw;" ); |
125 | else { |
126 | ciField* field = atp->field(); |
127 | if (field) { |
128 | st->print(", name=" ); |
129 | field->print_name_on(st); |
130 | } |
131 | st->print(", idx=%d;" , atp->index()); |
132 | } |
133 | } |
134 | } |
135 | |
// Declared here, defined outside this file; called from the ASSERT-only
// diagnostics in step_through_mergemem().
void print_alias_types();
137 | |
138 | #endif |
139 | |
140 | Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypeOopPtr *t_oop, Node *load, PhaseGVN *phase) { |
141 | assert((t_oop != NULL), "sanity" ); |
142 | bool is_instance = t_oop->is_known_instance_field(); |
143 | bool is_boxed_value_load = t_oop->is_ptr_to_boxed_value() && |
144 | (load != NULL) && load->is_Load() && |
145 | (phase->is_IterGVN() != NULL); |
146 | if (!(is_instance || is_boxed_value_load)) |
147 | return mchain; // don't try to optimize non-instance types |
148 | uint instance_id = t_oop->instance_id(); |
149 | Node *start_mem = phase->C->start()->proj_out_or_null(TypeFunc::Memory); |
150 | Node *prev = NULL; |
151 | Node *result = mchain; |
152 | while (prev != result) { |
153 | prev = result; |
154 | if (result == start_mem) |
155 | break; // hit one of our sentinels |
156 | // skip over a call which does not affect this memory slice |
157 | if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) { |
158 | Node *proj_in = result->in(0); |
159 | if (proj_in->is_Allocate() && proj_in->_idx == instance_id) { |
160 | break; // hit one of our sentinels |
161 | } else if (proj_in->is_Call()) { |
162 | // ArrayCopyNodes processed here as well |
163 | CallNode *call = proj_in->as_Call(); |
164 | if (!call->may_modify(t_oop, phase)) { // returns false for instances |
165 | result = call->in(TypeFunc::Memory); |
166 | } |
167 | } else if (proj_in->is_Initialize()) { |
168 | AllocateNode* alloc = proj_in->as_Initialize()->allocation(); |
169 | // Stop if this is the initialization for the object instance which |
170 | // contains this memory slice, otherwise skip over it. |
171 | if ((alloc == NULL) || (alloc->_idx == instance_id)) { |
172 | break; |
173 | } |
174 | if (is_instance) { |
175 | result = proj_in->in(TypeFunc::Memory); |
176 | } else if (is_boxed_value_load) { |
177 | Node* klass = alloc->in(AllocateNode::KlassNode); |
178 | const TypeKlassPtr* tklass = phase->type(klass)->is_klassptr(); |
179 | if (tklass->klass_is_exact() && !tklass->klass()->equals(t_oop->klass())) { |
180 | result = proj_in->in(TypeFunc::Memory); // not related allocation |
181 | } |
182 | } |
183 | } else if (proj_in->is_MemBar()) { |
184 | ArrayCopyNode* ac = NULL; |
185 | if (ArrayCopyNode::may_modify(t_oop, proj_in->as_MemBar(), phase, ac)) { |
186 | break; |
187 | } |
188 | result = proj_in->in(TypeFunc::Memory); |
189 | } else { |
190 | assert(false, "unexpected projection" ); |
191 | } |
192 | } else if (result->is_ClearArray()) { |
193 | if (!is_instance || !ClearArrayNode::step_through(&result, instance_id, phase)) { |
194 | // Can not bypass initialization of the instance |
195 | // we are looking for. |
196 | break; |
197 | } |
198 | // Otherwise skip it (the call updated 'result' value). |
199 | } else if (result->is_MergeMem()) { |
200 | result = step_through_mergemem(phase, result->as_MergeMem(), t_oop, NULL, tty); |
201 | } |
202 | } |
203 | return result; |
204 | } |
205 | |
206 | Node *MemNode::optimize_memory_chain(Node *mchain, const TypePtr *t_adr, Node *load, PhaseGVN *phase) { |
207 | const TypeOopPtr* t_oop = t_adr->isa_oopptr(); |
208 | if (t_oop == NULL) |
209 | return mchain; // don't try to optimize non-oop types |
210 | Node* result = optimize_simple_memory_chain(mchain, t_oop, load, phase); |
211 | bool is_instance = t_oop->is_known_instance_field(); |
212 | PhaseIterGVN *igvn = phase->is_IterGVN(); |
213 | if (is_instance && igvn != NULL && result->is_Phi()) { |
214 | PhiNode *mphi = result->as_Phi(); |
215 | assert(mphi->bottom_type() == Type::MEMORY, "memory phi required" ); |
216 | const TypePtr *t = mphi->adr_type(); |
217 | if (t == TypePtr::BOTTOM || t == TypeRawPtr::BOTTOM || |
218 | (t->isa_oopptr() && !t->is_oopptr()->is_known_instance() && |
219 | t->is_oopptr()->cast_to_exactness(true) |
220 | ->is_oopptr()->cast_to_ptr_type(t_oop->ptr()) |
221 | ->is_oopptr()->cast_to_instance_id(t_oop->instance_id()) == t_oop)) { |
222 | // clone the Phi with our address type |
223 | result = mphi->split_out_instance(t_adr, igvn); |
224 | } else { |
225 | assert(phase->C->get_alias_index(t) == phase->C->get_alias_index(t_adr), "correct memory chain" ); |
226 | } |
227 | } |
228 | return result; |
229 | } |
230 | |
231 | static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st) { |
232 | uint alias_idx = phase->C->get_alias_index(tp); |
233 | Node *mem = mmem; |
234 | #ifdef ASSERT |
235 | { |
236 | // Check that current type is consistent with the alias index used during graph construction |
237 | assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx" ); |
238 | bool consistent = adr_check == NULL || adr_check->empty() || |
239 | phase->C->must_alias(adr_check, alias_idx ); |
240 | // Sometimes dead array references collapse to a[-1], a[-2], or a[-3] |
241 | if( !consistent && adr_check != NULL && !adr_check->empty() && |
242 | tp->isa_aryptr() && tp->offset() == Type::OffsetBot && |
243 | adr_check->isa_aryptr() && adr_check->offset() != Type::OffsetBot && |
244 | ( adr_check->offset() == arrayOopDesc::length_offset_in_bytes() || |
245 | adr_check->offset() == oopDesc::klass_offset_in_bytes() || |
246 | adr_check->offset() == oopDesc::mark_offset_in_bytes() ) ) { |
247 | // don't assert if it is dead code. |
248 | consistent = true; |
249 | } |
250 | if( !consistent ) { |
251 | st->print("alias_idx==%d, adr_check==" , alias_idx); |
252 | if( adr_check == NULL ) { |
253 | st->print("NULL" ); |
254 | } else { |
255 | adr_check->dump(); |
256 | } |
257 | st->cr(); |
258 | print_alias_types(); |
259 | assert(consistent, "adr_check must match alias idx" ); |
260 | } |
261 | } |
262 | #endif |
263 | // TypeOopPtr::NOTNULL+any is an OOP with unknown offset - generally |
264 | // means an array I have not precisely typed yet. Do not do any |
265 | // alias stuff with it any time soon. |
266 | const TypeOopPtr *toop = tp->isa_oopptr(); |
267 | if( tp->base() != Type::AnyPtr && |
268 | !(toop && |
269 | toop->klass() != NULL && |
270 | toop->klass()->is_java_lang_Object() && |
271 | toop->offset() == Type::OffsetBot) ) { |
272 | // compress paths and change unreachable cycles to TOP |
273 | // If not, we can update the input infinitely along a MergeMem cycle |
274 | // Equivalent code in PhiNode::Ideal |
275 | Node* m = phase->transform(mmem); |
276 | // If transformed to a MergeMem, get the desired slice |
277 | // Otherwise the returned node represents memory for every slice |
278 | mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m; |
279 | // Update input if it is progress over what we have now |
280 | } |
281 | return mem; |
282 | } |
283 | |
284 | //--------------------------Ideal_common--------------------------------------- |
285 | // Look for degenerate control and memory inputs. Bypass MergeMem inputs. |
286 | // Unhook non-raw memories from complete (macro-expanded) initializations. |
287 | Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) { |
288 | // If our control input is a dead region, kill all below the region |
289 | Node *ctl = in(MemNode::Control); |
290 | if (ctl && remove_dead_region(phase, can_reshape)) |
291 | return this; |
292 | ctl = in(MemNode::Control); |
293 | // Don't bother trying to transform a dead node |
294 | if (ctl && ctl->is_top()) return NodeSentinel; |
295 | |
296 | PhaseIterGVN *igvn = phase->is_IterGVN(); |
297 | // Wait if control on the worklist. |
298 | if (ctl && can_reshape && igvn != NULL) { |
299 | Node* bol = NULL; |
300 | Node* cmp = NULL; |
301 | if (ctl->in(0)->is_If()) { |
302 | assert(ctl->is_IfTrue() || ctl->is_IfFalse(), "sanity" ); |
303 | bol = ctl->in(0)->in(1); |
304 | if (bol->is_Bool()) |
305 | cmp = ctl->in(0)->in(1)->in(1); |
306 | } |
307 | if (igvn->_worklist.member(ctl) || |
308 | (bol != NULL && igvn->_worklist.member(bol)) || |
309 | (cmp != NULL && igvn->_worklist.member(cmp)) ) { |
310 | // This control path may be dead. |
311 | // Delay this memory node transformation until the control is processed. |
312 | phase->is_IterGVN()->_worklist.push(this); |
313 | return NodeSentinel; // caller will return NULL |
314 | } |
315 | } |
316 | // Ignore if memory is dead, or self-loop |
317 | Node *mem = in(MemNode::Memory); |
318 | if (phase->type( mem ) == Type::TOP) return NodeSentinel; // caller will return NULL |
319 | assert(mem != this, "dead loop in MemNode::Ideal" ); |
320 | |
321 | if (can_reshape && igvn != NULL && igvn->_worklist.member(mem)) { |
322 | // This memory slice may be dead. |
323 | // Delay this mem node transformation until the memory is processed. |
324 | phase->is_IterGVN()->_worklist.push(this); |
325 | return NodeSentinel; // caller will return NULL |
326 | } |
327 | |
328 | Node *address = in(MemNode::Address); |
329 | const Type *t_adr = phase->type(address); |
330 | if (t_adr == Type::TOP) return NodeSentinel; // caller will return NULL |
331 | |
332 | if (can_reshape && is_unsafe_access() && (t_adr == TypePtr::NULL_PTR)) { |
333 | // Unsafe off-heap access with zero address. Remove access and other control users |
334 | // to not confuse optimizations and add a HaltNode to fail if this is ever executed. |
335 | assert(ctl != NULL, "unsafe accesses should be control dependent" ); |
336 | for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { |
337 | Node* u = ctl->fast_out(i); |
338 | if (u != ctl) { |
339 | igvn->rehash_node_delayed(u); |
340 | int nb = u->replace_edge(ctl, phase->C->top()); |
341 | --i, imax -= nb; |
342 | } |
343 | } |
344 | Node* frame = igvn->transform(new ParmNode(phase->C->start(), TypeFunc::FramePtr)); |
345 | Node* halt = igvn->transform(new HaltNode(ctl, frame)); |
346 | phase->C->root()->add_req(halt); |
347 | return this; |
348 | } |
349 | |
350 | if (can_reshape && igvn != NULL && |
351 | (igvn->_worklist.member(address) || |
352 | (igvn->_worklist.size() > 0 && t_adr != adr_type())) ) { |
353 | // The address's base and type may change when the address is processed. |
354 | // Delay this mem node transformation until the address is processed. |
355 | phase->is_IterGVN()->_worklist.push(this); |
356 | return NodeSentinel; // caller will return NULL |
357 | } |
358 | |
359 | // Do NOT remove or optimize the next lines: ensure a new alias index |
360 | // is allocated for an oop pointer type before Escape Analysis. |
361 | // Note: C++ will not remove it since the call has side effect. |
362 | if (t_adr->isa_oopptr()) { |
363 | int alias_idx = phase->C->get_alias_index(t_adr->is_ptr()); |
364 | } |
365 | |
366 | Node* base = NULL; |
367 | if (address->is_AddP()) { |
368 | base = address->in(AddPNode::Base); |
369 | } |
370 | if (base != NULL && phase->type(base)->higher_equal(TypePtr::NULL_PTR) && |
371 | !t_adr->isa_rawptr()) { |
372 | // Note: raw address has TOP base and top->higher_equal(TypePtr::NULL_PTR) is true. |
373 | // Skip this node optimization if its address has TOP base. |
374 | return NodeSentinel; // caller will return NULL |
375 | } |
376 | |
377 | // Avoid independent memory operations |
378 | Node* old_mem = mem; |
379 | |
380 | // The code which unhooks non-raw memories from complete (macro-expanded) |
381 | // initializations was removed. After macro-expansion all stores catched |
382 | // by Initialize node became raw stores and there is no information |
383 | // which memory slices they modify. So it is unsafe to move any memory |
384 | // operation above these stores. Also in most cases hooked non-raw memories |
385 | // were already unhooked by using information from detect_ptr_independence() |
386 | // and find_previous_store(). |
387 | |
388 | if (mem->is_MergeMem()) { |
389 | MergeMemNode* mmem = mem->as_MergeMem(); |
390 | const TypePtr *tp = t_adr->is_ptr(); |
391 | |
392 | mem = step_through_mergemem(phase, mmem, tp, adr_type(), tty); |
393 | } |
394 | |
395 | if (mem != old_mem) { |
396 | set_req(MemNode::Memory, mem); |
397 | if (can_reshape && old_mem->outcnt() == 0 && igvn != NULL) { |
398 | igvn->_worklist.push(old_mem); |
399 | } |
400 | if (phase->type(mem) == Type::TOP) return NodeSentinel; |
401 | return this; |
402 | } |
403 | |
404 | // let the subclass continue analyzing... |
405 | return NULL; |
406 | } |
407 | |
408 | // Helper function for proving some simple control dominations. |
409 | // Attempt to prove that all control inputs of 'dom' dominate 'sub'. |
410 | // Already assumes that 'dom' is available at 'sub', and that 'sub' |
411 | // is not a constant (dominated by the method's StartNode). |
412 | // Used by MemNode::find_previous_store to prove that the |
413 | // control input of a memory operation predates (dominates) |
414 | // an allocation it wants to look past. |
415 | bool MemNode::all_controls_dominate(Node* dom, Node* sub) { |
416 | if (dom == NULL || dom->is_top() || sub == NULL || sub->is_top()) |
417 | return false; // Conservative answer for dead code |
418 | |
419 | // Check 'dom'. Skip Proj and CatchProj nodes. |
420 | dom = dom->find_exact_control(dom); |
421 | if (dom == NULL || dom->is_top()) |
422 | return false; // Conservative answer for dead code |
423 | |
424 | if (dom == sub) { |
425 | // For the case when, for example, 'sub' is Initialize and the original |
426 | // 'dom' is Proj node of the 'sub'. |
427 | return false; |
428 | } |
429 | |
430 | if (dom->is_Con() || dom->is_Start() || dom->is_Root() || dom == sub) |
431 | return true; |
432 | |
433 | // 'dom' dominates 'sub' if its control edge and control edges |
434 | // of all its inputs dominate or equal to sub's control edge. |
435 | |
436 | // Currently 'sub' is either Allocate, Initialize or Start nodes. |
437 | // Or Region for the check in LoadNode::Ideal(); |
438 | // 'sub' should have sub->in(0) != NULL. |
439 | assert(sub->is_Allocate() || sub->is_Initialize() || sub->is_Start() || |
440 | sub->is_Region() || sub->is_Call(), "expecting only these nodes" ); |
441 | |
442 | // Get control edge of 'sub'. |
443 | Node* orig_sub = sub; |
444 | sub = sub->find_exact_control(sub->in(0)); |
445 | if (sub == NULL || sub->is_top()) |
446 | return false; // Conservative answer for dead code |
447 | |
448 | assert(sub->is_CFG(), "expecting control" ); |
449 | |
450 | if (sub == dom) |
451 | return true; |
452 | |
453 | if (sub->is_Start() || sub->is_Root()) |
454 | return false; |
455 | |
456 | { |
457 | // Check all control edges of 'dom'. |
458 | |
459 | ResourceMark rm; |
460 | Arena* arena = Thread::current()->resource_area(); |
461 | Node_List nlist(arena); |
462 | Unique_Node_List dom_list(arena); |
463 | |
464 | dom_list.push(dom); |
465 | bool only_dominating_controls = false; |
466 | |
467 | for (uint next = 0; next < dom_list.size(); next++) { |
468 | Node* n = dom_list.at(next); |
469 | if (n == orig_sub) |
470 | return false; // One of dom's inputs dominated by sub. |
471 | if (!n->is_CFG() && n->pinned()) { |
472 | // Check only own control edge for pinned non-control nodes. |
473 | n = n->find_exact_control(n->in(0)); |
474 | if (n == NULL || n->is_top()) |
475 | return false; // Conservative answer for dead code |
476 | assert(n->is_CFG(), "expecting control" ); |
477 | dom_list.push(n); |
478 | } else if (n->is_Con() || n->is_Start() || n->is_Root()) { |
479 | only_dominating_controls = true; |
480 | } else if (n->is_CFG()) { |
481 | if (n->dominates(sub, nlist)) |
482 | only_dominating_controls = true; |
483 | else |
484 | return false; |
485 | } else { |
486 | // First, own control edge. |
487 | Node* m = n->find_exact_control(n->in(0)); |
488 | if (m != NULL) { |
489 | if (m->is_top()) |
490 | return false; // Conservative answer for dead code |
491 | dom_list.push(m); |
492 | } |
493 | // Now, the rest of edges. |
494 | uint cnt = n->req(); |
495 | for (uint i = 1; i < cnt; i++) { |
496 | m = n->find_exact_control(n->in(i)); |
497 | if (m == NULL || m->is_top()) |
498 | continue; |
499 | dom_list.push(m); |
500 | } |
501 | } |
502 | } |
503 | return only_dominating_controls; |
504 | } |
505 | } |
506 | |
507 | //---------------------detect_ptr_independence--------------------------------- |
508 | // Used by MemNode::find_previous_store to prove that two base |
509 | // pointers are never equal. |
510 | // The pointers are accompanied by their associated allocations, |
511 | // if any, which have been previously discovered by the caller. |
512 | bool MemNode::detect_ptr_independence(Node* p1, AllocateNode* a1, |
513 | Node* p2, AllocateNode* a2, |
514 | PhaseTransform* phase) { |
515 | // Attempt to prove that these two pointers cannot be aliased. |
516 | // They may both manifestly be allocations, and they should differ. |
517 | // Or, if they are not both allocations, they can be distinct constants. |
518 | // Otherwise, one is an allocation and the other a pre-existing value. |
519 | if (a1 == NULL && a2 == NULL) { // neither an allocation |
520 | return (p1 != p2) && p1->is_Con() && p2->is_Con(); |
521 | } else if (a1 != NULL && a2 != NULL) { // both allocations |
522 | return (a1 != a2); |
523 | } else if (a1 != NULL) { // one allocation a1 |
524 | // (Note: p2->is_Con implies p2->in(0)->is_Root, which dominates.) |
525 | return all_controls_dominate(p2, a1); |
526 | } else { //(a2 != NULL) // one allocation a2 |
527 | return all_controls_dominate(p1, a2); |
528 | } |
529 | return false; |
530 | } |
531 | |
532 | |
533 | // Find an arraycopy that must have set (can_see_stored_value=true) or |
534 | // could have set (can_see_stored_value=false) the value for this load |
535 | Node* LoadNode::find_previous_arraycopy(PhaseTransform* phase, Node* ld_alloc, Node*& mem, bool can_see_stored_value) const { |
536 | if (mem->is_Proj() && mem->in(0) != NULL && (mem->in(0)->Opcode() == Op_MemBarStoreStore || |
537 | mem->in(0)->Opcode() == Op_MemBarCPUOrder)) { |
538 | Node* mb = mem->in(0); |
539 | if (mb->in(0) != NULL && mb->in(0)->is_Proj() && |
540 | mb->in(0)->in(0) != NULL && mb->in(0)->in(0)->is_ArrayCopy()) { |
541 | ArrayCopyNode* ac = mb->in(0)->in(0)->as_ArrayCopy(); |
542 | if (ac->is_clonebasic()) { |
543 | intptr_t offset; |
544 | AllocateNode* alloc = AllocateNode::Ideal_allocation(ac->in(ArrayCopyNode::Dest), phase, offset); |
545 | if (alloc != NULL && alloc == ld_alloc) { |
546 | return ac; |
547 | } |
548 | } |
549 | } |
550 | } else if (mem->is_Proj() && mem->in(0) != NULL && mem->in(0)->is_ArrayCopy()) { |
551 | ArrayCopyNode* ac = mem->in(0)->as_ArrayCopy(); |
552 | |
553 | if (ac->is_arraycopy_validated() || |
554 | ac->is_copyof_validated() || |
555 | ac->is_copyofrange_validated()) { |
556 | Node* ld_addp = in(MemNode::Address); |
557 | if (ld_addp->is_AddP()) { |
558 | Node* ld_base = ld_addp->in(AddPNode::Address); |
559 | Node* ld_offs = ld_addp->in(AddPNode::Offset); |
560 | |
561 | Node* dest = ac->in(ArrayCopyNode::Dest); |
562 | |
563 | if (dest == ld_base) { |
564 | const TypeX *ld_offs_t = phase->type(ld_offs)->isa_intptr_t(); |
565 | if (ac->modifies(ld_offs_t->_lo, ld_offs_t->_hi, phase, can_see_stored_value)) { |
566 | return ac; |
567 | } |
568 | if (!can_see_stored_value) { |
569 | mem = ac->in(TypeFunc::Memory); |
570 | } |
571 | } |
572 | } |
573 | } |
574 | } |
575 | return NULL; |
576 | } |
577 | |
578 | // The logic for reordering loads and stores uses four steps: |
579 | // (a) Walk carefully past stores and initializations which we |
580 | // can prove are independent of this load. |
581 | // (b) Observe that the next memory state makes an exact match |
582 | // with self (load or store), and locate the relevant store. |
583 | // (c) Ensure that, if we were to wire self directly to the store, |
584 | // the optimizer would fold it up somehow. |
585 | // (d) Do the rewiring, and return, depending on some other part of |
586 | // the optimizer to fold up the load. |
587 | // This routine handles steps (a) and (b). Steps (c) and (d) are |
588 | // specific to loads and stores, so they are handled by the callers. |
589 | // (Currently, only LoadNode::Ideal has steps (c), (d). More later.) |
590 | // |
591 | Node* MemNode::find_previous_store(PhaseTransform* phase) { |
592 | Node* ctrl = in(MemNode::Control); |
593 | Node* adr = in(MemNode::Address); |
594 | intptr_t offset = 0; |
595 | Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); |
596 | AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase); |
597 | |
598 | if (offset == Type::OffsetBot) |
599 | return NULL; // cannot unalias unless there are precise offsets |
600 | |
601 | const bool adr_maybe_raw = check_if_adr_maybe_raw(adr); |
602 | const TypeOopPtr *addr_t = adr->bottom_type()->isa_oopptr(); |
603 | |
604 | intptr_t size_in_bytes = memory_size(); |
605 | |
606 | Node* mem = in(MemNode::Memory); // start searching here... |
607 | |
608 | int cnt = 50; // Cycle limiter |
609 | for (;;) { // While we can dance past unrelated stores... |
610 | if (--cnt < 0) break; // Caught in cycle or a complicated dance? |
611 | |
612 | Node* prev = mem; |
613 | if (mem->is_Store()) { |
614 | Node* st_adr = mem->in(MemNode::Address); |
615 | intptr_t st_offset = 0; |
616 | Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset); |
617 | if (st_base == NULL) |
618 | break; // inscrutable pointer |
619 | |
620 | // For raw accesses it's not enough to prove that constant offsets don't intersect. |
621 | // We need the bases to be the equal in order for the offset check to make sense. |
622 | if ((adr_maybe_raw || check_if_adr_maybe_raw(st_adr)) && st_base != base) { |
623 | break; |
624 | } |
625 | |
626 | if (st_offset != offset && st_offset != Type::OffsetBot) { |
627 | const int MAX_STORE = BytesPerLong; |
628 | if (st_offset >= offset + size_in_bytes || |
629 | st_offset <= offset - MAX_STORE || |
630 | st_offset <= offset - mem->as_Store()->memory_size()) { |
631 | // Success: The offsets are provably independent. |
632 | // (You may ask, why not just test st_offset != offset and be done? |
633 | // The answer is that stores of different sizes can co-exist |
634 | // in the same sequence of RawMem effects. We sometimes initialize |
635 | // a whole 'tile' of array elements with a single jint or jlong.) |
636 | mem = mem->in(MemNode::Memory); |
637 | continue; // (a) advance through independent store memory |
638 | } |
639 | } |
640 | if (st_base != base && |
641 | detect_ptr_independence(base, alloc, |
642 | st_base, |
643 | AllocateNode::Ideal_allocation(st_base, phase), |
644 | phase)) { |
645 | // Success: The bases are provably independent. |
646 | mem = mem->in(MemNode::Memory); |
647 | continue; // (a) advance through independent store memory |
648 | } |
649 | |
650 | // (b) At this point, if the bases or offsets do not agree, we lose, |
651 | // since we have not managed to prove 'this' and 'mem' independent. |
652 | if (st_base == base && st_offset == offset) { |
653 | return mem; // let caller handle steps (c), (d) |
654 | } |
655 | |
656 | } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) { |
657 | InitializeNode* st_init = mem->in(0)->as_Initialize(); |
658 | AllocateNode* st_alloc = st_init->allocation(); |
659 | if (st_alloc == NULL) |
660 | break; // something degenerated |
661 | bool known_identical = false; |
662 | bool known_independent = false; |
663 | if (alloc == st_alloc) |
664 | known_identical = true; |
665 | else if (alloc != NULL) |
666 | known_independent = true; |
667 | else if (all_controls_dominate(this, st_alloc)) |
668 | known_independent = true; |
669 | |
670 | if (known_independent) { |
671 | // The bases are provably independent: Either they are |
672 | // manifestly distinct allocations, or else the control |
673 | // of this load dominates the store's allocation. |
674 | int alias_idx = phase->C->get_alias_index(adr_type()); |
675 | if (alias_idx == Compile::AliasIdxRaw) { |
676 | mem = st_alloc->in(TypeFunc::Memory); |
677 | } else { |
678 | mem = st_init->memory(alias_idx); |
679 | } |
680 | continue; // (a) advance through independent store memory |
681 | } |
682 | |
683 | // (b) at this point, if we are not looking at a store initializing |
684 | // the same allocation we are loading from, we lose. |
685 | if (known_identical) { |
686 | // From caller, can_see_stored_value will consult find_captured_store. |
687 | return mem; // let caller handle steps (c), (d) |
688 | } |
689 | |
690 | } else if (find_previous_arraycopy(phase, alloc, mem, false) != NULL) { |
691 | if (prev != mem) { |
692 | // Found an arraycopy but it doesn't affect that load |
693 | continue; |
694 | } |
695 | // Found an arraycopy that may affect that load |
696 | return mem; |
697 | } else if (addr_t != NULL && addr_t->is_known_instance_field()) { |
698 | // Can't use optimize_simple_memory_chain() since it needs PhaseGVN. |
699 | if (mem->is_Proj() && mem->in(0)->is_Call()) { |
700 | // ArrayCopyNodes processed here as well. |
701 | CallNode *call = mem->in(0)->as_Call(); |
702 | if (!call->may_modify(addr_t, phase)) { |
703 | mem = call->in(TypeFunc::Memory); |
704 | continue; // (a) advance through independent call memory |
705 | } |
706 | } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) { |
707 | ArrayCopyNode* ac = NULL; |
708 | if (ArrayCopyNode::may_modify(addr_t, mem->in(0)->as_MemBar(), phase, ac)) { |
709 | break; |
710 | } |
711 | mem = mem->in(0)->in(TypeFunc::Memory); |
712 | continue; // (a) advance through independent MemBar memory |
713 | } else if (mem->is_ClearArray()) { |
714 | if (ClearArrayNode::step_through(&mem, (uint)addr_t->instance_id(), phase)) { |
715 | // (the call updated 'mem' value) |
716 | continue; // (a) advance through independent allocation memory |
717 | } else { |
718 | // Can not bypass initialization of the instance |
719 | // we are looking for. |
720 | return mem; |
721 | } |
722 | } else if (mem->is_MergeMem()) { |
723 | int alias_idx = phase->C->get_alias_index(adr_type()); |
724 | mem = mem->as_MergeMem()->memory_at(alias_idx); |
725 | continue; // (a) advance through independent MergeMem memory |
726 | } |
727 | } |
728 | |
729 | // Unless there is an explicit 'continue', we must bail out here, |
730 | // because 'mem' is an inscrutable memory state (e.g., a call). |
731 | break; |
732 | } |
733 | |
734 | return NULL; // bail out |
735 | } |
736 | |
737 | //----------------------calculate_adr_type------------------------------------- |
738 | // Helper function. Notices when the given type of address hits top or bottom. |
739 | // Also, asserts a cross-check of the type against the expected address type. |
740 | const TypePtr* MemNode::calculate_adr_type(const Type* t, const TypePtr* cross_check) { |
741 | if (t == Type::TOP) return NULL; // does not touch memory any more? |
742 | #ifdef PRODUCT |
743 | cross_check = NULL; |
744 | #else |
745 | if (!VerifyAliases || VMError::is_error_reported() || Node::in_dump()) cross_check = NULL; |
746 | #endif |
747 | const TypePtr* tp = t->isa_ptr(); |
748 | if (tp == NULL) { |
749 | assert(cross_check == NULL || cross_check == TypePtr::BOTTOM, "expected memory type must be wide" ); |
750 | return TypePtr::BOTTOM; // touches lots of memory |
751 | } else { |
752 | #ifdef ASSERT |
753 | // %%%% [phh] We don't check the alias index if cross_check is |
754 | // TypeRawPtr::BOTTOM. Needs to be investigated. |
755 | if (cross_check != NULL && |
756 | cross_check != TypePtr::BOTTOM && |
757 | cross_check != TypeRawPtr::BOTTOM) { |
758 | // Recheck the alias index, to see if it has changed (due to a bug). |
759 | Compile* C = Compile::current(); |
760 | assert(C->get_alias_index(cross_check) == C->get_alias_index(tp), |
761 | "must stay in the original alias category" ); |
762 | // The type of the address must be contained in the adr_type, |
763 | // disregarding "null"-ness. |
764 | // (We make an exception for TypeRawPtr::BOTTOM, which is a bit bucket.) |
765 | const TypePtr* tp_notnull = tp->join(TypePtr::NOTNULL)->is_ptr(); |
766 | assert(cross_check->meet(tp_notnull) == cross_check->remove_speculative(), |
767 | "real address must not escape from expected memory type" ); |
768 | } |
769 | #endif |
770 | return tp; |
771 | } |
772 | } |
773 | |
774 | //============================================================================= |
775 | // Should LoadNode::Ideal() attempt to remove control edges? |
776 | bool LoadNode::can_remove_control() const { |
777 | return true; |
778 | } |
779 | uint LoadNode::size_of() const { return sizeof(*this); } |
780 | bool LoadNode::cmp( const Node &n ) const |
781 | { return !Type::cmp( _type, ((LoadNode&)n)._type ); } |
782 | const Type *LoadNode::bottom_type() const { return _type; } |
783 | uint LoadNode::ideal_reg() const { |
784 | return _type->ideal_reg(); |
785 | } |
786 | |
787 | #ifndef PRODUCT |
788 | void LoadNode::dump_spec(outputStream *st) const { |
789 | MemNode::dump_spec(st); |
790 | if( !Verbose && !WizardMode ) { |
791 | // standard dump does this in Verbose and WizardMode |
792 | st->print(" #" ); _type->dump_on(st); |
793 | } |
794 | if (!depends_only_on_test()) { |
795 | st->print(" (does not depend only on test)" ); |
796 | } |
797 | } |
798 | #endif |
799 | |
800 | #ifdef ASSERT |
801 | //----------------------------is_immutable_value------------------------------- |
802 | // Helper function to allow a raw load without control edge for some cases |
803 | bool LoadNode::is_immutable_value(Node* adr) { |
804 | return (adr->is_AddP() && adr->in(AddPNode::Base)->is_top() && |
805 | adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal && |
806 | (adr->in(AddPNode::Offset)->find_intptr_t_con(-1) == |
807 | in_bytes(JavaThread::osthread_offset()))); |
808 | } |
809 | #endif |
810 | |
811 | //----------------------------LoadNode::make----------------------------------- |
812 | // Polymorphic factory method: |
813 | Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, |
814 | ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { |
815 | Compile* C = gvn.C; |
816 | |
817 | // sanity check the alias category against the created node type |
818 | assert(!(adr_type->isa_oopptr() && |
819 | adr_type->offset() == oopDesc::klass_offset_in_bytes()), |
820 | "use LoadKlassNode instead" ); |
821 | assert(!(adr_type->isa_aryptr() && |
822 | adr_type->offset() == arrayOopDesc::length_offset_in_bytes()), |
823 | "use LoadRangeNode instead" ); |
824 | // Check control edge of raw loads |
825 | assert( ctl != NULL || C->get_alias_index(adr_type) != Compile::AliasIdxRaw || |
826 | // oop will be recorded in oop map if load crosses safepoint |
827 | rt->isa_oopptr() || is_immutable_value(adr), |
828 | "raw memory operations should have control edge" ); |
829 | LoadNode* load = NULL; |
830 | switch (bt) { |
831 | case T_BOOLEAN: load = new LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; |
832 | case T_BYTE: load = new LoadBNode (ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; |
833 | case T_INT: load = new LoadINode (ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; |
834 | case T_CHAR: load = new LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; |
835 | case T_SHORT: load = new LoadSNode (ctl, mem, adr, adr_type, rt->is_int(), mo, control_dependency); break; |
836 | case T_LONG: load = new LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency); break; |
837 | case T_FLOAT: load = new LoadFNode (ctl, mem, adr, adr_type, rt, mo, control_dependency); break; |
838 | case T_DOUBLE: load = new LoadDNode (ctl, mem, adr, adr_type, rt, mo, control_dependency); break; |
839 | case T_ADDRESS: load = new LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(), mo, control_dependency); break; |
840 | case T_OBJECT: |
841 | #ifdef _LP64 |
842 | if (adr->bottom_type()->is_ptr_to_narrowoop()) { |
843 | load = new LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo, control_dependency); |
844 | } else |
845 | #endif |
846 | { |
847 | assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop" ); |
848 | load = new LoadPNode(ctl, mem, adr, adr_type, rt->is_ptr(), mo, control_dependency); |
849 | } |
850 | break; |
851 | default: |
852 | ShouldNotReachHere(); |
853 | break; |
854 | } |
855 | assert(load != NULL, "LoadNode should have been created" ); |
856 | if (unaligned) { |
857 | load->set_unaligned_access(); |
858 | } |
859 | if (mismatched) { |
860 | load->set_mismatched_access(); |
861 | } |
862 | if (unsafe) { |
863 | load->set_unsafe_access(); |
864 | } |
865 | if (load->Opcode() == Op_LoadN) { |
866 | Node* ld = gvn.transform(load); |
867 | return new DecodeNNode(ld, ld->bottom_type()->make_ptr()); |
868 | } |
869 | |
870 | return load; |
871 | } |
872 | |
873 | LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, |
874 | ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { |
875 | bool require_atomic = true; |
876 | LoadLNode* load = new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic); |
877 | if (unaligned) { |
878 | load->set_unaligned_access(); |
879 | } |
880 | if (mismatched) { |
881 | load->set_mismatched_access(); |
882 | } |
883 | if (unsafe) { |
884 | load->set_unsafe_access(); |
885 | } |
886 | return load; |
887 | } |
888 | |
889 | LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, |
890 | ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { |
891 | bool require_atomic = true; |
892 | LoadDNode* load = new LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic); |
893 | if (unaligned) { |
894 | load->set_unaligned_access(); |
895 | } |
896 | if (mismatched) { |
897 | load->set_mismatched_access(); |
898 | } |
899 | if (unsafe) { |
900 | load->set_unsafe_access(); |
901 | } |
902 | return load; |
903 | } |
904 | |
905 | |
906 | |
907 | //------------------------------hash------------------------------------------- |
908 | uint LoadNode::hash() const { |
909 | // unroll addition of interesting fields |
910 | return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address); |
911 | } |
912 | |
913 | static bool skip_through_membars(Compile::AliasType* atp, const TypeInstPtr* tp, bool eliminate_boxing) { |
914 | if ((atp != NULL) && (atp->index() >= Compile::AliasIdxRaw)) { |
915 | bool non_volatile = (atp->field() != NULL) && !atp->field()->is_volatile(); |
916 | bool is_stable_ary = FoldStableValues && |
917 | (tp != NULL) && (tp->isa_aryptr() != NULL) && |
918 | tp->isa_aryptr()->is_stable(); |
919 | |
920 | return (eliminate_boxing && non_volatile) || is_stable_ary; |
921 | } |
922 | |
923 | return false; |
924 | } |
925 | |
926 | // Is the value loaded previously stored by an arraycopy? If so return |
927 | // a load node that reads from the source array so we may be able to |
928 | // optimize out the ArrayCopy node later. |
929 | Node* LoadNode::can_see_arraycopy_value(Node* st, PhaseGVN* phase) const { |
930 | Node* ld_adr = in(MemNode::Address); |
931 | intptr_t ld_off = 0; |
932 | AllocateNode* ld_alloc = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off); |
933 | Node* ac = find_previous_arraycopy(phase, ld_alloc, st, true); |
934 | if (ac != NULL) { |
935 | assert(ac->is_ArrayCopy(), "what kind of node can this be?" ); |
936 | |
937 | Node* mem = ac->in(TypeFunc::Memory); |
938 | Node* ctl = ac->in(0); |
939 | Node* src = ac->in(ArrayCopyNode::Src); |
940 | |
941 | if (!ac->as_ArrayCopy()->is_clonebasic() && !phase->type(src)->isa_aryptr()) { |
942 | return NULL; |
943 | } |
944 | |
945 | LoadNode* ld = clone()->as_Load(); |
946 | Node* addp = in(MemNode::Address)->clone(); |
947 | if (ac->as_ArrayCopy()->is_clonebasic()) { |
948 | assert(ld_alloc != NULL, "need an alloc" ); |
949 | assert(addp->is_AddP(), "address must be addp" ); |
950 | assert(ac->in(ArrayCopyNode::Dest)->is_AddP(), "dest must be an address" ); |
951 | BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); |
952 | assert(bs->step_over_gc_barrier(addp->in(AddPNode::Base)) == bs->step_over_gc_barrier(ac->in(ArrayCopyNode::Dest)->in(AddPNode::Base)), "strange pattern" ); |
953 | assert(bs->step_over_gc_barrier(addp->in(AddPNode::Address)) == bs->step_over_gc_barrier(ac->in(ArrayCopyNode::Dest)->in(AddPNode::Address)), "strange pattern" ); |
954 | addp->set_req(AddPNode::Base, src->in(AddPNode::Base)); |
955 | addp->set_req(AddPNode::Address, src->in(AddPNode::Address)); |
956 | } else { |
957 | assert(ac->as_ArrayCopy()->is_arraycopy_validated() || |
958 | ac->as_ArrayCopy()->is_copyof_validated() || |
959 | ac->as_ArrayCopy()->is_copyofrange_validated(), "only supported cases" ); |
960 | assert(addp->in(AddPNode::Base) == addp->in(AddPNode::Address), "should be" ); |
961 | addp->set_req(AddPNode::Base, src); |
962 | addp->set_req(AddPNode::Address, src); |
963 | |
964 | const TypeAryPtr* ary_t = phase->type(in(MemNode::Address))->isa_aryptr(); |
965 | BasicType ary_elem = ary_t->klass()->as_array_klass()->element_type()->basic_type(); |
966 | uint = arrayOopDesc::base_offset_in_bytes(ary_elem); |
967 | uint shift = exact_log2(type2aelembytes(ary_elem)); |
968 | |
969 | Node* diff = phase->transform(new SubINode(ac->in(ArrayCopyNode::SrcPos), ac->in(ArrayCopyNode::DestPos))); |
970 | #ifdef _LP64 |
971 | diff = phase->transform(new ConvI2LNode(diff)); |
972 | #endif |
973 | diff = phase->transform(new LShiftXNode(diff, phase->intcon(shift))); |
974 | |
975 | Node* offset = phase->transform(new AddXNode(addp->in(AddPNode::Offset), diff)); |
976 | addp->set_req(AddPNode::Offset, offset); |
977 | } |
978 | addp = phase->transform(addp); |
979 | #ifdef ASSERT |
980 | const TypePtr* adr_type = phase->type(addp)->is_ptr(); |
981 | ld->_adr_type = adr_type; |
982 | #endif |
983 | ld->set_req(MemNode::Address, addp); |
984 | ld->set_req(0, ctl); |
985 | ld->set_req(MemNode::Memory, mem); |
986 | // load depends on the tests that validate the arraycopy |
987 | ld->_control_dependency = Pinned; |
988 | return ld; |
989 | } |
990 | return NULL; |
991 | } |
992 | |
993 | |
//---------------------------can_see_stored_value------------------------------
// This routine exists to make sure this set of tests is done the same
// everywhere.  We need to make a coordinated change: first LoadNode::Ideal
// will change the graph shape in a way which makes memory alive twice at the
// same time (uses the Oracle model of aliasing), then some
// LoadXNode::Identity will fold things back to the equivalence-class model
// of aliasing.
//
// Starting from the memory state 'st', tries to find the value this node
// would observe at its address.  Returns:
//   - the ValueIn input of a Store whose address is equivalent to ours (same
//     node, or same uncasted base with the same known offset) and whose
//     opcode equals store_Opcode();
//   - a zero constant of memory_type() when 'st' is a projection of the very
//     Allocate our address is based on and the offset is at or beyond the
//     allocation's minimum header size;
//   - for a Load of a boxed value whose base is the result projection of a
//     boxing method call, that call's input at TypeFunc::Parms;
//   - NULL when none of the above can be shown.
Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
  Node* ld_adr = in(MemNode::Address);
  intptr_t ld_off = 0;
  Node* ld_base = AddPNode::Ideal_base_and_offset(ld_adr, phase, ld_off);
  Node* ld_alloc = AllocateNode::Ideal_allocation(ld_base, phase);
  const TypeInstPtr* tp = phase->type(ld_adr)->isa_instptr();
  Compile::AliasType* atp = (tp != NULL) ? phase->C->alias_type(tp) : NULL;
  // This is more general than load from boxing objects.
  if (skip_through_membars(atp, tp, phase->C->eliminate_boxing())) {
    uint alias_idx = atp->index();
    // 'final' here means the alias type is not rewritable.
    bool final = !atp->is_rewritable();
    Node* result = NULL;
    Node* current = st;
    // Skip through chains of MemBarNodes checking the MergeMems for
    // new states for the slice of this load.  Stop once any other
    // kind of node is encountered.  Loads from final memory can skip
    // through any kind of MemBar but normal loads shouldn't skip
    // through MemBarAcquire since that could allow them to move out of
    // a synchronized region.
    while (current->is_Proj()) {
      int opc = current->in(0)->Opcode();
      // Acquire-style barriers are only looked through for final memory;
      // the release/store/CPU-order kinds are looked through unconditionally.
      if ((final && (opc == Op_MemBarAcquire ||
                     opc == Op_MemBarAcquireLock ||
                     opc == Op_LoadFence)) ||
          opc == Op_MemBarRelease ||
          opc == Op_StoreFence ||
          opc == Op_MemBarReleaseLock ||
          opc == Op_MemBarStoreStore ||
          opc == Op_MemBarCPUOrder) {
        Node* mem = current->in(0)->in(TypeFunc::Memory);
        if (mem->is_MergeMem()) {
          MergeMemNode* merge = mem->as_MergeMem();
          Node* new_st = merge->memory_at(alias_idx);
          if (new_st == merge->base_memory()) {
            // Keep searching
            current = new_st;
            continue;
          }
          // Save the new memory state for the slice and fall through
          // to exit.
          result = new_st;
        }
      }
      break;
    }
    // Only replace the starting state when the walk found a slice-specific one.
    if (result != NULL) {
      st = result;
    }
  }

  // Loop around twice in the case Load -> Initialize -> Store.
  // (See PhaseIterGVN::add_users_to_worklist, which knows about this case.)
  for (int trip = 0; trip <= 1; trip++) {

    if (st->is_Store()) {
      Node* st_adr = st->in(MemNode::Address);
      if (!phase->eqv(st_adr, ld_adr)) {
        // Try harder before giving up. Unify base pointers with casts (e.g., raw/non-raw pointers).
        intptr_t st_off = 0;
        Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_off);
        if (ld_base == NULL) return NULL;
        if (st_base == NULL) return NULL;
        if (!ld_base->eqv_uncast(st_base, /*keep_deps=*/true)) return NULL;
        if (ld_off != st_off) return NULL;
        if (ld_off == Type::OffsetBot) return NULL;
        // Same base, same offset.
        // Possible improvement for arrays: check index value instead of absolute offset.

        // At this point we have proven something like this setup:
        //   B = << base >>
        //   L =  LoadQ(AddP(Check/CastPP(B), #Off))
        //   S = StoreQ(AddP(             B , #Off), V)
        // (Actually, we haven't yet proven the Q's are the same.)
        // In other words, we are loading from a casted version of
        // the same pointer-and-offset that we stored to.
        // Casted version may carry a dependency and it is respected.
        // Thus, we are able to replace L by V.
      }
      // Now prove that we have a LoadQ matched to a StoreQ, for some Q.
      if (store_Opcode() != st->Opcode())
        return NULL;
      return st->in(MemNode::ValueIn);
    }

    // A load from a freshly-created object always returns zero.
    // (This can happen after LoadNode::Ideal resets the load's memory input
    // to find_captured_store, which returned InitializeNode::zero_memory.)
    if (st->is_Proj() && st->in(0)->is_Allocate() &&
        (st->in(0) == ld_alloc) &&
        (ld_off >= st->in(0)->as_Allocate()->minimum_header_size())) {
      // return a zero value for the load's basic type
      // (This is one of the few places where a generic PhaseTransform
      // can create new nodes.  Think of it as lazily manifesting
      // virtually pre-existing constants.)
      return phase->zerocon(memory_type());
    }

    // A load from an initialization barrier can match a captured store.
    if (st->is_Proj() && st->in(0)->is_Initialize()) {
      InitializeNode* init = st->in(0)->as_Initialize();
      AllocateNode* alloc = init->allocation();
      if ((alloc != NULL) && (alloc == ld_alloc)) {
        // examine a captured store value
        st = init->find_captured_store(ld_off, memory_size(), phase);
        if (st != NULL) {
          continue; // take one more trip around
        }
        // NOTE: 'st' is NULL here; this is only safe if one of the conditions
        // guarding the boxed-value test below is false -- not checked here.
      }
    }

    // Load boxed value from result of valueOf() call is input parameter.
    if (this->is_Load() && ld_adr->is_AddP() &&
        (tp != NULL) && tp->is_ptr_to_boxed_value()) {
      intptr_t ignore = 0;
      Node* base = AddPNode::Ideal_base_and_offset(ld_adr, phase, ignore);
      BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
      base = bs->step_over_gc_barrier(base);
      // The base must be the projection at TypeFunc::Parms of a static Java
      // call to a boxing method.
      if (base != NULL && base->is_Proj() &&
          base->as_Proj()->_con == TypeFunc::Parms &&
          base->in(0)->is_CallStaticJava() &&
          base->in(0)->as_CallStaticJava()->is_boxing_method()) {
        return base->in(0)->in(TypeFunc::Parms);
      }
    }

    break;
  }

  return NULL;
}
1131 | |
1132 | //----------------------is_instance_field_load_with_local_phi------------------ |
1133 | bool LoadNode::is_instance_field_load_with_local_phi(Node* ctrl) { |
1134 | if( in(Memory)->is_Phi() && in(Memory)->in(0) == ctrl && |
1135 | in(Address)->is_AddP() ) { |
1136 | const TypeOopPtr* t_oop = in(Address)->bottom_type()->isa_oopptr(); |
1137 | // Only instances and boxed values. |
1138 | if( t_oop != NULL && |
1139 | (t_oop->is_ptr_to_boxed_value() || |
1140 | t_oop->is_known_instance_field()) && |
1141 | t_oop->offset() != Type::OffsetBot && |
1142 | t_oop->offset() != Type::OffsetTop) { |
1143 | return true; |
1144 | } |
1145 | } |
1146 | return false; |
1147 | } |
1148 | |
1149 | //------------------------------Identity--------------------------------------- |
1150 | // Loads are identity if previous store is to same address |
1151 | Node* LoadNode::Identity(PhaseGVN* phase) { |
1152 | // If the previous store-maker is the right kind of Store, and the store is |
1153 | // to the same address, then we are equal to the value stored. |
1154 | Node* mem = in(Memory); |
1155 | Node* value = can_see_stored_value(mem, phase); |
1156 | if( value ) { |
1157 | // byte, short & char stores truncate naturally. |
1158 | // A load has to load the truncated value which requires |
1159 | // some sort of masking operation and that requires an |
1160 | // Ideal call instead of an Identity call. |
1161 | if (memory_size() < BytesPerInt) { |
1162 | // If the input to the store does not fit with the load's result type, |
1163 | // it must be truncated via an Ideal call. |
1164 | if (!phase->type(value)->higher_equal(phase->type(this))) |
1165 | return this; |
1166 | } |
1167 | // (This works even when value is a Con, but LoadNode::Value |
1168 | // usually runs first, producing the singleton type of the Con.) |
1169 | return value; |
1170 | } |
1171 | |
1172 | // Search for an existing data phi which was generated before for the same |
1173 | // instance's field to avoid infinite generation of phis in a loop. |
1174 | Node *region = mem->in(0); |
1175 | if (is_instance_field_load_with_local_phi(region)) { |
1176 | const TypeOopPtr *addr_t = in(Address)->bottom_type()->isa_oopptr(); |
1177 | int this_index = phase->C->get_alias_index(addr_t); |
1178 | int this_offset = addr_t->offset(); |
1179 | int this_iid = addr_t->instance_id(); |
1180 | if (!addr_t->is_known_instance() && |
1181 | addr_t->is_ptr_to_boxed_value()) { |
1182 | // Use _idx of address base (could be Phi node) for boxed values. |
1183 | intptr_t ignore = 0; |
1184 | Node* base = AddPNode::Ideal_base_and_offset(in(Address), phase, ignore); |
1185 | if (base == NULL) { |
1186 | return this; |
1187 | } |
1188 | this_iid = base->_idx; |
1189 | } |
1190 | const Type* this_type = bottom_type(); |
1191 | for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { |
1192 | Node* phi = region->fast_out(i); |
1193 | if (phi->is_Phi() && phi != mem && |
1194 | phi->as_Phi()->is_same_inst_field(this_type, (int)mem->_idx, this_iid, this_index, this_offset)) { |
1195 | return phi; |
1196 | } |
1197 | } |
1198 | } |
1199 | |
1200 | return this; |
1201 | } |
1202 | |
1203 | // Construct an equivalent unsigned load. |
1204 | Node* LoadNode::convert_to_unsigned_load(PhaseGVN& gvn) { |
1205 | BasicType bt = T_ILLEGAL; |
1206 | const Type* rt = NULL; |
1207 | switch (Opcode()) { |
1208 | case Op_LoadUB: return this; |
1209 | case Op_LoadUS: return this; |
1210 | case Op_LoadB: bt = T_BOOLEAN; rt = TypeInt::UBYTE; break; |
1211 | case Op_LoadS: bt = T_CHAR; rt = TypeInt::CHAR; break; |
1212 | default: |
1213 | assert(false, "no unsigned variant: %s" , Name()); |
1214 | return NULL; |
1215 | } |
1216 | return LoadNode::make(gvn, in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), |
1217 | raw_adr_type(), rt, bt, _mo, _control_dependency, |
1218 | is_unaligned_access(), is_mismatched_access()); |
1219 | } |
1220 | |
1221 | // Construct an equivalent signed load. |
1222 | Node* LoadNode::convert_to_signed_load(PhaseGVN& gvn) { |
1223 | BasicType bt = T_ILLEGAL; |
1224 | const Type* rt = NULL; |
1225 | switch (Opcode()) { |
1226 | case Op_LoadUB: bt = T_BYTE; rt = TypeInt::BYTE; break; |
1227 | case Op_LoadUS: bt = T_SHORT; rt = TypeInt::SHORT; break; |
1228 | case Op_LoadB: // fall through |
1229 | case Op_LoadS: // fall through |
1230 | case Op_LoadI: // fall through |
1231 | case Op_LoadL: return this; |
1232 | default: |
1233 | assert(false, "no signed variant: %s" , Name()); |
1234 | return NULL; |
1235 | } |
1236 | return LoadNode::make(gvn, in(MemNode::Control), in(MemNode::Memory), in(MemNode::Address), |
1237 | raw_adr_type(), rt, bt, _mo, _control_dependency, |
1238 | is_unaligned_access(), is_mismatched_access()); |
1239 | } |
1240 | |
// We're loading from an object which has autobox behaviour.
// If this object is result of a valueOf call we'll have a phi
// merging a newly allocated object and a load from the cache.
// We want to replace this load with the original incoming
// argument to the valueOf call.
//
// Returns NULL when the address base is missing, is a Phi (left to
// split_through_phi()), or does not look like a load from an autobox cache
// array.  Otherwise returns a new, untransformed node that computes the boxed
// value from the offset used to index the cache array.
Node* LoadNode::eliminate_autobox(PhaseGVN* phase) {
  assert(phase->C->eliminate_boxing(), "sanity" );
  intptr_t ignore = 0;
  Node* base = AddPNode::Ideal_base_and_offset(in(Address), phase, ignore);
  if ((base == NULL) || base->is_Phi()) {
    // Push the loads from the phi that comes from valueOf up
    // through it to allow elimination of the loads and the recovery
    // of the original value. It is done in split_through_phi().
    return NULL;
  } else if (base->is_Load() ||
             (base->is_DecodeN() && base->in(1)->is_Load())) {
    // Eliminate the load of boxed value for integer types from the cache
    // array by deriving the value from the index into the array.
    // Capture the offset of the load and then reverse the computation.

    // Get LoadN node which loads a boxing object from 'cache' array.
    if (base->is_DecodeN()) {
      base = base->in(1);
    }
    if (!base->in(Address)->is_AddP()) {
      return NULL; // Complex address
    }
    AddPNode* address = base->in(Address)->as_AddP();
    Node* cache_base = address->in(AddPNode::Base);
    if ((cache_base != NULL) && cache_base->is_DecodeN()) {
      // Get ConP node which is static 'cache' field.
      cache_base = cache_base->in(1);
    }
    if ((cache_base != NULL) && cache_base->is_Con()) {
      const TypeAryPtr* base_type = cache_base->bottom_type()->isa_aryptr();
      if ((base_type != NULL) && base_type->is_autobox_cache()) {
        Node* elements[4];
        int shift = exact_log2(type2aelembytes(T_OBJECT));
        int count = address->unpack_offsets(elements, ARRAY_SIZE(elements));
        // Accept either a single constant offset, or a constant offset plus
        // one term that is a left shift by the object element size.
        if (count > 0 && elements[0]->is_Con() &&
            (count == 1 ||
             (count == 2 && elements[1]->Opcode() == Op_LShiftX &&
                            elements[1]->in(2) == phase->intcon(shift)))) {
          ciObjArray* array = base_type->const_oop()->as_obj_array();
          // Fetch the box object cache[0] at the base of the array and get its value
          ciInstance* box = array->obj_at(0)->as_instance();
          ciInstanceKlass* ik = box->klass()->as_instance_klass();
          assert(ik->is_box_klass(), "sanity" );
          assert(ik->nof_nonstatic_fields() == 1, "change following code" );
          if (ik->nof_nonstatic_fields() == 1) {
            // This should be true; nonstatic_field_at requires calling
            // nof_nonstatic_fields, so check it anyway.
            ciConstant c = box->field_value(ik->nonstatic_field_at(0));
            BasicType bt = c.basic_type();
            // Only integer types have boxing cache.
            assert(bt == T_BOOLEAN || bt == T_CHAR ||
                   bt == T_BYTE || bt == T_SHORT ||
                   bt == T_INT || bt == T_LONG, "wrong type = %s" , type2name(bt));
            // Value held by cache[0], i.e. the lowest cached value.
            jlong cache_low = (bt == T_LONG) ? c.as_long() : c.as_int();
            if (cache_low != (int)cache_low) {
              return NULL; // should not happen since cache is array indexed by value
            }
            // Offset at which the element for value 0 would sit in the array.
            jlong offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT) - (cache_low << shift);
            if (offset != (int)offset) {
              return NULL; // should not happen since cache is array indexed by value
            }
            // Add up all the offsets making up the address of the load
            Node* result = elements[0];
            for (int i = 1; i < count; i++) {
              result = phase->transform(new AddXNode(result, elements[i]));
            }
            // Remove the constant offset from the address and then
            result = phase->transform(new AddXNode(result, phase->MakeConX(-(int)offset)));
            // remove the scaling of the offset to recover the original index.
            if (result->Opcode() == Op_LShiftX && result->in(2) == phase->intcon(shift)) {
              // Peel the shift off directly but wrap it in a dummy node
              // since Ideal can't return existing nodes
              result = new RShiftXNode(result->in(1), phase->intcon(0));
            } else if (result->is_Add() && result->in(2)->is_Con() &&
                       result->in(1)->Opcode() == Op_LShiftX &&
                       result->in(1)->in(2) == phase->intcon(shift)) {
              // We can't do general optimization: ((X<<Z) + Y) >> Z ==> X + (Y>>Z)
              // but for boxing cache access we know that X<<Z will not overflow
              // (there is range check) so we do this optimization by hand here.
              Node* add_con = new RShiftXNode(result->in(2), phase->intcon(shift));
              result = new AddXNode(result->in(1)->in(1), phase->transform(add_con));
            } else {
              result = new RShiftXNode(result, phase->intcon(shift));
            }
            // Convert between the word-sized index and the boxed value's
            // width: narrow to int on 64-bit unless the box holds a long,
            // widen to long on 32-bit when it does.
#ifdef _LP64
            if (bt != T_LONG) {
              result = new ConvL2INode(phase->transform(result));
            }
#else
            if (bt == T_LONG) {
              result = new ConvI2LNode(phase->transform(result));
            }
#endif
            // Boxing/unboxing can be done from signed & unsigned loads (e.g. LoadUB -> ... -> LoadB pair).
            // Need to preserve unboxing load type if it is unsigned.
            switch(this->Opcode()) {
              case Op_LoadUB:
                result = new AndINode(phase->transform(result), phase->intcon(0xFF));
                break;
              case Op_LoadUS:
                result = new AndINode(phase->transform(result), phase->intcon(0xFFFF));
                break;
            }
            return result;
          }
        }
      }
    }
  }
  return NULL;
}
1357 | |
1358 | static bool stable_phi(PhiNode* phi, PhaseGVN *phase) { |
1359 | Node* region = phi->in(0); |
1360 | if (region == NULL) { |
1361 | return false; // Wait stable graph |
1362 | } |
1363 | uint cnt = phi->req(); |
1364 | for (uint i = 1; i < cnt; i++) { |
1365 | Node* rc = region->in(i); |
1366 | if (rc == NULL || phase->type(rc) == Type::TOP) |
1367 | return false; // Wait stable graph |
1368 | Node* in = phi->in(i); |
1369 | if (in == NULL || phase->type(in) == Type::TOP) |
1370 | return false; // Wait stable graph |
1371 | } |
1372 | return true; |
1373 | } |
1374 | //------------------------------split_through_phi------------------------------ |
1375 | // Split instance or boxed field load through Phi. |
1376 | Node *LoadNode::split_through_phi(PhaseGVN *phase) { |
1377 | Node* mem = in(Memory); |
1378 | Node* address = in(Address); |
1379 | const TypeOopPtr *t_oop = phase->type(address)->isa_oopptr(); |
1380 | |
1381 | assert((t_oop != NULL) && |
1382 | (t_oop->is_known_instance_field() || |
1383 | t_oop->is_ptr_to_boxed_value()), "invalide conditions" ); |
1384 | |
1385 | Compile* C = phase->C; |
1386 | intptr_t ignore = 0; |
1387 | Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore); |
1388 | bool base_is_phi = (base != NULL) && base->is_Phi(); |
1389 | bool load_boxed_values = t_oop->is_ptr_to_boxed_value() && C->aggressive_unboxing() && |
1390 | (base != NULL) && (base == address->in(AddPNode::Base)) && |
1391 | phase->type(base)->higher_equal(TypePtr::NOTNULL); |
1392 | |
1393 | if (!((mem->is_Phi() || base_is_phi) && |
1394 | (load_boxed_values || t_oop->is_known_instance_field()))) { |
1395 | return NULL; // memory is not Phi |
1396 | } |
1397 | |
1398 | if (mem->is_Phi()) { |
1399 | if (!stable_phi(mem->as_Phi(), phase)) { |
1400 | return NULL; // Wait stable graph |
1401 | } |
1402 | uint cnt = mem->req(); |
1403 | // Check for loop invariant memory. |
1404 | if (cnt == 3) { |
1405 | for (uint i = 1; i < cnt; i++) { |
1406 | Node* in = mem->in(i); |
1407 | Node* m = optimize_memory_chain(in, t_oop, this, phase); |
1408 | if (m == mem) { |
1409 | if (i == 1) { |
1410 | // if the first edge was a loop, check second edge too. |
1411 | // If both are replaceable - we are in an infinite loop |
1412 | Node *n = optimize_memory_chain(mem->in(2), t_oop, this, phase); |
1413 | if (n == mem) { |
1414 | break; |
1415 | } |
1416 | } |
1417 | set_req(Memory, mem->in(cnt - i)); |
1418 | return this; // made change |
1419 | } |
1420 | } |
1421 | } |
1422 | } |
1423 | if (base_is_phi) { |
1424 | if (!stable_phi(base->as_Phi(), phase)) { |
1425 | return NULL; // Wait stable graph |
1426 | } |
1427 | uint cnt = base->req(); |
1428 | // Check for loop invariant memory. |
1429 | if (cnt == 3) { |
1430 | for (uint i = 1; i < cnt; i++) { |
1431 | if (base->in(i) == base) { |
1432 | return NULL; // Wait stable graph |
1433 | } |
1434 | } |
1435 | } |
1436 | } |
1437 | |
1438 | bool load_boxed_phi = load_boxed_values && base_is_phi && (base->in(0) == mem->in(0)); |
1439 | |
1440 | // Split through Phi (see original code in loopopts.cpp). |
1441 | assert(C->have_alias_type(t_oop), "instance should have alias type" ); |
1442 | |
1443 | // Do nothing here if Identity will find a value |
1444 | // (to avoid infinite chain of value phis generation). |
1445 | if (!phase->eqv(this, phase->apply_identity(this))) |
1446 | return NULL; |
1447 | |
1448 | // Select Region to split through. |
1449 | Node* region; |
1450 | if (!base_is_phi) { |
1451 | assert(mem->is_Phi(), "sanity" ); |
1452 | region = mem->in(0); |
1453 | // Skip if the region dominates some control edge of the address. |
1454 | if (!MemNode::all_controls_dominate(address, region)) |
1455 | return NULL; |
1456 | } else if (!mem->is_Phi()) { |
1457 | assert(base_is_phi, "sanity" ); |
1458 | region = base->in(0); |
1459 | // Skip if the region dominates some control edge of the memory. |
1460 | if (!MemNode::all_controls_dominate(mem, region)) |
1461 | return NULL; |
1462 | } else if (base->in(0) != mem->in(0)) { |
1463 | assert(base_is_phi && mem->is_Phi(), "sanity" ); |
1464 | if (MemNode::all_controls_dominate(mem, base->in(0))) { |
1465 | region = base->in(0); |
1466 | } else if (MemNode::all_controls_dominate(address, mem->in(0))) { |
1467 | region = mem->in(0); |
1468 | } else { |
1469 | return NULL; // complex graph |
1470 | } |
1471 | } else { |
1472 | assert(base->in(0) == mem->in(0), "sanity" ); |
1473 | region = mem->in(0); |
1474 | } |
1475 | |
1476 | const Type* this_type = this->bottom_type(); |
1477 | int this_index = C->get_alias_index(t_oop); |
1478 | int this_offset = t_oop->offset(); |
1479 | int this_iid = t_oop->instance_id(); |
1480 | if (!t_oop->is_known_instance() && load_boxed_values) { |
1481 | // Use _idx of address base for boxed values. |
1482 | this_iid = base->_idx; |
1483 | } |
1484 | PhaseIterGVN* igvn = phase->is_IterGVN(); |
1485 | Node* phi = new PhiNode(region, this_type, NULL, mem->_idx, this_iid, this_index, this_offset); |
1486 | for (uint i = 1; i < region->req(); i++) { |
1487 | Node* x; |
1488 | Node* the_clone = NULL; |
1489 | if (region->in(i) == C->top()) { |
1490 | x = C->top(); // Dead path? Use a dead data op |
1491 | } else { |
1492 | x = this->clone(); // Else clone up the data op |
1493 | the_clone = x; // Remember for possible deletion. |
1494 | // Alter data node to use pre-phi inputs |
1495 | if (this->in(0) == region) { |
1496 | x->set_req(0, region->in(i)); |
1497 | } else { |
1498 | x->set_req(0, NULL); |
1499 | } |
1500 | if (mem->is_Phi() && (mem->in(0) == region)) { |
1501 | x->set_req(Memory, mem->in(i)); // Use pre-Phi input for the clone. |
1502 | } |
1503 | if (address->is_Phi() && address->in(0) == region) { |
1504 | x->set_req(Address, address->in(i)); // Use pre-Phi input for the clone |
1505 | } |
1506 | if (base_is_phi && (base->in(0) == region)) { |
1507 | Node* base_x = base->in(i); // Clone address for loads from boxed objects. |
1508 | Node* adr_x = phase->transform(new AddPNode(base_x,base_x,address->in(AddPNode::Offset))); |
1509 | x->set_req(Address, adr_x); |
1510 | } |
1511 | } |
1512 | // Check for a 'win' on some paths |
1513 | const Type *t = x->Value(igvn); |
1514 | |
1515 | bool singleton = t->singleton(); |
1516 | |
1517 | // See comments in PhaseIdealLoop::split_thru_phi(). |
1518 | if (singleton && t == Type::TOP) { |
1519 | singleton &= region->is_Loop() && (i != LoopNode::EntryControl); |
1520 | } |
1521 | |
1522 | if (singleton) { |
1523 | x = igvn->makecon(t); |
1524 | } else { |
1525 | // We now call Identity to try to simplify the cloned node. |
1526 | // Note that some Identity methods call phase->type(this). |
1527 | // Make sure that the type array is big enough for |
1528 | // our new node, even though we may throw the node away. |
1529 | // (This tweaking with igvn only works because x is a new node.) |
1530 | igvn->set_type(x, t); |
1531 | // If x is a TypeNode, capture any more-precise type permanently into Node |
1532 | // otherwise it will be not updated during igvn->transform since |
1533 | // igvn->type(x) is set to x->Value() already. |
1534 | x->raise_bottom_type(t); |
1535 | Node *y = igvn->apply_identity(x); |
1536 | if (y != x) { |
1537 | x = y; |
1538 | } else { |
1539 | y = igvn->hash_find_insert(x); |
1540 | if (y) { |
1541 | x = y; |
1542 | } else { |
1543 | // Else x is a new node we are keeping |
1544 | // We do not need register_new_node_with_optimizer |
1545 | // because set_type has already been called. |
1546 | igvn->_worklist.push(x); |
1547 | } |
1548 | } |
1549 | } |
1550 | if (x != the_clone && the_clone != NULL) { |
1551 | igvn->remove_dead_node(the_clone); |
1552 | } |
1553 | phi->set_req(i, x); |
1554 | } |
1555 | // Record Phi |
1556 | igvn->register_new_node_with_optimizer(phi); |
1557 | return phi; |
1558 | } |
1559 | |
1560 | //------------------------------Ideal------------------------------------------ |
1561 | // If the load is from Field memory and the pointer is non-null, it might be possible to |
1562 | // zero out the control input. |
1563 | // If the offset is constant and the base is an object allocation, |
1564 | // try to hook me up to the exact initializing store. |
1565 | Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) { |
1566 | Node* p = MemNode::Ideal_common(phase, can_reshape); |
1567 | if (p) return (p == NodeSentinel) ? NULL : p; |
1568 | |
1569 | Node* ctrl = in(MemNode::Control); |
1570 | Node* address = in(MemNode::Address); |
1571 | bool progress = false; |
1572 | |
1573 | bool addr_mark = ((phase->type(address)->isa_oopptr() || phase->type(address)->isa_narrowoop()) && |
1574 | phase->type(address)->is_ptr()->offset() == oopDesc::mark_offset_in_bytes()); |
1575 | |
1576 | // Skip up past a SafePoint control. Cannot do this for Stores because |
1577 | // pointer stores & cardmarks must stay on the same side of a SafePoint. |
1578 | if( ctrl != NULL && ctrl->Opcode() == Op_SafePoint && |
1579 | phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw && |
1580 | !addr_mark ) { |
1581 | ctrl = ctrl->in(0); |
1582 | set_req(MemNode::Control,ctrl); |
1583 | progress = true; |
1584 | } |
1585 | |
1586 | intptr_t ignore = 0; |
1587 | Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore); |
1588 | if (base != NULL |
1589 | && phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw) { |
1590 | // Check for useless control edge in some common special cases |
1591 | if (in(MemNode::Control) != NULL |
1592 | && can_remove_control() |
1593 | && phase->type(base)->higher_equal(TypePtr::NOTNULL) |
1594 | && all_controls_dominate(base, phase->C->start())) { |
1595 | // A method-invariant, non-null address (constant or 'this' argument). |
1596 | set_req(MemNode::Control, NULL); |
1597 | progress = true; |
1598 | } |
1599 | } |
1600 | |
1601 | Node* mem = in(MemNode::Memory); |
1602 | const TypePtr *addr_t = phase->type(address)->isa_ptr(); |
1603 | |
1604 | if (can_reshape && (addr_t != NULL)) { |
1605 | // try to optimize our memory input |
1606 | Node* opt_mem = MemNode::optimize_memory_chain(mem, addr_t, this, phase); |
1607 | if (opt_mem != mem) { |
1608 | set_req(MemNode::Memory, opt_mem); |
1609 | if (phase->type( opt_mem ) == Type::TOP) return NULL; |
1610 | return this; |
1611 | } |
1612 | const TypeOopPtr *t_oop = addr_t->isa_oopptr(); |
1613 | if ((t_oop != NULL) && |
1614 | (t_oop->is_known_instance_field() || |
1615 | t_oop->is_ptr_to_boxed_value())) { |
1616 | PhaseIterGVN *igvn = phase->is_IterGVN(); |
1617 | if (igvn != NULL && igvn->_worklist.member(opt_mem)) { |
1618 | // Delay this transformation until memory Phi is processed. |
1619 | phase->is_IterGVN()->_worklist.push(this); |
1620 | return NULL; |
1621 | } |
1622 | // Split instance field load through Phi. |
1623 | Node* result = split_through_phi(phase); |
1624 | if (result != NULL) return result; |
1625 | |
1626 | if (t_oop->is_ptr_to_boxed_value()) { |
1627 | Node* result = eliminate_autobox(phase); |
1628 | if (result != NULL) return result; |
1629 | } |
1630 | } |
1631 | } |
1632 | |
1633 | // Is there a dominating load that loads the same value? Leave |
1634 | // anything that is not a load of a field/array element (like |
1635 | // barriers etc.) alone |
1636 | if (in(0) != NULL && !adr_type()->isa_rawptr() && can_reshape) { |
1637 | for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { |
1638 | Node *use = mem->fast_out(i); |
1639 | if (use != this && |
1640 | use->Opcode() == Opcode() && |
1641 | use->in(0) != NULL && |
1642 | use->in(0) != in(0) && |
1643 | use->in(Address) == in(Address)) { |
1644 | Node* ctl = in(0); |
1645 | for (int i = 0; i < 10 && ctl != NULL; i++) { |
1646 | ctl = IfNode::up_one_dom(ctl); |
1647 | if (ctl == use->in(0)) { |
1648 | set_req(0, use->in(0)); |
1649 | return this; |
1650 | } |
1651 | } |
1652 | } |
1653 | } |
1654 | } |
1655 | |
1656 | // Check for prior store with a different base or offset; make Load |
1657 | // independent. Skip through any number of them. Bail out if the stores |
1658 | // are in an endless dead cycle and report no progress. This is a key |
1659 | // transform for Reflection. However, if after skipping through the Stores |
1660 | // we can't then fold up against a prior store do NOT do the transform as |
1661 | // this amounts to using the 'Oracle' model of aliasing. It leaves the same |
1662 | // array memory alive twice: once for the hoisted Load and again after the |
1663 | // bypassed Store. This situation only works if EVERYBODY who does |
1664 | // anti-dependence work knows how to bypass. I.e. we need all |
1665 | // anti-dependence checks to ask the same Oracle. Right now, that Oracle is |
1666 | // the alias index stuff. So instead, peek through Stores and IFF we can |
1667 | // fold up, do so. |
1668 | Node* prev_mem = find_previous_store(phase); |
1669 | if (prev_mem != NULL) { |
1670 | Node* value = can_see_arraycopy_value(prev_mem, phase); |
1671 | if (value != NULL) { |
1672 | return value; |
1673 | } |
1674 | } |
1675 | // Steps (a), (b): Walk past independent stores to find an exact match. |
1676 | if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) { |
1677 | // (c) See if we can fold up on the spot, but don't fold up here. |
1678 | // Fold-up might require truncation (for LoadB/LoadS/LoadUS) or |
1679 | // just return a prior value, which is done by Identity calls. |
1680 | if (can_see_stored_value(prev_mem, phase)) { |
1681 | // Make ready for step (d): |
1682 | set_req(MemNode::Memory, prev_mem); |
1683 | return this; |
1684 | } |
1685 | } |
1686 | |
1687 | return progress ? this : NULL; |
1688 | } |
1689 | |
1690 | // Helper to recognize certain Klass fields which are invariant across |
1691 | // some group of array types (e.g., int[] or all T[] where T < Object). |
1692 | const Type* |
1693 | LoadNode::load_array_final_field(const TypeKlassPtr *tkls, |
1694 | ciKlass* klass) const { |
1695 | if (tkls->offset() == in_bytes(Klass::modifier_flags_offset())) { |
1696 | // The field is Klass::_modifier_flags. Return its (constant) value. |
1697 | // (Folds up the 2nd indirection in aClassConstant.getModifiers().) |
1698 | assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags" ); |
1699 | return TypeInt::make(klass->modifier_flags()); |
1700 | } |
1701 | if (tkls->offset() == in_bytes(Klass::access_flags_offset())) { |
1702 | // The field is Klass::_access_flags. Return its (constant) value. |
1703 | // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).) |
1704 | assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags" ); |
1705 | return TypeInt::make(klass->access_flags()); |
1706 | } |
1707 | if (tkls->offset() == in_bytes(Klass::layout_helper_offset())) { |
1708 | // The field is Klass::_layout_helper. Return its constant value if known. |
1709 | assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper" ); |
1710 | return TypeInt::make(klass->layout_helper()); |
1711 | } |
1712 | |
1713 | // No match. |
1714 | return NULL; |
1715 | } |
1716 | |
1717 | //------------------------------Value----------------------------------------- |
1718 | const Type* LoadNode::Value(PhaseGVN* phase) const { |
1719 | // Either input is TOP ==> the result is TOP |
1720 | Node* mem = in(MemNode::Memory); |
1721 | const Type *t1 = phase->type(mem); |
1722 | if (t1 == Type::TOP) return Type::TOP; |
1723 | Node* adr = in(MemNode::Address); |
1724 | const TypePtr* tp = phase->type(adr)->isa_ptr(); |
1725 | if (tp == NULL || tp->empty()) return Type::TOP; |
1726 | int off = tp->offset(); |
1727 | assert(off != Type::OffsetTop, "case covered by TypePtr::empty" ); |
1728 | Compile* C = phase->C; |
1729 | |
1730 | // Try to guess loaded type from pointer type |
1731 | if (tp->isa_aryptr()) { |
1732 | const TypeAryPtr* ary = tp->is_aryptr(); |
1733 | const Type* t = ary->elem(); |
1734 | |
1735 | // Determine whether the reference is beyond the header or not, by comparing |
1736 | // the offset against the offset of the start of the array's data. |
1737 | // Different array types begin at slightly different offsets (12 vs. 16). |
1738 | // We choose T_BYTE as an example base type that is least restrictive |
1739 | // as to alignment, which will therefore produce the smallest |
1740 | // possible base offset. |
1741 | const int min_base_off = arrayOopDesc::base_offset_in_bytes(T_BYTE); |
1742 | const bool = (off >= min_base_off); |
1743 | |
1744 | // Try to constant-fold a stable array element. |
1745 | if (FoldStableValues && !is_mismatched_access() && ary->is_stable()) { |
1746 | // Make sure the reference is not into the header and the offset is constant |
1747 | ciObject* aobj = ary->const_oop(); |
1748 | if (aobj != NULL && off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) { |
1749 | int stable_dimension = (ary->stable_dimension() > 0 ? ary->stable_dimension() - 1 : 0); |
1750 | const Type* con_type = Type::make_constant_from_array_element(aobj->as_array(), off, |
1751 | stable_dimension, |
1752 | memory_type(), is_unsigned()); |
1753 | if (con_type != NULL) { |
1754 | return con_type; |
1755 | } |
1756 | } |
1757 | } |
1758 | |
1759 | // Don't do this for integer types. There is only potential profit if |
1760 | // the element type t is lower than _type; that is, for int types, if _type is |
1761 | // more restrictive than t. This only happens here if one is short and the other |
1762 | // char (both 16 bits), and in those cases we've made an intentional decision |
1763 | // to use one kind of load over the other. See AndINode::Ideal and 4965907. |
1764 | // Also, do not try to narrow the type for a LoadKlass, regardless of offset. |
1765 | // |
1766 | // Yes, it is possible to encounter an expression like (LoadKlass p1:(AddP x x 8)) |
1767 | // where the _gvn.type of the AddP is wider than 8. This occurs when an earlier |
1768 | // copy p0 of (AddP x x 8) has been proven equal to p1, and the p0 has been |
1769 | // subsumed by p1. If p1 is on the worklist but has not yet been re-transformed, |
1770 | // it is possible that p1 will have a type like Foo*[int+]:NotNull*+any. |
1771 | // In fact, that could have been the original type of p1, and p1 could have |
1772 | // had an original form like p1:(AddP x x (LShiftL quux 3)), where the |
1773 | // expression (LShiftL quux 3) independently optimized to the constant 8. |
1774 | if ((t->isa_int() == NULL) && (t->isa_long() == NULL) |
1775 | && (_type->isa_vect() == NULL) |
1776 | && Opcode() != Op_LoadKlass && Opcode() != Op_LoadNKlass) { |
1777 | // t might actually be lower than _type, if _type is a unique |
1778 | // concrete subclass of abstract class t. |
1779 | if (off_beyond_header || off == Type::OffsetBot) { // is the offset beyond the header? |
1780 | const Type* jt = t->join_speculative(_type); |
1781 | // In any case, do not allow the join, per se, to empty out the type. |
1782 | if (jt->empty() && !t->empty()) { |
1783 | // This can happen if a interface-typed array narrows to a class type. |
1784 | jt = _type; |
1785 | } |
1786 | #ifdef ASSERT |
1787 | if (phase->C->eliminate_boxing() && adr->is_AddP()) { |
1788 | // The pointers in the autobox arrays are always non-null |
1789 | Node* base = adr->in(AddPNode::Base); |
1790 | if ((base != NULL) && base->is_DecodeN()) { |
1791 | // Get LoadN node which loads IntegerCache.cache field |
1792 | base = base->in(1); |
1793 | } |
1794 | if ((base != NULL) && base->is_Con()) { |
1795 | const TypeAryPtr* base_type = base->bottom_type()->isa_aryptr(); |
1796 | if ((base_type != NULL) && base_type->is_autobox_cache()) { |
1797 | // It could be narrow oop |
1798 | assert(jt->make_ptr()->ptr() == TypePtr::NotNull,"sanity" ); |
1799 | } |
1800 | } |
1801 | } |
1802 | #endif |
1803 | return jt; |
1804 | } |
1805 | } |
1806 | } else if (tp->base() == Type::InstPtr) { |
1807 | assert( off != Type::OffsetBot || |
1808 | // arrays can be cast to Objects |
1809 | tp->is_oopptr()->klass()->is_java_lang_Object() || |
1810 | // unsafe field access may not have a constant offset |
1811 | C->has_unsafe_access(), |
1812 | "Field accesses must be precise" ); |
1813 | // For oop loads, we expect the _type to be precise. |
1814 | |
1815 | // Optimize loads from constant fields. |
1816 | const TypeInstPtr* tinst = tp->is_instptr(); |
1817 | ciObject* const_oop = tinst->const_oop(); |
1818 | if (!is_mismatched_access() && off != Type::OffsetBot && const_oop != NULL && const_oop->is_instance()) { |
1819 | const Type* con_type = Type::make_constant_from_field(const_oop->as_instance(), off, is_unsigned(), memory_type()); |
1820 | if (con_type != NULL) { |
1821 | return con_type; |
1822 | } |
1823 | } |
1824 | } else if (tp->base() == Type::KlassPtr) { |
1825 | assert( off != Type::OffsetBot || |
1826 | // arrays can be cast to Objects |
1827 | tp->is_klassptr()->klass()->is_java_lang_Object() || |
1828 | // also allow array-loading from the primary supertype |
1829 | // array during subtype checks |
1830 | Opcode() == Op_LoadKlass, |
1831 | "Field accesses must be precise" ); |
1832 | // For klass/static loads, we expect the _type to be precise |
1833 | } else if (tp->base() == Type::RawPtr && adr->is_Load() && off == 0) { |
1834 | /* With mirrors being an indirect in the Klass* |
1835 | * the VM is now using two loads. LoadKlass(LoadP(LoadP(Klass, mirror_offset), zero_offset)) |
1836 | * The LoadP from the Klass has a RawPtr type (see LibraryCallKit::load_mirror_from_klass). |
1837 | * |
1838 | * So check the type and klass of the node before the LoadP. |
1839 | */ |
1840 | Node* adr2 = adr->in(MemNode::Address); |
1841 | const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr(); |
1842 | if (tkls != NULL && !StressReflectiveCode) { |
1843 | ciKlass* klass = tkls->klass(); |
1844 | if (klass->is_loaded() && tkls->klass_is_exact() && tkls->offset() == in_bytes(Klass::java_mirror_offset())) { |
1845 | assert(adr->Opcode() == Op_LoadP, "must load an oop from _java_mirror" ); |
1846 | assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror" ); |
1847 | return TypeInstPtr::make(klass->java_mirror()); |
1848 | } |
1849 | } |
1850 | } |
1851 | |
1852 | const TypeKlassPtr *tkls = tp->isa_klassptr(); |
1853 | if (tkls != NULL && !StressReflectiveCode) { |
1854 | ciKlass* klass = tkls->klass(); |
1855 | if (klass->is_loaded() && tkls->klass_is_exact()) { |
1856 | // We are loading a field from a Klass metaobject whose identity |
1857 | // is known at compile time (the type is "exact" or "precise"). |
1858 | // Check for fields we know are maintained as constants by the VM. |
1859 | if (tkls->offset() == in_bytes(Klass::super_check_offset_offset())) { |
1860 | // The field is Klass::_super_check_offset. Return its (constant) value. |
1861 | // (Folds up type checking code.) |
1862 | assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset" ); |
1863 | return TypeInt::make(klass->super_check_offset()); |
1864 | } |
1865 | // Compute index into primary_supers array |
1866 | juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(Klass*); |
1867 | // Check for overflowing; use unsigned compare to handle the negative case. |
1868 | if( depth < ciKlass::primary_super_limit() ) { |
1869 | // The field is an element of Klass::_primary_supers. Return its (constant) value. |
1870 | // (Folds up type checking code.) |
1871 | assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers" ); |
1872 | ciKlass *ss = klass->super_of_depth(depth); |
1873 | return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR; |
1874 | } |
1875 | const Type* aift = load_array_final_field(tkls, klass); |
1876 | if (aift != NULL) return aift; |
1877 | } |
1878 | |
1879 | // We can still check if we are loading from the primary_supers array at a |
1880 | // shallow enough depth. Even though the klass is not exact, entries less |
1881 | // than or equal to its super depth are correct. |
1882 | if (klass->is_loaded() ) { |
1883 | ciType *inner = klass; |
1884 | while( inner->is_obj_array_klass() ) |
1885 | inner = inner->as_obj_array_klass()->base_element_type(); |
1886 | if( inner->is_instance_klass() && |
1887 | !inner->as_instance_klass()->flags().is_interface() ) { |
1888 | // Compute index into primary_supers array |
1889 | juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(Klass*); |
1890 | // Check for overflowing; use unsigned compare to handle the negative case. |
1891 | if( depth < ciKlass::primary_super_limit() && |
1892 | depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case |
1893 | // The field is an element of Klass::_primary_supers. Return its (constant) value. |
1894 | // (Folds up type checking code.) |
1895 | assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers" ); |
1896 | ciKlass *ss = klass->super_of_depth(depth); |
1897 | return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR; |
1898 | } |
1899 | } |
1900 | } |
1901 | |
1902 | // If the type is enough to determine that the thing is not an array, |
1903 | // we can give the layout_helper a positive interval type. |
1904 | // This will help short-circuit some reflective code. |
1905 | if (tkls->offset() == in_bytes(Klass::layout_helper_offset()) |
1906 | && !klass->is_array_klass() // not directly typed as an array |
1907 | && !klass->is_interface() // specifically not Serializable & Cloneable |
1908 | && !klass->is_java_lang_Object() // not the supertype of all T[] |
1909 | ) { |
1910 | // Note: When interfaces are reliable, we can narrow the interface |
1911 | // test to (klass != Serializable && klass != Cloneable). |
1912 | assert(Opcode() == Op_LoadI, "must load an int from _layout_helper" ); |
1913 | jint min_size = Klass::instance_layout_helper(oopDesc::header_size(), false); |
1914 | // The key property of this type is that it folds up tests |
1915 | // for array-ness, since it proves that the layout_helper is positive. |
1916 | // Thus, a generic value like the basic object layout helper works fine. |
1917 | return TypeInt::make(min_size, max_jint, Type::WidenMin); |
1918 | } |
1919 | } |
1920 | |
1921 | // If we are loading from a freshly-allocated object, produce a zero, |
1922 | // if the load is provably beyond the header of the object. |
1923 | // (Also allow a variable load from a fresh array to produce zero.) |
1924 | const TypeOopPtr *tinst = tp->isa_oopptr(); |
1925 | bool is_instance = (tinst != NULL) && tinst->is_known_instance_field(); |
1926 | bool is_boxed_value = (tinst != NULL) && tinst->is_ptr_to_boxed_value(); |
1927 | if (ReduceFieldZeroing || is_instance || is_boxed_value) { |
1928 | Node* value = can_see_stored_value(mem,phase); |
1929 | if (value != NULL && value->is_Con()) { |
1930 | assert(value->bottom_type()->higher_equal(_type),"sanity" ); |
1931 | return value->bottom_type(); |
1932 | } |
1933 | } |
1934 | |
1935 | if (is_instance) { |
1936 | // If we have an instance type and our memory input is the |
1937 | // programs's initial memory state, there is no matching store, |
1938 | // so just return a zero of the appropriate type |
1939 | Node *mem = in(MemNode::Memory); |
1940 | if (mem->is_Parm() && mem->in(0)->is_Start()) { |
1941 | assert(mem->as_Parm()->_con == TypeFunc::Memory, "must be memory Parm" ); |
1942 | return Type::get_zero_type(_type->basic_type()); |
1943 | } |
1944 | } |
1945 | return _type; |
1946 | } |
1947 | |
1948 | //------------------------------match_edge------------------------------------- |
1949 | // Do we Match on this edge index or not? Match only the address. |
1950 | uint LoadNode::match_edge(uint idx) const { |
1951 | return idx == MemNode::Address; |
1952 | } |
1953 | |
1954 | //--------------------------LoadBNode::Ideal-------------------------------------- |
1955 | // |
1956 | // If the previous store is to the same address as this load, |
1957 | // and the value stored was larger than a byte, replace this load |
1958 | // with the value stored truncated to a byte. If no truncation is |
1959 | // needed, the replacement is done in LoadNode::Identity(). |
1960 | // |
1961 | Node *LoadBNode::Ideal(PhaseGVN *phase, bool can_reshape) { |
1962 | Node* mem = in(MemNode::Memory); |
1963 | Node* value = can_see_stored_value(mem,phase); |
1964 | if( value && !phase->type(value)->higher_equal( _type ) ) { |
1965 | Node *result = phase->transform( new LShiftINode(value, phase->intcon(24)) ); |
1966 | return new RShiftINode(result, phase->intcon(24)); |
1967 | } |
1968 | // Identity call will handle the case where truncation is not needed. |
1969 | return LoadNode::Ideal(phase, can_reshape); |
1970 | } |
1971 | |
1972 | const Type* LoadBNode::Value(PhaseGVN* phase) const { |
1973 | Node* mem = in(MemNode::Memory); |
1974 | Node* value = can_see_stored_value(mem,phase); |
1975 | if (value != NULL && value->is_Con() && |
1976 | !value->bottom_type()->higher_equal(_type)) { |
1977 | // If the input to the store does not fit with the load's result type, |
1978 | // it must be truncated. We can't delay until Ideal call since |
1979 | // a singleton Value is needed for split_thru_phi optimization. |
1980 | int con = value->get_int(); |
1981 | return TypeInt::make((con << 24) >> 24); |
1982 | } |
1983 | return LoadNode::Value(phase); |
1984 | } |
1985 | |
1986 | //--------------------------LoadUBNode::Ideal------------------------------------- |
1987 | // |
1988 | // If the previous store is to the same address as this load, |
1989 | // and the value stored was larger than a byte, replace this load |
1990 | // with the value stored truncated to a byte. If no truncation is |
1991 | // needed, the replacement is done in LoadNode::Identity(). |
1992 | // |
1993 | Node* LoadUBNode::Ideal(PhaseGVN* phase, bool can_reshape) { |
1994 | Node* mem = in(MemNode::Memory); |
1995 | Node* value = can_see_stored_value(mem, phase); |
1996 | if (value && !phase->type(value)->higher_equal(_type)) |
1997 | return new AndINode(value, phase->intcon(0xFF)); |
1998 | // Identity call will handle the case where truncation is not needed. |
1999 | return LoadNode::Ideal(phase, can_reshape); |
2000 | } |
2001 | |
2002 | const Type* LoadUBNode::Value(PhaseGVN* phase) const { |
2003 | Node* mem = in(MemNode::Memory); |
2004 | Node* value = can_see_stored_value(mem,phase); |
2005 | if (value != NULL && value->is_Con() && |
2006 | !value->bottom_type()->higher_equal(_type)) { |
2007 | // If the input to the store does not fit with the load's result type, |
2008 | // it must be truncated. We can't delay until Ideal call since |
2009 | // a singleton Value is needed for split_thru_phi optimization. |
2010 | int con = value->get_int(); |
2011 | return TypeInt::make(con & 0xFF); |
2012 | } |
2013 | return LoadNode::Value(phase); |
2014 | } |
2015 | |
2016 | //--------------------------LoadUSNode::Ideal------------------------------------- |
2017 | // |
2018 | // If the previous store is to the same address as this load, |
2019 | // and the value stored was larger than a char, replace this load |
2020 | // with the value stored truncated to a char. If no truncation is |
2021 | // needed, the replacement is done in LoadNode::Identity(). |
2022 | // |
2023 | Node *LoadUSNode::Ideal(PhaseGVN *phase, bool can_reshape) { |
2024 | Node* mem = in(MemNode::Memory); |
2025 | Node* value = can_see_stored_value(mem,phase); |
2026 | if( value && !phase->type(value)->higher_equal( _type ) ) |
2027 | return new AndINode(value,phase->intcon(0xFFFF)); |
2028 | // Identity call will handle the case where truncation is not needed. |
2029 | return LoadNode::Ideal(phase, can_reshape); |
2030 | } |
2031 | |
2032 | const Type* LoadUSNode::Value(PhaseGVN* phase) const { |
2033 | Node* mem = in(MemNode::Memory); |
2034 | Node* value = can_see_stored_value(mem,phase); |
2035 | if (value != NULL && value->is_Con() && |
2036 | !value->bottom_type()->higher_equal(_type)) { |
2037 | // If the input to the store does not fit with the load's result type, |
2038 | // it must be truncated. We can't delay until Ideal call since |
2039 | // a singleton Value is needed for split_thru_phi optimization. |
2040 | int con = value->get_int(); |
2041 | return TypeInt::make(con & 0xFFFF); |
2042 | } |
2043 | return LoadNode::Value(phase); |
2044 | } |
2045 | |
2046 | //--------------------------LoadSNode::Ideal-------------------------------------- |
2047 | // |
2048 | // If the previous store is to the same address as this load, |
2049 | // and the value stored was larger than a short, replace this load |
2050 | // with the value stored truncated to a short. If no truncation is |
2051 | // needed, the replacement is done in LoadNode::Identity(). |
2052 | // |
2053 | Node *LoadSNode::Ideal(PhaseGVN *phase, bool can_reshape) { |
2054 | Node* mem = in(MemNode::Memory); |
2055 | Node* value = can_see_stored_value(mem,phase); |
2056 | if( value && !phase->type(value)->higher_equal( _type ) ) { |
2057 | Node *result = phase->transform( new LShiftINode(value, phase->intcon(16)) ); |
2058 | return new RShiftINode(result, phase->intcon(16)); |
2059 | } |
2060 | // Identity call will handle the case where truncation is not needed. |
2061 | return LoadNode::Ideal(phase, can_reshape); |
2062 | } |
2063 | |
2064 | const Type* LoadSNode::Value(PhaseGVN* phase) const { |
2065 | Node* mem = in(MemNode::Memory); |
2066 | Node* value = can_see_stored_value(mem,phase); |
2067 | if (value != NULL && value->is_Con() && |
2068 | !value->bottom_type()->higher_equal(_type)) { |
2069 | // If the input to the store does not fit with the load's result type, |
2070 | // it must be truncated. We can't delay until Ideal call since |
2071 | // a singleton Value is needed for split_thru_phi optimization. |
2072 | int con = value->get_int(); |
2073 | return TypeInt::make((con << 16) >> 16); |
2074 | } |
2075 | return LoadNode::Value(phase); |
2076 | } |
2077 | |
2078 | //============================================================================= |
2079 | //----------------------------LoadKlassNode::make------------------------------ |
2080 | // Polymorphic factory method: |
2081 | Node* LoadKlassNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* at, const TypeKlassPtr* tk) { |
2082 | // sanity check the alias category against the created node type |
2083 | const TypePtr *adr_type = adr->bottom_type()->isa_ptr(); |
2084 | assert(adr_type != NULL, "expecting TypeKlassPtr" ); |
2085 | #ifdef _LP64 |
2086 | if (adr_type->is_ptr_to_narrowklass()) { |
2087 | assert(UseCompressedClassPointers, "no compressed klasses" ); |
2088 | Node* load_klass = gvn.transform(new LoadNKlassNode(ctl, mem, adr, at, tk->make_narrowklass(), MemNode::unordered)); |
2089 | return new DecodeNKlassNode(load_klass, load_klass->bottom_type()->make_ptr()); |
2090 | } |
2091 | #endif |
2092 | assert(!adr_type->is_ptr_to_narrowklass() && !adr_type->is_ptr_to_narrowoop(), "should have got back a narrow oop" ); |
2093 | return new LoadKlassNode(ctl, mem, adr, at, tk, MemNode::unordered); |
2094 | } |
2095 | |
2096 | //------------------------------Value------------------------------------------ |
2097 | const Type* LoadKlassNode::Value(PhaseGVN* phase) const { |
2098 | return klass_value_common(phase); |
2099 | } |
2100 | |
2101 | // In most cases, LoadKlassNode does not have the control input set. If the control |
2102 | // input is set, it must not be removed (by LoadNode::Ideal()). |
2103 | bool LoadKlassNode::can_remove_control() const { |
2104 | return false; |
2105 | } |
2106 | |
// Shared Value() computation for klass loads (called from
// LoadKlassNode::Value and LoadNKlassNode::Value).
//
// Returns Type::TOP when the memory or address input is TOP, or when the
// address pointer is above the centerline or is Null.  Otherwise the static
// type of the address is inspected to produce the most precise klass type
// available:
//   - instance pointers (including java.lang.Class mirrors),
//   - array pointers,
//   - klass pointers (element klass of an object array, super of an exact
//     instance klass).
// If none of these cases applies, the result of LoadNode::Value is returned.
// Note that the leaf-type cases record a compile dependency as a side effect.
const Type* LoadNode::klass_value_common(PhaseGVN* phase) const {
  // Either input is TOP ==> the result is TOP
  const Type *t1 = phase->type( in(MemNode::Memory) );
  if (t1 == Type::TOP) return Type::TOP;
  Node *adr = in(MemNode::Address);
  const Type *t2 = phase->type( adr );
  if (t2 == Type::TOP) return Type::TOP;
  const TypePtr *tp = t2->is_ptr();
  // A load through a pointer that is above the centerline or known to be
  // null also yields TOP.
  if (TypePtr::above_centerline(tp->ptr()) ||
      tp->ptr() == TypePtr::Null) return Type::TOP;

  // Return a more precise klass, if possible
  const TypeInstPtr *tinst = tp->isa_instptr();
  if (tinst != NULL) {
    ciInstanceKlass* ik = tinst->klass()->as_instance_klass();
    int offset = tinst->offset();
    if (ik == phase->C->env()->Class_klass()
        && (offset == java_lang_Class::klass_offset_in_bytes() ||
            offset == java_lang_Class::array_klass_offset_in_bytes())) {
      // We are loading a special hidden field from a Class mirror object,
      // the field which points to the VM's Klass metaobject.
      ciType* t = tinst->java_mirror_type();
      // java_mirror_type returns non-null for compile-time Class constants.
      if (t != NULL) {
        // constant oop => constant klass
        if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
          if (t->is_void()) {
            // We cannot create a void array.  Since void is a primitive type return null
            // klass.  Users of this result need to do a null check on the returned klass.
            return TypePtr::NULL_PTR;
          }
          return TypeKlassPtr::make(ciArrayKlass::make(t));
        }
        if (!t->is_klass()) {
          // a primitive Class (e.g., int.class) has NULL for a klass field
          return TypePtr::NULL_PTR;
        }
        // (Folds up the 1st indirection in aClassConstant.getModifiers().)
        return TypeKlassPtr::make(t->as_klass());
      }
      // non-constant mirror, so we can't tell what's going on
    }
    if( !ik->is_loaded() )
      return _type;             // Bail out if not loaded
    // Loading the klass word of an ordinary instance.
    if (offset == oopDesc::klass_offset_in_bytes()) {
      if (tinst->klass_is_exact()) {
        return TypeKlassPtr::make(ik);
      }
      // See if we can become precise: no subklasses and no interface
      // (Note:  We need to support verified interfaces.)
      if (!ik->is_interface() && !ik->has_subklass()) {
        //assert(!UseExactTypes, "this code should be useless with exact types");
        // Add a dependence; if any subclass added we need to recompile
        if (!ik->is_final()) {
          // %%% should use stronger assert_unique_concrete_subtype instead
          phase->C->dependencies()->assert_leaf_type(ik);
        }
        // Return precise klass
        return TypeKlassPtr::make(ik);
      }

      // Return root of possible klass
      return TypeKlassPtr::make(TypePtr::NotNull, ik, 0/*offset*/);
    }
  }

  // Check for loading klass from an array
  const TypeAryPtr *tary = tp->isa_aryptr();
  if( tary != NULL ) {
    ciKlass *tary_klass = tary->klass();
    if (tary_klass != NULL   // can be NULL when at BOTTOM or TOP
        && tary->offset() == oopDesc::klass_offset_in_bytes()) {
      if (tary->klass_is_exact()) {
        return TypeKlassPtr::make(tary_klass);
      }
      ciArrayKlass *ak = tary->klass()->as_array_klass();
      // If the klass is an object array, we defer the question to the
      // array component klass.
      if( ak->is_obj_array_klass() ) {
        assert( ak->is_loaded(), "" );
        ciKlass *base_k = ak->as_obj_array_klass()->base_element_klass();
        if( base_k->is_loaded() && base_k->is_instance_klass() ) {
          ciInstanceKlass* ik = base_k->as_instance_klass();
          // See if we can become precise: no subklasses and no interface
          if (!ik->is_interface() && !ik->has_subklass()) {
            //assert(!UseExactTypes, "this code should be useless with exact types");
            // Add a dependence; if any subclass added we need to recompile
            if (!ik->is_final()) {
              phase->C->dependencies()->assert_leaf_type(ik);
            }
            // Return precise array klass
            return TypeKlassPtr::make(ak);
          }
        }
        // Element klass may have subtypes: return the non-exact array klass.
        return TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
      } else {                  // Found a type-array?
        //assert(!UseExactTypes, "this code should be useless with exact types");
        assert( ak->is_type_array_klass(), "" );
        return TypeKlassPtr::make(ak); // These are always precise
      }
    }
  }

  // Check for loading klass from an array klass
  // (this folding is skipped entirely when StressReflectiveCode is set)
  const TypeKlassPtr *tkls = tp->isa_klassptr();
  if (tkls != NULL && !StressReflectiveCode) {
    ciKlass* klass = tkls->klass();
    if( !klass->is_loaded() )
      return _type;             // Bail out if not loaded
    if( klass->is_obj_array_klass() &&
        tkls->offset() == in_bytes(ObjArrayKlass::element_klass_offset())) {
      ciKlass* elem = klass->as_obj_array_klass()->element_klass();
      // // Always returning precise element type is incorrect,
      // // e.g., element type could be object and array may contain strings
      // return TypeKlassPtr::make(TypePtr::Constant, elem, 0);

      // The array's TypeKlassPtr was declared 'precise' or 'not precise'
      // according to the element type's subclassing.
      return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/);
    }
    if( klass->is_instance_klass() && tkls->klass_is_exact() &&
        tkls->offset() == in_bytes(Klass::super_offset())) {
      ciKlass* sup = klass->as_instance_klass()->super();
      // The field is Klass::_super.  Return its (constant) value.
      // (Folds up the 2nd indirection in aClassConstant.getSuperClass().)
      return sup ? TypeKlassPtr::make(sup) : TypePtr::NULL_PTR;
    }
  }

  // Bailout case
  return LoadNode::Value(phase);
}
2239 | |
2240 | //------------------------------Identity--------------------------------------- |
2241 | // To clean up reflective code, simplify k.java_mirror.as_klass to plain k. |
2242 | // Also feed through the klass in Allocate(...klass...)._klass. |
2243 | Node* LoadKlassNode::Identity(PhaseGVN* phase) { |
2244 | return klass_identity_common(phase); |
2245 | } |
2246 | |
// Shared Identity() logic for klass loads (called from
// LoadKlassNode::Identity and LoadNKlassNode::Identity).
//
// Tries, in order:
//   1. the generic LoadNode::Identity;
//   2. forwarding the klass fed to an allocation when loading the klass
//      word of that allocation;
//   3. collapsing a klass -> java mirror -> klass round trip back to the
//      original klass node.
// Returns 'this' when no simplification applies.
Node* LoadNode::klass_identity_common(PhaseGVN* phase) {
  Node* x = LoadNode::Identity(phase);
  if (x != this)  return x;

  // Take apart the address into an oop and an offset.
  // Return 'this' if we cannot.
  Node*    adr    = in(MemNode::Address);
  intptr_t offset = 0;
  Node*    base   = AddPNode::Ideal_base_and_offset(adr, phase, offset);
  if (base == NULL)     return this;
  const TypeOopPtr* toop = phase->type(adr)->isa_oopptr();
  if (toop == NULL)     return this;

  // Step over potential GC barrier for OopHandle resolve
  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
  if (bs->is_gc_barrier_node(base)) {
    base = bs->step_over_gc_barrier(base);
  }

  // We can fetch the klass directly through an AllocateNode.
  // This works even if the klass is not constant (clone or newArray).
  if (offset == oopDesc::klass_offset_in_bytes()) {
    Node* allocated_klass = AllocateNode::Ideal_klass(base, phase);
    if (allocated_klass != NULL) {
      return allocated_klass;
    }
  }

  // Simplify k.java_mirror.as_klass to plain k, where k is a Klass*.
  // See inline_native_Class_query for occurrences of these patterns.
  // Java Example:  x.getClass().isAssignableFrom(y)
  //
  // This improves reflective code, often making the Class
  // mirror go completely dead.  (Current exception:  Class
  // mirrors may appear in debug info, but we could clean them out by
  // introducing a new debug info operator for Klass.java_mirror).

  // Pattern: this load reads the klass field of a java.lang.Class instance
  // whose base is Load(Load(AddP(k, java_mirror_offset))).
  if (toop->isa_instptr() && toop->klass() == phase->C->env()->Class_klass()
      && offset == java_lang_Class::klass_offset_in_bytes()) {
    if (base->is_Load()) {
      Node* base2 = base->in(MemNode::Address);
      if (base2->is_Load()) { /* direct load of a load which is the OopHandle */
        Node* adr2 = base2->in(MemNode::Address);
        const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr();
        if (tkls != NULL && !tkls->empty()
            && (tkls->klass()->is_instance_klass() ||
                tkls->klass()->is_array_klass())
            && adr2->is_AddP()
           ) {
          int mirror_field = in_bytes(Klass::java_mirror_offset());
          if (tkls->offset() == mirror_field) {
            // The inner address is k + java_mirror_offset: return k itself.
            return adr2->in(AddPNode::Base);
          }
        }
      }
    }
  }

  return this;
}
2307 | |
2308 | |
2309 | //------------------------------Value------------------------------------------ |
2310 | const Type* LoadNKlassNode::Value(PhaseGVN* phase) const { |
2311 | const Type *t = klass_value_common(phase); |
2312 | if (t == Type::TOP) |
2313 | return t; |
2314 | |
2315 | return t->make_narrowklass(); |
2316 | } |
2317 | |
2318 | //------------------------------Identity--------------------------------------- |
2319 | // To clean up reflective code, simplify k.java_mirror.as_klass to narrow k. |
2320 | // Also feed through the klass in Allocate(...klass...)._klass. |
2321 | Node* LoadNKlassNode::Identity(PhaseGVN* phase) { |
2322 | Node *x = klass_identity_common(phase); |
2323 | |
2324 | const Type *t = phase->type( x ); |
2325 | if( t == Type::TOP ) return x; |
2326 | if( t->isa_narrowklass()) return x; |
2327 | assert (!t->isa_narrowoop(), "no narrow oop here" ); |
2328 | |
2329 | return phase->transform(new EncodePKlassNode(x, t->make_narrowklass())); |
2330 | } |
2331 | |
2332 | //------------------------------Value----------------------------------------- |
2333 | const Type* LoadRangeNode::Value(PhaseGVN* phase) const { |
2334 | // Either input is TOP ==> the result is TOP |
2335 | const Type *t1 = phase->type( in(MemNode::Memory) ); |
2336 | if( t1 == Type::TOP ) return Type::TOP; |
2337 | Node *adr = in(MemNode::Address); |
2338 | const Type *t2 = phase->type( adr ); |
2339 | if( t2 == Type::TOP ) return Type::TOP; |
2340 | const TypePtr *tp = t2->is_ptr(); |
2341 | if (TypePtr::above_centerline(tp->ptr())) return Type::TOP; |
2342 | const TypeAryPtr *tap = tp->isa_aryptr(); |
2343 | if( !tap ) return _type; |
2344 | return tap->size(); |
2345 | } |
2346 | |
2347 | //-------------------------------Ideal--------------------------------------- |
2348 | // Feed through the length in AllocateArray(...length...)._length. |
2349 | Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) { |
2350 | Node* p = MemNode::Ideal_common(phase, can_reshape); |
2351 | if (p) return (p == NodeSentinel) ? NULL : p; |
2352 | |
2353 | // Take apart the address into an oop and and offset. |
2354 | // Return 'this' if we cannot. |
2355 | Node* adr = in(MemNode::Address); |
2356 | intptr_t offset = 0; |
2357 | Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); |
2358 | if (base == NULL) return NULL; |
2359 | const TypeAryPtr* tary = phase->type(adr)->isa_aryptr(); |
2360 | if (tary == NULL) return NULL; |
2361 | |
2362 | // We can fetch the length directly through an AllocateArrayNode. |
2363 | // This works even if the length is not constant (clone or newArray). |
2364 | if (offset == arrayOopDesc::length_offset_in_bytes()) { |
2365 | AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase); |
2366 | if (alloc != NULL) { |
2367 | Node* allocated_length = alloc->Ideal_length(); |
2368 | Node* len = alloc->make_ideal_length(tary, phase); |
2369 | if (allocated_length != len) { |
2370 | // New CastII improves on this. |
2371 | return len; |
2372 | } |
2373 | } |
2374 | } |
2375 | |
2376 | return NULL; |
2377 | } |
2378 | |
2379 | //------------------------------Identity--------------------------------------- |
2380 | // Feed through the length in AllocateArray(...length...)._length. |
2381 | Node* LoadRangeNode::Identity(PhaseGVN* phase) { |
2382 | Node* x = LoadINode::Identity(phase); |
2383 | if (x != this) return x; |
2384 | |
2385 | // Take apart the address into an oop and and offset. |
2386 | // Return 'this' if we cannot. |
2387 | Node* adr = in(MemNode::Address); |
2388 | intptr_t offset = 0; |
2389 | Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); |
2390 | if (base == NULL) return this; |
2391 | const TypeAryPtr* tary = phase->type(adr)->isa_aryptr(); |
2392 | if (tary == NULL) return this; |
2393 | |
2394 | // We can fetch the length directly through an AllocateArrayNode. |
2395 | // This works even if the length is not constant (clone or newArray). |
2396 | if (offset == arrayOopDesc::length_offset_in_bytes()) { |
2397 | AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase); |
2398 | if (alloc != NULL) { |
2399 | Node* allocated_length = alloc->Ideal_length(); |
2400 | // Do not allow make_ideal_length to allocate a CastII node. |
2401 | Node* len = alloc->make_ideal_length(tary, phase, false); |
2402 | if (allocated_length == len) { |
2403 | // Return allocated_length only if it would not be improved by a CastII. |
2404 | return allocated_length; |
2405 | } |
2406 | } |
2407 | } |
2408 | |
2409 | return this; |
2410 | |
2411 | } |
2412 | |
2413 | //============================================================================= |
//---------------------------StoreNode::make-----------------------------------
// Polymorphic factory method:
// Creates the StoreNode subclass matching basic type 'bt'.
//   - 'mo' must be unordered or release.
//   - A store whose alias index is raw memory must have a control edge.
//   - T_BOOLEAN values are masked to their low bit before being stored as a
//     byte; T_CHAR and T_SHORT both use StoreC.
//   - On 64-bit builds, pointer stores to narrow-oop / narrow-klass slots
//     encode the value first and use StoreN / StoreNKlass.
// The returned node has not been passed through gvn.transform(); only the
// helper nodes created for 'val' are transformed here.
StoreNode* StoreNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt, MemOrd mo) {
  assert((mo == unordered || mo == release), "unexpected" );
  Compile* C = gvn.C;
  assert(C->get_alias_index(adr_type) != Compile::AliasIdxRaw ||
         ctl != NULL, "raw memory operations should have control edge" );

  switch (bt) {
  case T_BOOLEAN: val = gvn.transform(new AndINode(val, gvn.intcon(0x1))); // Fall through to T_BYTE case
  case T_BYTE:    return new StoreBNode(ctl, mem, adr, adr_type, val, mo);
  case T_INT:     return new StoreINode(ctl, mem, adr, adr_type, val, mo);
  case T_CHAR:
  case T_SHORT:   return new StoreCNode(ctl, mem, adr, adr_type, val, mo);
  case T_LONG:    return new StoreLNode(ctl, mem, adr, adr_type, val, mo);
  case T_FLOAT:   return new StoreFNode(ctl, mem, adr, adr_type, val, mo);
  case T_DOUBLE:  return new StoreDNode(ctl, mem, adr, adr_type, val, mo);
  case T_METADATA:
  case T_ADDRESS:
  case T_OBJECT:
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      // Compressed oop slot: encode the oop, then store the narrow value.
      val = gvn.transform(new EncodePNode(val, val->bottom_type()->make_narrowoop()));
      return new StoreNNode(ctl, mem, adr, adr_type, val, mo);
    } else if (adr->bottom_type()->is_ptr_to_narrowklass() ||
               (UseCompressedClassPointers && val->bottom_type()->isa_klassptr() &&
                adr->bottom_type()->isa_rawptr())) {
      // Compressed klass slot, or a klass pointer stored through a raw
      // address while compressed class pointers are in use.
      val = gvn.transform(new EncodePKlassNode(val, val->bottom_type()->make_narrowklass()));
      return new StoreNKlassNode(ctl, mem, adr, adr_type, val, mo);
    }
#endif
    {
      // Full-width pointer store.
      return new StorePNode(ctl, mem, adr, adr_type, val, mo);
    }
  default:
    // No store node exists for any other basic type.
    ShouldNotReachHere();
    return (StoreNode*)NULL;
  }
}
2453 | |
2454 | StoreLNode* StoreLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) { |
2455 | bool require_atomic = true; |
2456 | return new StoreLNode(ctl, mem, adr, adr_type, val, mo, require_atomic); |
2457 | } |
2458 | |
2459 | StoreDNode* StoreDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) { |
2460 | bool require_atomic = true; |
2461 | return new StoreDNode(ctl, mem, adr, adr_type, val, mo, require_atomic); |
2462 | } |
2463 | |
2464 | |
2465 | //--------------------------bottom_type---------------------------------------- |
2466 | const Type *StoreNode::bottom_type() const { |
2467 | return Type::MEMORY; |
2468 | } |
2469 | |
2470 | //------------------------------hash------------------------------------------- |
2471 | uint StoreNode::hash() const { |
2472 | // unroll addition of interesting fields |
2473 | //return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address) + (uintptr_t)in(ValueIn); |
2474 | |
2475 | // Since they are not commoned, do not hash them: |
2476 | return NO_HASH; |
2477 | } |
2478 | |
//------------------------------Ideal------------------------------------------
// Change back-to-back Store(, p, x) -> Store(m, p, y) to Store(m, p, x).
// When a store immediately follows a relevant allocation/initialization,
// try to capture it into the initialization, or hoist it above.
//
// Returns:
//   - the result of MemNode::Ideal_common when it made progress (NULL when
//     it returned NodeSentinel);
//   - 'this' after an earlier store to the same address, no wider than this
//     one, has been unlinked from the memory chain;
//   - a fresh MergeMem when an InitializeNode captured this store;
//   - NULL when no progress was made.
Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  Node* p = MemNode::Ideal_common(phase, can_reshape);
  if (p)  return (p == NodeSentinel) ? NULL : p;

  Node* mem     = in(MemNode::Memory);
  Node* address = in(MemNode::Address);
  // Back-to-back stores to same address?  Fold em up.  Generally
  // unsafe if I have intervening uses...  Also disallowed for StoreCM
  // since they must follow each StoreP operation.  Redundant StoreCMs
  // are eliminated just before matching in final_graph_reshape.
  {
    Node* st = mem;
    // If Store 'st' has more than one use, we cannot fold 'st' away.
    // For example, 'st' might be the final state at a conditional
    // return.  Or, 'st' might be used by some node which is live at
    // the same time 'st' is live, which might be unschedulable.  So,
    // require exactly ONE user until such time as we clone 'mem' for
    // each of 'mem's uses (thus making the exactly-1-user-rule hold
    // true).
    while (st->is_Store() && st->outcnt() == 1 && st->Opcode() != Op_StoreCM) {
      // Looking at a dead closed cycle of memory?
      assert(st != st->in(MemNode::Memory), "dead loop in StoreNode::Ideal" );
      assert(Opcode() == st->Opcode() ||
             st->Opcode() == Op_StoreVector ||
             Opcode() == Op_StoreVector ||
             phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw ||
             (Opcode() == Op_StoreL && st->Opcode() == Op_StoreI) || // expanded ClearArrayNode
             (Opcode() == Op_StoreI && st->Opcode() == Op_StoreL) || // initialization by arraycopy
             (is_mismatched_access() || st->as_Store()->is_mismatched_access()),
             "no mismatched stores, except on raw memory: %s %s" , NodeClassNames[Opcode()], NodeClassNames[st->Opcode()]);

      // 'st' is dead if it writes the same address and is no wider than
      // this store: splice it out by pointing its single user at st's
      // own memory input.
      if (st->in(MemNode::Address)->eqv_uncast(address) &&
          st->as_Store()->memory_size() <= this->memory_size()) {
        Node* use = st->raw_out(0);
        phase->igvn_rehash_node_delayed(use);
        if (can_reshape) {
          use->set_req_X(MemNode::Memory, st->in(MemNode::Memory), phase->is_IterGVN());
        } else {
          // It's OK to do this in the parser, since DU info is always accurate,
          // and the parser always refers to nodes via SafePointNode maps.
          use->set_req(MemNode::Memory, st->in(MemNode::Memory));
        }
        return this;
      }
      // Keep walking up the chain of single-use stores.
      st = st->in(MemNode::Memory);
    }
  }


  // Capture an unaliased, unconditional, simple store into an initializer.
  // Or, if it is independent of the allocation, hoist it above the allocation.
  if (ReduceFieldZeroing && /*can_reshape &&*/
      mem->is_Proj() && mem->in(0)->is_Initialize()) {
    InitializeNode* init = mem->in(0)->as_Initialize();
    intptr_t offset = init->can_capture_store(this, phase, can_reshape);
    // Capture is attempted only for a strictly positive offset.
    if (offset > 0) {
      Node* moved = init->capture_store(this, offset, phase, can_reshape);
      // If the InitializeNode captured me, it made a raw copy of me,
      // and I need to disappear.
      if (moved != NULL) {
        // %%% hack to ensure that Ideal returns a new node:
        mem = MergeMemNode::make(mem);
        return mem;             // fold me away
      }
    }
  }

  return NULL;                  // No further progress
}
2552 | |
2553 | //------------------------------Value----------------------------------------- |
2554 | const Type* StoreNode::Value(PhaseGVN* phase) const { |
2555 | // Either input is TOP ==> the result is TOP |
2556 | const Type *t1 = phase->type( in(MemNode::Memory) ); |
2557 | if( t1 == Type::TOP ) return Type::TOP; |
2558 | const Type *t2 = phase->type( in(MemNode::Address) ); |
2559 | if( t2 == Type::TOP ) return Type::TOP; |
2560 | const Type *t3 = phase->type( in(MemNode::ValueIn) ); |
2561 | if( t3 == Type::TOP ) return Type::TOP; |
2562 | return Type::MEMORY; |
2563 | } |
2564 | |
//------------------------------Identity---------------------------------------
// Remove redundant stores:
//   Store(m, p, Load(m, p)) changes to m.
//   Store(, p, x) -> Store(m, p, x) changes to Store(m, p, x).
// Also drops a store of a zero value when the memory already holds zero
// there (fresh allocation, or a matching earlier store).
//
// Returns the incoming memory state when this store is redundant, otherwise
// 'this'.  When the store is being replaced during IterGVN, the trailing
// membar found by trailing_membar(), if any, is removed as a side effect.
Node* StoreNode::Identity(PhaseGVN* phase) {
  Node* mem = in(MemNode::Memory);
  Node* adr = in(MemNode::Address);
  Node* val = in(MemNode::ValueIn);

  Node* result = this;

  // Load then Store?  Then the Store is useless
  // (the load must read the same address and memory state, and be the
  // load kind that pairs with this store's opcode)
  if (val->is_Load() &&
      val->in(MemNode::Address)->eqv_uncast(adr) &&
      val->in(MemNode::Memory )->eqv_uncast(mem) &&
      val->as_Load()->store_Opcode() == Opcode()) {
    result = mem;
  }

  // Two stores in a row of the same value?
  if (result == this &&
      mem->is_Store() &&
      mem->in(MemNode::Address)->eqv_uncast(adr) &&
      mem->in(MemNode::ValueIn)->eqv_uncast(val) &&
      mem->Opcode() == Opcode()) {
    result = mem;
  }

  // Store of zero anywhere into a freshly-allocated object?
  // Then the store is useless.
  // (It must already have been captured by the InitializeNode.)
  if (result == this &&
      ReduceFieldZeroing && phase->type(val)->is_zero_type()) {
    // a newly allocated object is already all-zeroes everywhere
    if (mem->is_Proj() && mem->in(0)->is_Allocate()) {
      result = mem;
    }

    if (result == this) {
      // the store may also apply to zero-bits in an earlier object
      Node* prev_mem = find_previous_store(phase);
      // Steps (a), (b):  Walk past independent stores to find an exact match.
      if (prev_mem != NULL) {
        Node* prev_val = can_see_stored_value(prev_mem, phase);
        if (prev_val != NULL && phase->eqv(prev_val, val)) {
          // prev_val and val might differ by a cast; it would be good
          // to keep the more informative of the two.
          result = mem;
        }
      }
    }
  }

  // The store is going away: during IterGVN, also remove the trailing
  // membar attached to it, if there is one.
  if (result != this && phase->is_IterGVN() != NULL) {
    MemBarNode* trailing = trailing_membar();
    if (trailing != NULL) {
#ifdef ASSERT
      const TypeOopPtr* t_oop = phase->type(in(Address))->isa_oopptr();
      assert(t_oop == NULL || t_oop->is_known_instance_field(), "only for non escaping objects" );
#endif
      PhaseIterGVN* igvn = phase->is_IterGVN();
      trailing->remove(igvn);
    }
  }

  return result;
}
2632 | |
2633 | //------------------------------match_edge------------------------------------- |
2634 | // Do we Match on this edge index or not? Match only memory & value |
2635 | uint StoreNode::match_edge(uint idx) const { |
2636 | return idx == MemNode::Address || idx == MemNode::ValueIn; |
2637 | } |
2638 | |
2639 | //------------------------------cmp-------------------------------------------- |
2640 | // Do not common stores up together. They generally have to be split |
2641 | // back up anyways, so do not bother. |
2642 | bool StoreNode::cmp( const Node &n ) const { |
2643 | return (&n == this); // Always fail except on self |
2644 | } |
2645 | |
2646 | //------------------------------Ideal_masked_input----------------------------- |
2647 | // Check for a useless mask before a partial-word store |
2648 | // (StoreB ... (AndI valIn conIa) ) |
2649 | // If (conIa & mask == mask) this simplifies to |
2650 | // (StoreB ... (valIn) ) |
2651 | Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) { |
2652 | Node *val = in(MemNode::ValueIn); |
2653 | if( val->Opcode() == Op_AndI ) { |
2654 | const TypeInt *t = phase->type( val->in(2) )->isa_int(); |
2655 | if( t && t->is_con() && (t->get_con() & mask) == mask ) { |
2656 | set_req(MemNode::ValueIn, val->in(1)); |
2657 | return this; |
2658 | } |
2659 | } |
2660 | return NULL; |
2661 | } |
2662 | |
2663 | |
2664 | //------------------------------Ideal_sign_extended_input---------------------- |
2665 | // Check for useless sign-extension before a partial-word store |
2666 | // (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) ) |
2667 | // If (conIL == conIR && conIR <= num_bits) this simplifies to |
2668 | // (StoreB ... (valIn) ) |
2669 | Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) { |
2670 | Node *val = in(MemNode::ValueIn); |
2671 | if( val->Opcode() == Op_RShiftI ) { |
2672 | const TypeInt *t = phase->type( val->in(2) )->isa_int(); |
2673 | if( t && t->is_con() && (t->get_con() <= num_bits) ) { |
2674 | Node *shl = val->in(1); |
2675 | if( shl->Opcode() == Op_LShiftI ) { |
2676 | const TypeInt *t2 = phase->type( shl->in(2) )->isa_int(); |
2677 | if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) { |
2678 | set_req(MemNode::ValueIn, shl->in(1)); |
2679 | return this; |
2680 | } |
2681 | } |
2682 | } |
2683 | } |
2684 | return NULL; |
2685 | } |
2686 | |
2687 | //------------------------------value_never_loaded----------------------------------- |
2688 | // Determine whether there are any possible loads of the value stored. |
2689 | // For simplicity, we actually check if there are any loads from the |
2690 | // address stored to, not just for loads of the value stored by this node. |
2691 | // |
2692 | bool StoreNode::value_never_loaded( PhaseTransform *phase) const { |
2693 | Node *adr = in(Address); |
2694 | const TypeOopPtr *adr_oop = phase->type(adr)->isa_oopptr(); |
2695 | if (adr_oop == NULL) |
2696 | return false; |
2697 | if (!adr_oop->is_known_instance_field()) |
2698 | return false; // if not a distinct instance, there may be aliases of the address |
2699 | for (DUIterator_Fast imax, i = adr->fast_outs(imax); i < imax; i++) { |
2700 | Node *use = adr->fast_out(i); |
2701 | if (use->is_Load() || use->is_LoadStore()) { |
2702 | return false; |
2703 | } |
2704 | } |
2705 | return true; |
2706 | } |
2707 | |
2708 | MemBarNode* StoreNode::trailing_membar() const { |
2709 | if (is_release()) { |
2710 | MemBarNode* trailing_mb = NULL; |
2711 | for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { |
2712 | Node* u = fast_out(i); |
2713 | if (u->is_MemBar()) { |
2714 | if (u->as_MemBar()->trailing_store()) { |
2715 | assert(u->Opcode() == Op_MemBarVolatile, "" ); |
2716 | assert(trailing_mb == NULL, "only one" ); |
2717 | trailing_mb = u->as_MemBar(); |
2718 | #ifdef ASSERT |
2719 | Node* leading = u->as_MemBar()->leading_membar(); |
2720 | assert(leading->Opcode() == Op_MemBarRelease, "incorrect membar" ); |
2721 | assert(leading->as_MemBar()->leading_store(), "incorrect membar pair" ); |
2722 | assert(leading->as_MemBar()->trailing_membar() == u, "incorrect membar pair" ); |
2723 | #endif |
2724 | } else { |
2725 | assert(u->as_MemBar()->standalone(), "" ); |
2726 | } |
2727 | } |
2728 | } |
2729 | return trailing_mb; |
2730 | } |
2731 | return NULL; |
2732 | } |
2733 | |
2734 | |
2735 | //============================================================================= |
2736 | //------------------------------Ideal------------------------------------------ |
2737 | // If the store is from an AND mask that leaves the low bits untouched, then |
2738 | // we can skip the AND operation. If the store is from a sign-extension |
2739 | // (a left shift, then right shift) we can skip both. |
2740 | Node *StoreBNode::Ideal(PhaseGVN *phase, bool can_reshape){ |
2741 | Node *progress = StoreNode::Ideal_masked_input(phase, 0xFF); |
2742 | if( progress != NULL ) return progress; |
2743 | |
2744 | progress = StoreNode::Ideal_sign_extended_input(phase, 24); |
2745 | if( progress != NULL ) return progress; |
2746 | |
2747 | // Finally check the default case |
2748 | return StoreNode::Ideal(phase, can_reshape); |
2749 | } |
2750 | |
2751 | //============================================================================= |
2752 | //------------------------------Ideal------------------------------------------ |
2753 | // If the store is from an AND mask that leaves the low bits untouched, then |
2754 | // we can skip the AND operation |
2755 | Node *StoreCNode::Ideal(PhaseGVN *phase, bool can_reshape){ |
2756 | Node *progress = StoreNode::Ideal_masked_input(phase, 0xFFFF); |
2757 | if( progress != NULL ) return progress; |
2758 | |
2759 | progress = StoreNode::Ideal_sign_extended_input(phase, 16); |
2760 | if( progress != NULL ) return progress; |
2761 | |
2762 | // Finally check the default case |
2763 | return StoreNode::Ideal(phase, can_reshape); |
2764 | } |
2765 | |
2766 | //============================================================================= |
2767 | //------------------------------Identity--------------------------------------- |
2768 | Node* StoreCMNode::Identity(PhaseGVN* phase) { |
2769 | // No need to card mark when storing a null ptr |
2770 | Node* my_store = in(MemNode::OopStore); |
2771 | if (my_store->is_Store()) { |
2772 | const Type *t1 = phase->type( my_store->in(MemNode::ValueIn) ); |
2773 | if( t1 == TypePtr::NULL_PTR ) { |
2774 | return in(MemNode::Memory); |
2775 | } |
2776 | } |
2777 | return this; |
2778 | } |
2779 | |
2780 | //============================================================================= |
2781 | //------------------------------Ideal--------------------------------------- |
2782 | Node *StoreCMNode::Ideal(PhaseGVN *phase, bool can_reshape){ |
2783 | Node* progress = StoreNode::Ideal(phase, can_reshape); |
2784 | if (progress != NULL) return progress; |
2785 | |
2786 | Node* my_store = in(MemNode::OopStore); |
2787 | if (my_store->is_MergeMem()) { |
2788 | Node* mem = my_store->as_MergeMem()->memory_at(oop_alias_idx()); |
2789 | set_req(MemNode::OopStore, mem); |
2790 | return this; |
2791 | } |
2792 | |
2793 | return NULL; |
2794 | } |
2795 | |
2796 | //------------------------------Value----------------------------------------- |
2797 | const Type* StoreCMNode::Value(PhaseGVN* phase) const { |
2798 | // Either input is TOP ==> the result is TOP |
2799 | const Type *t = phase->type( in(MemNode::Memory) ); |
2800 | if( t == Type::TOP ) return Type::TOP; |
2801 | t = phase->type( in(MemNode::Address) ); |
2802 | if( t == Type::TOP ) return Type::TOP; |
2803 | t = phase->type( in(MemNode::ValueIn) ); |
2804 | if( t == Type::TOP ) return Type::TOP; |
2805 | // If extra input is TOP ==> the result is TOP |
2806 | t = phase->type( in(MemNode::OopStore) ); |
2807 | if( t == Type::TOP ) return Type::TOP; |
2808 | |
2809 | return StoreNode::Value( phase ); |
2810 | } |
2811 | |
2812 | |
2813 | //============================================================================= |
2814 | //----------------------------------SCMemProjNode------------------------------ |
2815 | const Type* SCMemProjNode::Value(PhaseGVN* phase) const |
2816 | { |
2817 | return bottom_type(); |
2818 | } |
2819 | |
2820 | //============================================================================= |
2821 | //----------------------------------LoadStoreNode------------------------------ |
2822 | LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required ) |
2823 | : Node(required), |
2824 | _type(rt), |
2825 | _adr_type(at), |
2826 | _has_barrier(false) |
2827 | { |
2828 | init_req(MemNode::Control, c ); |
2829 | init_req(MemNode::Memory , mem); |
2830 | init_req(MemNode::Address, adr); |
2831 | init_req(MemNode::ValueIn, val); |
2832 | init_class_id(Class_LoadStore); |
2833 | } |
2834 | |
2835 | uint LoadStoreNode::ideal_reg() const { |
2836 | return _type->ideal_reg(); |
2837 | } |
2838 | |
2839 | bool LoadStoreNode::result_not_used() const { |
2840 | for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) { |
2841 | Node *x = fast_out(i); |
2842 | if (x->Opcode() == Op_SCMemProj) continue; |
2843 | return false; |
2844 | } |
2845 | return true; |
2846 | } |
2847 | |
2848 | MemBarNode* LoadStoreNode::trailing_membar() const { |
2849 | MemBarNode* trailing = NULL; |
2850 | for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { |
2851 | Node* u = fast_out(i); |
2852 | if (u->is_MemBar()) { |
2853 | if (u->as_MemBar()->trailing_load_store()) { |
2854 | assert(u->Opcode() == Op_MemBarAcquire, "" ); |
2855 | assert(trailing == NULL, "only one" ); |
2856 | trailing = u->as_MemBar(); |
2857 | #ifdef ASSERT |
2858 | Node* leading = trailing->leading_membar(); |
2859 | assert(support_IRIW_for_not_multiple_copy_atomic_cpu || leading->Opcode() == Op_MemBarRelease, "incorrect membar" ); |
2860 | assert(leading->as_MemBar()->leading_load_store(), "incorrect membar pair" ); |
2861 | assert(leading->as_MemBar()->trailing_membar() == trailing, "incorrect membar pair" ); |
2862 | #endif |
2863 | } else { |
2864 | assert(u->as_MemBar()->standalone(), "wrong barrier kind" ); |
2865 | } |
2866 | } |
2867 | } |
2868 | |
2869 | return trailing; |
2870 | } |
2871 | |
2872 | uint LoadStoreNode::size_of() const { return sizeof(*this); } |
2873 | |
2874 | //============================================================================= |
2875 | //----------------------------------LoadStoreConditionalNode-------------------- |
2876 | LoadStoreConditionalNode::LoadStoreConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : LoadStoreNode(c, mem, adr, val, NULL, TypeInt::BOOL, 5) { |
2877 | init_req(ExpectedIn, ex ); |
2878 | } |
2879 | |
2880 | //============================================================================= |
2881 | //-------------------------------adr_type-------------------------------------- |
2882 | const TypePtr* ClearArrayNode::adr_type() const { |
2883 | Node *adr = in(3); |
2884 | if (adr == NULL) return NULL; // node is dead |
2885 | return MemNode::calculate_adr_type(adr->bottom_type()); |
2886 | } |
2887 | |
2888 | //------------------------------match_edge------------------------------------- |
2889 | // Do we Match on this edge index or not? Do not match memory |
2890 | uint ClearArrayNode::match_edge(uint idx) const { |
2891 | return idx > 1; |
2892 | } |
2893 | |
2894 | //------------------------------Identity--------------------------------------- |
2895 | // Clearing a zero length array does nothing |
2896 | Node* ClearArrayNode::Identity(PhaseGVN* phase) { |
2897 | return phase->type(in(2))->higher_equal(TypeX::ZERO) ? in(1) : this; |
2898 | } |
2899 | |
2900 | //------------------------------Idealize--------------------------------------- |
2901 | // Clearing a short array is faster with stores |
2902 | Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape) { |
2903 | // Already know this is a large node, do not try to ideal it |
2904 | if (!IdealizeClearArrayNode || _is_large) return NULL; |
2905 | |
2906 | const int unit = BytesPerLong; |
2907 | const TypeX* t = phase->type(in(2))->isa_intptr_t(); |
2908 | if (!t) return NULL; |
2909 | if (!t->is_con()) return NULL; |
2910 | intptr_t raw_count = t->get_con(); |
2911 | intptr_t size = raw_count; |
2912 | if (!Matcher::init_array_count_is_in_bytes) size *= unit; |
2913 | // Clearing nothing uses the Identity call. |
2914 | // Negative clears are possible on dead ClearArrays |
2915 | // (see jck test stmt114.stmt11402.val). |
2916 | if (size <= 0 || size % unit != 0) return NULL; |
2917 | intptr_t count = size / unit; |
2918 | // Length too long; communicate this to matchers and assemblers. |
2919 | // Assemblers are responsible to produce fast hardware clears for it. |
2920 | if (size > InitArrayShortSize) { |
2921 | return new ClearArrayNode(in(0), in(1), in(2), in(3), true); |
2922 | } |
2923 | Node *mem = in(1); |
2924 | if( phase->type(mem)==Type::TOP ) return NULL; |
2925 | Node *adr = in(3); |
2926 | const Type* at = phase->type(adr); |
2927 | if( at==Type::TOP ) return NULL; |
2928 | const TypePtr* atp = at->isa_ptr(); |
2929 | // adjust atp to be the correct array element address type |
2930 | if (atp == NULL) atp = TypePtr::BOTTOM; |
2931 | else atp = atp->add_offset(Type::OffsetBot); |
2932 | // Get base for derived pointer purposes |
2933 | if( adr->Opcode() != Op_AddP ) Unimplemented(); |
2934 | Node *base = adr->in(1); |
2935 | |
2936 | Node *zero = phase->makecon(TypeLong::ZERO); |
2937 | Node *off = phase->MakeConX(BytesPerLong); |
2938 | mem = new StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false); |
2939 | count--; |
2940 | while( count-- ) { |
2941 | mem = phase->transform(mem); |
2942 | adr = phase->transform(new AddPNode(base,adr,off)); |
2943 | mem = new StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false); |
2944 | } |
2945 | return mem; |
2946 | } |
2947 | |
2948 | //----------------------------step_through---------------------------------- |
2949 | // Return allocation input memory edge if it is different instance |
2950 | // or itself if it is the one we are looking for. |
2951 | bool ClearArrayNode::step_through(Node** np, uint instance_id, PhaseTransform* phase) { |
2952 | Node* n = *np; |
2953 | assert(n->is_ClearArray(), "sanity" ); |
2954 | intptr_t offset; |
2955 | AllocateNode* alloc = AllocateNode::Ideal_allocation(n->in(3), phase, offset); |
2956 | // This method is called only before Allocate nodes are expanded |
2957 | // during macro nodes expansion. Before that ClearArray nodes are |
2958 | // only generated in PhaseMacroExpand::generate_arraycopy() (before |
2959 | // Allocate nodes are expanded) which follows allocations. |
2960 | assert(alloc != NULL, "should have allocation" ); |
2961 | if (alloc->_idx == instance_id) { |
2962 | // Can not bypass initialization of the instance we are looking for. |
2963 | return false; |
2964 | } |
2965 | // Otherwise skip it. |
2966 | InitializeNode* init = alloc->initialization(); |
2967 | if (init != NULL) |
2968 | *np = init->in(TypeFunc::Memory); |
2969 | else |
2970 | *np = alloc->in(TypeFunc::Memory); |
2971 | return true; |
2972 | } |
2973 | |
2974 | //----------------------------clear_memory------------------------------------- |
2975 | // Generate code to initialize object storage to zero. |
2976 | Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, |
2977 | intptr_t start_offset, |
2978 | Node* end_offset, |
2979 | PhaseGVN* phase) { |
2980 | intptr_t offset = start_offset; |
2981 | |
2982 | int unit = BytesPerLong; |
2983 | if ((offset % unit) != 0) { |
2984 | Node* adr = new AddPNode(dest, dest, phase->MakeConX(offset)); |
2985 | adr = phase->transform(adr); |
2986 | const TypePtr* atp = TypeRawPtr::BOTTOM; |
2987 | mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered); |
2988 | mem = phase->transform(mem); |
2989 | offset += BytesPerInt; |
2990 | } |
2991 | assert((offset % unit) == 0, "" ); |
2992 | |
2993 | // Initialize the remaining stuff, if any, with a ClearArray. |
2994 | return clear_memory(ctl, mem, dest, phase->MakeConX(offset), end_offset, phase); |
2995 | } |
2996 | |
2997 | Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, |
2998 | Node* start_offset, |
2999 | Node* end_offset, |
3000 | PhaseGVN* phase) { |
3001 | if (start_offset == end_offset) { |
3002 | // nothing to do |
3003 | return mem; |
3004 | } |
3005 | |
3006 | int unit = BytesPerLong; |
3007 | Node* zbase = start_offset; |
3008 | Node* zend = end_offset; |
3009 | |
3010 | // Scale to the unit required by the CPU: |
3011 | if (!Matcher::init_array_count_is_in_bytes) { |
3012 | Node* shift = phase->intcon(exact_log2(unit)); |
3013 | zbase = phase->transform(new URShiftXNode(zbase, shift) ); |
3014 | zend = phase->transform(new URShiftXNode(zend, shift) ); |
3015 | } |
3016 | |
3017 | // Bulk clear double-words |
3018 | Node* zsize = phase->transform(new SubXNode(zend, zbase) ); |
3019 | Node* adr = phase->transform(new AddPNode(dest, dest, start_offset) ); |
3020 | mem = new ClearArrayNode(ctl, mem, zsize, adr, false); |
3021 | return phase->transform(mem); |
3022 | } |
3023 | |
3024 | Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, |
3025 | intptr_t start_offset, |
3026 | intptr_t end_offset, |
3027 | PhaseGVN* phase) { |
3028 | if (start_offset == end_offset) { |
3029 | // nothing to do |
3030 | return mem; |
3031 | } |
3032 | |
3033 | assert((end_offset % BytesPerInt) == 0, "odd end offset" ); |
3034 | intptr_t done_offset = end_offset; |
3035 | if ((done_offset % BytesPerLong) != 0) { |
3036 | done_offset -= BytesPerInt; |
3037 | } |
3038 | if (done_offset > start_offset) { |
3039 | mem = clear_memory(ctl, mem, dest, |
3040 | start_offset, phase->MakeConX(done_offset), phase); |
3041 | } |
3042 | if (done_offset < end_offset) { // emit the final 32-bit store |
3043 | Node* adr = new AddPNode(dest, dest, phase->MakeConX(done_offset)); |
3044 | adr = phase->transform(adr); |
3045 | const TypePtr* atp = TypeRawPtr::BOTTOM; |
3046 | mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered); |
3047 | mem = phase->transform(mem); |
3048 | done_offset += BytesPerInt; |
3049 | } |
3050 | assert(done_offset == end_offset, "" ); |
3051 | return mem; |
3052 | } |
3053 | |
3054 | //============================================================================= |
3055 | MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent) |
3056 | : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)), |
3057 | _adr_type(C->get_adr_type(alias_idx)), _kind(Standalone) |
3058 | #ifdef ASSERT |
3059 | , _pair_idx(0) |
3060 | #endif |
3061 | { |
3062 | init_class_id(Class_MemBar); |
3063 | Node* top = C->top(); |
3064 | init_req(TypeFunc::I_O,top); |
3065 | init_req(TypeFunc::FramePtr,top); |
3066 | init_req(TypeFunc::ReturnAdr,top); |
3067 | if (precedent != NULL) |
3068 | init_req(TypeFunc::Parms, precedent); |
3069 | } |
3070 | |
3071 | //------------------------------cmp-------------------------------------------- |
3072 | uint MemBarNode::hash() const { return NO_HASH; } |
3073 | bool MemBarNode::cmp( const Node &n ) const { |
3074 | return (&n == this); // Always fail except on self |
3075 | } |
3076 | |
3077 | //------------------------------make------------------------------------------- |
3078 | MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) { |
3079 | switch (opcode) { |
3080 | case Op_MemBarAcquire: return new MemBarAcquireNode(C, atp, pn); |
3081 | case Op_LoadFence: return new LoadFenceNode(C, atp, pn); |
3082 | case Op_MemBarRelease: return new MemBarReleaseNode(C, atp, pn); |
3083 | case Op_StoreFence: return new StoreFenceNode(C, atp, pn); |
3084 | case Op_MemBarAcquireLock: return new MemBarAcquireLockNode(C, atp, pn); |
3085 | case Op_MemBarReleaseLock: return new MemBarReleaseLockNode(C, atp, pn); |
3086 | case Op_MemBarVolatile: return new MemBarVolatileNode(C, atp, pn); |
3087 | case Op_MemBarCPUOrder: return new MemBarCPUOrderNode(C, atp, pn); |
3088 | case Op_OnSpinWait: return new OnSpinWaitNode(C, atp, pn); |
3089 | case Op_Initialize: return new InitializeNode(C, atp, pn); |
3090 | case Op_MemBarStoreStore: return new MemBarStoreStoreNode(C, atp, pn); |
3091 | default: ShouldNotReachHere(); return NULL; |
3092 | } |
3093 | } |
3094 | |
3095 | void MemBarNode::remove(PhaseIterGVN *igvn) { |
3096 | if (outcnt() != 2) { |
3097 | return; |
3098 | } |
3099 | if (trailing_store() || trailing_load_store()) { |
3100 | MemBarNode* leading = leading_membar(); |
3101 | if (leading != NULL) { |
3102 | assert(leading->trailing_membar() == this, "inconsistent leading/trailing membars" ); |
3103 | leading->remove(igvn); |
3104 | } |
3105 | } |
3106 | igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory)); |
3107 | igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control)); |
3108 | } |
3109 | |
3110 | //------------------------------Ideal------------------------------------------ |
// Return a node which is more "ideal" than the current node.  Strip out
// control copies.
//
// With can_reshape set and a Precedent input present, this additionally:
//  - for MemBarAcquire, releases a Precedent node whose only user is this
//    barrier (or, for a DecodeN over a multiply-used input, replaces the
//    Precedent with that input);
//  - eliminates MemBarAcquire/MemBarVolatile barriers whose Precedent is a
//    memory operation on a field of a known (scalar replaced) instance;
//  - eliminates MemBarRelease barriers whose Precedent leads to an
//    allocation that does not escape the thread.
// Returns 'this' if the node was modified in place, a fresh ConI(0) if the
// barrier was eliminated, and NULL if nothing changed.
Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  if (remove_dead_region(phase, can_reshape)) return this;
  // Don't bother trying to transform a dead node
  if (in(0) && in(0)->is_top()) {
    return NULL;
  }

  bool progress = false;
  // Eliminate volatile MemBars for scalar replaced objects.
  // (Only attempted when the node has exactly the inputs up to and
  // including Precedent.)
  if (can_reshape && req() == (Precedent+1)) {
    bool eliminate = false;
    int opc = Opcode();
    if ((opc == Op_MemBarAcquire || opc == Op_MemBarVolatile)) {
      // Volatile field loads and stores.
      Node* my_mem = in(MemBarNode::Precedent);
      // The MemBarAcquire may keep an unused LoadNode alive through the Precedent edge
      if ((my_mem != NULL) && (opc == Op_MemBarAcquire) && (my_mem->outcnt() == 1)) {
        // if the Precedent is a decodeN and its input (a Load) is used at more than one place,
        // replace this Precedent (decodeN) with the Load instead.
        if ((my_mem->Opcode() == Op_DecodeN) && (my_mem->in(1)->outcnt() > 1)) {
          Node* load_node = my_mem->in(1);
          set_req(MemBarNode::Precedent, load_node);
          // The old Precedent is pushed on the IGVN worklist for revisiting.
          phase->is_IterGVN()->_worklist.push(my_mem);
          my_mem = load_node;
        } else {
          assert(my_mem->unique_out() == this, "sanity");
          // Drop the Precedent edge entirely; this changes req(), and the
          // elimination check below is skipped because my_mem becomes NULL.
          del_req(Precedent);
          phase->is_IterGVN()->_worklist.push(my_mem); // remove dead node later
          my_mem = NULL;
        }
        progress = true;
      }
      if (my_mem != NULL && my_mem->is_Mem()) {
        const TypeOopPtr* t_oop = my_mem->in(MemNode::Address)->bottom_type()->isa_oopptr();
        // Check for scalar replaced object reference.
        // (The field offset must be a concrete value: neither bottom nor top.)
        if( t_oop != NULL && t_oop->is_known_instance_field() &&
            t_oop->offset() != Type::OffsetBot &&
            t_oop->offset() != Type::OffsetTop) {
          eliminate = true;
        }
      }
    } else if (opc == Op_MemBarRelease) {
      // Final field stores.
      Node* alloc = AllocateNode::Ideal_allocation(in(MemBarNode::Precedent), phase);
      if ((alloc != NULL) && alloc->is_Allocate() &&
          alloc->as_Allocate()->does_not_escape_thread()) {
        // The allocated object does not escape.
        eliminate = true;
      }
    }
    if (eliminate) {
      // Replace MemBar projections by its inputs.
      PhaseIterGVN* igvn = phase->is_IterGVN();
      remove(igvn);
      // Must return either the original node (now dead) or a new node
      // (Do not return a top here, since that would break the uniqueness of top.)
      return new ConINode(TypeInt::ZERO);
    }
  }
  return progress ? this : NULL;
}
3174 | |
3175 | //------------------------------Value------------------------------------------ |
3176 | const Type* MemBarNode::Value(PhaseGVN* phase) const { |
3177 | if( !in(0) ) return Type::TOP; |
3178 | if( phase->type(in(0)) == Type::TOP ) |
3179 | return Type::TOP; |
3180 | return TypeTuple::MEMBAR; |
3181 | } |
3182 | |
3183 | //------------------------------match------------------------------------------ |
3184 | // Construct projections for memory. |
3185 | Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) { |
3186 | switch (proj->_con) { |
3187 | case TypeFunc::Control: |
3188 | case TypeFunc::Memory: |
3189 | return new MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj); |
3190 | } |
3191 | ShouldNotReachHere(); |
3192 | return NULL; |
3193 | } |
3194 | |
3195 | void MemBarNode::set_store_pair(MemBarNode* leading, MemBarNode* trailing) { |
3196 | trailing->_kind = TrailingStore; |
3197 | leading->_kind = LeadingStore; |
3198 | #ifdef ASSERT |
3199 | trailing->_pair_idx = leading->_idx; |
3200 | leading->_pair_idx = leading->_idx; |
3201 | #endif |
3202 | } |
3203 | |
3204 | void MemBarNode::set_load_store_pair(MemBarNode* leading, MemBarNode* trailing) { |
3205 | trailing->_kind = TrailingLoadStore; |
3206 | leading->_kind = LeadingLoadStore; |
3207 | #ifdef ASSERT |
3208 | trailing->_pair_idx = leading->_idx; |
3209 | leading->_pair_idx = leading->_idx; |
3210 | #endif |
3211 | } |
3212 | |
// Starting at this (leading) barrier, follow CFG users forward until a
// MemBar that reports trailing() is found, and return it.
// 'seen' records CFG nodes already visited; 'multis' is a stack of
// MultiBranch nodes together with the out-index at which their scan
// should resume, so that other successors can be tried when a path leads
// only to an already visited node.
// Debug builds check that the barrier found is of the matching kind and
// carries the same pair index as this one.
MemBarNode* MemBarNode::trailing_membar() const {
  ResourceMark rm;
  Node* trailing = (Node*)this;
  VectorSet seen(Thread::current()->resource_area());
  Node_Stack multis(0);
  do {
    // Advance one CFG step from the node reached so far.
    Node* c = trailing;
    uint i = 0;
    do {
      trailing = NULL;
      // Pick the first CFG user of c at out-index >= i (ignoring self edges).
      for (; i < c->outcnt(); i++) {
        Node* next = c->raw_out(i);
        if (next != c && next->is_CFG()) {
          if (c->is_MultiBranch()) {
            // Remember where to resume scanning this multi-branch node.
            if (multis.node() == c) {
              multis.set_index(i+1);
            } else {
              multis.push(c, i+1);
            }
          }
          trailing = next;
          break;
        }
      }
      // A successor that has not been visited before: take it.
      if (trailing != NULL && !seen.test_set(trailing->_idx)) {
        break;
      }
      // Otherwise fall back to the innermost recorded multi-branch node
      // that still has candidates left, discarding exhausted ones.
      // NOTE(review): the resume index is an out-edge index but is compared
      // against c->req() here rather than c->outcnt() -- confirm intended.
      while (multis.size() > 0) {
        c = multis.node();
        i = multis.index();
        if (i < c->req()) {
          break;
        }
        multis.pop();
      }
    } while (multis.size() > 0);
  } while (!trailing->is_MemBar() || !trailing->as_MemBar()->trailing());

  MemBarNode* mb = trailing->as_MemBar();
  assert((mb->_kind == TrailingStore && _kind == LeadingStore) ||
         (mb->_kind == TrailingLoadStore && _kind == LeadingLoadStore), "bad trailing membar");
  assert(mb->_pair_idx == _pair_idx, "bad trailing membar");
  return mb;
}
3257 | |
// Starting at this (trailing) barrier, walk control inputs backwards until
// a MemBar that reports leading() is found, and return it.  Returns NULL
// if every path has been tried without finding one.
// 'seen' records nodes already visited; 'regions' is a stack of Region
// nodes together with the index of the next region input still to try.
// Debug builds re-walk the control inputs with a worklist to check that
// the result is the only leading barrier reachable, and that it is of the
// matching kind with the same pair index as this barrier.
MemBarNode* MemBarNode::leading_membar() const {
  ResourceMark rm;
  VectorSet seen(Thread::current()->resource_area());
  Node_Stack regions(0);
  Node* leading = in(0);
  while (leading != NULL && (!leading->is_MemBar() || !leading->as_MemBar()->leading())) {
    // Dead end (NULL, top, or already visited): backtrack to the next
    // untried input of the innermost pending region.
    while (leading == NULL || leading->is_top() || seen.test_set(leading->_idx)) {
      leading = NULL;
      while (regions.size() > 0 && leading == NULL) {
        Node* r = regions.node();
        uint i = regions.index();
        if (i < r->req()) {
          leading = r->in(i);
          regions.set_index(i+1);
        } else {
          regions.pop();
        }
      }
      if (leading == NULL) {
        assert(regions.size() == 0, "all paths should have been tried");
        return NULL;
      }
    }
    // Step one node up the control chain.  For a Region, follow input 1
    // now and remember that inputs 2.. are still to be tried.
    if (leading->is_Region()) {
      regions.push(leading, 2);
      leading = leading->in(1);
    } else {
      leading = leading->in(0);
    }
  }
#ifdef ASSERT
  // Cross-check: collect everything reachable through control inputs
  // (stopping at leading barriers) and count the leading barriers found.
  Unique_Node_List wq;
  wq.push((Node*)this);
  uint found = 0;
  for (uint i = 0; i < wq.size(); i++) {
    Node* n = wq.at(i);
    if (n->is_Region()) {
      for (uint j = 1; j < n->req(); j++) {
        Node* in = n->in(j);
        if (in != NULL && !in->is_top()) {
          wq.push(in);
        }
      }
    } else {
      if (n->is_MemBar() && n->as_MemBar()->leading()) {
        assert(n == leading, "consistency check failed");
        found++;
      } else {
        Node* in = n->in(0);
        if (in != NULL && !in->is_top()) {
          wq.push(in);
        }
      }
    }
  }
  assert(found == 1 || (found == 0 && leading == NULL), "consistency check failed");
#endif
  if (leading == NULL) {
    return NULL;
  }
  MemBarNode* mb = leading->as_MemBar();
  assert((mb->_kind == LeadingStore && _kind == TrailingStore) ||
         (mb->_kind == LeadingLoadStore && _kind == TrailingLoadStore), "bad leading membar");
  assert(mb->_pair_idx == _pair_idx, "bad leading membar");
  return mb;
}
3324 | |
3325 | //===========================InitializeNode==================================== |
3326 | // SUMMARY: |
3327 | // This node acts as a memory barrier on raw memory, after some raw stores. |
3328 | // The 'cooked' oop value feeds from the Initialize, not the Allocation. |
3329 | // The Initialize can 'capture' suitably constrained stores as raw inits. |
3330 | // It can coalesce related raw stores into larger units (called 'tiles'). |
3331 | // It can avoid zeroing new storage for memory units which have raw inits. |
3332 | // At macro-expansion, it is marked 'complete', and does not optimize further. |
3333 | // |
3334 | // EXAMPLE: |
3335 | // The object 'new short[2]' occupies 16 bytes in a 32-bit machine. |
3336 | // ctl = incoming control; mem* = incoming memory |
3337 | // (Note: A star * on a memory edge denotes I/O and other standard edges.) |
3338 | // First allocate uninitialized memory and fill in the header: |
3339 | // alloc = (Allocate ctl mem* 16 #short[].klass ...) |
3340 | // ctl := alloc.Control; mem* := alloc.Memory* |
3341 | // rawmem = alloc.Memory; rawoop = alloc.RawAddress |
3342 | // Then initialize to zero the non-header parts of the raw memory block: |
3343 | // init = (Initialize alloc.Control alloc.Memory* alloc.RawAddress) |
3344 | // ctl := init.Control; mem.SLICE(#short[*]) := init.Memory |
3345 | // After the initialize node executes, the object is ready for service: |
3346 | // oop := (CheckCastPP init.Control alloc.RawAddress #short[]) |
3347 | // Suppose its body is immediately initialized as {1,2}: |
3348 | // store1 = (StoreC init.Control init.Memory (+ oop 12) 1) |
3349 | // store2 = (StoreC init.Control store1 (+ oop 14) 2) |
3350 | // mem.SLICE(#short[*]) := store2 |
3351 | // |
3352 | // DETAILS: |
3353 | // An InitializeNode collects and isolates object initialization after |
3354 | // an AllocateNode and before the next possible safepoint. As a |
3355 | // memory barrier (MemBarNode), it keeps critical stores from drifting |
3356 | // down past any safepoint or any publication of the allocation. |
3357 | // Before this barrier, a newly-allocated object may have uninitialized bits. |
3358 | // After this barrier, it may be treated as a real oop, and GC is allowed. |
3359 | // |
3360 | // The semantics of the InitializeNode include an implicit zeroing of |
3361 | // the new object from object header to the end of the object. |
3362 | // (The object header and end are determined by the AllocateNode.) |
3363 | // |
3364 | // Certain stores may be added as direct inputs to the InitializeNode. |
3365 | // These stores must update raw memory, and they must be to addresses |
3366 | // derived from the raw address produced by AllocateNode, and with |
3367 | // a constant offset. They must be ordered by increasing offset. |
3368 | // The first one is at in(RawStores), the last at in(req()-1). |
3369 | // Unlike most memory operations, they are not linked in a chain, |
3370 | // but are displayed in parallel as users of the rawmem output of |
3371 | // the allocation. |
3372 | // |
3373 | // (See comments in InitializeNode::capture_store, which continue |
3374 | // the example given above.) |
3375 | // |
3376 | // When the associated Allocate is macro-expanded, the InitializeNode |
3377 | // may be rewritten to optimize collected stores. A ClearArrayNode |
3378 | // may also be created at that point to represent any required zeroing. |
3379 | // The InitializeNode is then marked 'complete', prohibiting further |
3380 | // capturing of nearby memory operations. |
3381 | // |
3382 | // During macro-expansion, all captured initializations which store |
3383 | // constant values of 32 bits or smaller are coalesced (if advantageous) |
3384 | // into larger 'tiles' 32 or 64 bits. This allows an object to be |
3385 | // initialized in fewer memory operations. Memory words which are |
3386 | // covered by neither tiles nor non-constant stores are pre-zeroed |
3387 | // by explicit stores of zero. (The code shape happens to do all |
3388 | // zeroing first, then all other stores, with both sequences occurring |
3389 | // in order of ascending offsets.) |
3390 | // |
3391 | // Alternatively, code may be inserted between an AllocateNode and its |
3392 | // InitializeNode, to perform arbitrary initialization of the new object. |
3393 | // E.g., the object copying intrinsics insert complex data transfers here. |
// The initialization must then be marked as 'complete' to disable the
// built-in zeroing semantics and the collection of initializing stores.
3396 | // |
3397 | // While an InitializeNode is incomplete, reads from the memory state |
3398 | // produced by it are optimizable if they match the control edge and |
3399 | // new oop address associated with the allocation/initialization. |
3400 | // They return a stored value (if the offset matches) or else zero. |
3401 | // A write to the memory state, if it matches control and address, |
3402 | // and if it is to a constant offset, may be 'captured' by the |
3403 | // InitializeNode. It is cloned as a raw memory operation and rewired |
3404 | // inside the initialization, to the raw oop produced by the allocation. |
3405 | // Operations on addresses which are provably distinct (e.g., to |
3406 | // other AllocateNodes) are allowed to bypass the initialization. |
3407 | // |
3408 | // The effect of all this is to consolidate object initialization |
3409 | // (both arrays and non-arrays, both piecewise and bulk) into a |
3410 | // single location, where it can be optimized as a unit. |
3411 | // |
3412 | // Only stores with an offset less than TrackedInitializationLimit words |
3413 | // will be considered for capture by an InitializeNode. This puts a |
3414 | // reasonable limit on the complexity of optimized initializations. |
3415 | |
3416 | //---------------------------InitializeNode------------------------------------ |
3417 | InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop) |
3418 | : MemBarNode(C, adr_type, rawoop), |
3419 | _is_complete(Incomplete), _does_not_escape(false) |
3420 | { |
3421 | init_class_id(Class_Initialize); |
3422 | |
3423 | assert(adr_type == Compile::AliasIdxRaw, "only valid atp" ); |
3424 | assert(in(RawAddress) == rawoop, "proper init" ); |
3425 | // Note: allocation() can be NULL, for secondary initialization barriers |
3426 | } |
3427 | |
3428 | // Since this node is not matched, it will be processed by the |
3429 | // register allocator. Declare that there are no constraints |
3430 | // on the allocation of the RawAddress edge. |
3431 | const RegMask &InitializeNode::in_RegMask(uint idx) const { |
3432 | // This edge should be set to top, by the set_complete. But be conservative. |
3433 | if (idx == InitializeNode::RawAddress) |
3434 | return *(Compile::current()->matcher()->idealreg2spillmask[in(idx)->ideal_reg()]); |
3435 | return RegMask::Empty; |
3436 | } |
3437 | |
3438 | Node* InitializeNode::memory(uint alias_idx) { |
3439 | Node* mem = in(Memory); |
3440 | if (mem->is_MergeMem()) { |
3441 | return mem->as_MergeMem()->memory_at(alias_idx); |
3442 | } else { |
3443 | // incoming raw memory is not split |
3444 | return mem; |
3445 | } |
3446 | } |
3447 | |
3448 | bool InitializeNode::is_non_zero() { |
3449 | if (is_complete()) return false; |
3450 | remove_extra_zeroes(); |
3451 | return (req() > RawStores); |
3452 | } |
3453 | |
3454 | void InitializeNode::set_complete(PhaseGVN* phase) { |
3455 | assert(!is_complete(), "caller responsibility" ); |
3456 | _is_complete = Complete; |
3457 | |
3458 | // After this node is complete, it contains a bunch of |
3459 | // raw-memory initializations. There is no need for |
3460 | // it to have anything to do with non-raw memory effects. |
3461 | // Therefore, tell all non-raw users to re-optimize themselves, |
3462 | // after skipping the memory effects of this initialization. |
3463 | PhaseIterGVN* igvn = phase->is_IterGVN(); |
3464 | if (igvn) igvn->add_users_to_worklist(this); |
3465 | } |
3466 | |
3467 | // convenience function |
3468 | // return false if the init contains any stores already |
3469 | bool AllocateNode::maybe_set_complete(PhaseGVN* phase) { |
3470 | InitializeNode* init = initialization(); |
3471 | if (init == NULL || init->is_complete()) return false; |
3472 | init->remove_extra_zeroes(); |
3473 | // for now, if this allocation has already collected any inits, bail: |
3474 | if (init->is_non_zero()) return false; |
3475 | init->set_complete(phase); |
3476 | return true; |
3477 | } |
3478 | |
3479 | void InitializeNode::() { |
3480 | if (req() == RawStores) return; |
3481 | Node* zmem = zero_memory(); |
3482 | uint fill = RawStores; |
3483 | for (uint i = fill; i < req(); i++) { |
3484 | Node* n = in(i); |
3485 | if (n->is_top() || n == zmem) continue; // skip |
3486 | if (fill < i) set_req(fill, n); // compact |
3487 | ++fill; |
3488 | } |
3489 | // delete any empty spaces created: |
3490 | while (fill < req()) { |
3491 | del_req(fill); |
3492 | } |
3493 | } |
3494 | |
3495 | // Helper for remembering which stores go with which offsets. |
3496 | intptr_t InitializeNode::get_store_offset(Node* st, PhaseTransform* phase) { |
3497 | if (!st->is_Store()) return -1; // can happen to dead code via subsume_node |
3498 | intptr_t offset = -1; |
3499 | Node* base = AddPNode::Ideal_base_and_offset(st->in(MemNode::Address), |
3500 | phase, offset); |
3501 | if (base == NULL) return -1; // something is dead, |
3502 | if (offset < 0) return -1; // dead, dead |
3503 | return offset; |
3504 | } |
3505 | |
3506 | // Helper for proving that an initialization expression is |
3507 | // "simple enough" to be folded into an object initialization. |
3508 | // Attempts to prove that a store's initial value 'n' can be captured |
3509 | // within the initialization without creating a vicious cycle, such as: |
3510 | // { Foo p = new Foo(); p.next = p; } |
3511 | // True for constants and parameters and small combinations thereof. |
3512 | bool InitializeNode::detect_init_independence(Node* n, int& count) { |
3513 | if (n == NULL) return true; // (can this really happen?) |
3514 | if (n->is_Proj()) n = n->in(0); |
3515 | if (n == this) return false; // found a cycle |
3516 | if (n->is_Con()) return true; |
3517 | if (n->is_Start()) return true; // params, etc., are OK |
3518 | if (n->is_Root()) return true; // even better |
3519 | |
3520 | Node* ctl = n->in(0); |
3521 | if (ctl != NULL && !ctl->is_top()) { |
3522 | if (ctl->is_Proj()) ctl = ctl->in(0); |
3523 | if (ctl == this) return false; |
3524 | |
3525 | // If we already know that the enclosing memory op is pinned right after |
3526 | // the init, then any control flow that the store has picked up |
3527 | // must have preceded the init, or else be equal to the init. |
3528 | // Even after loop optimizations (which might change control edges) |
3529 | // a store is never pinned *before* the availability of its inputs. |
3530 | if (!MemNode::all_controls_dominate(n, this)) |
3531 | return false; // failed to prove a good control |
3532 | } |
3533 | |
3534 | // Check data edges for possible dependencies on 'this'. |
3535 | if ((count += 1) > 20) return false; // complexity limit |
3536 | for (uint i = 1; i < n->req(); i++) { |
3537 | Node* m = n->in(i); |
3538 | if (m == NULL || m == n || m->is_top()) continue; |
3539 | uint first_i = n->find_edge(m); |
3540 | if (i != first_i) continue; // process duplicate edge just once |
3541 | if (!detect_init_independence(m, count)) { |
3542 | return false; |
3543 | } |
3544 | } |
3545 | |
3546 | return true; |
3547 | } |
3548 | |
3549 | // Here are all the checks a Store must pass before it can be moved into |
3550 | // an initialization. Returns zero if a check fails. |
3551 | // On success, returns the (constant) offset to which the store applies, |
3552 | // within the initialized memory. |
3553 | intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase, bool can_reshape) { |
3554 | const int FAIL = 0; |
3555 | if (st->req() != MemNode::ValueIn + 1) |
3556 | return FAIL; // an inscrutable StoreNode (card mark?) |
3557 | Node* ctl = st->in(MemNode::Control); |
3558 | if (!(ctl != NULL && ctl->is_Proj() && ctl->in(0) == this)) |
3559 | return FAIL; // must be unconditional after the initialization |
3560 | Node* mem = st->in(MemNode::Memory); |
3561 | if (!(mem->is_Proj() && mem->in(0) == this)) |
3562 | return FAIL; // must not be preceded by other stores |
3563 | Node* adr = st->in(MemNode::Address); |
3564 | intptr_t offset; |
3565 | AllocateNode* alloc = AllocateNode::Ideal_allocation(adr, phase, offset); |
3566 | if (alloc == NULL) |
3567 | return FAIL; // inscrutable address |
3568 | if (alloc != allocation()) |
3569 | return FAIL; // wrong allocation! (store needs to float up) |
3570 | int size_in_bytes = st->memory_size(); |
3571 | if ((size_in_bytes != 0) && (offset % size_in_bytes) != 0) { |
3572 | return FAIL; // mismatched access |
3573 | } |
3574 | Node* val = st->in(MemNode::ValueIn); |
3575 | int complexity_count = 0; |
3576 | if (!detect_init_independence(val, complexity_count)) |
3577 | return FAIL; // stored value must be 'simple enough' |
3578 | |
3579 | // The Store can be captured only if nothing after the allocation |
3580 | // and before the Store is using the memory location that the store |
3581 | // overwrites. |
3582 | bool failed = false; |
3583 | // If is_complete_with_arraycopy() is true the shape of the graph is |
3584 | // well defined and is safe so no need for extra checks. |
3585 | if (!is_complete_with_arraycopy()) { |
3586 | // We are going to look at each use of the memory state following |
3587 | // the allocation to make sure nothing reads the memory that the |
3588 | // Store writes. |
3589 | const TypePtr* t_adr = phase->type(adr)->isa_ptr(); |
3590 | int alias_idx = phase->C->get_alias_index(t_adr); |
3591 | ResourceMark rm; |
3592 | Unique_Node_List mems; |
3593 | mems.push(mem); |
3594 | Node* unique_merge = NULL; |
3595 | for (uint next = 0; next < mems.size(); ++next) { |
3596 | Node *m = mems.at(next); |
3597 | for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) { |
3598 | Node *n = m->fast_out(j); |
3599 | if (n->outcnt() == 0) { |
3600 | continue; |
3601 | } |
3602 | if (n == st) { |
3603 | continue; |
3604 | } else if (n->in(0) != NULL && n->in(0) != ctl) { |
3605 | // If the control of this use is different from the control |
3606 | // of the Store which is right after the InitializeNode then |
3607 | // this node cannot be between the InitializeNode and the |
3608 | // Store. |
3609 | continue; |
3610 | } else if (n->is_MergeMem()) { |
3611 | if (n->as_MergeMem()->memory_at(alias_idx) == m) { |
3612 | // We can hit a MergeMemNode (that will likely go away |
3613 | // later) that is a direct use of the memory state |
3614 | // following the InitializeNode on the same slice as the |
3615 | // store node that we'd like to capture. We need to check |
3616 | // the uses of the MergeMemNode. |
3617 | mems.push(n); |
3618 | } |
3619 | } else if (n->is_Mem()) { |
3620 | Node* other_adr = n->in(MemNode::Address); |
3621 | if (other_adr == adr) { |
3622 | failed = true; |
3623 | break; |
3624 | } else { |
3625 | const TypePtr* other_t_adr = phase->type(other_adr)->isa_ptr(); |
3626 | if (other_t_adr != NULL) { |
3627 | int other_alias_idx = phase->C->get_alias_index(other_t_adr); |
3628 | if (other_alias_idx == alias_idx) { |
3629 | // A load from the same memory slice as the store right |
3630 | // after the InitializeNode. We check the control of the |
3631 | // object/array that is loaded from. If it's the same as |
3632 | // the store control then we cannot capture the store. |
3633 | assert(!n->is_Store(), "2 stores to same slice on same control?" ); |
3634 | Node* base = other_adr; |
3635 | assert(base->is_AddP(), "should be addp but is %s" , base->Name()); |
3636 | base = base->in(AddPNode::Base); |
3637 | if (base != NULL) { |
3638 | base = base->uncast(); |
3639 | if (base->is_Proj() && base->in(0) == alloc) { |
3640 | failed = true; |
3641 | break; |
3642 | } |
3643 | } |
3644 | } |
3645 | } |
3646 | } |
3647 | } else { |
3648 | failed = true; |
3649 | break; |
3650 | } |
3651 | } |
3652 | } |
3653 | } |
3654 | if (failed) { |
3655 | if (!can_reshape) { |
3656 | // We decided we couldn't capture the store during parsing. We |
3657 | // should try again during the next IGVN once the graph is |
3658 | // cleaner. |
3659 | phase->C->record_for_igvn(st); |
3660 | } |
3661 | return FAIL; |
3662 | } |
3663 | |
3664 | return offset; // success |
3665 | } |
3666 | |
3667 | // Find the captured store in(i) which corresponds to the range |
3668 | // [start..start+size) in the initialized object. |
3669 | // If there is one, return its index i. If there isn't, return the |
3670 | // negative of the index where it should be inserted. |
3671 | // Return 0 if the queried range overlaps an initialization boundary |
3672 | // or if dead code is encountered. |
3673 | // If size_in_bytes is zero, do not bother with overlap checks. |
3674 | int InitializeNode::captured_store_insertion_point(intptr_t start, |
3675 | int size_in_bytes, |
3676 | PhaseTransform* phase) { |
3677 | const int FAIL = 0, MAX_STORE = BytesPerLong; |
3678 | |
3679 | if (is_complete()) |
3680 | return FAIL; // arraycopy got here first; punt |
3681 | |
3682 | assert(allocation() != NULL, "must be present" ); |
3683 | |
3684 | // no negatives, no header fields: |
3685 | if (start < (intptr_t) allocation()->minimum_header_size()) return FAIL; |
3686 | |
3687 | // after a certain size, we bail out on tracking all the stores: |
3688 | intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize); |
3689 | if (start >= ti_limit) return FAIL; |
3690 | |
3691 | for (uint i = InitializeNode::RawStores, limit = req(); ; ) { |
3692 | if (i >= limit) return -(int)i; // not found; here is where to put it |
3693 | |
3694 | Node* st = in(i); |
3695 | intptr_t st_off = get_store_offset(st, phase); |
3696 | if (st_off < 0) { |
3697 | if (st != zero_memory()) { |
3698 | return FAIL; // bail out if there is dead garbage |
3699 | } |
3700 | } else if (st_off > start) { |
3701 | // ...we are done, since stores are ordered |
3702 | if (st_off < start + size_in_bytes) { |
3703 | return FAIL; // the next store overlaps |
3704 | } |
3705 | return -(int)i; // not found; here is where to put it |
3706 | } else if (st_off < start) { |
3707 | if (size_in_bytes != 0 && |
3708 | start < st_off + MAX_STORE && |
3709 | start < st_off + st->as_Store()->memory_size()) { |
3710 | return FAIL; // the previous store overlaps |
3711 | } |
3712 | } else { |
3713 | if (size_in_bytes != 0 && |
3714 | st->as_Store()->memory_size() != size_in_bytes) { |
3715 | return FAIL; // mismatched store size |
3716 | } |
3717 | return i; |
3718 | } |
3719 | |
3720 | ++i; |
3721 | } |
3722 | } |
3723 | |
3724 | // Look for a captured store which initializes at the offset 'start' |
3725 | // with the given size. If there is no such store, and no other |
3726 | // initialization interferes, then return zero_memory (the memory |
3727 | // projection of the AllocateNode). |
3728 | Node* InitializeNode::find_captured_store(intptr_t start, int size_in_bytes, |
3729 | PhaseTransform* phase) { |
3730 | assert(stores_are_sane(phase), "" ); |
3731 | int i = captured_store_insertion_point(start, size_in_bytes, phase); |
3732 | if (i == 0) { |
3733 | return NULL; // something is dead |
3734 | } else if (i < 0) { |
3735 | return zero_memory(); // just primordial zero bits here |
3736 | } else { |
3737 | Node* st = in(i); // here is the store at this position |
3738 | assert(get_store_offset(st->as_Store(), phase) == start, "sanity" ); |
3739 | return st; |
3740 | } |
3741 | } |
3742 | |
3743 | // Create, as a raw pointer, an address within my new object at 'offset'. |
3744 | Node* InitializeNode::make_raw_address(intptr_t offset, |
3745 | PhaseTransform* phase) { |
3746 | Node* addr = in(RawAddress); |
3747 | if (offset != 0) { |
3748 | Compile* C = phase->C; |
3749 | addr = phase->transform( new AddPNode(C->top(), addr, |
3750 | phase->MakeConX(offset)) ); |
3751 | } |
3752 | return addr; |
3753 | } |
3754 | |
3755 | // Clone the given store, converting it into a raw store |
3756 | // initializing a field or element of my new object. |
3757 | // Caller is responsible for retiring the original store, |
3758 | // with subsume_node or the like. |
3759 | // |
3760 | // From the example above InitializeNode::InitializeNode, |
3761 | // here are the old stores to be captured: |
3762 | // store1 = (StoreC init.Control init.Memory (+ oop 12) 1) |
3763 | // store2 = (StoreC init.Control store1 (+ oop 14) 2) |
3764 | // |
3765 | // Here is the changed code; note the extra edges on init: |
3766 | // alloc = (Allocate ...) |
3767 | // rawoop = alloc.RawAddress |
3768 | // rawstore1 = (StoreC alloc.Control alloc.Memory (+ rawoop 12) 1) |
3769 | // rawstore2 = (StoreC alloc.Control alloc.Memory (+ rawoop 14) 2) |
3770 | // init = (Initialize alloc.Control alloc.Memory rawoop |
3771 | // rawstore1 rawstore2) |
3772 | // |
3773 | Node* InitializeNode::capture_store(StoreNode* st, intptr_t start, |
3774 | PhaseTransform* phase, bool can_reshape) { |
3775 | assert(stores_are_sane(phase), "" ); |
3776 | |
3777 | if (start < 0) return NULL; |
3778 | assert(can_capture_store(st, phase, can_reshape) == start, "sanity" ); |
3779 | |
3780 | Compile* C = phase->C; |
3781 | int size_in_bytes = st->memory_size(); |
3782 | int i = captured_store_insertion_point(start, size_in_bytes, phase); |
3783 | if (i == 0) return NULL; // bail out |
3784 | Node* prev_mem = NULL; // raw memory for the captured store |
3785 | if (i > 0) { |
3786 | prev_mem = in(i); // there is a pre-existing store under this one |
3787 | set_req(i, C->top()); // temporarily disconnect it |
3788 | // See StoreNode::Ideal 'st->outcnt() == 1' for the reason to disconnect. |
3789 | } else { |
3790 | i = -i; // no pre-existing store |
3791 | prev_mem = zero_memory(); // a slice of the newly allocated object |
3792 | if (i > InitializeNode::RawStores && in(i-1) == prev_mem) |
3793 | set_req(--i, C->top()); // reuse this edge; it has been folded away |
3794 | else |
3795 | ins_req(i, C->top()); // build a new edge |
3796 | } |
3797 | Node* new_st = st->clone(); |
3798 | new_st->set_req(MemNode::Control, in(Control)); |
3799 | new_st->set_req(MemNode::Memory, prev_mem); |
3800 | new_st->set_req(MemNode::Address, make_raw_address(start, phase)); |
3801 | new_st = phase->transform(new_st); |
3802 | |
3803 | // At this point, new_st might have swallowed a pre-existing store |
3804 | // at the same offset, or perhaps new_st might have disappeared, |
3805 | // if it redundantly stored the same value (or zero to fresh memory). |
3806 | |
3807 | // In any case, wire it in: |
3808 | phase->igvn_rehash_node_delayed(this); |
3809 | set_req(i, new_st); |
3810 | |
3811 | // The caller may now kill the old guy. |
3812 | DEBUG_ONLY(Node* check_st = find_captured_store(start, size_in_bytes, phase)); |
3813 | assert(check_st == new_st || check_st == NULL, "must be findable" ); |
3814 | assert(!is_complete(), "" ); |
3815 | return new_st; |
3816 | } |
3817 | |
3818 | static bool store_constant(jlong* tiles, int num_tiles, |
3819 | intptr_t st_off, int st_size, |
3820 | jlong con) { |
3821 | if ((st_off & (st_size-1)) != 0) |
3822 | return false; // strange store offset (assume size==2**N) |
3823 | address addr = (address)tiles + st_off; |
3824 | assert(st_off >= 0 && addr+st_size <= (address)&tiles[num_tiles], "oob" ); |
3825 | switch (st_size) { |
3826 | case sizeof(jbyte): *(jbyte*) addr = (jbyte) con; break; |
3827 | case sizeof(jchar): *(jchar*) addr = (jchar) con; break; |
3828 | case sizeof(jint): *(jint*) addr = (jint) con; break; |
3829 | case sizeof(jlong): *(jlong*) addr = (jlong) con; break; |
3830 | default: return false; // strange store size (detect size!=2**N here) |
3831 | } |
3832 | return true; // return success to caller |
3833 | } |
3834 | |
3835 | // Coalesce subword constants into int constants and possibly |
3836 | // into long constants. The goal, if the CPU permits, |
3837 | // is to initialize the object with a small number of 64-bit tiles. |
3838 | // Also, convert floating-point constants to bit patterns. |
3839 | // Non-constants are not relevant to this pass. |
3840 | // |
3841 | // In terms of the running example on InitializeNode::InitializeNode |
3842 | // and InitializeNode::capture_store, here is the transformation |
3843 | // of rawstore1 and rawstore2 into rawstore12: |
3844 | // alloc = (Allocate ...) |
3845 | // rawoop = alloc.RawAddress |
3846 | // tile12 = 0x00010002 |
3847 | // rawstore12 = (StoreI alloc.Control alloc.Memory (+ rawoop 12) tile12) |
3848 | // init = (Initialize alloc.Control alloc.Memory rawoop rawstore12) |
3849 | // |
3850 | void |
3851 | InitializeNode::coalesce_subword_stores(intptr_t , |
3852 | Node* size_in_bytes, |
3853 | PhaseGVN* phase) { |
3854 | Compile* C = phase->C; |
3855 | |
3856 | assert(stores_are_sane(phase), "" ); |
3857 | // Note: After this pass, they are not completely sane, |
3858 | // since there may be some overlaps. |
3859 | |
3860 | int old_subword = 0, old_long = 0, new_int = 0, new_long = 0; |
3861 | |
3862 | intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize); |
3863 | intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, ti_limit); |
3864 | size_limit = MIN2(size_limit, ti_limit); |
3865 | size_limit = align_up(size_limit, BytesPerLong); |
3866 | int num_tiles = size_limit / BytesPerLong; |
3867 | |
3868 | // allocate space for the tile map: |
3869 | const int small_len = DEBUG_ONLY(true ? 3 :) 30; // keep stack frames small |
3870 | jlong tiles_buf[small_len]; |
3871 | Node* nodes_buf[small_len]; |
3872 | jlong inits_buf[small_len]; |
3873 | jlong* tiles = ((num_tiles <= small_len) ? &tiles_buf[0] |
3874 | : NEW_RESOURCE_ARRAY(jlong, num_tiles)); |
3875 | Node** nodes = ((num_tiles <= small_len) ? &nodes_buf[0] |
3876 | : NEW_RESOURCE_ARRAY(Node*, num_tiles)); |
3877 | jlong* inits = ((num_tiles <= small_len) ? &inits_buf[0] |
3878 | : NEW_RESOURCE_ARRAY(jlong, num_tiles)); |
3879 | // tiles: exact bitwise model of all primitive constants |
3880 | // nodes: last constant-storing node subsumed into the tiles model |
3881 | // inits: which bytes (in each tile) are touched by any initializations |
3882 | |
3883 | //// Pass A: Fill in the tile model with any relevant stores. |
3884 | |
3885 | Copy::zero_to_bytes(tiles, sizeof(tiles[0]) * num_tiles); |
3886 | Copy::zero_to_bytes(nodes, sizeof(nodes[0]) * num_tiles); |
3887 | Copy::zero_to_bytes(inits, sizeof(inits[0]) * num_tiles); |
3888 | Node* zmem = zero_memory(); // initially zero memory state |
3889 | for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) { |
3890 | Node* st = in(i); |
3891 | intptr_t st_off = get_store_offset(st, phase); |
3892 | |
3893 | // Figure out the store's offset and constant value: |
3894 | if (st_off < header_size) continue; //skip (ignore header) |
3895 | if (st->in(MemNode::Memory) != zmem) continue; //skip (odd store chain) |
3896 | int st_size = st->as_Store()->memory_size(); |
3897 | if (st_off + st_size > size_limit) break; |
3898 | |
3899 | // Record which bytes are touched, whether by constant or not. |
3900 | if (!store_constant(inits, num_tiles, st_off, st_size, (jlong) -1)) |
3901 | continue; // skip (strange store size) |
3902 | |
3903 | const Type* val = phase->type(st->in(MemNode::ValueIn)); |
3904 | if (!val->singleton()) continue; //skip (non-con store) |
3905 | BasicType type = val->basic_type(); |
3906 | |
3907 | jlong con = 0; |
3908 | switch (type) { |
3909 | case T_INT: con = val->is_int()->get_con(); break; |
3910 | case T_LONG: con = val->is_long()->get_con(); break; |
3911 | case T_FLOAT: con = jint_cast(val->getf()); break; |
3912 | case T_DOUBLE: con = jlong_cast(val->getd()); break; |
3913 | default: continue; //skip (odd store type) |
3914 | } |
3915 | |
3916 | if (type == T_LONG && Matcher::isSimpleConstant64(con) && |
3917 | st->Opcode() == Op_StoreL) { |
3918 | continue; // This StoreL is already optimal. |
3919 | } |
3920 | |
3921 | // Store down the constant. |
3922 | store_constant(tiles, num_tiles, st_off, st_size, con); |
3923 | |
3924 | intptr_t j = st_off >> LogBytesPerLong; |
3925 | |
3926 | if (type == T_INT && st_size == BytesPerInt |
3927 | && (st_off & BytesPerInt) == BytesPerInt) { |
3928 | jlong lcon = tiles[j]; |
3929 | if (!Matcher::isSimpleConstant64(lcon) && |
3930 | st->Opcode() == Op_StoreI) { |
3931 | // This StoreI is already optimal by itself. |
3932 | jint* intcon = (jint*) &tiles[j]; |
3933 | intcon[1] = 0; // undo the store_constant() |
3934 | |
3935 | // If the previous store is also optimal by itself, back up and |
3936 | // undo the action of the previous loop iteration... if we can. |
3937 | // But if we can't, just let the previous half take care of itself. |
3938 | st = nodes[j]; |
3939 | st_off -= BytesPerInt; |
3940 | con = intcon[0]; |
3941 | if (con != 0 && st != NULL && st->Opcode() == Op_StoreI) { |
3942 | assert(st_off >= header_size, "still ignoring header" ); |
3943 | assert(get_store_offset(st, phase) == st_off, "must be" ); |
3944 | assert(in(i-1) == zmem, "must be" ); |
3945 | DEBUG_ONLY(const Type* tcon = phase->type(st->in(MemNode::ValueIn))); |
3946 | assert(con == tcon->is_int()->get_con(), "must be" ); |
3947 | // Undo the effects of the previous loop trip, which swallowed st: |
3948 | intcon[0] = 0; // undo store_constant() |
3949 | set_req(i-1, st); // undo set_req(i, zmem) |
3950 | nodes[j] = NULL; // undo nodes[j] = st |
3951 | --old_subword; // undo ++old_subword |
3952 | } |
3953 | continue; // This StoreI is already optimal. |
3954 | } |
3955 | } |
3956 | |
3957 | // This store is not needed. |
3958 | set_req(i, zmem); |
3959 | nodes[j] = st; // record for the moment |
3960 | if (st_size < BytesPerLong) // something has changed |
3961 | ++old_subword; // includes int/float, but who's counting... |
3962 | else ++old_long; |
3963 | } |
3964 | |
3965 | if ((old_subword + old_long) == 0) |
3966 | return; // nothing more to do |
3967 | |
3968 | //// Pass B: Convert any non-zero tiles into optimal constant stores. |
3969 | // Be sure to insert them before overlapping non-constant stores. |
3970 | // (E.g., byte[] x = { 1,2,y,4 } => x[int 0] = 0x01020004, x[2]=y.) |
3971 | for (int j = 0; j < num_tiles; j++) { |
3972 | jlong con = tiles[j]; |
3973 | jlong init = inits[j]; |
3974 | if (con == 0) continue; |
3975 | jint con0, con1; // split the constant, address-wise |
3976 | jint init0, init1; // split the init map, address-wise |
3977 | { union { jlong con; jint intcon[2]; } u; |
3978 | u.con = con; |
3979 | con0 = u.intcon[0]; |
3980 | con1 = u.intcon[1]; |
3981 | u.con = init; |
3982 | init0 = u.intcon[0]; |
3983 | init1 = u.intcon[1]; |
3984 | } |
3985 | |
3986 | Node* old = nodes[j]; |
3987 | assert(old != NULL, "need the prior store" ); |
3988 | intptr_t offset = (j * BytesPerLong); |
3989 | |
3990 | bool split = !Matcher::isSimpleConstant64(con); |
3991 | |
3992 | if (offset < header_size) { |
3993 | assert(offset + BytesPerInt >= header_size, "second int counts" ); |
3994 | assert(*(jint*)&tiles[j] == 0, "junk in header" ); |
3995 | split = true; // only the second word counts |
3996 | // Example: int a[] = { 42 ... } |
3997 | } else if (con0 == 0 && init0 == -1) { |
3998 | split = true; // first word is covered by full inits |
3999 | // Example: int a[] = { ... foo(), 42 ... } |
4000 | } else if (con1 == 0 && init1 == -1) { |
4001 | split = true; // second word is covered by full inits |
4002 | // Example: int a[] = { ... 42, foo() ... } |
4003 | } |
4004 | |
4005 | // Here's a case where init0 is neither 0 nor -1: |
4006 | // byte a[] = { ... 0,0,foo(),0, 0,0,0,42 ... } |
4007 | // Assuming big-endian memory, init0, init1 are 0x0000FF00, 0x000000FF. |
4008 | // In this case the tile is not split; it is (jlong)42. |
4009 | // The big tile is stored down, and then the foo() value is inserted. |
4010 | // (If there were foo(),foo() instead of foo(),0, init0 would be -1.) |
4011 | |
4012 | Node* ctl = old->in(MemNode::Control); |
4013 | Node* adr = make_raw_address(offset, phase); |
4014 | const TypePtr* atp = TypeRawPtr::BOTTOM; |
4015 | |
4016 | // One or two coalesced stores to plop down. |
4017 | Node* st[2]; |
4018 | intptr_t off[2]; |
4019 | int nst = 0; |
4020 | if (!split) { |
4021 | ++new_long; |
4022 | off[nst] = offset; |
4023 | st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp, |
4024 | phase->longcon(con), T_LONG, MemNode::unordered); |
4025 | } else { |
4026 | // Omit either if it is a zero. |
4027 | if (con0 != 0) { |
4028 | ++new_int; |
4029 | off[nst] = offset; |
4030 | st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp, |
4031 | phase->intcon(con0), T_INT, MemNode::unordered); |
4032 | } |
4033 | if (con1 != 0) { |
4034 | ++new_int; |
4035 | offset += BytesPerInt; |
4036 | adr = make_raw_address(offset, phase); |
4037 | off[nst] = offset; |
4038 | st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp, |
4039 | phase->intcon(con1), T_INT, MemNode::unordered); |
4040 | } |
4041 | } |
4042 | |
4043 | // Insert second store first, then the first before the second. |
4044 | // Insert each one just before any overlapping non-constant stores. |
4045 | while (nst > 0) { |
4046 | Node* st1 = st[--nst]; |
4047 | C->copy_node_notes_to(st1, old); |
4048 | st1 = phase->transform(st1); |
4049 | offset = off[nst]; |
4050 | assert(offset >= header_size, "do not smash header" ); |
4051 | int ins_idx = captured_store_insertion_point(offset, /*size:*/0, phase); |
4052 | guarantee(ins_idx != 0, "must re-insert constant store" ); |
4053 | if (ins_idx < 0) ins_idx = -ins_idx; // never overlap |
4054 | if (ins_idx > InitializeNode::RawStores && in(ins_idx-1) == zmem) |
4055 | set_req(--ins_idx, st1); |
4056 | else |
4057 | ins_req(ins_idx, st1); |
4058 | } |
4059 | } |
4060 | |
4061 | if (PrintCompilation && WizardMode) |
4062 | tty->print_cr("Changed %d/%d subword/long constants into %d/%d int/long" , |
4063 | old_subword, old_long, new_int, new_long); |
4064 | if (C->log() != NULL) |
4065 | C->log()->elem("comment that='%d/%d subword/long to %d/%d int/long'" , |
4066 | old_subword, old_long, new_int, new_long); |
4067 | |
4068 | // Clean up any remaining occurrences of zmem: |
4069 | remove_extra_zeroes(); |
4070 | } |
4071 | |
4072 | // Explore forward from in(start) to find the first fully initialized |
4073 | // word, and return its offset. Skip groups of subword stores which |
4074 | // together initialize full words. If in(start) is itself part of a |
4075 | // fully initialized word, return the offset of in(start). If there |
4076 | // are no following full-word stores, or if something is fishy, return |
4077 | // a negative value. |
4078 | intptr_t InitializeNode::find_next_fullword_store(uint start, PhaseGVN* phase) { |
4079 | int int_map = 0; |
4080 | intptr_t int_map_off = 0; |
4081 | const int FULL_MAP = right_n_bits(BytesPerInt); // the int_map we hope for |
4082 | |
4083 | for (uint i = start, limit = req(); i < limit; i++) { |
4084 | Node* st = in(i); |
4085 | |
4086 | intptr_t st_off = get_store_offset(st, phase); |
4087 | if (st_off < 0) break; // return conservative answer |
4088 | |
4089 | int st_size = st->as_Store()->memory_size(); |
4090 | if (st_size >= BytesPerInt && (st_off % BytesPerInt) == 0) { |
4091 | return st_off; // we found a complete word init |
4092 | } |
4093 | |
4094 | // update the map: |
4095 | |
4096 | intptr_t this_int_off = align_down(st_off, BytesPerInt); |
4097 | if (this_int_off != int_map_off) { |
4098 | // reset the map: |
4099 | int_map = 0; |
4100 | int_map_off = this_int_off; |
4101 | } |
4102 | |
4103 | int subword_off = st_off - this_int_off; |
4104 | int_map |= right_n_bits(st_size) << subword_off; |
4105 | if ((int_map & FULL_MAP) == FULL_MAP) { |
4106 | return this_int_off; // we found a complete word init |
4107 | } |
4108 | |
4109 | // Did this store hit or cross the word boundary? |
4110 | intptr_t next_int_off = align_down(st_off + st_size, BytesPerInt); |
4111 | if (next_int_off == this_int_off + BytesPerInt) { |
4112 | // We passed the current int, without fully initializing it. |
4113 | int_map_off = next_int_off; |
4114 | int_map >>= BytesPerInt; |
4115 | } else if (next_int_off > this_int_off + BytesPerInt) { |
4116 | // We passed the current and next int. |
4117 | return this_int_off + BytesPerInt; |
4118 | } |
4119 | } |
4120 | |
4121 | return -1; |
4122 | } |
4123 | |
4124 | |
4125 | // Called when the associated AllocateNode is expanded into CFG. |
4126 | // At this point, we may perform additional optimizations. |
4127 | // Linearize the stores by ascending offset, to make memory |
4128 | // activity as coherent as possible. |
4129 | Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr, |
4130 | intptr_t , |
4131 | Node* size_in_bytes, |
4132 | PhaseGVN* phase) { |
4133 | assert(!is_complete(), "not already complete" ); |
4134 | assert(stores_are_sane(phase), "" ); |
4135 | assert(allocation() != NULL, "must be present" ); |
4136 | |
4137 | remove_extra_zeroes(); |
4138 | |
4139 | if (ReduceFieldZeroing || ReduceBulkZeroing) |
4140 | // reduce instruction count for common initialization patterns |
4141 | coalesce_subword_stores(header_size, size_in_bytes, phase); |
4142 | |
4143 | Node* zmem = zero_memory(); // initially zero memory state |
4144 | Node* inits = zmem; // accumulating a linearized chain of inits |
4145 | #ifdef ASSERT |
4146 | intptr_t first_offset = allocation()->minimum_header_size(); |
4147 | intptr_t last_init_off = first_offset; // previous init offset |
4148 | intptr_t last_init_end = first_offset; // previous init offset+size |
4149 | intptr_t last_tile_end = first_offset; // previous tile offset+size |
4150 | #endif |
4151 | intptr_t zeroes_done = header_size; |
4152 | |
4153 | bool do_zeroing = true; // we might give up if inits are very sparse |
4154 | int big_init_gaps = 0; // how many large gaps have we seen? |
4155 | |
4156 | if (UseTLAB && ZeroTLAB) do_zeroing = false; |
4157 | if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false; |
4158 | |
4159 | for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) { |
4160 | Node* st = in(i); |
4161 | intptr_t st_off = get_store_offset(st, phase); |
4162 | if (st_off < 0) |
4163 | break; // unknown junk in the inits |
4164 | if (st->in(MemNode::Memory) != zmem) |
4165 | break; // complicated store chains somehow in list |
4166 | |
4167 | int st_size = st->as_Store()->memory_size(); |
4168 | intptr_t next_init_off = st_off + st_size; |
4169 | |
4170 | if (do_zeroing && zeroes_done < next_init_off) { |
4171 | // See if this store needs a zero before it or under it. |
4172 | intptr_t zeroes_needed = st_off; |
4173 | |
4174 | if (st_size < BytesPerInt) { |
4175 | // Look for subword stores which only partially initialize words. |
4176 | // If we find some, we must lay down some word-level zeroes first, |
4177 | // underneath the subword stores. |
4178 | // |
4179 | // Examples: |
4180 | // byte[] a = { p,q,r,s } => a[0]=p,a[1]=q,a[2]=r,a[3]=s |
4181 | // byte[] a = { x,y,0,0 } => a[0..3] = 0, a[0]=x,a[1]=y |
4182 | // byte[] a = { 0,0,z,0 } => a[0..3] = 0, a[2]=z |
4183 | // |
4184 | // Note: coalesce_subword_stores may have already done this, |
4185 | // if it was prompted by constant non-zero subword initializers. |
4186 | // But this case can still arise with non-constant stores. |
4187 | |
4188 | intptr_t next_full_store = find_next_fullword_store(i, phase); |
4189 | |
4190 | // In the examples above: |
4191 | // in(i) p q r s x y z |
4192 | // st_off 12 13 14 15 12 13 14 |
4193 | // st_size 1 1 1 1 1 1 1 |
4194 | // next_full_s. 12 16 16 16 16 16 16 |
4195 | // z's_done 12 16 16 16 12 16 12 |
4196 | // z's_needed 12 16 16 16 16 16 16 |
4197 | // zsize 0 0 0 0 4 0 4 |
4198 | if (next_full_store < 0) { |
4199 | // Conservative tack: Zero to end of current word. |
4200 | zeroes_needed = align_up(zeroes_needed, BytesPerInt); |
4201 | } else { |
4202 | // Zero to beginning of next fully initialized word. |
4203 | // Or, don't zero at all, if we are already in that word. |
4204 | assert(next_full_store >= zeroes_needed, "must go forward" ); |
4205 | assert((next_full_store & (BytesPerInt-1)) == 0, "even boundary" ); |
4206 | zeroes_needed = next_full_store; |
4207 | } |
4208 | } |
4209 | |
4210 | if (zeroes_needed > zeroes_done) { |
4211 | intptr_t zsize = zeroes_needed - zeroes_done; |
4212 | // Do some incremental zeroing on rawmem, in parallel with inits. |
4213 | zeroes_done = align_down(zeroes_done, BytesPerInt); |
4214 | rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr, |
4215 | zeroes_done, zeroes_needed, |
4216 | phase); |
4217 | zeroes_done = zeroes_needed; |
4218 | if (zsize > InitArrayShortSize && ++big_init_gaps > 2) |
4219 | do_zeroing = false; // leave the hole, next time |
4220 | } |
4221 | } |
4222 | |
4223 | // Collect the store and move on: |
4224 | st->set_req(MemNode::Memory, inits); |
4225 | inits = st; // put it on the linearized chain |
4226 | set_req(i, zmem); // unhook from previous position |
4227 | |
4228 | if (zeroes_done == st_off) |
4229 | zeroes_done = next_init_off; |
4230 | |
4231 | assert(!do_zeroing || zeroes_done >= next_init_off, "don't miss any" ); |
4232 | |
4233 | #ifdef ASSERT |
4234 | // Various order invariants. Weaker than stores_are_sane because |
4235 | // a large constant tile can be filled in by smaller non-constant stores. |
4236 | assert(st_off >= last_init_off, "inits do not reverse" ); |
4237 | last_init_off = st_off; |
4238 | const Type* val = NULL; |
4239 | if (st_size >= BytesPerInt && |
4240 | (val = phase->type(st->in(MemNode::ValueIn)))->singleton() && |
4241 | (int)val->basic_type() < (int)T_OBJECT) { |
4242 | assert(st_off >= last_tile_end, "tiles do not overlap" ); |
4243 | assert(st_off >= last_init_end, "tiles do not overwrite inits" ); |
4244 | last_tile_end = MAX2(last_tile_end, next_init_off); |
4245 | } else { |
4246 | intptr_t st_tile_end = align_up(next_init_off, BytesPerLong); |
4247 | assert(st_tile_end >= last_tile_end, "inits stay with tiles" ); |
4248 | assert(st_off >= last_init_end, "inits do not overlap" ); |
4249 | last_init_end = next_init_off; // it's a non-tile |
4250 | } |
4251 | #endif //ASSERT |
4252 | } |
4253 | |
4254 | remove_extra_zeroes(); // clear out all the zmems left over |
4255 | add_req(inits); |
4256 | |
4257 | if (!(UseTLAB && ZeroTLAB)) { |
4258 | // If anything remains to be zeroed, zero it all now. |
4259 | zeroes_done = align_down(zeroes_done, BytesPerInt); |
4260 | // if it is the last unused 4 bytes of an instance, forget about it |
4261 | intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, max_jint); |
4262 | if (zeroes_done + BytesPerLong >= size_limit) { |
4263 | AllocateNode* alloc = allocation(); |
4264 | assert(alloc != NULL, "must be present" ); |
4265 | if (alloc != NULL && alloc->Opcode() == Op_Allocate) { |
4266 | Node* klass_node = alloc->in(AllocateNode::KlassNode); |
4267 | ciKlass* k = phase->type(klass_node)->is_klassptr()->klass(); |
4268 | if (zeroes_done == k->layout_helper()) |
4269 | zeroes_done = size_limit; |
4270 | } |
4271 | } |
4272 | if (zeroes_done < size_limit) { |
4273 | rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr, |
4274 | zeroes_done, size_in_bytes, phase); |
4275 | } |
4276 | } |
4277 | |
4278 | set_complete(phase); |
4279 | return rawmem; |
4280 | } |
4281 | |
4282 | |
4283 | #ifdef ASSERT |
4284 | bool InitializeNode::stores_are_sane(PhaseTransform* phase) { |
4285 | if (is_complete()) |
4286 | return true; // stores could be anything at this point |
4287 | assert(allocation() != NULL, "must be present" ); |
4288 | intptr_t last_off = allocation()->minimum_header_size(); |
4289 | for (uint i = InitializeNode::RawStores; i < req(); i++) { |
4290 | Node* st = in(i); |
4291 | intptr_t st_off = get_store_offset(st, phase); |
4292 | if (st_off < 0) continue; // ignore dead garbage |
4293 | if (last_off > st_off) { |
4294 | tty->print_cr("*** bad store offset at %d: " INTX_FORMAT " > " INTX_FORMAT, i, last_off, st_off); |
4295 | this->dump(2); |
4296 | assert(false, "ascending store offsets" ); |
4297 | return false; |
4298 | } |
4299 | last_off = st_off + st->as_Store()->memory_size(); |
4300 | } |
4301 | return true; |
4302 | } |
4303 | #endif //ASSERT |
4304 | |
4305 | |
4306 | |
4307 | |
4308 | //============================MergeMemNode===================================== |
4309 | // |
4310 | // SEMANTICS OF MEMORY MERGES: A MergeMem is a memory state assembled from several |
4311 | // contributing store or call operations. Each contributor provides the memory |
4312 | // state for a particular "alias type" (see Compile::alias_type). For example, |
4313 | // if a MergeMem has an input X for alias category #6, then any memory reference |
4314 | // to alias category #6 may use X as its memory state input, as an exact equivalent |
4315 | // to using the MergeMem as a whole. |
4316 | // Load<6>( MergeMem(<6>: X, ...), p ) <==> Load<6>(X,p) |
4317 | // |
4318 | // (Here, the <N> notation gives the index of the relevant adr_type.) |
4319 | // |
4320 | // In one special case (and more cases in the future), alias categories overlap. |
4321 | // The special alias category "Bot" (Compile::AliasIdxBot) includes all memory |
4322 | // states. Therefore, if a MergeMem has only one contributing input W for Bot, |
4323 | // it is exactly equivalent to that state W: |
4324 | // MergeMem(<Bot>: W) <==> W |
4325 | // |
4326 | // Usually, the merge has more than one input. In that case, where inputs |
4327 | // overlap (i.e., one is Bot), the narrower alias type determines the memory |
4328 | // state for that type, and the wider alias type (Bot) fills in everywhere else: |
4329 | // Load<5>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<5>(W,p) |
4330 | // Load<6>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<6>(X,p) |
4331 | // |
// A merge can take a "wide" memory state as one of its narrow inputs.
// This simply means that the merge observes only the relevant parts of
// the wide input.  That is, wide memory states arriving at narrow merge inputs
// are implicitly "filtered" or "sliced" as necessary.  (This is rare.)
4336 | // |
4337 | // These rules imply that MergeMem nodes may cascade (via their <Bot> links), |
4338 | // and that memory slices "leak through": |
4339 | // MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y)) <==> MergeMem(<Bot>: W, <7>: Y) |
4340 | // |
4341 | // But, in such a cascade, repeated memory slices can "block the leak": |
4342 | // MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y), <7>: Y') <==> MergeMem(<Bot>: W, <7>: Y') |
4343 | // |
4344 | // In the last example, Y is not part of the combined memory state of the |
4345 | // outermost MergeMem. The system must, of course, prevent unschedulable |
4346 | // memory states from arising, so you can be sure that the state Y is somehow |
4347 | // a precursor to state Y'. |
4348 | // |
4349 | // |
// REPRESENTATION OF MEMORY MERGES: The indexes used to address the Node::in array
// of each MergeMemNode are exactly the numerical alias indexes, including
// but not limited to AliasIdxTop, AliasIdxBot, and AliasIdxRaw.  The functions
// Compile::alias_type (and kin) produce and manage these indexes.
4354 | // |
4355 | // By convention, the value of in(AliasIdxTop) (i.e., in(1)) is always the top node. |
4356 | // (Note that this provides quick access to the top node inside MergeMem methods, |
4357 | // without the need to reach out via TLS to Compile::current.) |
4358 | // |
4359 | // As a consequence of what was just described, a MergeMem that represents a full |
4360 | // memory state has an edge in(AliasIdxBot) which is a "wide" memory state, |
4361 | // containing all alias categories. |
4362 | // |
4363 | // MergeMem nodes never (?) have control inputs, so in(0) is NULL. |
4364 | // |
4365 | // All other edges in(N) (including in(AliasIdxRaw), which is in(3)) are either |
4366 | // a memory state for the alias type <N>, or else the top node, meaning that |
4367 | // there is no particular input for that alias type. Note that the length of |
4368 | // a MergeMem is variable, and may be extended at any time to accommodate new |
4369 | // memory states at larger alias indexes. When merges grow, they are of course |
4370 | // filled with "top" in the unused in() positions. |
4371 | // |
4372 | // This use of top is named "empty_memory()", or "empty_mem" (no-memory) as a variable. |
4373 | // (Top was chosen because it works smoothly with passes like GCM.) |
4374 | // |
4375 | // For convenience, we hardwire the alias index for TypeRawPtr::BOTTOM. (It is |
4376 | // the type of random VM bits like TLS references.) Since it is always the |
4377 | // first non-Bot memory slice, some low-level loops use it to initialize an |
4378 | // index variable: for (i = AliasIdxRaw; i < req(); i++). |
4379 | // |
4380 | // |
// ACCESSORS: There is a special accessor MergeMemNode::base_memory which returns
// the distinguished "wide" state.  The accessor MergeMemNode::memory_at(N) returns
// the memory state for alias type <N>, or (if there is no particular slice at <N>)
// the base memory.  To prevent bugs, memory_at does not accept <Top>
// or <Bot> indexes.  The iterator MergeMemStream provides robust iteration over
// MergeMem nodes or pairs of such nodes, ensuring that the non-top edges are visited.
4387 | // |
4388 | // %%%% We may get rid of base_memory as a separate accessor at some point; it isn't |
4389 | // really that different from the other memory inputs. An abbreviation called |
4390 | // "bot_memory()" for "memory_at(AliasIdxBot)" would keep code tidy. |
4391 | // |
4392 | // |
// PARTIAL MEMORY STATES: During optimization, MergeMem nodes may arise that represent
// partial memory states.  When a Phi splits through a MergeMem, the copy of the Phi
// that "emerges through" the base memory will be marked as excluding the alias types
// of the other (narrow-memory) copies which "emerged through" the narrow edges:
4397 | // |
4398 | // Phi<Bot>(U, MergeMem(<Bot>: W, <8>: Y)) |
4399 | // ==Ideal=> MergeMem(<Bot>: Phi<Bot-8>(U, W), Phi<8>(U, Y)) |
4400 | // |
4401 | // This strange "subtraction" effect is necessary to ensure IGVN convergence. |
4402 | // (It is currently unimplemented.) As you can see, the resulting merge is |
4403 | // actually a disjoint union of memory states, rather than an overlay. |
4404 | // |
4405 | |
4406 | //------------------------------MergeMemNode----------------------------------- |
4407 | Node* MergeMemNode::make_empty_memory() { |
4408 | Node* empty_memory = (Node*) Compile::current()->top(); |
4409 | assert(empty_memory->is_top(), "correct sentinel identity" ); |
4410 | return empty_memory; |
4411 | } |
4412 | |
4413 | MergeMemNode::MergeMemNode(Node *new_base) : Node(1+Compile::AliasIdxRaw) { |
4414 | init_class_id(Class_MergeMem); |
4415 | // all inputs are nullified in Node::Node(int) |
4416 | // set_input(0, NULL); // no control input |
4417 | |
4418 | // Initialize the edges uniformly to top, for starters. |
4419 | Node* empty_mem = make_empty_memory(); |
4420 | for (uint i = Compile::AliasIdxTop; i < req(); i++) { |
4421 | init_req(i,empty_mem); |
4422 | } |
4423 | assert(empty_memory() == empty_mem, "" ); |
4424 | |
4425 | if( new_base != NULL && new_base->is_MergeMem() ) { |
4426 | MergeMemNode* mdef = new_base->as_MergeMem(); |
4427 | assert(mdef->empty_memory() == empty_mem, "consistent sentinels" ); |
4428 | for (MergeMemStream mms(this, mdef); mms.next_non_empty2(); ) { |
4429 | mms.set_memory(mms.memory2()); |
4430 | } |
4431 | assert(base_memory() == mdef->base_memory(), "" ); |
4432 | } else { |
4433 | set_base_memory(new_base); |
4434 | } |
4435 | } |
4436 | |
4437 | // Make a new, untransformed MergeMem with the same base as 'mem'. |
4438 | // If mem is itself a MergeMem, populate the result with the same edges. |
4439 | MergeMemNode* MergeMemNode::make(Node* mem) { |
4440 | return new MergeMemNode(mem); |
4441 | } |
4442 | |
4443 | //------------------------------cmp-------------------------------------------- |
4444 | uint MergeMemNode::hash() const { return NO_HASH; } |
4445 | bool MergeMemNode::cmp( const Node &n ) const { |
4446 | return (&n == this); // Always fail except on self |
4447 | } |
4448 | |
4449 | //------------------------------Identity--------------------------------------- |
4450 | Node* MergeMemNode::Identity(PhaseGVN* phase) { |
4451 | // Identity if this merge point does not record any interesting memory |
4452 | // disambiguations. |
4453 | Node* base_mem = base_memory(); |
4454 | Node* empty_mem = empty_memory(); |
4455 | if (base_mem != empty_mem) { // Memory path is not dead? |
4456 | for (uint i = Compile::AliasIdxRaw; i < req(); i++) { |
4457 | Node* mem = in(i); |
4458 | if (mem != empty_mem && mem != base_mem) { |
4459 | return this; // Many memory splits; no change |
4460 | } |
4461 | } |
4462 | } |
4463 | return base_mem; // No memory splits; ID on the one true input |
4464 | } |
4465 | |
4466 | //------------------------------Ideal------------------------------------------ |
4467 | // This method is invoked recursively on chains of MergeMem nodes |
4468 | Node *MergeMemNode::Ideal(PhaseGVN *phase, bool can_reshape) { |
4469 | // Remove chain'd MergeMems |
4470 | // |
4471 | // This is delicate, because the each "in(i)" (i >= Raw) is interpreted |
4472 | // relative to the "in(Bot)". Since we are patching both at the same time, |
4473 | // we have to be careful to read each "in(i)" relative to the old "in(Bot)", |
4474 | // but rewrite each "in(i)" relative to the new "in(Bot)". |
4475 | Node *progress = NULL; |
4476 | |
4477 | |
4478 | Node* old_base = base_memory(); |
4479 | Node* empty_mem = empty_memory(); |
4480 | if (old_base == empty_mem) |
4481 | return NULL; // Dead memory path. |
4482 | |
4483 | MergeMemNode* old_mbase; |
4484 | if (old_base != NULL && old_base->is_MergeMem()) |
4485 | old_mbase = old_base->as_MergeMem(); |
4486 | else |
4487 | old_mbase = NULL; |
4488 | Node* new_base = old_base; |
4489 | |
4490 | // simplify stacked MergeMems in base memory |
4491 | if (old_mbase) new_base = old_mbase->base_memory(); |
4492 | |
4493 | // the base memory might contribute new slices beyond my req() |
4494 | if (old_mbase) grow_to_match(old_mbase); |
4495 | |
4496 | // Look carefully at the base node if it is a phi. |
4497 | PhiNode* phi_base; |
4498 | if (new_base != NULL && new_base->is_Phi()) |
4499 | phi_base = new_base->as_Phi(); |
4500 | else |
4501 | phi_base = NULL; |
4502 | |
4503 | Node* phi_reg = NULL; |
4504 | uint phi_len = (uint)-1; |
4505 | if (phi_base != NULL && !phi_base->is_copy()) { |
4506 | // do not examine phi if degraded to a copy |
4507 | phi_reg = phi_base->region(); |
4508 | phi_len = phi_base->req(); |
4509 | // see if the phi is unfinished |
4510 | for (uint i = 1; i < phi_len; i++) { |
4511 | if (phi_base->in(i) == NULL) { |
4512 | // incomplete phi; do not look at it yet! |
4513 | phi_reg = NULL; |
4514 | phi_len = (uint)-1; |
4515 | break; |
4516 | } |
4517 | } |
4518 | } |
4519 | |
4520 | // Note: We do not call verify_sparse on entry, because inputs |
4521 | // can normalize to the base_memory via subsume_node or similar |
4522 | // mechanisms. This method repairs that damage. |
4523 | |
4524 | assert(!old_mbase || old_mbase->is_empty_memory(empty_mem), "consistent sentinels" ); |
4525 | |
4526 | // Look at each slice. |
4527 | for (uint i = Compile::AliasIdxRaw; i < req(); i++) { |
4528 | Node* old_in = in(i); |
4529 | // calculate the old memory value |
4530 | Node* old_mem = old_in; |
4531 | if (old_mem == empty_mem) old_mem = old_base; |
4532 | assert(old_mem == memory_at(i), "" ); |
4533 | |
4534 | // maybe update (reslice) the old memory value |
4535 | |
4536 | // simplify stacked MergeMems |
4537 | Node* new_mem = old_mem; |
4538 | MergeMemNode* old_mmem; |
4539 | if (old_mem != NULL && old_mem->is_MergeMem()) |
4540 | old_mmem = old_mem->as_MergeMem(); |
4541 | else |
4542 | old_mmem = NULL; |
4543 | if (old_mmem == this) { |
4544 | // This can happen if loops break up and safepoints disappear. |
4545 | // A merge of BotPtr (default) with a RawPtr memory derived from a |
4546 | // safepoint can be rewritten to a merge of the same BotPtr with |
4547 | // the BotPtr phi coming into the loop. If that phi disappears |
4548 | // also, we can end up with a self-loop of the mergemem. |
4549 | // In general, if loops degenerate and memory effects disappear, |
4550 | // a mergemem can be left looking at itself. This simply means |
4551 | // that the mergemem's default should be used, since there is |
4552 | // no longer any apparent effect on this slice. |
4553 | // Note: If a memory slice is a MergeMem cycle, it is unreachable |
4554 | // from start. Update the input to TOP. |
4555 | new_mem = (new_base == this || new_base == empty_mem)? empty_mem : new_base; |
4556 | } |
4557 | else if (old_mmem != NULL) { |
4558 | new_mem = old_mmem->memory_at(i); |
4559 | } |
4560 | // else preceding memory was not a MergeMem |
4561 | |
4562 | // replace equivalent phis (unfortunately, they do not GVN together) |
4563 | if (new_mem != NULL && new_mem != new_base && |
4564 | new_mem->req() == phi_len && new_mem->in(0) == phi_reg) { |
4565 | if (new_mem->is_Phi()) { |
4566 | PhiNode* phi_mem = new_mem->as_Phi(); |
4567 | for (uint i = 1; i < phi_len; i++) { |
4568 | if (phi_base->in(i) != phi_mem->in(i)) { |
4569 | phi_mem = NULL; |
4570 | break; |
4571 | } |
4572 | } |
4573 | if (phi_mem != NULL) { |
4574 | // equivalent phi nodes; revert to the def |
4575 | new_mem = new_base; |
4576 | } |
4577 | } |
4578 | } |
4579 | |
4580 | // maybe store down a new value |
4581 | Node* new_in = new_mem; |
4582 | if (new_in == new_base) new_in = empty_mem; |
4583 | |
4584 | if (new_in != old_in) { |
4585 | // Warning: Do not combine this "if" with the previous "if" |
4586 | // A memory slice might have be be rewritten even if it is semantically |
4587 | // unchanged, if the base_memory value has changed. |
4588 | set_req(i, new_in); |
4589 | progress = this; // Report progress |
4590 | } |
4591 | } |
4592 | |
4593 | if (new_base != old_base) { |
4594 | set_req(Compile::AliasIdxBot, new_base); |
4595 | // Don't use set_base_memory(new_base), because we need to update du. |
4596 | assert(base_memory() == new_base, "" ); |
4597 | progress = this; |
4598 | } |
4599 | |
4600 | if( base_memory() == this ) { |
4601 | // a self cycle indicates this memory path is dead |
4602 | set_req(Compile::AliasIdxBot, empty_mem); |
4603 | } |
4604 | |
4605 | // Resolve external cycles by calling Ideal on a MergeMem base_memory |
4606 | // Recursion must occur after the self cycle check above |
4607 | if( base_memory()->is_MergeMem() ) { |
4608 | MergeMemNode *new_mbase = base_memory()->as_MergeMem(); |
4609 | Node *m = phase->transform(new_mbase); // Rollup any cycles |
4610 | if( m != NULL && |
4611 | (m->is_top() || |
4612 | (m->is_MergeMem() && m->as_MergeMem()->base_memory() == empty_mem)) ) { |
4613 | // propagate rollup of dead cycle to self |
4614 | set_req(Compile::AliasIdxBot, empty_mem); |
4615 | } |
4616 | } |
4617 | |
4618 | if( base_memory() == empty_mem ) { |
4619 | progress = this; |
4620 | // Cut inputs during Parse phase only. |
4621 | // During Optimize phase a dead MergeMem node will be subsumed by Top. |
4622 | if( !can_reshape ) { |
4623 | for (uint i = Compile::AliasIdxRaw; i < req(); i++) { |
4624 | if( in(i) != empty_mem ) { set_req(i, empty_mem); } |
4625 | } |
4626 | } |
4627 | } |
4628 | |
4629 | if( !progress && base_memory()->is_Phi() && can_reshape ) { |
4630 | // Check if PhiNode::Ideal's "Split phis through memory merges" |
4631 | // transform should be attempted. Look for this->phi->this cycle. |
4632 | uint merge_width = req(); |
4633 | if (merge_width > Compile::AliasIdxRaw) { |
4634 | PhiNode* phi = base_memory()->as_Phi(); |
4635 | for( uint i = 1; i < phi->req(); ++i ) {// For all paths in |
4636 | if (phi->in(i) == this) { |
4637 | phase->is_IterGVN()->_worklist.push(phi); |
4638 | break; |
4639 | } |
4640 | } |
4641 | } |
4642 | } |
4643 | |
4644 | assert(progress || verify_sparse(), "please, no dups of base" ); |
4645 | return progress; |
4646 | } |
4647 | |
4648 | //-------------------------set_base_memory------------------------------------- |
4649 | void MergeMemNode::set_base_memory(Node *new_base) { |
4650 | Node* empty_mem = empty_memory(); |
4651 | set_req(Compile::AliasIdxBot, new_base); |
4652 | assert(memory_at(req()) == new_base, "must set default memory" ); |
4653 | // Clear out other occurrences of new_base: |
4654 | if (new_base != empty_mem) { |
4655 | for (uint i = Compile::AliasIdxRaw; i < req(); i++) { |
4656 | if (in(i) == new_base) set_req(i, empty_mem); |
4657 | } |
4658 | } |
4659 | } |
4660 | |
4661 | //------------------------------out_RegMask------------------------------------ |
4662 | const RegMask &MergeMemNode::out_RegMask() const { |
4663 | return RegMask::Empty; |
4664 | } |
4665 | |
4666 | //------------------------------dump_spec-------------------------------------- |
4667 | #ifndef PRODUCT |
4668 | void MergeMemNode::dump_spec(outputStream *st) const { |
4669 | st->print(" {" ); |
4670 | Node* base_mem = base_memory(); |
4671 | for( uint i = Compile::AliasIdxRaw; i < req(); i++ ) { |
4672 | Node* mem = (in(i) != NULL) ? memory_at(i) : base_mem; |
4673 | if (mem == base_mem) { st->print(" -" ); continue; } |
4674 | st->print( " N%d:" , mem->_idx ); |
4675 | Compile::current()->get_adr_type(i)->dump_on(st); |
4676 | } |
4677 | st->print(" }" ); |
4678 | } |
4679 | #endif // !PRODUCT |
4680 | |
4681 | |
4682 | #ifdef ASSERT |
4683 | static bool might_be_same(Node* a, Node* b) { |
4684 | if (a == b) return true; |
4685 | if (!(a->is_Phi() || b->is_Phi())) return false; |
4686 | // phis shift around during optimization |
4687 | return true; // pretty stupid... |
4688 | } |
4689 | |
4690 | // verify a narrow slice (either incoming or outgoing) |
4691 | static void verify_memory_slice(const MergeMemNode* m, int alias_idx, Node* n) { |
4692 | if (!VerifyAliases) return; // don't bother to verify unless requested |
4693 | if (VMError::is_error_reported()) return; // muzzle asserts when debugging an error |
4694 | if (Node::in_dump()) return; // muzzle asserts when printing |
4695 | assert(alias_idx >= Compile::AliasIdxRaw, "must not disturb base_memory or sentinel" ); |
4696 | assert(n != NULL, "" ); |
4697 | // Elide intervening MergeMem's |
4698 | while (n->is_MergeMem()) { |
4699 | n = n->as_MergeMem()->memory_at(alias_idx); |
4700 | } |
4701 | Compile* C = Compile::current(); |
4702 | const TypePtr* n_adr_type = n->adr_type(); |
4703 | if (n == m->empty_memory()) { |
4704 | // Implicit copy of base_memory() |
4705 | } else if (n_adr_type != TypePtr::BOTTOM) { |
4706 | assert(n_adr_type != NULL, "new memory must have a well-defined adr_type" ); |
4707 | assert(C->must_alias(n_adr_type, alias_idx), "new memory must match selected slice" ); |
4708 | } else { |
4709 | // A few places like make_runtime_call "know" that VM calls are narrow, |
4710 | // and can be used to update only the VM bits stored as TypeRawPtr::BOTTOM. |
4711 | bool expected_wide_mem = false; |
4712 | if (n == m->base_memory()) { |
4713 | expected_wide_mem = true; |
4714 | } else if (alias_idx == Compile::AliasIdxRaw || |
4715 | n == m->memory_at(Compile::AliasIdxRaw)) { |
4716 | expected_wide_mem = true; |
4717 | } else if (!C->alias_type(alias_idx)->is_rewritable()) { |
4718 | // memory can "leak through" calls on channels that |
4719 | // are write-once. Allow this also. |
4720 | expected_wide_mem = true; |
4721 | } |
4722 | assert(expected_wide_mem, "expected narrow slice replacement" ); |
4723 | } |
4724 | } |
4725 | #else // !ASSERT |
4726 | #define verify_memory_slice(m,i,n) (void)(0) // PRODUCT version is no-op |
4727 | #endif |
4728 | |
4729 | |
4730 | //-----------------------------memory_at--------------------------------------- |
4731 | Node* MergeMemNode::memory_at(uint alias_idx) const { |
4732 | assert(alias_idx >= Compile::AliasIdxRaw || |
4733 | alias_idx == Compile::AliasIdxBot && Compile::current()->AliasLevel() == 0, |
4734 | "must avoid base_memory and AliasIdxTop" ); |
4735 | |
4736 | // Otherwise, it is a narrow slice. |
4737 | Node* n = alias_idx < req() ? in(alias_idx) : empty_memory(); |
4738 | Compile *C = Compile::current(); |
4739 | if (is_empty_memory(n)) { |
4740 | // the array is sparse; empty slots are the "top" node |
4741 | n = base_memory(); |
4742 | assert(Node::in_dump() |
4743 | || n == NULL || n->bottom_type() == Type::TOP |
4744 | || n->adr_type() == NULL // address is TOP |
4745 | || n->adr_type() == TypePtr::BOTTOM |
4746 | || n->adr_type() == TypeRawPtr::BOTTOM |
4747 | || Compile::current()->AliasLevel() == 0, |
4748 | "must be a wide memory" ); |
4749 | // AliasLevel == 0 if we are organizing the memory states manually. |
4750 | // See verify_memory_slice for comments on TypeRawPtr::BOTTOM. |
4751 | } else { |
4752 | // make sure the stored slice is sane |
4753 | #ifdef ASSERT |
4754 | if (VMError::is_error_reported() || Node::in_dump()) { |
4755 | } else if (might_be_same(n, base_memory())) { |
4756 | // Give it a pass: It is a mostly harmless repetition of the base. |
4757 | // This can arise normally from node subsumption during optimization. |
4758 | } else { |
4759 | verify_memory_slice(this, alias_idx, n); |
4760 | } |
4761 | #endif |
4762 | } |
4763 | return n; |
4764 | } |
4765 | |
4766 | //---------------------------set_memory_at------------------------------------- |
4767 | void MergeMemNode::set_memory_at(uint alias_idx, Node *n) { |
4768 | verify_memory_slice(this, alias_idx, n); |
4769 | Node* empty_mem = empty_memory(); |
4770 | if (n == base_memory()) n = empty_mem; // collapse default |
4771 | uint need_req = alias_idx+1; |
4772 | if (req() < need_req) { |
4773 | if (n == empty_mem) return; // already the default, so do not grow me |
4774 | // grow the sparse array |
4775 | do { |
4776 | add_req(empty_mem); |
4777 | } while (req() < need_req); |
4778 | } |
4779 | set_req( alias_idx, n ); |
4780 | } |
4781 | |
4782 | |
4783 | |
4784 | //--------------------------iteration_setup------------------------------------ |
4785 | void MergeMemNode::iteration_setup(const MergeMemNode* other) { |
4786 | if (other != NULL) { |
4787 | grow_to_match(other); |
4788 | // invariant: the finite support of mm2 is within mm->req() |
4789 | #ifdef ASSERT |
4790 | for (uint i = req(); i < other->req(); i++) { |
4791 | assert(other->is_empty_memory(other->in(i)), "slice left uncovered" ); |
4792 | } |
4793 | #endif |
4794 | } |
4795 | // Replace spurious copies of base_memory by top. |
4796 | Node* base_mem = base_memory(); |
4797 | if (base_mem != NULL && !base_mem->is_top()) { |
4798 | for (uint i = Compile::AliasIdxBot+1, imax = req(); i < imax; i++) { |
4799 | if (in(i) == base_mem) |
4800 | set_req(i, empty_memory()); |
4801 | } |
4802 | } |
4803 | } |
4804 | |
4805 | //---------------------------grow_to_match------------------------------------- |
4806 | void MergeMemNode::grow_to_match(const MergeMemNode* other) { |
4807 | Node* empty_mem = empty_memory(); |
4808 | assert(other->is_empty_memory(empty_mem), "consistent sentinels" ); |
4809 | // look for the finite support of the other memory |
4810 | for (uint i = other->req(); --i >= req(); ) { |
4811 | if (other->in(i) != empty_mem) { |
4812 | uint new_len = i+1; |
4813 | while (req() < new_len) add_req(empty_mem); |
4814 | break; |
4815 | } |
4816 | } |
4817 | } |
4818 | |
4819 | //---------------------------verify_sparse------------------------------------- |
4820 | #ifndef PRODUCT |
4821 | bool MergeMemNode::verify_sparse() const { |
4822 | assert(is_empty_memory(make_empty_memory()), "sane sentinel" ); |
4823 | Node* base_mem = base_memory(); |
4824 | // The following can happen in degenerate cases, since empty==top. |
4825 | if (is_empty_memory(base_mem)) return true; |
4826 | for (uint i = Compile::AliasIdxRaw; i < req(); i++) { |
4827 | assert(in(i) != NULL, "sane slice" ); |
4828 | if (in(i) == base_mem) return false; // should have been the sentinel value! |
4829 | } |
4830 | return true; |
4831 | } |
4832 | |
4833 | bool MergeMemStream::match_memory(Node* mem, const MergeMemNode* mm, int idx) { |
4834 | Node* n; |
4835 | n = mm->in(idx); |
4836 | if (mem == n) return true; // might be empty_memory() |
4837 | n = (idx == Compile::AliasIdxBot)? mm->base_memory(): mm->memory_at(idx); |
4838 | if (mem == n) return true; |
4839 | while (n->is_Phi() && (n = n->as_Phi()->is_copy()) != NULL) { |
4840 | if (mem == n) return true; |
4841 | if (n == NULL) break; |
4842 | } |
4843 | return false; |
4844 | } |
4845 | #endif // !PRODUCT |
4846 | |