/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "opto/castnode.hpp"
#include "opto/compile.hpp"
#include "opto/escape.hpp"
#include "opto/graphKit.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/movenode.hpp"
#include "opto/node.hpp"
#include "opto/phase.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/type.hpp"
#include "utilities/copy.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zThreadLocalData.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"

ZBarrierSetC2State::ZBarrierSetC2State(Arena* comp_arena) :
    _load_barrier_nodes(new (comp_arena) GrowableArray<LoadBarrierNode*>(comp_arena, 8, 0, NULL)) {}

int ZBarrierSetC2State::load_barrier_count() const {
  return _load_barrier_nodes->length();
}

void ZBarrierSetC2State::add_load_barrier_node(LoadBarrierNode* n) {
  assert(!_load_barrier_nodes->contains(n), "duplicate entry in expand list");
  _load_barrier_nodes->append(n);
}

void ZBarrierSetC2State::remove_load_barrier_node(LoadBarrierNode* n) {
  // This function may be called twice for a node, so check
  // that the node is in the array before attempting to remove it.
  if (_load_barrier_nodes->contains(n)) {
    _load_barrier_nodes->remove(n);
  }
}

LoadBarrierNode* ZBarrierSetC2State::load_barrier_node(int idx) const {
  return _load_barrier_nodes->at(idx);
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) ZBarrierSetC2State(comp_arena);
}

ZBarrierSetC2State* ZBarrierSetC2::state() const {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

bool ZBarrierSetC2::is_gc_barrier_node(Node* node) const {
  // 1. This step follows potential oop projections of a load barrier before expansion
  if (node->is_Proj()) {
    node = node->in(0);
  }

  // 2. This step checks for unexpanded load barriers
  if (node->is_LoadBarrier()) {
    return true;
  }

  // 3. This step checks for the phi corresponding to an optimized load barrier expansion
  if (node->is_Phi()) {
    PhiNode* phi = node->as_Phi();
    Node* n = phi->in(1);
    if (n != NULL && n->is_LoadBarrierSlowReg()) {
      return true;
    }
  }

  return false;
}

void ZBarrierSetC2::register_potential_barrier_node(Node* node) const {
  if (node->is_LoadBarrier()) {
    state()->add_load_barrier_node(node->as_LoadBarrier());
  }
}

void ZBarrierSetC2::unregister_potential_barrier_node(Node* node) const {
  if (node->is_LoadBarrier()) {
    state()->remove_load_barrier_node(node->as_LoadBarrier());
  }
}

void ZBarrierSetC2::eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const {
  // Remove useless LoadBarrier nodes
  ZBarrierSetC2State* s = state();
  for (int i = s->load_barrier_count() - 1; i >= 0; i--) {
    LoadBarrierNode* n = s->load_barrier_node(i);
    if (!useful.member(n)) {
      unregister_potential_barrier_node(n);
    }
  }
}

void ZBarrierSetC2::enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const {
  if (node->is_LoadBarrier() && !node->as_LoadBarrier()->has_true_uses()) {
    igvn->_worklist.push(node);
  }
}

const uint NoBarrier       = 0;
const uint RequireBarrier  = 1;
const uint WeakBarrier     = 2;
const uint ExpandedBarrier = 4;

static bool load_require_barrier(LoadNode* load)      { return (load->barrier_data() & RequireBarrier)  == RequireBarrier; }
static bool load_has_weak_barrier(LoadNode* load)     { return (load->barrier_data() & WeakBarrier)     == WeakBarrier; }
static bool load_has_expanded_barrier(LoadNode* load) { return (load->barrier_data() & ExpandedBarrier) == ExpandedBarrier; }
static void load_set_expanded_barrier(LoadNode* load) { load->set_barrier_data(ExpandedBarrier); }

static void load_set_barrier(LoadNode* load, bool weak) {
  if (weak) {
    load->set_barrier_data(RequireBarrier | WeakBarrier);
  } else {
    load->set_barrier_data(RequireBarrier);
  }
}
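
// A worked example of the barrier_data encoding above (illustrative): a
// strong oop load gets RequireBarrier (0x1), a weak oop load gets
// RequireBarrier | WeakBarrier (0x3), and once the barrier for a load has
// been inserted the data is overwritten with ExpandedBarrier (0x4), so that
// load_require_barrier() no longer matches and the load is not processed twice.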

// == LoadBarrierNode ==
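
// Shape of a LoadBarrierNode (a sketch):
//
//   inputs:  Control, Memory, Oop (the loaded value), Address (the load
//            address) and Similar (the Oop projection of another barrier on
//            the same address, if any, used to elide dominated barriers)
//   outputs: a (Control, Memory, Oop) tuple, consumed through ProjNodes
//
// The Oop output is the healed value; when a barrier is inserted, the users
// of the original load are rewired to that projection.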

LoadBarrierNode::LoadBarrierNode(Compile* C,
                                 Node* c,
                                 Node* mem,
                                 Node* val,
                                 Node* adr,
                                 bool weak) :
    MultiNode(Number_of_Inputs),
    _weak(weak) {
  init_req(Control, c);
  init_req(Memory, mem);
  init_req(Oop, val);
  init_req(Address, adr);
  init_req(Similar, C->top());

  init_class_id(Class_LoadBarrier);
  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
  bs->register_potential_barrier_node(this);
}

uint LoadBarrierNode::size_of() const {
  return sizeof(*this);
}

bool LoadBarrierNode::cmp(const Node& n) const {
  ShouldNotReachHere();
  return false;
}

const Type* LoadBarrierNode::bottom_type() const {
  const Type** floadbarrier = (const Type**)(Compile::current()->type_arena()->Amalloc_4((Number_of_Outputs) * sizeof(Type*)));
  Node* in_oop = in(Oop);
  floadbarrier[Control] = Type::CONTROL;
  floadbarrier[Memory] = Type::MEMORY;
  floadbarrier[Oop] = in_oop == NULL ? Type::TOP : in_oop->bottom_type();
  return TypeTuple::make(Number_of_Outputs, floadbarrier);
}

const TypePtr* LoadBarrierNode::adr_type() const {
  ShouldNotReachHere();
  return NULL;
}

const Type* LoadBarrierNode::Value(PhaseGVN* phase) const {
  const Type** floadbarrier = (const Type**)(phase->C->type_arena()->Amalloc_4((Number_of_Outputs) * sizeof(Type*)));
  const Type* val_t = phase->type(in(Oop));
  floadbarrier[Control] = Type::CONTROL;
  floadbarrier[Memory] = Type::MEMORY;
  floadbarrier[Oop] = val_t;
  return TypeTuple::make(Number_of_Outputs, floadbarrier);
}

bool LoadBarrierNode::is_dominator(PhaseIdealLoop* phase, bool linear_only, Node* d, Node* n) {
  if (phase != NULL) {
    return phase->is_dominator(d, n);
  }

  // Without dominator information, do a short walk (at most 10 steps) up the
  // control chain from n and check whether we reach d.
  for (int i = 0; i < 10 && n != NULL; i++) {
    n = IfNode::up_one_dom(n, linear_only);
    if (n == d) {
      return true;
    }
  }

  return false;
}

LoadBarrierNode* LoadBarrierNode::has_dominating_barrier(PhaseIdealLoop* phase, bool linear_only, bool look_for_similar) {
  if (is_weak()) {
    // Weak barriers can't be eliminated
    return NULL;
  }

  Node* val = in(LoadBarrierNode::Oop);
  if (in(Similar)->is_Proj() && in(Similar)->in(0)->is_LoadBarrier()) {
    LoadBarrierNode* lb = in(Similar)->in(0)->as_LoadBarrier();
    assert(lb->in(Address) == in(Address), "");
    // The load barrier on the Similar edge dominates, so if it now has the
    // same Oop input it can replace this barrier.
    if (lb->in(Oop) == in(Oop)) {
      return lb;
    }
    // Follow the chain of load barriers through their Similar edges
    while (!lb->in(Similar)->is_top()) {
      lb = lb->in(Similar)->in(0)->as_LoadBarrier();
      assert(lb->in(Address) == in(Address), "");
    }
    if (lb != in(Similar)->in(0)) {
      return lb;
    }
  }
  for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
    Node* u = val->fast_out(i);
    if (u != this && u->is_LoadBarrier() && u->in(Oop) == val && u->as_LoadBarrier()->has_true_uses()) {
      Node* this_ctrl = in(LoadBarrierNode::Control);
      Node* other_ctrl = u->in(LoadBarrierNode::Control);
      if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) {
        return u->as_LoadBarrier();
      }
    }
  }

  if (can_be_eliminated()) {
    return NULL;
  }

  if (!look_for_similar) {
    return NULL;
  }

  Node* addr = in(LoadBarrierNode::Address);
  for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) {
    Node* u = addr->fast_out(i);
    if (u != this && u->is_LoadBarrier() && u->as_LoadBarrier()->has_true_uses()) {
      Node* this_ctrl = in(LoadBarrierNode::Control);
      Node* other_ctrl = u->in(LoadBarrierNode::Control);
      if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) {
        ResourceMark rm;
        Unique_Node_List wq;
        wq.push(in(LoadBarrierNode::Control));
        bool ok = true;
        bool dom_found = false;
        for (uint next = 0; next < wq.size(); ++next) {
          Node* n = wq.at(next);
          if (n->is_top()) {
            return NULL;
          }
          assert(n->is_CFG(), "");
          if (n->is_SafePoint()) {
            ok = false;
            break;
          }
          if (n == u) {
            dom_found = true;
            continue;
          }
          if (n->is_Region()) {
            for (uint i = 1; i < n->req(); i++) {
              Node* m = n->in(i);
              if (m != NULL) {
                wq.push(m);
              }
            }
          } else {
            Node* m = n->in(0);
            if (m != NULL) {
              wq.push(m);
            }
          }
        }
        if (ok) {
          assert(dom_found, "");
          return u->as_LoadBarrier();
        }
        break;
      }
    }
  }

  return NULL;
}

void LoadBarrierNode::push_dominated_barriers(PhaseIterGVN* igvn) const {
  // A change to this barrier may affect dominated barriers, so re-push those
  assert(!is_weak(), "sanity");
  Node* val = in(LoadBarrierNode::Oop);

  for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
    Node* u = val->fast_out(i);
    if (u != this && u->is_LoadBarrier() && u->in(Oop) == val) {
      Node* this_ctrl = in(Control);
      Node* other_ctrl = u->in(Control);
      if (is_dominator(NULL, false, this_ctrl, other_ctrl)) {
        igvn->_worklist.push(u);
      }
    }

    Node* addr = in(LoadBarrierNode::Address);
    for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) {
      Node* u = addr->fast_out(i);
      if (u != this && u->is_LoadBarrier() && u->in(Similar)->is_top()) {
        Node* this_ctrl = in(Control);
        Node* other_ctrl = u->in(Control);
        if (is_dominator(NULL, false, this_ctrl, other_ctrl)) {
          igvn->_worklist.push(u);
        }
      }
    }
  }
}

Node* LoadBarrierNode::Identity(PhaseGVN* phase) {
  LoadBarrierNode* dominating_barrier = has_dominating_barrier(NULL, true, false);
  if (dominating_barrier != NULL) {
    assert(!is_weak(), "Weak barriers can't be eliminated");
    assert(dominating_barrier->in(Oop) == in(Oop), "");
    return dominating_barrier;
  }

  return this;
}

Node* LoadBarrierNode::Ideal(PhaseGVN* phase, bool can_reshape) {
  if (remove_dead_region(phase, can_reshape)) {
    return this;
  }

  Node* val = in(Oop);
  Node* mem = in(Memory);
  Node* ctrl = in(Control);

  assert(val->Opcode() != Op_LoadN, "");
  assert(val->Opcode() != Op_DecodeN, "");

  if (mem->is_MergeMem()) {
    Node* new_mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
    set_req(Memory, new_mem);
    if (mem->outcnt() == 0 && can_reshape) {
      phase->is_IterGVN()->_worklist.push(mem);
    }
    return this;
  }

  LoadBarrierNode* dominating_barrier = NULL;
  if (!is_weak()) {
    dominating_barrier = has_dominating_barrier(NULL, !can_reshape, !phase->C->major_progress());
    if (dominating_barrier != NULL && dominating_barrier->in(Oop) != in(Oop)) {
      assert(in(Address) == dominating_barrier->in(Address), "");
      set_req(Similar, dominating_barrier->proj_out(Oop));
      return this;
    }
  }

  bool eliminate = can_reshape && (dominating_barrier != NULL || !has_true_uses());
  if (eliminate) {
    if (can_reshape) {
      PhaseIterGVN* igvn = phase->is_IterGVN();
      Node* out_ctrl = proj_out_or_null(Control);
      Node* out_res = proj_out_or_null(Oop);

      if (out_ctrl != NULL) {
        igvn->replace_node(out_ctrl, ctrl);
      }

      // This transformation may cause the Similar edge on dependent load barriers to be invalid
      fix_similar_in_uses(igvn);
      if (out_res != NULL) {
        if (dominating_barrier != NULL) {
          assert(!is_weak(), "Sanity");
          igvn->replace_node(out_res, dominating_barrier->proj_out(Oop));
        } else {
          igvn->replace_node(out_res, val);
        }
      }
    }
    return new ConINode(TypeInt::ZERO);
  }

  // If the Similar edge is no longer a load barrier, clear it
  Node* similar = in(Similar);
  if (!similar->is_top() && !(similar->is_Proj() && similar->in(0)->is_LoadBarrier())) {
    set_req(Similar, phase->C->top());
    return this;
  }

  if (can_reshape && !is_weak()) {
    // If this barrier is linked through the Similar edge by a
    // dominated barrier and both barriers have the same Oop field,
    // the dominated barrier can go away, so push it for reprocessing.
    // We also want to avoid having a barrier depend, through its Similar
    // edge, on a dominating barrier that itself depends on a third barrier
    // through its own Similar edge; instead, make the first barrier depend
    // directly on the third.
    PhaseIterGVN* igvn = phase->is_IterGVN();
    Node* out_res = proj_out(Oop);
    for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
      Node* u = out_res->fast_out(i);
      if (u->is_LoadBarrier() && u->in(Similar) == out_res &&
          (u->in(Oop) == val || !u->in(Similar)->is_top())) {
        assert(!u->as_LoadBarrier()->is_weak(), "Sanity");
        igvn->_worklist.push(u);
      }
    }
    push_dominated_barriers(igvn);
  }

  return NULL;
}

uint LoadBarrierNode::match_edge(uint idx) const {
  ShouldNotReachHere();
  return 0;
}

void LoadBarrierNode::fix_similar_in_uses(PhaseIterGVN* igvn) {
  Node* out_res = proj_out_or_null(Oop);
  if (out_res == NULL) {
    return;
  }

  for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
    Node* u = out_res->fast_out(i);
    if (u->is_LoadBarrier() && u->in(Similar) == out_res) {
      igvn->replace_input_of(u, Similar, igvn->C->top());
      --i;
      --imax;
    }
  }
}

bool LoadBarrierNode::has_true_uses() const {
  // A true use is any use of the Oop projection other than
  // the Similar edge of another load barrier.
  Node* out_res = proj_out_or_null(Oop);
  if (out_res != NULL) {
    for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
      Node* u = out_res->fast_out(i);
      if (!u->is_LoadBarrier() || u->in(Similar) != out_res) {
        return true;
      }
    }
  }
  return false;
}

static bool barrier_needed(C2Access& access) {
  return ZBarrierSet::barrier_needed(access.decorators(), access.type());
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  Node* p = BarrierSetC2::load_at_resolved(access, val_type);
  if (!barrier_needed(access)) {
    return p;
  }

  bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0;
  if (p->isa_Load()) {
    load_set_barrier(p->as_Load(), weak);
  }
  return p;
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
  LoadStoreNode* lsn = result->as_LoadStore();
  if (barrier_needed(access)) {
    lsn->set_has_barrier();
  }
  return lsn;
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
  LoadStoreNode* lsn = result->as_LoadStore();
  if (barrier_needed(access)) {
    lsn->set_has_barrier();
  }
  return lsn;
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
  LoadStoreNode* lsn = result->as_LoadStore();
  if (barrier_needed(access)) {
    lsn->set_has_barrier();
  }
  return lsn;
}

// == Macro Expansion ==

// Optimized, low-spill load barrier variant, using a stub specialized on the register used
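//
// The expansion below produces roughly the following pseudo code (a sketch;
// the exact node types are in the code underneath):
//
//   bad_mask = *(jthread + address_bad_mask_offset)
//   if ((CastP2X(in_val) & bad_mask) != 0) {      // unlikely
//     res = LoadBarrierSlowReg(in_adr, in_val)    // slow path: heal the oop
//   } else {
//     res = in_val                                // fast path: oop is good
//   }
//   // a Region/Phi pair merges the two paths and replaces out_ctrl/out_res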
void ZBarrierSetC2::expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const {
  PhaseIterGVN &igvn = phase->igvn();
  float unlikely = PROB_UNLIKELY(0.999);

  Node* in_ctrl = barrier->in(LoadBarrierNode::Control);
  Node* in_mem = barrier->in(LoadBarrierNode::Memory);
  Node* in_val = barrier->in(LoadBarrierNode::Oop);
  Node* in_adr = barrier->in(LoadBarrierNode::Address);

  Node* out_ctrl = barrier->proj_out(LoadBarrierNode::Control);
  Node* out_res = barrier->proj_out(LoadBarrierNode::Oop);

  assert(barrier->in(LoadBarrierNode::Oop) != NULL, "oop to loadbarrier node cannot be null");

  Node* jthread = igvn.transform(new ThreadLocalNode());
  Node* adr = phase->basic_plus_adr(jthread, in_bytes(ZThreadLocalData::address_bad_mask_offset()));
  Node* bad_mask = igvn.transform(LoadNode::make(igvn, in_ctrl, in_mem, adr,
                                                 TypeRawPtr::BOTTOM, TypeX_X, TypeX_X->basic_type(),
                                                 MemNode::unordered));
  Node* cast = igvn.transform(new CastP2XNode(in_ctrl, in_val));
  Node* obj_masked = igvn.transform(new AndXNode(cast, bad_mask));
  Node* cmp = igvn.transform(new CmpXNode(obj_masked, igvn.zerocon(TypeX_X->basic_type())));
  Node* bol = igvn.transform(new BoolNode(cmp, BoolTest::ne))->as_Bool();
  IfNode* iff = igvn.transform(new IfNode(in_ctrl, bol, unlikely, COUNT_UNKNOWN))->as_If();
  Node* then = igvn.transform(new IfTrueNode(iff));
  Node* elsen = igvn.transform(new IfFalseNode(iff));

  Node* new_loadp = igvn.transform(new LoadBarrierSlowRegNode(then, in_adr, in_val,
                                                              (const TypePtr*) in_val->bottom_type(), barrier->is_weak()));

  // Create the final region/phi pair to converge ctrl/data paths to downstream code
  Node* result_region = igvn.transform(new RegionNode(3));
  result_region->set_req(1, then);
  result_region->set_req(2, elsen);

  Node* result_phi = igvn.transform(new PhiNode(result_region, TypeInstPtr::BOTTOM));
  result_phi->set_req(1, new_loadp);
  result_phi->set_req(2, barrier->in(LoadBarrierNode::Oop));

  igvn.replace_node(out_ctrl, result_region);
  igvn.replace_node(out_res, result_phi);

  assert(barrier->outcnt() == 0, "LoadBarrier macro node has outputs after expansion!");

  igvn.remove_dead_node(barrier);
  igvn.remove_dead_node(out_ctrl);
  igvn.remove_dead_node(out_res);

  assert(is_gc_barrier_node(result_phi), "sanity");
  assert(step_over_gc_barrier(result_phi) == in_val, "sanity");

  phase->C->print_method(PHASE_BARRIER_EXPANSION, 4, barrier->_idx);
}

bool ZBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
  ZBarrierSetC2State* s = state();
  if (s->load_barrier_count() > 0) {
    PhaseMacroExpand macro(igvn);

    // First pass: expand the barriers for which can_be_eliminated() holds, skipping the rest
    int skipped = 0;
    while (s->load_barrier_count() > skipped) {
      int load_barrier_count = s->load_barrier_count();
      LoadBarrierNode* n = s->load_barrier_node(load_barrier_count - 1 - skipped);
      if (igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())) {
        // Node is unreachable, so don't try to expand it
        s->remove_load_barrier_node(n);
        continue;
      }
      if (!n->can_be_eliminated()) {
        skipped++;
        continue;
      }
      expand_loadbarrier_node(&macro, n);
      assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list");
      if (C->failing()) {
        return true;
      }
    }
    // Second pass: expand the remaining barriers
    while (s->load_barrier_count() > 0) {
      int load_barrier_count = s->load_barrier_count();
      LoadBarrierNode* n = s->load_barrier_node(load_barrier_count - 1);
      assert(!(igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())), "should have been processed already");
      assert(!n->can_be_eliminated(), "should have been processed already");
      expand_loadbarrier_node(&macro, n);
      assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list");
      if (C->failing()) {
        return true;
      }
    }
    igvn.set_delay_transform(false);
    igvn.optimize();
    if (C->failing()) {
      return true;
    }
  }

  return false;
}

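// step_over_gc_barrier() returns the pre-barrier value behind a GC barrier
// pattern. It has to recognize both shapes a barrier can take (a sketch):
//
//   before expansion: LoadBarrier -> Proj(Oop)
//   after expansion:  Phi(LoadBarrierSlowReg, original value)
//
// In the expanded shape the original value is phi input 2, matching the
// Region/Phi construction in expand_loadbarrier_node() above.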
Node* ZBarrierSetC2::step_over_gc_barrier(Node* c) const {
  Node* node = c;

  // 1. This step follows potential oop projections of a load barrier before expansion
  if (node->is_Proj()) {
    node = node->in(0);
  }

  // 2. This step checks for unexpanded load barriers
  if (node->is_LoadBarrier()) {
    return node->in(LoadBarrierNode::Oop);
  }

  // 3. This step checks for the phi corresponding to an optimized load barrier expansion
  if (node->is_Phi()) {
    PhiNode* phi = node->as_Phi();
    Node* n = phi->in(1);
    if (n != NULL && n->is_LoadBarrierSlowReg()) {
      assert(c == node, "projections from step 1 should only be seen before macro expansion");
      return phi->in(2);
    }
  }

  return c;
}

Node* ZBarrierSetC2::step_over_gc_barrier_ctrl(Node* c) const {
  Node* node = c;

  // 1. This step follows potential ctrl projections of a load barrier before expansion
  if (node->is_Proj()) {
    node = node->in(0);
  }

  // 2. This step checks for unexpanded load barriers
  if (node->is_LoadBarrier()) {
    return node->in(LoadBarrierNode::Control);
  }

  return c;
}

bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const {
  return type == T_OBJECT || type == T_ARRAY;
}

bool ZBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, uint opcode) const {
  switch (opcode) {
    case Op_LoadBarrier:
      assert(0, "There should be no load barriers left");
    case Op_ZGetAndSetP:
    case Op_ZCompareAndExchangeP:
    case Op_ZCompareAndSwapP:
    case Op_ZWeakCompareAndSwapP:
#ifdef ASSERT
      if (VerifyOptoOopOffsets) {
        MemNode* mem = n->as_Mem();
        // Check to see if address types have grounded out somehow.
        const TypeInstPtr* tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
        ciInstanceKlass* k = tp->klass()->as_instance_klass();
        bool oop_offset_is_sane = k->contains_field_offset(tp->offset());
        assert(!tp || oop_offset_is_sane, "");
      }
#endif
      return true;
    default:
      return false;
  }
}

bool ZBarrierSetC2::matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const {
  switch (opcode) {
    case Op_CallLeaf:
      if (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() ||
          n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr()) {
        mem_op = true;
        mem_addr_idx = TypeFunc::Parms + 1;
        return true;
      }
      return false;
    default:
      return false;
  }
}

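// The Z atomic nodes carry an extra keep-alive input (the load barrier's Oop
// projection) that the matcher does not expect. The post-visit below packs
// the inputs into BinaryNode pairs, so for a CAS the resulting shape is
// roughly (a sketch):
//
//   in(Address) = Binary(address, keepalive)
//   in(ValueIn) = Binary(expected, new_value)
//
// mirroring how the matcher packs the operands of the plain CompareAndSwap
// variants into two-operand form.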
bool ZBarrierSetC2::matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const {
  switch (opcode) {
    case Op_ZCompareAndExchangeP:
    case Op_ZCompareAndSwapP:
    case Op_ZWeakCompareAndSwapP: {
      Node* mem = n->in(MemNode::Address);
      Node* keepalive = n->in(5);
      Node* pair1 = new BinaryNode(mem, keepalive);

      Node* newval = n->in(MemNode::ValueIn);
      Node* oldval = n->in(LoadStoreConditionalNode::ExpectedIn);
      Node* pair2 = new BinaryNode(oldval, newval);

      n->set_req(MemNode::Address, pair1);
      n->set_req(MemNode::ValueIn, pair2);
      n->del_req(5);
      n->del_req(LoadStoreConditionalNode::ExpectedIn);
      return true;
    }
    case Op_ZGetAndSetP: {
      Node* keepalive = n->in(4);
      Node* newval = n->in(MemNode::ValueIn);
      Node* pair = new BinaryNode(newval, keepalive);
      n->set_req(MemNode::ValueIn, pair);
      n->del_req(4);
      return true;
    }

    default:
      return false;
  }
}

// == Verification ==

#ifdef ASSERT

static bool look_for_barrier(Node* n, bool post_parse, VectorSet& visited) {
  if (visited.test_set(n->_idx)) {
    return true;
  }

  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node* u = n->fast_out(i);
    if (u->is_LoadBarrier()) {
      // A barrier was found - this use is fine
    } else if ((u->is_Phi() || u->is_CMove()) && !post_parse) {
      if (!look_for_barrier(u, post_parse, visited)) {
        return false;
      }
    } else if (u->Opcode() == Op_EncodeP || u->Opcode() == Op_DecodeN) {
      if (!look_for_barrier(u, post_parse, visited)) {
        return false;
      }
    } else if (u->Opcode() != Op_SCMemProj) {
      tty->print("bad use"); u->dump();
      return false;
    }
  }

  return true;
}

void ZBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
  switch (phase) {
    case BarrierSetC2::BeforeOptimize:
    case BarrierSetC2::BeforeLateInsertion:
      assert(state()->load_barrier_count() == 0, "No barriers inserted yet");
      break;
    case BarrierSetC2::BeforeMacroExpand:
      // Barrier placement should be set by now.
      verify_gc_barriers(false /*post_parse*/);
      break;
    case BarrierSetC2::BeforeCodeGen:
      // Barriers have been fully expanded.
      assert(state()->load_barrier_count() == 0, "No more macro barriers");
      break;
    default:
      assert(0, "Phase without verification");
  }
}

// post_parse implies that there might be load barriers without uses after parsing.
// That only applies when adding barriers at parse time.
void ZBarrierSetC2::verify_gc_barriers(bool post_parse) const {
  ZBarrierSetC2State* s = state();
  Compile* C = Compile::current();
  ResourceMark rm;
  VectorSet visited(Thread::current()->resource_area());

  for (int i = 0; i < s->load_barrier_count(); i++) {
    LoadBarrierNode* n = s->load_barrier_node(i);

    // If a dominating barrier on the same address exists on the Similar
    // edge, it must not be applied to the value from the same load;
    // otherwise the value would not be reloaded before its second use.
    assert(n->in(LoadBarrierNode::Similar)->is_top() ||
           (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() &&
            n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Address) == n->in(LoadBarrierNode::Address) &&
            n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Oop) != n->in(LoadBarrierNode::Oop)),
           "broken similar edge");

    assert(n->as_LoadBarrier()->has_true_uses(),
           "found unneeded load barrier");

    // Several load barrier nodes chained through their Similar edges
    // would break the code that removes the barriers in final graph reshape.
    assert(n->in(LoadBarrierNode::Similar)->is_top() ||
           (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() &&
            n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Similar)->is_top()),
           "chain of Similar load barriers");

    if (!n->in(LoadBarrierNode::Similar)->is_top()) {
      ResourceMark rm;
      Unique_Node_List wq;
      Node* other = n->in(LoadBarrierNode::Similar)->in(0);
      wq.push(n);
      for (uint next = 0; next < wq.size(); ++next) {
        Node* nn = wq.at(next);
        assert(nn->is_CFG(), "");
        assert(!nn->is_SafePoint(), "");

        if (nn == other) {
          continue;
        }

        if (nn->is_Region()) {
          for (uint i = 1; i < nn->req(); i++) {
            Node* m = nn->in(i);
            if (m != NULL) {
              wq.push(m);
            }
          }
        } else {
          Node* m = nn->in(0);
          if (m != NULL) {
            wq.push(m);
          }
        }
      }
    }
  }
}

#endif // end verification code

static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl);

// The following code clones all uses of a load that sits between a call and
// its catch blocks, pushing a clone down to each use.

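// The IR shape being repaired looks roughly like this (a sketch):
//
//   Call
//    |
//   Proj (fallthrough control)   <- loads can be attached here
//    |
//   Catch
//    |       \
//   CatchProj ... CatchProj      <- one per catch block
//
// A load hanging off the Proj between the call and the Catch has no block of
// its own to land in, so it (and anything it feeds in that position) is
// cloned down into each catch proj that uses it.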
static bool fixup_uses_in_catch(PhaseIdealLoop* phase, Node* start_ctrl, Node* node) {

  if (!phase->has_ctrl(node)) {
    // This node is floating - it doesn't need to be cloned.
    assert(node != start_ctrl, "check");
    return false;
  }

  Node* ctrl = phase->get_ctrl(node);
  if (ctrl != start_ctrl) {
    // We are in a successor block - the node is ok.
    return false; // Unwind
  }

  // Process successor nodes
  int outcnt = node->outcnt();
  for (int i = 0; i < outcnt; i++) {
    // Slot 0 is used because a successful recursion clones the use away,
    // shrinking the out list as we go.
    Node* n = node->raw_out(0);
    assert(!n->is_LoadBarrier(), "Sanity");
    // Calling recursively, visiting leaves first
    fixup_uses_in_catch(phase, start_ctrl, n);
  }

  // Now all successors are outside
  // - Clone this node to each successor
  assert(!node->is_Store(), "Stores not expected here");

  // In some very rare cases a load that doesn't need a barrier will end up here.
  // Treat it as a LoadP and the insertion of phis will be done correctly.
  if (node->is_Load()) {
    call_catch_cleanup_one(phase, node->as_Load(), phase->get_ctrl(node));
  } else {
    for (DUIterator_Fast jmax, i = node->fast_outs(jmax); i < jmax; i++) {
      Node* use = node->fast_out(i);
      Node* clone = node->clone();
      assert(clone->outcnt() == 0, "");

      assert(use->find_edge(node) != -1, "check");
      phase->igvn().rehash_node_delayed(use);
      use->replace_edge(node, clone);

      Node* new_ctrl;
      if (use->is_block_start()) {
        new_ctrl = use;
      } else if (use->is_CFG()) {
        new_ctrl = use->in(0);
        assert(new_ctrl != NULL, "");
      } else {
        new_ctrl = phase->get_ctrl(use);
      }

      phase->set_ctrl(clone, new_ctrl);

      if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone op %i as %i to control %i", node->_idx, clone->_idx, new_ctrl->_idx);
      phase->igvn().register_new_node_with_optimizer(clone);
      --i, --jmax;
    }
    assert(node->outcnt() == 0, "must be empty now");

    // The node is dead.
    phase->igvn().remove_dead_node(node);
  }
  return true; // unwind - return if a use was processed
}

// Clone a load to a specific catch_proj
static Node* clone_load_to_catchproj(PhaseIdealLoop* phase, Node* load, Node* catch_proj) {
  Node* cloned_load = load->clone();
  cloned_load->set_req(0, catch_proj);      // set explicit control
  phase->set_ctrl(cloned_load, catch_proj); // update
  if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone LOAD %i as %i to control %i", load->_idx, cloned_load->_idx, catch_proj->_idx);
  phase->igvn().register_new_node_with_optimizer(cloned_load);
  return cloned_load;
}

static Node* get_dominating_region(PhaseIdealLoop* phase, Node* node, Node* stop) {
  Node* region = node;
  while (!region->isa_Region()) {
    Node* up = phase->idom(region);
    assert(up != region, "Must not loop");
    assert(up != stop, "Must not find original control");
    region = up;
  }
  return region;
}

// Clone this load to each catch block
static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) {
  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
  phase->igvn().set_delay_transform(true);

  // Verify preconditions
  assert(ctrl->isa_Proj() && ctrl->in(0)->isa_Call(), "Must be a call proj");
  assert(ctrl->raw_out(0)->isa_Catch(), "Must be a catch");

  if (ctrl->raw_out(0)->isa_Catch()->outcnt() == 1) {
    if (trace) tty->print_cr("Cleaning up catch: Skipping load %i, call with single catch", load->_idx);
    return;
  }

  // Process the load's successor nodes - if any of them is between
  // the call and the catch blocks, it needs to be cloned too.
  // This is done recursively.
  int outcnt = load->outcnt();
  uint index = 0;
  for (int i = 0; i < outcnt; i++) {
    if (index < load->outcnt()) {
      Node* n = load->raw_out(index);
      assert(!n->is_LoadBarrier(), "Sanity");
      if (!fixup_uses_in_catch(phase, ctrl, n)) {
        // if no successor was cloned, progress to next out.
        index++;
      }
    }
  }

  // Now all the load's uses have been cloned down.
  // The only thing left is to clone the loads, but they must end up
  // first in the catch blocks.

  // We clone the loads into the catch blocks only when needed.
  // An array is used to map the catch blocks to each lazily cloned load.
  // That way no unnecessary loads are cloned.

  // Any use dominated by the original block must have a phi and a region added

  Node* catch_node = ctrl->raw_out(0);
  int number_of_catch_projs = catch_node->outcnt();
  Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs);
  Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs);

  // The phi_map is used to keep track of where phis have already been inserted
  int phi_map_len = phase->C->unique();
  Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len);
  Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len);

  for (unsigned int i = 0; i < load->outcnt(); i++) {
    Node* load_use_control = NULL;
    Node* load_use = load->raw_out(i);

    if (phase->has_ctrl(load_use)) {
      load_use_control = phase->get_ctrl(load_use);
    } else {
      load_use_control = load_use->in(0);
    }
    assert(load_use_control != NULL, "sanity");
    if (trace) tty->print_cr("  Handling use: %i, with control: %i", load_use->_idx, load_use_control->_idx);

    // Sometimes the load's use is a phi. For those we need to determine from
    // which catch block the use is defined.
    bool load_use_is_phi = false;
    unsigned int load_use_phi_index = 0;
    Node* phi_ctrl = NULL;
    if (load_use->is_Phi()) {
      // Find phi input that matches load
      for (unsigned int u = 1; u < load_use->req(); u++) {
        if (load_use->in(u) == load) {
          load_use_is_phi = true;
          load_use_phi_index = u;
          assert(load_use->in(0)->is_Region(), "Region or broken");
          phi_ctrl = load_use->in(0)->in(u);
          assert(phi_ctrl->is_CFG(), "check");
          assert(phi_ctrl != load, "check");
          break;
        }
      }
      assert(load_use_is_phi, "must find");
      assert(load_use_phi_index > 0, "sanity");
    }

    // For each load use, see which catch proj dominates; create the load clone lazily and reconnect
    bool found_dominating_catchproj = false;
    for (int c = 0; c < number_of_catch_projs; c++) {
      Node* catchproj = catch_node->raw_out(c);
      assert(catchproj != NULL && catchproj->isa_CatchProj(), "Sanity");

      if (!phase->is_dominator(catchproj, load_use_control)) {
        if (load_use_is_phi && phase->is_dominator(catchproj, phi_ctrl)) {
          // The load's use is local to the catchproj.
          // Fall through and replace load with catch-local load clone.
        } else {
          continue;
        }
      }
      assert(!found_dominating_catchproj, "Max one should match");

      // Clone loads to catch projs
      Node* load_clone = proj_to_load_mapping[c];
      if (load_clone == NULL) {
        load_clone = clone_load_to_catchproj(phase, load, catchproj);
        proj_to_load_mapping[c] = load_clone;
      }
      phase->igvn().rehash_node_delayed(load_use);

      if (load_use_is_phi) {
        // Phis are special - the load is defined from a specific control flow
        load_use->set_req(load_use_phi_index, load_clone);
      } else {
        // Multiple edges can be replaced at once - on calls for example
        load_use->replace_edge(load, load_clone);
      }
      --i; // more than one edge can have been removed, but the next is in later iterations

      // We could break the for-loop after finding a dominating match.
      // But keep iterating to catch any bad idom early.
      found_dominating_catchproj = true;
    }

    // We found no single catchproj that dominated the use - the use is at a point after
    // where control flow from multiple catch projs has merged. We will have to create
    // phi nodes before the use and tie the output from the cloned loads together. It
    // can be a single phi or a number of chained phis, depending on control flow
    if (!found_dominating_catchproj) {

      // Use phi-control if use is a phi
      if (load_use_is_phi) {
        load_use_control = phi_ctrl;
      }
      assert(phase->is_dominator(ctrl, load_use_control), "Common use but no dominator");

      // Clone a load on all paths
      for (int c = 0; c < number_of_catch_projs; c++) {
        Node* catchproj = catch_node->raw_out(c);
        Node* load_clone = proj_to_load_mapping[c];
        if (load_clone == NULL) {
          load_clone = clone_load_to_catchproj(phase, load, catchproj);
          proj_to_load_mapping[c] = load_clone;
        }
      }

      // Move up dominator tree from use until dom front is reached
      Node* next_region = get_dominating_region(phase, load_use_control, ctrl);
      while (phase->idom(next_region) != catch_node) {
        next_region = phase->idom(next_region);
        if (trace) tty->print_cr("Moving up idom to region ctrl %i", next_region->_idx);
      }
      assert(phase->is_dominator(catch_node, next_region), "Sanity");

      // Create or reuse a phi node that collects all cloned loads and feed it to the use.
      Node* test_phi = phi_map[next_region->_idx];
      if ((test_phi != NULL) && test_phi->is_Phi()) {
        // Reuse an already created phi
        if (trace) tty->print_cr("    Using cached Phi %i on load_use %i", test_phi->_idx, load_use->_idx);
        phase->igvn().rehash_node_delayed(load_use);
        load_use->replace_edge(load, test_phi);
        // Now this use is done
      } else {
        // Otherwise we need to create one or more phis
        PhiNode* next_phi = new PhiNode(next_region, load->type());
        phi_map[next_region->_idx] = next_phi; // cache new phi
        phase->igvn().rehash_node_delayed(load_use);
        load_use->replace_edge(load, next_phi);

        int dominators_of_region = 0;
        do {
          // New phi, connect to region and add all loads as in.
          Node* region = next_region;
          assert(region->isa_Region() && region->req() > 2, "Catch dead region nodes");
          PhiNode* new_phi = next_phi;

          if (trace) tty->print_cr("Created Phi %i on load %i with control %i", new_phi->_idx, load->_idx, region->_idx);

          // Need to add all cloned loads to the phi, taking care that the right path is matched
          dominators_of_region = 0; // reset for new region
          for (unsigned int reg_i = 1; reg_i < region->req(); reg_i++) {
            Node* region_pred = region->in(reg_i);
            assert(region_pred->is_CFG(), "check");
            bool pred_has_dominator = false;
            for (int c = 0; c < number_of_catch_projs; c++) {
              Node* catchproj = catch_node->raw_out(c);
              if (phase->is_dominator(catchproj, region_pred)) {
                new_phi->set_req(reg_i, proj_to_load_mapping[c]);
                if (trace) tty->print_cr(" - Phi in(%i) set to load %i", reg_i, proj_to_load_mapping[c]->_idx);
                pred_has_dominator = true;
                dominators_of_region++;
                break;
              }
            }

            // Sometimes we need to chain several phis.
            if (!pred_has_dominator) {
              assert(dominators_of_region <= 1, "More than one region can't require extra phi");
              if (trace) tty->print_cr(" - Region %i pred %i not dominated by catch proj", region->_idx, region_pred->_idx);
              // Continue search on this region_pred
              // - walk up to next region
              // - create a new phi and connect to first new_phi
              next_region = get_dominating_region(phase, region_pred, ctrl);

              // Look up if there already is a phi; create a new one otherwise
              Node* test_phi = phi_map[next_region->_idx];
              if ((test_phi != NULL) && test_phi->is_Phi()) {
                next_phi = test_phi->isa_Phi();
                dominators_of_region++; // record that a match was found and that we are done
                if (trace) tty->print_cr("    Using cached phi Phi %i on control %i", next_phi->_idx, next_region->_idx);
              } else {
                next_phi = new PhiNode(next_region, load->type());
                phi_map[next_region->_idx] = next_phi;
              }
              new_phi->set_req(reg_i, next_phi);
            }
          }

          new_phi->set_req(0, region);
          phase->igvn().register_new_node_with_optimizer(new_phi);
          phase->set_ctrl(new_phi, region);

          assert(dominators_of_region != 0, "Must have found one this iteration");
        } while (dominators_of_region == 1);
      }
      --i;
    }
  } // end of loop over uses

  assert(load->outcnt() == 0, "All uses should be handled");
  phase->igvn().remove_dead_node(load);
  phase->C->print_method(PHASE_CALL_CATCH_CLEANUP, 4, load->_idx);

  // Now we should be home
  phase->igvn().set_delay_transform(false);
}

// Sort out the loads that are between a call and its catch blocks
static void process_catch_cleanup_candidate(PhaseIdealLoop* phase, LoadNode* load) {
  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;

  Node* ctrl = phase->get_ctrl(load);
  if (!ctrl->is_Proj() || (ctrl->in(0) == NULL) || !ctrl->in(0)->isa_Call()) {
    return;
  }

  Node* catch_node = ctrl->isa_Proj()->raw_out(0);
  if (catch_node->is_Catch()) {
    if (catch_node->outcnt() > 1) {
      call_catch_cleanup_one(phase, load, ctrl);
    } else {
      if (trace) tty->print_cr("Call catch cleanup with only one catch: load %i ", load->_idx);
    }
  }
}

void ZBarrierSetC2::barrier_insertion_phase(Compile* C, PhaseIterGVN& igvn) const {
  PhaseIdealLoop::optimize(igvn, LoopOptsZBarrierInsertion);
  if (C->failing()) return;
}

bool ZBarrierSetC2::optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const {

  if (mode == LoopOptsZBarrierInsertion) {
    // First make sure all loads between call and catch are moved to the catch block
    clean_catch_blocks(phase);

    // Then expand barriers on all loads
    insert_load_barriers(phase);

    // Handle all Unsafe that need barriers.
    insert_barriers_on_unsafe(phase);

    phase->C->clear_major_progress();
    return true;
  } else {
    return false;
  }
}

static bool can_simplify_cas(LoadStoreNode* node) {
  // A CAS against a null expected value needs no barrier: a stale
  // (bad-colored) oop is never the null bit pattern, so the compare
  // behaves correctly without healing the loaded value first.
  if (node->isa_LoadStoreConditional()) {
    Node* expected_in = node->as_LoadStoreConditional()->in(LoadStoreConditionalNode::ExpectedIn);
    return (expected_in->get_ptr_type() == TypePtr::NULL_PTR);
  } else {
    return false;
  }
}
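
// insert_barrier_before_unsafe() rewrites an unsafe oop LoadStore into its
// Z-specific equivalent plus an explicit pre-load and barrier, roughly
// (a sketch):
//
//   CompareAndSwapP(ctrl, mem, adr, new, expected)
//
// becomes
//
//   pre     = LoadP(mem, adr)                      // control attached afterwards
//   barrier = LoadBarrier(ctrl, mem, pre, adr)
//   cas     = ZCompareAndSwapP(..., keepalive: barrier.Oop)
//
// with all former users of ctrl moved under barrier.Control, so the barrier
// is guaranteed to execute before the atomic operation.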
static void insert_barrier_before_unsafe(PhaseIdealLoop* phase, LoadStoreNode* old_node) {

  Compile* C = phase->C;
  PhaseIterGVN &igvn = phase->igvn();
  LoadStoreNode* zclone = NULL;

  Node* in_ctrl = old_node->in(MemNode::Control);
  Node* in_mem = old_node->in(MemNode::Memory);
  Node* in_adr = old_node->in(MemNode::Address);
  Node* in_val = old_node->in(MemNode::ValueIn);
  const TypePtr* adr_type = old_node->adr_type();
  const TypePtr* load_type = TypeOopPtr::BOTTOM; // The type for the load we are adding

  switch (old_node->Opcode()) {
    case Op_CompareAndExchangeP: {
      zclone = new ZCompareAndExchangePNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
                                            adr_type, old_node->get_ptr_type(), ((CompareAndExchangeNode*)old_node)->order());
      load_type = old_node->bottom_type()->is_ptr();
      break;
    }
    case Op_WeakCompareAndSwapP: {
      if (can_simplify_cas(old_node)) {
        break;
      }
      zclone = new ZWeakCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
                                            ((CompareAndSwapNode*)old_node)->order());
      adr_type = TypePtr::BOTTOM;
      break;
    }
    case Op_CompareAndSwapP: {
      if (can_simplify_cas(old_node)) {
        break;
      }
      zclone = new ZCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
                                        ((CompareAndSwapNode*)old_node)->order());
      adr_type = TypePtr::BOTTOM;
      break;
    }
    case Op_GetAndSetP: {
      zclone = new ZGetAndSetPNode(in_ctrl, in_mem, in_adr, in_val, old_node->adr_type(), old_node->get_ptr_type());
      load_type = old_node->bottom_type()->is_ptr();
      break;
    }
  }
  if (zclone != NULL) {
    igvn.register_new_node_with_optimizer(zclone, old_node);

    // Make load
    LoadPNode* load = new LoadPNode(NULL, in_mem, in_adr, adr_type, load_type, MemNode::unordered,
                                    LoadNode::DependsOnlyOnTest);
    load_set_expanded_barrier(load);
    igvn.register_new_node_with_optimizer(load);
    igvn.replace_node(old_node, zclone);

    Node* barrier = new LoadBarrierNode(C, NULL, in_mem, load, in_adr, false /* weak */);
    Node* barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
    Node* barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);

    igvn.register_new_node_with_optimizer(barrier);
    igvn.register_new_node_with_optimizer(barrier_val);
    igvn.register_new_node_with_optimizer(barrier_ctrl);

    // Loop over all of in_ctrl's uses and move them to barrier_ctrl
    for (DUIterator_Last imin, i = in_ctrl->last_outs(imin); i >= imin; --i) {
      Node* use = in_ctrl->last_out(i);
      uint l;
      for (l = 0; use->in(l) != in_ctrl; l++) {}
      igvn.replace_input_of(use, l, barrier_ctrl);
    }

    load->set_req(MemNode::Control, in_ctrl);
    barrier->set_req(LoadBarrierNode::Control, in_ctrl);
    zclone->add_req(barrier_val); // add req as keep alive.

    C->print_method(PHASE_ADD_UNSAFE_BARRIER, 4, zclone->_idx);
  }
}

void ZBarrierSetC2::insert_barriers_on_unsafe(PhaseIdealLoop* phase) const {
  Compile* C = phase->C;
  PhaseIterGVN &igvn = phase->igvn();
  uint new_ids = C->unique();
  VectorSet visited(Thread::current()->resource_area());
  GrowableArray<Node*> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
  nodeStack.push(C->root());
  visited.test_set(C->root()->_idx);

  // Traverse all nodes, visit all unsafe ops that require a barrier
  while (nodeStack.length() > 0) {
    Node* n = nodeStack.pop();

    bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup
    if (is_old_node) {
      if (n->is_LoadStore()) {
        LoadStoreNode* lsn = n->as_LoadStore();
        if (lsn->has_barrier()) {
          BasicType bt = lsn->in(MemNode::Address)->bottom_type()->basic_type();
          assert((bt == T_OBJECT || bt == T_ARRAY), "Sanity test");
          insert_barrier_before_unsafe(phase, lsn);
        }
      }
    }
    for (uint i = 0; i < n->len(); i++) {
      if (n->in(i)) {
        if (!visited.test_set(n->in(i)->_idx)) {
          nodeStack.push(n->in(i));
        }
      }
    }
  }

  igvn.optimize();
  C->print_method(PHASE_ADD_UNSAFE_BARRIER, 2);
}

// The purpose of ZBarrierSetC2::clean_catch_blocks is to prepare the IR for
// splicing in load barrier nodes.
//
// The problem is that we might have instructions between a call and its catch nodes.
// (This is usually handled in PhaseCFG::call_catch_cleanup, which clones mach nodes in
// already scheduled blocks.) We can't have loads that require barriers there,
// because we need to splice in new control flow, and that would violate the IR.
//
// clean_catch_blocks finds all loads that require a barrier and clones them, and any
// dependent instructions, to each use. The loads must be at the beginning of the catch
// block, before any store.
//
// Sometimes the load's use will be at a place dominated by all catch blocks; then we need
// a load in each catch block, and a Phi at the dominated use.
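//
// For the merged case the result looks roughly like this (a sketch):
//
//   CatchProj#1: load' (clone)    CatchProj#2: load'' (clone)
//             \                     /
//              Region ... Phi(load', load'')
//                           |
//                          use
//
// When control from the catch projs merges through several regions, one phi
// per region is created and the phis are chained until every input is covered.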

void ZBarrierSetC2::clean_catch_blocks(PhaseIdealLoop* phase) const {

  Compile* C = phase->C;
  uint new_ids = C->unique();
  PhaseIterGVN &igvn = phase->igvn();
  VectorSet visited(Thread::current()->resource_area());
  GrowableArray<Node*> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
  nodeStack.push(C->root());
  visited.test_set(C->root()->_idx);

  // Traverse all nodes, visit all loads that require a barrier
  while (nodeStack.length() > 0) {
    Node* n = nodeStack.pop();

    for (uint i = 0; i < n->len(); i++) {
      if (n->in(i)) {
        if (!visited.test_set(n->in(i)->_idx)) {
          nodeStack.push(n->in(i));
        }
      }
    }

    bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup
    if (n->is_Load() && is_old_node) {
      LoadNode* load = n->isa_Load();
      // Only care about loads that will have a barrier
      if (load_require_barrier(load)) {
        process_catch_cleanup_candidate(phase, load);
      }
    }
  }

  C->print_method(PHASE_CALL_CATCH_CLEANUP, 2);
}

class DomDepthCompareClosure : public CompareClosure<LoadNode*> {
  PhaseIdealLoop* _phase;

public:
  DomDepthCompareClosure(PhaseIdealLoop* phase) : _phase(phase) { }

  int do_compare(LoadNode* const &n1, LoadNode* const &n2) {
    int d1 = _phase->dom_depth(_phase->get_ctrl(n1));
    int d2 = _phase->dom_depth(_phase->get_ctrl(n2));
    if (d1 == d2) {
      // Compare index if the depth is the same, ensures all entries are unique.
      return n1->_idx - n2->_idx;
    } else {
      return d2 - d1;
    }
  }
};

// Traverse graph and add all loadPs to list, sorted by dom depth
void gather_loadnodes_sorted(PhaseIdealLoop* phase, GrowableArray<LoadNode*>* loadList) {

  VectorSet visited(Thread::current()->resource_area());
  GrowableArray<Node*> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
  DomDepthCompareClosure ddcc(phase);

  nodeStack.push(phase->C->root());
  while (nodeStack.length() > 0) {
    Node* n = nodeStack.pop();
    if (visited.test(n->_idx)) {
      continue;
    }

    if (n->isa_Load()) {
      LoadNode* load = n->as_Load();
      if (load_require_barrier(load)) {
        assert(phase->get_ctrl(load) != NULL, "sanity");
        assert(phase->dom_depth(phase->get_ctrl(load)) != 0, "sanity");
        loadList->insert_sorted(&ddcc, load);
      }
    }

    visited.set(n->_idx);
    for (uint i = 0; i < n->req(); i++) {
      if (n->in(i)) {
        if (!visited.test(n->in(i)->_idx)) {
          nodeStack.push(n->in(i));
        }
      }
    }
  }
}

// Add LoadBarriers to all LoadPs
void ZBarrierSetC2::insert_load_barriers(PhaseIdealLoop* phase) const {

  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
  GrowableArray<LoadNode*> loadList(Thread::current()->resource_area(), 0, 0, NULL);
  gather_loadnodes_sorted(phase, &loadList);

  PhaseIterGVN &igvn = phase->igvn();
  int count = 0;

  for (GrowableArrayIterator<LoadNode*> loadIter = loadList.begin(); loadIter != loadList.end(); ++loadIter) {
    LoadNode* load = *loadIter;

    if (load_has_expanded_barrier(load)) {
      continue;
    }

    do {
      // Insert a barrier on a loadP.
      // If another load is found that needs to be expanded first, retry on that one.
      LoadNode* result = insert_one_loadbarrier(phase, load, phase->get_ctrl(load));
      while (result != NULL) {
        result = insert_one_loadbarrier(phase, result, phase->get_ctrl(result));
      }
    } while (!load_has_expanded_barrier(load));
  }

  phase->C->print_method(PHASE_INSERT_BARRIER, 2);
}

static void push_antidependent_stores(PhaseIdealLoop* phase, Node_Stack& nodestack, LoadNode* start_load) {
  // Push all stores on the same memory that may alias with the load.
  // Any load found during the DFS must be handled first.
  PhaseIterGVN &igvn = phase->igvn();
  int load_alias_idx = igvn.C->get_alias_index(start_load->adr_type());

  Node* mem = start_load->in(MemNode::Memory);
  for (DUIterator_Fast imax, u = mem->fast_outs(imax); u < imax; u++) {
    Node* mem_use = mem->fast_out(u);

    if (mem_use == start_load) continue;
    if (!mem_use->is_Store()) continue;
    if (!phase->has_ctrl(mem_use)) continue;
    if (phase->get_ctrl(mem_use) != phase->get_ctrl(start_load)) continue;

    // add any aliasing store in this block
    StoreNode* store = mem_use->isa_Store();
    const TypePtr* adr_type = store->adr_type();
    if (igvn.C->can_alias(adr_type, load_alias_idx)) {
      nodestack.push(store, 0);
    }
  }
}
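
// Note on ordering: insert_one_loadbarrier() below first does a forward DFS
// from the candidate load (seeded with any anti-dependent stores pushed
// above). If the DFS finds another load in the same block that still needs a
// barrier, that load is returned and expanded first; the set of nodes visited
// by the DFS is then reused to decide which users move below the new barrier
// control (see insert_one_loadbarrier_inner()).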
LoadNode* ZBarrierSetC2::insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* start_load, Node* ctrl) const {
  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
  PhaseIterGVN &igvn = phase->igvn();

  // Check for other loadPs at the same loop depth that are reachable by a DFS
  // - if found, return it; it needs to be inserted first
  // - otherwise proceed and insert barrier

  VectorSet visited(Thread::current()->resource_area());
  Node_Stack nodestack(100);

  nodestack.push(start_load, 0);
  push_antidependent_stores(phase, nodestack, start_load);

  while (!nodestack.is_empty()) {
    Node* n = nodestack.node(); // peek
    nodestack.pop();
    if (visited.test(n->_idx)) {
      continue;
    }

    if (n->is_Load() && n != start_load && load_require_barrier(n->as_Load()) && !load_has_expanded_barrier(n->as_Load())) {
      // Found another load that needs a barrier in the same block. Must expand later loads first.
      if (trace) tty->print_cr(" * Found LoadP %i on DFS", n->_idx);
      return n->as_Load(); // return node that should be expanded first
    }

    if (!phase->has_ctrl(n)) continue;
    if (phase->get_ctrl(n) != phase->get_ctrl(start_load)) continue;
    if (n->is_Phi()) continue;

    visited.set(n->_idx);
    // push all children
    for (DUIterator_Fast imax, ii = n->fast_outs(imax); ii < imax; ii++) {
      Node* c = n->fast_out(ii);
      if (c != NULL) {
        nodestack.push(c, 0);
      }
    }
  }

  insert_one_loadbarrier_inner(phase, start_load, ctrl, visited);
  return NULL;
}
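
// The control splice performed below turns (a sketch):
//
//   ctrl -> ctrl_succ
//
// into
//
//   ctrl -> LoadBarrier -> Proj(Control) -> ctrl_succ
//
// The barrier's Oop projection replaces the load as input to all of the
// load's users, and every node that the caller's DFS visited in this block
// has its control input moved to the new barrier control.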
void ZBarrierSetC2::insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited2) const {
  PhaseIterGVN &igvn = phase->igvn();
  Compile* C = igvn.C;
  bool trace = C->directive()->ZTraceLoadBarriersOption;

  // Create barrier
  Node* barrier = new LoadBarrierNode(C, NULL, load->in(LoadNode::Memory), NULL, load->in(LoadNode::Address), load_has_weak_barrier(load));
  Node* barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
  Node* barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);

  if (trace) tty->print_cr("Insert load %i with barrier: %i and ctrl : %i", load->_idx, barrier->_idx, ctrl->_idx);

  // Splice control
  // - Insert the barrier control diamond between the load's ctrl and ctrl's successor on the path to the block end.
  // - If the control successor is a catch, step over to the next.
  Node* ctrl_succ = NULL;
  for (DUIterator_Fast imax, j = ctrl->fast_outs(imax); j < imax; j++) {
    Node* tmp = ctrl->fast_out(j);

    // - CFG nodes are the ones we are going to splice (1 only!)
    // - Phi nodes will continue to hang from the region node!
    // - Self loops should be skipped
    if (tmp->is_Phi() || tmp == ctrl) {
      continue;
    }

    if (tmp->is_CFG()) {
      assert(ctrl_succ == NULL, "There can be only one");
      ctrl_succ = tmp;
      continue;
    }
  }

  // Now splice control
  assert(ctrl_succ != load, "sanity");
  assert(ctrl_succ != NULL, "Broken IR");
  bool found = false;
  for (uint k = 0; k < ctrl_succ->req(); k++) {
    if (ctrl_succ->in(k) == ctrl) {
      assert(!found, "sanity");
      if (trace) tty->print_cr(" Move CFG ctrl_succ %i to barrier_ctrl", ctrl_succ->_idx);
      igvn.replace_input_of(ctrl_succ, k, barrier_ctrl);
      found = true;
      k--;
    }
  }

  // For all successors of ctrl - move all visited to become successors of barrier_ctrl instead
  for (DUIterator_Fast imax, r = ctrl->fast_outs(imax); r < imax; r++) {
    Node* tmp = ctrl->fast_out(r);
    if (visited2.test(tmp->_idx) && (tmp != load)) {
      if (trace) tty->print_cr(" Move ctrl_succ %i to barrier_ctrl", tmp->_idx);
      igvn.replace_input_of(tmp, 0, barrier_ctrl);
      --r; --imax;
    }
  }

  // Move the load's users to the barrier
  for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
    Node* u = load->fast_out(i);
    if (u->isa_LoadBarrier()) {
      continue;
    }

    // find correct input - replace with iterator?
    for (uint j = 0; j < u->req(); j++) {
      if (u->in(j) == load) {
        igvn.replace_input_of(u, j, barrier_val);
        --i; --imax; // Adjust the iterator of the *outer* loop
        break; // some nodes (calls) might have several uses from the same node
      }
    }
  }

  // Connect barrier to load and control
  barrier->set_req(LoadBarrierNode::Oop, load);
  barrier->set_req(LoadBarrierNode::Control, ctrl);

  igvn.rehash_node_delayed(load);
  igvn.register_new_node_with_optimizer(barrier);
  igvn.register_new_node_with_optimizer(barrier_val);
  igvn.register_new_node_with_optimizer(barrier_ctrl);
  load_set_expanded_barrier(load);

  C->print_method(PHASE_INSERT_BARRIER, 3, load->_idx);
}

// The bad_mask in the ThreadLocalData shouldn't have an anti-dependence check.
// The bad_mask address is of type TypeRawPtr, but that will alias
// InitializeNodes until the type system is expanded.
bool ZBarrierSetC2::needs_anti_dependence_check(const Node* node) const {
  MachNode* mnode = node->isa_Mach();
  if (mnode != NULL) {
    intptr_t offset = 0;
    const TypePtr* adr_type2 = NULL;
    const Node* base = mnode->get_base_and_disp(offset, adr_type2);
    if ((base != NULL) &&
        (base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_ThreadLocal) &&
        (offset == in_bytes(ZThreadLocalData::address_bad_mask_offset()))) {
      return false;
    }
  }
  return true;
}