/*
 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahHeuristics.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;

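// Arraycopy pre-barrier: for oop arrays, enqueue the destination elements
// that are about to be overwritten into the SATB queues while marking is
// in progress, so concurrent marking does not lose them.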
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {

  bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
  bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
  bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops);
  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (type == T_OBJECT || type == T_ARRAY) {
#ifdef _LP64
    if (!checkcast) {
      if (!obj_int) {
        // Save count for barrier
        __ movptr(r11, count);
      } else if (disjoint) {
        // Save dst in r11 in the disjoint case
        __ movq(r11, dst);
      }
    }
#else
    if (disjoint) {
      __ mov(rdx, dst); // save 'to'
    }
#endif

    if (ShenandoahSATBBarrier && !dest_uninitialized && !ShenandoahHeap::heap()->heuristics()->can_do_traversal_gc()) {
      Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
      assert_different_registers(dst, count, thread); // we don't care about src here?
#ifndef _LP64
      __ push(thread);
      __ get_thread(thread);
#endif

      Label done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, done);

      // Avoid runtime call when not marking.
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      __ testb(gc_state, ShenandoahHeap::MARKING);
      __ jcc(Assembler::zero, done);

      __ pusha(); // push registers
#ifdef _LP64
      if (count == c_rarg0) {
        if (dst == c_rarg1) {
          // exactly backwards!!
          __ xchgptr(c_rarg1, c_rarg0);
        } else {
          __ movptr(c_rarg1, count);
          __ movptr(c_rarg0, dst);
        }
      } else {
        __ movptr(c_rarg0, dst);
        __ movptr(c_rarg1, count);
      }
      if (UseCompressedOops) {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), 2);
      } else {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), 2);
      }
#else
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry),
                      dst, count);
#endif
      __ popa();
      __ bind(done);
      NOT_LP64(__ pop(thread);)
    }
  }

}

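// Arraycopy post-barrier: if the heap still has forwarded objects (i.e. an
// update-refs phase is in progress), call into the runtime to fix up any
// from-space references the copy may have written into the destination.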
void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {
  bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
  bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
  bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops);
  Register tmp = rax;

  if (type == T_OBJECT || type == T_ARRAY) {
#ifdef _LP64
    if (!checkcast) {
      if (!obj_int) {
        // Save count for barrier
        count = r11;
      } else if (disjoint && obj_int) {
        // Use the saved dst in the disjoint case
        dst = r11;
      }
    } else {
      tmp = rscratch1;
    }
#else
    if (disjoint) {
      __ mov(dst, rdx); // restore 'to'
    }
#endif

    Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
    assert_different_registers(dst, thread); // do we care about src at all here?

#ifndef _LP64
    __ push(thread);
    __ get_thread(thread);
#endif

    // Short-circuit if count == 0.
    Label done;
    __ testptr(count, count);
    __ jcc(Assembler::zero, done);

    // Skip runtime call if no forwarded objects.
    Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ testb(gc_state, ShenandoahHeap::UPDATEREFS);
    __ jcc(Assembler::zero, done);

    __ pusha(); // push registers (overkill)
#ifdef _LP64
    if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
      assert_different_registers(c_rarg1, dst);
      __ mov(c_rarg1, count);
      __ mov(c_rarg0, dst);
    } else {
      assert_different_registers(c_rarg0, count);
      __ mov(c_rarg0, dst);
      __ mov(c_rarg1, count);
    }
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_post_entry), 2);
#else
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_post_entry),
                    dst, count);
#endif
    __ popa();

    __ bind(done);
    NOT_LP64(__ pop(thread);)
  }
}

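// SATB pre-barrier entry point: records the previous value of a reference
// field so that concurrent marking sees a consistent snapshot. No-op unless
// the SATB barrier is enabled.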
void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                 Register obj,
                                                                 Register pre_val,
                                                                 Register thread,
                                                                 Register tmp,
                                                                 bool tosca_live,
                                                                 bool expand_call) {

  if (ShenandoahSATBBarrier) {
    satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
  }
}

void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
                                                           Register obj,
                                                           Register pre_val,
                                                           Register thread,
                                                           Register tmp,
                                                           bool tosca_live,
                                                           bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, (int32_t) NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store the original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);             // tmp := *index_adr
  __ cmpptr(tmp, 0);                 // tmp == 0?
  __ jcc(Assembler::equal, runtime); // If yes, goto runtime

  __ subptr(tmp, wordSize);          // tmp := tmp - wordSize
  __ movptr(index, tmp);             // *index_adr := tmp
  __ addptr(tmp, buffer);            // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);
  // save the live input values
  if (tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( __ push(thread); )

#ifdef _LP64
  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy the thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
#endif

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
#ifdef _LP64
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
#else
    __ push(thread);
    __ push(pre_val);
#endif
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);
  }

  NOT_LP64( __ pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if (tosca_live) __ pop(rax);

  __ bind(done);
}

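// Resolve a possibly forwarded oop in dst: null-checks first, then rewrites
// dst to the to-space copy if the object has been forwarded.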
void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier, "should be enabled");
  Label is_null;
  __ testptr(dst, dst);
  __ jcc(Assembler::zero, is_null);
  resolve_forward_pointer_not_null(masm, dst, tmp);
  __ bind(is_null);
}

void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled");
  // The below loads the mark word, checks if the lowest two bits are
  // set, and if so, clears the lowest two bits and copies the result
  // to dst. Otherwise it leaves dst alone.
  // Implementing this is surprisingly awkward. I do it here by:
  // - Inverting the mark word
  // - Testing if the lowest two bits == 0
  // - If so, setting the lowest two bits
  // - Inverting the result back, and copying to dst

  bool borrow_reg = (tmp == noreg);
  if (borrow_reg) {
    // No free registers available. Make one useful.
    tmp = LP64_ONLY(rscratch1) NOT_LP64(rdx);
    __ push(tmp);
  }

  Label done;
  __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
  __ notptr(tmp);
  __ testb(tmp, markOopDesc::marked_value);
  __ jccb(Assembler::notZero, done);
  __ orptr(tmp, markOopDesc::marked_value);
  __ notptr(tmp);
  __ mov(dst, tmp);
  __ bind(done);

  if (borrow_reg) {
    __ pop(tmp);
  }
}


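// Load-reference barrier for an oop already known to be non-null: when the
// heap has forwarded objects, call the LRB stub (argument and result in rax)
// to obtain the to-space copy.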
void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  Label done;

#ifdef _LP64
  Register thread = r15_thread;
#else
  Register thread = rcx;
  if (thread == dst) {
    thread = rbx;
  }
  __ push(thread);
  __ get_thread(thread);
#endif
  assert_different_registers(dst, thread);

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jccb(Assembler::zero, done);

  if (dst != rax) {
    __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
  }

  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));

  if (dst != rax) {
    __ xchgptr(rax, dst); // Swap back obj with rax.
  }

  __ bind(done);

#ifndef _LP64
  __ pop(thread);
#endif
}

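// Store-value barrier: enqueues the value being stored through the SATB
// machinery so that a concurrent traversal cycle treats it as live. No-op
// unless ShenandoahStoreValEnqueueBarrier is enabled.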
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
  if (ShenandoahStoreValEnqueueBarrier) {
    storeval_barrier_impl(masm, dst, tmp);
  }
}

void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahStoreValEnqueueBarrier, "should be enabled");

  if (dst == noreg) return;

  if (ShenandoahStoreValEnqueueBarrier) {
    // The set of registers to be saved+restored is the same as in the write-barrier above.
    // Those are the commonly used registers in the interpreter.
    __ pusha();
    // __ push_callee_saved_registers();
    __ subptr(rsp, 2 * Interpreter::stackElementSize);
    __ movdbl(Address(rsp, 0), xmm0);

#ifdef _LP64
    Register thread = r15_thread;
#else
    Register thread = rcx;
    if (thread == dst || thread == tmp) {
      thread = rdi;
    }
    if (thread == dst || thread == tmp) {
      thread = rbx;
    }
    __ get_thread(thread);
#endif
    assert_different_registers(dst, tmp, thread);

    satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * Interpreter::stackElementSize);
    // __ pop_callee_saved_registers();
    __ popa();
  }
}

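// Null-checking wrapper around the load-reference barrier: null oops need no
// resolution and skip the barrier entirely.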
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst) {
  if (ShenandoahLoadRefBarrier) {
    Label done;
    __ testptr(dst, dst);
    __ jcc(Assembler::zero, done);
    load_reference_barrier_not_null(masm, dst);
    __ bind(done);
  }
}

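// Oop loads go through the load-reference barrier; loads through weak or
// phantom references additionally run the SATB "keep alive" barrier on the
// referent, so concurrent marking cannot miss it.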
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
  if (on_oop) {
    load_reference_barrier(masm, dst);

    if (ShenandoahKeepAliveBarrier && on_reference) {
      const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
      assert_different_registers(dst, tmp1, tmp_thread);
      NOT_LP64(__ get_thread(thread));
      // Generate the SATB pre-barrier code to log the value of
      // the referent field in an SATB buffer.
      shenandoah_write_barrier_pre(masm /* masm */,
                                   noreg /* obj */,
                                   dst /* pre_val */,
                                   thread /* thread */,
                                   tmp1 /* tmp */,
                                   true /* tosca_live */,
                                   true /* expand_call */);
    }
  }
}

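// Oop stores into the heap run the SATB pre-barrier on the previous field
// value and the storeval barrier on the new value before the actual store.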
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                             Address dst, Register val, Register tmp1, Register tmp2) {

  bool on_oop = type == T_OBJECT || type == T_ARRAY;
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool as_normal = (decorators & AS_NORMAL) != 0;
  if (on_oop && in_heap) {
    bool needs_pre_barrier = as_normal;

    Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
    Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
    // flatten object address if needed
    // We do it regardless of precise because we need the registers
    if (dst.index() == noreg && dst.disp() == 0) {
      if (dst.base() != tmp1) {
        __ movptr(tmp1, dst.base());
      }
    } else {
      __ lea(tmp1, dst);
    }

    assert_different_registers(val, tmp1, tmp2, tmp3, rthread);

#ifndef _LP64
    __ get_thread(rthread);
    InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
    imasm->save_bcp();
#endif

    if (needs_pre_barrier) {
      shenandoah_write_barrier_pre(masm /*masm*/,
                                   tmp1 /* obj */,
                                   tmp2 /* pre_val */,
                                   rthread /* thread */,
                                   tmp3 /* tmp */,
                                   val != noreg /* tosca_live */,
                                   false /* expand_call */);
    }
    if (val == noreg) {
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    } else {
      storeval_barrier(masm, val, tmp3);
      BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
    }
    NOT_LP64(imasm->restore_bcp());
  } else {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
  }
}

// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");

  Label retry, done;

  // Remember oldval for retry logic below
#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else
#endif
  {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Try to CAS with given arguments. If successful, then we are done,
  // and can safely return.
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

  // Step 2. CAS has failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve both
  // oldval and the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
#ifdef _LP64
  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1);
  }
#endif
  resolve_forward_pointer(masm, tmp1);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, done, true);

  // Step 3. Try to CAS again with resolved to-space pointers.
  //
  // Corner case: it may happen that somebody stored the from-space pointer
  // to memory while we were preparing for retry. Therefore, we can fail again
  // on retry, and so need to do this in a loop, always resolving the failure
  // witness.
  __ bind(retry);
  if (os::is_MP()) __ lock();
#ifdef _LP64
  if (UseCompressedOops) {
    __ cmpxchgl(newval, addr);
  } else
#endif
  {
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, done, true);

#ifdef _LP64
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else
#endif
  {
    __ movptr(tmp2, oldval);
  }
  resolve_forward_pointer(masm, tmp2);

  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::equal, retry, true);

  // Step 4. If we need a boolean result out of CAS, check the flag again,
  // and promote the result. Note that we handle the flag from both the CAS
  // itself and from the retry loop.
  __ bind(done);
  if (!exchange) {
    assert(res != NULL, "need result register");
#ifdef _LP64
    __ setb(Assembler::equal, res);
    __ movzbl(res, res);
#else
    // Need something else to clean the result, because some registers
    // do not have byte encoding that movzbl wants. Cannot do the xor first,
    // because it modifies the flags.
    Label res_non_zero;
    __ movptr(res, 1);
    __ jcc(Assembler::equal, res_non_zero, true);
    __ xorptr(res, res);
    __ bind(res_non_zero);
#endif
  }
}

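// Saves the XMM/YMM/ZMM register state around the LRB slow path, sized
// according to the active SSE/AVX level; restore_vector_registers() below
// undoes this in reverse order.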
void ShenandoahBarrierSetAssembler::save_vector_registers(MacroAssembler* masm) {
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }

  if (UseSSE == 1) {
    __ subptr(rsp, sizeof(jdouble)*8);
    for (int n = 0; n < 8; n++) {
      __ movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
    }
  } else if (UseSSE >= 2) {
    if (UseAVX > 2) {
      __ push(rbx);
      __ movl(rbx, 0xffff);
      __ kmovwl(k1, rbx);
      __ pop(rbx);
    }
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      if (UseAVX > 2) {
        // Save upper half of ZMM registers
        __ subptr(rsp, 32*num_xmm_regs);
        for (int n = 0; n < num_xmm_regs; n++) {
          __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
        }
      }
      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      __ subptr(rsp, 16*num_xmm_regs);
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    __ subptr(rsp, 16*num_xmm_regs);
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
    }
#endif
  }
}

void ShenandoahBarrierSetAssembler::restore_vector_registers(MacroAssembler* masm) {
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }
  if (UseSSE == 1) {
    for (int n = 0; n < 8; n++) {
      __ movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
    }
    __ addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2) {
    // Restore whole 128bit (16 bytes) XMM registers
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
    }
#endif
    __ addptr(rsp, 16*num_xmm_regs);

#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      for (int n = 0; n < num_xmm_regs; n++) {
        __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
      }
      __ addptr(rsp, 16*num_xmm_regs);
      if (UseAVX > 2) {
        // Restore upper half of ZMM registers.
        for (int n = 0; n < num_xmm_regs; n++) {
          __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
        }
        __ addptr(rsp, 32*num_xmm_regs);
      }
    }
#endif
  }
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);
  }

  __ cmpptr(pre_val_reg, (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}

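// C1 stub for the load-reference barrier: performs the null check if the
// stub requires one, then runs the shared LRB code on the result register.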
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  __ bind(*stub->entry());

  Label done;
  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();

  if (res != obj) {
    __ mov(res, obj);
  }

  // Check for null.
  if (stub->needs_null_check()) {
    __ testptr(res, res);
    __ jcc(Assembler::zero, done);
  }

  load_reference_barrier_not_null(ce->masm(), res);

  __ bind(done);
  __ jmp(*stub->continuation());
}

#undef __

#define __ sasm->

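// C1 runtime stub for the SATB pre-barrier: tries to enqueue the previous
// value into the thread-local SATB buffer, and falls back to a runtime call
// when the buffer is full.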
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
  const Register tmp = rdx;

  NOT_LP64(__ get_thread(thread);)

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING | ShenandoahHeap::TRAVERSAL);
  __ jcc(Assembler::zero, done);

  // Can we store the original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}

#undef __

#endif // COMPILER1

address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
  return _shenandoah_lrb;
}

#define __ cgen->assembler()->

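// Generates the out-of-line load-reference-barrier stub. The source oop
// arrives in rax and the (possibly forwarded) result is returned in rax;
// all other registers are preserved.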
address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
  address start = __ pc();

  Label resolve_oop, slow_path;

  // We use RDI, which also serves as argument register for slow call.
  // RAX always holds the src object ptr, except after the slow call and
  // the cmpxchg, then it holds the result. R8/RBX is used as temporary register.

  Register tmp1 = rdi;
  Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);

  __ push(tmp1);
  __ push(tmp2);

  // Check for object being in the collection set.
  // TODO: Can we use only 1 register here?
  // The source object arrives here in rax.
  // live: rax
  // live: tmp1
  __ mov(tmp1, rax);
  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  // live: tmp2
  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  // unlive: tmp1
  __ testbool(tmp2);
  // unlive: tmp2
  __ jccb(Assembler::notZero, resolve_oop);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(resolve_oop);

  __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  // Test if both lowest bits are set. We trick it by negating the bits
  // and then testing for both bits clear.
  __ notptr(tmp2);
  __ testb(tmp2, markOopDesc::marked_value);
  __ jccb(Assembler::notZero, slow_path);
  // Clear both lower bits. It's still inverted, so set them, and then invert back.
  __ orptr(tmp2, markOopDesc::marked_value);
  __ notptr(tmp2);
  // At this point, tmp2 contains the decoded forwarding pointer.
  __ mov(rax, tmp2);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  __ bind(slow_path);

  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
#ifdef _LP64
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);
  __ push(r12);
  __ push(r13);
  __ push(r14);
  __ push(r15);
#endif

  save_vector_registers(cgen->assembler());
  __ movptr(rdi, rax);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_JRT), rdi);
  restore_vector_registers(cgen->assembler());

#ifdef _LP64
  __ pop(r15);
  __ pop(r14);
  __ pop(r13);
  __ pop(r12);
  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
#endif
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

  __ pop(tmp2);
  __ pop(tmp1);
  __ ret(0);

  return start;
}

#undef __

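// Called during VM initialization: generates the LRB stub into its own
// buffer blob so that generated code can call it.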
void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
  if (ShenandoahLoadRefBarrier) {
    int stub_code_size = 4096;
    ResourceMark rm;
    BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
    CodeBuffer buf(bb);
    StubCodeGenerator cgen(&buf);
    _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
  }
}