1 | /* |
2 | * Copyright (c) 2016, 2019, Red Hat, Inc. All rights reserved. |
3 | * |
4 | * This code is free software; you can redistribute it and/or modify it |
5 | * under the terms of the GNU General Public License version 2 only, as |
6 | * published by the Free Software Foundation. |
7 | * |
8 | * This code is distributed in the hope that it will be useful, but WITHOUT |
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
11 | * version 2 for more details (a copy is included in the LICENSE file that |
12 | * accompanied this code). |
13 | * |
14 | * You should have received a copy of the GNU General Public License version |
15 | * 2 along with this work; if not, write to the Free Software Foundation, |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
17 | * |
18 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
19 | * or visit www.oracle.com if you need additional information or have any |
20 | * questions. |
21 | * |
22 | */ |
23 | |
24 | #ifndef SHARE_GC_SHENANDOAH_SHENANDOAHTASKQUEUE_HPP |
25 | #define SHARE_GC_SHENANDOAH_SHENANDOAHTASKQUEUE_HPP |
26 | #include "gc/shared/owstTaskTerminator.hpp" |
27 | #include "gc/shared/taskqueue.hpp" |
28 | #include "memory/allocation.hpp" |
29 | #include "runtime/mutex.hpp" |
30 | #include "runtime/thread.hpp" |
31 | |
// OverflowTaskQueue with an additional one-element buffer in front of it:
// _elem holds a single buffered task whenever _buf_empty is false. As the
// is_empty() implementation shows, the queue counts as empty only when both
// the buffer and the backing OverflowTaskQueue are empty.
template<class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE>
class BufferedOverflowTaskQueue: public OverflowTaskQueue<E, F, N>
{
public:
  typedef OverflowTaskQueue<E, F, N> taskqueue_t;

  BufferedOverflowTaskQueue() : _buf_empty(true) {};

  TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;)

  // Push task t into the queue. Returns true on success.
  inline bool push(E t);

  // Attempt to pop from the queue. Returns true on success.
  inline bool pop(E &t);

  // Reset the queue (buffer and backing queue) to the empty state.
  inline void clear();

  inline bool is_empty() const {
    return _buf_empty && taskqueue_t::is_empty();
  }

private:
  bool _buf_empty;  // true when _elem does not hold a buffered task
  E _elem;          // single-task buffer in front of the backing queue
};
58 | |
59 | // ObjArrayChunkedTask |
60 | // |
61 | // Encodes both regular oops, and the array oops plus chunking data for parallel array processing. |
62 | // The design goal is to make the regular oop ops very fast, because that would be the prevailing |
63 | // case. On the other hand, it should not block parallel array processing from efficiently dividing |
64 | // the array work. |
65 | // |
66 | // The idea is to steal the bits from the 64-bit oop to encode array data, if needed. For the |
67 | // proper divide-and-conquer strategies, we want to encode the "blocking" data. It turns out, the |
68 | // most efficient way to do this is to encode the array block as (chunk * 2^pow), where it is assumed |
// that the block has the size of 2^pow. This requires pow to have only 5 bits (2^32) to encode
70 | // all possible arrays. |
71 | // |
72 | // |---------oop---------|-pow-|--chunk---| |
73 | // 0 49 54 64 |
74 | // |
75 | // By definition, chunk == 0 means "no chunk", i.e. chunking starts from 1. |
76 | // |
77 | // This encoding gives a few interesting benefits: |
78 | // |
79 | // a) Encoding/decoding regular oops is very simple, because the upper bits are zero in that task: |
80 | // |
81 | // |---------oop---------|00000|0000000000| // no chunk data |
82 | // |
83 | // This helps the most ubiquitous path. The initialization amounts to putting the oop into the word |
84 | // with zero padding. Testing for "chunkedness" is testing for zero with chunk mask. |
85 | // |
86 | // b) Splitting tasks for divide-and-conquer is possible. Suppose we have chunk <C, P> that covers |
87 | // interval [ (C-1)*2^P; C*2^P ). We can then split it into two chunks: |
88 | // <2*C - 1, P-1>, that covers interval [ (2*C - 2)*2^(P-1); (2*C - 1)*2^(P-1) ) |
89 | // <2*C, P-1>, that covers interval [ (2*C - 1)*2^(P-1); 2*C*2^(P-1) ) |
90 | // |
91 | // Observe that the union of these two intervals is: |
92 | // [ (2*C - 2)*2^(P-1); 2*C*2^(P-1) ) |
93 | // |
94 | // ...which is the original interval: |
95 | // [ (C-1)*2^P; C*2^P ) |
96 | // |
97 | // c) The divide-and-conquer strategy could even start with chunk <1, round-log2-len(arr)>, and split |
98 | // down in the parallel threads, which alleviates the upfront (serial) splitting costs. |
99 | // |
100 | // Encoding limitations caused by current bitscales mean: |
101 | // 10 bits for chunk: max 1024 blocks per array |
102 | // 5 bits for power: max 2^32 array |
103 | // 49 bits for oop: max 512 TB of addressable space |
104 | // |
105 | // Stealing bits from oop trims down the addressable space. Stealing too few bits for chunk ID limits |
106 | // potential parallelism. Stealing too few bits for pow limits the maximum array size that can be handled. |
107 | // In future, these might be rebalanced to favor one degree of freedom against another. For example, |
108 | // if/when Arrays 2.0 bring 2^64-sized arrays, we might need to steal another bit for power. We could regain |
109 | // some bits back if chunks are counted in ObjArrayMarkingStride units. |
110 | // |
111 | // There is also a fallback version that uses plain fields, when we don't have enough space to steal the |
112 | // bits from the native pointer. It is useful to debug the optimized version. |
113 | // |
114 | |
115 | #ifdef _MSC_VER |
116 | #pragma warning(push) |
117 | // warning C4522: multiple assignment operators specified |
118 | #pragma warning( disable:4522 ) |
119 | #endif |
120 | |
121 | #ifdef _LP64 |
122 | #define SHENANDOAH_OPTIMIZED_OBJTASK 1 |
123 | #else |
124 | #define SHENANDOAH_OPTIMIZED_OBJTASK 0 |
125 | #endif |
126 | |
127 | #if SHENANDOAH_OPTIMIZED_OBJTASK |
// Optimized flavor: the oop, the chunk id, and the chunk power are packed
// into a single uintptr_t word, per the encoding description above.
class ObjArrayChunkedTask
{
public:
  // Field widths of the packed word; oop gets whatever is left after chunk
  // and pow are carved out (49 bits on 64-bit platforms).
  enum {
    chunk_bits = 10,
    pow_bits = 5,
    oop_bits = sizeof(uintptr_t)*8 - chunk_bits - pow_bits
  };
  // Field positions: oop occupies the low bits, so a plain (non-chunked)
  // oop encodes with all upper bits zero.
  enum {
    oop_shift = 0,
    pow_shift = oop_shift + oop_bits,
    chunk_shift = pow_shift + pow_bits
  };

public:
  // Wrap a plain oop; chunk and pow fields remain zero ("no chunk").
  ObjArrayChunkedTask(oop o = NULL) {
    assert(oopDesc::equals_raw(decode_oop(encode_oop(o)), o), "oop can be encoded: " PTR_FORMAT, p2i(o));
    _obj = encode_oop(o);
  }
  // Wrap an array oop plus its chunk id and chunk-size power. The asserts
  // verify each field round-trips through the bit encoding losslessly.
  ObjArrayChunkedTask(oop o, int chunk, int pow) {
    assert(oopDesc::equals_raw(decode_oop(encode_oop(o)), o), "oop can be encoded: " PTR_FORMAT, p2i(o));
    assert(decode_chunk(encode_chunk(chunk)) == chunk, "chunk can be encoded: %d" , chunk);
    assert(decode_pow(encode_pow(pow)) == pow, "pow can be encoded: %d" , pow);
    _obj = encode_oop(o) | encode_chunk(chunk) | encode_pow(pow);
  }
  ObjArrayChunkedTask(const ObjArrayChunkedTask& t): _obj(t._obj) { }

  ObjArrayChunkedTask& operator =(const ObjArrayChunkedTask& t) {
    _obj = t._obj;
    return *this;
  }
  // Volatile flavor of assignment. NOTE(review): presumably required because
  // task queue slots are accessed through volatile references -- confirm
  // against gc/shared/taskqueue.hpp. (Triggers MSC warning C4522, disabled
  // around this class.)
  volatile ObjArrayChunkedTask&
  operator =(const volatile ObjArrayChunkedTask& t) volatile {
    (void)const_cast<uintptr_t&>(_obj = t._obj);
    return *this;
  }

  // Extract the oop from the low oop_bits of the packed word.
  inline oop decode_oop(uintptr_t val) const {
    return (oop) reinterpret_cast<void*>((val >> oop_shift) & right_n_bits(oop_bits));
  }

  // Extract the chunk id from the top chunk_bits of the packed word.
  inline int decode_chunk(uintptr_t val) const {
    return (int) ((val >> chunk_shift) & right_n_bits(chunk_bits));
  }

  // Extract the chunk-size power from its pow_bits field.
  inline int decode_pow(uintptr_t val) const {
    return (int) ((val >> pow_shift) & right_n_bits(pow_bits));
  }

  inline uintptr_t encode_oop(oop obj) const {
    return ((uintptr_t)(void*) obj) << oop_shift;
  }

  inline uintptr_t encode_chunk(int chunk) const {
    return ((uintptr_t) chunk) << chunk_shift;
  }

  inline uintptr_t encode_pow(int pow) const {
    return ((uintptr_t) pow) << pow_shift;
  }

  inline oop obj() const { return decode_oop(_obj); }
  inline int chunk() const { return decode_chunk(_obj); }
  inline int pow() const { return decode_pow(_obj); }
  // chunk == 0 means "no chunk" (see notes above). The mask clears the oop
  // and pow fields, so only the chunk field is tested for zero.
  inline bool is_not_chunked() const { return (_obj & ~right_n_bits(oop_bits + pow_bits)) == 0; }

  DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid.

  // Largest encodable address: 2^oop_bits (512 TB with 49 oop bits).
  static uintptr_t max_addressable() {
    return nth_bit(oop_bits);
  }

  // Maximum number of chunks per array: 2^chunk_bits.
  static int chunk_size() {
    return nth_bit(chunk_bits);
  }

private:
  uintptr_t _obj;  // packed |oop|pow|chunk| word
};
207 | #else |
// Fallback flavor for builds without _LP64: there are not enough bits to
// steal from the native pointer, so the oop, chunk id, and power live in
// plain fields. Also useful for debugging the optimized version (see above).
class ObjArrayChunkedTask
{
public:
  // Same logical field widths as the optimized flavor, so asserts below can
  // keep chunk/pow within identical ranges on both flavors.
  enum {
    chunk_bits = 10,
    pow_bits = 5,
  };
public:
  // Wrap an oop with optional chunking data; chunk == 0 means "no chunk".
  ObjArrayChunkedTask(oop o = NULL, int chunk = 0, int pow = 0): _obj(o) {
    assert(0 <= chunk && chunk < nth_bit(chunk_bits), "chunk is sane: %d" , chunk);
    assert(0 <= pow && pow < nth_bit(pow_bits), "pow is sane: %d" , pow);
    _chunk = chunk;
    _pow = pow;
  }
  ObjArrayChunkedTask(const ObjArrayChunkedTask& t): _obj(t._obj), _chunk(t._chunk), _pow(t._pow) { }

  ObjArrayChunkedTask& operator =(const ObjArrayChunkedTask& t) {
    _obj = t._obj;
    _chunk = t._chunk;
    _pow = t._pow;
    return *this;
  }
  // Volatile flavor of assignment. NOTE(review): presumably required because
  // task queue slots are accessed through volatile references -- confirm
  // against gc/shared/taskqueue.hpp.
  volatile ObjArrayChunkedTask&
  operator =(const volatile ObjArrayChunkedTask& t) volatile {
    (void)const_cast<oop&>(_obj = t._obj);
    _chunk = t._chunk;
    _pow = t._pow;
    return *this;
  }

  inline oop obj() const { return _obj; }
  inline int chunk() const { return _chunk; }
  inline int pow() const { return _pow; }

  // chunk == 0 means "no chunk", matching the optimized encoding.
  inline bool is_not_chunked() const { return _chunk == 0; }

  DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid.

  // NOTE(review): returns the pointer size, unlike the optimized flavor
  // which returns an address-space limit. No bits are stolen here, so the
  // encoding does not constrain addressability -- confirm intended meaning.
  static size_t max_addressable() {
    return sizeof(oop);
  }

  // Maximum number of chunks per array: 2^chunk_bits.
  static int chunk_size() {
    return nth_bit(chunk_bits);
  }

private:
  oop _obj;    // the (array) oop being processed
  int _chunk;  // chunk id, 1-based; 0 means "no chunk"
  int _pow;    // log2 of the chunk size
};
259 | #endif // SHENANDOAH_OPTIMIZED_OBJTASK |
260 | |
261 | #ifdef _MSC_VER |
262 | #pragma warning(pop) |
263 | #endif |
264 | |
// The Shenandoah marking task, and the buffered/padded queue types built on it.
typedef ObjArrayChunkedTask ShenandoahMarkTask;
typedef BufferedOverflowTaskQueue<ShenandoahMarkTask, mtGC> ShenandoahBufferedOverflowTaskQueue;
typedef Padded<ShenandoahBufferedOverflowTaskQueue> ShenandoahObjToScanQueue;
268 | |
// A task queue set whose queues can be claimed one at a time by parallel
// workers, via an atomically bumped claim index.
template <class T, MEMFLAGS F>
class ParallelClaimableQueueSet: public GenericTaskQueueSet<T, F> {
private:
  // Pad the claim counter out to a full cache line, keeping this hot,
  // frequently-updated word away from unrelated data.
  DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile jint));
  volatile jint _claimed_index;  // index of the next queue to hand out
  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, 0);

  debug_only(uint _reserved; )   // debug-only record of reserve(n)

public:
  using GenericTaskQueueSet<T, F>::size;

public:
  ParallelClaimableQueueSet(int n) : GenericTaskQueueSet<T, F>(n), _claimed_index(0) {
    debug_only(_reserved = 0; )
  }

  // Make all queues claimable again.
  void clear_claimed() { _claimed_index = 0; }

  // Atomically claim the next unclaimed queue; NULL when none remain.
  T* claim_next();

  // Reserve the first n queues so they are not handed out by claim_next().
  void reserve(uint n) {
    assert(n <= size(), "Sanity" );
    _claimed_index = (jint)n;
    debug_only(_reserved = n;)
  }

  debug_only(uint get_reserved() const { return (uint)_reserved; })
};
298 | |
299 | template <class T, MEMFLAGS F> |
300 | T* ParallelClaimableQueueSet<T, F>::claim_next() { |
301 | jint size = (jint)GenericTaskQueueSet<T, F>::size(); |
302 | |
303 | if (_claimed_index >= size) { |
304 | return NULL; |
305 | } |
306 | |
307 | jint index = Atomic::add(1, &_claimed_index); |
308 | |
309 | if (index <= size) { |
310 | return GenericTaskQueueSet<T, F>::queue((uint)index - 1); |
311 | } else { |
312 | return NULL; |
313 | } |
314 | } |
315 | |
// The queue set used for Shenandoah marking: parallel claiming, plus
// set-wide emptiness/clear operations and optional task queue statistics
// (out-of-line implementations live in the .cpp file).
class ShenandoahObjToScanQueueSet: public ParallelClaimableQueueSet<ShenandoahObjToScanQueue, mtGC> {
public:
  ShenandoahObjToScanQueueSet(int n) : ParallelClaimableQueueSet<ShenandoahObjToScanQueue, mtGC>(n) {}

  // Set-wide emptiness check and clear.
  bool is_empty();
  void clear();

#if TASKQUEUE_STATS
  static void print_taskqueue_stats_hdr(outputStream* const st);
  void print_taskqueue_stats() const;
  void reset_taskqueue_stats();
#endif // TASKQUEUE_STATS
};
329 | |
// Termination condition plugged into the terminator protocol: tells workers
// to abandon termination as soon as the GC has been cancelled.
class ShenandoahTerminatorTerminator : public TerminatorTerminator {
private:
  ShenandoahHeap* _heap;
public:
  ShenandoahTerminatorTerminator(ShenandoahHeap* const heap) : _heap(heap) { }
  // return true, terminates immediately, even if there's remaining work left
  virtual bool should_exit_termination() { return _heap->cancelled_gc(); }
};
338 | |
339 | class ShenandoahTaskTerminator : public StackObj { |
340 | private: |
341 | OWSTTaskTerminator* const _terminator; |
342 | public: |
343 | ShenandoahTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set); |
344 | ~ShenandoahTaskTerminator(); |
345 | |
346 | bool offer_termination(ShenandoahTerminatorTerminator* terminator) { |
347 | return _terminator->offer_termination(terminator); |
348 | } |
349 | |
350 | void reset_for_reuse() { _terminator->reset_for_reuse(); } |
351 | bool offer_termination() { return offer_termination((ShenandoahTerminatorTerminator*)NULL); } |
352 | }; |
353 | |
354 | #endif // SHARE_GC_SHENANDOAH_SHENANDOAHTASKQUEUE_HPP |
355 | |