/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_SHARED_TASKQUEUE_INLINE_HPP
#define SHARE_GC_SHARED_TASKQUEUE_INLINE_HPP

#include "gc/shared/taskqueue.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"
#include "utilities/debug.hpp"
#include "utilities/stack.inline.hpp"

template <class T, MEMFLAGS F>
inline GenericTaskQueueSet<T, F>::GenericTaskQueueSet(uint n) : _n(n) {
  typedef T* GenericTaskQueuePtr;
  _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F);
  for (uint i = 0; i < n; i++) {
    _queues[i] = NULL;
  }
}

template <class T, MEMFLAGS F>
inline GenericTaskQueueSet<T, F>::~GenericTaskQueueSet() {
  FREE_C_HEAP_ARRAY(T*, _queues);
}
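
// Illustrative usage sketch, not part of this file: wiring a queue set up
// for a group of workers. The typedef names, the n_workers variable, and
// the worker loop below are hypothetical placeholders.
//
//   typedef GenericTaskQueue<oop, mtGC>             ExampleQueue;
//   typedef GenericTaskQueueSet<ExampleQueue, mtGC> ExampleQueueSet;
//
//   ExampleQueueSet* qset = new ExampleQueueSet(n_workers);
//   for (uint i = 0; i < n_workers; i++) {
//     ExampleQueue* q = new ExampleQueue();
//     q->initialize();              // allocates the backing array (below)
//     qset->register_queue(i, q);
//   }
//   // Worker i then pushes/pops on qset->queue(i) and calls
//   // qset->steal(i, t) when its own queue runs dry.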

template<class E, MEMFLAGS F, unsigned int N>
inline void GenericTaskQueue<E, F, N>::initialize() {
  _elems = ArrayAllocator<E>::allocate(N, F);
}

template<class E, MEMFLAGS F, unsigned int N>
inline GenericTaskQueue<E, F, N>::~GenericTaskQueue() {
  ArrayAllocator<E>::free(const_cast<E*>(_elems), N);
}

template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) {
  if (dirty_n_elems == N - 1) {
    // Actually means 0, so do the push.
    uint localBot = _bottom;
    // g++ complains if the volatile result of the assignment is
    // unused, so we cast the volatile away. We cannot cast directly
    // to void, because gcc treats that as not using the result of the
    // assignment. However, casting to E& means that we trigger an
    // unused-value warning. So, we cast the E& to void.
    (void)const_cast<E&>(_elems[localBot] = t);
    OrderAccess::release_store(&_bottom, increment_index(localBot));
    TASKQUEUE_STATS_ONLY(stats.record_push());
    return true;
  }
  return false;
}
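
// Informal note on the N - 1 case above: at most max_elems() == N - 2 tasks
// are ever stored, one slot being kept in reserve, so a dirty_size() of
// N - 1 (top one ahead of bottom, mod N) can only be a transient,
// non-canonical encoding of the *empty* queue left behind by a race on the
// last element; hence "actually means 0" and the push is safe.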

template<class E, MEMFLAGS F, unsigned int N> inline bool
GenericTaskQueue<E, F, N>::push(E t) {
  uint localBot = _bottom;
  assert(localBot < N, "_bottom out of range.");
  idx_t top = _age.top();
  uint dirty_n_elems = dirty_size(localBot, top);
  assert(dirty_n_elems < N, "n_elems out of range.");
  if (dirty_n_elems < max_elems()) {
    // g++ complains if the volatile result of the assignment is
    // unused, so we cast the volatile away. We cannot cast directly
    // to void, because gcc treats that as not using the result of the
    // assignment. However, casting to E& means that we trigger an
    // unused-value warning. So, we cast the E& to void.
    (void) const_cast<E&>(_elems[localBot] = t);
    OrderAccess::release_store(&_bottom, increment_index(localBot));
    TASKQUEUE_STATS_ONLY(stats.record_push());
    return true;
  } else {
    return push_slow(t, dirty_n_elems);
  }
}

template <class E, MEMFLAGS F, unsigned int N>
inline bool OverflowTaskQueue<E, F, N>::push(E t)
{
  if (!taskqueue_t::push(t)) {
    overflow_stack()->push(t);
    TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size()));
  }
  return true;
}

template <class E, MEMFLAGS F, unsigned int N>
inline bool OverflowTaskQueue<E, F, N>::try_push_to_taskqueue(E t) {
  return taskqueue_t::push(t);
}
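
// Illustrative drain loop for an OverflowTaskQueue (a sketch; the task
// type, the queue pointer q, and the process() helper are hypothetical).
// push() above never reports failure: when the bounded queue is full, the
// task spills to an unbounded overflow stack, which pop_overflow() (below)
// drains in LIFO order.
//
//   ExampleTask t;
//   while (q->pop_overflow(t) || q->pop_local(t)) {
//     process(t);
//   }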

// pop_local_slow() is done by the owning thread and is trying to
// get the last task in the queue. It will compete with pop_global()
// calls from other threads. The tag age is incremented whenever the
// queue goes empty, which happens here if this thread gets the last
// task, or in pop_global() if the queue wraps (top == 0 and
// pop_global() succeeds; see pop_global()).
template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) {
  // This queue was observed to contain exactly one element; either this
  // thread will claim it, or a competing "pop_global" will. In either
  // case, the queue will be logically empty afterwards. Create a new Age
  // value that represents the empty queue for the given value of
  // "_bottom". (We must also increment "tag" because of the case where
  // "bottom == 1", "top == 0". A pop_global could read the queue element
  // in that case, then have the owner thread do a pop followed by another
  // push. Without the incrementing of "tag", the pop_global's CAS could
  // succeed, allowing it to believe it has claimed the stale element.)
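  //
  // Worked timeline of that stale-element (ABA) case, with bottom == 1
  // and top == 0:
  //   1. A thief in pop_global() reads Age{top == 0, tag} and the element
  //      in slot 0, then stalls before its CAS on _age.
  //   2. The owner takes this slow path, claiming the element and setting
  //      _age to the empty representation, then pushes a fresh task,
  //      restoring bottom == 1, top == 0.
  //   3. The thief resumes. If only "top" were compared, its CAS would
  //      succeed and it would return the stale task from step 1; because
  //      step 2 advanced "tag", the CAS fails instead.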
  Age newAge((idx_t)localBot, oldAge.tag() + 1);
  // Perhaps a competing pop_global has already incremented "top", in which
  // case it wins the element.
  if (localBot == oldAge.top()) {
    // No competing pop_global has yet incremented "top"; we'll try to
    // install new_age, thus claiming the element.
    Age tempAge = _age.cmpxchg(newAge, oldAge);
    if (tempAge == oldAge) {
      // We win.
      assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
      TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
      return true;
    }
  }
  // We lose; a competing pop_global gets the element. But the queue is empty
  // and top is greater than bottom. Fix this representation of the empty queue
  // to become the canonical one.
  _age.set(newAge);
  assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
  return false;
}

template<class E, MEMFLAGS F, unsigned int N> inline bool
GenericTaskQueue<E, F, N>::pop_local(volatile E& t, uint threshold) {
  uint localBot = _bottom;
  // This value cannot be N-1. That can only occur as a result of
  // the assignment to bottom in this method; if it does, this method
  // resets the size to 0 before the next call (which is sequential,
  // since this is pop_local).
  uint dirty_n_elems = dirty_size(localBot, _age.top());
  assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
  if (dirty_n_elems <= threshold) return false;
  localBot = decrement_index(localBot);
  _bottom = localBot;
  // This is necessary to prevent any read below from being reordered
  // before the store just above.
  OrderAccess::fence();
  // g++ complains if the volatile result of the assignment is
  // unused, so we cast the volatile away. We cannot cast directly
  // to void, because gcc treats that as not using the result of the
  // assignment. However, casting to E& means that we trigger an
  // unused-value warning. So, we cast the E& to void.
  (void) const_cast<E&>(t = _elems[localBot]);
  // This is a second read of "age"; the "size()" above is the first.
  // If there's still at least one element in the queue, based on the
  // "_bottom" and "age" we've read, then there can be no interference with
  // a "pop_global" operation, and we're done.
  idx_t tp = _age.top(); // XXX
  if (size(localBot, tp) > 0) {
    assert(dirty_size(localBot, tp) != N - 1, "sanity");
    TASKQUEUE_STATS_ONLY(stats.record_pop());
    return true;
  } else {
    // Otherwise, the queue contained exactly one element; we take the slow
    // path.
    return pop_local_slow(localBot, _age.get());
  }
}
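
// Informal synchronization note: pop_local() writes _bottom and then, after
// a full fence, re-reads _age, while pop_global() reads _age and then (with
// a fence/acquire on weakly ordered machines) reads _bottom. This
// Dekker-style store-load pairing ensures the owner and a thief cannot both
// take the same last element on the fast path; the contended one-element
// case always funnels into the CAS in pop_local_slow().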

template <class E, MEMFLAGS F, unsigned int N>
bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t)
{
  if (overflow_empty()) return false;
  t = overflow_stack()->pop();
  return true;
}

template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) {
  Age oldAge = _age.get();
  // Architectures with weak memory model require a barrier here
  // to guarantee that bottom is not older than age,
  // which is crucial for the correctness of the algorithm.
#if !(defined SPARC || defined IA32 || defined AMD64)
  OrderAccess::fence();
#endif
  uint localBot = OrderAccess::load_acquire(&_bottom);
  uint n_elems = size(localBot, oldAge.top());
  if (n_elems == 0) {
    return false;
  }

  // g++ complains if the volatile result of the assignment is
  // unused, so we cast the volatile away. We cannot cast directly
  // to void, because gcc treats that as not using the result of the
  // assignment. However, casting to E& means that we trigger an
  // unused-value warning. So, we cast the E& to void.
  (void) const_cast<E&>(t = _elems[oldAge.top()]);
  Age newAge(oldAge);
  newAge.increment();
  Age resAge = _age.cmpxchg(newAge, oldAge);

  // Note that using "_bottom" here might fail, since a pop_local might
  // have decremented it.
  assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
  return resAge == oldAge;
}

inline int randomParkAndMiller(int *seed0) {
  const int a = 16807;
  const int m = 2147483647;
  const int q = 127773;  /* m div a */
  const int r = 2836;    /* m mod a */
  STATIC_ASSERT(sizeof(int) == 4);
  int seed = *seed0;
  int hi = seed / q;
  int lo = seed % q;
  int test = a * lo - r * hi;
  if (test > 0) {
    seed = test;
  } else {
    seed = test + m;
  }
  *seed0 = seed;
  return seed;
}
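
// The function above is the "minimal standard" Park-Miller generator,
// seed' = (16807 * seed) mod (2^31 - 1), computed via Schrage's trick:
// with m == a * q + r (q == m / a, r == m % a, and r < q), the quantity
// a * (seed % q) - r * (seed / q) equals (a * seed) mod m after at most one
// conditional addition of m, so no intermediate product overflows a 32-bit
// int. From seed 1 the stream begins 16807, 282475249, 1622650073, ...
// (the same stream as C++'s std::minstd_rand0).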

template<class E, MEMFLAGS F, unsigned int N>
int GenericTaskQueue<E, F, N>::next_random_queue_id() {
  return randomParkAndMiller(&_seed);
}

template<class T, MEMFLAGS F> bool
GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, E& t) {
  if (_n > 2) {
    T* const local_queue = _queues[queue_num];
    uint k1 = queue_num;

    if (local_queue->is_last_stolen_queue_id_valid()) {
      k1 = local_queue->last_stolen_queue_id();
      assert(k1 != queue_num, "Should not be the same");
    } else {
      while (k1 == queue_num) {
        k1 = local_queue->next_random_queue_id() % _n;
      }
    }

    uint k2 = queue_num;
    while (k2 == queue_num || k2 == k1) {
      k2 = local_queue->next_random_queue_id() % _n;
    }
    // Sample both and try the larger.
    uint sz1 = _queues[k1]->size();
    uint sz2 = _queues[k2]->size();

    uint sel_k = 0;
    bool suc = false;

    if (sz2 > sz1) {
      sel_k = k2;
      suc = _queues[k2]->pop_global(t);
    } else if (sz1 > 0) {
      sel_k = k1;
      suc = _queues[k1]->pop_global(t);
    }

    if (suc) {
      local_queue->set_last_stolen_queue_id(sel_k);
    } else {
      local_queue->invalidate_last_stolen_queue_id();
    }

    return suc;
  } else if (_n == 2) {
    // Just try the other one.
    uint k = (queue_num + 1) % 2;
    return _queues[k]->pop_global(t);
  } else {
    assert(_n == 1, "can't be zero.");
    return false;
  }
}
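
// Design note: sampling two random victims and stealing from the fuller one
// is the classic "power of two choices" load-balancing technique; it gets
// most of the benefit of scanning every queue at O(1) sampling cost.
// Caching the last successful victim adds affinity: a queue that yielded
// work once is likely to yield work again.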

template<class T, MEMFLAGS F> bool
GenericTaskQueueSet<T, F>::steal(uint queue_num, E& t) {
  for (uint i = 0; i < 2 * _n; i++) {
    TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal_attempt());
    if (steal_best_of_2(queue_num, t)) {
      TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal());
      return true;
    }
  }
  return false;
}

template <unsigned int N, MEMFLAGS F>
inline typename TaskQueueSuper<N, F>::Age TaskQueueSuper<N, F>::Age::cmpxchg(const Age new_age, const Age old_age) volatile {
  return Atomic::cmpxchg(new_age._data, &_data, old_age._data);
}
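
// Note: Atomic::cmpxchg takes (exchange_value, dest, compare_value) and
// returns the value previously at dest, so callers detect a successful swap
// by testing the result against the expected old Age (see pop_local_slow()
// and pop_global()).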

template<class E, MEMFLAGS F, unsigned int N>
template<class Fn>
inline void GenericTaskQueue<E, F, N>::iterate(Fn fn) {
  uint iters = size();
  uint index = _bottom;
  for (uint i = 0; i < iters; ++i) {
    index = decrement_index(index);
    fn(const_cast<E&>(_elems[index])); // cast away volatility
  }
}

#endif // SHARE_GC_SHARED_TASKQUEUE_INLINE_HPP