| 1 | /* |
| 2 | * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #ifndef SHARE_GC_SHARED_TASKQUEUE_INLINE_HPP |
| 26 | #define SHARE_GC_SHARED_TASKQUEUE_INLINE_HPP |
| 27 | |
| 28 | #include "gc/shared/taskqueue.hpp" |
| 29 | #include "memory/allocation.inline.hpp" |
| 30 | #include "oops/oop.inline.hpp" |
| 31 | #include "runtime/atomic.hpp" |
| 32 | #include "runtime/orderAccess.hpp" |
| 33 | #include "utilities/debug.hpp" |
| 34 | #include "utilities/stack.inline.hpp" |
| 35 | |
| 36 | template <class T, MEMFLAGS F> |
| 37 | inline GenericTaskQueueSet<T, F>::GenericTaskQueueSet(uint n) : _n(n) { |
| 38 | typedef T* GenericTaskQueuePtr; |
| 39 | _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F); |
| 40 | for (uint i = 0; i < n; i++) { |
| 41 | _queues[i] = NULL; |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | template <class T, MEMFLAGS F> |
| 46 | inline GenericTaskQueueSet<T, F>::~GenericTaskQueueSet() { |
| 47 | FREE_C_HEAP_ARRAY(T*, _queues); |
| 48 | } |
| 49 | |
| 50 | template<class E, MEMFLAGS F, unsigned int N> |
| 51 | inline void GenericTaskQueue<E, F, N>::initialize() { |
| 52 | _elems = ArrayAllocator<E>::allocate(N, F); |
| 53 | } |
| 54 | |
| 55 | template<class E, MEMFLAGS F, unsigned int N> |
| 56 | inline GenericTaskQueue<E, F, N>::~GenericTaskQueue() { |
| 57 | ArrayAllocator<E>::free(const_cast<E*>(_elems), N); |
| 58 | } |
| 59 | |
| 60 | template<class E, MEMFLAGS F, unsigned int N> |
| 61 | bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) { |
| 62 | if (dirty_n_elems == N - 1) { |
| 63 | // Actually means 0, so do the push. |
| 64 | uint localBot = _bottom; |
| 65 | // g++ complains if the volatile result of the assignment is |
| 66 | // unused, so we cast the volatile away. We cannot cast directly |
| 67 | // to void, because gcc treats that as not using the result of the |
| 68 | // assignment. However, casting to E& means that we trigger an |
| 69 | // unused-value warning. So, we cast the E& to void. |
| 70 | (void)const_cast<E&>(_elems[localBot] = t); |
| 71 | OrderAccess::release_store(&_bottom, increment_index(localBot)); |
| 72 | TASKQUEUE_STATS_ONLY(stats.record_push()); |
| 73 | return true; |
| 74 | } |
| 75 | return false; |
| 76 | } |
| 77 | |
| 78 | template<class E, MEMFLAGS F, unsigned int N> inline bool |
| 79 | GenericTaskQueue<E, F, N>::push(E t) { |
| 80 | uint localBot = _bottom; |
| 81 | assert(localBot < N, "_bottom out of range." ); |
| 82 | idx_t top = _age.top(); |
| 83 | uint dirty_n_elems = dirty_size(localBot, top); |
| 84 | assert(dirty_n_elems < N, "n_elems out of range." ); |
| 85 | if (dirty_n_elems < max_elems()) { |
| 86 | // g++ complains if the volatile result of the assignment is |
| 87 | // unused, so we cast the volatile away. We cannot cast directly |
| 88 | // to void, because gcc treats that as not using the result of the |
| 89 | // assignment. However, casting to E& means that we trigger an |
| 90 | // unused-value warning. So, we cast the E& to void. |
| 91 | (void) const_cast<E&>(_elems[localBot] = t); |
| 92 | OrderAccess::release_store(&_bottom, increment_index(localBot)); |
| 93 | TASKQUEUE_STATS_ONLY(stats.record_push()); |
| 94 | return true; |
| 95 | } else { |
| 96 | return push_slow(t, dirty_n_elems); |
| 97 | } |
| 98 | } |
| 99 | |
| 100 | template <class E, MEMFLAGS F, unsigned int N> |
| 101 | inline bool OverflowTaskQueue<E, F, N>::push(E t) |
| 102 | { |
| 103 | if (!taskqueue_t::push(t)) { |
| 104 | overflow_stack()->push(t); |
| 105 | TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size())); |
| 106 | } |
| 107 | return true; |
| 108 | } |
| 109 | |
| 110 | template <class E, MEMFLAGS F, unsigned int N> |
| 111 | inline bool OverflowTaskQueue<E, F, N>::try_push_to_taskqueue(E t) { |
| 112 | return taskqueue_t::push(t); |
| 113 | } |
| 114 | |
| 115 | // pop_local_slow() is done by the owning thread and is trying to |
| 116 | // get the last task in the queue. It will compete with pop_global() |
| 117 | // that will be used by other threads. The tag age is incremented |
| 118 | // whenever the queue goes empty which it will do here if this thread |
| 119 | // gets the last task or in pop_global() if the queue wraps (top == 0 |
| 120 | // and pop_global() succeeds, see pop_global()). |
| 121 | template<class E, MEMFLAGS F, unsigned int N> |
| 122 | bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) { |
| 123 | // This queue was observed to contain exactly one element; either this |
| 124 | // thread will claim it, or a competing "pop_global". In either case, |
| 125 | // the queue will be logically empty afterwards. Create a new Age value |
| 126 | // that represents the empty queue for the given value of "_bottom". (We |
| 127 | // must also increment "tag" because of the case where "bottom == 1", |
| 128 | // "top == 0". A pop_global could read the queue element in that case, |
| 129 | // then have the owner thread do a pop followed by another push. Without |
| 130 | // the incrementing of "tag", the pop_global's CAS could succeed, |
| 131 | // allowing it to believe it has claimed the stale element.) |
| 132 | Age newAge((idx_t)localBot, oldAge.tag() + 1); |
| 133 | // Perhaps a competing pop_global has already incremented "top", in which |
| 134 | // case it wins the element. |
| 135 | if (localBot == oldAge.top()) { |
| 136 | // No competing pop_global has yet incremented "top"; we'll try to |
| 137 | // install new_age, thus claiming the element. |
| 138 | Age tempAge = _age.cmpxchg(newAge, oldAge); |
| 139 | if (tempAge == oldAge) { |
| 140 | // We win. |
| 141 | assert(dirty_size(localBot, _age.top()) != N - 1, "sanity" ); |
| 142 | TASKQUEUE_STATS_ONLY(stats.record_pop_slow()); |
| 143 | return true; |
| 144 | } |
| 145 | } |
| 146 | // We lose; a completing pop_global gets the element. But the queue is empty |
| 147 | // and top is greater than bottom. Fix this representation of the empty queue |
| 148 | // to become the canonical one. |
| 149 | _age.set(newAge); |
| 150 | assert(dirty_size(localBot, _age.top()) != N - 1, "sanity" ); |
| 151 | return false; |
| 152 | } |
| 153 | |
| 154 | template<class E, MEMFLAGS F, unsigned int N> inline bool |
| 155 | GenericTaskQueue<E, F, N>::pop_local(volatile E& t, uint threshold) { |
| 156 | uint localBot = _bottom; |
| 157 | // This value cannot be N-1. That can only occur as a result of |
| 158 | // the assignment to bottom in this method. If it does, this method |
| 159 | // resets the size to 0 before the next call (which is sequential, |
| 160 | // since this is pop_local.) |
| 161 | uint dirty_n_elems = dirty_size(localBot, _age.top()); |
| 162 | assert(dirty_n_elems != N - 1, "Shouldn't be possible..." ); |
| 163 | if (dirty_n_elems <= threshold) return false; |
| 164 | localBot = decrement_index(localBot); |
| 165 | _bottom = localBot; |
| 166 | // This is necessary to prevent any read below from being reordered |
| 167 | // before the store just above. |
| 168 | OrderAccess::fence(); |
| 169 | // g++ complains if the volatile result of the assignment is |
| 170 | // unused, so we cast the volatile away. We cannot cast directly |
| 171 | // to void, because gcc treats that as not using the result of the |
| 172 | // assignment. However, casting to E& means that we trigger an |
| 173 | // unused-value warning. So, we cast the E& to void. |
| 174 | (void) const_cast<E&>(t = _elems[localBot]); |
| 175 | // This is a second read of "age"; the "size()" above is the first. |
| 176 | // If there's still at least one element in the queue, based on the |
| 177 | // "_bottom" and "age" we've read, then there can be no interference with |
| 178 | // a "pop_global" operation, and we're done. |
| 179 | idx_t tp = _age.top(); // XXX |
| 180 | if (size(localBot, tp) > 0) { |
| 181 | assert(dirty_size(localBot, tp) != N - 1, "sanity" ); |
| 182 | TASKQUEUE_STATS_ONLY(stats.record_pop()); |
| 183 | return true; |
| 184 | } else { |
| 185 | // Otherwise, the queue contained exactly one element; we take the slow |
| 186 | // path. |
| 187 | return pop_local_slow(localBot, _age.get()); |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | template <class E, MEMFLAGS F, unsigned int N> |
| 192 | bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t) |
| 193 | { |
| 194 | if (overflow_empty()) return false; |
| 195 | t = overflow_stack()->pop(); |
| 196 | return true; |
| 197 | } |
| 198 | |
| 199 | template<class E, MEMFLAGS F, unsigned int N> |
| 200 | bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) { |
| 201 | Age oldAge = _age.get(); |
| 202 | // Architectures with weak memory model require a barrier here |
| 203 | // to guarantee that bottom is not older than age, |
| 204 | // which is crucial for the correctness of the algorithm. |
| 205 | #if !(defined SPARC || defined IA32 || defined AMD64) |
| 206 | OrderAccess::fence(); |
| 207 | #endif |
| 208 | uint localBot = OrderAccess::load_acquire(&_bottom); |
| 209 | uint n_elems = size(localBot, oldAge.top()); |
| 210 | if (n_elems == 0) { |
| 211 | return false; |
| 212 | } |
| 213 | |
| 214 | // g++ complains if the volatile result of the assignment is |
| 215 | // unused, so we cast the volatile away. We cannot cast directly |
| 216 | // to void, because gcc treats that as not using the result of the |
| 217 | // assignment. However, casting to E& means that we trigger an |
| 218 | // unused-value warning. So, we cast the E& to void. |
| 219 | (void) const_cast<E&>(t = _elems[oldAge.top()]); |
| 220 | Age newAge(oldAge); |
| 221 | newAge.increment(); |
| 222 | Age resAge = _age.cmpxchg(newAge, oldAge); |
| 223 | |
| 224 | // Note that using "_bottom" here might fail, since a pop_local might |
| 225 | // have decremented it. |
| 226 | assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity" ); |
| 227 | return resAge == oldAge; |
| 228 | } |
| 229 | |
| 230 | inline int randomParkAndMiller(int *seed0) { |
| 231 | const int a = 16807; |
| 232 | const int m = 2147483647; |
| 233 | const int q = 127773; /* m div a */ |
| 234 | const int r = 2836; /* m mod a */ |
| 235 | STATIC_ASSERT(sizeof(int) == 4); |
| 236 | int seed = *seed0; |
| 237 | int hi = seed / q; |
| 238 | int lo = seed % q; |
| 239 | int test = a * lo - r * hi; |
| 240 | if (test > 0) { |
| 241 | seed = test; |
| 242 | } else { |
| 243 | seed = test + m; |
| 244 | } |
| 245 | *seed0 = seed; |
| 246 | return seed; |
| 247 | } |
| 248 | |
| 249 | template<class E, MEMFLAGS F, unsigned int N> |
| 250 | int GenericTaskQueue<E, F, N>::next_random_queue_id() { |
| 251 | return randomParkAndMiller(&_seed); |
| 252 | } |
| 253 | |
| 254 | template<class T, MEMFLAGS F> bool |
| 255 | GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, E& t) { |
| 256 | if (_n > 2) { |
| 257 | T* const local_queue = _queues[queue_num]; |
| 258 | uint k1 = queue_num; |
| 259 | |
| 260 | if (local_queue->is_last_stolen_queue_id_valid()) { |
| 261 | k1 = local_queue->last_stolen_queue_id(); |
| 262 | assert(k1 != queue_num, "Should not be the same" ); |
| 263 | } else { |
| 264 | while (k1 == queue_num) { |
| 265 | k1 = local_queue->next_random_queue_id() % _n; |
| 266 | } |
| 267 | } |
| 268 | |
| 269 | uint k2 = queue_num; |
| 270 | while (k2 == queue_num || k2 == k1) { |
| 271 | k2 = local_queue->next_random_queue_id() % _n; |
| 272 | } |
| 273 | // Sample both and try the larger. |
| 274 | uint sz1 = _queues[k1]->size(); |
| 275 | uint sz2 = _queues[k2]->size(); |
| 276 | |
| 277 | uint sel_k = 0; |
| 278 | bool suc = false; |
| 279 | |
| 280 | if (sz2 > sz1) { |
| 281 | sel_k = k2; |
| 282 | suc = _queues[k2]->pop_global(t); |
| 283 | } else if (sz1 > 0) { |
| 284 | sel_k = k1; |
| 285 | suc = _queues[k1]->pop_global(t); |
| 286 | } |
| 287 | |
| 288 | if (suc) { |
| 289 | local_queue->set_last_stolen_queue_id(sel_k); |
| 290 | } else { |
| 291 | local_queue->invalidate_last_stolen_queue_id(); |
| 292 | } |
| 293 | |
| 294 | return suc; |
| 295 | } else if (_n == 2) { |
| 296 | // Just try the other one. |
| 297 | uint k = (queue_num + 1) % 2; |
| 298 | return _queues[k]->pop_global(t); |
| 299 | } else { |
| 300 | assert(_n == 1, "can't be zero." ); |
| 301 | return false; |
| 302 | } |
| 303 | } |
| 304 | |
| 305 | template<class T, MEMFLAGS F> bool |
| 306 | GenericTaskQueueSet<T, F>::steal(uint queue_num, E& t) { |
| 307 | for (uint i = 0; i < 2 * _n; i++) { |
| 308 | TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal_attempt()); |
| 309 | if (steal_best_of_2(queue_num, t)) { |
| 310 | TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal()); |
| 311 | return true; |
| 312 | } |
| 313 | } |
| 314 | return false; |
| 315 | } |
| 316 | |
| 317 | template <unsigned int N, MEMFLAGS F> |
| 318 | inline typename TaskQueueSuper<N, F>::Age TaskQueueSuper<N, F>::Age::cmpxchg(const Age new_age, const Age old_age) volatile { |
| 319 | return Atomic::cmpxchg(new_age._data, &_data, old_age._data); |
| 320 | } |
| 321 | |
| 322 | template<class E, MEMFLAGS F, unsigned int N> |
| 323 | template<class Fn> |
| 324 | inline void GenericTaskQueue<E, F, N>::iterate(Fn fn) { |
| 325 | uint iters = size(); |
| 326 | uint index = _bottom; |
| 327 | for (uint i = 0; i < iters; ++i) { |
| 328 | index = decrement_index(index); |
| 329 | fn(const_cast<E&>(_elems[index])); // cast away volatility |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | |
| 334 | #endif // SHARE_GC_SHARED_TASKQUEUE_INLINE_HPP |
| 335 | |