/*
 * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/ptrQueue.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/allocation.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/globalCounter.inline.hpp"

#include <new>

PtrQueue::PtrQueue(PtrQueueSet* qset, bool active) :
  _qset(qset),
  _active(active),
  _index(0),
  _capacity_in_bytes(0),
  _buf(NULL)
{}

PtrQueue::~PtrQueue() {
  assert(_buf == NULL, "queue must be flushed before delete");
}

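// Detach the current buffer from this queue. A non-empty buffer is handed
// to the qset as a completed buffer; an empty one is returned to the
// allocator.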
void PtrQueue::flush_impl() {
  if (_buf != NULL) {
    BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
    if (is_empty()) {
      // No work to do.
      qset()->deallocate_buffer(node);
    } else {
      qset()->enqueue_completed_buffer(node);
    }
    _buf = NULL;
    set_index(0);
  }
}

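// Add ptr to the queue's local buffer. The buffer is filled from the high
// end down towards index 0; when _index reaches 0 the buffer is full (or
// not yet allocated) and handle_zero_index() installs a fresh one.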
void PtrQueue::enqueue_known_active(void* ptr) {
  while (_index == 0) {
    handle_zero_index();
  }

  assert(_buf != NULL, "postcondition");
  assert(index() > 0, "postcondition");
  assert(index() <= capacity(), "invariant");
  _index -= _element_size;
  _buf[index()] = ptr;
}

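// Called when _index is 0: either the buffer is full, or no buffer has been
// allocated yet. A full buffer is passed to handle_completed_buffer();
// otherwise a new buffer is allocated.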
void PtrQueue::handle_zero_index() {
  assert(index() == 0, "precondition");

  if (_buf != NULL) {
    handle_completed_buffer();
  } else {
    // Bootstrapping kludge; lazily initialize capacity. The initial
    // thread's queues are constructed before the second phase of the
    // two-phase initialization of the associated qsets. As a result,
    // we can't initialize _capacity_in_bytes in the queue constructor.
    if (_capacity_in_bytes == 0) {
      _capacity_in_bytes = index_to_byte_index(qset()->buffer_size());
    }
    allocate_buffer();
  }
}

void PtrQueue::allocate_buffer() {
  _buf = qset()->allocate_buffer();
  reset();
}

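// Hand the current buffer to the qset as a completed buffer and install a
// fresh one.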
void PtrQueue::enqueue_completed_buffer() {
  assert(_buf != NULL, "precondition");
  BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
  qset()->enqueue_completed_buffer(node);
  allocate_buffer();
}

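// Allocate a BufferNode and its buffer of 'size' pointers as a single
// C-heap block, with the buffer immediately following the node header.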
BufferNode* BufferNode::allocate(size_t size) {
  size_t byte_size = size * sizeof(void*);
  void* data = NEW_C_HEAP_ARRAY(char, buffer_offset() + byte_size, mtGC);
  return new (data) BufferNode;
}

void BufferNode::deallocate(BufferNode* node) {
  node->~BufferNode();
  FREE_C_HEAP_ARRAY(char, node);
}

BufferNode::Allocator::Allocator(const char* name, size_t buffer_size) :
  _buffer_size(buffer_size),
  _pending_list(),
  _free_list(),
  _pending_count(0),
  _free_count(0),
  _transfer_lock(false)
{
  strncpy(_name, name, sizeof(_name) - 1);
  _name[sizeof(_name) - 1] = '\0';
}

BufferNode::Allocator::~Allocator() {
  delete_list(_free_list.pop_all());
  delete_list(_pending_list.pop_all());
}

void BufferNode::Allocator::delete_list(BufferNode* list) {
  while (list != NULL) {
    BufferNode* next = list->next();
    DEBUG_ONLY(list->set_next(NULL);)
    BufferNode::deallocate(list);
    list = next;
  }
}

size_t BufferNode::Allocator::free_count() const {
  return Atomic::load(&_free_count);
}

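// Take a node from the free list if one is available, otherwise allocate a
// new one. The pop is done inside a GlobalCounter critical section so that
// release() can synchronize with in-progress pops; see the comment before
// release() below.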
BufferNode* BufferNode::Allocator::allocate() {
  BufferNode* node;
  {
    // Protect against ABA; see release().
    GlobalCounter::CriticalSection cs(Thread::current());
    node = _free_list.pop();
  }
  if (node == NULL) {
    node = BufferNode::allocate(_buffer_size);
  } else {
    // Decrement count after getting buffer from free list. This, along
    // with incrementing count before adding to free list, ensures count
    // never underflows.
    size_t count = Atomic::sub(1u, &_free_count);
    assert((count + 1) != 0, "_free_count underflow");
  }
  return node;
}

// To solve the ABA problem for lock-free stack pop, allocate does the
// pop inside a critical section, and release synchronizes on the
// critical sections before adding to the _free_list. But we don't
// want to make every release have to do a synchronize. Instead, we
// initially place released nodes on the _pending_list, and transfer
// them to the _free_list in batches. Only one transfer at a time is
// permitted, with a lock bit to control access to that phase. A
// transfer takes all the nodes from the _pending_list, synchronizes on
// the _free_list pops, and then adds the former pending nodes to the
// _free_list. While that's happening, other threads might be adding
// other nodes to the _pending_list, to be dealt with by some later
// transfer.
void BufferNode::Allocator::release(BufferNode* node) {
  assert(node != NULL, "precondition");
  assert(node->next() == NULL, "precondition");

  // Desired minimum transfer batch size. There is relatively little
  // importance to the specific number. It shouldn't be too big, else
  // we're wasting space when the release rate is low. If the release
  // rate is high, we might accumulate more than this before being
  // able to start a new transfer, but that's okay. Also note that
  // the allocation rate and the release rate are going to be fairly
  // similar, due to how the buffers are used.
  const size_t trigger_transfer = 10;

  // Add to pending list. Update count first so no underflow in transfer.
  size_t pending_count = Atomic::add(1u, &_pending_count);
  _pending_list.push(*node);
  if (pending_count > trigger_transfer) {
    try_transfer_pending();
  }
}

// Try to transfer nodes from _pending_list to _free_list, with a
// synchronization delay for any in-progress pops from the _free_list,
// to solve ABA there. Return true if performed a (possibly empty)
// transfer, false if blocked from doing so by some other thread's
// in-progress transfer.
bool BufferNode::Allocator::try_transfer_pending() {
  // Attempt to claim the lock.
  if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail.
      Atomic::cmpxchg(true, &_transfer_lock, false)) {
    return false;
  }
  // Have the lock; perform the transfer.

  // Claim all the pending nodes.
  BufferNode* first = _pending_list.pop_all();
  if (first != NULL) {
    // Prepare to add the claimed nodes, and update _pending_count.
    BufferNode* last = first;
    size_t count = 1;
    for (BufferNode* next = first->next(); next != NULL; next = next->next()) {
      last = next;
      ++count;
    }
    Atomic::sub(count, &_pending_count);

    // Wait for any in-progress pops, to avoid ABA for them.
    GlobalCounter::write_synchronize();

    // Add synchronized nodes to _free_list.
    // Update count first so no underflow in allocate().
    Atomic::add(count, &_free_count);
    _free_list.prepend(*first, *last);
    log_trace(gc, ptrqueue, freelist)
             ("Transferred %s pending to free: " SIZE_FORMAT, name(), count);
  }
  OrderAccess::release_store(&_transfer_lock, false);
  return true;
}

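// Deallocate nodes from the free list, to keep it from growing without
// bound. At most remove_goal nodes are removed; the number actually removed
// is returned.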
size_t BufferNode::Allocator::reduce_free_list(size_t remove_goal) {
  try_transfer_pending();
  size_t removed = 0;
  for ( ; removed < remove_goal; ++removed) {
    BufferNode* node = _free_list.pop();
    if (node == NULL) break;
    BufferNode::deallocate(node);
  }
  size_t new_count = Atomic::sub(removed, &_free_count);
  log_debug(gc, ptrqueue, freelist)
           ("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT,
            name(), removed, new_count);
  return removed;
}

PtrQueueSet::PtrQueueSet(bool notify_when_complete) :
  _allocator(NULL),
  _cbl_mon(NULL),
  _completed_buffers_head(NULL),
  _completed_buffers_tail(NULL),
  _n_completed_buffers(0),
  _process_completed_buffers_threshold(ProcessCompletedBuffersThresholdNever),
  _process_completed_buffers(false),
  _notify_when_complete(notify_when_complete),
  _all_active(false)
{}

PtrQueueSet::~PtrQueueSet() {
  // There are presently only a couple (derived) instances ever
  // created, and they are permanent, so no harm currently done by
  // doing nothing here.
}

void PtrQueueSet::initialize(Monitor* cbl_mon,
                             BufferNode::Allocator* allocator) {
  assert(cbl_mon != NULL && allocator != NULL, "Init order issue?");
  _cbl_mon = cbl_mon;
  _allocator = allocator;
}

void** PtrQueueSet::allocate_buffer() {
  BufferNode* node = _allocator->allocate();
  return BufferNode::make_buffer_from_node(node);
}

void PtrQueueSet::deallocate_buffer(BufferNode* node) {
  _allocator->release(node);
}

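// Append cbn to the list of completed buffers, under the protection of
// _cbl_mon. If the number of completed buffers exceeds the configured
// threshold and processing has not already been requested, flag the list
// for processing and optionally notify a waiting processing thread.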
void PtrQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  cbn->set_next(NULL);
  if (_completed_buffers_tail == NULL) {
    assert(_completed_buffers_head == NULL, "Well-formedness");
    _completed_buffers_head = cbn;
    _completed_buffers_tail = cbn;
  } else {
    _completed_buffers_tail->set_next(cbn);
    _completed_buffers_tail = cbn;
  }
  _n_completed_buffers++;

  if (!_process_completed_buffers &&
      (_n_completed_buffers > _process_completed_buffers_threshold)) {
    _process_completed_buffers = true;
    if (_notify_when_complete) {
      _cbl_mon->notify();
    }
  }
  assert_completed_buffers_list_len_correct_locked();
}

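// Remove and return the buffer at the head of the completed list, or NULL
// if no more than stop_at completed buffers remain.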
BufferNode* PtrQueueSet::get_completed_buffer(size_t stop_at) {
  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);

  if (_n_completed_buffers <= stop_at) {
    return NULL;
  }

  assert(_n_completed_buffers > 0, "invariant");
  assert(_completed_buffers_head != NULL, "invariant");
  assert(_completed_buffers_tail != NULL, "invariant");

  BufferNode* bn = _completed_buffers_head;
  _n_completed_buffers--;
  _completed_buffers_head = bn->next();
  if (_completed_buffers_head == NULL) {
    assert(_n_completed_buffers == 0, "invariant");
    _completed_buffers_tail = NULL;
    _process_completed_buffers = false;
  }
  assert_completed_buffers_list_len_correct_locked();
  bn->set_next(NULL);
  return bn;
}

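// Discard all completed buffers. The list is detached while holding
// _cbl_mon, then the buffers are deallocated outside the lock.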
void PtrQueueSet::abandon_completed_buffers() {
  BufferNode* buffers_to_delete = NULL;
  {
    MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
    buffers_to_delete = _completed_buffers_head;
    _completed_buffers_head = NULL;
    _completed_buffers_tail = NULL;
    _n_completed_buffers = 0;
    _process_completed_buffers = false;
  }
  while (buffers_to_delete != NULL) {
    BufferNode* bn = buffers_to_delete;
    buffers_to_delete = bn->next();
    bn->set_next(NULL);
    deallocate_buffer(bn);
  }
}

#ifdef ASSERT

void PtrQueueSet::assert_completed_buffers_list_len_correct_locked() {
  assert_lock_strong(_cbl_mon);
  size_t n = 0;
  for (BufferNode* bn = _completed_buffers_head; bn != NULL; bn = bn->next()) {
    ++n;
  }
  assert(n == _n_completed_buffers,
         "Completed buffer length is wrong: counted: " SIZE_FORMAT
         ", expected: " SIZE_FORMAT, n, _n_completed_buffers);
}

#endif // ASSERT

// Merge lists of completed buffers. Notify the processing threads.
// The source queue set is emptied as a result. The queue sets
// must share the monitor.
void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
  assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  if (_completed_buffers_tail == NULL) {
    assert(_completed_buffers_head == NULL, "Well-formedness");
    _completed_buffers_head = src->_completed_buffers_head;
    _completed_buffers_tail = src->_completed_buffers_tail;
  } else {
    assert(_completed_buffers_head != NULL, "Well-formedness");
    if (src->_completed_buffers_head != NULL) {
      _completed_buffers_tail->set_next(src->_completed_buffers_head);
      _completed_buffers_tail = src->_completed_buffers_tail;
    }
  }
  _n_completed_buffers += src->_n_completed_buffers;

  src->_n_completed_buffers = 0;
  src->_completed_buffers_head = NULL;
  src->_completed_buffers_tail = NULL;
  src->_process_completed_buffers = false;

  assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
         _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
         "Sanity");
  assert_completed_buffers_list_len_correct_locked();
}

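// If the number of completed buffers exceeds the processing threshold, flag
// the list for processing and optionally notify a waiting processing thread.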
void PtrQueueSet::notify_if_necessary() {
  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  if (_n_completed_buffers > _process_completed_buffers_threshold) {
    _process_completed_buffers = true;
    if (_notify_when_complete) {
      _cbl_mon->notify();
    }
  }
}