1 | /* |
2 | * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #include "precompiled.hpp" |
26 | #include "gc/shared/ptrQueue.hpp" |
27 | #include "logging/log.hpp" |
28 | #include "memory/allocation.hpp" |
29 | #include "memory/allocation.inline.hpp" |
30 | #include "runtime/atomic.hpp" |
31 | #include "runtime/mutex.hpp" |
32 | #include "runtime/mutexLocker.hpp" |
33 | #include "runtime/orderAccess.hpp" |
34 | #include "runtime/thread.inline.hpp" |
35 | #include "utilities/globalCounter.inline.hpp" |
36 | |
37 | #include <new> |
38 | |
39 | PtrQueue::PtrQueue(PtrQueueSet* qset, bool active) : |
40 | _qset(qset), |
41 | _active(active), |
42 | _index(0), |
43 | _capacity_in_bytes(0), |
44 | _buf(NULL) |
45 | {} |
46 | |
47 | PtrQueue::~PtrQueue() { |
  assert(_buf == NULL, "queue must be flushed before delete");
49 | } |
50 | |
51 | void PtrQueue::flush_impl() { |
52 | if (_buf != NULL) { |
53 | BufferNode* node = BufferNode::make_node_from_buffer(_buf, index()); |
54 | if (is_empty()) { |
55 | // No work to do. |
56 | qset()->deallocate_buffer(node); |
57 | } else { |
58 | qset()->enqueue_completed_buffer(node); |
59 | } |
60 | _buf = NULL; |
61 | set_index(0); |
62 | } |
63 | } |
64 | |
65 | void PtrQueue::enqueue_known_active(void* ptr) { |
66 | while (_index == 0) { |
67 | handle_zero_index(); |
68 | } |
69 | |
  assert(_buf != NULL, "postcondition");
  assert(index() > 0, "postcondition");
  assert(index() <= capacity(), "invariant");
73 | _index -= _element_size; |
74 | _buf[index()] = ptr; |
75 | } |
76 | |
77 | void PtrQueue::handle_zero_index() { |
  assert(index() == 0, "precondition");
79 | |
80 | if (_buf != NULL) { |
81 | handle_completed_buffer(); |
82 | } else { |
83 | // Bootstrapping kludge; lazily initialize capacity. The initial |
84 | // thread's queues are constructed before the second phase of the |
85 | // two-phase initialization of the associated qsets. As a result, |
86 | // we can't initialize _capacity_in_bytes in the queue constructor. |
87 | if (_capacity_in_bytes == 0) { |
88 | _capacity_in_bytes = index_to_byte_index(qset()->buffer_size()); |
89 | } |
90 | allocate_buffer(); |
91 | } |
92 | } |
93 | |
94 | void PtrQueue::allocate_buffer() { |
95 | _buf = qset()->allocate_buffer(); |
96 | reset(); |
97 | } |
98 | |
99 | void PtrQueue::enqueue_completed_buffer() { |
  assert(_buf != NULL, "precondition");
101 | BufferNode* node = BufferNode::make_node_from_buffer(_buf, index()); |
102 | qset()->enqueue_completed_buffer(node); |
103 | allocate_buffer(); |
104 | } |
105 | |
106 | BufferNode* BufferNode::allocate(size_t size) { |
107 | size_t byte_size = size * sizeof(void*); |
108 | void* data = NEW_C_HEAP_ARRAY(char, buffer_offset() + byte_size, mtGC); |
109 | return new (data) BufferNode; |
110 | } |
111 | |
112 | void BufferNode::deallocate(BufferNode* node) { |
113 | node->~BufferNode(); |
114 | FREE_C_HEAP_ARRAY(char, node); |
115 | } |
116 | |
117 | BufferNode::Allocator::Allocator(const char* name, size_t buffer_size) : |
118 | _buffer_size(buffer_size), |
119 | _pending_list(), |
120 | _free_list(), |
121 | _pending_count(0), |
122 | _free_count(0), |
123 | _transfer_lock(false) |
124 | { |
125 | strncpy(_name, name, sizeof(_name) - 1); |
126 | _name[sizeof(_name) - 1] = '\0'; |
127 | } |
128 | |
129 | BufferNode::Allocator::~Allocator() { |
130 | delete_list(_free_list.pop_all()); |
131 | delete_list(_pending_list.pop_all()); |
132 | } |
133 | |
134 | void BufferNode::Allocator::delete_list(BufferNode* list) { |
135 | while (list != NULL) { |
136 | BufferNode* next = list->next(); |
137 | DEBUG_ONLY(list->set_next(NULL);) |
138 | BufferNode::deallocate(list); |
139 | list = next; |
140 | } |
141 | } |
142 | |
143 | size_t BufferNode::Allocator::free_count() const { |
144 | return Atomic::load(&_free_count); |
145 | } |
146 | |
147 | BufferNode* BufferNode::Allocator::allocate() { |
148 | BufferNode* node; |
149 | { |
150 | // Protect against ABA; see release(). |
151 | GlobalCounter::CriticalSection cs(Thread::current()); |
152 | node = _free_list.pop(); |
153 | } |
154 | if (node == NULL) { |
155 | node = BufferNode::allocate(_buffer_size); |
156 | } else { |
157 | // Decrement count after getting buffer from free list. This, along |
158 | // with incrementing count before adding to free list, ensures count |
159 | // never underflows. |
160 | size_t count = Atomic::sub(1u, &_free_count); |
    assert((count + 1) != 0, "_free_count underflow");
162 | } |
163 | return node; |
164 | } |
165 | |
166 | // To solve the ABA problem for lock-free stack pop, allocate does the |
167 | // pop inside a critical section, and release synchronizes on the |
168 | // critical sections before adding to the _free_list. But we don't |
169 | // want to make every release have to do a synchronize. Instead, we |
170 | // initially place released nodes on the _pending_list, and transfer |
171 | // them to the _free_list in batches. Only one transfer at a time is |
172 | // permitted, with a lock bit to control access to that phase. A |
173 | // transfer takes all the nodes from the _pending_list, synchronizes on |
174 | // the _free_list pops, and then adds the former pending nodes to the |
175 | // _free_list. While that's happening, other threads might be adding |
176 | // other nodes to the _pending_list, to be dealt with by some later |
177 | // transfer. |
178 | void BufferNode::Allocator::release(BufferNode* node) { |
  assert(node != NULL, "precondition");
  assert(node->next() == NULL, "precondition");
181 | |
182 | // Desired minimum transfer batch size. There is relatively little |
183 | // importance to the specific number. It shouldn't be too big, else |
184 | // we're wasting space when the release rate is low. If the release |
185 | // rate is high, we might accumulate more than this before being |
186 | // able to start a new transfer, but that's okay. Also note that |
187 | // the allocation rate and the release rate are going to be fairly |
188 | // similar, due to how the buffers are used. |
189 | const size_t trigger_transfer = 10; |
190 | |
191 | // Add to pending list. Update count first so no underflow in transfer. |
192 | size_t pending_count = Atomic::add(1u, &_pending_count); |
193 | _pending_list.push(*node); |
194 | if (pending_count > trigger_transfer) { |
195 | try_transfer_pending(); |
196 | } |
197 | } |
198 | |
// Try to transfer nodes from _pending_list to _free_list, with a
// synchronization delay for any in-progress pops from the _free_list
// to solve the ABA problem there.  Returns true if a (possibly empty)
// transfer was performed, false if blocked from doing so by another
// thread's in-progress transfer.
204 | bool BufferNode::Allocator::try_transfer_pending() { |
205 | // Attempt to claim the lock. |
206 | if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail. |
207 | Atomic::cmpxchg(true, &_transfer_lock, false)) { |
208 | return false; |
209 | } |
210 | // Have the lock; perform the transfer. |
211 | |
212 | // Claim all the pending nodes. |
213 | BufferNode* first = _pending_list.pop_all(); |
214 | if (first != NULL) { |
215 | // Prepare to add the claimed nodes, and update _pending_count. |
216 | BufferNode* last = first; |
217 | size_t count = 1; |
218 | for (BufferNode* next = first->next(); next != NULL; next = next->next()) { |
219 | last = next; |
220 | ++count; |
221 | } |
222 | Atomic::sub(count, &_pending_count); |
223 | |
224 | // Wait for any in-progress pops, to avoid ABA for them. |
225 | GlobalCounter::write_synchronize(); |
226 | |
227 | // Add synchronized nodes to _free_list. |
228 | // Update count first so no underflow in allocate(). |
229 | Atomic::add(count, &_free_count); |
230 | _free_list.prepend(*first, *last); |
231 | log_trace(gc, ptrqueue, freelist) |
232 | ("Transferred %s pending to free: " SIZE_FORMAT, name(), count); |
233 | } |
234 | OrderAccess::release_store(&_transfer_lock, false); |
235 | return true; |
236 | } |
237 | |
238 | size_t BufferNode::Allocator::reduce_free_list(size_t remove_goal) { |
239 | try_transfer_pending(); |
240 | size_t removed = 0; |
241 | for ( ; removed < remove_goal; ++removed) { |
242 | BufferNode* node = _free_list.pop(); |
243 | if (node == NULL) break; |
244 | BufferNode::deallocate(node); |
245 | } |
246 | size_t new_count = Atomic::sub(removed, &_free_count); |
247 | log_debug(gc, ptrqueue, freelist) |
248 | ("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT, |
249 | name(), removed, new_count); |
250 | return removed; |
251 | } |
252 | |
253 | PtrQueueSet::PtrQueueSet(bool notify_when_complete) : |
254 | _allocator(NULL), |
255 | _cbl_mon(NULL), |
256 | _completed_buffers_head(NULL), |
257 | _completed_buffers_tail(NULL), |
258 | _n_completed_buffers(0), |
259 | _process_completed_buffers_threshold(ProcessCompletedBuffersThresholdNever), |
260 | _process_completed_buffers(false), |
261 | _notify_when_complete(notify_when_complete), |
262 | _all_active(false) |
263 | {} |
264 | |
265 | PtrQueueSet::~PtrQueueSet() { |
  // There are presently only a couple of (derived) instances ever
  // created, and they are permanent, so there is currently no harm
  // in doing nothing here.
269 | } |
270 | |
271 | void PtrQueueSet::initialize(Monitor* cbl_mon, |
272 | BufferNode::Allocator* allocator) { |
  assert(cbl_mon != NULL && allocator != NULL, "Init order issue?");
274 | _cbl_mon = cbl_mon; |
275 | _allocator = allocator; |
276 | } |
277 | |
278 | void** PtrQueueSet::allocate_buffer() { |
279 | BufferNode* node = _allocator->allocate(); |
280 | return BufferNode::make_buffer_from_node(node); |
281 | } |
282 | |
283 | void PtrQueueSet::deallocate_buffer(BufferNode* node) { |
284 | _allocator->release(node); |
285 | } |
286 | |
287 | void PtrQueueSet::enqueue_completed_buffer(BufferNode* cbn) { |
288 | MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); |
289 | cbn->set_next(NULL); |
290 | if (_completed_buffers_tail == NULL) { |
    assert(_completed_buffers_head == NULL, "Well-formedness");
292 | _completed_buffers_head = cbn; |
293 | _completed_buffers_tail = cbn; |
294 | } else { |
295 | _completed_buffers_tail->set_next(cbn); |
296 | _completed_buffers_tail = cbn; |
297 | } |
298 | _n_completed_buffers++; |
299 | |
300 | if (!_process_completed_buffers && |
301 | (_n_completed_buffers > _process_completed_buffers_threshold)) { |
302 | _process_completed_buffers = true; |
303 | if (_notify_when_complete) { |
304 | _cbl_mon->notify(); |
305 | } |
306 | } |
307 | assert_completed_buffers_list_len_correct_locked(); |
308 | } |
309 | |
310 | BufferNode* PtrQueueSet::get_completed_buffer(size_t stop_at) { |
311 | MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); |
312 | |
313 | if (_n_completed_buffers <= stop_at) { |
314 | return NULL; |
315 | } |
316 | |
  assert(_n_completed_buffers > 0, "invariant");
  assert(_completed_buffers_head != NULL, "invariant");
  assert(_completed_buffers_tail != NULL, "invariant");
320 | |
321 | BufferNode* bn = _completed_buffers_head; |
322 | _n_completed_buffers--; |
323 | _completed_buffers_head = bn->next(); |
324 | if (_completed_buffers_head == NULL) { |
    assert(_n_completed_buffers == 0, "invariant");
326 | _completed_buffers_tail = NULL; |
327 | _process_completed_buffers = false; |
328 | } |
329 | assert_completed_buffers_list_len_correct_locked(); |
330 | bn->set_next(NULL); |
331 | return bn; |
332 | } |
333 | |
334 | void PtrQueueSet::abandon_completed_buffers() { |
335 | BufferNode* buffers_to_delete = NULL; |
336 | { |
337 | MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); |
338 | buffers_to_delete = _completed_buffers_head; |
339 | _completed_buffers_head = NULL; |
340 | _completed_buffers_tail = NULL; |
341 | _n_completed_buffers = 0; |
342 | _process_completed_buffers = false; |
343 | } |
344 | while (buffers_to_delete != NULL) { |
345 | BufferNode* bn = buffers_to_delete; |
346 | buffers_to_delete = bn->next(); |
347 | bn->set_next(NULL); |
348 | deallocate_buffer(bn); |
349 | } |
350 | } |
351 | |
352 | #ifdef ASSERT |
353 | |
354 | void PtrQueueSet::assert_completed_buffers_list_len_correct_locked() { |
355 | assert_lock_strong(_cbl_mon); |
356 | size_t n = 0; |
357 | for (BufferNode* bn = _completed_buffers_head; bn != NULL; bn = bn->next()) { |
358 | ++n; |
359 | } |
360 | assert(n == _n_completed_buffers, |
361 | "Completed buffer length is wrong: counted: " SIZE_FORMAT |
362 | ", expected: " SIZE_FORMAT, n, _n_completed_buffers); |
363 | } |
364 | |
365 | #endif // ASSERT |
366 | |
// Merge lists of buffers.  The source queue set is emptied as a
// result.  The two queue sets must share the same monitor; a caller
// that needs to wake up processing threads should follow up with
// notify_if_necessary().
370 | void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { |
  assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
372 | MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); |
373 | if (_completed_buffers_tail == NULL) { |
    assert(_completed_buffers_head == NULL, "Well-formedness");
375 | _completed_buffers_head = src->_completed_buffers_head; |
376 | _completed_buffers_tail = src->_completed_buffers_tail; |
377 | } else { |
    assert(_completed_buffers_head != NULL, "Well-formedness");
379 | if (src->_completed_buffers_head != NULL) { |
380 | _completed_buffers_tail->set_next(src->_completed_buffers_head); |
381 | _completed_buffers_tail = src->_completed_buffers_tail; |
382 | } |
383 | } |
384 | _n_completed_buffers += src->_n_completed_buffers; |
385 | |
386 | src->_n_completed_buffers = 0; |
387 | src->_completed_buffers_head = NULL; |
388 | src->_completed_buffers_tail = NULL; |
389 | src->_process_completed_buffers = false; |
390 | |
  assert((_completed_buffers_head == NULL && _completed_buffers_tail == NULL) ||
         (_completed_buffers_head != NULL && _completed_buffers_tail != NULL),
         "Sanity");
394 | assert_completed_buffers_list_len_correct_locked(); |
395 | } |
396 | |
397 | void PtrQueueSet::notify_if_necessary() { |
398 | MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); |
399 | if (_n_completed_buffers > _process_completed_buffers_threshold) { |
400 | _process_completed_buffers = true; |
    if (_notify_when_complete) {
      _cbl_mon->notify();
    }
403 | } |
404 | } |
405 | |