/*
 * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/threadLocalAllocBuffer.inline.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadSMR.hpp"
#include "utilities/copy.hpp"

size_t ThreadLocalAllocBuffer::_max_size = 0;
int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0;
unsigned int ThreadLocalAllocBuffer::_target_refills = 0;

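// Number of words still available in the buffer, counted from top() up to
// hard_end(), i.e. including the alignment reserve. Returns 0 once the
// TLAB has been retired.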
size_t ThreadLocalAllocBuffer::remaining() {
  if (end() == NULL) {
    return 0;
  }

  return pointer_delta(hard_end(), top());
}

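// Called around GC: folds this thread's TLAB counters into the global
// statistics, samples the allocation fraction that drives resize(), and
// resets the per-GC counters.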
void ThreadLocalAllocBuffer::accumulate_and_reset_statistics(ThreadLocalAllocStats* stats) {
  Thread* thr = thread();
  size_t capacity = Universe::heap()->tlab_capacity(thr);
  size_t used = Universe::heap()->tlab_used(thr);

  _gc_waste += (unsigned)remaining();
  size_t total_allocated = thr->allocated_bytes();
  size_t allocated_since_last_gc = total_allocated - _allocated_before_last_gc;
  _allocated_before_last_gc = total_allocated;

  print_stats("gc");

  if (_number_of_refills > 0) {
    // Update allocation history if a reasonable amount of eden was allocated.
    bool update_allocation_history = used > 0.5 * capacity;

    if (update_allocation_history) {
      // Average the fraction of eden allocated in a tlab by this
      // thread for use in the next resize operation.
      // _gc_waste is not subtracted because it's included in
      // "used".
      // The result can be larger than 1.0 due to direct to old allocations.
      // These allocations should ideally not be counted but since it is not possible
      // to filter them out here we just cap the fraction to be at most 1.0.
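      // Example (illustrative numbers): a thread that allocated 16M since
      // the last GC while total TLAB use is 64M samples a fraction of 0.25.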
      double alloc_frac = MIN2(1.0, (double) allocated_since_last_gc / used);
      _allocation_fraction.sample(alloc_frac);
    }

    stats->update_fast_allocations(_number_of_refills,
                                   _allocated_size,
                                   _gc_waste,
                                   _fast_refill_waste,
                                   _slow_refill_waste);
  } else {
    assert(_number_of_refills == 0 && _fast_refill_waste == 0 &&
           _slow_refill_waste == 0 && _gc_waste == 0,
           "tlab stats == 0");
  }

  stats->update_slow_allocations(_slow_allocations);

  reset_statistics();
}

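// Plug the unused part of the buffer, [top(), hard_end()), with a dummy
// object so that heap walkers can parse over it.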
void ThreadLocalAllocBuffer::insert_filler() {
  assert(end() != NULL, "Must not be retired");
  if (top() < hard_end()) {
    Universe::heap()->fill_with_dummy_object(top(), hard_end(), true);
  }
}

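// Make the heap parsable. Without ZeroTLAB the buffer is kept and its
// unused tail is plugged with a filler object that later allocations simply
// overwrite. With ZeroTLAB the buffer is retired instead, so a subsequent
// refill re-establishes the pre-zeroed memory that ZeroTLAB allocation
// paths rely on.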
void ThreadLocalAllocBuffer::make_parsable() {
  if (end() != NULL) {
    invariants();
    if (ZeroTLAB) {
      retire();
    } else {
      insert_filler();
    }
  }
}

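// Give the buffer up: optionally fold the statistics, credit the used
// bytes to the owning thread, plug the unused tail, and clear the pointers.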
void ThreadLocalAllocBuffer::retire(ThreadLocalAllocStats* stats) {
  if (stats != NULL) {
    accumulate_and_reset_statistics(stats);
  }

  if (end() != NULL) {
    invariants();
    thread()->incr_allocated_bytes(used_bytes());
    insert_filler();
    initialize(NULL, NULL, NULL);
  }
}

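// Called on the allocation slow path when the current buffer is abandoned
// in favor of a new one; whatever was left in it counts as slow refill
// waste.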
void ThreadLocalAllocBuffer::retire_before_allocation() {
  _slow_refill_waste += (unsigned int)remaining();
  retire();
}

void ThreadLocalAllocBuffer::resize() {
  // Compute the next tlab size using expected allocation amount
  assert(ResizeTLAB, "Should not call this otherwise");
  size_t alloc = (size_t)(_allocation_fraction.average() *
                          (Universe::heap()->tlab_capacity(thread()) / HeapWordSize));
  size_t new_size = alloc / _target_refills;
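  // Example (illustrative numbers): with an allocation fraction of 0.05,
  // a 512M eden and 50 target refills, alloc is 5% of eden (in words) and
  // new_size is 0.05 * 512M / 50 = 512K worth of words per TLAB.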

  new_size = MIN2(MAX2(new_size, min_size()), max_size());

  size_t aligned_new_size = align_object_size(new_size);

  log_trace(gc, tlab)("TLAB new size: thread: " INTPTR_FORMAT " [id: %2d]"
                      " refills %d alloc: %8.6f desired_size: " SIZE_FORMAT " -> " SIZE_FORMAT,
                      p2i(thread()), thread()->osthread()->thread_id(),
                      _target_refills, _allocation_fraction.average(), desired_size(), aligned_new_size);

  set_desired_size(aligned_new_size);
  set_refill_waste_limit(initial_refill_waste_limit());
}

void ThreadLocalAllocBuffer::reset_statistics() {
  _number_of_refills = 0;
  _fast_refill_waste = 0;
  _slow_refill_waste = 0;
  _gc_waste          = 0;
  _slow_allocations  = 0;
  _allocated_size    = 0;
}

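// Install a freshly allocated buffer: account for the refill, set up the
// new boundaries, and hold back alignment_reserve() words at the end so a
// filler object always fits.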
void ThreadLocalAllocBuffer::fill(HeapWord* start,
                                  HeapWord* top,
                                  size_t    new_size) {
  _number_of_refills++;
  _allocated_size += new_size;
  print_stats("fill");
  assert(top <= start + new_size - alignment_reserve(), "size too small");

  initialize(start, top, start + new_size - alignment_reserve());

  // Reset amount of internal fragmentation
  set_refill_waste_limit(initial_refill_waste_limit());
}

void ThreadLocalAllocBuffer::initialize(HeapWord* start,
                                        HeapWord* top,
                                        HeapWord* end) {
  set_start(start);
  set_top(top);
  set_pf_top(top);
  set_end(end);
  set_allocation_end(end);
  invariants();
}

void ThreadLocalAllocBuffer::initialize() {
  initialize(NULL,                    // start
             NULL,                    // top
             NULL);                   // end

  set_desired_size(initial_desired_size());

  size_t capacity = Universe::heap()->tlab_capacity(thread()) / HeapWordSize;
  double alloc_frac = desired_size() * target_refills() / (double) capacity;
  _allocation_fraction.sample(alloc_frac);

  set_refill_waste_limit(initial_refill_waste_limit());

  reset_statistics();
}

void ThreadLocalAllocBuffer::startup_initialization() {
  ThreadLocalAllocStats::initialize();

  // Assuming each thread's active tlab is, on average,
  // 1/2 full at a GC
  _target_refills = 100 / (2 * TLABWasteTargetPercent);
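  // With the default TLABWasteTargetPercent of 1, this works out to
  // 100 / (2 * 1) = 50 refills between GCs.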
  // We need to set initial target refills to 2 to avoid a GC which causes VM
  // abort during VM initialization.
  _target_refills = MAX2(_target_refills, 2U);

#ifdef COMPILER2
  // If the C2 compiler is present, extra space is needed at the end of
  // TLABs, otherwise prefetching instructions generated by the C2
  // compiler will fault (due to accessing memory outside of heap).
  // The amount of space is the max of the number of lines to
  // prefetch for array and for instance allocations. (Extra space must be
  // reserved to accommodate both types of allocations.)
  //
  // Only SPARC-specific BIS instructions are known to fault. (Those
  // instructions are generated if AllocatePrefetchStyle==3 and
  // AllocatePrefetchInstr==1). To be on the safe side, however,
  // extra space is reserved for all combinations of
  // AllocatePrefetchStyle and AllocatePrefetchInstr.
  //
  // If the C2 compiler is not present, no space is reserved.

  // +1 for rounding up to next cache line, +1 to be safe
  if (is_server_compilation_mode_vm()) {
    int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
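    // Illustrative example (flag values are platform defaults and vary):
    // with AllocatePrefetchDistance == 192, AllocatePrefetchStepSize == 64
    // and lines == 5, the reserve is (192 + 64 * 5) / 8 = 64 words on a
    // 64-bit VM.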
    _reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) /
                                       (int)HeapWordSize;
  }
#endif

  // During jvm startup, the main thread is initialized
  // before the heap is initialized. So reinitialize it now.
  guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
  Thread::current()->tlab().initialize();

  log_develop_trace(gc, tlab)("TLAB min: " SIZE_FORMAT " initial: " SIZE_FORMAT " max: " SIZE_FORMAT,
                              min_size(), Thread::current()->tlab().initial_desired_size(), max_size());
}

size_t ThreadLocalAllocBuffer::initial_desired_size() {
  size_t init_sz = 0;

  if (TLABSize > 0) {
    init_sz = TLABSize / HeapWordSize;
  } else {
    // Initial size is a function of the average number of allocating threads.
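    // Example (illustrative numbers): with a 512M eden, 16 allocating
    // threads and 50 target refills, each TLAB starts at
    // 512M / (16 * 50) = 640K worth of words.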
    unsigned int nof_threads = ThreadLocalAllocStats::allocating_threads_avg();

    init_sz = (Universe::heap()->tlab_capacity(thread()) / HeapWordSize) /
              (nof_threads * target_refills());
    init_sz = align_object_size(init_sz);
  }
  init_sz = MIN2(MAX2(init_sz, min_size()), max_size());
  return init_sz;
}

void ThreadLocalAllocBuffer::print_stats(const char* tag) {
  Log(gc, tlab) log;
  if (!log.is_trace()) {
    return;
  }

  Thread* thrd = thread();
  size_t waste = _gc_waste + _slow_refill_waste + _fast_refill_waste;
  double waste_percent = percent_of(waste, _allocated_size);
  size_t tlab_used = Universe::heap()->tlab_used(thrd);
  log.trace("TLAB: %s thread: " INTPTR_FORMAT " [id: %2d]"
            " desired_size: " SIZE_FORMAT "KB"
            " slow allocs: %d refill waste: " SIZE_FORMAT "B"
            " alloc:%8.5f %8.0fKB refills: %d waste %4.1f%% gc: %dB"
            " slow: %dB fast: %dB",
            tag, p2i(thrd), thrd->osthread()->thread_id(),
            _desired_size / (K / HeapWordSize),
            _slow_allocations, _refill_waste_limit * HeapWordSize,
            _allocation_fraction.average(),
            _allocation_fraction.average() * tlab_used / K,
            _number_of_refills, waste_percent,
            _gc_waste * HeapWordSize,
            _slow_refill_waste * HeapWordSize,
            _fast_refill_waste * HeapWordSize);
}

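// Support for the heap sampler (low-overhead heap profiling): clip _end so
// the inlined fast path runs out of space at the next sample point and
// falls into the slow path, and track bytes allocated since the last sample.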
void ThreadLocalAllocBuffer::set_sample_end(bool reset_byte_accumulation) {
  size_t heap_words_remaining = pointer_delta(_end, _top);
  size_t bytes_until_sample = thread()->heap_sampler().bytes_until_sample();
  size_t words_until_sample = bytes_until_sample / HeapWordSize;

  if (reset_byte_accumulation) {
    _bytes_since_last_sample_point = 0;
  }

  if (heap_words_remaining > words_until_sample) {
    HeapWord* new_end = _top + words_until_sample;
    set_end(new_end);
    _bytes_since_last_sample_point += bytes_until_sample;
  } else {
    _bytes_since_last_sample_point += heap_words_remaining * HeapWordSize;
  }
}

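// The TLAB is embedded in its owning Thread, so the Thread* can be
// recovered by taking the address of this TLAB's _start field and
// subtracting that field's offset within Thread.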
Thread* ThreadLocalAllocBuffer::thread() {
  return (Thread*)(((char*)this) + in_bytes(start_offset()) - in_bytes(Thread::tlab_start_offset()));
}

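// Undo the clipping done by set_sample_end(): restore _end to the real
// allocation end.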
void ThreadLocalAllocBuffer::set_back_allocation_end() {
  _end = _allocation_end;
}

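// The real end of the buffer, including the alignment reserve that end()
// and allocation_end() exclude.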
HeapWord* ThreadLocalAllocBuffer::hard_end() {
  return _allocation_end + alignment_reserve();
}

PerfVariable* ThreadLocalAllocStats::_perf_allocating_threads;
PerfVariable* ThreadLocalAllocStats::_perf_total_refills;
PerfVariable* ThreadLocalAllocStats::_perf_max_refills;
PerfVariable* ThreadLocalAllocStats::_perf_total_allocations;
PerfVariable* ThreadLocalAllocStats::_perf_total_gc_waste;
PerfVariable* ThreadLocalAllocStats::_perf_max_gc_waste;
PerfVariable* ThreadLocalAllocStats::_perf_total_slow_refill_waste;
PerfVariable* ThreadLocalAllocStats::_perf_max_slow_refill_waste;
PerfVariable* ThreadLocalAllocStats::_perf_total_fast_refill_waste;
PerfVariable* ThreadLocalAllocStats::_perf_max_fast_refill_waste;
PerfVariable* ThreadLocalAllocStats::_perf_total_slow_allocations;
PerfVariable* ThreadLocalAllocStats::_perf_max_slow_allocations;
AdaptiveWeightedAverage ThreadLocalAllocStats::_allocating_threads_avg(0);

static PerfVariable* create_perf_variable(const char* name, PerfData::Units unit, TRAPS) {
  ResourceMark rm;
  return PerfDataManager::create_variable(SUN_GC, PerfDataManager::counter_name("tlab", name), unit, THREAD);
}

void ThreadLocalAllocStats::initialize() {
  _allocating_threads_avg = AdaptiveWeightedAverage(TLABAllocationWeight);
  _allocating_threads_avg.sample(1); // One allocating thread at startup

  if (UsePerfData) {
    EXCEPTION_MARK;
    _perf_allocating_threads      = create_perf_variable("allocThreads", PerfData::U_None,  CHECK);
    _perf_total_refills           = create_perf_variable("fills",        PerfData::U_None,  CHECK);
    _perf_max_refills             = create_perf_variable("maxFills",     PerfData::U_None,  CHECK);
    _perf_total_allocations       = create_perf_variable("alloc",        PerfData::U_Bytes, CHECK);
    _perf_total_gc_waste          = create_perf_variable("gcWaste",      PerfData::U_Bytes, CHECK);
    _perf_max_gc_waste            = create_perf_variable("maxGcWaste",   PerfData::U_Bytes, CHECK);
    _perf_total_slow_refill_waste = create_perf_variable("slowWaste",    PerfData::U_Bytes, CHECK);
    _perf_max_slow_refill_waste   = create_perf_variable("maxSlowWaste", PerfData::U_Bytes, CHECK);
    _perf_total_fast_refill_waste = create_perf_variable("fastWaste",    PerfData::U_Bytes, CHECK);
    _perf_max_fast_refill_waste   = create_perf_variable("maxFastWaste", PerfData::U_Bytes, CHECK);
    _perf_total_slow_allocations  = create_perf_variable("slowAlloc",    PerfData::U_None,  CHECK);
    _perf_max_slow_allocations    = create_perf_variable("maxSlowAlloc", PerfData::U_None,  CHECK);
  }
}

ThreadLocalAllocStats::ThreadLocalAllocStats() :
    _allocating_threads(0),
    _total_refills(0),
    _max_refills(0),
    _total_allocations(0),
    _total_gc_waste(0),
    _max_gc_waste(0),
    _total_fast_refill_waste(0),
    _max_fast_refill_waste(0),
    _total_slow_refill_waste(0),
    _max_slow_refill_waste(0),
    _total_slow_allocations(0),
    _max_slow_allocations(0) {}

unsigned int ThreadLocalAllocStats::allocating_threads_avg() {
  return MAX2((unsigned int)(_allocating_threads_avg.average() + 0.5), 1U);
}

void ThreadLocalAllocStats::update_fast_allocations(unsigned int refills,
                                                    size_t allocations,
                                                    size_t gc_waste,
                                                    size_t fast_refill_waste,
                                                    size_t slow_refill_waste) {
  _allocating_threads      += 1;
  _total_refills           += refills;
  _max_refills              = MAX2(_max_refills, refills);
  _total_allocations       += allocations;
  _total_gc_waste          += gc_waste;
  _max_gc_waste             = MAX2(_max_gc_waste, gc_waste);
  _total_fast_refill_waste += fast_refill_waste;
  _max_fast_refill_waste    = MAX2(_max_fast_refill_waste, fast_refill_waste);
  _total_slow_refill_waste += slow_refill_waste;
  _max_slow_refill_waste    = MAX2(_max_slow_refill_waste, slow_refill_waste);
}

void ThreadLocalAllocStats::update_slow_allocations(unsigned int allocations) {
  _total_slow_allocations += allocations;
  _max_slow_allocations    = MAX2(_max_slow_allocations, allocations);
}

void ThreadLocalAllocStats::update(const ThreadLocalAllocStats& other) {
  _allocating_threads      += other._allocating_threads;
  _total_refills           += other._total_refills;
  _max_refills              = MAX2(_max_refills, other._max_refills);
  _total_allocations       += other._total_allocations;
  _total_gc_waste          += other._total_gc_waste;
  _max_gc_waste             = MAX2(_max_gc_waste, other._max_gc_waste);
  _total_fast_refill_waste += other._total_fast_refill_waste;
  _max_fast_refill_waste    = MAX2(_max_fast_refill_waste, other._max_fast_refill_waste);
  _total_slow_refill_waste += other._total_slow_refill_waste;
  _max_slow_refill_waste    = MAX2(_max_slow_refill_waste, other._max_slow_refill_waste);
  _total_slow_allocations  += other._total_slow_allocations;
  _max_slow_allocations     = MAX2(_max_slow_allocations, other._max_slow_allocations);
}

void ThreadLocalAllocStats::reset() {
  _allocating_threads      = 0;
  _total_refills           = 0;
  _max_refills             = 0;
  _total_allocations       = 0;
  _total_gc_waste          = 0;
  _max_gc_waste            = 0;
  _total_fast_refill_waste = 0;
  _max_fast_refill_waste   = 0;
  _total_slow_refill_waste = 0;
  _max_slow_refill_waste   = 0;
  _total_slow_allocations  = 0;
  _max_slow_allocations    = 0;
}

void ThreadLocalAllocStats::publish() {
  if (_total_allocations == 0) {
    return;
  }

  _allocating_threads_avg.sample(_allocating_threads);

  const size_t waste = _total_gc_waste + _total_slow_refill_waste + _total_fast_refill_waste;
  const double waste_percent = percent_of(waste, _total_allocations);
  log_debug(gc, tlab)("TLAB totals: thrds: %d refills: %d max: %d"
                      " slow allocs: %d max %d waste: %4.1f%%"
                      " gc: " SIZE_FORMAT "B max: " SIZE_FORMAT "B"
                      " slow: " SIZE_FORMAT "B max: " SIZE_FORMAT "B"
                      " fast: " SIZE_FORMAT "B max: " SIZE_FORMAT "B",
                      _allocating_threads, _total_refills, _max_refills,
                      _total_slow_allocations, _max_slow_allocations, waste_percent,
                      _total_gc_waste * HeapWordSize, _max_gc_waste * HeapWordSize,
                      _total_slow_refill_waste * HeapWordSize, _max_slow_refill_waste * HeapWordSize,
                      _total_fast_refill_waste * HeapWordSize, _max_fast_refill_waste * HeapWordSize);

  if (UsePerfData) {
    _perf_allocating_threads      ->set_value(_allocating_threads);
    _perf_total_refills           ->set_value(_total_refills);
    _perf_max_refills             ->set_value(_max_refills);
    _perf_total_allocations       ->set_value(_total_allocations);
    _perf_total_gc_waste          ->set_value(_total_gc_waste);
    _perf_max_gc_waste            ->set_value(_max_gc_waste);
    _perf_total_slow_refill_waste ->set_value(_total_slow_refill_waste);
    _perf_max_slow_refill_waste   ->set_value(_max_slow_refill_waste);
    _perf_total_fast_refill_waste ->set_value(_total_fast_refill_waste);
    _perf_max_fast_refill_waste   ->set_value(_max_fast_refill_waste);
    _perf_total_slow_allocations  ->set_value(_total_slow_allocations);
    _perf_max_slow_allocations    ->set_value(_max_slow_allocations);
  }
}

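// Words reserved at the end of every TLAB: the larger of what the heap
// needs to place a filler object and what C2-generated allocation
// prefetching may touch beyond the TLAB end.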
size_t ThreadLocalAllocBuffer::end_reserve() {
  size_t reserve_size = Universe::heap()->tlab_alloc_reserve();
  return MAX2(reserve_size, (size_t)_reserve_for_allocation_prefetch);
}
464 | |