/*
 * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/threadLocalAllocBuffer.inline.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadSMR.hpp"
#include "utilities/copy.hpp"

size_t ThreadLocalAllocBuffer::_max_size = 0;
int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0;
unsigned int ThreadLocalAllocBuffer::_target_refills = 0;

size_t ThreadLocalAllocBuffer::remaining() {
  if (end() == NULL) {
    return 0;
  }

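  // Note: this measures up to hard_end(), so the alignment reserve kept for
  // the trailing filler object is counted as part of the remaining space.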
  return pointer_delta(hard_end(), top());
}

void ThreadLocalAllocBuffer::accumulate_and_reset_statistics(ThreadLocalAllocStats* stats) {
  Thread* thr = thread();
  size_t capacity = Universe::heap()->tlab_capacity(thr);
  size_t used = Universe::heap()->tlab_used(thr);

  _gc_waste += (unsigned)remaining();
  size_t total_allocated = thr->allocated_bytes();
  size_t allocated_since_last_gc = total_allocated - _allocated_before_last_gc;
  _allocated_before_last_gc = total_allocated;

  print_stats("gc");

  if (_number_of_refills > 0) {
    // Update allocation history if a reasonable amount of eden was allocated.
    bool update_allocation_history = used > 0.5 * capacity;

    if (update_allocation_history) {
      // Average the fraction of eden allocated in a tlab by this
      // thread for use in the next resize operation.
      // _gc_waste is not subtracted because it's included in
      // "used".
      // The result can be larger than 1.0 due to direct to old allocations.
      // These allocations should ideally not be counted but since it is not possible
      // to filter them out here we just cap the fraction to be at most 1.0.
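      // Illustrative example with hypothetical numbers: if this thread
      // allocated 8M bytes since the last GC and eden "used" is 32M bytes,
      // the sampled fraction is 8M / 32M = 0.25.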
      double alloc_frac = MIN2(1.0, (double) allocated_since_last_gc / used);
      _allocation_fraction.sample(alloc_frac);
    }

    stats->update_fast_allocations(_number_of_refills,
                                   _allocated_size,
                                   _gc_waste,
                                   _fast_refill_waste,
                                   _slow_refill_waste);
  } else {
    assert(_number_of_refills == 0 && _fast_refill_waste == 0 &&
           _slow_refill_waste == 0 && _gc_waste == 0,
           "tlab stats == 0");
  }

  stats->update_slow_allocations(_slow_allocations);

  reset_statistics();
}

void ThreadLocalAllocBuffer::insert_filler() {
  assert(end() != NULL, "Must not be retired");
  if (top() < hard_end()) {
    Universe::heap()->fill_with_dummy_object(top(), hard_end(), true);
  }
}

void ThreadLocalAllocBuffer::make_parsable() {
  if (end() != NULL) {
    invariants();
    if (ZeroTLAB) {
      retire();
    } else {
      insert_filler();
    }
  }
}

void ThreadLocalAllocBuffer::retire(ThreadLocalAllocStats* stats) {
  if (stats != NULL) {
    accumulate_and_reset_statistics(stats);
  }

  if (end() != NULL) {
    invariants();
    thread()->incr_allocated_bytes(used_bytes());
    insert_filler();
    initialize(NULL, NULL, NULL);
  }
}

void ThreadLocalAllocBuffer::retire_before_allocation() {
  _slow_refill_waste += (unsigned int)remaining();
  retire();
}

void ThreadLocalAllocBuffer::resize() {
  // Compute the next tlab size using expected allocation amount
  assert(ResizeTLAB, "Should not call this otherwise");
  size_t alloc = (size_t)(_allocation_fraction.average() *
                          (Universe::heap()->tlab_capacity(thread()) / HeapWordSize));
  size_t new_size = alloc / _target_refills;
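  // Illustrative arithmetic with hypothetical numbers: an averaged allocation
  // fraction of 0.25 against an 8M-word eden gives alloc = 2M words; with 50
  // target refills, new_size = 2M / 50 = ~41K words (about 328KB on a
  // 64-bit VM).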

  new_size = MIN2(MAX2(new_size, min_size()), max_size());

  size_t aligned_new_size = align_object_size(new_size);

  log_trace(gc, tlab)("TLAB new size: thread: " INTPTR_FORMAT " [id: %2d]"
                      " refills %d alloc: %8.6f desired_size: " SIZE_FORMAT " -> " SIZE_FORMAT,
                      p2i(thread()), thread()->osthread()->thread_id(),
                      _target_refills, _allocation_fraction.average(), desired_size(), aligned_new_size);

  set_desired_size(aligned_new_size);
  set_refill_waste_limit(initial_refill_waste_limit());
}

void ThreadLocalAllocBuffer::reset_statistics() {
  _number_of_refills = 0;
  _fast_refill_waste = 0;
  _slow_refill_waste = 0;
  _gc_waste          = 0;
  _slow_allocations  = 0;
  _allocated_size    = 0;
}

void ThreadLocalAllocBuffer::fill(HeapWord* start,
                                  HeapWord* top,
                                  size_t    new_size) {
  _number_of_refills++;
  _allocated_size += new_size;
  print_stats("fill");
  assert(top <= start + new_size - alignment_reserve(), "size too small");

  initialize(start, top, start + new_size - alignment_reserve());

  // Reset amount of internal fragmentation
  set_refill_waste_limit(initial_refill_waste_limit());
}

void ThreadLocalAllocBuffer::initialize(HeapWord* start,
                                        HeapWord* top,
                                        HeapWord* end) {
  set_start(start);
  set_top(top);
  set_pf_top(top);
  set_end(end);
  set_allocation_end(end);
  invariants();
}

void ThreadLocalAllocBuffer::initialize() {
  initialize(NULL,  // start
             NULL,  // top
             NULL); // end

  set_desired_size(initial_desired_size());

  size_t capacity = Universe::heap()->tlab_capacity(thread()) / HeapWordSize;
  double alloc_frac = desired_size() * target_refills() / (double) capacity;
  _allocation_fraction.sample(alloc_frac);
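  // Illustrative seeding with hypothetical numbers: a desired size of 32K
  // words and 50 target refills against an 8M-word eden seeds the fraction
  // at 32K * 50 / 8M = ~0.2.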

  set_refill_waste_limit(initial_refill_waste_limit());

  reset_statistics();
}

void ThreadLocalAllocBuffer::startup_initialization() {
  ThreadLocalAllocStats::initialize();

  // Assuming each thread's active tlab is, on average,
  // 1/2 full at a GC
  _target_refills = 100 / (2 * TLABWasteTargetPercent);
  // The initial target refills must be at least 2 to avoid a GC that could
  // abort the VM during initialization.
  _target_refills = MAX2(_target_refills, 2U);
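  // Illustrative arithmetic: with TLABWasteTargetPercent at its default of 1,
  // _target_refills = 100 / (2 * 1) = 50 refills per GC cycle, well above the
  // floor of 2 applied above.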

#ifdef COMPILER2
  // If the C2 compiler is present, extra space is needed at the end of
  // TLABs, otherwise prefetching instructions generated by the C2
  // compiler will fault (due to accessing memory outside of heap).
  // The amount of space is the max of the number of lines to
  // prefetch for array and for instance allocations. (Extra space must be
  // reserved to accommodate both types of allocations.)
  //
  // Only SPARC-specific BIS instructions are known to fault. (Those
  // instructions are generated if AllocatePrefetchStyle==3 and
  // AllocatePrefetchInstr==1). To be on the safe side, however,
  // extra space is reserved for all combinations of
  // AllocatePrefetchStyle and AllocatePrefetchInstr.
  //
  // If the C2 compiler is not present, no space is reserved.

  // +1 for rounding up to next cache line, +1 to be safe
  if (is_server_compilation_mode_vm()) {
    int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
    _reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) /
                                       (int)HeapWordSize;
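    // Illustrative arithmetic with hypothetical flag values: with
    // AllocatePrefetchDistance = 192, AllocatePrefetchStepSize = 64 and
    // lines = 3 + 2 = 5, the reserve is (192 + 64 * 5) / 8 = 64 words
    // (512 bytes) on a 64-bit VM.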
  }
#endif

  // During JVM startup, the main thread is initialized
  // before the heap is initialized. So reinitialize it now.
  guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
  Thread::current()->tlab().initialize();

  log_develop_trace(gc, tlab)("TLAB min: " SIZE_FORMAT " initial: " SIZE_FORMAT " max: " SIZE_FORMAT,
                              min_size(), Thread::current()->tlab().initial_desired_size(), max_size());
}

size_t ThreadLocalAllocBuffer::initial_desired_size() {
  size_t init_sz = 0;

  if (TLABSize > 0) {
    init_sz = TLABSize / HeapWordSize;
  } else {
    // Initial size is a function of the average number of allocating threads.
    unsigned int nof_threads = ThreadLocalAllocStats::allocating_threads_avg();

    init_sz = (Universe::heap()->tlab_capacity(thread()) / HeapWordSize) /
              (nof_threads * target_refills());
    init_sz = align_object_size(init_sz);
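    // Illustrative arithmetic with hypothetical numbers: an 8M-word eden,
    // 4 allocating threads and 50 target refills give
    // init_sz = 8M / (4 * 50) = ~41K words before clamping below.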
  }
  init_sz = MIN2(MAX2(init_sz, min_size()), max_size());
  return init_sz;
}

void ThreadLocalAllocBuffer::print_stats(const char* tag) {
  Log(gc, tlab) log;
  if (!log.is_trace()) {
    return;
  }

  Thread* thrd = thread();
  size_t waste = _gc_waste + _slow_refill_waste + _fast_refill_waste;
  double waste_percent = percent_of(waste, _allocated_size);
  size_t tlab_used = Universe::heap()->tlab_used(thrd);
  log.trace("TLAB: %s thread: " INTPTR_FORMAT " [id: %2d]"
            " desired_size: " SIZE_FORMAT "KB"
            " slow allocs: %d refill waste: " SIZE_FORMAT "B"
            " alloc:%8.5f %8.0fKB refills: %d waste %4.1f%% gc: %dB"
            " slow: %dB fast: %dB",
            tag, p2i(thrd), thrd->osthread()->thread_id(),
            _desired_size / (K / HeapWordSize),
            _slow_allocations, _refill_waste_limit * HeapWordSize,
            _allocation_fraction.average(),
            _allocation_fraction.average() * tlab_used / K,
            _number_of_refills, waste_percent,
            _gc_waste * HeapWordSize,
            _slow_refill_waste * HeapWordSize,
            _fast_refill_waste * HeapWordSize);
}

void ThreadLocalAllocBuffer::set_sample_end(bool reset_byte_accumulation) {
  size_t heap_words_remaining = pointer_delta(_end, _top);
  size_t bytes_until_sample = thread()->heap_sampler().bytes_until_sample();
  size_t words_until_sample = bytes_until_sample / HeapWordSize;
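  // If the next heap-sampling point falls inside this TLAB, pull "end" in so
  // that the fast-path allocation which crosses it drops into the slow path,
  // where the sample is actually taken.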

  if (reset_byte_accumulation) {
    _bytes_since_last_sample_point = 0;
  }

  if (heap_words_remaining > words_until_sample) {
    HeapWord* new_end = _top + words_until_sample;
    set_end(new_end);
    _bytes_since_last_sample_point += bytes_until_sample;
  } else {
    _bytes_since_last_sample_point += heap_words_remaining * HeapWordSize;
  }
}

Thread* ThreadLocalAllocBuffer::thread() {
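  // The TLAB is embedded by value in its owning Thread. "this" plus
  // start_offset() is the address of the _start field; subtracting that same
  // field's offset within Thread recovers the enclosing Thread object.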
  return (Thread*)(((char*)this) + in_bytes(start_offset()) - in_bytes(Thread::tlab_start_offset()));
}

void ThreadLocalAllocBuffer::set_back_allocation_end() {
  _end = _allocation_end;
}

HeapWord* ThreadLocalAllocBuffer::hard_end() {
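  // The true end of the buffer, including the alignment reserve kept free
  // for the trailing filler object.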
  return _allocation_end + alignment_reserve();
}

PerfVariable* ThreadLocalAllocStats::_perf_allocating_threads;
PerfVariable* ThreadLocalAllocStats::_perf_total_refills;
PerfVariable* ThreadLocalAllocStats::_perf_max_refills;
PerfVariable* ThreadLocalAllocStats::_perf_total_allocations;
PerfVariable* ThreadLocalAllocStats::_perf_total_gc_waste;
PerfVariable* ThreadLocalAllocStats::_perf_max_gc_waste;
PerfVariable* ThreadLocalAllocStats::_perf_total_slow_refill_waste;
PerfVariable* ThreadLocalAllocStats::_perf_max_slow_refill_waste;
PerfVariable* ThreadLocalAllocStats::_perf_total_fast_refill_waste;
PerfVariable* ThreadLocalAllocStats::_perf_max_fast_refill_waste;
PerfVariable* ThreadLocalAllocStats::_perf_total_slow_allocations;
PerfVariable* ThreadLocalAllocStats::_perf_max_slow_allocations;
AdaptiveWeightedAverage ThreadLocalAllocStats::_allocating_threads_avg(0);

static PerfVariable* create_perf_variable(const char* name, PerfData::Units unit, TRAPS) {
  ResourceMark rm;
  return PerfDataManager::create_variable(SUN_GC, PerfDataManager::counter_name("tlab", name), unit, THREAD);
}

void ThreadLocalAllocStats::initialize() {
  _allocating_threads_avg = AdaptiveWeightedAverage(TLABAllocationWeight);
  _allocating_threads_avg.sample(1); // One allocating thread at startup

  if (UsePerfData) {
    EXCEPTION_MARK;
    _perf_allocating_threads      = create_perf_variable("allocThreads", PerfData::U_None,  CHECK);
    _perf_total_refills           = create_perf_variable("fills",        PerfData::U_None,  CHECK);
    _perf_max_refills             = create_perf_variable("maxFills",     PerfData::U_None,  CHECK);
    _perf_total_allocations      = create_perf_variable("alloc",        PerfData::U_Bytes, CHECK);
    _perf_total_gc_waste          = create_perf_variable("gcWaste",      PerfData::U_Bytes, CHECK);
    _perf_max_gc_waste            = create_perf_variable("maxGcWaste",   PerfData::U_Bytes, CHECK);
    _perf_total_slow_refill_waste = create_perf_variable("slowWaste",    PerfData::U_Bytes, CHECK);
    _perf_max_slow_refill_waste   = create_perf_variable("maxSlowWaste", PerfData::U_Bytes, CHECK);
    _perf_total_fast_refill_waste = create_perf_variable("fastWaste",    PerfData::U_Bytes, CHECK);
    _perf_max_fast_refill_waste   = create_perf_variable("maxFastWaste", PerfData::U_Bytes, CHECK);
    _perf_total_slow_allocations  = create_perf_variable("slowAlloc",    PerfData::U_None,  CHECK);
    _perf_max_slow_allocations    = create_perf_variable("maxSlowAlloc", PerfData::U_None,  CHECK);
  }
}

ThreadLocalAllocStats::ThreadLocalAllocStats() :
    _allocating_threads(0),
    _total_refills(0),
    _max_refills(0),
    _total_allocations(0),
    _total_gc_waste(0),
    _max_gc_waste(0),
    _total_fast_refill_waste(0),
    _max_fast_refill_waste(0),
    _total_slow_refill_waste(0),
    _max_slow_refill_waste(0),
    _total_slow_allocations(0),
    _max_slow_allocations(0) {}

unsigned int ThreadLocalAllocStats::allocating_threads_avg() {
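  // Round the running average to the nearest integer, and never report
  // fewer than one allocating thread.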
  return MAX2((unsigned int)(_allocating_threads_avg.average() + 0.5), 1U);
}

void ThreadLocalAllocStats::update_fast_allocations(unsigned int refills,
                                                    size_t allocations,
                                                    size_t gc_waste,
                                                    size_t fast_refill_waste,
                                                    size_t slow_refill_waste) {
  _allocating_threads      += 1;
  _total_refills           += refills;
  _max_refills              = MAX2(_max_refills, refills);
  _total_allocations       += allocations;
  _total_gc_waste          += gc_waste;
  _max_gc_waste             = MAX2(_max_gc_waste, gc_waste);
  _total_fast_refill_waste += fast_refill_waste;
  _max_fast_refill_waste    = MAX2(_max_fast_refill_waste, fast_refill_waste);
  _total_slow_refill_waste += slow_refill_waste;
  _max_slow_refill_waste    = MAX2(_max_slow_refill_waste, slow_refill_waste);
}

void ThreadLocalAllocStats::update_slow_allocations(unsigned int allocations) {
  _total_slow_allocations += allocations;
  _max_slow_allocations    = MAX2(_max_slow_allocations, allocations);
}

void ThreadLocalAllocStats::update(const ThreadLocalAllocStats& other) {
  _allocating_threads      += other._allocating_threads;
  _total_refills           += other._total_refills;
  _max_refills              = MAX2(_max_refills, other._max_refills);
  _total_allocations       += other._total_allocations;
  _total_gc_waste          += other._total_gc_waste;
  _max_gc_waste             = MAX2(_max_gc_waste, other._max_gc_waste);
  _total_fast_refill_waste += other._total_fast_refill_waste;
  _max_fast_refill_waste    = MAX2(_max_fast_refill_waste, other._max_fast_refill_waste);
  _total_slow_refill_waste += other._total_slow_refill_waste;
  _max_slow_refill_waste    = MAX2(_max_slow_refill_waste, other._max_slow_refill_waste);
  _total_slow_allocations  += other._total_slow_allocations;
  _max_slow_allocations     = MAX2(_max_slow_allocations, other._max_slow_allocations);
}

void ThreadLocalAllocStats::reset() {
  _allocating_threads      = 0;
  _total_refills           = 0;
  _max_refills             = 0;
  _total_allocations       = 0;
  _total_gc_waste          = 0;
  _max_gc_waste            = 0;
  _total_fast_refill_waste = 0;
  _max_fast_refill_waste   = 0;
  _total_slow_refill_waste = 0;
  _max_slow_refill_waste   = 0;
  _total_slow_allocations  = 0;
  _max_slow_allocations    = 0;
}

void ThreadLocalAllocStats::publish() {
  if (_total_allocations == 0) {
    return;
  }

  _allocating_threads_avg.sample(_allocating_threads);

  const size_t waste = _total_gc_waste + _total_slow_refill_waste + _total_fast_refill_waste;
  const double waste_percent = percent_of(waste, _total_allocations);
  log_debug(gc, tlab)("TLAB totals: thrds: %d refills: %d max: %d"
                      " slow allocs: %d max %d waste: %4.1f%%"
                      " gc: " SIZE_FORMAT "B max: " SIZE_FORMAT "B"
                      " slow: " SIZE_FORMAT "B max: " SIZE_FORMAT "B"
                      " fast: " SIZE_FORMAT "B max: " SIZE_FORMAT "B",
                      _allocating_threads, _total_refills, _max_refills,
                      _total_slow_allocations, _max_slow_allocations, waste_percent,
                      _total_gc_waste * HeapWordSize, _max_gc_waste * HeapWordSize,
                      _total_slow_refill_waste * HeapWordSize, _max_slow_refill_waste * HeapWordSize,
                      _total_fast_refill_waste * HeapWordSize, _max_fast_refill_waste * HeapWordSize);

  if (UsePerfData) {
    _perf_allocating_threads->set_value(_allocating_threads);
    _perf_total_refills->set_value(_total_refills);
    _perf_max_refills->set_value(_max_refills);
    _perf_total_allocations->set_value(_total_allocations);
    _perf_total_gc_waste->set_value(_total_gc_waste);
    _perf_max_gc_waste->set_value(_max_gc_waste);
    _perf_total_slow_refill_waste->set_value(_total_slow_refill_waste);
    _perf_max_slow_refill_waste->set_value(_max_slow_refill_waste);
    _perf_total_fast_refill_waste->set_value(_total_fast_refill_waste);
    _perf_max_fast_refill_waste->set_value(_max_fast_refill_waste);
    _perf_total_slow_allocations->set_value(_total_slow_allocations);
    _perf_max_slow_allocations->set_value(_max_slow_allocations);
  }
}

size_t ThreadLocalAllocBuffer::end_reserve() {
  size_t reserve_size = Universe::heap()->tlab_alloc_reserve();
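  // The reserve must cover both the heap's requirement for a trailing filler
  // object and the extra space C2 prefetch instructions may touch; take the
  // larger of the two.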
  return MAX2(reserve_size, (size_t)_reserve_for_allocation_prefetch);
}