1 | /* |
2 | * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #ifndef SHARE_GC_SHARED_THREADLOCALALLOCBUFFER_HPP |
26 | #define SHARE_GC_SHARED_THREADLOCALALLOCBUFFER_HPP |
27 | |
28 | #include "gc/shared/gcUtil.hpp" |
29 | #include "runtime/perfData.hpp" |
30 | #include "runtime/vm_version.hpp" |
31 | |
32 | class ThreadLocalAllocStats; |
33 | |
34 | // ThreadLocalAllocBuffer: a descriptor for thread-local storage used by |
35 | // the threads for allocation. |
// It is thread-private at any time, but may be multiplexed over
37 | // time across multiple threads. The park()/unpark() pair is |
38 | // used to make it available for such multiplexing. |
39 | // |
40 | // Heap sampling is performed via the end and allocation_end |
41 | // fields. |
42 | // allocation_end contains the real end of the tlab allocation, |
43 | // whereas end can be set to an arbitrary spot in the tlab to |
44 | // trip the return and sample the allocation. |
45 | class ThreadLocalAllocBuffer: public CHeapObj<mtThread> { |
46 | friend class VMStructs; |
47 | friend class JVMCIVMStructs; |
48 | private: |
49 | HeapWord* _start; // address of TLAB |
50 | HeapWord* _top; // address after last allocation |
51 | HeapWord* _pf_top; // allocation prefetch watermark |
52 | HeapWord* _end; // allocation end (can be the sampling end point or _allocation_end) |
53 | HeapWord* _allocation_end; // end for allocations (actual TLAB end, excluding alignment_reserve) |
54 | |
55 | size_t _desired_size; // desired size (including alignment_reserve) |
56 | size_t _refill_waste_limit; // hold onto tlab if free() is larger than this |
57 | size_t _allocated_before_last_gc; // total bytes allocated up until the last gc |
58 | size_t _bytes_since_last_sample_point; // bytes since last sample point. |
59 | |
60 | static size_t _max_size; // maximum size of any TLAB |
61 | static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB |
62 | static unsigned _target_refills; // expected number of refills between GCs |
63 | |
64 | unsigned _number_of_refills; |
65 | unsigned _fast_refill_waste; |
66 | unsigned _slow_refill_waste; |
67 | unsigned _gc_waste; |
68 | unsigned _slow_allocations; |
69 | size_t _allocated_size; |
70 | |
71 | AdaptiveWeightedAverage _allocation_fraction; // fraction of eden allocated in tlabs |
72 | |
73 | void reset_statistics(); |
74 | |
75 | void set_start(HeapWord* start) { _start = start; } |
76 | void set_end(HeapWord* end) { _end = end; } |
77 | void set_allocation_end(HeapWord* ptr) { _allocation_end = ptr; } |
78 | void set_top(HeapWord* top) { _top = top; } |
79 | void set_pf_top(HeapWord* pf_top) { _pf_top = pf_top; } |
80 | void set_desired_size(size_t desired_size) { _desired_size = desired_size; } |
81 | void set_refill_waste_limit(size_t waste) { _refill_waste_limit = waste; } |
82 | |
83 | size_t initial_refill_waste_limit() { return desired_size() / TLABRefillWasteFraction; } |
84 | |
85 | static int target_refills() { return _target_refills; } |
86 | size_t initial_desired_size(); |
87 | |
88 | size_t remaining(); |
89 | |
90 | // Make parsable and release it. |
91 | void reset(); |
92 | |
93 | void invariants() const { assert(top() >= start() && top() <= end(), "invalid tlab" ); } |
94 | |
95 | void initialize(HeapWord* start, HeapWord* top, HeapWord* end); |
96 | |
97 | void insert_filler(); |
98 | |
99 | void accumulate_and_reset_statistics(ThreadLocalAllocStats* stats); |
100 | |
101 | void print_stats(const char* tag); |
102 | |
103 | Thread* thread(); |
104 | |
105 | // statistics |
106 | |
107 | int number_of_refills() const { return _number_of_refills; } |
108 | int fast_refill_waste() const { return _fast_refill_waste; } |
109 | int slow_refill_waste() const { return _slow_refill_waste; } |
110 | int gc_waste() const { return _gc_waste; } |
111 | int slow_allocations() const { return _slow_allocations; } |
112 | |
113 | public: |
114 | ThreadLocalAllocBuffer() : _allocated_before_last_gc(0), _allocation_fraction(TLABAllocationWeight) { |
115 | // do nothing. tlabs must be inited by initialize() calls |
116 | } |
117 | |
118 | static size_t min_size() { return align_object_size(MinTLABSize / HeapWordSize) + alignment_reserve(); } |
119 | static size_t max_size() { assert(_max_size != 0, "max_size not set up" ); return _max_size; } |
120 | static size_t max_size_in_bytes() { return max_size() * BytesPerWord; } |
121 | static void set_max_size(size_t max_size) { _max_size = max_size; } |
122 | |
123 | HeapWord* start() const { return _start; } |
124 | HeapWord* end() const { return _end; } |
125 | HeapWord* top() const { return _top; } |
126 | HeapWord* hard_end(); |
127 | HeapWord* pf_top() const { return _pf_top; } |
128 | size_t desired_size() const { return _desired_size; } |
129 | size_t used() const { return pointer_delta(top(), start()); } |
130 | size_t used_bytes() const { return pointer_delta(top(), start(), 1); } |
131 | size_t free() const { return pointer_delta(end(), top()); } |
132 | // Don't discard tlab if remaining space is larger than this. |
133 | size_t refill_waste_limit() const { return _refill_waste_limit; } |
134 | size_t bytes_since_last_sample_point() const { return _bytes_since_last_sample_point; } |
135 | |
136 | // Allocate size HeapWords. The memory is NOT initialized to zero. |
137 | inline HeapWord* allocate(size_t size); |
138 | |
139 | // Reserve space at the end of TLAB |
140 | static size_t end_reserve(); |
141 | static size_t alignment_reserve() { return align_object_size(end_reserve()); } |
142 | static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; } |
143 | |
144 | // Return tlab size or remaining space in eden such that the |
145 | // space is large enough to hold obj_size and necessary fill space. |
146 | // Otherwise return 0; |
147 | inline size_t compute_size(size_t obj_size); |
148 | |
149 | // Compute the minimal needed tlab size for the given object size. |
150 | static inline size_t compute_min_size(size_t obj_size); |
151 | |
152 | // Record slow allocation |
153 | inline void record_slow_allocation(size_t obj_size); |
154 | |
155 | // Initialization at startup |
156 | static void startup_initialization(); |
157 | |
158 | // Make an in-use tlab parsable. |
159 | void make_parsable(); |
160 | |
161 | // Retire an in-use tlab and optionally collect statistics. |
162 | void retire(ThreadLocalAllocStats* stats = NULL); |
163 | |
164 | // Retire in-use tlab before allocation of a new tlab |
165 | void retire_before_allocation(); |
166 | |
167 | // Resize based on amount of allocation, etc. |
168 | void resize(); |
169 | |
170 | void fill(HeapWord* start, HeapWord* top, size_t new_size); |
171 | void initialize(); |
172 | |
173 | void set_back_allocation_end(); |
174 | void set_sample_end(bool reset_byte_accumulation); |
175 | |
176 | static size_t refill_waste_limit_increment() { return TLABWasteIncrement; } |
177 | |
178 | template <typename T> void addresses_do(T f) { |
179 | f(&_start); |
180 | f(&_top); |
181 | f(&_pf_top); |
182 | f(&_end); |
183 | f(&_allocation_end); |
184 | } |
185 | |
186 | // Code generation support |
187 | static ByteSize start_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _start); } |
188 | static ByteSize end_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _end); } |
189 | static ByteSize top_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _top); } |
190 | static ByteSize pf_top_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _pf_top); } |
191 | }; |
192 | |
// Aggregates per-thread TLAB statistics across all allocating threads and
// publishes them via jvmstat performance counters.
class ThreadLocalAllocStats : public StackObj {
private:
  // Process-wide performance counters the aggregated values are published to
  // (see publish()).
  static PerfVariable* _perf_allocating_threads;
  static PerfVariable* _perf_total_refills;
  static PerfVariable* _perf_max_refills;
  static PerfVariable* _perf_total_allocations;
  static PerfVariable* _perf_total_gc_waste;
  static PerfVariable* _perf_max_gc_waste;
  static PerfVariable* _perf_total_slow_refill_waste;
  static PerfVariable* _perf_max_slow_refill_waste;
  static PerfVariable* _perf_total_fast_refill_waste;
  static PerfVariable* _perf_max_fast_refill_waste;
  static PerfVariable* _perf_total_slow_allocations;
  static PerfVariable* _perf_max_slow_allocations;

  // Decaying average of the number of allocating threads (weighting
  // presumably controlled by TLABAllocationWeight — confirm in the .cpp).
  static AdaptiveWeightedAverage _allocating_threads_avg;

  // Aggregated values for the current collection epoch; _total_* sum over
  // all threads, _max_* track the per-thread maximum.
  unsigned int _allocating_threads;
  unsigned int _total_refills;
  unsigned int _max_refills;
  size_t _total_allocations;
  size_t _total_gc_waste;
  size_t _max_gc_waste;
  size_t _total_fast_refill_waste;
  size_t _max_fast_refill_waste;
  size_t _total_slow_refill_waste;
  size_t _max_slow_refill_waste;
  unsigned int _total_slow_allocations;
  unsigned int _max_slow_allocations;

public:
  // One-time setup of the static performance counters.
  static void initialize();
  static unsigned int allocating_threads_avg();

  ThreadLocalAllocStats();

  // Fold one thread's fast-path (in-TLAB) allocation statistics into the
  // totals/maximums.
  void update_fast_allocations(unsigned int refills,
                               size_t allocations,
                               size_t gc_waste,
                               size_t fast_refill_waste,
                               size_t slow_refill_waste);
  // Fold one thread's slow-path (outside-TLAB) allocation count into the
  // totals/maximums.
  void update_slow_allocations(unsigned int allocations);
  // Merge another aggregate into this one.
  void update(const ThreadLocalAllocStats& other);

  void reset();
  // Push the accumulated values to the performance counters.
  void publish();
};
240 | |
241 | #endif // SHARE_GC_SHARED_THREADLOCALALLOCBUFFER_HPP |
242 | |