/*
 * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "logging/log.hpp"
#include "memory/allocation.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/thread.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/pair.hpp"
#include <math.h>

G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) {
  G1ConcurrentRefineThread* result = NULL;
  if (initializing || !InjectGCWorkerCreationFailure) {
    result = new G1ConcurrentRefineThread(_cr, worker_id);
  }
  if (result == NULL || result->osthread() == NULL) {
    log_warning(gc)("Failed to create refinement thread %u, no more %s",
                    worker_id,
                    result == NULL ? "memory" : "OS threads");
  }
  return result;
}

G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl() :
  _cr(NULL),
  _threads(NULL),
  _num_max_threads(0)
{
}

G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() {
  for (uint i = 0; i < _num_max_threads; i++) {
    G1ConcurrentRefineThread* t = _threads[i];
    if (t != NULL) {
      delete t;
    }
  }
  FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads);
}

jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr, uint num_max_threads) {
  assert(cr != NULL, "G1ConcurrentRefine must not be NULL");
  _cr = cr;
  _num_max_threads = num_max_threads;

  _threads = NEW_C_HEAP_ARRAY_RETURN_NULL(G1ConcurrentRefineThread*, num_max_threads, mtGC);
  if (_threads == NULL) {
    vm_shutdown_during_initialization("Could not allocate thread holder array.");
    return JNI_ENOMEM;
  }

  for (uint i = 0; i < num_max_threads; i++) {
    if (UseDynamicNumberOfGCThreads && i != 0 /* Always start first thread. */) {
      _threads[i] = NULL;
    } else {
      _threads[i] = create_refinement_thread(i, true);
      if (_threads[i] == NULL) {
        vm_shutdown_during_initialization("Could not allocate refinement threads.");
        return JNI_ENOMEM;
      }
    }
  }
  return JNI_OK;
}

void G1ConcurrentRefineThreadControl::maybe_activate_next(uint cur_worker_id) {
  assert(cur_worker_id < _num_max_threads,
         "Activating another thread from %u not allowed since there can be at most %u",
         cur_worker_id, _num_max_threads);
  if (cur_worker_id == (_num_max_threads - 1)) {
    // Already the last thread, there is no more thread to activate.
    return;
  }

  uint worker_id = cur_worker_id + 1;
  G1ConcurrentRefineThread* thread_to_activate = _threads[worker_id];
  if (thread_to_activate == NULL) {
    // Still need to create the thread...
    _threads[worker_id] = create_refinement_thread(worker_id, false);
    thread_to_activate = _threads[worker_id];
  }
  if (thread_to_activate != NULL && !thread_to_activate->is_active()) {
    thread_to_activate->activate();
  }
}

void G1ConcurrentRefineThreadControl::print_on(outputStream* st) const {
  for (uint i = 0; i < _num_max_threads; ++i) {
    if (_threads[i] != NULL) {
      _threads[i]->print_on(st);
      st->cr();
    }
  }
}

void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) {
  for (uint i = 0; i < _num_max_threads; i++) {
    if (_threads[i] != NULL) {
      tc->do_thread(_threads[i]);
    }
  }
}

void G1ConcurrentRefineThreadControl::stop() {
  for (uint i = 0; i < _num_max_threads; i++) {
    if (_threads[i] != NULL) {
      _threads[i]->stop();
    }
  }
}

// Arbitrary but large limits, to simplify some of the zone calculations.
// The general idea is to allow expressions like
//   MIN2(x OP y, max_XXX_zone)
// without needing to check for overflow in "x OP y", because the
// ranges for x and y have been restricted.
STATIC_ASSERT(sizeof(LP64_ONLY(jint) NOT_LP64(jshort)) <= (sizeof(size_t)/2));
const size_t max_yellow_zone = LP64_ONLY(max_jint) NOT_LP64(max_jshort);
const size_t max_green_zone = max_yellow_zone / 2;
const size_t max_red_zone = INT_MAX; // For dcqs.set_max_completed_buffers.
STATIC_ASSERT(max_yellow_zone <= max_red_zone);
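// For example, calc_new_red_zone below computes yellow + (yellow - green);
// both operands are at most max_yellow_zone, which fits in half a size_t,
// so the addition cannot overflow before the MIN2 clamp is applied.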

// Range check assertions for green zone values.
#define assert_zone_constraints_g(green)                     \
  do {                                                       \
    size_t azc_g_green = (green);                            \
    assert(azc_g_green <= max_green_zone,                    \
           "green exceeds max: " SIZE_FORMAT, azc_g_green);  \
  } while (0)

// Range check assertions for green and yellow zone values.
#define assert_zone_constraints_gy(green, yellow)                       \
  do {                                                                  \
    size_t azc_gy_green = (green);                                      \
    size_t azc_gy_yellow = (yellow);                                    \
    assert_zone_constraints_g(azc_gy_green);                            \
    assert(azc_gy_yellow <= max_yellow_zone,                            \
           "yellow exceeds max: " SIZE_FORMAT, azc_gy_yellow);          \
    assert(azc_gy_green <= azc_gy_yellow,                               \
           "green (" SIZE_FORMAT ") exceeds yellow (" SIZE_FORMAT ")",  \
           azc_gy_green, azc_gy_yellow);                                \
  } while (0)

// Range check assertions for green, yellow, and red zone values.
#define assert_zone_constraints_gyr(green, yellow, red)               \
  do {                                                                \
    size_t azc_gyr_green = (green);                                   \
    size_t azc_gyr_yellow = (yellow);                                 \
    size_t azc_gyr_red = (red);                                       \
    assert_zone_constraints_gy(azc_gyr_green, azc_gyr_yellow);        \
    assert(azc_gyr_red <= max_red_zone,                               \
           "red exceeds max: " SIZE_FORMAT, azc_gyr_red);             \
    assert(azc_gyr_yellow <= azc_gyr_red,                             \
           "yellow (" SIZE_FORMAT ") exceeds red (" SIZE_FORMAT ")",  \
           azc_gyr_yellow, azc_gyr_red);                              \
  } while (0)

// Logging tag sequence for refinement control updates.
#define CTRL_TAGS gc, ergo, refine

// For logging zone values, ensuring consistency of level and tags.
#define LOG_ZONES(...) log_debug( CTRL_TAGS )(__VA_ARGS__)

// Package for pair of refinement thread activation and deactivation
// thresholds. The activation level is the first value of the pair and the
// deactivation level is the second.
typedef Pair<size_t, size_t> Thresholds;
inline size_t activation_level(const Thresholds& t) { return t.first; }
inline size_t deactivation_level(const Thresholds& t) { return t.second; }

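// Worked example (illustrative numbers, not defaults): with green_zone = 8,
// yellow_zone = 24 and max_num_threads() = 4, step is (24 - 8) / 4 = 4.0.
// Worker 1 then activates once 8 + ceil(4.0 * 2) = 16 completed buffers are
// pending, and deactivates when the count drops to 8 + floor(4.0 * 1) = 12.
// Worker 0 may use a smaller step, capped at ParallelGCThreads / 2.0.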
static Thresholds calc_thresholds(size_t green_zone,
                                  size_t yellow_zone,
                                  uint worker_i) {
  double yellow_size = yellow_zone - green_zone;
  double step = yellow_size / G1ConcurrentRefine::max_num_threads();
  if (worker_i == 0) {
    // Potentially activate worker 0 more aggressively, to keep
    // available buffers near green_zone value.  When yellow_size is
    // large we don't want to allow a full step to accumulate before
    // doing any processing, as that might lead to significantly more
    // than green_zone buffers to be processed by update_rs.
    step = MIN2(step, ParallelGCThreads / 2.0);
  }
  size_t activate_offset = static_cast<size_t>(ceil(step * (worker_i + 1)));
  size_t deactivate_offset = static_cast<size_t>(floor(step * worker_i));
  return Thresholds(green_zone + activate_offset,
                    green_zone + deactivate_offset);
}

G1ConcurrentRefine::G1ConcurrentRefine(size_t green_zone,
                                       size_t yellow_zone,
                                       size_t red_zone,
                                       size_t min_yellow_zone_size) :
  _thread_control(),
  _green_zone(green_zone),
  _yellow_zone(yellow_zone),
  _red_zone(red_zone),
  _min_yellow_zone_size(min_yellow_zone_size)
{
  assert_zone_constraints_gyr(green_zone, yellow_zone, red_zone);
}

jint G1ConcurrentRefine::initialize() {
  return _thread_control.initialize(this, max_num_threads());
}

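// For example (illustrative numbers): with G1ConcRefinementThresholdStep = 2
// and 8 refinement threads, the minimum yellow zone size is 2 * 8 = 16
// buffers, so that calc_thresholds sees a per-worker step of at least the
// configured threshold step.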
static size_t calc_min_yellow_zone_size() {
  size_t step = G1ConcRefinementThresholdStep;
  uint n_workers = G1ConcurrentRefine::max_num_threads();
  if ((max_yellow_zone / step) < n_workers) {
    return max_yellow_zone;
  } else {
    return step * n_workers;
  }
}

static size_t calc_init_green_zone() {
  size_t green = G1ConcRefinementGreenZone;
  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    green = ParallelGCThreads;
  }
  return MIN2(green, max_green_zone);
}

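// Worked example (illustrative numbers): with green = 20 and
// G1ConcRefinementYellowZone left at its default, size = 2 * 20 = 40, giving
// yellow = 60.  With the flag set to 50 instead, size = 50 - 20 = 30
// (assuming min_size <= 30), giving yellow = 50.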
static size_t calc_init_yellow_zone(size_t green, size_t min_size) {
  size_t config = G1ConcRefinementYellowZone;
  size_t size = 0;
  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    size = green * 2;
  } else if (green < config) {
    size = config - green;
  }
  size = MAX2(size, min_size);
  size = MIN2(size, max_yellow_zone);
  return MIN2(green + size, max_yellow_zone);
}

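// Worked example (illustrative numbers): with green = 20 and yellow = 60,
// size = 40 and red = 100 by default.  With G1ConcRefinementRedZone set to
// 120, size = MAX2(40, 120 - 60) = 60, giving red = 120.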
static size_t calc_init_red_zone(size_t green, size_t yellow) {
  size_t size = yellow - green;
  if (!FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    size_t config = G1ConcRefinementRedZone;
    if (yellow < config) {
      size = MAX2(size, config - yellow);
    }
  }
  return MIN2(yellow + size, max_red_zone);
}

G1ConcurrentRefine* G1ConcurrentRefine::create(jint* ecode) {
  size_t min_yellow_zone_size = calc_min_yellow_zone_size();
  size_t green_zone = calc_init_green_zone();
  size_t yellow_zone = calc_init_yellow_zone(green_zone, min_yellow_zone_size);
  size_t red_zone = calc_init_red_zone(green_zone, yellow_zone);

  LOG_ZONES("Initial Refinement Zones: "
            "green: " SIZE_FORMAT ", "
            "yellow: " SIZE_FORMAT ", "
            "red: " SIZE_FORMAT ", "
            "min yellow size: " SIZE_FORMAT,
            green_zone, yellow_zone, red_zone, min_yellow_zone_size);

  G1ConcurrentRefine* cr = new G1ConcurrentRefine(green_zone,
                                                  yellow_zone,
                                                  red_zone,
                                                  min_yellow_zone_size);

  if (cr == NULL) {
    *ecode = JNI_ENOMEM;
    vm_shutdown_during_initialization("Could not create G1ConcurrentRefine");
    return NULL;
  }

  *ecode = cr->initialize();
  return cr;
}

void G1ConcurrentRefine::stop() {
  _thread_control.stop();
}

G1ConcurrentRefine::~G1ConcurrentRefine() {
}

void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
  _thread_control.worker_threads_do(tc);
}

uint G1ConcurrentRefine::max_num_threads() {
  return G1ConcRefinementThreads;
}

void G1ConcurrentRefine::print_threads_on(outputStream* st) const {
  _thread_control.print_on(st);
}

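// Worked example (illustrative numbers): with green = 100, missing the time
// goal shrinks green to static_cast<size_t>(100 * 0.9) = 90; beating the
// goal while having processed more than 100 buffers grows it to 110.  The
// MAX2 with green + 1.0 guarantees growth even when green * inc_k truncates
// back to green (e.g. green = 5 grows to 6 rather than sticking at 5).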
static size_t calc_new_green_zone(size_t green,
                                  double update_rs_time,
                                  size_t update_rs_processed_buffers,
                                  double goal_ms) {
  // Adjust green zone based on whether we're meeting the time goal.
  // Limit to max_green_zone.
  const double inc_k = 1.1, dec_k = 0.9;
  if (update_rs_time > goal_ms) {
    if (green > 0) {
      green = static_cast<size_t>(green * dec_k);
    }
  } else if (update_rs_time < goal_ms &&
             update_rs_processed_buffers > green) {
    green = static_cast<size_t>(MAX2(green * inc_k, green + 1.0));
    green = MIN2(green, max_green_zone);
  }
  return green;
}

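// For example (illustrative numbers): green = 90 with a small
// min_yellow_size gives yellow = 90 + 180 = 270; the yellow zone tracks the
// green zone at roughly three times its value until clamped.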
static size_t calc_new_yellow_zone(size_t green, size_t min_yellow_size) {
  size_t size = green * 2;
  size = MAX2(size, min_yellow_size);
  return MIN2(green + size, max_yellow_zone);
}

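// Continuing the example above (illustrative numbers): green = 90 and
// yellow = 270 give red = 270 + (270 - 90) = 450.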
static size_t calc_new_red_zone(size_t green, size_t yellow) {
  return MIN2(yellow + (yellow - green), max_red_zone);
}

void G1ConcurrentRefine::update_zones(double update_rs_time,
                                      size_t update_rs_processed_buffers,
                                      double goal_ms) {
  log_trace( CTRL_TAGS )("Updating Refinement Zones: "
                         "update_rs time: %.3fms, "
                         "update_rs buffers: " SIZE_FORMAT ", "
                         "update_rs goal time: %.3fms",
                         update_rs_time,
                         update_rs_processed_buffers,
                         goal_ms);

  _green_zone = calc_new_green_zone(_green_zone,
                                    update_rs_time,
                                    update_rs_processed_buffers,
                                    goal_ms);
  _yellow_zone = calc_new_yellow_zone(_green_zone, _min_yellow_zone_size);
  _red_zone = calc_new_red_zone(_green_zone, _yellow_zone);

  assert_zone_constraints_gyr(_green_zone, _yellow_zone, _red_zone);
  LOG_ZONES("Updated Refinement Zones: "
            "green: " SIZE_FORMAT ", "
            "yellow: " SIZE_FORMAT ", "
            "red: " SIZE_FORMAT,
            _green_zone, _yellow_zone, _red_zone);
}

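// Update the zones and the dirty card queue notification parameters based
// on the most recent update_rs statistics; the caller is expected to supply
// the time and buffer counts from the last remembered set update phase.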
void G1ConcurrentRefine::adjust(double update_rs_time,
                                size_t update_rs_processed_buffers,
                                double goal_ms) {
  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();

  if (G1UseAdaptiveConcRefinement) {
    update_zones(update_rs_time, update_rs_processed_buffers, goal_ms);

    // Change the barrier params
    if (max_num_threads() == 0) {
      // Disable dcqs notification when there are no threads to notify.
      dcqs.set_process_completed_buffers_threshold(G1DirtyCardQueueSet::ProcessCompletedBuffersThresholdNever);
    } else {
      // Worker 0 is the primary; wakeup is via dcqs notification.
      STATIC_ASSERT(max_yellow_zone <= INT_MAX);
      size_t activate = activation_threshold(0);
      dcqs.set_process_completed_buffers_threshold(activate);
    }
    dcqs.set_max_completed_buffers(red_zone());
  }

  size_t curr_queue_size = dcqs.completed_buffers_num();
  if ((dcqs.max_completed_buffers() > 0) &&
      (curr_queue_size >= yellow_zone())) {
    dcqs.set_completed_buffers_padding(curr_queue_size);
  } else {
    dcqs.set_completed_buffers_padding(0);
  }
  dcqs.notify_if_necessary();
}

size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const {
  Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id);
  return activation_level(thresholds);
}

size_t G1ConcurrentRefine::deactivation_threshold(uint worker_id) const {
  Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id);
  return deactivation_level(thresholds);
}

uint G1ConcurrentRefine::worker_id_offset() {
  return G1DirtyCardQueueSet::num_par_ids();
}

void G1ConcurrentRefine::maybe_activate_more_threads(uint worker_id, size_t num_cur_buffers) {
  if (num_cur_buffers > activation_threshold(worker_id + 1)) {
    _thread_control.maybe_activate_next(worker_id);
  }
}

bool G1ConcurrentRefine::do_refinement_step(uint worker_id) {
  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();

  size_t curr_buffer_num = dcqs.completed_buffers_num();
  // If the number of buffers has fallen into the yellow zone, the
  // transition period after the evacuation pause has ended.  Since the
  // value written to the DCQS is the same for all threads, there is no
  // need to synchronize.
  if (dcqs.completed_buffers_padding() > 0 && curr_buffer_num <= yellow_zone()) {
    dcqs.set_completed_buffers_padding(0);
  }

  maybe_activate_more_threads(worker_id, curr_buffer_num);

  // Process the next buffer, if there are enough left.
  return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(),
                                                   deactivation_threshold(worker_id));
}