| 1 | /* |
| 2 | * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #include "precompiled.hpp" |
| 26 | #include "gc/g1/g1BarrierSet.hpp" |
| 27 | #include "gc/g1/g1ConcurrentRefine.hpp" |
| 28 | #include "gc/g1/g1ConcurrentRefineThread.hpp" |
| 29 | #include "gc/g1/g1DirtyCardQueue.hpp" |
| 30 | #include "logging/log.hpp" |
| 31 | #include "memory/allocation.inline.hpp" |
| 32 | #include "runtime/java.hpp" |
| 33 | #include "runtime/thread.hpp" |
| 34 | #include "utilities/debug.hpp" |
| 35 | #include "utilities/globalDefinitions.hpp" |
| 36 | #include "utilities/pair.hpp" |
| 37 | #include <math.h> |
| 38 | |
| 39 | G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) { |
| 40 | G1ConcurrentRefineThread* result = NULL; |
| 41 | if (initializing || !InjectGCWorkerCreationFailure) { |
| 42 | result = new G1ConcurrentRefineThread(_cr, worker_id); |
| 43 | } |
| 44 | if (result == NULL || result->osthread() == NULL) { |
| 45 | log_warning(gc)("Failed to create refinement thread %u, no more %s" , |
| 46 | worker_id, |
| 47 | result == NULL ? "memory" : "OS threads" ); |
| 48 | } |
| 49 | return result; |
| 50 | } |
| 51 | |
| 52 | G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl() : |
| 53 | _cr(NULL), |
| 54 | _threads(NULL), |
| 55 | _num_max_threads(0) |
| 56 | { |
| 57 | } |
| 58 | |
| 59 | G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() { |
| 60 | for (uint i = 0; i < _num_max_threads; i++) { |
| 61 | G1ConcurrentRefineThread* t = _threads[i]; |
| 62 | if (t != NULL) { |
| 63 | delete t; |
| 64 | } |
| 65 | } |
| 66 | FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads); |
| 67 | } |
| 68 | |
| 69 | jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr, uint num_max_threads) { |
| 70 | assert(cr != NULL, "G1ConcurrentRefine must not be NULL" ); |
| 71 | _cr = cr; |
| 72 | _num_max_threads = num_max_threads; |
| 73 | |
| 74 | _threads = NEW_C_HEAP_ARRAY_RETURN_NULL(G1ConcurrentRefineThread*, num_max_threads, mtGC); |
| 75 | if (_threads == NULL) { |
| 76 | vm_shutdown_during_initialization("Could not allocate thread holder array." ); |
| 77 | return JNI_ENOMEM; |
| 78 | } |
| 79 | |
| 80 | for (uint i = 0; i < num_max_threads; i++) { |
| 81 | if (UseDynamicNumberOfGCThreads && i != 0 /* Always start first thread. */) { |
| 82 | _threads[i] = NULL; |
| 83 | } else { |
| 84 | _threads[i] = create_refinement_thread(i, true); |
| 85 | if (_threads[i] == NULL) { |
| 86 | vm_shutdown_during_initialization("Could not allocate refinement threads." ); |
| 87 | return JNI_ENOMEM; |
| 88 | } |
| 89 | } |
| 90 | } |
| 91 | return JNI_OK; |
| 92 | } |
| 93 | |
| 94 | void G1ConcurrentRefineThreadControl::maybe_activate_next(uint cur_worker_id) { |
| 95 | assert(cur_worker_id < _num_max_threads, |
| 96 | "Activating another thread from %u not allowed since there can be at most %u" , |
| 97 | cur_worker_id, _num_max_threads); |
| 98 | if (cur_worker_id == (_num_max_threads - 1)) { |
| 99 | // Already the last thread, there is no more thread to activate. |
| 100 | return; |
| 101 | } |
| 102 | |
| 103 | uint worker_id = cur_worker_id + 1; |
| 104 | G1ConcurrentRefineThread* thread_to_activate = _threads[worker_id]; |
| 105 | if (thread_to_activate == NULL) { |
| 106 | // Still need to create the thread... |
| 107 | _threads[worker_id] = create_refinement_thread(worker_id, false); |
| 108 | thread_to_activate = _threads[worker_id]; |
| 109 | } |
| 110 | if (thread_to_activate != NULL && !thread_to_activate->is_active()) { |
| 111 | thread_to_activate->activate(); |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | void G1ConcurrentRefineThreadControl::print_on(outputStream* st) const { |
| 116 | for (uint i = 0; i < _num_max_threads; ++i) { |
| 117 | if (_threads[i] != NULL) { |
| 118 | _threads[i]->print_on(st); |
| 119 | st->cr(); |
| 120 | } |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) { |
| 125 | for (uint i = 0; i < _num_max_threads; i++) { |
| 126 | if (_threads[i] != NULL) { |
| 127 | tc->do_thread(_threads[i]); |
| 128 | } |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | void G1ConcurrentRefineThreadControl::stop() { |
| 133 | for (uint i = 0; i < _num_max_threads; i++) { |
| 134 | if (_threads[i] != NULL) { |
| 135 | _threads[i]->stop(); |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | // Arbitrary but large limits, to simplify some of the zone calculations. |
| 141 | // The general idea is to allow expressions like |
| 142 | // MIN2(x OP y, max_XXX_zone) |
| 143 | // without needing to check for overflow in "x OP y", because the |
| 144 | // ranges for x and y have been restricted. |
| 145 | STATIC_ASSERT(sizeof(LP64_ONLY(jint) NOT_LP64(jshort)) <= (sizeof(size_t)/2)); |
| 146 | const size_t max_yellow_zone = LP64_ONLY(max_jint) NOT_LP64(max_jshort); |
| 147 | const size_t max_green_zone = max_yellow_zone / 2; |
| 148 | const size_t max_red_zone = INT_MAX; // For dcqs.set_max_completed_buffers. |
| 149 | STATIC_ASSERT(max_yellow_zone <= max_red_zone); |
| 150 | |
// Range check assertion for a green zone value: it must not exceed
// max_green_zone. The argument is evaluated once into a local.
#define assert_zone_constraints_g(green)                                \
  do {                                                                  \
    const size_t azc_g_val = (green);                                   \
    assert(azc_g_val <= max_green_zone,                                 \
           "green exceeds max: " SIZE_FORMAT, azc_g_val);               \
  } while (0)
| 158 | |
// Range check assertions for a green/yellow pair: green passes the green
// zone check, yellow does not exceed max_yellow_zone, and green does not
// exceed yellow. Each argument is evaluated once into a local.
#define assert_zone_constraints_gy(green, yellow)                       \
  do {                                                                  \
    const size_t azc_gy_g = (green);                                    \
    const size_t azc_gy_y = (yellow);                                   \
    assert_zone_constraints_g(azc_gy_g);                                \
    assert(azc_gy_y <= max_yellow_zone,                                 \
           "yellow exceeds max: " SIZE_FORMAT, azc_gy_y);               \
    assert(azc_gy_g <= azc_gy_y,                                        \
           "green (" SIZE_FORMAT ") exceeds yellow (" SIZE_FORMAT ")",  \
           azc_gy_g, azc_gy_y);                                         \
  } while (0)
| 171 | |
// Range check assertions for a green/yellow/red triple: green and yellow
// pass the green/yellow checks, red does not exceed max_red_zone, and
// yellow does not exceed red. Each argument is evaluated once into a local.
#define assert_zone_constraints_gyr(green, yellow, red)                 \
  do {                                                                  \
    const size_t azc_gyr_g = (green);                                   \
    const size_t azc_gyr_y = (yellow);                                  \
    const size_t azc_gyr_r = (red);                                     \
    assert_zone_constraints_gy(azc_gyr_g, azc_gyr_y);                   \
    assert(azc_gyr_r <= max_red_zone,                                   \
           "red exceeds max: " SIZE_FORMAT, azc_gyr_r);                 \
    assert(azc_gyr_y <= azc_gyr_r,                                      \
           "yellow (" SIZE_FORMAT ") exceeds red (" SIZE_FORMAT ")",    \
           azc_gyr_y, azc_gyr_r);                                       \
  } while (0)
| 185 | |
// Tag set shared by all refinement control log messages in this file.
#define CTRL_TAGS gc, ergo, refine

// Logs zone values at debug level under CTRL_TAGS, so that every zone
// message uses the same level and tags.
#define LOG_ZONES(...) log_debug(CTRL_TAGS)(__VA_ARGS__)
| 191 | |
| 192 | // Package for pair of refinement thread activation and deactivation |
| 193 | // thresholds. The activation and deactivation levels are resp. the first |
| 194 | // and second values of the pair. |
| 195 | typedef Pair<size_t, size_t> Thresholds; |
| 196 | inline size_t activation_level(const Thresholds& t) { return t.first; } |
| 197 | inline size_t deactivation_level(const Thresholds& t) { return t.second; } |
| 198 | |
| 199 | static Thresholds calc_thresholds(size_t green_zone, |
| 200 | size_t yellow_zone, |
| 201 | uint worker_i) { |
| 202 | double yellow_size = yellow_zone - green_zone; |
| 203 | double step = yellow_size / G1ConcurrentRefine::max_num_threads(); |
| 204 | if (worker_i == 0) { |
| 205 | // Potentially activate worker 0 more aggressively, to keep |
| 206 | // available buffers near green_zone value. When yellow_size is |
| 207 | // large we don't want to allow a full step to accumulate before |
| 208 | // doing any processing, as that might lead to significantly more |
| 209 | // than green_zone buffers to be processed by update_rs. |
| 210 | step = MIN2(step, ParallelGCThreads / 2.0); |
| 211 | } |
| 212 | size_t activate_offset = static_cast<size_t>(ceil(step * (worker_i + 1))); |
| 213 | size_t deactivate_offset = static_cast<size_t>(floor(step * worker_i)); |
| 214 | return Thresholds(green_zone + activate_offset, |
| 215 | green_zone + deactivate_offset); |
| 216 | } |
| 217 | |
| 218 | G1ConcurrentRefine::G1ConcurrentRefine(size_t green_zone, |
| 219 | size_t yellow_zone, |
| 220 | size_t red_zone, |
| 221 | size_t min_yellow_zone_size) : |
| 222 | _thread_control(), |
| 223 | _green_zone(green_zone), |
| 224 | _yellow_zone(yellow_zone), |
| 225 | _red_zone(red_zone), |
| 226 | _min_yellow_zone_size(min_yellow_zone_size) |
| 227 | { |
| 228 | assert_zone_constraints_gyr(green_zone, yellow_zone, red_zone); |
| 229 | } |
| 230 | |
| 231 | jint G1ConcurrentRefine::initialize() { |
| 232 | return _thread_control.initialize(this, max_num_threads()); |
| 233 | } |
| 234 | |
| 235 | static size_t calc_min_yellow_zone_size() { |
| 236 | size_t step = G1ConcRefinementThresholdStep; |
| 237 | uint n_workers = G1ConcurrentRefine::max_num_threads(); |
| 238 | if ((max_yellow_zone / step) < n_workers) { |
| 239 | return max_yellow_zone; |
| 240 | } else { |
| 241 | return step * n_workers; |
| 242 | } |
| 243 | } |
| 244 | |
| 245 | static size_t calc_init_green_zone() { |
| 246 | size_t green = G1ConcRefinementGreenZone; |
| 247 | if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) { |
| 248 | green = ParallelGCThreads; |
| 249 | } |
| 250 | return MIN2(green, max_green_zone); |
| 251 | } |
| 252 | |
| 253 | static size_t calc_init_yellow_zone(size_t green, size_t min_size) { |
| 254 | size_t config = G1ConcRefinementYellowZone; |
| 255 | size_t size = 0; |
| 256 | if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) { |
| 257 | size = green * 2; |
| 258 | } else if (green < config) { |
| 259 | size = config - green; |
| 260 | } |
| 261 | size = MAX2(size, min_size); |
| 262 | size = MIN2(size, max_yellow_zone); |
| 263 | return MIN2(green + size, max_yellow_zone); |
| 264 | } |
| 265 | |
| 266 | static size_t calc_init_red_zone(size_t green, size_t yellow) { |
| 267 | size_t size = yellow - green; |
| 268 | if (!FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) { |
| 269 | size_t config = G1ConcRefinementRedZone; |
| 270 | if (yellow < config) { |
| 271 | size = MAX2(size, config - yellow); |
| 272 | } |
| 273 | } |
| 274 | return MIN2(yellow + size, max_red_zone); |
| 275 | } |
| 276 | |
| 277 | G1ConcurrentRefine* G1ConcurrentRefine::create(jint* ecode) { |
| 278 | size_t min_yellow_zone_size = calc_min_yellow_zone_size(); |
| 279 | size_t green_zone = calc_init_green_zone(); |
| 280 | size_t yellow_zone = calc_init_yellow_zone(green_zone, min_yellow_zone_size); |
| 281 | size_t red_zone = calc_init_red_zone(green_zone, yellow_zone); |
| 282 | |
| 283 | LOG_ZONES("Initial Refinement Zones: " |
| 284 | "green: " SIZE_FORMAT ", " |
| 285 | "yellow: " SIZE_FORMAT ", " |
| 286 | "red: " SIZE_FORMAT ", " |
| 287 | "min yellow size: " SIZE_FORMAT, |
| 288 | green_zone, yellow_zone, red_zone, min_yellow_zone_size); |
| 289 | |
| 290 | G1ConcurrentRefine* cr = new G1ConcurrentRefine(green_zone, |
| 291 | yellow_zone, |
| 292 | red_zone, |
| 293 | min_yellow_zone_size); |
| 294 | |
| 295 | if (cr == NULL) { |
| 296 | *ecode = JNI_ENOMEM; |
| 297 | vm_shutdown_during_initialization("Could not create G1ConcurrentRefine" ); |
| 298 | return NULL; |
| 299 | } |
| 300 | |
| 301 | *ecode = cr->initialize(); |
| 302 | return cr; |
| 303 | } |
| 304 | |
| 305 | void G1ConcurrentRefine::stop() { |
| 306 | _thread_control.stop(); |
| 307 | } |
| 308 | |
| 309 | G1ConcurrentRefine::~G1ConcurrentRefine() { |
| 310 | } |
| 311 | |
| 312 | void G1ConcurrentRefine::threads_do(ThreadClosure *tc) { |
| 313 | _thread_control.worker_threads_do(tc); |
| 314 | } |
| 315 | |
| 316 | uint G1ConcurrentRefine::max_num_threads() { |
| 317 | return G1ConcRefinementThreads; |
| 318 | } |
| 319 | |
| 320 | void G1ConcurrentRefine::print_threads_on(outputStream* st) const { |
| 321 | _thread_control.print_on(st); |
| 322 | } |
| 323 | |
| 324 | static size_t calc_new_green_zone(size_t green, |
| 325 | double update_rs_time, |
| 326 | size_t update_rs_processed_buffers, |
| 327 | double goal_ms) { |
| 328 | // Adjust green zone based on whether we're meeting the time goal. |
| 329 | // Limit to max_green_zone. |
| 330 | const double inc_k = 1.1, dec_k = 0.9; |
| 331 | if (update_rs_time > goal_ms) { |
| 332 | if (green > 0) { |
| 333 | green = static_cast<size_t>(green * dec_k); |
| 334 | } |
| 335 | } else if (update_rs_time < goal_ms && |
| 336 | update_rs_processed_buffers > green) { |
| 337 | green = static_cast<size_t>(MAX2(green * inc_k, green + 1.0)); |
| 338 | green = MIN2(green, max_green_zone); |
| 339 | } |
| 340 | return green; |
| 341 | } |
| 342 | |
| 343 | static size_t calc_new_yellow_zone(size_t green, size_t min_yellow_size) { |
| 344 | size_t size = green * 2; |
| 345 | size = MAX2(size, min_yellow_size); |
| 346 | return MIN2(green + size, max_yellow_zone); |
| 347 | } |
| 348 | |
| 349 | static size_t calc_new_red_zone(size_t green, size_t yellow) { |
| 350 | return MIN2(yellow + (yellow - green), max_red_zone); |
| 351 | } |
| 352 | |
| 353 | void G1ConcurrentRefine::update_zones(double update_rs_time, |
| 354 | size_t update_rs_processed_buffers, |
| 355 | double goal_ms) { |
| 356 | log_trace( CTRL_TAGS )("Updating Refinement Zones: " |
| 357 | "update_rs time: %.3fms, " |
| 358 | "update_rs buffers: " SIZE_FORMAT ", " |
| 359 | "update_rs goal time: %.3fms" , |
| 360 | update_rs_time, |
| 361 | update_rs_processed_buffers, |
| 362 | goal_ms); |
| 363 | |
| 364 | _green_zone = calc_new_green_zone(_green_zone, |
| 365 | update_rs_time, |
| 366 | update_rs_processed_buffers, |
| 367 | goal_ms); |
| 368 | _yellow_zone = calc_new_yellow_zone(_green_zone, _min_yellow_zone_size); |
| 369 | _red_zone = calc_new_red_zone(_green_zone, _yellow_zone); |
| 370 | |
| 371 | assert_zone_constraints_gyr(_green_zone, _yellow_zone, _red_zone); |
| 372 | LOG_ZONES("Updated Refinement Zones: " |
| 373 | "green: " SIZE_FORMAT ", " |
| 374 | "yellow: " SIZE_FORMAT ", " |
| 375 | "red: " SIZE_FORMAT, |
| 376 | _green_zone, _yellow_zone, _red_zone); |
| 377 | } |
| 378 | |
| 379 | void G1ConcurrentRefine::adjust(double update_rs_time, |
| 380 | size_t update_rs_processed_buffers, |
| 381 | double goal_ms) { |
| 382 | G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); |
| 383 | |
| 384 | if (G1UseAdaptiveConcRefinement) { |
| 385 | update_zones(update_rs_time, update_rs_processed_buffers, goal_ms); |
| 386 | |
| 387 | // Change the barrier params |
| 388 | if (max_num_threads() == 0) { |
| 389 | // Disable dcqs notification when there are no threads to notify. |
| 390 | dcqs.set_process_completed_buffers_threshold(G1DirtyCardQueueSet::ProcessCompletedBuffersThresholdNever); |
| 391 | } else { |
| 392 | // Worker 0 is the primary; wakeup is via dcqs notification. |
| 393 | STATIC_ASSERT(max_yellow_zone <= INT_MAX); |
| 394 | size_t activate = activation_threshold(0); |
| 395 | dcqs.set_process_completed_buffers_threshold(activate); |
| 396 | } |
| 397 | dcqs.set_max_completed_buffers(red_zone()); |
| 398 | } |
| 399 | |
| 400 | size_t curr_queue_size = dcqs.completed_buffers_num(); |
| 401 | if ((dcqs.max_completed_buffers() > 0) && |
| 402 | (curr_queue_size >= yellow_zone())) { |
| 403 | dcqs.set_completed_buffers_padding(curr_queue_size); |
| 404 | } else { |
| 405 | dcqs.set_completed_buffers_padding(0); |
| 406 | } |
| 407 | dcqs.notify_if_necessary(); |
| 408 | } |
| 409 | |
| 410 | size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const { |
| 411 | Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id); |
| 412 | return activation_level(thresholds); |
| 413 | } |
| 414 | |
| 415 | size_t G1ConcurrentRefine::deactivation_threshold(uint worker_id) const { |
| 416 | Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id); |
| 417 | return deactivation_level(thresholds); |
| 418 | } |
| 419 | |
| 420 | uint G1ConcurrentRefine::worker_id_offset() { |
| 421 | return G1DirtyCardQueueSet::num_par_ids(); |
| 422 | } |
| 423 | |
| 424 | void G1ConcurrentRefine::maybe_activate_more_threads(uint worker_id, size_t num_cur_buffers) { |
| 425 | if (num_cur_buffers > activation_threshold(worker_id + 1)) { |
| 426 | _thread_control.maybe_activate_next(worker_id); |
| 427 | } |
| 428 | } |
| 429 | |
| 430 | bool G1ConcurrentRefine::do_refinement_step(uint worker_id) { |
| 431 | G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); |
| 432 | |
| 433 | size_t curr_buffer_num = dcqs.completed_buffers_num(); |
| 434 | // If the number of the buffers falls down into the yellow zone, |
| 435 | // that means that the transition period after the evacuation pause has ended. |
| 436 | // Since the value written to the DCQS is the same for all threads, there is no |
| 437 | // need to synchronize. |
| 438 | if (dcqs.completed_buffers_padding() > 0 && curr_buffer_num <= yellow_zone()) { |
| 439 | dcqs.set_completed_buffers_padding(0); |
| 440 | } |
| 441 | |
| 442 | maybe_activate_more_threads(worker_id, curr_buffer_num); |
| 443 | |
| 444 | // Process the next buffer, if there are enough left. |
| 445 | return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(), |
| 446 | deactivation_threshold(worker_id)); |
| 447 | } |
| 448 | |