| 1 | /* | 
|---|
| 2 | * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. | 
|---|
| 3 | * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. | 
|---|
| 4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 
|---|
| 5 | * | 
|---|
| 6 | * This code is free software; you can redistribute it and/or modify it | 
|---|
| 7 | * under the terms of the GNU General Public License version 2 only, as | 
|---|
| 8 | * published by the Free Software Foundation. | 
|---|
| 9 | * | 
|---|
| 10 | * This code is distributed in the hope that it will be useful, but WITHOUT | 
|---|
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|---|
| 12 | * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|---|
| 13 | * version 2 for more details (a copy is included in the LICENSE file that | 
|---|
| 14 | * accompanied this code). | 
|---|
| 15 | * | 
|---|
| 16 | * You should have received a copy of the GNU General Public License version | 
|---|
| 17 | * 2 along with this work; if not, write to the Free Software Foundation, | 
|---|
| 18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 
|---|
| 19 | * | 
|---|
| 20 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | 
|---|
| 21 | * or visit www.oracle.com if you need additional information or have any | 
|---|
| 22 | * questions. | 
|---|
| 23 | * | 
|---|
| 24 | */ | 
|---|
| 25 |  | 
|---|
#include "precompiled.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/threadHeapSampler.hpp"

#include <string.h>
|---|
| 31 |  | 
|---|
| 32 | // Cheap random number generator. | 
|---|
| 33 | uint64_t ThreadHeapSampler::_rnd; | 
|---|
| 34 | // Default is 512kb. | 
|---|
| 35 | volatile int ThreadHeapSampler::_sampling_interval = 512 * 1024; | 
|---|
| 36 |  | 
|---|
| 37 | // Ordering here is important: _log_table first, _log_table_initialized second. | 
|---|
| 38 | double ThreadHeapSampler::_log_table[1 << ThreadHeapSampler::FastLogNumBits] = {}; | 
|---|
| 39 |  | 
|---|
| 40 | // Force initialization of the log_table. | 
|---|
| 41 | bool ThreadHeapSampler::_log_table_initialized = init_log_table(); | 
|---|
| 42 |  | 
|---|
| 43 | bool ThreadHeapSampler::init_log_table() { | 
|---|
| 44 | for (int i = 0; i < (1 << FastLogNumBits); i++) { | 
|---|
| 45 | _log_table[i] = (log(1.0 + static_cast<double>(i+0.5) / (1 << FastLogNumBits)) | 
|---|
| 46 | / log(2.0)); | 
|---|
| 47 | } | 
|---|
| 48 | return true; | 
|---|
| 49 | } | 
|---|
| 50 |  | 
|---|
| 51 | // Returns the next prng value. | 
|---|
| 52 | // pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48 | 
|---|
| 53 | // This is the lrand64 generator. | 
|---|
| 54 | uint64_t ThreadHeapSampler::next_random(uint64_t rnd) { | 
|---|
| 55 | const uint64_t PrngMult = 0x5DEECE66DLL; | 
|---|
| 56 | const uint64_t PrngAdd = 0xB; | 
|---|
| 57 | const uint64_t PrngModPower = 48; | 
|---|
| 58 | const uint64_t PrngModMask = ((uint64_t)1 << PrngModPower) - 1; | 
|---|
| 59 | //assert(IS_SAFE_SIZE_MUL(PrngMult, rnd), "Overflow on multiplication."); | 
|---|
| 60 | //assert(IS_SAFE_SIZE_ADD(PrngMult * rnd, PrngAdd), "Overflow on addition."); | 
|---|
| 61 | return (PrngMult * rnd + PrngAdd) & PrngModMask; | 
|---|
| 62 | } | 
|---|
| 63 |  | 
|---|
| 64 | double ThreadHeapSampler::fast_log2(const double& d) { | 
|---|
| 65 | assert(d>0, "bad value passed to assert"); | 
|---|
| 66 | uint64_t x = 0; | 
|---|
| 67 | assert(sizeof(d) == sizeof(x), | 
|---|
| 68 | "double and uint64_t do not have the same size"); | 
|---|
| 69 | x = *reinterpret_cast<const uint64_t*>(&d); | 
|---|
| 70 | const uint32_t x_high = x >> 32; | 
|---|
| 71 | assert(FastLogNumBits <= 20, "FastLogNumBits should be less than 20."); | 
|---|
| 72 | const uint32_t y = x_high >> (20 - FastLogNumBits) & FastLogMask; | 
|---|
| 73 | const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023; | 
|---|
| 74 |  | 
|---|
| 75 | assert(_log_table_initialized, "log table should be initialized"); | 
|---|
| 76 | return exponent + _log_table[y]; | 
|---|
| 77 | } | 
|---|
| 78 |  | 
|---|
| 79 | // Generates a geometric variable with the specified mean (512K by default). | 
|---|
| 80 | // This is done by generating a random number between 0 and 1 and applying | 
|---|
| 81 | // the inverse cumulative distribution function for an exponential. | 
|---|
| 82 | // Specifically: Let m be the inverse of the sample interval, then | 
|---|
| 83 | // the probability distribution function is m*exp(-mx) so the CDF is | 
|---|
| 84 | // p = 1 - exp(-mx), so | 
|---|
| 85 | // q = 1 - p = exp(-mx) | 
|---|
| 86 | // log_e(q) = -mx | 
|---|
| 87 | // -log_e(q)/m = x | 
|---|
| 88 | // log_2(q) * (-log_e(2) * 1/m) = x | 
|---|
| 89 | // In the code, q is actually in the range 1 to 2**26, hence the -26 below | 
|---|
| 90 | void ThreadHeapSampler::pick_next_geometric_sample() { | 
|---|
| 91 | _rnd = next_random(_rnd); | 
|---|
| 92 | // Take the top 26 bits as the random number | 
|---|
| 93 | // (This plus a 1<<58 sampling bound gives a max possible step of | 
|---|
| 94 | // 5194297183973780480 bytes.  In this case, | 
|---|
| 95 | // for sample_parameter = 1<<19, max possible step is | 
|---|
| 96 | // 9448372 bytes (24 bits). | 
|---|
| 97 | const uint64_t PrngModPower = 48;  // Number of bits in prng | 
|---|
| 98 | // The uint32_t cast is to prevent a (hard-to-reproduce) NAN | 
|---|
| 99 | // under piii debug for some binaries. | 
|---|
| 100 | double q = static_cast<uint32_t>(_rnd >> (PrngModPower - 26)) + 1.0; | 
|---|
| 101 | // Put the computed p-value through the CDF of a geometric. | 
|---|
| 102 | // For faster performance (save ~1/20th exec time), replace | 
|---|
| 103 | // min(0.0, FastLog2(q) - 26)  by  (Fastlog2(q) - 26.000705) | 
|---|
| 104 | // The value 26.000705 is used rather than 26 to compensate | 
|---|
| 105 | // for inaccuracies in FastLog2 which otherwise result in a | 
|---|
| 106 | // negative answer. | 
|---|
| 107 | double log_val = (fast_log2(q) - 26); | 
|---|
| 108 | double result = | 
|---|
| 109 | (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (get_sampling_interval())) + 1; | 
|---|
| 110 | assert(result > 0 && result < SIZE_MAX, "Result is not in an acceptable range."); | 
|---|
| 111 | size_t interval = static_cast<size_t>(result); | 
|---|
| 112 | _bytes_until_sample = interval; | 
|---|
| 113 | } | 
|---|
| 114 |  | 
|---|
| 115 | void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) { | 
|---|
| 116 | // Explicitly test if the sampling interval is 0, return 0 to sample every | 
|---|
| 117 | // allocation. | 
|---|
| 118 | if (get_sampling_interval() == 0) { | 
|---|
| 119 | _bytes_until_sample = 0; | 
|---|
| 120 | return; | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | pick_next_geometric_sample(); | 
|---|
| 124 | } | 
|---|
| 125 |  | 
|---|
| 126 | void ThreadHeapSampler::check_for_sampling(oop obj, size_t allocation_size, size_t bytes_since_allocation) { | 
|---|
| 127 | size_t total_allocated_bytes = bytes_since_allocation + allocation_size; | 
|---|
| 128 |  | 
|---|
| 129 | // If not yet time for a sample, skip it. | 
|---|
| 130 | if (total_allocated_bytes < _bytes_until_sample) { | 
|---|
| 131 | _bytes_until_sample -= total_allocated_bytes; | 
|---|
| 132 | return; | 
|---|
| 133 | } | 
|---|
| 134 |  | 
|---|
| 135 | JvmtiExport::sampled_object_alloc_event_collector(obj); | 
|---|
| 136 |  | 
|---|
| 137 | size_t overflow_bytes = total_allocated_bytes - _bytes_until_sample; | 
|---|
| 138 | pick_next_sample(overflow_bytes); | 
|---|
| 139 | } | 
|---|
| 140 |  | 
|---|
| 141 | int ThreadHeapSampler::get_sampling_interval() { | 
|---|
| 142 | return OrderAccess::load_acquire(&_sampling_interval); | 
|---|
| 143 | } | 
|---|
| 144 |  | 
|---|
| 145 | void ThreadHeapSampler::set_sampling_interval(int sampling_interval) { | 
|---|
| 146 | OrderAccess::release_store(&_sampling_interval, sampling_interval); | 
|---|
| 147 | } | 
|---|
| 148 |  | 
|---|