1/*
2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2018, Google and/or its affiliates. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
#include "precompiled.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/threadHeapSampler.hpp"

#include <string.h>
31
32// Cheap random number generator.
33uint64_t ThreadHeapSampler::_rnd;
34// Default is 512kb.
35volatile int ThreadHeapSampler::_sampling_interval = 512 * 1024;
36
37// Ordering here is important: _log_table first, _log_table_initialized second.
38double ThreadHeapSampler::_log_table[1 << ThreadHeapSampler::FastLogNumBits] = {};
39
40// Force initialization of the log_table.
41bool ThreadHeapSampler::_log_table_initialized = init_log_table();
42
43bool ThreadHeapSampler::init_log_table() {
44 for (int i = 0; i < (1 << FastLogNumBits); i++) {
45 _log_table[i] = (log(1.0 + static_cast<double>(i+0.5) / (1 << FastLogNumBits))
46 / log(2.0));
47 }
48 return true;
49}
50
51// Returns the next prng value.
52// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48
53// This is the lrand64 generator.
54uint64_t ThreadHeapSampler::next_random(uint64_t rnd) {
55 const uint64_t PrngMult = 0x5DEECE66DLL;
56 const uint64_t PrngAdd = 0xB;
57 const uint64_t PrngModPower = 48;
58 const uint64_t PrngModMask = ((uint64_t)1 << PrngModPower) - 1;
59 //assert(IS_SAFE_SIZE_MUL(PrngMult, rnd), "Overflow on multiplication.");
60 //assert(IS_SAFE_SIZE_ADD(PrngMult * rnd, PrngAdd), "Overflow on addition.");
61 return (PrngMult * rnd + PrngAdd) & PrngModMask;
62}
63
64double ThreadHeapSampler::fast_log2(const double& d) {
65 assert(d>0, "bad value passed to assert");
66 uint64_t x = 0;
67 assert(sizeof(d) == sizeof(x),
68 "double and uint64_t do not have the same size");
69 x = *reinterpret_cast<const uint64_t*>(&d);
70 const uint32_t x_high = x >> 32;
71 assert(FastLogNumBits <= 20, "FastLogNumBits should be less than 20.");
72 const uint32_t y = x_high >> (20 - FastLogNumBits) & FastLogMask;
73 const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
74
75 assert(_log_table_initialized, "log table should be initialized");
76 return exponent + _log_table[y];
77}
78
79// Generates a geometric variable with the specified mean (512K by default).
80// This is done by generating a random number between 0 and 1 and applying
81// the inverse cumulative distribution function for an exponential.
82// Specifically: Let m be the inverse of the sample interval, then
83// the probability distribution function is m*exp(-mx) so the CDF is
84// p = 1 - exp(-mx), so
85// q = 1 - p = exp(-mx)
86// log_e(q) = -mx
87// -log_e(q)/m = x
88// log_2(q) * (-log_e(2) * 1/m) = x
89// In the code, q is actually in the range 1 to 2**26, hence the -26 below
90void ThreadHeapSampler::pick_next_geometric_sample() {
91 _rnd = next_random(_rnd);
92 // Take the top 26 bits as the random number
93 // (This plus a 1<<58 sampling bound gives a max possible step of
94 // 5194297183973780480 bytes. In this case,
95 // for sample_parameter = 1<<19, max possible step is
96 // 9448372 bytes (24 bits).
97 const uint64_t PrngModPower = 48; // Number of bits in prng
98 // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
99 // under piii debug for some binaries.
100 double q = static_cast<uint32_t>(_rnd >> (PrngModPower - 26)) + 1.0;
101 // Put the computed p-value through the CDF of a geometric.
102 // For faster performance (save ~1/20th exec time), replace
103 // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705)
104 // The value 26.000705 is used rather than 26 to compensate
105 // for inaccuracies in FastLog2 which otherwise result in a
106 // negative answer.
107 double log_val = (fast_log2(q) - 26);
108 double result =
109 (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (get_sampling_interval())) + 1;
110 assert(result > 0 && result < SIZE_MAX, "Result is not in an acceptable range.");
111 size_t interval = static_cast<size_t>(result);
112 _bytes_until_sample = interval;
113}
114
115void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) {
116 // Explicitly test if the sampling interval is 0, return 0 to sample every
117 // allocation.
118 if (get_sampling_interval() == 0) {
119 _bytes_until_sample = 0;
120 return;
121 }
122
123 pick_next_geometric_sample();
124}
125
126void ThreadHeapSampler::check_for_sampling(oop obj, size_t allocation_size, size_t bytes_since_allocation) {
127 size_t total_allocated_bytes = bytes_since_allocation + allocation_size;
128
129 // If not yet time for a sample, skip it.
130 if (total_allocated_bytes < _bytes_until_sample) {
131 _bytes_until_sample -= total_allocated_bytes;
132 return;
133 }
134
135 JvmtiExport::sampled_object_alloc_event_collector(obj);
136
137 size_t overflow_bytes = total_allocated_bytes - _bytes_until_sample;
138 pick_next_sample(overflow_bytes);
139}
140
141int ThreadHeapSampler::get_sampling_interval() {
142 return OrderAccess::load_acquire(&_sampling_interval);
143}
144
145void ThreadHeapSampler::set_sampling_interval(int sampling_interval) {
146 OrderAccess::release_store(&_sampling_interval, sampling_interval);
147}
148