1/*
2 * Copyright 2013-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <cmath>
20#include <cstddef>
21#include <cstdint>
22#include <limits>
23#include <ostream>
24#include <stdexcept>
25#include <string>
26#include <type_traits>
27#include <vector>
28
29#include <folly/CPortability.h>
30#include <folly/Traits.h>
31#include <folly/stats/detail/Bucket.h>
32
33namespace folly {
34
35namespace detail {
36
37/*
38 * A helper class to manage a set of histogram buckets.
39 */
40template <typename T, typename BucketT>
41class HistogramBuckets {
42 public:
43 typedef T ValueType;
44 typedef BucketT BucketType;
45
46 /*
47 * Create a set of histogram buckets.
48 *
49 * One bucket will be created for each bucketSize interval of values within
50 * the specified range. Additionally, one bucket will be created to track
51 * all values that fall below the specified minimum, and one bucket will be
52 * created for all values above the specified maximum.
53 *
54 * If (max - min) is not a multiple of bucketSize, the last bucket will cover
55 * a smaller range of values than the other buckets.
56 *
57 * (max - min) must be larger than or equal to bucketSize.
58 */
59 HistogramBuckets(
60 ValueType bucketSize,
61 ValueType min,
62 ValueType max,
63 const BucketType& defaultBucket);
64
65 /* Returns the bucket size of each bucket in the histogram. */
66 ValueType getBucketSize() const {
67 return bucketSize_;
68 }
69
70 /* Returns the min value at which bucketing begins. */
71 ValueType getMin() const {
72 return min_;
73 }
74
75 /* Returns the max value at which bucketing ends. */
76 ValueType getMax() const {
77 return max_;
78 }
79
80 /*
81 * Returns the number of buckets.
82 *
83 * This includes the total number of buckets for the [min, max) range,
84 * plus 2 extra buckets, one for handling values less than min, and one for
85 * values greater than max.
86 */
87 size_t getNumBuckets() const {
88 return buckets_.size();
89 }
90
91 /* Returns the bucket index into which the given value would fall. */
92 size_t getBucketIdx(ValueType value) const;
93
94 /* Returns the bucket for the specified value */
95 BucketType& getByValue(ValueType value) {
96 return buckets_[getBucketIdx(value)];
97 }
98
99 /* Returns the bucket for the specified value */
100 const BucketType& getByValue(ValueType value) const {
101 return buckets_[getBucketIdx(value)];
102 }
103
104 /*
105 * Returns the bucket at the specified index.
106 *
107 * Note that index 0 is the bucket for all values less than the specified
108 * minimum. Index 1 is the first bucket in the specified bucket range.
109 */
110 BucketType& getByIndex(size_t idx) {
111 return buckets_[idx];
112 }
113
114 /* Returns the bucket at the specified index. */
115 const BucketType& getByIndex(size_t idx) const {
116 return buckets_[idx];
117 }
118
119 /*
120 * Returns the minimum threshold for the bucket at the given index.
121 *
122 * The bucket at the specified index will store values in the range
123 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
124 * max is smaller than bucketMin + bucketSize.
125 */
126 ValueType getBucketMin(size_t idx) const {
127 if (idx == 0) {
128 return std::numeric_limits<ValueType>::min();
129 }
130 if (idx == buckets_.size() - 1) {
131 return max_;
132 }
133
134 return ValueType(min_ + ((idx - 1) * bucketSize_));
135 }
136
137 /*
138 * Returns the maximum threshold for the bucket at the given index.
139 *
140 * The bucket at the specified index will store values in the range
141 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
142 * max is smaller than bucketMin + bucketSize.
143 */
144 ValueType getBucketMax(size_t idx) const {
145 if (idx == buckets_.size() - 1) {
146 return std::numeric_limits<ValueType>::max();
147 }
148
149 return ValueType(min_ + (idx * bucketSize_));
150 }
151
152 /**
153 * Computes the total number of values stored across all buckets.
154 *
155 * Runs in O(numBuckets)
156 *
157 * @param countFn A function that takes a const BucketType&, and returns the
158 * number of values in that bucket
159 * @return Returns the total number of values stored across all buckets
160 */
161 template <typename CountFn>
162 uint64_t computeTotalCount(CountFn countFromBucket) const;
163
164 /**
165 * Determine which bucket the specified percentile falls into.
166 *
167 * Looks for the bucket that contains the Nth percentile data point.
168 *
169 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
170 * @param countFn A function that takes a const BucketType&, and returns the
171 * number of values in that bucket.
172 * @param lowPct The lowest percentile stored in the selected bucket will be
173 * returned via this parameter.
174 * @param highPct The highest percentile stored in the selected bucket will
175 * be returned via this parameter.
176 *
177 * @return Returns the index of the bucket that contains the Nth percentile
178 * data point.
179 */
180 template <typename CountFn>
181 size_t getPercentileBucketIdx(
182 double pct,
183 CountFn countFromBucket,
184 double* lowPct = nullptr,
185 double* highPct = nullptr) const;
186
187 /**
188 * Estimate the value at the specified percentile.
189 *
190 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
191 * @param countFn A function that takes a const BucketType&, and returns the
192 * number of values in that bucket.
193 * @param avgFn A function that takes a const BucketType&, and returns the
194 * average of all the values in that bucket.
195 *
196 * @return Returns an estimate for N, where N is the number where exactly pct
197 * percentage of the data points in the histogram are less than N.
198 */
199 template <typename CountFn, typename AvgFn>
200 ValueType getPercentileEstimate(
201 double pct,
202 CountFn countFromBucket,
203 AvgFn avgFromBucket) const;
204
205 /*
206 * Iterator access to the buckets.
207 *
208 * Note that the first bucket is for all values less than min, and the last
209 * bucket is for all values greater than max. The buckets tracking values in
210 * the [min, max) actually start at the second bucket.
211 */
212 typename std::vector<BucketType>::const_iterator begin() const {
213 return buckets_.begin();
214 }
215 typename std::vector<BucketType>::iterator begin() {
216 return buckets_.begin();
217 }
218 typename std::vector<BucketType>::const_iterator end() const {
219 return buckets_.end();
220 }
221 typename std::vector<BucketType>::iterator end() {
222 return buckets_.end();
223 }
224
225 private:
226 static constexpr bool kIsExact = std::numeric_limits<ValueType>::is_exact;
227 ValueType bucketSize_;
228 ValueType min_;
229 ValueType max_;
230 std::vector<BucketType> buckets_;
231};
232
233} // namespace detail
234
235/*
236 * A basic histogram class.
237 *
238 * Groups data points into equally-sized buckets, and stores the overall sum of
239 * the data points in each bucket, as well as the number of data points in the
240 * bucket.
241 *
242 * The caller must specify the minimum and maximum data points to expect ahead
243 * of time, as well as the bucket width.
244 */
245template <typename T>
246class Histogram {
247 public:
248 typedef T ValueType;
249 typedef detail::Bucket<T> Bucket;
250
251 Histogram(ValueType bucketSize, ValueType min, ValueType max)
252 : buckets_(bucketSize, min, max, Bucket()) {}
253
254 /* Add a data point to the histogram */
255 void addValue(ValueType value) {
256 Bucket& bucket = buckets_.getByValue(value);
257 // NOTE: Overflow is handled elsewhere and tests check this
258 // behavior (see HistogramTest.cpp TestOverflow* tests).
259 // TODO: It would be nice to handle overflow here and redesign this class.
260 auto const addend = to_unsigned(value);
261 bucket.sum = static_cast<ValueType>(to_unsigned(bucket.sum) + addend);
262 bucket.count += 1;
263 }
264
265 /* Add multiple same data points to the histogram */
266 void addRepeatedValue(ValueType value, uint64_t nSamples) {
267 Bucket& bucket = buckets_.getByValue(value);
268 // NOTE: Overflow is handled elsewhere and tests check this
269 // behavior (see HistogramTest.cpp TestOverflow* tests).
270 // TODO: It would be nice to handle overflow here and redesign this class.
271 auto const addend = to_unsigned(value) * nSamples;
272 bucket.sum = static_cast<ValueType>(to_unsigned(bucket.sum) + addend);
273 bucket.count += nSamples;
274 }
275
276 /*
277 * Remove a data point to the histogram
278 *
279 * Note that this method does not actually verify that this exact data point
280 * had previously been added to the histogram; it merely subtracts the
281 * requested value from the appropriate bucket's sum.
282 */
283 void removeValue(ValueType value) {
284 Bucket& bucket = buckets_.getByValue(value);
285 // NOTE: Overflow is handled elsewhere and tests check this
286 // behavior (see HistogramTest.cpp TestOverflow* tests).
287 // TODO: It would be nice to handle overflow here and redesign this class.
288 if (bucket.count > 0) {
289 auto const subtrahend = to_unsigned(value);
290 bucket.sum = static_cast<ValueType>(to_unsigned(bucket.sum) - subtrahend);
291 bucket.count -= 1;
292 } else {
293 bucket.sum = ValueType();
294 bucket.count = 0;
295 }
296 }
297
298 /* Remove multiple same data points from the histogram */
299 void removeRepeatedValue(ValueType value, uint64_t nSamples) {
300 Bucket& bucket = buckets_.getByValue(value);
301 // TODO: It would be nice to handle overflow here.
302 if (bucket.count >= nSamples) {
303 bucket.sum -= value * nSamples;
304 bucket.count -= nSamples;
305 } else {
306 bucket.sum = ValueType();
307 bucket.count = 0;
308 }
309 }
310
311 /* Remove all data points from the histogram */
312 void clear() {
313 for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
314 buckets_.getByIndex(i).clear();
315 }
316 }
317
318 /* Subtract another histogram data from the histogram */
319 void subtract(const Histogram& hist) {
320 // the two histogram bucket definitions must match to support
321 // subtract.
322 if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
323 getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
324 throw std::invalid_argument("Cannot subtract input histogram.");
325 }
326
327 for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
328 buckets_.getByIndex(i) -= hist.buckets_.getByIndex(i);
329 }
330 }
331
332 /* Merge two histogram data together */
333 void merge(const Histogram& hist) {
334 // the two histogram bucket definitions must match to support
335 // a merge.
336 if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
337 getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
338 throw std::invalid_argument("Cannot merge from input histogram.");
339 }
340
341 for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
342 buckets_.getByIndex(i) += hist.buckets_.getByIndex(i);
343 }
344 }
345
346 /* Copy bucket values from another histogram */
347 void copy(const Histogram& hist) {
348 // the two histogram bucket definition must match
349 if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
350 getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
351 throw std::invalid_argument("Cannot copy from input histogram.");
352 }
353
354 for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
355 buckets_.getByIndex(i) = hist.buckets_.getByIndex(i);
356 }
357 }
358
359 /* Returns the bucket size of each bucket in the histogram. */
360 ValueType getBucketSize() const {
361 return buckets_.getBucketSize();
362 }
363 /* Returns the min value at which bucketing begins. */
364 ValueType getMin() const {
365 return buckets_.getMin();
366 }
367 /* Returns the max value at which bucketing ends. */
368 ValueType getMax() const {
369 return buckets_.getMax();
370 }
371 /* Returns the number of buckets */
372 size_t getNumBuckets() const {
373 return buckets_.getNumBuckets();
374 }
375
376 /* Returns the specified bucket (for reading only!) */
377 const Bucket& getBucketByIndex(size_t idx) const {
378 return buckets_.getByIndex(idx);
379 }
380
381 /*
382 * Returns the minimum threshold for the bucket at the given index.
383 *
384 * The bucket at the specified index will store values in the range
385 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
386 * max is smaller than bucketMin + bucketSize.
387 */
388 ValueType getBucketMin(size_t idx) const {
389 return buckets_.getBucketMin(idx);
390 }
391
392 /*
393 * Returns the maximum threshold for the bucket at the given index.
394 *
395 * The bucket at the specified index will store values in the range
396 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
397 * max is smaller than bucketMin + bucketSize.
398 */
399 ValueType getBucketMax(size_t idx) const {
400 return buckets_.getBucketMax(idx);
401 }
402
403 /**
404 * Computes the total number of values stored across all buckets.
405 *
406 * Runs in O(numBuckets)
407 */
408 uint64_t computeTotalCount() const {
409 CountFromBucket countFn;
410 return buckets_.computeTotalCount(countFn);
411 }
412
413 /*
414 * Get the bucket that the specified percentile falls into
415 *
416 * The lowest and highest percentile data points in returned bucket will be
417 * returned in the lowPct and highPct arguments, if they are not nullptr.
418 */
419 size_t getPercentileBucketIdx(
420 double pct,
421 double* lowPct = nullptr,
422 double* highPct = nullptr) const {
423 // We unfortunately can't use lambdas here yet;
424 // Some users of this code are still built with gcc-4.4.
425 CountFromBucket countFn;
426 return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct);
427 }
428
429 /**
430 * Estimate the value at the specified percentile.
431 *
432 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
433 *
434 * @return Returns an estimate for N, where N is the number where exactly pct
435 * percentage of the data points in the histogram are less than N.
436 */
437 ValueType getPercentileEstimate(double pct) const {
438 CountFromBucket countFn;
439 AvgFromBucket avgFn;
440 return buckets_.getPercentileEstimate(pct, countFn, avgFn);
441 }
442
443 /*
444 * Get a human-readable string describing the histogram contents
445 */
446 std::string debugString() const;
447
448 /*
449 * Write the histogram contents in tab-separated values (TSV) format.
450 * Format is "min max count sum".
451 */
452 void toTSV(std::ostream& out, bool skipEmptyBuckets = true) const;
453
454 struct CountFromBucket {
455 uint64_t operator()(const Bucket& bucket) const {
456 return bucket.count;
457 }
458 };
459 struct AvgFromBucket {
460 ValueType operator()(const Bucket& bucket) const {
461 if (bucket.count == 0) {
462 return ValueType(0);
463 }
464 // Cast bucket.count to a signed integer type. This ensures that we
465 // perform division properly here: If bucket.sum is a signed integer
466 // type but we divide by an unsigned number, unsigned division will be
467 // performed and bucket.sum will be converted to unsigned first.
468 // If bucket.sum is unsigned, the code will still do unsigned division
469 // correctly.
470 //
471 // The only downside is if bucket.count is large enough to be negative
472 // when treated as signed. That should be extremely unlikely, though.
473 return bucket.sum / static_cast<int64_t>(bucket.count);
474 }
475 };
476
477 private:
478 template <typename S, typename = std::enable_if_t<std::is_integral<S>::value>>
479 static constexpr std::make_unsigned_t<S> to_unsigned(S s) {
480 return static_cast<std::make_unsigned_t<S>>(s);
481 }
482 template <
483 typename S,
484 typename = std::enable_if_t<!std::is_integral<S>::value>>
485 static constexpr S to_unsigned(S s) {
486 return s;
487 }
488
489 detail::HistogramBuckets<ValueType, Bucket> buckets_;
490};
491
492} // namespace folly
493
494// MSVC 2017 Update 3/4 has an issue with explicitly instantiating templated
495// functions with default arguments inside templated classes when compiled
496// with /permissive- (the default for the CMake build), so we directly include
497// the -defs as if it were -inl, and don't provide the explicit instantiations.
498// https://developercommunity.visualstudio.com/content/problem/81223/incorrect-error-c5037-with-permissive.html
499#if defined(_MSC_VER) && _MSC_FULL_VER >= 191125506 && \
500 _MSC_FULL_VER <= 191125547
501#define FOLLY_MSVC_USE_WORKAROUND_FOR_C5037 1
502#else
503#define FOLLY_MSVC_USE_WORKAROUND_FOR_C5037 0
504#endif
505
506#if FOLLY_MSVC_USE_WORKAROUND_FOR_C5037
507#include <folly/stats/Histogram-defs.h>
508#endif
509