1/*
2 * Copyright 2013-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <folly/stats/Histogram.h>
20#include <folly/stats/MultiLevelTimeSeries.h>
21#include <string>
22
23namespace folly {
24
25/*
26 * TimeseriesHistogram tracks data distributions as they change over time.
27 *
28 * Specifically, it is a bucketed histogram with different value ranges assigned
29 * to each bucket. Within each bucket is a MultiLevelTimeSeries from
30 * 'folly/stats/MultiLevelTimeSeries.h'. This means that each bucket contains a
31 * different set of data for different historical time periods, and one can
32 * query data distributions over different trailing time windows.
33 *
34 * For example, this can answer questions: "What is the data distribution over
35 * the last minute? Over the last 10 minutes? Since I last cleared this
36 * histogram?"
37 *
38 * The class can also estimate percentiles and answer questions like: "What was
39 * the 99th percentile data value over the last 10 minutes?"
40 *
41 * Note: that depending on the size of your buckets and the smoothness
42 * of your data distribution, the estimate may be way off from the actual
43 * value. In particular, if the given percentile falls outside of the bucket
44 * range (i.e. your buckets range in 0 - 100,000 but the 99th percentile is
45 * around 115,000) this estimate may be very wrong.
46 *
47 * The memory usage for a typical histogram is roughly 3k * (# of buckets). All
48 * insertion operations are amortized O(1), and all queries are O(# of buckets).
49 */
50template <
51 class T,
52 class CT = LegacyStatsClock<std::chrono::seconds>,
53 class C = folly::MultiLevelTimeSeries<T, CT>>
54class TimeseriesHistogram {
55 private:
56 // NOTE: T must be equivalent to _signed_ numeric type for our math.
57 static_assert(std::numeric_limits<T>::is_signed, "");
58
59 public:
60 // Values to be inserted into container
61 using ValueType = T;
62 // The container type we use internally for each bucket
63 using ContainerType = C;
64 // Clock, duration, and time point types
65 using Clock = CT;
66 using Duration = typename Clock::duration;
67 using TimePoint = typename Clock::time_point;
68
69 /*
70 * Create a TimeSeries histogram and initialize the bucketing and levels.
71 *
72 * The buckets are created by chopping the range [min, max) into pieces
73 * of size bucketSize, with the last bucket being potentially shorter. Two
74 * additional buckets are always created -- the "under" bucket for the range
75 * (-inf, min) and the "over" bucket for the range [max, +inf).
76 *
77 * @param bucketSize the width of each bucket
78 * @param min the smallest value for the bucket range.
79 * @param max the largest value for the bucket range
80 * @param defaultContainer a pre-initialized timeseries with the desired
81 * number of levels and their durations.
82 */
83 TimeseriesHistogram(
84 ValueType bucketSize,
85 ValueType min,
86 ValueType max,
87 const ContainerType& defaultContainer);
88
89 /* Return the bucket size of each bucket in the histogram. */
90 ValueType getBucketSize() const {
91 return buckets_.getBucketSize();
92 }
93
94 /* Return the min value at which bucketing begins. */
95 ValueType getMin() const {
96 return buckets_.getMin();
97 }
98
99 /* Return the max value at which bucketing ends. */
100 ValueType getMax() const {
101 return buckets_.getMax();
102 }
103
104 /* Return the number of levels of the Timeseries object in each bucket */
105 size_t getNumLevels() const {
106 return buckets_.getByIndex(0).numLevels();
107 }
108
109 /* Return the number of buckets */
110 size_t getNumBuckets() const {
111 return buckets_.getNumBuckets();
112 }
113
114 /*
115 * Return the threshold of the bucket for the given index in range
116 * [0..numBuckets). The bucket will have range [thresh, thresh + bucketSize)
117 * or [thresh, max), whichever is shorter.
118 */
119 ValueType getBucketMin(size_t bucketIdx) const {
120 return buckets_.getBucketMin(bucketIdx);
121 }
122
123 /* Return the actual timeseries in the given bucket (for reading only!) */
124 const ContainerType& getBucket(size_t bucketIdx) const {
125 return buckets_.getByIndex(bucketIdx);
126 }
127
128 /* Total count of values at the given timeseries level (all buckets). */
129 uint64_t count(size_t level) const {
130 uint64_t total = 0;
131 for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
132 total += buckets_.getByIndex(b).count(level);
133 }
134 return total;
135 }
136
137 /* Total count of values added during the given interval (all buckets). */
138 uint64_t count(TimePoint start, TimePoint end) const {
139 uint64_t total = 0;
140 for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
141 total += buckets_.getByIndex(b).count(start, end);
142 }
143 return total;
144 }
145
146 /* Total sum of values at the given timeseries level (all buckets). */
147 ValueType sum(size_t level) const {
148 ValueType total = ValueType();
149 for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
150 total += buckets_.getByIndex(b).sum(level);
151 }
152 return total;
153 }
154
155 /* Total sum of values added during the given interval (all buckets). */
156 ValueType sum(TimePoint start, TimePoint end) const {
157 ValueType total = ValueType();
158 for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
159 total += buckets_.getByIndex(b).sum(start, end);
160 }
161 return total;
162 }
163
164 /* Average of values at the given timeseries level (all buckets). */
165 template <typename ReturnType = double>
166 ReturnType avg(size_t level) const {
167 auto total = ValueType();
168 uint64_t nsamples = 0;
169 computeAvgData(&total, &nsamples, level);
170 return folly::detail::avgHelper<ReturnType>(total, nsamples);
171 }
172
173 /* Average of values added during the given interval (all buckets). */
174 template <typename ReturnType = double>
175 ReturnType avg(TimePoint start, TimePoint end) const {
176 auto total = ValueType();
177 uint64_t nsamples = 0;
178 computeAvgData(&total, &nsamples, start, end);
179 return folly::detail::avgHelper<ReturnType>(total, nsamples);
180 }
181
182 /*
183 * Rate at the given timeseries level (all buckets).
184 * This is the sum of all values divided by the time interval (in seconds).
185 */
186 template <typename ReturnType = double>
187 ReturnType rate(size_t level) const {
188 auto total = ValueType();
189 Duration elapsed(0);
190 computeRateData(&total, &elapsed, level);
191 return folly::detail::rateHelper<ReturnType, Duration, Duration>(
192 total, elapsed);
193 }
194
195 /*
196 * Rate for the given interval (all buckets).
197 * This is the sum of all values divided by the time interval (in seconds).
198 */
199 template <typename ReturnType = double>
200 ReturnType rate(TimePoint start, TimePoint end) const {
201 auto total = ValueType();
202 Duration elapsed(0);
203 computeRateData(&total, &elapsed, start, end);
204 return folly::detail::rateHelper<ReturnType, Duration, Duration>(
205 total, elapsed);
206 }
207
208 /*
209 * Update every underlying timeseries object with the given timestamp. You
210 * must call this directly before querying to ensure that the data in all
211 * buckets is decayed properly.
212 */
213 void update(TimePoint now);
214
215 /* clear all the data from the histogram. */
216 void clear();
217
218 /* Add a value into the histogram with timestamp 'now' */
219 void addValue(TimePoint now, const ValueType& value);
220 /* Add a value the given number of times with timestamp 'now' */
221 void addValue(TimePoint now, const ValueType& value, uint64_t times);
222
223 /*
224 * Add all of the values from the specified histogram.
225 *
226 * All of the values will be added to the current time-slot.
227 *
228 * One use of this is for thread-local caching of frequently updated
229 * histogram data. For example, each thread can store a thread-local
230 * Histogram that is updated frequently, and only add it to the global
231 * TimeseriesHistogram once a second.
232 */
233 void addValues(TimePoint now, const folly::Histogram<ValueType>& values);
234
235 /*
236 * Return an estimate of the value at the given percentile in the histogram
237 * in the given timeseries level. The percentile is estimated as follows:
238 *
239 * - We retrieve a count of the values in each bucket (at the given level)
240 * - We determine via the counts which bucket the given percentile falls in.
241 * - We assume the average value in the bucket is also its median
242 * - We then linearly interpolate within the bucket, by assuming that the
243 * distribution is uniform in the two value ranges [left, median) and
244 * [median, right) where [left, right) is the bucket value range.
245 *
246 * Caveats:
247 * - If the histogram is empty, this always returns ValueType(), usually 0.
248 * - For the 'under' and 'over' special buckets, their range is unbounded
249 * on one side. In order for the interpolation to work, we assume that
250 * the average value in the bucket is equidistant from the two edges of
251 * the bucket. In other words, we assume that the distance between the
252 * average and the known bound is equal to the distance between the average
253 * and the unknown bound.
254 */
255 ValueType getPercentileEstimate(double pct, size_t level) const;
256 /*
257 * Return an estimate of the value at the given percentile in the histogram
258 * in the given historical interval. Please see the documentation for
259 * getPercentileEstimate(double pct, size_t level) for the explanation of the
260 * estimation algorithm.
261 */
262 ValueType getPercentileEstimate(double pct, TimePoint start, TimePoint end)
263 const;
264
265 /*
266 * Return the bucket index that the given percentile falls into (in the
267 * given timeseries level). This index can then be used to retrieve either
268 * the bucket threshold, or other data from inside the bucket.
269 */
270 size_t getPercentileBucketIdx(double pct, size_t level) const;
271 /*
272 * Return the bucket index that the given percentile falls into (in the
273 * given historical interval). This index can then be used to retrieve either
274 * the bucket threshold, or other data from inside the bucket.
275 */
276 size_t getPercentileBucketIdx(double pct, TimePoint start, TimePoint end)
277 const;
278
279 /* Get the bucket threshold for the bucket containing the given pct. */
280 ValueType getPercentileBucketMin(double pct, size_t level) const {
281 return getBucketMin(getPercentileBucketIdx(pct, level));
282 }
283 /* Get the bucket threshold for the bucket containing the given pct. */
284 ValueType getPercentileBucketMin(double pct, TimePoint start, TimePoint end)
285 const {
286 return getBucketMin(getPercentileBucketIdx(pct, start, end));
287 }
288
289 /*
290 * Print out serialized data from all buckets at the given level.
291 * Format is: BUCKET [',' BUCKET ...]
292 * Where: BUCKET == bucketMin ':' count ':' avg
293 */
294 std::string getString(size_t level) const;
295
296 /*
297 * Print out serialized data for all buckets in the historical interval.
298 * For format, please see getString(size_t level).
299 */
300 std::string getString(TimePoint start, TimePoint end) const;
301
302 /*
303 * Legacy APIs that accept a Duration parameters rather than TimePoint.
304 *
305 * These treat the Duration as relative to the clock epoch.
306 * Prefer using the correct TimePoint-based APIs instead. These APIs will
307 * eventually be deprecated and removed.
308 */
309 void update(Duration now) {
310 update(TimePoint(now));
311 }
312 void addValue(Duration now, const ValueType& value) {
313 addValue(TimePoint(now), value);
314 }
315 void addValue(Duration now, const ValueType& value, uint64_t times) {
316 addValue(TimePoint(now), value, times);
317 }
318 void addValues(Duration now, const folly::Histogram<ValueType>& values) {
319 addValues(TimePoint(now), values);
320 }
321
322 private:
323 typedef ContainerType Bucket;
324 struct CountFromLevel {
325 explicit CountFromLevel(size_t level) : level_(level) {}
326
327 uint64_t operator()(const ContainerType& bucket) const {
328 return bucket.count(level_);
329 }
330
331 private:
332 size_t level_;
333 };
334 struct CountFromInterval {
335 explicit CountFromInterval(TimePoint start, TimePoint end)
336 : start_(start), end_(end) {}
337
338 uint64_t operator()(const ContainerType& bucket) const {
339 return bucket.count(start_, end_);
340 }
341
342 private:
343 TimePoint start_;
344 TimePoint end_;
345 };
346
347 struct AvgFromLevel {
348 explicit AvgFromLevel(size_t level) : level_(level) {}
349
350 ValueType operator()(const ContainerType& bucket) const {
351 return bucket.template avg<ValueType>(level_);
352 }
353
354 private:
355 size_t level_;
356 };
357
358 template <typename ReturnType>
359 struct AvgFromInterval {
360 explicit AvgFromInterval(TimePoint start, TimePoint end)
361 : start_(start), end_(end) {}
362
363 ReturnType operator()(const ContainerType& bucket) const {
364 return bucket.template avg<ReturnType>(start_, end_);
365 }
366
367 private:
368 TimePoint start_;
369 TimePoint end_;
370 };
371
372 /*
373 * Special logic for the case of only one unique value registered
374 * (this can happen when clients don't pick good bucket ranges or have
375 * other bugs). It's a lot easier for clients to track down these issues
376 * if they are getting the correct value.
377 */
378 void maybeHandleSingleUniqueValue(const ValueType& value);
379
380 void computeAvgData(ValueType* total, uint64_t* nsamples, size_t level) const;
381 void computeAvgData(
382 ValueType* total,
383 uint64_t* nsamples,
384 TimePoint start,
385 TimePoint end) const;
386 void computeRateData(ValueType* total, Duration* elapsed, size_t level) const;
387 void computeRateData(
388 ValueType* total,
389 Duration* elapsed,
390 TimePoint start,
391 TimePoint end) const;
392
393 folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
394 bool haveNotSeenValue_;
395 bool singleUniqueValue_;
396 ValueType firstValue_;
397};
398} // namespace folly
399