1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
#pragma once

#include <chrono>
#include <cstdint>
#include <limits>
#include <string>

#include <folly/stats/Histogram.h>
#include <folly/stats/MultiLevelTimeSeries.h>
22 | |
23 | namespace folly { |
24 | |
25 | /* |
26 | * TimeseriesHistogram tracks data distributions as they change over time. |
27 | * |
28 | * Specifically, it is a bucketed histogram with different value ranges assigned |
29 | * to each bucket. Within each bucket is a MultiLevelTimeSeries from |
30 | * 'folly/stats/MultiLevelTimeSeries.h'. This means that each bucket contains a |
31 | * different set of data for different historical time periods, and one can |
32 | * query data distributions over different trailing time windows. |
33 | * |
34 | * For example, this can answer questions: "What is the data distribution over |
35 | * the last minute? Over the last 10 minutes? Since I last cleared this |
36 | * histogram?" |
37 | * |
38 | * The class can also estimate percentiles and answer questions like: "What was |
39 | * the 99th percentile data value over the last 10 minutes?" |
40 | * |
41 | * Note: that depending on the size of your buckets and the smoothness |
42 | * of your data distribution, the estimate may be way off from the actual |
43 | * value. In particular, if the given percentile falls outside of the bucket |
44 | * range (i.e. your buckets range in 0 - 100,000 but the 99th percentile is |
45 | * around 115,000) this estimate may be very wrong. |
46 | * |
47 | * The memory usage for a typical histogram is roughly 3k * (# of buckets). All |
48 | * insertion operations are amortized O(1), and all queries are O(# of buckets). |
49 | */ |
50 | template < |
51 | class T, |
52 | class CT = LegacyStatsClock<std::chrono::seconds>, |
53 | class C = folly::MultiLevelTimeSeries<T, CT>> |
54 | class TimeseriesHistogram { |
55 | private: |
56 | // NOTE: T must be equivalent to _signed_ numeric type for our math. |
57 | static_assert(std::numeric_limits<T>::is_signed, "" ); |
58 | |
59 | public: |
60 | // Values to be inserted into container |
61 | using ValueType = T; |
62 | // The container type we use internally for each bucket |
63 | using ContainerType = C; |
64 | // Clock, duration, and time point types |
65 | using Clock = CT; |
66 | using Duration = typename Clock::duration; |
67 | using TimePoint = typename Clock::time_point; |
68 | |
69 | /* |
70 | * Create a TimeSeries histogram and initialize the bucketing and levels. |
71 | * |
72 | * The buckets are created by chopping the range [min, max) into pieces |
73 | * of size bucketSize, with the last bucket being potentially shorter. Two |
74 | * additional buckets are always created -- the "under" bucket for the range |
75 | * (-inf, min) and the "over" bucket for the range [max, +inf). |
76 | * |
77 | * @param bucketSize the width of each bucket |
78 | * @param min the smallest value for the bucket range. |
79 | * @param max the largest value for the bucket range |
80 | * @param defaultContainer a pre-initialized timeseries with the desired |
81 | * number of levels and their durations. |
82 | */ |
83 | TimeseriesHistogram( |
84 | ValueType bucketSize, |
85 | ValueType min, |
86 | ValueType max, |
87 | const ContainerType& defaultContainer); |
88 | |
89 | /* Return the bucket size of each bucket in the histogram. */ |
90 | ValueType getBucketSize() const { |
91 | return buckets_.getBucketSize(); |
92 | } |
93 | |
94 | /* Return the min value at which bucketing begins. */ |
95 | ValueType getMin() const { |
96 | return buckets_.getMin(); |
97 | } |
98 | |
99 | /* Return the max value at which bucketing ends. */ |
100 | ValueType getMax() const { |
101 | return buckets_.getMax(); |
102 | } |
103 | |
104 | /* Return the number of levels of the Timeseries object in each bucket */ |
105 | size_t getNumLevels() const { |
106 | return buckets_.getByIndex(0).numLevels(); |
107 | } |
108 | |
109 | /* Return the number of buckets */ |
110 | size_t getNumBuckets() const { |
111 | return buckets_.getNumBuckets(); |
112 | } |
113 | |
114 | /* |
115 | * Return the threshold of the bucket for the given index in range |
116 | * [0..numBuckets). The bucket will have range [thresh, thresh + bucketSize) |
117 | * or [thresh, max), whichever is shorter. |
118 | */ |
119 | ValueType getBucketMin(size_t bucketIdx) const { |
120 | return buckets_.getBucketMin(bucketIdx); |
121 | } |
122 | |
123 | /* Return the actual timeseries in the given bucket (for reading only!) */ |
124 | const ContainerType& getBucket(size_t bucketIdx) const { |
125 | return buckets_.getByIndex(bucketIdx); |
126 | } |
127 | |
128 | /* Total count of values at the given timeseries level (all buckets). */ |
129 | uint64_t count(size_t level) const { |
130 | uint64_t total = 0; |
131 | for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) { |
132 | total += buckets_.getByIndex(b).count(level); |
133 | } |
134 | return total; |
135 | } |
136 | |
137 | /* Total count of values added during the given interval (all buckets). */ |
138 | uint64_t count(TimePoint start, TimePoint end) const { |
139 | uint64_t total = 0; |
140 | for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) { |
141 | total += buckets_.getByIndex(b).count(start, end); |
142 | } |
143 | return total; |
144 | } |
145 | |
146 | /* Total sum of values at the given timeseries level (all buckets). */ |
147 | ValueType sum(size_t level) const { |
148 | ValueType total = ValueType(); |
149 | for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) { |
150 | total += buckets_.getByIndex(b).sum(level); |
151 | } |
152 | return total; |
153 | } |
154 | |
155 | /* Total sum of values added during the given interval (all buckets). */ |
156 | ValueType sum(TimePoint start, TimePoint end) const { |
157 | ValueType total = ValueType(); |
158 | for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) { |
159 | total += buckets_.getByIndex(b).sum(start, end); |
160 | } |
161 | return total; |
162 | } |
163 | |
164 | /* Average of values at the given timeseries level (all buckets). */ |
165 | template <typename ReturnType = double> |
166 | ReturnType avg(size_t level) const { |
167 | auto total = ValueType(); |
168 | uint64_t nsamples = 0; |
169 | computeAvgData(&total, &nsamples, level); |
170 | return folly::detail::avgHelper<ReturnType>(total, nsamples); |
171 | } |
172 | |
173 | /* Average of values added during the given interval (all buckets). */ |
174 | template <typename ReturnType = double> |
175 | ReturnType avg(TimePoint start, TimePoint end) const { |
176 | auto total = ValueType(); |
177 | uint64_t nsamples = 0; |
178 | computeAvgData(&total, &nsamples, start, end); |
179 | return folly::detail::avgHelper<ReturnType>(total, nsamples); |
180 | } |
181 | |
182 | /* |
183 | * Rate at the given timeseries level (all buckets). |
184 | * This is the sum of all values divided by the time interval (in seconds). |
185 | */ |
186 | template <typename ReturnType = double> |
187 | ReturnType rate(size_t level) const { |
188 | auto total = ValueType(); |
189 | Duration elapsed(0); |
190 | computeRateData(&total, &elapsed, level); |
191 | return folly::detail::rateHelper<ReturnType, Duration, Duration>( |
192 | total, elapsed); |
193 | } |
194 | |
195 | /* |
196 | * Rate for the given interval (all buckets). |
197 | * This is the sum of all values divided by the time interval (in seconds). |
198 | */ |
199 | template <typename ReturnType = double> |
200 | ReturnType rate(TimePoint start, TimePoint end) const { |
201 | auto total = ValueType(); |
202 | Duration elapsed(0); |
203 | computeRateData(&total, &elapsed, start, end); |
204 | return folly::detail::rateHelper<ReturnType, Duration, Duration>( |
205 | total, elapsed); |
206 | } |
207 | |
208 | /* |
209 | * Update every underlying timeseries object with the given timestamp. You |
210 | * must call this directly before querying to ensure that the data in all |
211 | * buckets is decayed properly. |
212 | */ |
213 | void update(TimePoint now); |
214 | |
215 | /* clear all the data from the histogram. */ |
216 | void clear(); |
217 | |
218 | /* Add a value into the histogram with timestamp 'now' */ |
219 | void addValue(TimePoint now, const ValueType& value); |
220 | /* Add a value the given number of times with timestamp 'now' */ |
221 | void addValue(TimePoint now, const ValueType& value, uint64_t times); |
222 | |
223 | /* |
224 | * Add all of the values from the specified histogram. |
225 | * |
226 | * All of the values will be added to the current time-slot. |
227 | * |
228 | * One use of this is for thread-local caching of frequently updated |
229 | * histogram data. For example, each thread can store a thread-local |
230 | * Histogram that is updated frequently, and only add it to the global |
231 | * TimeseriesHistogram once a second. |
232 | */ |
233 | void addValues(TimePoint now, const folly::Histogram<ValueType>& values); |
234 | |
235 | /* |
236 | * Return an estimate of the value at the given percentile in the histogram |
237 | * in the given timeseries level. The percentile is estimated as follows: |
238 | * |
239 | * - We retrieve a count of the values in each bucket (at the given level) |
240 | * - We determine via the counts which bucket the given percentile falls in. |
241 | * - We assume the average value in the bucket is also its median |
242 | * - We then linearly interpolate within the bucket, by assuming that the |
243 | * distribution is uniform in the two value ranges [left, median) and |
244 | * [median, right) where [left, right) is the bucket value range. |
245 | * |
246 | * Caveats: |
247 | * - If the histogram is empty, this always returns ValueType(), usually 0. |
248 | * - For the 'under' and 'over' special buckets, their range is unbounded |
249 | * on one side. In order for the interpolation to work, we assume that |
250 | * the average value in the bucket is equidistant from the two edges of |
251 | * the bucket. In other words, we assume that the distance between the |
252 | * average and the known bound is equal to the distance between the average |
253 | * and the unknown bound. |
254 | */ |
255 | ValueType getPercentileEstimate(double pct, size_t level) const; |
256 | /* |
257 | * Return an estimate of the value at the given percentile in the histogram |
258 | * in the given historical interval. Please see the documentation for |
259 | * getPercentileEstimate(double pct, size_t level) for the explanation of the |
260 | * estimation algorithm. |
261 | */ |
262 | ValueType getPercentileEstimate(double pct, TimePoint start, TimePoint end) |
263 | const; |
264 | |
265 | /* |
266 | * Return the bucket index that the given percentile falls into (in the |
267 | * given timeseries level). This index can then be used to retrieve either |
268 | * the bucket threshold, or other data from inside the bucket. |
269 | */ |
270 | size_t getPercentileBucketIdx(double pct, size_t level) const; |
271 | /* |
272 | * Return the bucket index that the given percentile falls into (in the |
273 | * given historical interval). This index can then be used to retrieve either |
274 | * the bucket threshold, or other data from inside the bucket. |
275 | */ |
276 | size_t getPercentileBucketIdx(double pct, TimePoint start, TimePoint end) |
277 | const; |
278 | |
279 | /* Get the bucket threshold for the bucket containing the given pct. */ |
280 | ValueType getPercentileBucketMin(double pct, size_t level) const { |
281 | return getBucketMin(getPercentileBucketIdx(pct, level)); |
282 | } |
283 | /* Get the bucket threshold for the bucket containing the given pct. */ |
284 | ValueType getPercentileBucketMin(double pct, TimePoint start, TimePoint end) |
285 | const { |
286 | return getBucketMin(getPercentileBucketIdx(pct, start, end)); |
287 | } |
288 | |
289 | /* |
290 | * Print out serialized data from all buckets at the given level. |
291 | * Format is: BUCKET [',' BUCKET ...] |
292 | * Where: BUCKET == bucketMin ':' count ':' avg |
293 | */ |
294 | std::string getString(size_t level) const; |
295 | |
296 | /* |
297 | * Print out serialized data for all buckets in the historical interval. |
298 | * For format, please see getString(size_t level). |
299 | */ |
300 | std::string getString(TimePoint start, TimePoint end) const; |
301 | |
302 | /* |
303 | * Legacy APIs that accept a Duration parameters rather than TimePoint. |
304 | * |
305 | * These treat the Duration as relative to the clock epoch. |
306 | * Prefer using the correct TimePoint-based APIs instead. These APIs will |
307 | * eventually be deprecated and removed. |
308 | */ |
309 | void update(Duration now) { |
310 | update(TimePoint(now)); |
311 | } |
312 | void addValue(Duration now, const ValueType& value) { |
313 | addValue(TimePoint(now), value); |
314 | } |
315 | void addValue(Duration now, const ValueType& value, uint64_t times) { |
316 | addValue(TimePoint(now), value, times); |
317 | } |
318 | void addValues(Duration now, const folly::Histogram<ValueType>& values) { |
319 | addValues(TimePoint(now), values); |
320 | } |
321 | |
322 | private: |
323 | typedef ContainerType Bucket; |
324 | struct CountFromLevel { |
325 | explicit CountFromLevel(size_t level) : level_(level) {} |
326 | |
327 | uint64_t operator()(const ContainerType& bucket) const { |
328 | return bucket.count(level_); |
329 | } |
330 | |
331 | private: |
332 | size_t level_; |
333 | }; |
334 | struct CountFromInterval { |
335 | explicit CountFromInterval(TimePoint start, TimePoint end) |
336 | : start_(start), end_(end) {} |
337 | |
338 | uint64_t operator()(const ContainerType& bucket) const { |
339 | return bucket.count(start_, end_); |
340 | } |
341 | |
342 | private: |
343 | TimePoint start_; |
344 | TimePoint end_; |
345 | }; |
346 | |
347 | struct AvgFromLevel { |
348 | explicit AvgFromLevel(size_t level) : level_(level) {} |
349 | |
350 | ValueType operator()(const ContainerType& bucket) const { |
351 | return bucket.template avg<ValueType>(level_); |
352 | } |
353 | |
354 | private: |
355 | size_t level_; |
356 | }; |
357 | |
358 | template <typename ReturnType> |
359 | struct AvgFromInterval { |
360 | explicit AvgFromInterval(TimePoint start, TimePoint end) |
361 | : start_(start), end_(end) {} |
362 | |
363 | ReturnType operator()(const ContainerType& bucket) const { |
364 | return bucket.template avg<ReturnType>(start_, end_); |
365 | } |
366 | |
367 | private: |
368 | TimePoint start_; |
369 | TimePoint end_; |
370 | }; |
371 | |
372 | /* |
373 | * Special logic for the case of only one unique value registered |
374 | * (this can happen when clients don't pick good bucket ranges or have |
375 | * other bugs). It's a lot easier for clients to track down these issues |
376 | * if they are getting the correct value. |
377 | */ |
378 | void maybeHandleSingleUniqueValue(const ValueType& value); |
379 | |
380 | void computeAvgData(ValueType* total, uint64_t* nsamples, size_t level) const; |
381 | void computeAvgData( |
382 | ValueType* total, |
383 | uint64_t* nsamples, |
384 | TimePoint start, |
385 | TimePoint end) const; |
386 | void computeRateData(ValueType* total, Duration* elapsed, size_t level) const; |
387 | void computeRateData( |
388 | ValueType* total, |
389 | Duration* elapsed, |
390 | TimePoint start, |
391 | TimePoint end) const; |
392 | |
393 | folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_; |
394 | bool haveNotSeenValue_; |
395 | bool singleUniqueValue_; |
396 | ValueType firstValue_; |
397 | }; |
398 | } // namespace folly |
399 | |