1 | /* |
2 | * Copyright 2012-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | |
19 | #include <chrono> |
20 | #include <vector> |
21 | |
22 | #include <folly/stats/detail/Bucket.h> |
23 | |
24 | namespace folly { |
25 | |
26 | /* |
27 | * A helper clock type to helper older code using BucketedTimeSeries with |
28 | * std::chrono::seconds transition to properly using clock types and time_point |
29 | * objects. |
30 | */ |
31 | template <typename TT = std::chrono::seconds> |
32 | class LegacyStatsClock { |
33 | public: |
34 | using duration = TT; |
35 | using time_point = std::chrono::time_point<LegacyStatsClock, TT>; |
36 | |
37 | // This clock does not actually implement now(), since the older API |
38 | // did not really specify what clock should be used. (In practice most |
39 | // callers unfortuantely used wall clock time rather than a monotonic clock.) |
40 | }; |
41 | |
42 | /* |
43 | * This class represents a bucketed time series which keeps track of values |
44 | * added in the recent past, and merges these values together into a fixed |
45 | * number of buckets to keep a lid on memory use if the number of values |
46 | * added is very large. |
47 | * |
48 | * For example, a BucketedTimeSeries() with duration == 60s and 10 buckets |
49 | * will keep track of 10 6-second buckets, and discard all data added more |
50 | * than 1 minute ago. As time ticks by, a 6-second bucket at a time will |
51 | * be discarded and new data will go into the newly opened bucket. Internally, |
52 | * it uses a circular array of buckets that it reuses as time advances. |
53 | * |
54 | * This class assumes that time advances forwards. The window of time tracked |
55 | * by the timeseries will advance forwards whenever a more recent timestamp is |
56 | * passed to addValue(). While it is possible to pass old time values to |
57 | * addValue(), this will never move the time window backwards. If the old time |
58 | * value falls outside the tracked window of time, the data point will be |
59 | * ignored. |
60 | * |
61 | * This class is not thread-safe -- use your own synchronization! |
62 | */ |
63 | template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>> |
64 | class BucketedTimeSeries { |
65 | public: |
66 | using ValueType = VT; |
67 | using Clock = CT; |
68 | using Duration = typename Clock::duration; |
69 | using TimePoint = typename Clock::time_point; |
70 | using Bucket = detail::Bucket<ValueType>; |
71 | |
72 | /* |
73 | * Create a new BucketedTimeSeries. |
74 | * |
75 | * This creates a new BucketedTimeSeries with the specified number of |
76 | * buckets, storing data for the specified amount of time. |
77 | * |
78 | * If the duration is 0, the BucketedTimeSeries will track data forever, |
79 | * and does not need the rolling buckets. The numBuckets parameter is |
80 | * ignored when duration is 0. |
81 | */ |
82 | BucketedTimeSeries(size_t numBuckets, Duration duration); |
83 | |
84 | /* |
85 | * Create a new BucketedTimeSeries. |
86 | * |
87 | * This constructor is used to reconstruct a timeseries using |
88 | * previously saved data |
89 | */ |
90 | BucketedTimeSeries( |
91 | TimePoint theFirstTime, |
92 | TimePoint theLatestTime, |
93 | Duration maxDuration, |
94 | const std::vector<Bucket>& bucketsList); |
95 | |
96 | /* |
97 | * Adds the value 'val' at time 'now' |
98 | * |
99 | * This function expects time to generally move forwards. The window of time |
100 | * tracked by this time series will move forwards with time. If 'now' is |
101 | * more recent than any time previously seen, addValue() will automatically |
102 | * call update(now) to advance the time window tracked by this data |
103 | * structure. |
104 | * |
105 | * Values in the recent past may be added to the data structure by passing in |
106 | * a slightly older value of 'now', as long as this time point still falls |
107 | * within the tracked duration. If 'now' is older than the tracked duration |
108 | * of time, the data point value will be ignored, and addValue() will return |
109 | * false without doing anything else. |
110 | * |
111 | * Returns true on success, or false if now was older than the tracked time |
112 | * window. |
113 | */ |
114 | bool addValue(TimePoint now, const ValueType& val); |
115 | |
116 | /* |
117 | * Adds the value 'val' the given number of 'times' at time 'now' |
118 | */ |
119 | bool addValue(TimePoint now, const ValueType& val, uint64_t times); |
120 | |
121 | /* |
122 | * Adds the value 'total' as the sum of 'nsamples' samples |
123 | */ |
124 | bool |
125 | addValueAggregated(TimePoint now, const ValueType& total, uint64_t nsamples); |
126 | |
127 | /* |
128 | * Updates the container to the specified time, doing all the necessary |
129 | * work to rotate the buckets and remove any stale data points. |
130 | * |
131 | * The addValue() methods automatically call update() when adding new data |
132 | * points. However, when reading data from the timeseries, you should make |
133 | * sure to manually call update() before accessing the data. Otherwise you |
134 | * may be reading stale data if update() has not been called recently. |
135 | * |
136 | * Returns the current bucket index after the update. |
137 | */ |
138 | size_t update(TimePoint now); |
139 | |
140 | /* |
141 | * Reset the timeseries to an empty state, |
142 | * as if no data points have ever been added to it. |
143 | */ |
144 | void clear(); |
145 | |
146 | /* |
147 | * Get the latest time that has ever been passed to update() or addValue(). |
148 | * |
149 | * If no data has ever been added to this timeseries, 0 will be returned. |
150 | */ |
151 | TimePoint getLatestTime() const { |
152 | return latestTime_; |
153 | } |
154 | |
155 | /* |
156 | * Get the time of the earliest data point stored in this timeseries. |
157 | * |
158 | * If no data has ever been added to this timeseries, 0 will be returned. |
159 | * |
160 | * If isAllTime() is true, this is simply the time when the first data point |
161 | * was recorded. |
162 | * |
163 | * For non-all-time data, the timestamp reflects the first data point still |
164 | * remembered. As new data points are added, old data will be expired. |
165 | * getEarliestTime() returns the timestamp of the oldest bucket still present |
166 | * in the timeseries. This will never be older than (getLatestTime() - |
167 | * duration()). |
168 | */ |
169 | TimePoint getEarliestTime() const; |
170 | |
171 | /* |
172 | * Return the number of buckets. |
173 | */ |
174 | size_t numBuckets() const { |
175 | return buckets_.size(); |
176 | } |
177 | |
178 | /* |
179 | * Return the maximum duration of data that can be tracked by this |
180 | * BucketedTimeSeries. |
181 | */ |
182 | Duration duration() const { |
183 | return duration_; |
184 | } |
185 | |
186 | /* |
187 | * Returns true if this BucketedTimeSeries stores data for all-time, without |
188 | * ever rolling over into new buckets. |
189 | */ |
190 | bool isAllTime() const { |
191 | return (duration_ == Duration(0)); |
192 | } |
193 | |
194 | /* |
195 | * Returns true if no calls to update() have been made since the last call to |
196 | * clear(). |
197 | */ |
198 | bool empty() const { |
199 | // We set firstTime_ greater than latestTime_ in the constructor and in |
200 | // clear, so we use this to distinguish if the timeseries is empty. |
201 | // |
202 | // Once a data point has been added, latestTime_ will always be greater |
203 | // than or equal to firstTime_. |
204 | return firstTime_ > latestTime_; |
205 | } |
206 | |
207 | /* |
208 | * Returns time of first update() since clear()/constructor. |
209 | * Note that the returned value is only meaningful when empty() is false. |
210 | */ |
211 | TimePoint firstTime() const { |
212 | return firstTime_; |
213 | } |
214 | |
215 | /* |
216 | * Returns time of last update(). |
217 | * Note that the returned value is only meaningful when empty() is false. |
218 | */ |
219 | TimePoint latestTime() const { |
220 | return latestTime_; |
221 | } |
222 | |
223 | /* |
224 | * Returns actual buckets of values |
225 | */ |
226 | const std::vector<Bucket>& buckets() const { |
227 | return buckets_; |
228 | } |
229 | |
230 | /* |
231 | * Get the amount of time tracked by this timeseries. |
232 | * |
233 | * For an all-time timeseries, this returns the length of time since the |
234 | * first data point was added to the time series. |
235 | * |
236 | * Otherwise, this never returns a value greater than the overall timeseries |
237 | * duration. If the first data point was recorded less than a full duration |
238 | * ago, the time since the first data point is returned. If a full duration |
239 | * has elapsed, and we have already thrown away some data, the time since the |
240 | * oldest bucket is returned. |
241 | * |
242 | * For example, say we are tracking 600 seconds worth of data, in 60 buckets. |
243 | * - If less than 600 seconds have elapsed since the first data point, |
244 | * elapsed() returns the total elapsed time so far. |
245 | * - If more than 600 seconds have elapsed, we have already thrown away some |
246 | * data. However, we throw away a full bucket (10 seconds worth) at once, |
247 | * so at any point in time we have from 590 to 600 seconds worth of data. |
248 | * elapsed() will therefore return a value between 590 and 600. |
249 | * |
250 | * Note that you generally should call update() before calling elapsed(), to |
251 | * make sure you are not reading stale data. |
252 | */ |
253 | Duration elapsed() const; |
254 | |
255 | /* |
256 | * Get the amount of time tracked by this timeseries, between the specified |
257 | * start and end times. |
258 | * |
259 | * If the timeseries contains data for the entire time range specified, this |
260 | * simply returns (end - start). However, if start is earlier than |
261 | * getEarliestTime(), this returns (end - getEarliestTime()). |
262 | */ |
263 | Duration elapsed(TimePoint start, TimePoint end) const; |
264 | |
265 | /* |
266 | * Return the sum of all the data points currently tracked by this |
267 | * BucketedTimeSeries. |
268 | * |
269 | * Note that you generally should call update() before calling sum(), to |
270 | * make sure you are not reading stale data. |
271 | */ |
272 | const ValueType& sum() const { |
273 | return total_.sum; |
274 | } |
275 | |
276 | /* |
277 | * Return the number of data points currently tracked by this |
278 | * BucketedTimeSeries. |
279 | * |
280 | * Note that you generally should call update() before calling count(), to |
281 | * make sure you are not reading stale data. |
282 | */ |
283 | uint64_t count() const { |
284 | return total_.count; |
285 | } |
286 | |
287 | /* |
288 | * Return the average value (sum / count). |
289 | * |
290 | * The return type may be specified to control whether floating-point or |
291 | * integer division should be performed. |
292 | * |
293 | * Note that you generally should call update() before calling avg(), to |
294 | * make sure you are not reading stale data. |
295 | */ |
296 | template <typename ReturnType = double> |
297 | ReturnType avg() const { |
298 | return total_.template avg<ReturnType>(); |
299 | } |
300 | |
301 | /* |
302 | * Return the sum divided by the elapsed time. |
303 | * |
304 | * Note that you generally should call update() before calling rate(), to |
305 | * make sure you are not reading stale data. |
306 | */ |
307 | template <typename ReturnType = double, typename Interval = Duration> |
308 | ReturnType rate() const { |
309 | return rateHelper<ReturnType, Interval>(ReturnType(total_.sum), elapsed()); |
310 | } |
311 | |
312 | /* |
313 | * Return the count divided by the elapsed time. |
314 | * |
315 | * The Interval template parameter causes the elapsed time to be converted to |
316 | * the Interval type before using it. For example, if Interval is |
317 | * std::chrono::seconds, the return value will be the count per second. |
318 | * If Interval is std::chrono::hours, the return value will be the count per |
319 | * hour. |
320 | * |
321 | * Note that you generally should call update() before calling countRate(), |
322 | * to make sure you are not reading stale data. |
323 | */ |
324 | template <typename ReturnType = double, typename Interval = Duration> |
325 | ReturnType countRate() const { |
326 | return rateHelper<ReturnType, Interval>( |
327 | ReturnType(total_.count), elapsed()); |
328 | } |
329 | |
330 | /* |
331 | * Estimate the sum of the data points that occurred in the specified time |
332 | * period. |
333 | * |
334 | * The range queried is [start, end). |
335 | * That is, start is inclusive, and end is exclusive. |
336 | * |
337 | * Note that data outside of the timeseries duration will no longer be |
338 | * available for use in the estimation. Specifying a start time earlier than |
339 | * getEarliestTime() will not have much effect, since only data points after |
340 | * that point in time will be counted. |
341 | * |
342 | * Note that the value returned is an estimate, and may not be precise. |
343 | */ |
344 | ValueType sum(TimePoint start, TimePoint end) const; |
345 | |
346 | /* |
347 | * Estimate the number of data points that occurred in the specified time |
348 | * period. |
349 | * |
350 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
351 | * comments apply here as well. |
352 | */ |
353 | uint64_t count(TimePoint start, TimePoint end) const; |
354 | |
355 | /* |
356 | * Estimate the average value during the specified time period. |
357 | * |
358 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
359 | * comments apply here as well. |
360 | */ |
361 | template <typename ReturnType = double> |
362 | ReturnType avg(TimePoint start, TimePoint end) const; |
363 | |
364 | /* |
365 | * Estimate the rate during the specified time period. |
366 | * |
367 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
368 | * comments apply here as well. |
369 | */ |
370 | template <typename ReturnType = double, typename Interval = Duration> |
371 | ReturnType rate(TimePoint start, TimePoint end) const { |
372 | ValueType intervalSum = sum(start, end); |
373 | Duration interval = elapsed(start, end); |
374 | return rateHelper<ReturnType, Interval>(intervalSum, interval); |
375 | } |
376 | |
377 | /* |
378 | * Estimate the rate of data points being added during the specified time |
379 | * period. |
380 | * |
381 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
382 | * comments apply here as well. |
383 | */ |
384 | template <typename ReturnType = double, typename Interval = Duration> |
385 | ReturnType countRate(TimePoint start, TimePoint end) const { |
386 | uint64_t intervalCount = count(start, end); |
387 | Duration interval = elapsed(start, end); |
388 | return rateHelper<ReturnType, Interval>( |
389 | ReturnType(intervalCount), interval); |
390 | } |
391 | |
392 | /* |
393 | * Invoke a function for each bucket. |
394 | * |
395 | * The function will take as arguments the bucket index, |
396 | * the bucket start time, and the start time of the subsequent bucket. |
397 | * |
398 | * It should return true to continue iterating through the buckets, and false |
399 | * to break out of the loop and stop, without calling the function on any |
400 | * more buckets. |
401 | * |
402 | * bool function(const Bucket& bucket, TimePoint bucketStart, |
403 | * TimePoint nextBucketStart) |
404 | */ |
405 | template <typename Function> |
406 | void forEachBucket(Function fn) const; |
407 | |
408 | /* |
409 | * Get the index for the bucket containing the specified time. |
410 | * |
411 | * Note that the index is only valid if this time actually falls within one |
412 | * of the current buckets. If you pass in a value more recent than |
413 | * getLatestTime() or older than (getLatestTime() - elapsed()), the index |
414 | * returned will not be valid. |
415 | * |
416 | * This method may not be called for all-time data. |
417 | */ |
418 | size_t getBucketIdx(TimePoint time) const; |
419 | |
420 | /* |
421 | * Get the bucket at the specified index. |
422 | * |
423 | * This method may not be called for all-time data. |
424 | */ |
425 | const Bucket& getBucketByIndex(size_t idx) const { |
426 | return buckets_[idx]; |
427 | } |
428 | |
429 | /* |
430 | * Compute the bucket index that the specified time falls into, |
431 | * as well as the bucket start time and the next bucket's start time. |
432 | * |
433 | * This method may not be called for all-time data. |
434 | */ |
435 | void getBucketInfo( |
436 | TimePoint time, |
437 | size_t* bucketIdx, |
438 | TimePoint* bucketStart, |
439 | TimePoint* nextBucketStart) const; |
440 | |
441 | /* |
442 | * Legacy APIs that accept a Duration parameters rather than TimePoint. |
443 | * |
444 | * These treat the Duration as relative to the clock epoch. |
445 | * Prefer using the correct TimePoint-based APIs instead. These APIs will |
446 | * eventually be deprecated and removed. |
447 | */ |
448 | bool addValue(Duration now, const ValueType& val) { |
449 | return addValueAggregated(TimePoint(now), val, 1); |
450 | } |
451 | bool addValue(Duration now, const ValueType& val, uint64_t times) { |
452 | return addValueAggregated(TimePoint(now), val * ValueType(times), times); |
453 | } |
454 | bool |
455 | addValueAggregated(Duration now, const ValueType& total, uint64_t nsamples) { |
456 | return addValueAggregated(TimePoint(now), total, nsamples); |
457 | } |
458 | size_t update(Duration now) { |
459 | return update(TimePoint(now)); |
460 | } |
461 | |
462 | private: |
463 | template <typename ReturnType = double, typename Interval = Duration> |
464 | ReturnType rateHelper(ReturnType numerator, Duration elapsedTime) const { |
465 | return detail::rateHelper<ReturnType, Duration, Interval>( |
466 | numerator, elapsedTime); |
467 | } |
468 | |
469 | TimePoint getEarliestTimeNonEmpty() const; |
470 | size_t updateBuckets(TimePoint now); |
471 | |
472 | ValueType rangeAdjust( |
473 | TimePoint bucketStart, |
474 | TimePoint nextBucketStart, |
475 | TimePoint start, |
476 | TimePoint end, |
477 | ValueType input) const; |
478 | |
479 | template <typename Function> |
480 | void forEachBucket(TimePoint start, TimePoint end, Function fn) const; |
481 | |
482 | TimePoint firstTime_; // time of first update() since clear()/constructor |
483 | TimePoint latestTime_; // time of last update() |
484 | Duration duration_; // total duration ("window length") of the time series |
485 | |
486 | Bucket total_; // sum and count of everything in time series |
487 | std::vector<Bucket> buckets_; // actual buckets of values |
488 | }; |
489 | |
490 | } // namespace folly |
491 | |