1/*
2 * Copyright 2012-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <chrono>
20#include <vector>
21
22#include <folly/stats/detail/Bucket.h>
23
24namespace folly {
25
26/*
27 * A helper clock type to helper older code using BucketedTimeSeries with
28 * std::chrono::seconds transition to properly using clock types and time_point
29 * objects.
30 */
31template <typename TT = std::chrono::seconds>
32class LegacyStatsClock {
33 public:
34 using duration = TT;
35 using time_point = std::chrono::time_point<LegacyStatsClock, TT>;
36
37 // This clock does not actually implement now(), since the older API
38 // did not really specify what clock should be used. (In practice most
39 // callers unfortuantely used wall clock time rather than a monotonic clock.)
40};
41
42/*
43 * This class represents a bucketed time series which keeps track of values
44 * added in the recent past, and merges these values together into a fixed
45 * number of buckets to keep a lid on memory use if the number of values
46 * added is very large.
47 *
48 * For example, a BucketedTimeSeries() with duration == 60s and 10 buckets
49 * will keep track of 10 6-second buckets, and discard all data added more
50 * than 1 minute ago. As time ticks by, a 6-second bucket at a time will
51 * be discarded and new data will go into the newly opened bucket. Internally,
52 * it uses a circular array of buckets that it reuses as time advances.
53 *
54 * This class assumes that time advances forwards. The window of time tracked
55 * by the timeseries will advance forwards whenever a more recent timestamp is
56 * passed to addValue(). While it is possible to pass old time values to
57 * addValue(), this will never move the time window backwards. If the old time
58 * value falls outside the tracked window of time, the data point will be
59 * ignored.
60 *
61 * This class is not thread-safe -- use your own synchronization!
62 */
63template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>>
64class BucketedTimeSeries {
65 public:
66 using ValueType = VT;
67 using Clock = CT;
68 using Duration = typename Clock::duration;
69 using TimePoint = typename Clock::time_point;
70 using Bucket = detail::Bucket<ValueType>;
71
72 /*
73 * Create a new BucketedTimeSeries.
74 *
75 * This creates a new BucketedTimeSeries with the specified number of
76 * buckets, storing data for the specified amount of time.
77 *
78 * If the duration is 0, the BucketedTimeSeries will track data forever,
79 * and does not need the rolling buckets. The numBuckets parameter is
80 * ignored when duration is 0.
81 */
82 BucketedTimeSeries(size_t numBuckets, Duration duration);
83
84 /*
85 * Create a new BucketedTimeSeries.
86 *
87 * This constructor is used to reconstruct a timeseries using
88 * previously saved data
89 */
90 BucketedTimeSeries(
91 TimePoint theFirstTime,
92 TimePoint theLatestTime,
93 Duration maxDuration,
94 const std::vector<Bucket>& bucketsList);
95
96 /*
97 * Adds the value 'val' at time 'now'
98 *
99 * This function expects time to generally move forwards. The window of time
100 * tracked by this time series will move forwards with time. If 'now' is
101 * more recent than any time previously seen, addValue() will automatically
102 * call update(now) to advance the time window tracked by this data
103 * structure.
104 *
105 * Values in the recent past may be added to the data structure by passing in
106 * a slightly older value of 'now', as long as this time point still falls
107 * within the tracked duration. If 'now' is older than the tracked duration
108 * of time, the data point value will be ignored, and addValue() will return
109 * false without doing anything else.
110 *
111 * Returns true on success, or false if now was older than the tracked time
112 * window.
113 */
114 bool addValue(TimePoint now, const ValueType& val);
115
116 /*
117 * Adds the value 'val' the given number of 'times' at time 'now'
118 */
119 bool addValue(TimePoint now, const ValueType& val, uint64_t times);
120
121 /*
122 * Adds the value 'total' as the sum of 'nsamples' samples
123 */
124 bool
125 addValueAggregated(TimePoint now, const ValueType& total, uint64_t nsamples);
126
127 /*
128 * Updates the container to the specified time, doing all the necessary
129 * work to rotate the buckets and remove any stale data points.
130 *
131 * The addValue() methods automatically call update() when adding new data
132 * points. However, when reading data from the timeseries, you should make
133 * sure to manually call update() before accessing the data. Otherwise you
134 * may be reading stale data if update() has not been called recently.
135 *
136 * Returns the current bucket index after the update.
137 */
138 size_t update(TimePoint now);
139
140 /*
141 * Reset the timeseries to an empty state,
142 * as if no data points have ever been added to it.
143 */
144 void clear();
145
146 /*
147 * Get the latest time that has ever been passed to update() or addValue().
148 *
149 * If no data has ever been added to this timeseries, 0 will be returned.
150 */
151 TimePoint getLatestTime() const {
152 return latestTime_;
153 }
154
155 /*
156 * Get the time of the earliest data point stored in this timeseries.
157 *
158 * If no data has ever been added to this timeseries, 0 will be returned.
159 *
160 * If isAllTime() is true, this is simply the time when the first data point
161 * was recorded.
162 *
163 * For non-all-time data, the timestamp reflects the first data point still
164 * remembered. As new data points are added, old data will be expired.
165 * getEarliestTime() returns the timestamp of the oldest bucket still present
166 * in the timeseries. This will never be older than (getLatestTime() -
167 * duration()).
168 */
169 TimePoint getEarliestTime() const;
170
171 /*
172 * Return the number of buckets.
173 */
174 size_t numBuckets() const {
175 return buckets_.size();
176 }
177
178 /*
179 * Return the maximum duration of data that can be tracked by this
180 * BucketedTimeSeries.
181 */
182 Duration duration() const {
183 return duration_;
184 }
185
186 /*
187 * Returns true if this BucketedTimeSeries stores data for all-time, without
188 * ever rolling over into new buckets.
189 */
190 bool isAllTime() const {
191 return (duration_ == Duration(0));
192 }
193
194 /*
195 * Returns true if no calls to update() have been made since the last call to
196 * clear().
197 */
198 bool empty() const {
199 // We set firstTime_ greater than latestTime_ in the constructor and in
200 // clear, so we use this to distinguish if the timeseries is empty.
201 //
202 // Once a data point has been added, latestTime_ will always be greater
203 // than or equal to firstTime_.
204 return firstTime_ > latestTime_;
205 }
206
207 /*
208 * Returns time of first update() since clear()/constructor.
209 * Note that the returned value is only meaningful when empty() is false.
210 */
211 TimePoint firstTime() const {
212 return firstTime_;
213 }
214
215 /*
216 * Returns time of last update().
217 * Note that the returned value is only meaningful when empty() is false.
218 */
219 TimePoint latestTime() const {
220 return latestTime_;
221 }
222
223 /*
224 * Returns actual buckets of values
225 */
226 const std::vector<Bucket>& buckets() const {
227 return buckets_;
228 }
229
230 /*
231 * Get the amount of time tracked by this timeseries.
232 *
233 * For an all-time timeseries, this returns the length of time since the
234 * first data point was added to the time series.
235 *
236 * Otherwise, this never returns a value greater than the overall timeseries
237 * duration. If the first data point was recorded less than a full duration
238 * ago, the time since the first data point is returned. If a full duration
239 * has elapsed, and we have already thrown away some data, the time since the
240 * oldest bucket is returned.
241 *
242 * For example, say we are tracking 600 seconds worth of data, in 60 buckets.
243 * - If less than 600 seconds have elapsed since the first data point,
244 * elapsed() returns the total elapsed time so far.
245 * - If more than 600 seconds have elapsed, we have already thrown away some
246 * data. However, we throw away a full bucket (10 seconds worth) at once,
247 * so at any point in time we have from 590 to 600 seconds worth of data.
248 * elapsed() will therefore return a value between 590 and 600.
249 *
250 * Note that you generally should call update() before calling elapsed(), to
251 * make sure you are not reading stale data.
252 */
253 Duration elapsed() const;
254
255 /*
256 * Get the amount of time tracked by this timeseries, between the specified
257 * start and end times.
258 *
259 * If the timeseries contains data for the entire time range specified, this
260 * simply returns (end - start). However, if start is earlier than
261 * getEarliestTime(), this returns (end - getEarliestTime()).
262 */
263 Duration elapsed(TimePoint start, TimePoint end) const;
264
265 /*
266 * Return the sum of all the data points currently tracked by this
267 * BucketedTimeSeries.
268 *
269 * Note that you generally should call update() before calling sum(), to
270 * make sure you are not reading stale data.
271 */
272 const ValueType& sum() const {
273 return total_.sum;
274 }
275
276 /*
277 * Return the number of data points currently tracked by this
278 * BucketedTimeSeries.
279 *
280 * Note that you generally should call update() before calling count(), to
281 * make sure you are not reading stale data.
282 */
283 uint64_t count() const {
284 return total_.count;
285 }
286
287 /*
288 * Return the average value (sum / count).
289 *
290 * The return type may be specified to control whether floating-point or
291 * integer division should be performed.
292 *
293 * Note that you generally should call update() before calling avg(), to
294 * make sure you are not reading stale data.
295 */
296 template <typename ReturnType = double>
297 ReturnType avg() const {
298 return total_.template avg<ReturnType>();
299 }
300
301 /*
302 * Return the sum divided by the elapsed time.
303 *
304 * Note that you generally should call update() before calling rate(), to
305 * make sure you are not reading stale data.
306 */
307 template <typename ReturnType = double, typename Interval = Duration>
308 ReturnType rate() const {
309 return rateHelper<ReturnType, Interval>(ReturnType(total_.sum), elapsed());
310 }
311
312 /*
313 * Return the count divided by the elapsed time.
314 *
315 * The Interval template parameter causes the elapsed time to be converted to
316 * the Interval type before using it. For example, if Interval is
317 * std::chrono::seconds, the return value will be the count per second.
318 * If Interval is std::chrono::hours, the return value will be the count per
319 * hour.
320 *
321 * Note that you generally should call update() before calling countRate(),
322 * to make sure you are not reading stale data.
323 */
324 template <typename ReturnType = double, typename Interval = Duration>
325 ReturnType countRate() const {
326 return rateHelper<ReturnType, Interval>(
327 ReturnType(total_.count), elapsed());
328 }
329
330 /*
331 * Estimate the sum of the data points that occurred in the specified time
332 * period.
333 *
334 * The range queried is [start, end).
335 * That is, start is inclusive, and end is exclusive.
336 *
337 * Note that data outside of the timeseries duration will no longer be
338 * available for use in the estimation. Specifying a start time earlier than
339 * getEarliestTime() will not have much effect, since only data points after
340 * that point in time will be counted.
341 *
342 * Note that the value returned is an estimate, and may not be precise.
343 */
344 ValueType sum(TimePoint start, TimePoint end) const;
345
346 /*
347 * Estimate the number of data points that occurred in the specified time
348 * period.
349 *
350 * The same caveats documented in the sum(TimePoint start, TimePoint end)
351 * comments apply here as well.
352 */
353 uint64_t count(TimePoint start, TimePoint end) const;
354
355 /*
356 * Estimate the average value during the specified time period.
357 *
358 * The same caveats documented in the sum(TimePoint start, TimePoint end)
359 * comments apply here as well.
360 */
361 template <typename ReturnType = double>
362 ReturnType avg(TimePoint start, TimePoint end) const;
363
364 /*
365 * Estimate the rate during the specified time period.
366 *
367 * The same caveats documented in the sum(TimePoint start, TimePoint end)
368 * comments apply here as well.
369 */
370 template <typename ReturnType = double, typename Interval = Duration>
371 ReturnType rate(TimePoint start, TimePoint end) const {
372 ValueType intervalSum = sum(start, end);
373 Duration interval = elapsed(start, end);
374 return rateHelper<ReturnType, Interval>(intervalSum, interval);
375 }
376
377 /*
378 * Estimate the rate of data points being added during the specified time
379 * period.
380 *
381 * The same caveats documented in the sum(TimePoint start, TimePoint end)
382 * comments apply here as well.
383 */
384 template <typename ReturnType = double, typename Interval = Duration>
385 ReturnType countRate(TimePoint start, TimePoint end) const {
386 uint64_t intervalCount = count(start, end);
387 Duration interval = elapsed(start, end);
388 return rateHelper<ReturnType, Interval>(
389 ReturnType(intervalCount), interval);
390 }
391
392 /*
393 * Invoke a function for each bucket.
394 *
395 * The function will take as arguments the bucket index,
396 * the bucket start time, and the start time of the subsequent bucket.
397 *
398 * It should return true to continue iterating through the buckets, and false
399 * to break out of the loop and stop, without calling the function on any
400 * more buckets.
401 *
402 * bool function(const Bucket& bucket, TimePoint bucketStart,
403 * TimePoint nextBucketStart)
404 */
405 template <typename Function>
406 void forEachBucket(Function fn) const;
407
408 /*
409 * Get the index for the bucket containing the specified time.
410 *
411 * Note that the index is only valid if this time actually falls within one
412 * of the current buckets. If you pass in a value more recent than
413 * getLatestTime() or older than (getLatestTime() - elapsed()), the index
414 * returned will not be valid.
415 *
416 * This method may not be called for all-time data.
417 */
418 size_t getBucketIdx(TimePoint time) const;
419
420 /*
421 * Get the bucket at the specified index.
422 *
423 * This method may not be called for all-time data.
424 */
425 const Bucket& getBucketByIndex(size_t idx) const {
426 return buckets_[idx];
427 }
428
429 /*
430 * Compute the bucket index that the specified time falls into,
431 * as well as the bucket start time and the next bucket's start time.
432 *
433 * This method may not be called for all-time data.
434 */
435 void getBucketInfo(
436 TimePoint time,
437 size_t* bucketIdx,
438 TimePoint* bucketStart,
439 TimePoint* nextBucketStart) const;
440
441 /*
442 * Legacy APIs that accept a Duration parameters rather than TimePoint.
443 *
444 * These treat the Duration as relative to the clock epoch.
445 * Prefer using the correct TimePoint-based APIs instead. These APIs will
446 * eventually be deprecated and removed.
447 */
448 bool addValue(Duration now, const ValueType& val) {
449 return addValueAggregated(TimePoint(now), val, 1);
450 }
451 bool addValue(Duration now, const ValueType& val, uint64_t times) {
452 return addValueAggregated(TimePoint(now), val * ValueType(times), times);
453 }
454 bool
455 addValueAggregated(Duration now, const ValueType& total, uint64_t nsamples) {
456 return addValueAggregated(TimePoint(now), total, nsamples);
457 }
458 size_t update(Duration now) {
459 return update(TimePoint(now));
460 }
461
462 private:
463 template <typename ReturnType = double, typename Interval = Duration>
464 ReturnType rateHelper(ReturnType numerator, Duration elapsedTime) const {
465 return detail::rateHelper<ReturnType, Duration, Interval>(
466 numerator, elapsedTime);
467 }
468
469 TimePoint getEarliestTimeNonEmpty() const;
470 size_t updateBuckets(TimePoint now);
471
472 ValueType rangeAdjust(
473 TimePoint bucketStart,
474 TimePoint nextBucketStart,
475 TimePoint start,
476 TimePoint end,
477 ValueType input) const;
478
479 template <typename Function>
480 void forEachBucket(TimePoint start, TimePoint end, Function fn) const;
481
482 TimePoint firstTime_; // time of first update() since clear()/constructor
483 TimePoint latestTime_; // time of last update()
484 Duration duration_; // total duration ("window length") of the time series
485
486 Bucket total_; // sum and count of everything in time series
487 std::vector<Bucket> buckets_; // actual buckets of values
488};
489
490} // namespace folly
491