1/*
2 * Copyright 2013-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <chrono>
20#include <stdexcept>
21#include <string>
22#include <vector>
23
24#include <folly/String.h>
25#include <folly/stats/BucketedTimeSeries.h>
26#include <glog/logging.h>
27
28namespace folly {
29
30/*
31 * This class represents a timeseries which keeps several levels of data
32 * granularity (similar in principle to the loads reported by the UNIX
33 * 'uptime' command). It uses several instances (one per level) of
34 * BucketedTimeSeries as the underlying storage.
35 *
36 * This can easily be used to track sums (and thus rates or averages) over
37 * several predetermined time periods, as well as all-time sums. For example,
38 * you would use to it to track query rate or response speed over the last
39 * 5, 15, 30, and 60 minutes.
40 *
41 * The MultiLevelTimeSeries takes a list of level durations as an input; the
42 * durations must be strictly increasing. Furthermore a special level can be
43 * provided with a duration of '0' -- this will be an "all-time" level. If
44 * an all-time level is provided, it MUST be the last level present.
45 *
46 * The class assumes that time advances forward -- you can't retroactively add
47 * values for events in the past -- the 'now' argument is provided for better
48 * efficiency and ease of unittesting.
49 *
50 * The class is not thread-safe -- use your own synchronization!
51 */
52template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>>
53class MultiLevelTimeSeries {
54 public:
55 using ValueType = VT;
56 using Clock = CT;
57 using Duration = typename Clock::duration;
58 using TimePoint = typename Clock::time_point;
59 using Level = folly::BucketedTimeSeries<ValueType, Clock>;
60
61 /*
62 * Create a new MultiLevelTimeSeries.
63 *
64 * This creates a new MultiLevelTimeSeries that tracks time series data at the
65 * specified time durations (level). The time series data tracked at each
66 * level is then further divided by numBuckets for memory efficiency.
67 *
68 * The durations must be strictly increasing. Furthermore a special level can
69 * be provided with a duration of '0' -- this will be an "all-time" level. If
70 * an all-time level is provided, it MUST be the last level present.
71 */
72 MultiLevelTimeSeries(
73 size_t numBuckets,
74 size_t numLevels,
75 const Duration levelDurations[]);
76
77 MultiLevelTimeSeries(
78 size_t numBuckets,
79 std::initializer_list<Duration> durations);
80
81 /*
82 * Return the number of buckets used to track time series at each level.
83 */
84 size_t numBuckets() const {
85 // The constructor ensures that levels_ has at least one item
86 return levels_[0].numBuckets();
87 }
88
89 /*
90 * Return the number of levels tracked by MultiLevelTimeSeries.
91 */
92 size_t numLevels() const {
93 return levels_.size();
94 }
95
96 /*
97 * Get the BucketedTimeSeries backing the specified level.
98 *
99 * Note: you should generally call update() or flush() before accessing the
100 * data. Otherwise you may be reading stale data if update() or flush() has
101 * not been called recently.
102 */
103 const Level& getLevel(size_t level) const {
104 CHECK_LT(level, levels_.size());
105 return levels_[level];
106 }
107
108 /*
109 * Get the highest granularity level that is still large enough to contain
110 * data going back to the specified start time.
111 *
112 * Note: you should generally call update() or flush() before accessing the
113 * data. Otherwise you may be reading stale data if update() or flush() has
114 * not been called recently.
115 */
116 const Level& getLevel(TimePoint start) const {
117 for (const auto& level : levels_) {
118 if (level.isAllTime()) {
119 return level;
120 }
121 // Note that we use duration() here rather than elapsed().
122 // If duration is large enough to contain the start time then this level
123 // is good enough, even if elapsed() indicates that no data was recorded
124 // before the specified start time.
125 if (level.getLatestTime() - level.duration() <= start) {
126 return level;
127 }
128 }
129 // We should always have an all-time level, so this is never reached.
130 LOG(FATAL) << "No level of timeseries covers internval"
131 << " from " << start.time_since_epoch().count() << " to now";
132 return levels_.back();
133 }
134
135 /*
136 * Get the BucketedTimeSeries backing the specified level.
137 *
138 * Note: you should generally call update() or flush() before accessing the
139 * data. Otherwise you may be reading stale data if update() or flush() has
140 * not been called recently.
141 */
142 const Level& getLevelByDuration(Duration duration) const {
143 // since the number of levels is expected to be small (less than 5 in most
144 // cases), a simple linear scan would be efficient and is intentionally
145 // chosen here over other alternatives for lookup.
146 for (const auto& level : levels_) {
147 if (level.duration() == duration) {
148 return level;
149 }
150 }
151 throw std::out_of_range(folly::to<std::string>(
152 "No level of duration ", duration.count(), " found"));
153 }
154
155 /*
156 * Return the sum of all the data points currently tracked at this level.
157 *
158 * Note: you should generally call update() or flush() before accessing the
159 * data. Otherwise you may be reading stale data if update() or flush() has
160 * not been called recently.
161 */
162 ValueType sum(size_t level) const {
163 return getLevel(level).sum();
164 }
165
166 /*
167 * Return the average (sum / count) of all the data points currently tracked
168 * at this level.
169 *
170 * The return type may be specified to control whether floating-point or
171 * integer division should be performed.
172 *
173 * Note: you should generally call update() or flush() before accessing the
174 * data. Otherwise you may be reading stale data if update() or flush() has
175 * not been called recently.
176 */
177 template <typename ReturnType = double>
178 ReturnType avg(size_t level) const {
179 return getLevel(level).template avg<ReturnType>();
180 }
181
182 /*
183 * Return the rate (sum divided by elaspsed time) of the all data points
184 * currently tracked at this level.
185 *
186 * Note: you should generally call update() or flush() before accessing the
187 * data. Otherwise you may be reading stale data if update() or flush() has
188 * not been called recently.
189 */
190 template <typename ReturnType = double, typename Interval = Duration>
191 ReturnType rate(size_t level) const {
192 return getLevel(level).template rate<ReturnType, Interval>();
193 }
194
195 /*
196 * Return the number of data points currently tracked at this level.
197 *
198 * Note: you should generally call update() or flush() before accessing the
199 * data. Otherwise you may be reading stale data if update() or flush() has
200 * not been called recently.
201 */
202 uint64_t count(size_t level) const {
203 return getLevel(level).count();
204 }
205
206 /*
207 * Return the count divided by the elapsed time tracked at this level.
208 *
209 * Note: you should generally call update() or flush() before accessing the
210 * data. Otherwise you may be reading stale data if update() or flush() has
211 * not been called recently.
212 */
213 template <typename ReturnType = double, typename Interval = Duration>
214 ReturnType countRate(size_t level) const {
215 return getLevel(level).template countRate<ReturnType, Interval>();
216 }
217
218 /*
219 * Return the sum of all the data points currently tracked at this level.
220 *
221 * This method is identical to sum(size_t level) above but takes in the
222 * duration that the user is interested in querying as the parameter.
223 *
224 * Note: you should generally call update() or flush() before accessing the
225 * data. Otherwise you may be reading stale data if update() or flush() has
226 * not been called recently.
227 */
228 ValueType sum(Duration duration) const {
229 return getLevelByDuration(duration).sum();
230 }
231
232 /*
233 * Return the average (sum / count) of all the data points currently tracked
234 * at this level.
235 *
236 * This method is identical to avg(size_t level) above but takes in the
237 * duration that the user is interested in querying as the parameter.
238 *
239 * Note: you should generally call update() or flush() before accessing the
240 * data. Otherwise you may be reading stale data if update() or flush() has
241 * not been called recently.
242 */
243 template <typename ReturnType = double>
244 ReturnType avg(Duration duration) const {
245 return getLevelByDuration(duration).template avg<ReturnType>();
246 }
247
248 /*
249 * Return the rate (sum divided by elaspsed time) of the all data points
250 * currently tracked at this level.
251 *
252 * This method is identical to rate(size_t level) above but takes in the
253 * duration that the user is interested in querying as the parameter.
254 *
255 * Note: you should generally call update() or flush() before accessing the
256 * data. Otherwise you may be reading stale data if update() or flush() has
257 * not been called recently.
258 */
259 template <typename ReturnType = double, typename Interval = Duration>
260 ReturnType rate(Duration duration) const {
261 return getLevelByDuration(duration).template rate<ReturnType, Interval>();
262 }
263
264 /*
265 * Return the number of data points currently tracked at this level.
266 *
267 * This method is identical to count(size_t level) above but takes in the
268 * duration that the user is interested in querying as the parameter.
269 *
270 * Note: you should generally call update() or flush() before accessing the
271 * data. Otherwise you may be reading stale data if update() or flush() has
272 * not been called recently.
273 */
274 uint64_t count(Duration duration) const {
275 return getLevelByDuration(duration).count();
276 }
277
278 /*
279 * Return the count divided by the elapsed time tracked at this level.
280 *
281 * This method is identical to countRate(size_t level) above but takes in the
282 * duration that the user is interested in querying as the parameter.
283 *
284 * Note: you should generally call update() or flush() before accessing the
285 * data. Otherwise you may be reading stale data if update() or flush() has
286 * not been called recently.
287 */
288 template <typename ReturnType = double, typename Interval = Duration>
289 ReturnType countRate(Duration duration) const {
290 return getLevelByDuration(duration)
291 .template countRate<ReturnType, Interval>();
292 }
293
294 /*
295 * Estimate the sum of the data points that occurred in the specified time
296 * period at this level.
297 *
298 * The range queried is [start, end).
299 * That is, start is inclusive, and end is exclusive.
300 *
301 * Note that data outside of the timeseries duration will no longer be
302 * available for use in the estimation. Specifying a start time earlier than
303 * getEarliestTime() will not have much effect, since only data points after
304 * that point in time will be counted.
305 *
306 * Note that the value returned is an estimate, and may not be precise.
307 *
308 * Note: you should generally call update() or flush() before accessing the
309 * data. Otherwise you may be reading stale data if update() or flush() has
310 * not been called recently.
311 */
312 ValueType sum(TimePoint start, TimePoint end) const {
313 return getLevel(start).sum(start, end);
314 }
315
316 /*
317 * Estimate the average value during the specified time period.
318 *
319 * The same caveats documented in the sum(TimePoint start, TimePoint end)
320 * comments apply here as well.
321 *
322 * Note: you should generally call update() or flush() before accessing the
323 * data. Otherwise you may be reading stale data if update() or flush() has
324 * not been called recently.
325 */
326 template <typename ReturnType = double>
327 ReturnType avg(TimePoint start, TimePoint end) const {
328 return getLevel(start).template avg<ReturnType>(start, end);
329 }
330
331 /*
332 * Estimate the rate during the specified time period.
333 *
334 * The same caveats documented in the sum(TimePoint start, TimePoint end)
335 * comments apply here as well.
336 *
337 * Note: you should generally call update() or flush() before accessing the
338 * data. Otherwise you may be reading stale data if update() or flush() has
339 * not been called recently.
340 */
341 template <typename ReturnType = double>
342 ReturnType rate(TimePoint start, TimePoint end) const {
343 return getLevel(start).template rate<ReturnType>(start, end);
344 }
345
346 /*
347 * Estimate the count during the specified time period.
348 *
349 * The same caveats documented in the sum(TimePoint start, TimePoint end)
350 * comments apply here as well.
351 *
352 * Note: you should generally call update() or flush() before accessing the
353 * data. Otherwise you may be reading stale data if update() or flush() has
354 * not been called recently.
355 */
356 uint64_t count(TimePoint start, TimePoint end) const {
357 return getLevel(start).count(start, end);
358 }
359
360 /*
361 * Adds the value 'val' at time 'now' to all levels.
362 *
363 * Data points added at the same time point is cached internally here and not
364 * propagated to the underlying levels until either flush() is called or when
365 * update from a different time comes.
366 *
367 * This function expects time to always move forwards: it cannot be used to
368 * add historical data points that have occurred in the past. If now is
369 * older than the another timestamp that has already been passed to
370 * addValue() or update(), now will be ignored and the latest timestamp will
371 * be used.
372 */
373 void addValue(TimePoint now, const ValueType& val);
374
375 /*
376 * Adds the value 'val' at time 'now' to all levels.
377 */
378 void addValue(TimePoint now, const ValueType& val, uint64_t times);
379
380 /*
381 * Adds the value 'total' at time 'now' to all levels as the sum of
382 * 'nsamples' samples.
383 */
384 void
385 addValueAggregated(TimePoint now, const ValueType& total, uint64_t nsamples);
386
387 /*
388 * Update all the levels to the specified time, doing all the necessary
389 * work to rotate the buckets and remove any stale data points.
390 *
391 * When reading data from the timeseries, you should make sure to manually
392 * call update() before accessing the data. Otherwise you may be reading
393 * stale data if update() has not been called recently.
394 */
395 void update(TimePoint now);
396
397 /*
398 * Reset all the timeseries to an empty state as if no data points have ever
399 * been added to it.
400 */
401 void clear();
402
403 /*
404 * Flush all cached updates.
405 */
406 void flush();
407
408 /*
409 * Legacy APIs that accept a Duration parameters rather than TimePoint.
410 *
411 * These treat the Duration as relative to the clock epoch.
412 * Prefer using the correct TimePoint-based APIs instead. These APIs will
413 * eventually be deprecated and removed.
414 */
415 void update(Duration now) {
416 update(TimePoint(now));
417 }
418 void addValue(Duration now, const ValueType& value) {
419 addValue(TimePoint(now), value);
420 }
421 void addValue(Duration now, const ValueType& value, uint64_t times) {
422 addValue(TimePoint(now), value, times);
423 }
424 void
425 addValueAggregated(Duration now, const ValueType& total, uint64_t nsamples) {
426 addValueAggregated(TimePoint(now), total, nsamples);
427 }
428
429 private:
430 std::vector<Level> levels_;
431
432 // Updates within the same time interval are cached
433 // They are flushed out when updates from a different time comes,
434 // or flush() is called.
435 TimePoint cachedTime_;
436 ValueType cachedSum_;
437 uint64_t cachedCount_;
438};
439
440} // namespace folly
441