1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | |
19 | #include <chrono> |
20 | #include <stdexcept> |
21 | #include <string> |
22 | #include <vector> |
23 | |
24 | #include <folly/String.h> |
25 | #include <folly/stats/BucketedTimeSeries.h> |
26 | #include <glog/logging.h> |
27 | |
28 | namespace folly { |
29 | |
30 | /* |
31 | * This class represents a timeseries which keeps several levels of data |
32 | * granularity (similar in principle to the loads reported by the UNIX |
33 | * 'uptime' command). It uses several instances (one per level) of |
34 | * BucketedTimeSeries as the underlying storage. |
35 | * |
36 | * This can easily be used to track sums (and thus rates or averages) over |
37 | * several predetermined time periods, as well as all-time sums. For example, |
38 | * you would use to it to track query rate or response speed over the last |
39 | * 5, 15, 30, and 60 minutes. |
40 | * |
41 | * The MultiLevelTimeSeries takes a list of level durations as an input; the |
42 | * durations must be strictly increasing. Furthermore a special level can be |
43 | * provided with a duration of '0' -- this will be an "all-time" level. If |
44 | * an all-time level is provided, it MUST be the last level present. |
45 | * |
46 | * The class assumes that time advances forward -- you can't retroactively add |
47 | * values for events in the past -- the 'now' argument is provided for better |
48 | * efficiency and ease of unittesting. |
49 | * |
50 | * The class is not thread-safe -- use your own synchronization! |
51 | */ |
52 | template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>> |
53 | class MultiLevelTimeSeries { |
54 | public: |
55 | using ValueType = VT; |
56 | using Clock = CT; |
57 | using Duration = typename Clock::duration; |
58 | using TimePoint = typename Clock::time_point; |
59 | using Level = folly::BucketedTimeSeries<ValueType, Clock>; |
60 | |
61 | /* |
62 | * Create a new MultiLevelTimeSeries. |
63 | * |
64 | * This creates a new MultiLevelTimeSeries that tracks time series data at the |
65 | * specified time durations (level). The time series data tracked at each |
66 | * level is then further divided by numBuckets for memory efficiency. |
67 | * |
68 | * The durations must be strictly increasing. Furthermore a special level can |
69 | * be provided with a duration of '0' -- this will be an "all-time" level. If |
70 | * an all-time level is provided, it MUST be the last level present. |
71 | */ |
72 | MultiLevelTimeSeries( |
73 | size_t numBuckets, |
74 | size_t numLevels, |
75 | const Duration levelDurations[]); |
76 | |
77 | MultiLevelTimeSeries( |
78 | size_t numBuckets, |
79 | std::initializer_list<Duration> durations); |
80 | |
81 | /* |
82 | * Return the number of buckets used to track time series at each level. |
83 | */ |
84 | size_t numBuckets() const { |
85 | // The constructor ensures that levels_ has at least one item |
86 | return levels_[0].numBuckets(); |
87 | } |
88 | |
89 | /* |
90 | * Return the number of levels tracked by MultiLevelTimeSeries. |
91 | */ |
92 | size_t numLevels() const { |
93 | return levels_.size(); |
94 | } |
95 | |
96 | /* |
97 | * Get the BucketedTimeSeries backing the specified level. |
98 | * |
99 | * Note: you should generally call update() or flush() before accessing the |
100 | * data. Otherwise you may be reading stale data if update() or flush() has |
101 | * not been called recently. |
102 | */ |
103 | const Level& getLevel(size_t level) const { |
104 | CHECK_LT(level, levels_.size()); |
105 | return levels_[level]; |
106 | } |
107 | |
108 | /* |
109 | * Get the highest granularity level that is still large enough to contain |
110 | * data going back to the specified start time. |
111 | * |
112 | * Note: you should generally call update() or flush() before accessing the |
113 | * data. Otherwise you may be reading stale data if update() or flush() has |
114 | * not been called recently. |
115 | */ |
116 | const Level& getLevel(TimePoint start) const { |
117 | for (const auto& level : levels_) { |
118 | if (level.isAllTime()) { |
119 | return level; |
120 | } |
121 | // Note that we use duration() here rather than elapsed(). |
122 | // If duration is large enough to contain the start time then this level |
123 | // is good enough, even if elapsed() indicates that no data was recorded |
124 | // before the specified start time. |
125 | if (level.getLatestTime() - level.duration() <= start) { |
126 | return level; |
127 | } |
128 | } |
129 | // We should always have an all-time level, so this is never reached. |
130 | LOG(FATAL) << "No level of timeseries covers internval" |
131 | << " from " << start.time_since_epoch().count() << " to now" ; |
132 | return levels_.back(); |
133 | } |
134 | |
135 | /* |
136 | * Get the BucketedTimeSeries backing the specified level. |
137 | * |
138 | * Note: you should generally call update() or flush() before accessing the |
139 | * data. Otherwise you may be reading stale data if update() or flush() has |
140 | * not been called recently. |
141 | */ |
142 | const Level& getLevelByDuration(Duration duration) const { |
143 | // since the number of levels is expected to be small (less than 5 in most |
144 | // cases), a simple linear scan would be efficient and is intentionally |
145 | // chosen here over other alternatives for lookup. |
146 | for (const auto& level : levels_) { |
147 | if (level.duration() == duration) { |
148 | return level; |
149 | } |
150 | } |
151 | throw std::out_of_range(folly::to<std::string>( |
152 | "No level of duration " , duration.count(), " found" )); |
153 | } |
154 | |
155 | /* |
156 | * Return the sum of all the data points currently tracked at this level. |
157 | * |
158 | * Note: you should generally call update() or flush() before accessing the |
159 | * data. Otherwise you may be reading stale data if update() or flush() has |
160 | * not been called recently. |
161 | */ |
162 | ValueType sum(size_t level) const { |
163 | return getLevel(level).sum(); |
164 | } |
165 | |
166 | /* |
167 | * Return the average (sum / count) of all the data points currently tracked |
168 | * at this level. |
169 | * |
170 | * The return type may be specified to control whether floating-point or |
171 | * integer division should be performed. |
172 | * |
173 | * Note: you should generally call update() or flush() before accessing the |
174 | * data. Otherwise you may be reading stale data if update() or flush() has |
175 | * not been called recently. |
176 | */ |
177 | template <typename ReturnType = double> |
178 | ReturnType avg(size_t level) const { |
179 | return getLevel(level).template avg<ReturnType>(); |
180 | } |
181 | |
182 | /* |
183 | * Return the rate (sum divided by elaspsed time) of the all data points |
184 | * currently tracked at this level. |
185 | * |
186 | * Note: you should generally call update() or flush() before accessing the |
187 | * data. Otherwise you may be reading stale data if update() or flush() has |
188 | * not been called recently. |
189 | */ |
190 | template <typename ReturnType = double, typename Interval = Duration> |
191 | ReturnType rate(size_t level) const { |
192 | return getLevel(level).template rate<ReturnType, Interval>(); |
193 | } |
194 | |
195 | /* |
196 | * Return the number of data points currently tracked at this level. |
197 | * |
198 | * Note: you should generally call update() or flush() before accessing the |
199 | * data. Otherwise you may be reading stale data if update() or flush() has |
200 | * not been called recently. |
201 | */ |
202 | uint64_t count(size_t level) const { |
203 | return getLevel(level).count(); |
204 | } |
205 | |
206 | /* |
207 | * Return the count divided by the elapsed time tracked at this level. |
208 | * |
209 | * Note: you should generally call update() or flush() before accessing the |
210 | * data. Otherwise you may be reading stale data if update() or flush() has |
211 | * not been called recently. |
212 | */ |
213 | template <typename ReturnType = double, typename Interval = Duration> |
214 | ReturnType countRate(size_t level) const { |
215 | return getLevel(level).template countRate<ReturnType, Interval>(); |
216 | } |
217 | |
218 | /* |
219 | * Return the sum of all the data points currently tracked at this level. |
220 | * |
221 | * This method is identical to sum(size_t level) above but takes in the |
222 | * duration that the user is interested in querying as the parameter. |
223 | * |
224 | * Note: you should generally call update() or flush() before accessing the |
225 | * data. Otherwise you may be reading stale data if update() or flush() has |
226 | * not been called recently. |
227 | */ |
228 | ValueType sum(Duration duration) const { |
229 | return getLevelByDuration(duration).sum(); |
230 | } |
231 | |
232 | /* |
233 | * Return the average (sum / count) of all the data points currently tracked |
234 | * at this level. |
235 | * |
236 | * This method is identical to avg(size_t level) above but takes in the |
237 | * duration that the user is interested in querying as the parameter. |
238 | * |
239 | * Note: you should generally call update() or flush() before accessing the |
240 | * data. Otherwise you may be reading stale data if update() or flush() has |
241 | * not been called recently. |
242 | */ |
243 | template <typename ReturnType = double> |
244 | ReturnType avg(Duration duration) const { |
245 | return getLevelByDuration(duration).template avg<ReturnType>(); |
246 | } |
247 | |
248 | /* |
249 | * Return the rate (sum divided by elaspsed time) of the all data points |
250 | * currently tracked at this level. |
251 | * |
252 | * This method is identical to rate(size_t level) above but takes in the |
253 | * duration that the user is interested in querying as the parameter. |
254 | * |
255 | * Note: you should generally call update() or flush() before accessing the |
256 | * data. Otherwise you may be reading stale data if update() or flush() has |
257 | * not been called recently. |
258 | */ |
259 | template <typename ReturnType = double, typename Interval = Duration> |
260 | ReturnType rate(Duration duration) const { |
261 | return getLevelByDuration(duration).template rate<ReturnType, Interval>(); |
262 | } |
263 | |
264 | /* |
265 | * Return the number of data points currently tracked at this level. |
266 | * |
267 | * This method is identical to count(size_t level) above but takes in the |
268 | * duration that the user is interested in querying as the parameter. |
269 | * |
270 | * Note: you should generally call update() or flush() before accessing the |
271 | * data. Otherwise you may be reading stale data if update() or flush() has |
272 | * not been called recently. |
273 | */ |
274 | uint64_t count(Duration duration) const { |
275 | return getLevelByDuration(duration).count(); |
276 | } |
277 | |
278 | /* |
279 | * Return the count divided by the elapsed time tracked at this level. |
280 | * |
281 | * This method is identical to countRate(size_t level) above but takes in the |
282 | * duration that the user is interested in querying as the parameter. |
283 | * |
284 | * Note: you should generally call update() or flush() before accessing the |
285 | * data. Otherwise you may be reading stale data if update() or flush() has |
286 | * not been called recently. |
287 | */ |
288 | template <typename ReturnType = double, typename Interval = Duration> |
289 | ReturnType countRate(Duration duration) const { |
290 | return getLevelByDuration(duration) |
291 | .template countRate<ReturnType, Interval>(); |
292 | } |
293 | |
294 | /* |
295 | * Estimate the sum of the data points that occurred in the specified time |
296 | * period at this level. |
297 | * |
298 | * The range queried is [start, end). |
299 | * That is, start is inclusive, and end is exclusive. |
300 | * |
301 | * Note that data outside of the timeseries duration will no longer be |
302 | * available for use in the estimation. Specifying a start time earlier than |
303 | * getEarliestTime() will not have much effect, since only data points after |
304 | * that point in time will be counted. |
305 | * |
306 | * Note that the value returned is an estimate, and may not be precise. |
307 | * |
308 | * Note: you should generally call update() or flush() before accessing the |
309 | * data. Otherwise you may be reading stale data if update() or flush() has |
310 | * not been called recently. |
311 | */ |
312 | ValueType sum(TimePoint start, TimePoint end) const { |
313 | return getLevel(start).sum(start, end); |
314 | } |
315 | |
316 | /* |
317 | * Estimate the average value during the specified time period. |
318 | * |
319 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
320 | * comments apply here as well. |
321 | * |
322 | * Note: you should generally call update() or flush() before accessing the |
323 | * data. Otherwise you may be reading stale data if update() or flush() has |
324 | * not been called recently. |
325 | */ |
326 | template <typename ReturnType = double> |
327 | ReturnType avg(TimePoint start, TimePoint end) const { |
328 | return getLevel(start).template avg<ReturnType>(start, end); |
329 | } |
330 | |
331 | /* |
332 | * Estimate the rate during the specified time period. |
333 | * |
334 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
335 | * comments apply here as well. |
336 | * |
337 | * Note: you should generally call update() or flush() before accessing the |
338 | * data. Otherwise you may be reading stale data if update() or flush() has |
339 | * not been called recently. |
340 | */ |
341 | template <typename ReturnType = double> |
342 | ReturnType rate(TimePoint start, TimePoint end) const { |
343 | return getLevel(start).template rate<ReturnType>(start, end); |
344 | } |
345 | |
346 | /* |
347 | * Estimate the count during the specified time period. |
348 | * |
349 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
350 | * comments apply here as well. |
351 | * |
352 | * Note: you should generally call update() or flush() before accessing the |
353 | * data. Otherwise you may be reading stale data if update() or flush() has |
354 | * not been called recently. |
355 | */ |
356 | uint64_t count(TimePoint start, TimePoint end) const { |
357 | return getLevel(start).count(start, end); |
358 | } |
359 | |
360 | /* |
361 | * Adds the value 'val' at time 'now' to all levels. |
362 | * |
363 | * Data points added at the same time point is cached internally here and not |
364 | * propagated to the underlying levels until either flush() is called or when |
365 | * update from a different time comes. |
366 | * |
367 | * This function expects time to always move forwards: it cannot be used to |
368 | * add historical data points that have occurred in the past. If now is |
369 | * older than the another timestamp that has already been passed to |
370 | * addValue() or update(), now will be ignored and the latest timestamp will |
371 | * be used. |
372 | */ |
373 | void addValue(TimePoint now, const ValueType& val); |
374 | |
375 | /* |
376 | * Adds the value 'val' at time 'now' to all levels. |
377 | */ |
378 | void addValue(TimePoint now, const ValueType& val, uint64_t times); |
379 | |
380 | /* |
381 | * Adds the value 'total' at time 'now' to all levels as the sum of |
382 | * 'nsamples' samples. |
383 | */ |
384 | void |
385 | addValueAggregated(TimePoint now, const ValueType& total, uint64_t nsamples); |
386 | |
387 | /* |
388 | * Update all the levels to the specified time, doing all the necessary |
389 | * work to rotate the buckets and remove any stale data points. |
390 | * |
391 | * When reading data from the timeseries, you should make sure to manually |
392 | * call update() before accessing the data. Otherwise you may be reading |
393 | * stale data if update() has not been called recently. |
394 | */ |
395 | void update(TimePoint now); |
396 | |
397 | /* |
398 | * Reset all the timeseries to an empty state as if no data points have ever |
399 | * been added to it. |
400 | */ |
401 | void clear(); |
402 | |
403 | /* |
404 | * Flush all cached updates. |
405 | */ |
406 | void flush(); |
407 | |
408 | /* |
409 | * Legacy APIs that accept a Duration parameters rather than TimePoint. |
410 | * |
411 | * These treat the Duration as relative to the clock epoch. |
412 | * Prefer using the correct TimePoint-based APIs instead. These APIs will |
413 | * eventually be deprecated and removed. |
414 | */ |
415 | void update(Duration now) { |
416 | update(TimePoint(now)); |
417 | } |
418 | void addValue(Duration now, const ValueType& value) { |
419 | addValue(TimePoint(now), value); |
420 | } |
421 | void addValue(Duration now, const ValueType& value, uint64_t times) { |
422 | addValue(TimePoint(now), value, times); |
423 | } |
424 | void |
425 | addValueAggregated(Duration now, const ValueType& total, uint64_t nsamples) { |
426 | addValueAggregated(TimePoint(now), total, nsamples); |
427 | } |
428 | |
429 | private: |
430 | std::vector<Level> levels_; |
431 | |
432 | // Updates within the same time interval are cached |
433 | // They are flushed out when updates from a different time comes, |
434 | // or flush() is called. |
435 | TimePoint cachedTime_; |
436 | ValueType cachedSum_; |
437 | uint64_t cachedCount_; |
438 | }; |
439 | |
440 | } // namespace folly |
441 | |