| 1 | /* |
| 2 | * Copyright 2013-present Facebook, Inc. |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #pragma once |
| 18 | |
| 19 | #include <chrono> |
| 20 | #include <stdexcept> |
| 21 | #include <string> |
| 22 | #include <vector> |
| 23 | |
| 24 | #include <folly/String.h> |
| 25 | #include <folly/stats/BucketedTimeSeries.h> |
| 26 | #include <glog/logging.h> |
| 27 | |
| 28 | namespace folly { |
| 29 | |
| 30 | /* |
| 31 | * This class represents a timeseries which keeps several levels of data |
| 32 | * granularity (similar in principle to the loads reported by the UNIX |
| 33 | * 'uptime' command). It uses several instances (one per level) of |
| 34 | * BucketedTimeSeries as the underlying storage. |
| 35 | * |
| 36 | * This can easily be used to track sums (and thus rates or averages) over |
| 37 | * several predetermined time periods, as well as all-time sums. For example, |
| 38 | * you would use to it to track query rate or response speed over the last |
| 39 | * 5, 15, 30, and 60 minutes. |
| 40 | * |
| 41 | * The MultiLevelTimeSeries takes a list of level durations as an input; the |
| 42 | * durations must be strictly increasing. Furthermore a special level can be |
| 43 | * provided with a duration of '0' -- this will be an "all-time" level. If |
| 44 | * an all-time level is provided, it MUST be the last level present. |
| 45 | * |
| 46 | * The class assumes that time advances forward -- you can't retroactively add |
| 47 | * values for events in the past -- the 'now' argument is provided for better |
| 48 | * efficiency and ease of unittesting. |
| 49 | * |
| 50 | * The class is not thread-safe -- use your own synchronization! |
| 51 | */ |
| 52 | template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>> |
| 53 | class MultiLevelTimeSeries { |
| 54 | public: |
| 55 | using ValueType = VT; |
| 56 | using Clock = CT; |
| 57 | using Duration = typename Clock::duration; |
| 58 | using TimePoint = typename Clock::time_point; |
| 59 | using Level = folly::BucketedTimeSeries<ValueType, Clock>; |
| 60 | |
| 61 | /* |
| 62 | * Create a new MultiLevelTimeSeries. |
| 63 | * |
| 64 | * This creates a new MultiLevelTimeSeries that tracks time series data at the |
| 65 | * specified time durations (level). The time series data tracked at each |
| 66 | * level is then further divided by numBuckets for memory efficiency. |
| 67 | * |
| 68 | * The durations must be strictly increasing. Furthermore a special level can |
| 69 | * be provided with a duration of '0' -- this will be an "all-time" level. If |
| 70 | * an all-time level is provided, it MUST be the last level present. |
| 71 | */ |
| 72 | MultiLevelTimeSeries( |
| 73 | size_t numBuckets, |
| 74 | size_t numLevels, |
| 75 | const Duration levelDurations[]); |
| 76 | |
| 77 | MultiLevelTimeSeries( |
| 78 | size_t numBuckets, |
| 79 | std::initializer_list<Duration> durations); |
| 80 | |
| 81 | /* |
| 82 | * Return the number of buckets used to track time series at each level. |
| 83 | */ |
| 84 | size_t numBuckets() const { |
| 85 | // The constructor ensures that levels_ has at least one item |
| 86 | return levels_[0].numBuckets(); |
| 87 | } |
| 88 | |
| 89 | /* |
| 90 | * Return the number of levels tracked by MultiLevelTimeSeries. |
| 91 | */ |
| 92 | size_t numLevels() const { |
| 93 | return levels_.size(); |
| 94 | } |
| 95 | |
| 96 | /* |
| 97 | * Get the BucketedTimeSeries backing the specified level. |
| 98 | * |
| 99 | * Note: you should generally call update() or flush() before accessing the |
| 100 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 101 | * not been called recently. |
| 102 | */ |
| 103 | const Level& getLevel(size_t level) const { |
| 104 | CHECK_LT(level, levels_.size()); |
| 105 | return levels_[level]; |
| 106 | } |
| 107 | |
| 108 | /* |
| 109 | * Get the highest granularity level that is still large enough to contain |
| 110 | * data going back to the specified start time. |
| 111 | * |
| 112 | * Note: you should generally call update() or flush() before accessing the |
| 113 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 114 | * not been called recently. |
| 115 | */ |
| 116 | const Level& getLevel(TimePoint start) const { |
| 117 | for (const auto& level : levels_) { |
| 118 | if (level.isAllTime()) { |
| 119 | return level; |
| 120 | } |
| 121 | // Note that we use duration() here rather than elapsed(). |
| 122 | // If duration is large enough to contain the start time then this level |
| 123 | // is good enough, even if elapsed() indicates that no data was recorded |
| 124 | // before the specified start time. |
| 125 | if (level.getLatestTime() - level.duration() <= start) { |
| 126 | return level; |
| 127 | } |
| 128 | } |
| 129 | // We should always have an all-time level, so this is never reached. |
| 130 | LOG(FATAL) << "No level of timeseries covers internval" |
| 131 | << " from " << start.time_since_epoch().count() << " to now" ; |
| 132 | return levels_.back(); |
| 133 | } |
| 134 | |
| 135 | /* |
| 136 | * Get the BucketedTimeSeries backing the specified level. |
| 137 | * |
| 138 | * Note: you should generally call update() or flush() before accessing the |
| 139 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 140 | * not been called recently. |
| 141 | */ |
| 142 | const Level& getLevelByDuration(Duration duration) const { |
| 143 | // since the number of levels is expected to be small (less than 5 in most |
| 144 | // cases), a simple linear scan would be efficient and is intentionally |
| 145 | // chosen here over other alternatives for lookup. |
| 146 | for (const auto& level : levels_) { |
| 147 | if (level.duration() == duration) { |
| 148 | return level; |
| 149 | } |
| 150 | } |
| 151 | throw std::out_of_range(folly::to<std::string>( |
| 152 | "No level of duration " , duration.count(), " found" )); |
| 153 | } |
| 154 | |
| 155 | /* |
| 156 | * Return the sum of all the data points currently tracked at this level. |
| 157 | * |
| 158 | * Note: you should generally call update() or flush() before accessing the |
| 159 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 160 | * not been called recently. |
| 161 | */ |
| 162 | ValueType sum(size_t level) const { |
| 163 | return getLevel(level).sum(); |
| 164 | } |
| 165 | |
| 166 | /* |
| 167 | * Return the average (sum / count) of all the data points currently tracked |
| 168 | * at this level. |
| 169 | * |
| 170 | * The return type may be specified to control whether floating-point or |
| 171 | * integer division should be performed. |
| 172 | * |
| 173 | * Note: you should generally call update() or flush() before accessing the |
| 174 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 175 | * not been called recently. |
| 176 | */ |
| 177 | template <typename ReturnType = double> |
| 178 | ReturnType avg(size_t level) const { |
| 179 | return getLevel(level).template avg<ReturnType>(); |
| 180 | } |
| 181 | |
| 182 | /* |
| 183 | * Return the rate (sum divided by elaspsed time) of the all data points |
| 184 | * currently tracked at this level. |
| 185 | * |
| 186 | * Note: you should generally call update() or flush() before accessing the |
| 187 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 188 | * not been called recently. |
| 189 | */ |
| 190 | template <typename ReturnType = double, typename Interval = Duration> |
| 191 | ReturnType rate(size_t level) const { |
| 192 | return getLevel(level).template rate<ReturnType, Interval>(); |
| 193 | } |
| 194 | |
| 195 | /* |
| 196 | * Return the number of data points currently tracked at this level. |
| 197 | * |
| 198 | * Note: you should generally call update() or flush() before accessing the |
| 199 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 200 | * not been called recently. |
| 201 | */ |
| 202 | uint64_t count(size_t level) const { |
| 203 | return getLevel(level).count(); |
| 204 | } |
| 205 | |
| 206 | /* |
| 207 | * Return the count divided by the elapsed time tracked at this level. |
| 208 | * |
| 209 | * Note: you should generally call update() or flush() before accessing the |
| 210 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 211 | * not been called recently. |
| 212 | */ |
| 213 | template <typename ReturnType = double, typename Interval = Duration> |
| 214 | ReturnType countRate(size_t level) const { |
| 215 | return getLevel(level).template countRate<ReturnType, Interval>(); |
| 216 | } |
| 217 | |
| 218 | /* |
| 219 | * Return the sum of all the data points currently tracked at this level. |
| 220 | * |
| 221 | * This method is identical to sum(size_t level) above but takes in the |
| 222 | * duration that the user is interested in querying as the parameter. |
| 223 | * |
| 224 | * Note: you should generally call update() or flush() before accessing the |
| 225 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 226 | * not been called recently. |
| 227 | */ |
| 228 | ValueType sum(Duration duration) const { |
| 229 | return getLevelByDuration(duration).sum(); |
| 230 | } |
| 231 | |
| 232 | /* |
| 233 | * Return the average (sum / count) of all the data points currently tracked |
| 234 | * at this level. |
| 235 | * |
| 236 | * This method is identical to avg(size_t level) above but takes in the |
| 237 | * duration that the user is interested in querying as the parameter. |
| 238 | * |
| 239 | * Note: you should generally call update() or flush() before accessing the |
| 240 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 241 | * not been called recently. |
| 242 | */ |
| 243 | template <typename ReturnType = double> |
| 244 | ReturnType avg(Duration duration) const { |
| 245 | return getLevelByDuration(duration).template avg<ReturnType>(); |
| 246 | } |
| 247 | |
| 248 | /* |
| 249 | * Return the rate (sum divided by elaspsed time) of the all data points |
| 250 | * currently tracked at this level. |
| 251 | * |
| 252 | * This method is identical to rate(size_t level) above but takes in the |
| 253 | * duration that the user is interested in querying as the parameter. |
| 254 | * |
| 255 | * Note: you should generally call update() or flush() before accessing the |
| 256 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 257 | * not been called recently. |
| 258 | */ |
| 259 | template <typename ReturnType = double, typename Interval = Duration> |
| 260 | ReturnType rate(Duration duration) const { |
| 261 | return getLevelByDuration(duration).template rate<ReturnType, Interval>(); |
| 262 | } |
| 263 | |
| 264 | /* |
| 265 | * Return the number of data points currently tracked at this level. |
| 266 | * |
| 267 | * This method is identical to count(size_t level) above but takes in the |
| 268 | * duration that the user is interested in querying as the parameter. |
| 269 | * |
| 270 | * Note: you should generally call update() or flush() before accessing the |
| 271 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 272 | * not been called recently. |
| 273 | */ |
| 274 | uint64_t count(Duration duration) const { |
| 275 | return getLevelByDuration(duration).count(); |
| 276 | } |
| 277 | |
| 278 | /* |
| 279 | * Return the count divided by the elapsed time tracked at this level. |
| 280 | * |
| 281 | * This method is identical to countRate(size_t level) above but takes in the |
| 282 | * duration that the user is interested in querying as the parameter. |
| 283 | * |
| 284 | * Note: you should generally call update() or flush() before accessing the |
| 285 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 286 | * not been called recently. |
| 287 | */ |
| 288 | template <typename ReturnType = double, typename Interval = Duration> |
| 289 | ReturnType countRate(Duration duration) const { |
| 290 | return getLevelByDuration(duration) |
| 291 | .template countRate<ReturnType, Interval>(); |
| 292 | } |
| 293 | |
| 294 | /* |
| 295 | * Estimate the sum of the data points that occurred in the specified time |
| 296 | * period at this level. |
| 297 | * |
| 298 | * The range queried is [start, end). |
| 299 | * That is, start is inclusive, and end is exclusive. |
| 300 | * |
| 301 | * Note that data outside of the timeseries duration will no longer be |
| 302 | * available for use in the estimation. Specifying a start time earlier than |
| 303 | * getEarliestTime() will not have much effect, since only data points after |
| 304 | * that point in time will be counted. |
| 305 | * |
| 306 | * Note that the value returned is an estimate, and may not be precise. |
| 307 | * |
| 308 | * Note: you should generally call update() or flush() before accessing the |
| 309 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 310 | * not been called recently. |
| 311 | */ |
| 312 | ValueType sum(TimePoint start, TimePoint end) const { |
| 313 | return getLevel(start).sum(start, end); |
| 314 | } |
| 315 | |
| 316 | /* |
| 317 | * Estimate the average value during the specified time period. |
| 318 | * |
| 319 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
| 320 | * comments apply here as well. |
| 321 | * |
| 322 | * Note: you should generally call update() or flush() before accessing the |
| 323 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 324 | * not been called recently. |
| 325 | */ |
| 326 | template <typename ReturnType = double> |
| 327 | ReturnType avg(TimePoint start, TimePoint end) const { |
| 328 | return getLevel(start).template avg<ReturnType>(start, end); |
| 329 | } |
| 330 | |
| 331 | /* |
| 332 | * Estimate the rate during the specified time period. |
| 333 | * |
| 334 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
| 335 | * comments apply here as well. |
| 336 | * |
| 337 | * Note: you should generally call update() or flush() before accessing the |
| 338 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 339 | * not been called recently. |
| 340 | */ |
| 341 | template <typename ReturnType = double> |
| 342 | ReturnType rate(TimePoint start, TimePoint end) const { |
| 343 | return getLevel(start).template rate<ReturnType>(start, end); |
| 344 | } |
| 345 | |
| 346 | /* |
| 347 | * Estimate the count during the specified time period. |
| 348 | * |
| 349 | * The same caveats documented in the sum(TimePoint start, TimePoint end) |
| 350 | * comments apply here as well. |
| 351 | * |
| 352 | * Note: you should generally call update() or flush() before accessing the |
| 353 | * data. Otherwise you may be reading stale data if update() or flush() has |
| 354 | * not been called recently. |
| 355 | */ |
| 356 | uint64_t count(TimePoint start, TimePoint end) const { |
| 357 | return getLevel(start).count(start, end); |
| 358 | } |
| 359 | |
| 360 | /* |
| 361 | * Adds the value 'val' at time 'now' to all levels. |
| 362 | * |
| 363 | * Data points added at the same time point is cached internally here and not |
| 364 | * propagated to the underlying levels until either flush() is called or when |
| 365 | * update from a different time comes. |
| 366 | * |
| 367 | * This function expects time to always move forwards: it cannot be used to |
| 368 | * add historical data points that have occurred in the past. If now is |
| 369 | * older than the another timestamp that has already been passed to |
| 370 | * addValue() or update(), now will be ignored and the latest timestamp will |
| 371 | * be used. |
| 372 | */ |
| 373 | void addValue(TimePoint now, const ValueType& val); |
| 374 | |
| 375 | /* |
| 376 | * Adds the value 'val' at time 'now' to all levels. |
| 377 | */ |
| 378 | void addValue(TimePoint now, const ValueType& val, uint64_t times); |
| 379 | |
| 380 | /* |
| 381 | * Adds the value 'total' at time 'now' to all levels as the sum of |
| 382 | * 'nsamples' samples. |
| 383 | */ |
| 384 | void |
| 385 | addValueAggregated(TimePoint now, const ValueType& total, uint64_t nsamples); |
| 386 | |
| 387 | /* |
| 388 | * Update all the levels to the specified time, doing all the necessary |
| 389 | * work to rotate the buckets and remove any stale data points. |
| 390 | * |
| 391 | * When reading data from the timeseries, you should make sure to manually |
| 392 | * call update() before accessing the data. Otherwise you may be reading |
| 393 | * stale data if update() has not been called recently. |
| 394 | */ |
| 395 | void update(TimePoint now); |
| 396 | |
| 397 | /* |
| 398 | * Reset all the timeseries to an empty state as if no data points have ever |
| 399 | * been added to it. |
| 400 | */ |
| 401 | void clear(); |
| 402 | |
| 403 | /* |
| 404 | * Flush all cached updates. |
| 405 | */ |
| 406 | void flush(); |
| 407 | |
| 408 | /* |
| 409 | * Legacy APIs that accept a Duration parameters rather than TimePoint. |
| 410 | * |
| 411 | * These treat the Duration as relative to the clock epoch. |
| 412 | * Prefer using the correct TimePoint-based APIs instead. These APIs will |
| 413 | * eventually be deprecated and removed. |
| 414 | */ |
| 415 | void update(Duration now) { |
| 416 | update(TimePoint(now)); |
| 417 | } |
| 418 | void addValue(Duration now, const ValueType& value) { |
| 419 | addValue(TimePoint(now), value); |
| 420 | } |
| 421 | void addValue(Duration now, const ValueType& value, uint64_t times) { |
| 422 | addValue(TimePoint(now), value, times); |
| 423 | } |
| 424 | void |
| 425 | addValueAggregated(Duration now, const ValueType& total, uint64_t nsamples) { |
| 426 | addValueAggregated(TimePoint(now), total, nsamples); |
| 427 | } |
| 428 | |
| 429 | private: |
| 430 | std::vector<Level> levels_; |
| 431 | |
| 432 | // Updates within the same time interval are cached |
| 433 | // They are flushed out when updates from a different time comes, |
| 434 | // or flush() is called. |
| 435 | TimePoint cachedTime_; |
| 436 | ValueType cachedSum_; |
| 437 | uint64_t cachedCount_; |
| 438 | }; |
| 439 | |
| 440 | } // namespace folly |
| 441 | |