// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/time/clock.h"

#include "absl/base/attributes.h"

#ifdef _WIN32
#include <windows.h>
#endif

#include <algorithm>
#include <atomic>
#include <cerrno>
#include <cstdint>
#include <ctime>
#include <limits>

#include "absl/base/internal/spinlock.h"
#include "absl/base/internal/unscaledcycleclock.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/base/thread_annotations.h"

namespace absl {
Time Now() {
  // TODO(bww): Get a timespec instead so we don't have to divide.
  int64_t n = absl::GetCurrentTimeNanos();
  if (n >= 0) {
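    // Note: the low word of MakeDuration() below is assumed to be counted in
    // quarter-nanosecond ticks (an Abseil-internal representation detail),
    // which is why the nanosecond remainder is multiplied by 4.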
    return time_internal::FromUnixDuration(
        time_internal::MakeDuration(n / 1000000000, n % 1000000000 * 4));
  }
  return time_internal::FromUnixDuration(absl::Nanoseconds(n));
}
}  // namespace absl

// Decide whether to use the fast GetCurrentTimeNanos() algorithm based on
// the cyclecounter; otherwise, get the time directly from the OS on every
// call. This can be chosen at compile-time via
// -DABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS=[0|1]
#ifndef ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS
#if ABSL_USE_UNSCALED_CYCLECLOCK
#define ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS 1
#else
#define ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS 0
#endif
#endif

#if defined(__APPLE__) || defined(_WIN32)
#include "absl/time/internal/get_current_time_chrono.inc"
#else
#include "absl/time/internal/get_current_time_posix.inc"
#endif

// Allows override by test.
#ifndef GET_CURRENT_TIME_NANOS_FROM_SYSTEM
#define GET_CURRENT_TIME_NANOS_FROM_SYSTEM() \
  ::absl::time_internal::GetCurrentTimeNanosFromSystem()
#endif

#if !ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS
namespace absl {
int64_t GetCurrentTimeNanos() {
  return GET_CURRENT_TIME_NANOS_FROM_SYSTEM();
}
}  // namespace absl
#else  // Use the cyclecounter-based implementation below.

// Allows override by test.
#ifndef GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW
#define GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW() \
  ::absl::time_internal::UnscaledCycleClockWrapperForGetCurrentTime::Now()
#endif

// The following counters are used only by the test code.
static int64_t stats_initializations;
static int64_t stats_reinitializations;
static int64_t stats_calibrations;
static int64_t stats_slow_paths;
static int64_t stats_fast_slow_paths;

namespace absl {
namespace time_internal {
// This is a friend wrapper around UnscaledCycleClock::Now()
// (needed to access UnscaledCycleClock).
class UnscaledCycleClockWrapperForGetCurrentTime {
 public:
  static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
};
}  // namespace time_internal

// uint64_t is used in this module to provide an extra bit in multiplications

// Return the time in ns as told by the kernel interface.  Place in *cycleclock
// the value of the cycleclock at about the time of the syscall.
// This call represents the time base that this module synchronizes to.
// Ensures that *cycleclock does not step back by up to (1 << 16) from
// last_cycleclock, to discard small backward counter steps.  (Larger steps are
// assumed to be complete resyncs, which shouldn't happen.  If they do, a full
// reinitialization of the outer algorithm should occur.)
static int64_t GetCurrentTimeNanosFromKernel(uint64_t last_cycleclock,
                                             uint64_t *cycleclock) {
  // We try to read clock values at about the same time as the kernel clock.
  // This value gets adjusted up or down as an estimate of how long that should
  // take, so we can reject attempts that take unusually long.
  static std::atomic<uint64_t> approx_syscall_time_in_cycles{10 * 1000};
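  // Illustrative note: the initial guess of 10*1000 cycles is only a starting
  // point (a few microseconds on a multi-GHz cycle counter); the code below
  // adapts it to the observed syscall time.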

  uint64_t local_approx_syscall_time_in_cycles =  // local copy
      approx_syscall_time_in_cycles.load(std::memory_order_relaxed);

  int64_t current_time_nanos_from_system;
  uint64_t before_cycles;
  uint64_t after_cycles;
  uint64_t elapsed_cycles;
  int loops = 0;
  do {
    before_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();
    current_time_nanos_from_system = GET_CURRENT_TIME_NANOS_FROM_SYSTEM();
    after_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();
    // elapsed_cycles is unsigned, so is large on overflow
    elapsed_cycles = after_cycles - before_cycles;
    if (elapsed_cycles >= local_approx_syscall_time_in_cycles &&
        ++loops == 20) {  // clock changed frequencies?  Back off.
      loops = 0;
      if (local_approx_syscall_time_in_cycles < 1000 * 1000) {
        local_approx_syscall_time_in_cycles =
            (local_approx_syscall_time_in_cycles + 1) << 1;
      }
      approx_syscall_time_in_cycles.store(
          local_approx_syscall_time_in_cycles,
          std::memory_order_relaxed);
    }
  } while (elapsed_cycles >= local_approx_syscall_time_in_cycles ||
           last_cycleclock - after_cycles < (static_cast<uint64_t>(1) << 16));

  // Number of times in a row we've seen a kernel time call take substantially
  // less than approx_syscall_time_in_cycles.
  static std::atomic<uint32_t> seen_smaller{ 0 };

  // Adjust approx_syscall_time_in_cycles to be within a factor of 2
  // of the typical time to execute one iteration of the loop above.
  if ((local_approx_syscall_time_in_cycles >> 1) < elapsed_cycles) {
    // measured time is no smaller than half current approximation
    seen_smaller.store(0, std::memory_order_relaxed);
  } else if (seen_smaller.fetch_add(1, std::memory_order_relaxed) >= 3) {
    // smaller delays several times in a row; reduce approximation by 12.5%
    const uint64_t new_approximation =
        local_approx_syscall_time_in_cycles -
        (local_approx_syscall_time_in_cycles >> 3);
    approx_syscall_time_in_cycles.store(new_approximation,
                                        std::memory_order_relaxed);
    seen_smaller.store(0, std::memory_order_relaxed);
  }

  *cycleclock = after_cycles;
  return current_time_nanos_from_system;
}


// ---------------------------------------------------------------------
// An implementation of reader-writer locks that use no atomic ops in the read
// case.  This is a generalization of Lamport's method for reading a multiword
// clock.  Increment a word on each write acquisition, using the low-order bit
// as a spinlock; the word is the high word of the "clock".  Readers read the
// high word, then all other data, then the high word again, and repeat the
// read if the reads of the high words yield different answers, or an odd
// value (either case suggests possible interference from a writer).
// Here we use a spinlock to ensure only one writer at a time, rather than
// spinning on the bottom bit of the word to benefit from SpinLock
// spin-delay tuning.
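//
// Reader protocol sketch (the actual reader is GetCurrentTimeNanos() below):
//   uint64_t s0 = seq.load(std::memory_order_acquire);
//   ... relaxed loads of the shared fields ...
//   std::atomic_thread_fence(std::memory_order_acquire);
//   uint64_t s1 = seq.load(std::memory_order_relaxed);
//   // the snapshot is consistent only if s0 == s1 and s0 is even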

// Acquire seqlock (*seq) and return the value to be written to unlock.
static inline uint64_t SeqAcquire(std::atomic<uint64_t> *seq) {
  uint64_t x = seq->fetch_add(1, std::memory_order_relaxed);

  // We put a release fence between update to *seq and writes to shared data.
  // Thus all stores to shared data are effectively release operations and
  // update to *seq above cannot be re-ordered past any of them.  Note that
  // this barrier is not for the fetch_add above.  A release barrier for the
  // fetch_add would be before it, not after.
  std::atomic_thread_fence(std::memory_order_release);

  return x + 2;   // original word plus 2
}

// Release seqlock (*seq) by writing x to it---a value previously returned by
// SeqAcquire.
static inline void SeqRelease(std::atomic<uint64_t> *seq, uint64_t x) {
  // The unlock store to *seq must have release ordering so that all
  // updates to shared data finish before this store.
  seq->store(x, std::memory_order_release);  // release lock for readers
}

// ---------------------------------------------------------------------

// "nsscaled" is a unit of time equal to a (2**kScale)th of a nanosecond.
enum { kScale = 30 };
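// For example, 1 ns == (1 << kScale) nsscaled units; a hypothetical 3 GHz
// cycle counter (about 0.333 ns per cycle) would therefore have an
// nsscaled_per_cycle of roughly 2^30 / 3, i.e. about 3.6e8.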

// The minimum interval between samples of the time base.
// We pick enough time to amortize the cost of the sample,
// to get a reasonably accurate cycle counter rate reading,
// and not so much that calculations will overflow 64 bits.
static const uint64_t kMinNSBetweenSamples = 2000 << 20;
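// (2000 << 20 is 2000 * 2^20 ns, i.e. roughly 2.1 seconds between samples.)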

// We require that kMinNSBetweenSamples shifted by kScale
// have at least a bit left over for 64-bit calculations.
static_assert(((kMinNSBetweenSamples << (kScale + 1)) >> (kScale + 1)) ==
                  kMinNSBetweenSamples,
              "cannot represent kMaxBetweenSamplesNSScaled");

// A reader-writer lock protecting the static locations below.
// See SeqAcquire() and SeqRelease() above.
static absl::base_internal::SpinLock lock(
    absl::base_internal::kLinkerInitialized);
static std::atomic<uint64_t> seq(0);

// data from a sample of the kernel's time value
struct TimeSampleAtomic {
  std::atomic<uint64_t> raw_ns;              // raw kernel time
  std::atomic<uint64_t> base_ns;             // our estimate of time
  std::atomic<uint64_t> base_cycles;         // cycle counter reading
  std::atomic<uint64_t> nsscaled_per_cycle;  // cycle period
  // cycles before we'll sample again (a scaled reciprocal of the period,
  // to avoid a division on the fast path).
  std::atomic<uint64_t> min_cycles_per_sample;
};
// Same again, but with non-atomic types
struct TimeSample {
  uint64_t raw_ns;                 // raw kernel time
  uint64_t base_ns;                // our estimate of time
  uint64_t base_cycles;            // cycle counter reading
  uint64_t nsscaled_per_cycle;     // cycle period
  uint64_t min_cycles_per_sample;  // approx cycles before next sample
};

static struct TimeSampleAtomic last_sample;   // the last sample; under seq

static int64_t GetCurrentTimeNanosSlowPath() ABSL_ATTRIBUTE_COLD;

// Read the contents of *atomic into *sample.
// Each field is read atomically, but to maintain atomicity between fields,
// the access must be done under a lock.
static void ReadTimeSampleAtomic(const struct TimeSampleAtomic *atomic,
                                 struct TimeSample *sample) {
  sample->base_ns = atomic->base_ns.load(std::memory_order_relaxed);
  sample->base_cycles = atomic->base_cycles.load(std::memory_order_relaxed);
  sample->nsscaled_per_cycle =
      atomic->nsscaled_per_cycle.load(std::memory_order_relaxed);
  sample->min_cycles_per_sample =
      atomic->min_cycles_per_sample.load(std::memory_order_relaxed);
  sample->raw_ns = atomic->raw_ns.load(std::memory_order_relaxed);
}

// Public routine.
// Algorithm:  We wish to compute real time from a cycle counter.  In normal
// operation, we construct a piecewise linear approximation to the kernel time
// source, using the cycle counter value.  The start of each line segment is at
// the same point as the end of the last, but may have a different slope (that
// is, a different idea of the cycle counter frequency).  Every couple of
// seconds, the kernel time source is sampled and compared with the current
// approximation.  A new slope is chosen that, if followed for another couple
// of seconds, will correct the error at the current position.  The information
// for a sample is in the "last_sample" struct.  The linear approximation is
//   estimated_time = last_sample.base_ns +
//     last_sample.ns_per_cycle * (counter_reading - last_sample.base_cycles)
// (ns_per_cycle is actually stored in different units and scaled, to avoid
// overflow).  The base_ns of the next linear approximation is the
// estimated_time using the last approximation; the base_cycles is the cycle
// counter value at that time; the ns_per_cycle is the number of ns per cycle
// measured since the last sample, but adjusted so that most of the difference
// between the estimated_time and the kernel time will be corrected by the
// estimated time to the next sample.  In normal operation, this algorithm
// relies on:
// - the cycle counter and kernel time rates not changing a lot in a few
//   seconds.
// - the client calling into the code often compared to a couple of seconds, so
//   the time to the next correction can be estimated.
// Any time ns_per_cycle is not known, a major error is detected, or the
// assumption about frequent calls is violated, the implementation returns the
// kernel time.  It records sufficient data that a linear approximation can
// resume a little later.
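//
// Worked example with made-up numbers: if last_sample.base_ns == 1e9,
// last_sample.base_cycles == 0, and ns_per_cycle == 0.5 (a 2 GHz counter),
// then a counter_reading of 4e9 cycles gives
//   estimated_time = 1e9 + 0.5 * 4e9 = 3e9 ns.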

int64_t GetCurrentTimeNanos() {
  // read the data from the "last_sample" struct (but don't need raw_ns yet)
  // The reads of "seq" and test of the values emulate a reader lock.
  uint64_t base_ns;
  uint64_t base_cycles;
  uint64_t nsscaled_per_cycle;
  uint64_t min_cycles_per_sample;
  uint64_t seq_read0;
  uint64_t seq_read1;

  // If we have enough information to interpolate, the value returned will be
  // derived from this cycleclock-derived time estimate.  On some platforms
  // (POWER) the function to retrieve this value has enough complexity to
  // contribute to register pressure - reading it early before initializing
  // the other pieces of the calculation minimizes spill/restore instructions,
  // minimizing icache cost.
  uint64_t now_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();

  // Acquire pairs with the barrier in SeqRelease - if this load sees that
  // store, the shared-data reads necessarily see that SeqRelease's updates
  // to the same shared data.
  seq_read0 = seq.load(std::memory_order_acquire);

  base_ns = last_sample.base_ns.load(std::memory_order_relaxed);
  base_cycles = last_sample.base_cycles.load(std::memory_order_relaxed);
  nsscaled_per_cycle =
      last_sample.nsscaled_per_cycle.load(std::memory_order_relaxed);
  min_cycles_per_sample =
      last_sample.min_cycles_per_sample.load(std::memory_order_relaxed);

  // This acquire fence pairs with the release fence in SeqAcquire.  Since it
  // is sequenced between reads of shared data and seq_read1, the reads of
  // shared data are effectively acquiring.
  std::atomic_thread_fence(std::memory_order_acquire);

  // The shared-data reads are effectively acquire ordered, and the
  // shared-data writes are effectively release ordered. Therefore if our
  // shared-data reads see any of a particular update's shared-data writes,
  // seq_read1 is guaranteed to see that update's SeqAcquire.
  seq_read1 = seq.load(std::memory_order_relaxed);

  // Fast path.  Return if min_cycles_per_sample has not yet elapsed since the
  // last sample, and we read a consistent sample.  The fast path activates
  // only when min_cycles_per_sample is non-zero, which happens when we get an
  // estimate for the cycle time.  The predicate will fail if now_cycles <
  // base_cycles, or if some other thread is in the slow path.
  //
  // Since we now read now_cycles before base_ns, it is possible for now_cycles
  // to be less than base_cycles (if we were interrupted between those loads and
  // last_sample was updated). This is harmless, because delta_cycles will wrap
  // and report a time much much bigger than min_cycles_per_sample. In that case
  // we will take the slow path.
  uint64_t delta_cycles = now_cycles - base_cycles;
  if (seq_read0 == seq_read1 && (seq_read0 & 1) == 0 &&
      delta_cycles < min_cycles_per_sample) {
    return base_ns + ((delta_cycles * nsscaled_per_cycle) >> kScale);
  }
  return GetCurrentTimeNanosSlowPath();
}

// Return (a << kScale)/b.
// Zero is returned if b==0.  Scaling is performed internally to
// preserve precision without overflow.
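// For example (with kScale == 30), SafeDivideAndScale(5, 2) returns
// (5 << 30) / 2 == 2684354560, which represents 2.5 in 2^-30 units.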
static uint64_t SafeDivideAndScale(uint64_t a, uint64_t b) {
  // Find maximum safe_shift so that
  //  0 <= safe_shift <= kScale  and  (a << safe_shift) does not overflow.
  int safe_shift = kScale;
  while (((a << safe_shift) >> safe_shift) != a) {
    safe_shift--;
  }
  uint64_t scaled_b = b >> (kScale - safe_shift);
  uint64_t quotient = 0;
  if (scaled_b != 0) {
    quotient = (a << safe_shift) / scaled_b;
  }
  return quotient;
}

static uint64_t UpdateLastSample(
    uint64_t now_cycles, uint64_t now_ns, uint64_t delta_cycles,
    const struct TimeSample *sample) ABSL_ATTRIBUTE_COLD;

// The slow path of GetCurrentTimeNanos().  This is taken while gathering
// initial samples, when enough time has elapsed since the last sample, and if
// any other thread is writing to last_sample.
//
// Manually mark this 'noinline' to minimize stack frame size of the fast
// path.  Without this, sometimes a compiler may inline this big block of code
// into the fast path.  That causes lots of register spills and reloads that
// are unnecessary unless the slow path is taken.
//
// TODO(absl-team): Remove this attribute when our compiler is smart enough
// to do the right thing.
ABSL_ATTRIBUTE_NOINLINE
static int64_t GetCurrentTimeNanosSlowPath() LOCKS_EXCLUDED(lock) {
  // Serialize access to slow-path.  Fast-path readers are not blocked yet, and
  // code below must not modify last_sample until the seqlock is acquired.
  lock.Lock();

  // Sample the kernel time base.  This is the definition of
  // "now" if we take the slow path.
  static uint64_t last_now_cycles;  // protected by lock
  uint64_t now_cycles;
  uint64_t now_ns = GetCurrentTimeNanosFromKernel(last_now_cycles, &now_cycles);
  last_now_cycles = now_cycles;

  uint64_t estimated_base_ns;

  // ----------
  // Read the "last_sample" values again; this time holding the write lock.
  struct TimeSample sample;
  ReadTimeSampleAtomic(&last_sample, &sample);

  // ----------
  // Try running the fast path again; another thread may have updated the
  // sample between our run of the fast path and the sample we just read.
  uint64_t delta_cycles = now_cycles - sample.base_cycles;
  if (delta_cycles < sample.min_cycles_per_sample) {
    // Another thread updated the sample.  This path does not take the seqlock
    // so that blocked readers can make progress without blocking new readers.
    estimated_base_ns = sample.base_ns +
        ((delta_cycles * sample.nsscaled_per_cycle) >> kScale);
    stats_fast_slow_paths++;
  } else {
    estimated_base_ns =
        UpdateLastSample(now_cycles, now_ns, delta_cycles, &sample);
  }

  lock.Unlock();

  return estimated_base_ns;
}

// Main part of the algorithm.  Locks out readers, updates the approximation
// using the new sample from the kernel, and stores the result in last_sample
// for readers.  Returns the new estimated time.
static uint64_t UpdateLastSample(uint64_t now_cycles, uint64_t now_ns,
                                 uint64_t delta_cycles,
                                 const struct TimeSample *sample)
    EXCLUSIVE_LOCKS_REQUIRED(lock) {
  uint64_t estimated_base_ns = now_ns;
  uint64_t lock_value = SeqAcquire(&seq);  // acquire seqlock to block readers

  // The 5s in the next if-statement limits the time for which we will trust
  // the cycle counter and our last sample to give a reasonable result.
  // Errors in the rate of the source clock can be multiplied by the ratio
  // between this limit and kMinNSBetweenSamples.
  if (sample->raw_ns == 0 ||  // no recent sample, or clock went backwards
      sample->raw_ns + static_cast<uint64_t>(5) * 1000 * 1000 * 1000 < now_ns ||
      now_ns < sample->raw_ns || now_cycles < sample->base_cycles) {
    // record this sample, and forget any previously known slope.
    last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);
    last_sample.base_ns.store(estimated_base_ns, std::memory_order_relaxed);
    last_sample.base_cycles.store(now_cycles, std::memory_order_relaxed);
    last_sample.nsscaled_per_cycle.store(0, std::memory_order_relaxed);
    last_sample.min_cycles_per_sample.store(0, std::memory_order_relaxed);
    stats_initializations++;
  } else if (sample->raw_ns + 500 * 1000 * 1000 < now_ns &&
             sample->base_cycles + 100 < now_cycles) {
    // Enough time has passed to compute the cycle time.
    if (sample->nsscaled_per_cycle != 0) {  // Have a cycle time estimate.
      // Compute time from counter reading, but avoiding overflow;
      // delta_cycles may be larger than on the fast path.
      uint64_t estimated_scaled_ns;
      int s = -1;
      do {
        s++;
        estimated_scaled_ns = (delta_cycles >> s) * sample->nsscaled_per_cycle;
      } while (estimated_scaled_ns / sample->nsscaled_per_cycle !=
               (delta_cycles >> s));
      estimated_base_ns = sample->base_ns +
          (estimated_scaled_ns >> (kScale - s));
    }

    // Compute the assumed cycle time kMinNSBetweenSamples ns into the future
    // assuming the cycle counter rate stays the same as the last interval.
    uint64_t ns = now_ns - sample->raw_ns;
    uint64_t measured_nsscaled_per_cycle = SafeDivideAndScale(ns, delta_cycles);

    uint64_t assumed_next_sample_delta_cycles =
        SafeDivideAndScale(kMinNSBetweenSamples, measured_nsscaled_per_cycle);

    int64_t diff_ns = now_ns - estimated_base_ns;  // estimate low by this much

    // We want to set nsscaled_per_cycle so that our estimate of the ns time
    // at the assumed cycle time is the assumed ns time.
    // That is, we want to set nsscaled_per_cycle so:
    //  kMinNSBetweenSamples + diff_ns  ==
    //  (assumed_next_sample_delta_cycles * nsscaled_per_cycle) >> kScale
    // But we wish to damp oscillations, so instead correct only most
    // of our current error, by solving:
    //  kMinNSBetweenSamples + diff_ns - (diff_ns / 16) ==
    //  (assumed_next_sample_delta_cycles * nsscaled_per_cycle) >> kScale
    ns = kMinNSBetweenSamples + diff_ns - (diff_ns / 16);
    uint64_t new_nsscaled_per_cycle =
        SafeDivideAndScale(ns, assumed_next_sample_delta_cycles);
    if (new_nsscaled_per_cycle != 0 &&
        diff_ns < 100 * 1000 * 1000 && -diff_ns < 100 * 1000 * 1000) {
      // record the cycle time measurement
      last_sample.nsscaled_per_cycle.store(
          new_nsscaled_per_cycle, std::memory_order_relaxed);
      uint64_t new_min_cycles_per_sample =
          SafeDivideAndScale(kMinNSBetweenSamples, new_nsscaled_per_cycle);
      last_sample.min_cycles_per_sample.store(
          new_min_cycles_per_sample, std::memory_order_relaxed);
      stats_calibrations++;
    } else {  // something went wrong; forget the slope
      last_sample.nsscaled_per_cycle.store(0, std::memory_order_relaxed);
      last_sample.min_cycles_per_sample.store(0, std::memory_order_relaxed);
      estimated_base_ns = now_ns;
      stats_reinitializations++;
    }
    last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);
    last_sample.base_ns.store(estimated_base_ns, std::memory_order_relaxed);
    last_sample.base_cycles.store(now_cycles, std::memory_order_relaxed);
  } else {
    // have a sample, but no slope; waiting for enough time for a calibration
    stats_slow_paths++;
  }

  SeqRelease(&seq, lock_value);  // release the readers

  return estimated_base_ns;
}
}  // namespace absl
#endif  // ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS

namespace absl {
namespace {

// Returns the maximum duration that SleepOnce() can sleep for.
constexpr absl::Duration MaxSleep() {
#ifdef _WIN32
  // Windows Sleep() takes an unsigned long argument in milliseconds.
  return absl::Milliseconds(
      std::numeric_limits<unsigned long>::max());  // NOLINT(runtime/int)
#else
  return absl::Seconds(std::numeric_limits<time_t>::max());
#endif
}

// Sleeps for the given duration.
// REQUIRES: to_sleep <= MaxSleep().
void SleepOnce(absl::Duration to_sleep) {
#ifdef _WIN32
  Sleep(to_sleep / absl::Milliseconds(1));
#else
  struct timespec sleep_time = absl::ToTimespec(to_sleep);
  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) {
    // Ignore signals and wait for the full interval to elapse.
  }
#endif
}

}  // namespace
}  // namespace absl

extern "C" {

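// Note: absl::SleepFor() (declared in absl/time/clock.h) is expected to
// forward to this weak symbol; a strong definition elsewhere (e.g. in tests)
// can override it.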
ABSL_ATTRIBUTE_WEAK void AbslInternalSleepFor(absl::Duration duration) {
  while (duration > absl::ZeroDuration()) {
    absl::Duration to_sleep = std::min(duration, absl::MaxSleep());
    absl::SleepOnce(to_sleep);
    duration -= to_sleep;
  }
}

}  // extern "C"