| 1 | /* |
| 2 | * hlc.c |
| 3 | * |
| 4 | * Copyright (C) 2008-2016 Aerospike, Inc. |
| 5 | * |
| 6 | * Portions may be licensed to Aerospike, Inc. under one or more contributor |
| 7 | * license agreements. |
| 8 | * |
| 9 | * This program is free software: you can redistribute it and/or modify it under |
| 10 | * the terms of the GNU Affero General Public License as published by the Free |
| 11 | * Software Foundation, either version 3 of the License, or (at your option) any |
| 12 | * later version. |
| 13 | * |
| 14 | * This program is distributed in the hope that it will be useful, but WITHOUT |
| 15 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 16 | * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
| 17 | * details. |
| 18 | * |
| 19 | * You should have received a copy of the GNU Affero General Public License |
| 20 | * along with this program. If not, see http://www.gnu.org/licenses/ |
| 21 | */ |
| 22 | |
| 23 | #include "fabric/hlc.h" |
| 24 | |
| 25 | #include <math.h> |
| 26 | #include <sys/param.h> // For MAX() and MIN(). |
| 27 | |
| 28 | #include "citrusleaf/cf_clock.h" |
| 29 | #include "citrusleaf/cf_atomic.h" |
| 30 | |
| 31 | #include "fault.h" |
| 32 | |
| 33 | #include "base/cfg.h" |
| 34 | |
| 35 | /* |
| 36 | * Overview |
| 37 | * ======== |
| 38 | * Hybrid logical clock as described in |
| 39 | * "Logical Physical Clocks and Consistent Snapshots in Globally Distributed |
| 40 | * Databases" available at http://www.cse.buffalo.edu/tech-reports/2014-04.pdf. |
| 41 | * |
| 42 | * Relies on a global 64 bit variable that has the logical time. |
| 43 | * The 48 MSBs include the physical component of the timestamp and the least |
| 44 | * significant 16 bits include the logical component. 48 bits for milliseconds |
| 45 | * since epoch gives us (8925 - years elapsed since epoch today) years before |
| 46 | * wrap around. |
| 47 | * |
| 48 | * The notion of HLC is to bound the skew between the logical clock and phsycial |
| 49 | * clock. This requires rejecting updates to the clock from nodes with large |
| 50 | * clock skews. We DO NOT do that yet and print a warning instead. The current |
| 51 | * envisioned usage is a global monotonically increasing timestamp. Should be |
| 52 | * fixed if we are to use it as a surrogate for wall clock. |
| 53 | * |
| 54 | * Guarantees |
| 55 | * ========== |
| 56 | * 1. Monotonically increasing. (Wraps around after ~8900 years). Service |
| 57 | * restarts might break the monotonicity, however the new clock will leapfrog |
| 58 | * the hlc value before the restart eventually. |
| 59 | * 2. as_hlc_timestamp_update call after every message receipt will ensure the |
| 60 | * message (send hlc ts) < (message receive hlc ts). |
| 61 | * 3. A fixed local timestamp will eventually be marked as happened before a |
| 62 | * remote message. This is an important requirement. For example, in paxos the |
| 63 | * local cluster change timestamp should have happened before some incoming |
| 64 | * heartbeat. The ordering system should not always return a |
| 65 | * AS_HLC_ORDER_INDETERMINATE for a fixed local timestamp and a new message |
| 66 | * received. |
| 67 | * |
| 68 | * Not guaranteed (requires hlc persistence across service restarts) |
| 69 | * ============== |
| 70 | * 1. On service restart the HLC clock will not start where it left off, however |
| 71 | * it will eventually leapfrog the older value. Fixing this requires persistence |
| 72 | * which is not implemented. eventually leapfrogging is alright for all current |
| 73 | * requirements. |
| 74 | * 2. If a as_hlc_msg_timestamp is persisted and compared with a current running |
| 75 | * value, the result may not be correct. |
| 76 | * |
| 77 | * |
| 78 | * Requirements |
| 79 | * ============ |
| 80 | * Subsystems that reply on hlc should have their network messages timestamped |
| 81 | * with hlc timestamps and should invoke the as_hlc_timestamp_update on receipt |
| 82 | * of every message. This will ensure the hlc are in sync across the cluster and |
| 83 | * (send hlc ts) < (message receive hlc ts). |
| 84 | */ |
| 85 | |
| 86 | /** |
| 87 | * Global timestamp with current hlc value. |
| 88 | */ |
| 89 | static as_hlc_timestamp g_now; |
| 90 | |
| 91 | /** |
| 92 | * Previous value of the physical component. |
| 93 | */ |
| 94 | static cf_atomic64 g_prev_physical_component; |
| 95 | |
| 96 | /** |
| 97 | * Previous value of the wall clock, when the physical component changed. |
| 98 | */ |
| 99 | static cf_atomic64 g_prev_wall_clock; |
| 100 | |
| 101 | /* |
| 102 | * ---------------------------------------------------------------------------- |
| 103 | * Globals. |
| 104 | * ---------------------------------------------------------------------------- |
| 105 | */ |
| 106 | /** |
| 107 | * Mask for the physical component of a hlc timestamp. |
| 108 | */ |
| 109 | #define PHYSICAL_TS_MASK 0xffffffffffff0000 |
| 110 | |
| 111 | /** |
| 112 | * Mask for logical component of a hls timestamp. |
| 113 | */ |
| 114 | #define LOGICAL_TS_MASK 0x000000000000ffff |
| 115 | |
| 116 | /** |
| 117 | * Inform when HLC jumps by more than this amount. |
| 118 | */ |
| 119 | #define HLC_JUMP_WARN 5000 |
| 120 | |
| 121 | /** |
| 122 | * Logging macros. |
| 123 | */ |
| 124 | #define CRASH(format, ...) cf_crash(AS_HLC, format, ##__VA_ARGS__) |
| 125 | #define WARNING(format, ...) cf_warning(AS_HLC, format, ##__VA_ARGS__) |
| 126 | #define INFO(format, ...) cf_info(AS_HLC, format, ##__VA_ARGS__) |
| 127 | #define DEBUG(format, ...) cf_debug(AS_HLC, format, ##__VA_ARGS__) |
| 128 | #define DETAIL(format, ...) cf_detail(AS_HLC, format, ##__VA_ARGS__) |
| 129 | #define ASSERT(expression, message, ...) \ |
| 130 | if (!(expression)) {WARNING(message, __VA_ARGS__);} |
| 131 | |
| 132 | /* |
| 133 | * ---------------------------------------------------------------------------- |
| 134 | * Forward declarations. |
| 135 | * ---------------------------------------------------------------------------- |
| 136 | */ |
| 137 | static cf_clock |
| 138 | hlc_wall_clock_get(); |
| 139 | static as_hlc_timestamp |
| 140 | hlc_ts_get(); |
| 141 | static bool |
| 142 | hlc_ts_set(as_hlc_timestamp old_value, as_hlc_timestamp new_value, |
| 143 | cf_node source); |
| 144 | static cf_clock |
| 145 | hlc_physical_ts_get(as_hlc_timestamp hlc_ts); |
| 146 | static uint16_t |
| 147 | hlc_logical_ts_get(as_hlc_timestamp hlc_ts); |
| 148 | static void |
| 149 | hlc_physical_ts_set(as_hlc_timestamp* hlc_ts, cf_clock physical_ts); |
| 150 | static void |
| 151 | hlc_physical_ts_on_set(cf_clock physical_ts, cf_clock wall_clock_now); |
| 152 | static void |
| 153 | hlc_logical_ts_set(as_hlc_timestamp* hlc_ts, uint16_t logical_ts); |
| 154 | static void |
| 155 | hlc_logical_ts_incr(uint16_t* logical_ts, cf_clock* physical_ts, |
| 156 | cf_clock wall_clock_now); |
| 157 | |
| 158 | /* |
| 159 | * ---------------------------------------------------------------------------- |
| 160 | * Public API. |
| 161 | * ---------------------------------------------------------------------------- |
| 162 | */ |
| 163 | /** |
| 164 | * Initialize hybrid logical clock. |
| 165 | */ |
| 166 | void |
| 167 | as_hlc_init() |
| 168 | { |
| 169 | g_now = 0; |
| 170 | g_prev_physical_component = 0; |
| 171 | g_prev_wall_clock = 0; |
| 172 | } |
| 173 | |
| 174 | /** |
| 175 | * Return the physical component of a hlc timstamp |
| 176 | * @param hlc_ts the hybrid logical clock timestamp. |
| 177 | */ |
| 178 | cf_clock |
| 179 | as_hlc_physical_ts_get(as_hlc_timestamp hlc_ts) |
| 180 | { |
| 181 | return hlc_physical_ts_get(hlc_ts); |
| 182 | } |
| 183 | |
| 184 | /** |
| 185 | * Return a hlc timestamp representing the hlc time "now". The notion is to make |
| 186 | * the minimum increment to the hlc timestamp necessary. |
| 187 | */ |
| 188 | as_hlc_timestamp |
| 189 | as_hlc_timestamp_now() |
| 190 | { |
| 191 | // Keep trying till an atomic operation succeeds. Looks like a tight loop |
| 192 | // but even with reasonable contention should not take more then a few |
| 193 | // iterations to succeed. |
| 194 | while (true) { |
| 195 | as_hlc_timestamp current_hlc_ts = hlc_ts_get(); |
| 196 | |
| 197 | // Initialize the new physical and logical values to current values. |
| 198 | cf_clock new_hlc_physical_ts = hlc_physical_ts_get(current_hlc_ts); |
| 199 | uint16_t new_hlc_logical_ts = hlc_logical_ts_get(current_hlc_ts); |
| 200 | |
| 201 | cf_clock wall_clock_physical_ts = hlc_wall_clock_get(); |
| 202 | |
| 203 | if (new_hlc_physical_ts >= wall_clock_physical_ts) { |
| 204 | // The HLC physical component is greater than the physical wall |
| 205 | // time. Advance the logical timestamp. |
| 206 | hlc_logical_ts_incr(&new_hlc_logical_ts, &new_hlc_physical_ts, |
| 207 | wall_clock_physical_ts); |
| 208 | } |
| 209 | else { |
| 210 | // The wall clock is greater, use this as the physical component and |
| 211 | // reset the logical timestamp. |
| 212 | new_hlc_physical_ts = wall_clock_physical_ts; |
| 213 | new_hlc_logical_ts = 0; |
| 214 | } |
| 215 | |
| 216 | as_hlc_timestamp new_hlc_ts = 0; |
| 217 | |
| 218 | hlc_physical_ts_set(&new_hlc_ts, new_hlc_physical_ts); |
| 219 | hlc_logical_ts_set(&new_hlc_ts, new_hlc_logical_ts); |
| 220 | |
| 221 | if (hlc_ts_set(current_hlc_ts, new_hlc_ts, g_config.self_node)) { |
| 222 | hlc_physical_ts_on_set(new_hlc_physical_ts, wall_clock_physical_ts); |
| 223 | DETAIL("changed HLC value from %" PRIu64 " to %" PRIu64, |
| 224 | current_hlc_ts, new_hlc_ts); |
| 225 | return new_hlc_ts; |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | /** |
| 231 | * Update the HLC on receipt of a remote message. The notion is to adjust this |
| 232 | * node's hlc to ensure the receive hlc ts > the send hlc ts. |
| 233 | * |
| 234 | * @param source for debugging and tracking only. |
| 235 | * @param send_timestamp the hlc timestamp when this message was sent. |
| 236 | * @param recv_timestamp (output) the message receive timestamp which will be |
| 237 | * populated. Can be NULL in which case it will be ignored. |
| 238 | */ |
| 239 | void |
| 240 | as_hlc_timestamp_update(cf_node source, as_hlc_timestamp send_ts, |
| 241 | as_hlc_msg_timestamp* msg_ts) |
| 242 | { |
| 243 | cf_clock send_ts_physical_ts = hlc_physical_ts_get(send_ts); |
| 244 | uint16_t send_ts_logical_ts = hlc_logical_ts_get(send_ts); |
| 245 | |
| 246 | // Keep trying till an atomic operation succeeds. Looks like a tight loop |
| 247 | // but even with reasonable contention should not take more then a few |
| 248 | // iterations to succeed. |
| 249 | while (true) { |
| 250 | as_hlc_timestamp current_hlc_ts = hlc_ts_get(); |
| 251 | |
| 252 | cf_clock current_hlc_physical_ts = hlc_physical_ts_get(current_hlc_ts); |
| 253 | uint16_t current_hlc_logical_ts = hlc_logical_ts_get(current_hlc_ts); |
| 254 | |
| 255 | cf_clock wall_clock_physical_ts = hlc_wall_clock_get(); |
| 256 | |
| 257 | cf_clock new_hlc_physical_ts = MAX( |
| 258 | MAX(current_hlc_physical_ts, send_ts_physical_ts), |
| 259 | wall_clock_physical_ts); |
| 260 | uint16_t new_hlc_logical_ts = 0; |
| 261 | |
| 262 | if (new_hlc_physical_ts == current_hlc_physical_ts |
| 263 | && new_hlc_physical_ts == send_ts_physical_ts) { |
| 264 | // There is no change in the physical components of peer and local |
| 265 | // hlc clocks. Set logical component to max of the two values and |
| 266 | // increment. |
| 267 | new_hlc_logical_ts = MAX(current_hlc_logical_ts, |
| 268 | send_ts_logical_ts); |
| 269 | hlc_logical_ts_incr(&new_hlc_logical_ts, &new_hlc_physical_ts, |
| 270 | wall_clock_physical_ts); |
| 271 | } |
| 272 | else if (new_hlc_physical_ts == current_hlc_physical_ts) { |
| 273 | // The physical component of the send timestamp is smaller than our |
| 274 | // current physical component. We just need to increment the logical |
| 275 | // component. |
| 276 | new_hlc_logical_ts = current_hlc_ts; |
| 277 | hlc_logical_ts_incr(&new_hlc_logical_ts, &new_hlc_physical_ts, |
| 278 | wall_clock_physical_ts); |
| 279 | } |
| 280 | else if (new_hlc_physical_ts == send_ts_physical_ts) { |
| 281 | // Current physical component is lesser than the incoming physical |
| 282 | // component. We need to ensure that the updated logical component |
| 283 | // is greater than the send logical component. |
| 284 | new_hlc_logical_ts = send_ts_logical_ts; |
| 285 | hlc_logical_ts_incr(&new_hlc_logical_ts, &new_hlc_physical_ts, |
| 286 | wall_clock_physical_ts); |
| 287 | } |
| 288 | else { |
| 289 | // Our physical clock is greater than current physical component and |
| 290 | // the send physical component. We can reset the logical clock to |
| 291 | // zero and still maintain the send and receive ordering. |
| 292 | new_hlc_logical_ts = 0; |
| 293 | } |
| 294 | |
| 295 | as_hlc_timestamp new_hlc_ts = 0; |
| 296 | |
| 297 | hlc_physical_ts_set(&new_hlc_ts, new_hlc_physical_ts); |
| 298 | hlc_logical_ts_set(&new_hlc_ts, new_hlc_logical_ts); |
| 299 | |
| 300 | if (hlc_ts_set(current_hlc_ts, new_hlc_ts, source)) { |
| 301 | hlc_physical_ts_on_set(new_hlc_physical_ts, wall_clock_physical_ts); |
| 302 | DETAIL("message received from node %" PRIx64 " with HLC %" PRIu64 " - changed HLC value from %" PRIu64 " to %" PRIu64, |
| 303 | source, send_ts, current_hlc_ts, new_hlc_ts); |
| 304 | if (msg_ts) { |
| 305 | msg_ts->send_ts = send_ts; |
| 306 | msg_ts->recv_ts = new_hlc_ts; |
| 307 | } |
| 308 | return; |
| 309 | } |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | /** |
| 314 | * Return the difference in milliseconds between two hlc timestamps. Note this |
| 315 | * difference may be greater than or equal to (but never less than) |
| 316 | * the physical wall call difference, because HLC can have non linear jumps, |
| 317 | * whenever the clock is adjusted. The difference should be used as an estimate |
| 318 | * rather than an absolute difference. |
| 319 | * For e.g. use the difference to check that the real time difference is most |
| 320 | * some number of milliseconds. However do not use this for interval statistics |
| 321 | * or to check if the difference in time is at least some number of |
| 322 | * milliseconds. |
| 323 | * |
| 324 | * @param ts1 the first timestamp. |
| 325 | * @param ts2 the seconds timestamp. |
| 326 | * @return ts1 - ts2 in milliseconds. if ts1 < ts2 the result is negative, |
| 327 | * else it is positive or zero. |
| 328 | */ |
| 329 | int64_t |
| 330 | as_hlc_timestamp_diff_ms(as_hlc_timestamp ts1, as_hlc_timestamp ts2) |
| 331 | { |
| 332 | int64_t diff = 0; |
| 333 | if (ts1 >= ts2) { |
| 334 | diff = hlc_physical_ts_get(ts1) - hlc_physical_ts_get(ts2); |
| 335 | } |
| 336 | else { |
| 337 | diff = -(hlc_physical_ts_get(ts2) - hlc_physical_ts_get(ts1)); |
| 338 | } |
| 339 | |
| 340 | return diff; |
| 341 | } |
| 342 | |
| 343 | /** |
| 344 | * Orders a local timestamp and remote message send timestamp. |
| 345 | * |
| 346 | * @param local_ts the local timestamp. |
| 347 | * @param msg_ts message receive timestamp containing the remote send and the |
| 348 | * local receive timestamp. |
| 349 | * @return the order between the local and the message timestamp. |
| 350 | */ |
| 351 | as_hlc_timestamp_order |
| 352 | as_hlc_send_timestamp_order(as_hlc_timestamp local_ts, |
| 353 | as_hlc_msg_timestamp* msg_ts) |
| 354 | { |
| 355 | if (local_ts > msg_ts->recv_ts) { |
| 356 | // The local event happened after the local message received timestamp |
| 357 | // and therefore after the remote send as well. |
| 358 | return AS_HLC_HAPPENS_AFTER; |
| 359 | } |
| 360 | |
| 361 | // Compute the unceratinty window around the local receive timestamp. |
| 362 | uint64_t offset = abs(msg_ts->send_ts - msg_ts->recv_ts); |
| 363 | |
| 364 | if (local_ts > (msg_ts->recv_ts - offset)) { |
| 365 | // Local timestamp is in the uncertainty window. We cannot tell the |
| 366 | // order. |
| 367 | return AS_HLC_ORDER_INDETERMINATE; |
| 368 | } |
| 369 | |
| 370 | cf_clock local_physical_ts = hlc_physical_ts_get(local_ts); |
| 371 | cf_clock recv_physical_ts = hlc_physical_ts_get(msg_ts->recv_ts); |
| 372 | |
| 373 | if ((recv_physical_ts - local_physical_ts) |
| 374 | < g_config.fabric_latency_max_ms) { |
| 375 | // Consider the max network delay worth of time to also be part of the |
| 376 | // uncertainty window. |
| 377 | return AS_HLC_ORDER_INDETERMINATE; |
| 378 | } |
| 379 | |
| 380 | return AS_HLC_HAPPENS_BEFORE; |
| 381 | } |
| 382 | |
| 383 | /** |
| 384 | * Orders two timestamp generated by the same node / process. |
| 385 | * |
| 386 | * @param ts1 the first timestamp. |
| 387 | * @param ts2 the second timestamp. |
| 388 | * @return AS_HLC_HAPPENS_BEFORE if ts1 happens before ts2 else |
| 389 | * AS_HLC_HAPPENS_AFTER if ts1 happens after ts2 else |
| 390 | * AS_HLC_ORDER_INDETERMINATE. |
| 391 | */ |
| 392 | as_hlc_timestamp_order |
| 393 | as_hlc_timestamp_order_get(as_hlc_timestamp ts1, as_hlc_timestamp ts2) |
| 394 | { |
| 395 | if (ts1 < ts2) { |
| 396 | return AS_HLC_HAPPENS_BEFORE; |
| 397 | } |
| 398 | else if (ts1 > ts2) { |
| 399 | return AS_HLC_HAPPENS_AFTER; |
| 400 | } |
| 401 | |
| 402 | return AS_HLC_ORDER_INDETERMINATE; |
| 403 | } |
| 404 | |
| 405 | /** |
| 406 | * Subtract milliseconds worth of time from the timestamp. |
| 407 | * @param timestamp the input timestamp. |
| 408 | * @param ms the number of milliseconds to subtract. |
| 409 | */ |
| 410 | as_hlc_timestamp |
| 411 | as_hlc_timestamp_subtract_ms(as_hlc_timestamp timestamp, int ms) |
| 412 | { |
| 413 | cf_clock physical_ts = hlc_physical_ts_get(timestamp); |
| 414 | uint16_t logical_ts = hlc_logical_ts_get(timestamp); |
| 415 | physical_ts -= ms; |
| 416 | as_hlc_timestamp new_hlc_ts = 0; |
| 417 | |
| 418 | hlc_physical_ts_set(&new_hlc_ts, physical_ts); |
| 419 | hlc_logical_ts_set(&new_hlc_ts, logical_ts); |
| 420 | return new_hlc_ts; |
| 421 | } |
| 422 | |
| 423 | /** |
| 424 | * Dump some debugging information to the logs. |
| 425 | */ |
| 426 | void |
| 427 | as_hlc_dump(bool verbose) |
| 428 | { |
| 429 | as_hlc_timestamp now = as_hlc_timestamp_now(); |
| 430 | cf_clock current_hlc_physical_ts = hlc_physical_ts_get(now); |
| 431 | uint16_t current_hlc_logical_ts = hlc_logical_ts_get(now); |
| 432 | |
| 433 | INFO("HLC Ts:%" PRIu64 " HLC Physical Ts:%" PRIu64 " HLC Logical Ts:%d Wall Clock:%" PRIu64, |
| 434 | now, current_hlc_physical_ts, current_hlc_logical_ts, |
| 435 | hlc_wall_clock_get()); |
| 436 | } |
| 437 | |
| 438 | /* |
| 439 | * ---------------------------------------------------------------------------- |
| 440 | * Private functions. |
| 441 | * ---------------------------------------------------------------------------- |
| 442 | */ |
| 443 | |
| 444 | /** |
| 445 | * Return this node's wall clock. |
| 446 | */ |
| 447 | static cf_clock |
| 448 | hlc_wall_clock_get() |
| 449 | { |
| 450 | // Unix timestamps will be 48 bits for a reasonable future. We will use only |
| 451 | // 48 bits. |
| 452 | return cf_clock_getabsolute(); |
| 453 | } |
| 454 | |
| 455 | /** |
| 456 | * Return the physical component of a hlc timstamp |
| 457 | * @param hlc_ts the hybrid logical clock timestamp. |
| 458 | */ |
| 459 | static cf_clock |
| 460 | hlc_physical_ts_get(as_hlc_timestamp hlc_ts) |
| 461 | { |
| 462 | return hlc_ts >> 16; |
| 463 | } |
| 464 | |
| 465 | /** |
| 466 | * Return the logical component of a hlc timstamp |
| 467 | * @param hlc_ts the hybrid logical clock timestamp. |
| 468 | */ |
| 469 | static uint16_t |
| 470 | hlc_logical_ts_get(as_hlc_timestamp hlc_ts) |
| 471 | { |
| 472 | return (uint16_t)(hlc_ts & LOGICAL_TS_MASK); |
| 473 | } |
| 474 | |
| 475 | /** |
| 476 | * Set the physical component of a hlc timestamp. 16 LSBs of the input physical |
| 477 | * timestamp will be ignored. |
| 478 | * @param hlc_ts the timestamp |
| 479 | * @param physical_ts the physical timestamp whose value should be set into the |
| 480 | * hls timestamp. |
| 481 | */ |
| 482 | static void |
| 483 | hlc_physical_ts_set(as_hlc_timestamp* hlc_ts, cf_clock physical_ts) |
| 484 | { |
| 485 | *hlc_ts = (*hlc_ts & LOGICAL_TS_MASK) | (physical_ts << 16); |
| 486 | } |
| 487 | |
| 488 | /** |
| 489 | * Handle setting updating the physical component of the hlc timestamp. |
| 490 | */ |
| 491 | static void |
| 492 | hlc_physical_ts_on_set(cf_clock physical_ts, cf_clock wall_clock_now) |
| 493 | { |
| 494 | if (g_prev_physical_component != physical_ts) { |
| 495 | g_prev_physical_component = physical_ts; |
| 496 | g_prev_wall_clock = wall_clock_now; |
| 497 | } |
| 498 | } |
| 499 | |
| 500 | /** |
| 501 | * Increment the logical timestamp and deal with a wrap around by incrementing |
| 502 | * the physical timestamp and ensure physical component moves at least at the |
| 503 | * rate of the wall clock to ensure hlc can be used as a crude measure of time |
| 504 | * intervals. |
| 505 | */ |
| 506 | static void |
| 507 | hlc_logical_ts_incr(uint16_t* logical_ts, cf_clock* physical_ts, |
| 508 | cf_clock wall_clock_now) |
| 509 | { |
| 510 | (*logical_ts)++; |
| 511 | if (*logical_ts == 0) { |
| 512 | (*physical_ts)++; |
| 513 | } |
| 514 | cf_clock physical_component_diff = *physical_ts - g_prev_physical_component; |
| 515 | cf_clock wall_clock_diff = |
| 516 | (wall_clock_now > g_prev_wall_clock) ? |
| 517 | wall_clock_now - g_prev_wall_clock : 0; |
| 518 | if (physical_component_diff < wall_clock_diff) { |
| 519 | *physical_ts += wall_clock_diff - physical_component_diff; |
| 520 | } |
| 521 | } |
| 522 | |
| 523 | /** |
| 524 | * Set the logical component of a hlc timestamp. |
| 525 | * @param hlc_ts the timestamp |
| 526 | * @param logical_ts the logical timestamp whose value should be set into the |
| 527 | * hls timestamp. |
| 528 | */ |
| 529 | static void |
| 530 | hlc_logical_ts_set(as_hlc_timestamp* hlc_ts, uint16_t logical_ts) |
| 531 | { |
| 532 | *hlc_ts = (*hlc_ts & PHYSICAL_TS_MASK) | (((uint64_t)logical_ts)); |
| 533 | } |
| 534 | |
| 535 | /** |
| 536 | * Get current value for the global timestamp atomically. |
| 537 | * |
| 538 | * @param new_value the new value for the global timestamp. |
| 539 | * @return true on successful set, false on failure to do an atomic set. |
| 540 | */ |
| 541 | static as_hlc_timestamp |
| 542 | hlc_ts_get() |
| 543 | { |
| 544 | return ck_pr_load_64(&g_now); |
| 545 | } |
| 546 | |
| 547 | /** |
| 548 | * Set a new value for the global timestamp atomically. |
| 549 | * |
| 550 | * @param old_value the old value. |
| 551 | * @param new_value the new value for the global timestamp. |
| 552 | * @param source the source node that caused this jump update in HLC, self if we |
| 553 | * are advancing the clock, peer node if advance is caused on message receipt. |
| 554 | * @return true on successful set, false on failure to do an atomic set. |
| 555 | */ |
| 556 | static bool |
| 557 | hlc_ts_set(as_hlc_timestamp old_value, as_hlc_timestamp new_value, |
| 558 | cf_node source) |
| 559 | { |
| 560 | // Default to ck atomic check and set. |
| 561 | cf_clock jump = hlc_physical_ts_get(new_value) |
| 562 | - hlc_physical_ts_get(old_value); |
| 563 | if (jump > HLC_JUMP_WARN && old_value > 0) { |
| 564 | INFO("HLC jumped by %" PRIu64" ms cause:%" PRIx64" old:%" PRIu64" new:%" PRIu64, jump, source, old_value, new_value); |
| 565 | } |
| 566 | return ck_pr_cas_64(&g_now, old_value, new_value); |
| 567 | } |
| 568 | |