1#include "consistent_hashing.h"
2
3#include "bitops.h"
4
5#include "popcount.h"
6
7#include <stdexcept>
8
9/*
10 * (all numbers are written in big-endian manner: the least significant digit on the right)
11 * (only bit representations are used - no hex or octal, leading zeroes are ommited)
12 *
13 * Consistent hashing scheme:
14 *
15 * (sizeof(TValue) * 8, y] (y, 0]
16 * a = * ablock
17 * b = * cblock
18 *
19 * (sizeof(TValue) * 8, k] (k, 0]
20 * c = * cblock
21 *
22 * d = *
23 *
24 * k - is determined by 2^(k-1) < n <= 2^k inequality
25 * z - is number of ones in cblock
26 * y - number of digits after first one in cblock
27 *
28 * The cblock determines logic of using a- and b- blocks:
29 *
30 * bits of cblock | result of a function
31 * 0 : 0
32 * 1 : 1 (optimization, the next case includes this one)
33 * 1?..? : 1ablock (z is even) or 1bblock (z is odd) if possible (<n)
34 *
35 * If last case is not possible (>=n), than smooth moving from n=2^(k-1) to n=2^k is applied.
36 * Using "*" bits of a-,b-,c-,d- blocks uint64_t value is combined, modulo of which determines
37 * if the value should be greather than 2^(k-1) or ConsistentHashing(x, 2^(k-1)) should be used.
38 * The last case is optimized according to previous checks.
39 */
40
41namespace {
42
43template<class TValue>
44TValue PowerOf2(size_t k) {
45 return (TValue)0x1 << k;
46}
47
48template<class TValue>
49TValue SelectAOrBBlock(TValue a, TValue b, TValue cBlock) {
50 size_t z = PopCount<uint64_t>(cBlock);
51 bool useABlock = z % 2 == 0;
52 return useABlock ? a : b;
53}
54
55// Gets the exact result for n = k2 = 2 ^ k
56template<class TValue>
57size_t ConsistentHashingForPowersOf2(TValue a, TValue b, TValue c, TValue k2) {
58 TValue cBlock = c & (k2 - 1); // (k, 0] bits of c
59 // Zero and one cases
60 if (cBlock < 2) {
61 // First two cases of result function table: 0 if cblock is 0, 1 if cblock is 1.
62 return cBlock;
63 }
64 size_t y = GetValueBitCount<uint64_t>(cBlock) - 1; // cblock = 0..01?..? (y = number of digits after 1), y > 0
65 TValue y2 = PowerOf2<TValue>(y); // y2 = 2^y
66 TValue abBlock = SelectAOrBBlock(a, b, cBlock) & (y2 - 1);
67 return y2 + abBlock;
68}
69
70template<class TValue>
71uint64_t GetAsteriskBits(TValue a, TValue b, TValue c, TValue d, size_t k) {
72 size_t shift = sizeof(TValue) * 8 - k;
73 uint64_t res = (d << shift) | (c >> k);
74 ++shift;
75 res <<= shift;
76 res |= b >> (k - 1);
77 res <<= shift;
78 res |= a >> (k - 1);
79
80 return res;
81}
82
83template<class TValue>
84size_t ConsistentHashingImpl(TValue a, TValue b, TValue c, TValue d, size_t n) {
85 if (n <= 0)
86 throw std::runtime_error("Can't map consistently to a zero values.");
87
88 // Uninteresting case
89 if (n == 1) {
90 return 0;
91 }
92 size_t k = GetValueBitCount(n - 1); // 2^(k-1) < n <= 2^k, k >= 1
93 TValue k2 = PowerOf2<TValue>(k); // k2 = 2^k
94 size_t largeValue;
95 {
96 // Bit determined variant. Large scheme.
97 largeValue = ConsistentHashingForPowersOf2(a, b, c, k2);
98 if (largeValue < n) {
99 return largeValue;
100 }
101 }
102 // Since largeValue is not assigned yet
103 // Smooth moving from one bit scheme to another
104 TValue k21 = PowerOf2<TValue>(k - 1);
105 {
106 size_t s = GetAsteriskBits(a, b, c, d, k) % (largeValue * (largeValue + 1));
107 size_t largeValue2 = s / k2 + k21;
108 if (largeValue2 < n) {
109 return largeValue2;
110 }
111 }
112 // Bit determined variant. Short scheme.
113 return ConsistentHashingForPowersOf2(a, b, c, k21); // Do not apply checks. It is always less than k21 = 2^(k-1)
114}
115
116} // namespace // anonymous
117
118std::size_t ConsistentHashing(std::uint64_t x, std::size_t n) {
119 uint32_t lo = LO_32(x);
120 uint32_t hi = HI_32(x);
121 return ConsistentHashingImpl<uint16_t>(LO_16(lo), HI_16(lo), LO_16(hi), HI_16(hi), n);
122}
123std::size_t ConsistentHashing(std::uint64_t lo, std::uint64_t hi, std::size_t n) {
124 return ConsistentHashingImpl<uint32_t>(LO_32(lo), HI_32(lo), LO_32(hi), HI_32(hi), n);
125}
126