1#pragma once
2
3/** SipHash is a fast cryptographic hash function for short strings.
4 * Taken from here: https://www.131002.net/siphash/
5 *
6 * This is SipHash 2-4 variant.
7 *
8 * Two changes are made:
9 * - returns also 128 bits, not only 64;
10 * - done streaming (can be calculated in parts).
11 *
12 * On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL.
13 * (~ 700 MB/sec, 15 million strings per second)
14 */
15
16#include <common/Types.h>
17#include <common/unaligned.h>
18#include <string>
19#include <type_traits>
20#include <Core/Defines.h>
21
22#define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
23
24#define SIPROUND \
25 do \
26 { \
27 v0 += v1; v1 = ROTL(v1, 13); v1 ^= v0; v0 = ROTL(v0, 32); \
28 v2 += v3; v3 = ROTL(v3, 16); v3 ^= v2; \
29 v0 += v3; v3 = ROTL(v3, 21); v3 ^= v0; \
30 v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \
31 } while(0)
32
33
34class SipHash
35{
36private:
37 /// State.
38 UInt64 v0;
39 UInt64 v1;
40 UInt64 v2;
41 UInt64 v3;
42
43 /// How many bytes have been processed.
44 UInt64 cnt;
45
46 /// The current 8 bytes of input data.
47 union
48 {
49 UInt64 current_word;
50 UInt8 current_bytes[8];
51 };
52
53 ALWAYS_INLINE void finalize()
54 {
55 /// In the last free byte, we write the remainder of the division by 256.
56 current_bytes[7] = cnt;
57
58 v3 ^= current_word;
59 SIPROUND;
60 SIPROUND;
61 v0 ^= current_word;
62
63 v2 ^= 0xff;
64 SIPROUND;
65 SIPROUND;
66 SIPROUND;
67 SIPROUND;
68 }
69
70public:
71 /// Arguments - seed.
72 SipHash(UInt64 k0 = 0, UInt64 k1 = 0)
73 {
74 /// Initialize the state with some random bytes and seed.
75 v0 = 0x736f6d6570736575ULL ^ k0;
76 v1 = 0x646f72616e646f6dULL ^ k1;
77 v2 = 0x6c7967656e657261ULL ^ k0;
78 v3 = 0x7465646279746573ULL ^ k1;
79
80 cnt = 0;
81 current_word = 0;
82 }
83
84 void update(const char * data, UInt64 size)
85 {
86 const char * end = data + size;
87
88 /// We'll finish to process the remainder of the previous update, if any.
89 if (cnt & 7)
90 {
91 while (cnt & 7 && data < end)
92 {
93 current_bytes[cnt & 7] = *data;
94 ++data;
95 ++cnt;
96 }
97
98 /// If we still do not have enough bytes to an 8-byte word.
99 if (cnt & 7)
100 return;
101
102 v3 ^= current_word;
103 SIPROUND;
104 SIPROUND;
105 v0 ^= current_word;
106 }
107
108 cnt += end - data;
109
110 while (data + 8 <= end)
111 {
112 current_word = unalignedLoad<UInt64>(data);
113
114 v3 ^= current_word;
115 SIPROUND;
116 SIPROUND;
117 v0 ^= current_word;
118
119 data += 8;
120 }
121
122 /// Pad the remainder, which is missing up to an 8-byte word.
123 current_word = 0;
124 switch (end - data)
125 {
126 case 7: current_bytes[6] = data[6]; [[fallthrough]];
127 case 6: current_bytes[5] = data[5]; [[fallthrough]];
128 case 5: current_bytes[4] = data[4]; [[fallthrough]];
129 case 4: current_bytes[3] = data[3]; [[fallthrough]];
130 case 3: current_bytes[2] = data[2]; [[fallthrough]];
131 case 2: current_bytes[1] = data[1]; [[fallthrough]];
132 case 1: current_bytes[0] = data[0]; [[fallthrough]];
133 case 0: break;
134 }
135 }
136
137 /// NOTE: std::has_unique_object_representations is only available since clang 6. As of Mar 2017 we still use clang 5 sometimes.
138 template <typename T>
139 std::enable_if_t<std::/*has_unique_object_representations_v*/is_standard_layout_v<T>, void> update(const T & x)
140 {
141 update(reinterpret_cast<const char *>(&x), sizeof(x));
142 }
143
144 void update(const std::string & x)
145 {
146 update(x.data(), x.length());
147 }
148
149 /// Get the result in some form. This can only be done once!
150
151 void get128(char * out)
152 {
153 finalize();
154 reinterpret_cast<UInt64 *>(out)[0] = v0 ^ v1;
155 reinterpret_cast<UInt64 *>(out)[1] = v2 ^ v3;
156 }
157
158 /// template for avoiding 'unsigned long long' vs 'unsigned long' problem on old poco in macos
159 template <typename T>
160 ALWAYS_INLINE void get128(T & lo, T & hi)
161 {
162 static_assert(sizeof(T) == 8);
163 finalize();
164 lo = v0 ^ v1;
165 hi = v2 ^ v3;
166 }
167
168 UInt64 get64()
169 {
170 finalize();
171 return v0 ^ v1 ^ v2 ^ v3;
172 }
173};
174
175
176#undef ROTL
177#undef SIPROUND
178
179#include <cstddef>
180
181inline void sipHash128(const char * data, const size_t size, char * out)
182{
183 SipHash hash;
184 hash.update(data, size);
185 hash.get128(out);
186}
187
188inline UInt64 sipHash64(const char * data, const size_t size)
189{
190 SipHash hash;
191 hash.update(data, size);
192 return hash.get64();
193}
194
195template <typename T>
196std::enable_if_t<std::/*has_unique_object_representations_v*/is_standard_layout_v<T>, UInt64> sipHash64(const T & x)
197{
198 SipHash hash;
199 hash.update(x);
200 return hash.get64();
201}
202
203inline UInt64 sipHash64(const std::string & s)
204{
205 return sipHash64(s.data(), s.size());
206}
207