1 | // metrohash128.cpp |
2 | // |
3 | // Copyright 2015-2018 J. Andrew Rogers |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
6 | // you may not use this file except in compliance with the License. |
7 | // You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, software |
12 | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | // See the License for the specific language governing permissions and |
15 | // limitations under the License. |
16 | |
17 | #include <string.h> |
18 | #include "platform.h" |
19 | #include "metrohash128.h" |
20 | |
21 | const char * MetroHash128::test_string = "012345678901234567890123456789012345678901234567890123456789012" ; |
22 | |
23 | const uint8_t MetroHash128::test_seed_0[16] = { |
24 | 0xC7, 0x7C, 0xE2, 0xBF, 0xA4, 0xED, 0x9F, 0x9B, |
25 | 0x05, 0x48, 0xB2, 0xAC, 0x50, 0x74, 0xA2, 0x97 |
26 | }; |
27 | |
28 | const uint8_t MetroHash128::test_seed_1[16] = { |
29 | 0x45, 0xA3, 0xCD, 0xB8, 0x38, 0x19, 0x9D, 0x7F, |
30 | 0xBD, 0xD6, 0x8D, 0x86, 0x7A, 0x14, 0xEC, 0xEF |
31 | }; |
32 | |
33 | |
34 | |
35 | MetroHash128::MetroHash128(const uint64_t seed) |
36 | { |
37 | Initialize(seed); |
38 | } |
39 | |
40 | |
41 | void MetroHash128::Initialize(const uint64_t seed) |
42 | { |
43 | // initialize internal hash registers |
44 | state.v[0] = (static_cast<uint64_t>(seed) - k0) * k3; |
45 | state.v[1] = (static_cast<uint64_t>(seed) + k1) * k2; |
46 | state.v[2] = (static_cast<uint64_t>(seed) + k0) * k2; |
47 | state.v[3] = (static_cast<uint64_t>(seed) - k1) * k3; |
48 | |
49 | // initialize total length of input |
50 | bytes = 0; |
51 | } |
52 | |
53 | |
54 | void MetroHash128::Update(const uint8_t * const buffer, const uint64_t length) |
55 | { |
56 | const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer); |
57 | const uint8_t * const end = ptr + length; |
58 | |
59 | // input buffer may be partially filled |
60 | if (bytes % 32) |
61 | { |
62 | uint64_t fill = 32 - (bytes % 32); |
63 | if (fill > length) |
64 | fill = length; |
65 | |
66 | memcpy(input.b + (bytes % 32), ptr, static_cast<size_t>(fill)); |
67 | ptr += fill; |
68 | bytes += fill; |
69 | |
70 | // input buffer is still partially filled |
71 | if ((bytes % 32) != 0) return; |
72 | |
73 | // process full input buffer |
74 | state.v[0] += read_u64(&input.b[ 0]) * k0; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; |
75 | state.v[1] += read_u64(&input.b[ 8]) * k1; state.v[1] = rotate_right(state.v[1],29) + state.v[3]; |
76 | state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; |
77 | state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; |
78 | } |
79 | |
80 | // bulk update |
81 | bytes += (end - ptr); |
82 | while (ptr <= (end - 32)) |
83 | { |
84 | // process directly from the source, bypassing the input buffer |
85 | state.v[0] += read_u64(ptr) * k0; ptr += 8; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; |
86 | state.v[1] += read_u64(ptr) * k1; ptr += 8; state.v[1] = rotate_right(state.v[1],29) + state.v[3]; |
87 | state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; |
88 | state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; |
89 | } |
90 | |
91 | // store remaining bytes in input buffer |
92 | if (ptr < end) |
93 | memcpy(input.b, ptr, end - ptr); |
94 | } |
95 | |
96 | |
97 | void MetroHash128::Finalize(uint8_t * const hash) |
98 | { |
99 | // finalize bulk loop, if used |
100 | if (bytes >= 32) |
101 | { |
102 | state.v[2] ^= rotate_right(((state.v[0] + state.v[3]) * k0) + state.v[1], 21) * k1; |
103 | state.v[3] ^= rotate_right(((state.v[1] + state.v[2]) * k1) + state.v[0], 21) * k0; |
104 | state.v[0] ^= rotate_right(((state.v[0] + state.v[2]) * k0) + state.v[3], 21) * k1; |
105 | state.v[1] ^= rotate_right(((state.v[1] + state.v[3]) * k1) + state.v[2], 21) * k0; |
106 | } |
107 | |
108 | // process any bytes remaining in the input buffer |
109 | const uint8_t * ptr = reinterpret_cast<const uint8_t*>(input.b); |
110 | const uint8_t * const end = ptr + (bytes % 32); |
111 | |
112 | if ((end - ptr) >= 16) |
113 | { |
114 | state.v[0] += read_u64(ptr) * k2; ptr += 8; state.v[0] = rotate_right(state.v[0],33) * k3; |
115 | state.v[1] += read_u64(ptr) * k2; ptr += 8; state.v[1] = rotate_right(state.v[1],33) * k3; |
116 | state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 45) * k1; |
117 | state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 45) * k0; |
118 | } |
119 | |
120 | if ((end - ptr) >= 8) |
121 | { |
122 | state.v[0] += read_u64(ptr) * k2; ptr += 8; state.v[0] = rotate_right(state.v[0],33) * k3; |
123 | state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 27) * k1; |
124 | } |
125 | |
126 | if ((end - ptr) >= 4) |
127 | { |
128 | state.v[1] += read_u32(ptr) * k2; ptr += 4; state.v[1] = rotate_right(state.v[1],33) * k3; |
129 | state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 46) * k0; |
130 | } |
131 | |
132 | if ((end - ptr) >= 2) |
133 | { |
134 | state.v[0] += read_u16(ptr) * k2; ptr += 2; state.v[0] = rotate_right(state.v[0],33) * k3; |
135 | state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 22) * k1; |
136 | } |
137 | |
138 | if ((end - ptr) >= 1) |
139 | { |
140 | state.v[1] += read_u8 (ptr) * k2; state.v[1] = rotate_right(state.v[1],33) * k3; |
141 | state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 58) * k0; |
142 | } |
143 | |
144 | state.v[0] += rotate_right((state.v[0] * k0) + state.v[1], 13); |
145 | state.v[1] += rotate_right((state.v[1] * k1) + state.v[0], 37); |
146 | state.v[0] += rotate_right((state.v[0] * k2) + state.v[1], 13); |
147 | state.v[1] += rotate_right((state.v[1] * k3) + state.v[0], 37); |
148 | |
149 | bytes = 0; |
150 | |
151 | // do any endian conversion here |
152 | |
153 | memcpy(hash, state.v, 16); |
154 | } |
155 | |
156 | |
157 | void MetroHash128::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed) |
158 | { |
159 | const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer); |
160 | const uint8_t * const end = ptr + length; |
161 | |
162 | uint64_t v[4]; |
163 | |
164 | v[0] = (static_cast<uint64_t>(seed) - k0) * k3; |
165 | v[1] = (static_cast<uint64_t>(seed) + k1) * k2; |
166 | |
167 | if (length >= 32) |
168 | { |
169 | v[2] = (static_cast<uint64_t>(seed) + k0) * k2; |
170 | v[3] = (static_cast<uint64_t>(seed) - k1) * k3; |
171 | |
172 | do |
173 | { |
174 | v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; |
175 | v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; |
176 | v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; |
177 | v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; |
178 | } |
179 | while (ptr <= (end - 32)); |
180 | |
181 | v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 21) * k1; |
182 | v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 21) * k0; |
183 | v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 21) * k1; |
184 | v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 21) * k0; |
185 | } |
186 | |
187 | if ((end - ptr) >= 16) |
188 | { |
189 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3; |
190 | v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],33) * k3; |
191 | v[0] ^= rotate_right((v[0] * k2) + v[1], 45) * k1; |
192 | v[1] ^= rotate_right((v[1] * k3) + v[0], 45) * k0; |
193 | } |
194 | |
195 | if ((end - ptr) >= 8) |
196 | { |
197 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3; |
198 | v[0] ^= rotate_right((v[0] * k2) + v[1], 27) * k1; |
199 | } |
200 | |
201 | if ((end - ptr) >= 4) |
202 | { |
203 | v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],33) * k3; |
204 | v[1] ^= rotate_right((v[1] * k3) + v[0], 46) * k0; |
205 | } |
206 | |
207 | if ((end - ptr) >= 2) |
208 | { |
209 | v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],33) * k3; |
210 | v[0] ^= rotate_right((v[0] * k2) + v[1], 22) * k1; |
211 | } |
212 | |
213 | if ((end - ptr) >= 1) |
214 | { |
215 | v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],33) * k3; |
216 | v[1] ^= rotate_right((v[1] * k3) + v[0], 58) * k0; |
217 | } |
218 | |
219 | v[0] += rotate_right((v[0] * k0) + v[1], 13); |
220 | v[1] += rotate_right((v[1] * k1) + v[0], 37); |
221 | v[0] += rotate_right((v[0] * k2) + v[1], 13); |
222 | v[1] += rotate_right((v[1] * k3) + v[0], 37); |
223 | |
224 | // do any endian conversion here |
225 | |
226 | memcpy(hash, v, 16); |
227 | } |
228 | |
229 | |
230 | bool MetroHash128::ImplementationVerified() |
231 | { |
232 | uint8_t hash[16]; |
233 | const uint8_t * key = reinterpret_cast<const uint8_t *>(MetroHash128::test_string); |
234 | |
235 | // verify one-shot implementation |
236 | MetroHash128::Hash(key, strlen(MetroHash128::test_string), hash, 0); |
237 | if (memcmp(hash, MetroHash128::test_seed_0, 16) != 0) return false; |
238 | |
239 | MetroHash128::Hash(key, strlen(MetroHash128::test_string), hash, 1); |
240 | if (memcmp(hash, MetroHash128::test_seed_1, 16) != 0) return false; |
241 | |
242 | // verify incremental implementation |
243 | MetroHash128 metro; |
244 | |
245 | metro.Initialize(0); |
246 | metro.Update(reinterpret_cast<const uint8_t *>(MetroHash128::test_string), strlen(MetroHash128::test_string)); |
247 | metro.Finalize(hash); |
248 | if (memcmp(hash, MetroHash128::test_seed_0, 16) != 0) return false; |
249 | |
250 | metro.Initialize(1); |
251 | metro.Update(reinterpret_cast<const uint8_t *>(MetroHash128::test_string), strlen(MetroHash128::test_string)); |
252 | metro.Finalize(hash); |
253 | if (memcmp(hash, MetroHash128::test_seed_1, 16) != 0) return false; |
254 | |
255 | return true; |
256 | } |
257 | |
258 | |
259 | void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) |
260 | { |
261 | static const uint64_t k0 = 0xC83A91E1; |
262 | static const uint64_t k1 = 0x8648DBDB; |
263 | static const uint64_t k2 = 0x7BDEC03B; |
264 | static const uint64_t k3 = 0x2F5870A5; |
265 | |
266 | const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key); |
267 | const uint8_t * const end = ptr + len; |
268 | |
269 | uint64_t v[4]; |
270 | |
271 | v[0] = ((static_cast<uint64_t>(seed) - k0) * k3) + len; |
272 | v[1] = ((static_cast<uint64_t>(seed) + k1) * k2) + len; |
273 | |
274 | if (len >= 32) |
275 | { |
276 | v[2] = ((static_cast<uint64_t>(seed) + k0) * k2) + len; |
277 | v[3] = ((static_cast<uint64_t>(seed) - k1) * k3) + len; |
278 | |
279 | do |
280 | { |
281 | v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; |
282 | v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; |
283 | v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; |
284 | v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; |
285 | } |
286 | while (ptr <= (end - 32)); |
287 | |
288 | v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 26) * k1; |
289 | v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 26) * k0; |
290 | v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 26) * k1; |
291 | v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0; |
292 | } |
293 | |
294 | if ((end - ptr) >= 16) |
295 | { |
296 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3; |
297 | v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],33) * k3; |
298 | v[0] ^= rotate_right((v[0] * k2) + v[1], 17) * k1; |
299 | v[1] ^= rotate_right((v[1] * k3) + v[0], 17) * k0; |
300 | } |
301 | |
302 | if ((end - ptr) >= 8) |
303 | { |
304 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3; |
305 | v[0] ^= rotate_right((v[0] * k2) + v[1], 20) * k1; |
306 | } |
307 | |
308 | if ((end - ptr) >= 4) |
309 | { |
310 | v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],33) * k3; |
311 | v[1] ^= rotate_right((v[1] * k3) + v[0], 18) * k0; |
312 | } |
313 | |
314 | if ((end - ptr) >= 2) |
315 | { |
316 | v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],33) * k3; |
317 | v[0] ^= rotate_right((v[0] * k2) + v[1], 24) * k1; |
318 | } |
319 | |
320 | if ((end - ptr) >= 1) |
321 | { |
322 | v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],33) * k3; |
323 | v[1] ^= rotate_right((v[1] * k3) + v[0], 24) * k0; |
324 | } |
325 | |
326 | v[0] += rotate_right((v[0] * k0) + v[1], 13); |
327 | v[1] += rotate_right((v[1] * k1) + v[0], 37); |
328 | v[0] += rotate_right((v[0] * k2) + v[1], 13); |
329 | v[1] += rotate_right((v[1] * k3) + v[0], 37); |
330 | |
331 | // do any endian conversion here |
332 | |
333 | memcpy(out, v, 16); |
334 | } |
335 | |
336 | |
337 | void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) |
338 | { |
339 | static const uint64_t k0 = 0xD6D018F5; |
340 | static const uint64_t k1 = 0xA2AA033B; |
341 | static const uint64_t k2 = 0x62992FC1; |
342 | static const uint64_t k3 = 0x30BC5B29; |
343 | |
344 | const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key); |
345 | const uint8_t * const end = ptr + len; |
346 | |
347 | uint64_t v[4]; |
348 | |
349 | v[0] = ((static_cast<uint64_t>(seed) - k0) * k3) + len; |
350 | v[1] = ((static_cast<uint64_t>(seed) + k1) * k2) + len; |
351 | |
352 | if (len >= 32) |
353 | { |
354 | v[2] = ((static_cast<uint64_t>(seed) + k0) * k2) + len; |
355 | v[3] = ((static_cast<uint64_t>(seed) - k1) * k3) + len; |
356 | |
357 | do |
358 | { |
359 | v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; |
360 | v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; |
361 | v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; |
362 | v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; |
363 | } |
364 | while (ptr <= (end - 32)); |
365 | |
366 | v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1; |
367 | v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0; |
368 | v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1; |
369 | v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0; |
370 | } |
371 | |
372 | if ((end - ptr) >= 16) |
373 | { |
374 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],29) * k3; |
375 | v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],29) * k3; |
376 | v[0] ^= rotate_right((v[0] * k2) + v[1], 29) * k1; |
377 | v[1] ^= rotate_right((v[1] * k3) + v[0], 29) * k0; |
378 | } |
379 | |
380 | if ((end - ptr) >= 8) |
381 | { |
382 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],29) * k3; |
383 | v[0] ^= rotate_right((v[0] * k2) + v[1], 29) * k1; |
384 | } |
385 | |
386 | if ((end - ptr) >= 4) |
387 | { |
388 | v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],29) * k3; |
389 | v[1] ^= rotate_right((v[1] * k3) + v[0], 25) * k0; |
390 | } |
391 | |
392 | if ((end - ptr) >= 2) |
393 | { |
394 | v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],29) * k3; |
395 | v[0] ^= rotate_right((v[0] * k2) + v[1], 30) * k1; |
396 | } |
397 | |
398 | if ((end - ptr) >= 1) |
399 | { |
400 | v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],29) * k3; |
401 | v[1] ^= rotate_right((v[1] * k3) + v[0], 18) * k0; |
402 | } |
403 | |
404 | v[0] += rotate_right((v[0] * k0) + v[1], 33); |
405 | v[1] += rotate_right((v[1] * k1) + v[0], 33); |
406 | v[0] += rotate_right((v[0] * k2) + v[1], 33); |
407 | v[1] += rotate_right((v[1] * k3) + v[0], 33); |
408 | |
409 | // do any endian conversion here |
410 | |
411 | memcpy(out, v, 16); |
412 | } |
413 | |
414 | |