1// metrohash64.cpp
2//
3// Copyright 2015-2018 J. Andrew Rogers
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17#include "platform.h"
18#include "metrohash64.h"
19
20#include <cstring>
21
22const char * MetroHash64::test_string = "012345678901234567890123456789012345678901234567890123456789012";
23
24const uint8_t MetroHash64::test_seed_0[8] = { 0x6B, 0x75, 0x3D, 0xAE, 0x06, 0x70, 0x4B, 0xAD };
25const uint8_t MetroHash64::test_seed_1[8] = { 0x3B, 0x0D, 0x48, 0x1C, 0xF4, 0xB9, 0xB8, 0xDF };
26
27
28
29MetroHash64::MetroHash64(const uint64_t seed)
30{
31 Initialize(seed);
32}
33
34
35void MetroHash64::Initialize(const uint64_t seed)
36{
37 vseed = (static_cast<uint64_t>(seed) + k2) * k0;
38
39 // initialize internal hash registers
40 state.v[0] = vseed;
41 state.v[1] = vseed;
42 state.v[2] = vseed;
43 state.v[3] = vseed;
44
45 // initialize total length of input
46 bytes = 0;
47}
48
49
50void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length)
51{
52 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer);
53 const uint8_t * const end = ptr + length;
54
55 // input buffer may be partially filled
56 if (bytes % 32)
57 {
58 uint64_t fill = 32 - (bytes % 32);
59 if (fill > length)
60 fill = length;
61
62 memcpy(input.b + (bytes % 32), ptr, static_cast<size_t>(fill));
63 ptr += fill;
64 bytes += fill;
65
66 // input buffer is still partially filled
67 if ((bytes % 32) != 0) return;
68
69 // process full input buffer
70 state.v[0] += read_u64(&input.b[ 0]) * k0; state.v[0] = rotate_right(state.v[0],29) + state.v[2];
71 state.v[1] += read_u64(&input.b[ 8]) * k1; state.v[1] = rotate_right(state.v[1],29) + state.v[3];
72 state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0];
73 state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1];
74 }
75
76 // bulk update
77 bytes += static_cast<uint64_t>(end - ptr);
78 while (ptr <= (end - 32))
79 {
80 // process directly from the source, bypassing the input buffer
81 state.v[0] += read_u64(ptr) * k0; ptr += 8; state.v[0] = rotate_right(state.v[0],29) + state.v[2];
82 state.v[1] += read_u64(ptr) * k1; ptr += 8; state.v[1] = rotate_right(state.v[1],29) + state.v[3];
83 state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0];
84 state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1];
85 }
86
87 // store remaining bytes in input buffer
88 if (ptr < end)
89 memcpy(input.b, ptr, static_cast<size_t>(end - ptr));
90}
91
92
93void MetroHash64::Finalize(uint8_t * const hash)
94{
95 // finalize bulk loop, if used
96 if (bytes >= 32)
97 {
98 state.v[2] ^= rotate_right(((state.v[0] + state.v[3]) * k0) + state.v[1], 37) * k1;
99 state.v[3] ^= rotate_right(((state.v[1] + state.v[2]) * k1) + state.v[0], 37) * k0;
100 state.v[0] ^= rotate_right(((state.v[0] + state.v[2]) * k0) + state.v[3], 37) * k1;
101 state.v[1] ^= rotate_right(((state.v[1] + state.v[3]) * k1) + state.v[2], 37) * k0;
102
103 state.v[0] = vseed + (state.v[0] ^ state.v[1]);
104 }
105
106 // process any bytes remaining in the input buffer
107 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(input.b);
108 const uint8_t * const end = ptr + (bytes % 32);
109
110 if ((end - ptr) >= 16)
111 {
112 state.v[1] = state.v[0] + (read_u64(ptr) * k2); ptr += 8; state.v[1] = rotate_right(state.v[1],29) * k3;
113 state.v[2] = state.v[0] + (read_u64(ptr) * k2); ptr += 8; state.v[2] = rotate_right(state.v[2],29) * k3;
114 state.v[1] ^= rotate_right(state.v[1] * k0, 21) + state.v[2];
115 state.v[2] ^= rotate_right(state.v[2] * k3, 21) + state.v[1];
116 state.v[0] += state.v[2];
117 }
118
119 if ((end - ptr) >= 8)
120 {
121 state.v[0] += read_u64(ptr) * k3; ptr += 8;
122 state.v[0] ^= rotate_right(state.v[0], 55) * k1;
123 }
124
125 if ((end - ptr) >= 4)
126 {
127 state.v[0] += read_u32(ptr) * k3; ptr += 4;
128 state.v[0] ^= rotate_right(state.v[0], 26) * k1;
129 }
130
131 if ((end - ptr) >= 2)
132 {
133 state.v[0] += read_u16(ptr) * k3; ptr += 2;
134 state.v[0] ^= rotate_right(state.v[0], 48) * k1;
135 }
136
137 if ((end - ptr) >= 1)
138 {
139 state.v[0] += read_u8 (ptr) * k3;
140 state.v[0] ^= rotate_right(state.v[0], 37) * k1;
141 }
142
143 state.v[0] ^= rotate_right(state.v[0], 28);
144 state.v[0] *= k0;
145 state.v[0] ^= rotate_right(state.v[0], 29);
146
147 bytes = 0;
148
149 // do any endian conversion here
150
151 memcpy(hash, state.v, 8);
152}
153
154
155void MetroHash64::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed)
156{
157 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer);
158 const uint8_t * const end = ptr + length;
159
160 uint64_t h = (static_cast<uint64_t>(seed) + k2) * k0;
161
162 if (length >= 32)
163 {
164 uint64_t v[4];
165 v[0] = h;
166 v[1] = h;
167 v[2] = h;
168 v[3] = h;
169
170 do
171 {
172 v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
173 v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
174 v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
175 v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
176 }
177 while (ptr <= (end - 32));
178
179 v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 37) * k1;
180 v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 37) * k0;
181 v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 37) * k1;
182 v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 37) * k0;
183 h += v[0] ^ v[1];
184 }
185
186 if ((end - ptr) >= 16)
187 {
188 uint64_t v0 = h + (read_u64(ptr) * k2); ptr += 8; v0 = rotate_right(v0,29) * k3;
189 uint64_t v1 = h + (read_u64(ptr) * k2); ptr += 8; v1 = rotate_right(v1,29) * k3;
190 v0 ^= rotate_right(v0 * k0, 21) + v1;
191 v1 ^= rotate_right(v1 * k3, 21) + v0;
192 h += v1;
193 }
194
195 if ((end - ptr) >= 8)
196 {
197 h += read_u64(ptr) * k3; ptr += 8;
198 h ^= rotate_right(h, 55) * k1;
199 }
200
201 if ((end - ptr) >= 4)
202 {
203 h += read_u32(ptr) * k3; ptr += 4;
204 h ^= rotate_right(h, 26) * k1;
205 }
206
207 if ((end - ptr) >= 2)
208 {
209 h += read_u16(ptr) * k3; ptr += 2;
210 h ^= rotate_right(h, 48) * k1;
211 }
212
213 if ((end - ptr) >= 1)
214 {
215 h += read_u8 (ptr) * k3;
216 h ^= rotate_right(h, 37) * k1;
217 }
218
219 h ^= rotate_right(h, 28);
220 h *= k0;
221 h ^= rotate_right(h, 29);
222
223 memcpy(hash, &h, 8);
224}
225
226
227bool MetroHash64::ImplementationVerified()
228{
229 uint8_t hash[8];
230 const uint8_t * key = reinterpret_cast<const uint8_t *>(MetroHash64::test_string);
231
232 // verify one-shot implementation
233 MetroHash64::Hash(key, strlen(MetroHash64::test_string), hash, 0);
234 if (memcmp(hash, MetroHash64::test_seed_0, 8) != 0) return false;
235
236 MetroHash64::Hash(key, strlen(MetroHash64::test_string), hash, 1);
237 if (memcmp(hash, MetroHash64::test_seed_1, 8) != 0) return false;
238
239 // verify incremental implementation
240 MetroHash64 metro;
241
242 metro.Initialize(0);
243 metro.Update(reinterpret_cast<const uint8_t *>(MetroHash64::test_string), strlen(MetroHash64::test_string));
244 metro.Finalize(hash);
245 if (memcmp(hash, MetroHash64::test_seed_0, 8) != 0) return false;
246
247 metro.Initialize(1);
248 metro.Update(reinterpret_cast<const uint8_t *>(MetroHash64::test_string), strlen(MetroHash64::test_string));
249 metro.Finalize(hash);
250 if (memcmp(hash, MetroHash64::test_seed_1, 8) != 0) return false;
251
252 return true;
253}
254
255
256void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out)
257{
258 static const uint64_t k0 = 0xC83A91E1;
259 static const uint64_t k1 = 0x8648DBDB;
260 static const uint64_t k2 = 0x7BDEC03B;
261 static const uint64_t k3 = 0x2F5870A5;
262
263 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key);
264 const uint8_t * const end = ptr + len;
265
266 uint64_t hash = ((static_cast<uint64_t>(seed) + k2) * k0) + len;
267
268 if (len >= 32)
269 {
270 uint64_t v[4];
271 v[0] = hash;
272 v[1] = hash;
273 v[2] = hash;
274 v[3] = hash;
275
276 do
277 {
278 v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
279 v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
280 v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
281 v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
282 }
283 while (ptr <= (end - 32));
284
285 v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1;
286 v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0;
287 v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1;
288 v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0;
289 hash += v[0] ^ v[1];
290 }
291
292 if ((end - ptr) >= 16)
293 {
294 uint64_t v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1;
295 uint64_t v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1,33) * k2;
296 v0 ^= rotate_right(v0 * k0, 35) + v1;
297 v1 ^= rotate_right(v1 * k3, 35) + v0;
298 hash += v1;
299 }
300
301 if ((end - ptr) >= 8)
302 {
303 hash += read_u64(ptr) * k3; ptr += 8;
304 hash ^= rotate_right(hash, 33) * k1;
305
306 }
307
308 if ((end - ptr) >= 4)
309 {
310 hash += read_u32(ptr) * k3; ptr += 4;
311 hash ^= rotate_right(hash, 15) * k1;
312 }
313
314 if ((end - ptr) >= 2)
315 {
316 hash += read_u16(ptr) * k3; ptr += 2;
317 hash ^= rotate_right(hash, 13) * k1;
318 }
319
320 if ((end - ptr) >= 1)
321 {
322 hash += read_u8 (ptr) * k3;
323 hash ^= rotate_right(hash, 25) * k1;
324 }
325
326 hash ^= rotate_right(hash, 33);
327 hash *= k0;
328 hash ^= rotate_right(hash, 33);
329
330 memcpy(out, &hash, 8);
331}
332
333
334void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out)
335{
336 static const uint64_t k0 = 0xD6D018F5;
337 static const uint64_t k1 = 0xA2AA033B;
338 static const uint64_t k2 = 0x62992FC1;
339 static const uint64_t k3 = 0x30BC5B29;
340
341 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key);
342 const uint8_t * const end = ptr + len;
343
344 uint64_t hash = ((static_cast<uint64_t>(seed) + k2) * k0) + len;
345
346 if (len >= 32)
347 {
348 uint64_t v[4];
349 v[0] = hash;
350 v[1] = hash;
351 v[2] = hash;
352 v[3] = hash;
353
354 do
355 {
356 v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
357 v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
358 v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
359 v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
360 }
361 while (ptr <= (end - 32));
362
363 v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 30) * k1;
364 v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 30) * k0;
365 v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 30) * k1;
366 v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0;
367 hash += v[0] ^ v[1];
368 }
369
370 if ((end - ptr) >= 16)
371 {
372 uint64_t v0 = hash + (read_u64(ptr) * k2); ptr += 8; v0 = rotate_right(v0,29) * k3;
373 uint64_t v1 = hash + (read_u64(ptr) * k2); ptr += 8; v1 = rotate_right(v1,29) * k3;
374 v0 ^= rotate_right(v0 * k0, 34) + v1;
375 v1 ^= rotate_right(v1 * k3, 34) + v0;
376 hash += v1;
377 }
378
379 if ((end - ptr) >= 8)
380 {
381 hash += read_u64(ptr) * k3; ptr += 8;
382 hash ^= rotate_right(hash, 36) * k1;
383 }
384
385 if ((end - ptr) >= 4)
386 {
387 hash += read_u32(ptr) * k3; ptr += 4;
388 hash ^= rotate_right(hash, 15) * k1;
389 }
390
391 if ((end - ptr) >= 2)
392 {
393 hash += read_u16(ptr) * k3; ptr += 2;
394 hash ^= rotate_right(hash, 15) * k1;
395 }
396
397 if ((end - ptr) >= 1)
398 {
399 hash += read_u8 (ptr) * k3;
400 hash ^= rotate_right(hash, 23) * k1;
401 }
402
403 hash ^= rotate_right(hash, 28);
404 hash *= k0;
405 hash ^= rotate_right(hash, 29);
406
407 memcpy(out, &hash, 8);
408}
409
410
411