1// metrohash128.cpp
2//
3// Copyright 2015-2018 J. Andrew Rogers
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17#include <string.h>
18#include "platform.h"
19#include "metrohash128.h"
20
21const char * MetroHash128::test_string = "012345678901234567890123456789012345678901234567890123456789012";
22
23const uint8_t MetroHash128::test_seed_0[16] = {
24 0xC7, 0x7C, 0xE2, 0xBF, 0xA4, 0xED, 0x9F, 0x9B,
25 0x05, 0x48, 0xB2, 0xAC, 0x50, 0x74, 0xA2, 0x97
26 };
27
28const uint8_t MetroHash128::test_seed_1[16] = {
29 0x45, 0xA3, 0xCD, 0xB8, 0x38, 0x19, 0x9D, 0x7F,
30 0xBD, 0xD6, 0x8D, 0x86, 0x7A, 0x14, 0xEC, 0xEF
31 };
32
33
34
35MetroHash128::MetroHash128(const uint64_t seed)
36{
37 Initialize(seed);
38}
39
40
41void MetroHash128::Initialize(const uint64_t seed)
42{
43 // initialize internal hash registers
44 state.v[0] = (static_cast<uint64_t>(seed) - k0) * k3;
45 state.v[1] = (static_cast<uint64_t>(seed) + k1) * k2;
46 state.v[2] = (static_cast<uint64_t>(seed) + k0) * k2;
47 state.v[3] = (static_cast<uint64_t>(seed) - k1) * k3;
48
49 // initialize total length of input
50 bytes = 0;
51}
52
53
54void MetroHash128::Update(const uint8_t * const buffer, const uint64_t length)
55{
56 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer);
57 const uint8_t * const end = ptr + length;
58
59 // input buffer may be partially filled
60 if (bytes % 32)
61 {
62 uint64_t fill = 32 - (bytes % 32);
63 if (fill > length)
64 fill = length;
65
66 memcpy(input.b + (bytes % 32), ptr, static_cast<size_t>(fill));
67 ptr += fill;
68 bytes += fill;
69
70 // input buffer is still partially filled
71 if ((bytes % 32) != 0) return;
72
73 // process full input buffer
74 state.v[0] += read_u64(&input.b[ 0]) * k0; state.v[0] = rotate_right(state.v[0],29) + state.v[2];
75 state.v[1] += read_u64(&input.b[ 8]) * k1; state.v[1] = rotate_right(state.v[1],29) + state.v[3];
76 state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0];
77 state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1];
78 }
79
80 // bulk update
81 bytes += (end - ptr);
82 while (ptr <= (end - 32))
83 {
84 // process directly from the source, bypassing the input buffer
85 state.v[0] += read_u64(ptr) * k0; ptr += 8; state.v[0] = rotate_right(state.v[0],29) + state.v[2];
86 state.v[1] += read_u64(ptr) * k1; ptr += 8; state.v[1] = rotate_right(state.v[1],29) + state.v[3];
87 state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0];
88 state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1];
89 }
90
91 // store remaining bytes in input buffer
92 if (ptr < end)
93 memcpy(input.b, ptr, end - ptr);
94}
95
96
97void MetroHash128::Finalize(uint8_t * const hash)
98{
99 // finalize bulk loop, if used
100 if (bytes >= 32)
101 {
102 state.v[2] ^= rotate_right(((state.v[0] + state.v[3]) * k0) + state.v[1], 21) * k1;
103 state.v[3] ^= rotate_right(((state.v[1] + state.v[2]) * k1) + state.v[0], 21) * k0;
104 state.v[0] ^= rotate_right(((state.v[0] + state.v[2]) * k0) + state.v[3], 21) * k1;
105 state.v[1] ^= rotate_right(((state.v[1] + state.v[3]) * k1) + state.v[2], 21) * k0;
106 }
107
108 // process any bytes remaining in the input buffer
109 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(input.b);
110 const uint8_t * const end = ptr + (bytes % 32);
111
112 if ((end - ptr) >= 16)
113 {
114 state.v[0] += read_u64(ptr) * k2; ptr += 8; state.v[0] = rotate_right(state.v[0],33) * k3;
115 state.v[1] += read_u64(ptr) * k2; ptr += 8; state.v[1] = rotate_right(state.v[1],33) * k3;
116 state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 45) * k1;
117 state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 45) * k0;
118 }
119
120 if ((end - ptr) >= 8)
121 {
122 state.v[0] += read_u64(ptr) * k2; ptr += 8; state.v[0] = rotate_right(state.v[0],33) * k3;
123 state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 27) * k1;
124 }
125
126 if ((end - ptr) >= 4)
127 {
128 state.v[1] += read_u32(ptr) * k2; ptr += 4; state.v[1] = rotate_right(state.v[1],33) * k3;
129 state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 46) * k0;
130 }
131
132 if ((end - ptr) >= 2)
133 {
134 state.v[0] += read_u16(ptr) * k2; ptr += 2; state.v[0] = rotate_right(state.v[0],33) * k3;
135 state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 22) * k1;
136 }
137
138 if ((end - ptr) >= 1)
139 {
140 state.v[1] += read_u8 (ptr) * k2; state.v[1] = rotate_right(state.v[1],33) * k3;
141 state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 58) * k0;
142 }
143
144 state.v[0] += rotate_right((state.v[0] * k0) + state.v[1], 13);
145 state.v[1] += rotate_right((state.v[1] * k1) + state.v[0], 37);
146 state.v[0] += rotate_right((state.v[0] * k2) + state.v[1], 13);
147 state.v[1] += rotate_right((state.v[1] * k3) + state.v[0], 37);
148
149 bytes = 0;
150
151 // do any endian conversion here
152
153 memcpy(hash, state.v, 16);
154}
155
156
157void MetroHash128::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed)
158{
159 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer);
160 const uint8_t * const end = ptr + length;
161
162 uint64_t v[4];
163
164 v[0] = (static_cast<uint64_t>(seed) - k0) * k3;
165 v[1] = (static_cast<uint64_t>(seed) + k1) * k2;
166
167 if (length >= 32)
168 {
169 v[2] = (static_cast<uint64_t>(seed) + k0) * k2;
170 v[3] = (static_cast<uint64_t>(seed) - k1) * k3;
171
172 do
173 {
174 v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
175 v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
176 v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
177 v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
178 }
179 while (ptr <= (end - 32));
180
181 v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 21) * k1;
182 v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 21) * k0;
183 v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 21) * k1;
184 v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 21) * k0;
185 }
186
187 if ((end - ptr) >= 16)
188 {
189 v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3;
190 v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],33) * k3;
191 v[0] ^= rotate_right((v[0] * k2) + v[1], 45) * k1;
192 v[1] ^= rotate_right((v[1] * k3) + v[0], 45) * k0;
193 }
194
195 if ((end - ptr) >= 8)
196 {
197 v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3;
198 v[0] ^= rotate_right((v[0] * k2) + v[1], 27) * k1;
199 }
200
201 if ((end - ptr) >= 4)
202 {
203 v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],33) * k3;
204 v[1] ^= rotate_right((v[1] * k3) + v[0], 46) * k0;
205 }
206
207 if ((end - ptr) >= 2)
208 {
209 v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],33) * k3;
210 v[0] ^= rotate_right((v[0] * k2) + v[1], 22) * k1;
211 }
212
213 if ((end - ptr) >= 1)
214 {
215 v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],33) * k3;
216 v[1] ^= rotate_right((v[1] * k3) + v[0], 58) * k0;
217 }
218
219 v[0] += rotate_right((v[0] * k0) + v[1], 13);
220 v[1] += rotate_right((v[1] * k1) + v[0], 37);
221 v[0] += rotate_right((v[0] * k2) + v[1], 13);
222 v[1] += rotate_right((v[1] * k3) + v[0], 37);
223
224 // do any endian conversion here
225
226 memcpy(hash, v, 16);
227}
228
229
230bool MetroHash128::ImplementationVerified()
231{
232 uint8_t hash[16];
233 const uint8_t * key = reinterpret_cast<const uint8_t *>(MetroHash128::test_string);
234
235 // verify one-shot implementation
236 MetroHash128::Hash(key, strlen(MetroHash128::test_string), hash, 0);
237 if (memcmp(hash, MetroHash128::test_seed_0, 16) != 0) return false;
238
239 MetroHash128::Hash(key, strlen(MetroHash128::test_string), hash, 1);
240 if (memcmp(hash, MetroHash128::test_seed_1, 16) != 0) return false;
241
242 // verify incremental implementation
243 MetroHash128 metro;
244
245 metro.Initialize(0);
246 metro.Update(reinterpret_cast<const uint8_t *>(MetroHash128::test_string), strlen(MetroHash128::test_string));
247 metro.Finalize(hash);
248 if (memcmp(hash, MetroHash128::test_seed_0, 16) != 0) return false;
249
250 metro.Initialize(1);
251 metro.Update(reinterpret_cast<const uint8_t *>(MetroHash128::test_string), strlen(MetroHash128::test_string));
252 metro.Finalize(hash);
253 if (memcmp(hash, MetroHash128::test_seed_1, 16) != 0) return false;
254
255 return true;
256}
257
258
259void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out)
260{
261 static const uint64_t k0 = 0xC83A91E1;
262 static const uint64_t k1 = 0x8648DBDB;
263 static const uint64_t k2 = 0x7BDEC03B;
264 static const uint64_t k3 = 0x2F5870A5;
265
266 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key);
267 const uint8_t * const end = ptr + len;
268
269 uint64_t v[4];
270
271 v[0] = ((static_cast<uint64_t>(seed) - k0) * k3) + len;
272 v[1] = ((static_cast<uint64_t>(seed) + k1) * k2) + len;
273
274 if (len >= 32)
275 {
276 v[2] = ((static_cast<uint64_t>(seed) + k0) * k2) + len;
277 v[3] = ((static_cast<uint64_t>(seed) - k1) * k3) + len;
278
279 do
280 {
281 v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
282 v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
283 v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
284 v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
285 }
286 while (ptr <= (end - 32));
287
288 v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 26) * k1;
289 v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 26) * k0;
290 v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 26) * k1;
291 v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0;
292 }
293
294 if ((end - ptr) >= 16)
295 {
296 v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3;
297 v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],33) * k3;
298 v[0] ^= rotate_right((v[0] * k2) + v[1], 17) * k1;
299 v[1] ^= rotate_right((v[1] * k3) + v[0], 17) * k0;
300 }
301
302 if ((end - ptr) >= 8)
303 {
304 v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3;
305 v[0] ^= rotate_right((v[0] * k2) + v[1], 20) * k1;
306 }
307
308 if ((end - ptr) >= 4)
309 {
310 v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],33) * k3;
311 v[1] ^= rotate_right((v[1] * k3) + v[0], 18) * k0;
312 }
313
314 if ((end - ptr) >= 2)
315 {
316 v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],33) * k3;
317 v[0] ^= rotate_right((v[0] * k2) + v[1], 24) * k1;
318 }
319
320 if ((end - ptr) >= 1)
321 {
322 v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],33) * k3;
323 v[1] ^= rotate_right((v[1] * k3) + v[0], 24) * k0;
324 }
325
326 v[0] += rotate_right((v[0] * k0) + v[1], 13);
327 v[1] += rotate_right((v[1] * k1) + v[0], 37);
328 v[0] += rotate_right((v[0] * k2) + v[1], 13);
329 v[1] += rotate_right((v[1] * k3) + v[0], 37);
330
331 // do any endian conversion here
332
333 memcpy(out, v, 16);
334}
335
336
337void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out)
338{
339 static const uint64_t k0 = 0xD6D018F5;
340 static const uint64_t k1 = 0xA2AA033B;
341 static const uint64_t k2 = 0x62992FC1;
342 static const uint64_t k3 = 0x30BC5B29;
343
344 const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key);
345 const uint8_t * const end = ptr + len;
346
347 uint64_t v[4];
348
349 v[0] = ((static_cast<uint64_t>(seed) - k0) * k3) + len;
350 v[1] = ((static_cast<uint64_t>(seed) + k1) * k2) + len;
351
352 if (len >= 32)
353 {
354 v[2] = ((static_cast<uint64_t>(seed) + k0) * k2) + len;
355 v[3] = ((static_cast<uint64_t>(seed) - k1) * k3) + len;
356
357 do
358 {
359 v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
360 v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
361 v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
362 v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
363 }
364 while (ptr <= (end - 32));
365
366 v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1;
367 v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0;
368 v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1;
369 v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0;
370 }
371
372 if ((end - ptr) >= 16)
373 {
374 v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],29) * k3;
375 v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],29) * k3;
376 v[0] ^= rotate_right((v[0] * k2) + v[1], 29) * k1;
377 v[1] ^= rotate_right((v[1] * k3) + v[0], 29) * k0;
378 }
379
380 if ((end - ptr) >= 8)
381 {
382 v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],29) * k3;
383 v[0] ^= rotate_right((v[0] * k2) + v[1], 29) * k1;
384 }
385
386 if ((end - ptr) >= 4)
387 {
388 v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],29) * k3;
389 v[1] ^= rotate_right((v[1] * k3) + v[0], 25) * k0;
390 }
391
392 if ((end - ptr) >= 2)
393 {
394 v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],29) * k3;
395 v[0] ^= rotate_right((v[0] * k2) + v[1], 30) * k1;
396 }
397
398 if ((end - ptr) >= 1)
399 {
400 v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],29) * k3;
401 v[1] ^= rotate_right((v[1] * k3) + v[0], 18) * k0;
402 }
403
404 v[0] += rotate_right((v[0] * k0) + v[1], 33);
405 v[1] += rotate_right((v[1] * k1) + v[0], 33);
406 v[0] += rotate_right((v[0] * k2) + v[1], 33);
407 v[1] += rotate_right((v[1] * k3) + v[0], 33);
408
409 // do any endian conversion here
410
411 memcpy(out, v, 16);
412}
413
414