1// MurmurHash2 was written by Austin Appleby, and is placed in the public
2// domain. The author hereby disclaims copyright to this source code.
3
4// Note - This code makes a few assumptions about how your machine behaves -
5
6// 1. We can read a 4-byte value from any address without crashing
7// 2. sizeof(int) == 4
8
9// And it has a few limitations -
10
11// 1. It will not work incrementally.
12// 2. It will not produce the same results on little-endian and big-endian
13// machines.
14
15#include "murmurhash2.h"
16#include <cstring>
17
18// Platform-specific functions and macros
19// Microsoft Visual Studio
20
21#if defined(_MSC_VER)
22
23#define BIG_CONSTANT(x) (x)
24
25// Other compilers
26
27#else // defined(_MSC_VER)
28
29#define BIG_CONSTANT(x) (x##LLU)
30
31#endif // !defined(_MSC_VER)
32
33
34uint32_t MurmurHash2(const void * key, int len, uint32_t seed)
35{
36 // 'm' and 'r' are mixing constants generated offline.
37 // They're not really 'magic', they just happen to work well.
38
39 const uint32_t m = 0x5bd1e995;
40 const int r = 24;
41
42 // Initialize the hash to a 'random' value
43
44 uint32_t h = seed ^ len;
45
46 // Mix 4 bytes at a time into the hash
47
48 const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
49
50 while (len >= 4)
51 {
52 uint32_t k;
53 memcpy(&k, data, sizeof(k));
54 k *= m;
55 k ^= k >> r;
56 k *= m;
57
58 h *= m;
59 h ^= k;
60
61 data += 4;
62 len -= 4;
63 }
64
65 // Handle the last few bytes of the input array
66
67 switch (len)
68 {
69 case 3: h ^= data[2] << 16;
70 case 2: h ^= data[1] << 8;
71 case 1: h ^= data[0];
72 h *= m;
73 };
74
75 // Do a few final mixes of the hash to ensure the last few
76 // bytes are well-incorporated.
77
78 h ^= h >> 13;
79 h *= m;
80 h ^= h >> 15;
81
82 return h;
83}
84
85// MurmurHash2, 64-bit versions, by Austin Appleby
86
87// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
88// and endian-ness issues if used across multiple platforms.
89
90// 64-bit hash for 64-bit platforms
91
92uint64_t MurmurHash64A(const void * key, int len, uint64_t seed)
93{
94 const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
95 const int r = 47;
96
97 uint64_t h = seed ^ (len * m);
98
99 const uint64_t * data = reinterpret_cast<const uint64_t *>(key);
100 const uint64_t * end = data + (len/8);
101
102 while (data != end)
103 {
104 uint64_t k = *data++;
105
106 k *= m;
107 k ^= k >> r;
108 k *= m;
109
110 h ^= k;
111 h *= m;
112 }
113
114 const unsigned char * data2 = reinterpret_cast<const unsigned char *>(data);
115
116 switch (len & 7)
117 {
118 case 7: h ^= static_cast<uint64_t>(data2[6]) << 48;
119 case 6: h ^= static_cast<uint64_t>(data2[5]) << 40;
120 case 5: h ^= static_cast<uint64_t>(data2[4]) << 32;
121 case 4: h ^= static_cast<uint64_t>(data2[3]) << 24;
122 case 3: h ^= static_cast<uint64_t>(data2[2]) << 16;
123 case 2: h ^= static_cast<uint64_t>(data2[1]) << 8;
124 case 1: h ^= static_cast<uint64_t>(data2[0]);
125 h *= m;
126 };
127
128 h ^= h >> r;
129 h *= m;
130 h ^= h >> r;
131
132 return h;
133}
134
135
136// 64-bit hash for 32-bit platforms
137
138uint64_t MurmurHash64B(const void * key, int len, uint64_t seed)
139{
140 const uint32_t m = 0x5bd1e995;
141 const int r = 24;
142
143 uint32_t h1 = static_cast<uint32_t>(seed) ^ len;
144 uint32_t h2 = static_cast<uint32_t>(seed >> 32);
145
146 const uint32_t * data = reinterpret_cast<const uint32_t *>(key);
147
148 while (len >= 8)
149 {
150 uint32_t k1 = *data++;
151 k1 *= m; k1 ^= k1 >> r; k1 *= m;
152 h1 *= m; h1 ^= k1;
153 len -= 4;
154
155 uint32_t k2 = *data++;
156 k2 *= m; k2 ^= k2 >> r; k2 *= m;
157 h2 *= m; h2 ^= k2;
158 len -= 4;
159 }
160
161 if (len >= 4)
162 {
163 uint32_t k1 = *data++;
164 k1 *= m; k1 ^= k1 >> r; k1 *= m;
165 h1 *= m; h1 ^= k1;
166 len -= 4;
167 }
168
169 switch (len)
170 {
171 case 3: h2 ^= reinterpret_cast<const unsigned char *>(data)[2] << 16;
172 case 2: h2 ^= reinterpret_cast<const unsigned char *>(data)[1] << 8;
173 case 1: h2 ^= reinterpret_cast<const unsigned char *>(data)[0];
174 h2 *= m;
175 };
176
177 h1 ^= h2 >> 18; h1 *= m;
178 h2 ^= h1 >> 22; h2 *= m;
179 h1 ^= h2 >> 17; h1 *= m;
180 h2 ^= h1 >> 19; h2 *= m;
181
182 uint64_t h = h1;
183
184 h = (h << 32) | h2;
185
186 return h;
187}
188
189// MurmurHash2A, by Austin Appleby
190
191// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
192// construction. Bulk speed should be identical to Murmur2, small-key speed
193// will be 10%-20% slower due to the added overhead at the end of the hash.
194
195// This variant fixes a minor issue where null keys were more likely to
196// collide with each other than expected, and also makes the function
197// more amenable to incremental implementations.
198
199#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
200
201uint32_t MurmurHash2A(const void * key, int len, uint32_t seed)
202{
203 const uint32_t m = 0x5bd1e995;
204 const int r = 24;
205 uint32_t l = len;
206
207 const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
208
209 uint32_t h = seed;
210
211 while (len >= 4)
212 {
213 uint32_t k = *reinterpret_cast<const uint32_t *>(data);
214 mmix(h,k);
215 data += 4;
216 len -= 4;
217 }
218
219 uint32_t t = 0;
220
221 switch (len)
222 {
223 case 3: t ^= data[2] << 16;
224 case 2: t ^= data[1] << 8;
225 case 1: t ^= data[0];
226 };
227
228 mmix(h,t);
229 mmix(h,l);
230
231 h ^= h >> 13;
232 h *= m;
233 h ^= h >> 15;
234
235 return h;
236}
237
238// MurmurHashNeutral2, by Austin Appleby
239
240// Same as MurmurHash2, but endian- and alignment-neutral.
241// Half the speed though, alas.
242
243uint32_t MurmurHashNeutral2(const void * key, int len, uint32_t seed)
244{
245 const uint32_t m = 0x5bd1e995;
246 const int r = 24;
247
248 uint32_t h = seed ^ len;
249
250 const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
251
252 while (len >= 4)
253 {
254 uint32_t k;
255
256 k = data[0];
257 k |= data[1] << 8;
258 k |= data[2] << 16;
259 k |= data[3] << 24;
260
261 k *= m;
262 k ^= k >> r;
263 k *= m;
264
265 h *= m;
266 h ^= k;
267
268 data += 4;
269 len -= 4;
270 }
271
272 switch (len)
273 {
274 case 3: h ^= data[2] << 16;
275 case 2: h ^= data[1] << 8;
276 case 1: h ^= data[0];
277 h *= m;
278 };
279
280 h ^= h >> 13;
281 h *= m;
282 h ^= h >> 15;
283
284 return h;
285}
286
287//-----------------------------------------------------------------------------
288// MurmurHashAligned2, by Austin Appleby
289
290// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
291// on certain platforms.
292
293// Performance will be lower than MurmurHash2
294
295#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
296
297
298uint32_t MurmurHashAligned2(const void * key, int len, uint32_t seed)
299{
300 const uint32_t m = 0x5bd1e995;
301 const int r = 24;
302
303 const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
304
305 uint32_t h = seed ^ len;
306
307 int align = reinterpret_cast<uint64_t>(data) & 3;
308
309 if (align && (len >= 4))
310 {
311 // Pre-load the temp registers
312
313 uint32_t t = 0, d = 0;
314
315 switch (align)
316 {
317 case 1: t |= data[2] << 16;
318 case 2: t |= data[1] << 8;
319 case 3: t |= data[0];
320 }
321
322 t <<= (8 * align);
323
324 data += 4-align;
325 len -= 4-align;
326
327 int sl = 8 * (4-align);
328 int sr = 8 * align;
329
330 // Mix
331
332 while (len >= 4)
333 {
334 d = *(reinterpret_cast<const uint32_t *>(data));
335 t = (t >> sr) | (d << sl);
336
337 uint32_t k = t;
338
339 MIX(h,k,m);
340
341 t = d;
342
343 data += 4;
344 len -= 4;
345 }
346
347 // Handle leftover data in temp registers
348
349 d = 0;
350
351 if (len >= align)
352 {
353 switch (align)
354 {
355 case 3: d |= data[2] << 16;
356 case 2: d |= data[1] << 8;
357 case 1: d |= data[0];
358 }
359
360 uint32_t k = (t >> sr) | (d << sl);
361 MIX(h,k,m);
362
363 data += align;
364 len -= align;
365
366 //----------
367 // Handle tail bytes
368
369 switch (len)
370 {
371 case 3: h ^= data[2] << 16;
372 case 2: h ^= data[1] << 8;
373 case 1: h ^= data[0];
374 h *= m;
375 };
376 }
377 else
378 {
379 switch (len)
380 {
381 case 3: d |= data[2] << 16;
382 case 2: d |= data[1] << 8;
383 case 1: d |= data[0];
384 case 0: h ^= (t >> sr) | (d << sl);
385 h *= m;
386 }
387 }
388
389 h ^= h >> 13;
390 h *= m;
391 h ^= h >> 15;
392
393 return h;
394 }
395 else
396 {
397 while (len >= 4)
398 {
399 uint32_t k = *reinterpret_cast<const uint32_t *>(data);
400
401 MIX(h,k,m);
402
403 data += 4;
404 len -= 4;
405 }
406
407 // Handle tail bytes
408
409 switch (len)
410 {
411 case 3: h ^= data[2] << 16;
412 case 2: h ^= data[1] << 8;
413 case 1: h ^= data[0];
414 h *= m;
415 };
416
417 h ^= h >> 13;
418 h *= m;
419 h ^= h >> 15;
420
421 return h;
422 }
423}
424