murmurhash2.cpp source code [ClickHouse/contrib/murmurhash/src/murmurhash2.cpp]

1	// MurmurHash2 was written by Austin Appleby, and is placed in the public
2	// domain. The author hereby disclaims copyright to this source code.
3
4	// Note - This code makes a few assumptions about how your machine behaves -
5
6	// 1. We can read a 4-byte value from any address without crashing
7	// 2. sizeof(int) == 4
8
9	// And it has a few limitations -
10
11	// 1. It will not work incrementally.
12	// 2. It will not produce the same results on little-endian and big-endian
13	// machines.
14
15	#include "murmurhash2.h"
16	#include <cstring>
17
18	// Platform-specific functions and macros
19	// Microsoft Visual Studio
20
21	#if defined(_MSC_VER)
22
23	#define BIG_CONSTANT(x) (x)
24
25	// Other compilers
26
27	#else // defined(_MSC_VER)
28
29	#define BIG_CONSTANT(x) (x##LLU)
30
31	#endif // !defined(_MSC_VER)
32
33
34	uint32_t MurmurHash2(const void * key, int len, uint32_t seed)
35	{
36	// 'm' and 'r' are mixing constants generated offline.
37	// They're not really 'magic', they just happen to work well.
38
39	const uint32_t m = `0x5bd1e995`;
40	const int r = `24`;
41
42	// Initialize the hash to a 'random' value
43
44	uint32_t h = seed ^ len;
45
46	// Mix 4 bytes at a time into the hash
47
48	const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
49
50	while (len >= `4`)
51	{
52	uint32_t k;
53	memcpy(&k, data, sizeof(k));
54	k *= m;
55	k ^= k >> r;
56	k *= m;
57
58	h *= m;
59	h ^= k;
60
61	data += `4`;
62	len -= `4`;
63	}
64
65	// Handle the last few bytes of the input array
66
67	switch (len)
68	{
69	case `3`: h ^= data[`2`] << `16`;
70	case `2`: h ^= data[`1`] << `8`;
71	case `1`: h ^= data[`0`];
72	h *= m;
73	};
74
75	// Do a few final mixes of the hash to ensure the last few
76	// bytes are well-incorporated.
77
78	h ^= h >> `13`;
79	h *= m;
80	h ^= h >> `15`;
81
82	return h;
83	}
84
85	// MurmurHash2, 64-bit versions, by Austin Appleby
86
87	// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
88	// and endian-ness issues if used across multiple platforms.
89
90	// 64-bit hash for 64-bit platforms
91
92	uint64_t MurmurHash64A(const void * key, int len, uint64_t seed)
93	{
94	const uint64_t m = BIG_CONSTANT(`0xc6a4a7935bd1e995`);
95	const int r = `47`;
96
97	uint64_t h = seed ^ (len * m);
98
99	const uint64_t * data = reinterpret_cast<const uint64_t *>(key);
100	const uint64_t * end = data + (len/`8`);
101
102	while (data != end)
103	{
104	uint64_t k = *data++;
105
106	k *= m;
107	k ^= k >> r;
108	k *= m;
109
110	h ^= k;
111	h *= m;
112	}
113
114	const unsigned char * data2 = reinterpret_cast<const unsigned char *>(data);
115
116	switch (len & `7`)
117	{
118	case `7`: h ^= static_cast<uint64_t>(data2[`6`]) << `48`;
119	case `6`: h ^= static_cast<uint64_t>(data2[`5`]) << `40`;
120	case `5`: h ^= static_cast<uint64_t>(data2[`4`]) << `32`;
121	case `4`: h ^= static_cast<uint64_t>(data2[`3`]) << `24`;
122	case `3`: h ^= static_cast<uint64_t>(data2[`2`]) << `16`;
123	case `2`: h ^= static_cast<uint64_t>(data2[`1`]) << `8`;
124	case `1`: h ^= static_cast<uint64_t>(data2[`0`]);
125	h *= m;
126	};
127
128	h ^= h >> r;
129	h *= m;
130	h ^= h >> r;
131
132	return h;
133	}
134
135
136	// 64-bit hash for 32-bit platforms
137
138	uint64_t MurmurHash64B(const void * key, int len, uint64_t seed)
139	{
140	const uint32_t m = `0x5bd1e995`;
141	const int r = `24`;
142
143	uint32_t h1 = static_cast<uint32_t>(seed) ^ len;
144	uint32_t h2 = static_cast<uint32_t>(seed >> `32`);
145
146	const uint32_t * data = reinterpret_cast<const uint32_t *>(key);
147
148	while (len >= `8`)
149	{
150	uint32_t k1 = *data++;
151	k1 = m; k1 ^= k1 >> r; k1 = m;
152	h1 *= m; h1 ^= k1;
153	len -= `4`;
154
155	uint32_t k2 = *data++;
156	k2 = m; k2 ^= k2 >> r; k2 = m;
157	h2 *= m; h2 ^= k2;
158	len -= `4`;
159	}
160
161	if (len >= `4`)
162	{
163	uint32_t k1 = *data++;
164	k1 = m; k1 ^= k1 >> r; k1 = m;
165	h1 *= m; h1 ^= k1;
166	len -= `4`;
167	}
168
169	switch (len)
170	{
171	case `3`: h2 ^= reinterpret_cast<const unsigned char *>(data)[`2`] << `16`;
172	case `2`: h2 ^= reinterpret_cast<const unsigned char *>(data)[`1`] << `8`;
173	case `1`: h2 ^= reinterpret_cast<const unsigned char *>(data)[`0`];
174	h2 *= m;
175	};
176
177	h1 ^= h2 >> `18`; h1 *= m;
178	h2 ^= h1 >> `22`; h2 *= m;
179	h1 ^= h2 >> `17`; h1 *= m;
180	h2 ^= h1 >> `19`; h2 *= m;
181
182	uint64_t h = h1;
183
184	h = (h << `32`) \| h2;
185
186	return h;
187	}
188
189	// MurmurHash2A, by Austin Appleby
190
191	// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
192	// construction. Bulk speed should be identical to Murmur2, small-key speed
193	// will be 10%-20% slower due to the added overhead at the end of the hash.
194
195	// This variant fixes a minor issue where null keys were more likely to
196	// collide with each other than expected, and also makes the function
197	// more amenable to incremental implementations.
198
199	#define mmix(h,k) { k = m; k ^= k >> r; k = m; h *= m; h ^= k; }
200
201	uint32_t MurmurHash2A(const void * key, int len, uint32_t seed)
202	{
203	const uint32_t m = `0x5bd1e995`;
204	const int r = `24`;
205	uint32_t l = len;
206
207	const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
208
209	uint32_t h = seed;
210
211	while (len >= `4`)
212	{
213	uint32_t k = *reinterpret_cast<const uint32_t *>(data);
214	mmix(h,k);
215	data += `4`;
216	len -= `4`;
217	}
218
219	uint32_t t = `0`;
220
221	switch (len)
222	{
223	case `3`: t ^= data[`2`] << `16`;
224	case `2`: t ^= data[`1`] << `8`;
225	case `1`: t ^= data[`0`];
226	};
227
228	mmix(h,t);
229	mmix(h,l);
230
231	h ^= h >> `13`;
232	h *= m;
233	h ^= h >> `15`;
234
235	return h;
236	}
237
238	// MurmurHashNeutral2, by Austin Appleby
239
240	// Same as MurmurHash2, but endian- and alignment-neutral.
241	// Half the speed though, alas.
242
243	uint32_t MurmurHashNeutral2(const void * key, int len, uint32_t seed)
244	{
245	const uint32_t m = `0x5bd1e995`;
246	const int r = `24`;
247
248	uint32_t h = seed ^ len;
249
250	const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
251
252	while (len >= `4`)
253	{
254	uint32_t k;
255
256	k = data[`0`];
257	k \|= data[`1`] << `8`;
258	k \|= data[`2`] << `16`;
259	k \|= data[`3`] << `24`;
260
261	k *= m;
262	k ^= k >> r;
263	k *= m;
264
265	h *= m;
266	h ^= k;
267
268	data += `4`;
269	len -= `4`;
270	}
271
272	switch (len)
273	{
274	case `3`: h ^= data[`2`] << `16`;
275	case `2`: h ^= data[`1`] << `8`;
276	case `1`: h ^= data[`0`];
277	h *= m;
278	};
279
280	h ^= h >> `13`;
281	h *= m;
282	h ^= h >> `15`;
283
284	return h;
285	}
286
287	//-----------------------------------------------------------------------------
288	// MurmurHashAligned2, by Austin Appleby
289
290	// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
291	// on certain platforms.
292
293	// Performance will be lower than MurmurHash2
294
295	#define MIX(h,k,m) { k = m; k ^= k >> r; k = m; h *= m; h ^= k; }
296
297
298	uint32_t MurmurHashAligned2(const void * key, int len, uint32_t seed)
299	{
300	const uint32_t m = `0x5bd1e995`;
301	const int r = `24`;
302
303	const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
304
305	uint32_t h = seed ^ len;
306
307	int align = reinterpret_cast<uint64_t>(data) & `3`;
308
309	if (align && (len >= `4`))
310	{
311	// Pre-load the temp registers
312
313	uint32_t t = `0`, d = `0`;
314
315	switch (align)
316	{
317	case `1`: t \|= data[`2`] << `16`;
318	case `2`: t \|= data[`1`] << `8`;
319	case `3`: t \|= data[`0`];
320	}
321
322	t <<= (`8` * align);
323
324	data += `4`-align;
325	len -= `4`-align;
326
327	int sl = `8` * (`4`-align);
328	int sr = `8` * align;
329
330	// Mix
331
332	while (len >= `4`)
333	{
334	d = (reinterpret_cast<const* uint32_t *>(data));
335	t = (t >> sr) \| (d << sl);
336
337	uint32_t k = t;
338
339	MIX(h,k,m);
340
341	t = d;
342
343	data += `4`;
344	len -= `4`;
345	}
346
347	// Handle leftover data in temp registers
348
349	d = `0`;
350
351	if (len >= align)
352	{
353	switch (align)
354	{
355	case `3`: d \|= data[`2`] << `16`;
356	case `2`: d \|= data[`1`] << `8`;
357	case `1`: d \|= data[`0`];
358	}
359
360	uint32_t k = (t >> sr) \| (d << sl);
361	MIX(h,k,m);
362
363	data += align;
364	len -= align;
365
366	//----------
367	// Handle tail bytes
368
369	switch (len)
370	{
371	case `3`: h ^= data[`2`] << `16`;
372	case `2`: h ^= data[`1`] << `8`;
373	case `1`: h ^= data[`0`];
374	h *= m;
375	};
376	}
377	else
378	{
379	switch (len)
380	{
381	case `3`: d \|= data[`2`] << `16`;
382	case `2`: d \|= data[`1`] << `8`;
383	case `1`: d \|= data[`0`];
384	case `0`: h ^= (t >> sr) \| (d << sl);
385	h *= m;
386	}
387	}
388
389	h ^= h >> `13`;
390	h *= m;
391	h ^= h >> `15`;
392
393	return h;
394	}
395	else
396	{
397	while (len >= `4`)
398	{
399	uint32_t k = *reinterpret_cast<const uint32_t *>(data);
400
401	MIX(h,k,m);
402
403	data += `4`;
404	len -= `4`;
405	}
406
407	// Handle tail bytes
408
409	switch (len)
410	{
411	case `3`: h ^= data[`2`] << `16`;
412	case `2`: h ^= data[`1`] << `8`;
413	case `1`: h ^= data[`0`];
414	h *= m;
415	};
416
417	h ^= h >> `13`;
418	h *= m;
419	h ^= h >> `15`;
420
421	return h;
422	}
423	}
424

Browse the source code of ClickHouse/contrib/murmurhash/src/murmurhash2.cpp