SpookyHashV1.cpp source code [folly/hash/SpookyHashV1.cpp]

1	/*
2	* Copyright 2017-present Facebook, Inc.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	// Spooky Hash
18	// A 128-bit noncryptographic hash, for checksums and table lookup
19	// By Bob Jenkins. Public domain.
20	// Oct 31 2010: published framework, disclaimer ShortHash isn't right
21	// Nov 7 2010: disabled ShortHash
22	// Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
23	// April 10 2012: buffer overflow on platforms without unaligned reads
24	// July 12 2012: was passing out variables in final to in/out in short
25	// July 30 2012: I reintroduced the buffer overflow
26
27	#include <folly/hash/SpookyHashV1.h>
28
29	#include <folly/CppAttributes.h>
30
31	#include <cstring>
32
33	#define ALLOW_UNALIGNED_READS 1
34
35	namespace folly {
36	namespace hash {
37
38	// clang-format off
39
40	//
41	// short hash ... it could be used on any message,
42	// but it's used by Spooky just for short messages.
43	//
44	void SpookyHashV1::Short(
45	const void* message,
46	size_t length,
47	uint64_t *hash1,
48	uint64_t *hash2)
49	{
50	uint64_t buf[`2`*sc_numVars];
51	union
52	{
53	const uint8_t *p8;
54	uint32_t *p32;
55	uint64_t *p64;
56	size_t i;
57	} u;
58
59	u.p8 = (const uint8_t *)message;
60
61	if (!ALLOW_UNALIGNED_READS && (u.i & `0x7`))
62	{
63	memcpy(buf, message, length);
64	u.p64 = buf;
65	}
66
67	size_t remainder = length%`32`;
68	uint64_t a=*hash1;
69	uint64_t b=*hash2;
70	uint64_t c=sc_const;
71	uint64_t d=sc_const;
72
73	if (length > `15`)
74	{
75	const uint64_t end = u.p64 + (length/`32`)`4`;
76
77	// handle all complete sets of 32 bytes
78	for (; u.p64 < end; u.p64 += `4`)
79	{
80	c += u.p64[`0`];
81	d += u.p64[`1`];
82	ShortMix(a,b,c,d);
83	a += u.p64[`2`];
84	b += u.p64[`3`];
85	}
86
87	//Handle the case of 16+ remaining bytes.
88	if (remainder >= `16`)
89	{
90	c += u.p64[`0`];
91	d += u.p64[`1`];
92	ShortMix(a,b,c,d);
93	u.p64 += `2`;
94	remainder -= `16`;
95	}
96	}
97
98	// Handle the last 0..15 bytes, and its length
99	d = ((uint64_t)length) << `56`;
100	switch (remainder)
101	{
102	case `15`:
103	d += ((uint64_t)u.p8[`14`]) << `48`;
104	FOLLY_FALLTHROUGH;
105	case `14`:
106	d += ((uint64_t)u.p8[`13`]) << `40`;
107	FOLLY_FALLTHROUGH;
108	case `13`:
109	d += ((uint64_t)u.p8[`12`]) << `32`;
110	FOLLY_FALLTHROUGH;
111	case `12`:
112	d += u.p32[`2`];
113	c += u.p64[`0`];
114	break;
115	case `11`:
116	d += ((uint64_t)u.p8[`10`]) << `16`;
117	FOLLY_FALLTHROUGH;
118	case `10`:
119	d += ((uint64_t)u.p8[`9`]) << `8`;
120	FOLLY_FALLTHROUGH;
121	case `9`:
122	d += (uint64_t)u.p8[`8`];
123	FOLLY_FALLTHROUGH;
124	case `8`:
125	c += u.p64[`0`];
126	break;
127	case `7`:
128	c += ((uint64_t)u.p8[`6`]) << `48`;
129	FOLLY_FALLTHROUGH;
130	case `6`:
131	c += ((uint64_t)u.p8[`5`]) << `40`;
132	FOLLY_FALLTHROUGH;
133	case `5`:
134	c += ((uint64_t)u.p8[`4`]) << `32`;
135	FOLLY_FALLTHROUGH;
136	case `4`:
137	c += u.p32[`0`];
138	break;
139	case `3`:
140	c += ((uint64_t)u.p8[`2`]) << `16`;
141	FOLLY_FALLTHROUGH;
142	case `2`:
143	c += ((uint64_t)u.p8[`1`]) << `8`;
144	FOLLY_FALLTHROUGH;
145	case `1`:
146	c += (uint64_t)u.p8[`0`];
147	break;
148	case `0`:
149	c += sc_const;
150	d += sc_const;
151	}
152	ShortEnd(a,b,c,d);
153	*hash1 = a;
154	*hash2 = b;
155	}
156
157
158
159
160	// do the whole hash in one call
161	void SpookyHashV1::Hash128(
162	const void *message,
163	size_t length,
164	uint64_t *hash1,
165	uint64_t *hash2)
166	{
167	if (length < sc_bufSize)
168	{
169	Short(message, length, hash1, hash2);
170	return;
171	}
172
173	uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
174	uint64_t buf[sc_numVars];
175	uint64_t *end;
176	union
177	{
178	const uint8_t *p8;
179	uint64_t *p64;
180	size_t i;
181	} u;
182	size_t remainder;
183
184	h0=h3=h6=h9 = *hash1;
185	h1=h4=h7=h10 = *hash2;
186	h2=h5=h8=h11 = sc_const;
187
188	u.p8 = (const uint8_t *)message;
189	end = u.p64 + (length/sc_blockSize)*sc_numVars;
190
191	// handle all whole sc_blockSize blocks of bytes
192	if (ALLOW_UNALIGNED_READS \|\| ((u.i & `0x7`) == `0`))
193	{
194	while (u.p64 < end)
195	{
196	Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
197	u.p64 += sc_numVars;
198	}
199	}
200	else
201	{
202	while (u.p64 < end)
203	{
204	memcpy(buf, u.p64, sc_blockSize);
205	Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
206	u.p64 += sc_numVars;
207	}
208	}
209
210	// handle the last partial block of sc_blockSize bytes
211	remainder = (length - ((const uint8_t )end-(const* uint8_t *)message));
212	memcpy(buf, end, remainder);
213	memset(((uint8_t *)buf)+remainder, `0`, sc_blockSize-remainder);
214	((uint8_t*)buf)[sc_blockSize - `1`] = uint8_t(remainder);
215	Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
216
217	// do some final mixing
218	End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
219	*hash1 = h0;
220	*hash2 = h1;
221	}
222
223
224
225	// init spooky state
226	void SpookyHashV1::Init(uint64_t seed1, uint64_t seed2)
227	{
228	m_length = `0`;
229	m_remainder = `0`;
230	m_state[`0`] = seed1;
231	m_state[`1`] = seed2;
232	}
233
234
235	// add a message fragment to the state
236	void SpookyHashV1::Update(const void *message, size_t length)
237	{
238	uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
239	size_t newLength = length + m_remainder;
240	uint8_t remainder;
241	union
242	{
243	const uint8_t *p8;
244	uint64_t *p64;
245	size_t i;
246	} u;
247	const uint64_t *end;
248
249	// Is this message fragment too short? If it is, stuff it away.
250	if (newLength < sc_bufSize)
251	{
252	memcpy(&((uint8_t *)m_data)[m_remainder], message, length);
253	m_length = length + m_length;
254	m_remainder = (uint8_t)newLength;
255	return;
256	}
257
258	// init the variables
259	if (m_length < sc_bufSize)
260	{
261	h0=h3=h6=h9 = m_state[`0`];
262	h1=h4=h7=h10 = m_state[`1`];
263	h2=h5=h8=h11 = sc_const;
264	}
265	else
266	{
267	h0 = m_state[`0`];
268	h1 = m_state[`1`];
269	h2 = m_state[`2`];
270	h3 = m_state[`3`];
271	h4 = m_state[`4`];
272	h5 = m_state[`5`];
273	h6 = m_state[`6`];
274	h7 = m_state[`7`];
275	h8 = m_state[`8`];
276	h9 = m_state[`9`];
277	h10 = m_state[`10`];
278	h11 = m_state[`11`];
279	}
280	m_length = length + m_length;
281
282	// if we've got anything stuffed away, use it now
283	if (m_remainder)
284	{
285	uint8_t prefix = sc_bufSize-m_remainder;
286	memcpy(&(((uint8_t *)m_data)[m_remainder]), message, prefix);
287	u.p64 = m_data;
288	Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
289	Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
290	u.p8 = ((const uint8_t *)message) + prefix;
291	length -= prefix;
292	}
293	else
294	{
295	u.p8 = (const uint8_t *)message;
296	}
297
298	// handle all whole blocks of sc_blockSize bytes
299	end = u.p64 + (length/sc_blockSize)*sc_numVars;
300	remainder = (uint8_t)(length-((const uint8_t *)end-u.p8));
301	if (ALLOW_UNALIGNED_READS \|\| (u.i & `0x7`) == `0`)
302	{
303	while (u.p64 < end)
304	{
305	Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
306	u.p64 += sc_numVars;
307	}
308	}
309	else
310	{
311	while (u.p64 < end)
312	{
313	memcpy(m_data, u.p8, sc_blockSize);
314	Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
315	u.p64 += sc_numVars;
316	}
317	}
318
319	// stuff away the last few bytes
320	m_remainder = remainder;
321	memcpy(m_data, end, remainder);
322
323	// stuff away the variables
324	m_state[`0`] = h0;
325	m_state[`1`] = h1;
326	m_state[`2`] = h2;
327	m_state[`3`] = h3;
328	m_state[`4`] = h4;
329	m_state[`5`] = h5;
330	m_state[`6`] = h6;
331	m_state[`7`] = h7;
332	m_state[`8`] = h8;
333	m_state[`9`] = h9;
334	m_state[`10`] = h10;
335	m_state[`11`] = h11;
336	}
337
338
339	// report the hash for the concatenation of all message fragments so far
340	void SpookyHashV1::Final(uint64_t hash1, uint64_t hash2)
341	{
342	// init the variables
343	if (m_length < sc_bufSize)
344	{
345	*hash1 = m_state[`0`];
346	*hash2 = m_state[`1`];
347	Short( m_data, m_length, hash1, hash2);
348	return;
349	}
350
351	const uint64_t data = (const* uint64_t *)m_data;
352	uint8_t remainder = m_remainder;
353
354	uint64_t h0 = m_state[`0`];
355	uint64_t h1 = m_state[`1`];
356	uint64_t h2 = m_state[`2`];
357	uint64_t h3 = m_state[`3`];
358	uint64_t h4 = m_state[`4`];
359	uint64_t h5 = m_state[`5`];
360	uint64_t h6 = m_state[`6`];
361	uint64_t h7 = m_state[`7`];
362	uint64_t h8 = m_state[`8`];
363	uint64_t h9 = m_state[`9`];
364	uint64_t h10 = m_state[`10`];
365	uint64_t h11 = m_state[`11`];
366
367	if (remainder >= sc_blockSize)
368	{
369	// m_data can contain two blocks; handle any whole first block
370	Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
371	data += sc_numVars;
372	remainder -= sc_blockSize;
373	}
374
375	// mix in the last partial block, and the length mod sc_blockSize
376	memset(&((uint8_t *)data)[remainder], `0`, (sc_blockSize-remainder));
377
378	((uint8_t *)data)[sc_blockSize-`1`] = remainder;
379	Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
380
381	// do some final mixing
382	End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
383
384	*hash1 = h0;
385	*hash2 = h1;
386	}
387
388	// clang-format on
389
390	} // namespace hash
391	} // namespace folly
392

Browse the source code of folly/hash/SpookyHashV1.cpp