1/*
2 * Copyright 2017-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// Spooky Hash
18// A 128-bit noncryptographic hash, for checksums and table lookup
19// By Bob Jenkins. Public domain.
20// Oct 31 2010: published framework, disclaimer ShortHash isn't right
21// Nov 7 2010: disabled ShortHash
22// Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
23// April 10 2012: buffer overflow on platforms without unaligned reads
24// July 12 2012: was passing out variables in final to in/out in short
25// July 30 2012: I reintroduced the buffer overflow
26
27#include <folly/hash/SpookyHashV1.h>
28
29#include <folly/CppAttributes.h>
30
31#include <cstring>
32
33#define ALLOW_UNALIGNED_READS 1
34
35namespace folly {
36namespace hash {
37
38// clang-format off
39
40//
41// short hash ... it could be used on any message,
42// but it's used by Spooky just for short messages.
43//
44void SpookyHashV1::Short(
45 const void* message,
46 size_t length,
47 uint64_t *hash1,
48 uint64_t *hash2)
49{
50 uint64_t buf[2*sc_numVars];
51 union
52 {
53 const uint8_t *p8;
54 uint32_t *p32;
55 uint64_t *p64;
56 size_t i;
57 } u;
58
59 u.p8 = (const uint8_t *)message;
60
61 if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
62 {
63 memcpy(buf, message, length);
64 u.p64 = buf;
65 }
66
67 size_t remainder = length%32;
68 uint64_t a=*hash1;
69 uint64_t b=*hash2;
70 uint64_t c=sc_const;
71 uint64_t d=sc_const;
72
73 if (length > 15)
74 {
75 const uint64_t *end = u.p64 + (length/32)*4;
76
77 // handle all complete sets of 32 bytes
78 for (; u.p64 < end; u.p64 += 4)
79 {
80 c += u.p64[0];
81 d += u.p64[1];
82 ShortMix(a,b,c,d);
83 a += u.p64[2];
84 b += u.p64[3];
85 }
86
87 //Handle the case of 16+ remaining bytes.
88 if (remainder >= 16)
89 {
90 c += u.p64[0];
91 d += u.p64[1];
92 ShortMix(a,b,c,d);
93 u.p64 += 2;
94 remainder -= 16;
95 }
96 }
97
98 // Handle the last 0..15 bytes, and its length
99 d = ((uint64_t)length) << 56;
100 switch (remainder)
101 {
102 case 15:
103 d += ((uint64_t)u.p8[14]) << 48;
104 FOLLY_FALLTHROUGH;
105 case 14:
106 d += ((uint64_t)u.p8[13]) << 40;
107 FOLLY_FALLTHROUGH;
108 case 13:
109 d += ((uint64_t)u.p8[12]) << 32;
110 FOLLY_FALLTHROUGH;
111 case 12:
112 d += u.p32[2];
113 c += u.p64[0];
114 break;
115 case 11:
116 d += ((uint64_t)u.p8[10]) << 16;
117 FOLLY_FALLTHROUGH;
118 case 10:
119 d += ((uint64_t)u.p8[9]) << 8;
120 FOLLY_FALLTHROUGH;
121 case 9:
122 d += (uint64_t)u.p8[8];
123 FOLLY_FALLTHROUGH;
124 case 8:
125 c += u.p64[0];
126 break;
127 case 7:
128 c += ((uint64_t)u.p8[6]) << 48;
129 FOLLY_FALLTHROUGH;
130 case 6:
131 c += ((uint64_t)u.p8[5]) << 40;
132 FOLLY_FALLTHROUGH;
133 case 5:
134 c += ((uint64_t)u.p8[4]) << 32;
135 FOLLY_FALLTHROUGH;
136 case 4:
137 c += u.p32[0];
138 break;
139 case 3:
140 c += ((uint64_t)u.p8[2]) << 16;
141 FOLLY_FALLTHROUGH;
142 case 2:
143 c += ((uint64_t)u.p8[1]) << 8;
144 FOLLY_FALLTHROUGH;
145 case 1:
146 c += (uint64_t)u.p8[0];
147 break;
148 case 0:
149 c += sc_const;
150 d += sc_const;
151 }
152 ShortEnd(a,b,c,d);
153 *hash1 = a;
154 *hash2 = b;
155}
156
157
158
159
160// do the whole hash in one call
161void SpookyHashV1::Hash128(
162 const void *message,
163 size_t length,
164 uint64_t *hash1,
165 uint64_t *hash2)
166{
167 if (length < sc_bufSize)
168 {
169 Short(message, length, hash1, hash2);
170 return;
171 }
172
173 uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
174 uint64_t buf[sc_numVars];
175 uint64_t *end;
176 union
177 {
178 const uint8_t *p8;
179 uint64_t *p64;
180 size_t i;
181 } u;
182 size_t remainder;
183
184 h0=h3=h6=h9 = *hash1;
185 h1=h4=h7=h10 = *hash2;
186 h2=h5=h8=h11 = sc_const;
187
188 u.p8 = (const uint8_t *)message;
189 end = u.p64 + (length/sc_blockSize)*sc_numVars;
190
191 // handle all whole sc_blockSize blocks of bytes
192 if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
193 {
194 while (u.p64 < end)
195 {
196 Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
197 u.p64 += sc_numVars;
198 }
199 }
200 else
201 {
202 while (u.p64 < end)
203 {
204 memcpy(buf, u.p64, sc_blockSize);
205 Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
206 u.p64 += sc_numVars;
207 }
208 }
209
210 // handle the last partial block of sc_blockSize bytes
211 remainder = (length - ((const uint8_t *)end-(const uint8_t *)message));
212 memcpy(buf, end, remainder);
213 memset(((uint8_t *)buf)+remainder, 0, sc_blockSize-remainder);
214 ((uint8_t*)buf)[sc_blockSize - 1] = uint8_t(remainder);
215 Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
216
217 // do some final mixing
218 End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
219 *hash1 = h0;
220 *hash2 = h1;
221}
222
223
224
225// init spooky state
226void SpookyHashV1::Init(uint64_t seed1, uint64_t seed2)
227{
228 m_length = 0;
229 m_remainder = 0;
230 m_state[0] = seed1;
231 m_state[1] = seed2;
232}
233
234
235// add a message fragment to the state
236void SpookyHashV1::Update(const void *message, size_t length)
237{
238 uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
239 size_t newLength = length + m_remainder;
240 uint8_t remainder;
241 union
242 {
243 const uint8_t *p8;
244 uint64_t *p64;
245 size_t i;
246 } u;
247 const uint64_t *end;
248
249 // Is this message fragment too short? If it is, stuff it away.
250 if (newLength < sc_bufSize)
251 {
252 memcpy(&((uint8_t *)m_data)[m_remainder], message, length);
253 m_length = length + m_length;
254 m_remainder = (uint8_t)newLength;
255 return;
256 }
257
258 // init the variables
259 if (m_length < sc_bufSize)
260 {
261 h0=h3=h6=h9 = m_state[0];
262 h1=h4=h7=h10 = m_state[1];
263 h2=h5=h8=h11 = sc_const;
264 }
265 else
266 {
267 h0 = m_state[0];
268 h1 = m_state[1];
269 h2 = m_state[2];
270 h3 = m_state[3];
271 h4 = m_state[4];
272 h5 = m_state[5];
273 h6 = m_state[6];
274 h7 = m_state[7];
275 h8 = m_state[8];
276 h9 = m_state[9];
277 h10 = m_state[10];
278 h11 = m_state[11];
279 }
280 m_length = length + m_length;
281
282 // if we've got anything stuffed away, use it now
283 if (m_remainder)
284 {
285 uint8_t prefix = sc_bufSize-m_remainder;
286 memcpy(&(((uint8_t *)m_data)[m_remainder]), message, prefix);
287 u.p64 = m_data;
288 Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
289 Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
290 u.p8 = ((const uint8_t *)message) + prefix;
291 length -= prefix;
292 }
293 else
294 {
295 u.p8 = (const uint8_t *)message;
296 }
297
298 // handle all whole blocks of sc_blockSize bytes
299 end = u.p64 + (length/sc_blockSize)*sc_numVars;
300 remainder = (uint8_t)(length-((const uint8_t *)end-u.p8));
301 if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
302 {
303 while (u.p64 < end)
304 {
305 Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
306 u.p64 += sc_numVars;
307 }
308 }
309 else
310 {
311 while (u.p64 < end)
312 {
313 memcpy(m_data, u.p8, sc_blockSize);
314 Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
315 u.p64 += sc_numVars;
316 }
317 }
318
319 // stuff away the last few bytes
320 m_remainder = remainder;
321 memcpy(m_data, end, remainder);
322
323 // stuff away the variables
324 m_state[0] = h0;
325 m_state[1] = h1;
326 m_state[2] = h2;
327 m_state[3] = h3;
328 m_state[4] = h4;
329 m_state[5] = h5;
330 m_state[6] = h6;
331 m_state[7] = h7;
332 m_state[8] = h8;
333 m_state[9] = h9;
334 m_state[10] = h10;
335 m_state[11] = h11;
336}
337
338
339// report the hash for the concatenation of all message fragments so far
340void SpookyHashV1::Final(uint64_t *hash1, uint64_t *hash2)
341{
342 // init the variables
343 if (m_length < sc_bufSize)
344 {
345 *hash1 = m_state[0];
346 *hash2 = m_state[1];
347 Short( m_data, m_length, hash1, hash2);
348 return;
349 }
350
351 const uint64_t *data = (const uint64_t *)m_data;
352 uint8_t remainder = m_remainder;
353
354 uint64_t h0 = m_state[0];
355 uint64_t h1 = m_state[1];
356 uint64_t h2 = m_state[2];
357 uint64_t h3 = m_state[3];
358 uint64_t h4 = m_state[4];
359 uint64_t h5 = m_state[5];
360 uint64_t h6 = m_state[6];
361 uint64_t h7 = m_state[7];
362 uint64_t h8 = m_state[8];
363 uint64_t h9 = m_state[9];
364 uint64_t h10 = m_state[10];
365 uint64_t h11 = m_state[11];
366
367 if (remainder >= sc_blockSize)
368 {
369 // m_data can contain two blocks; handle any whole first block
370 Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
371 data += sc_numVars;
372 remainder -= sc_blockSize;
373 }
374
375 // mix in the last partial block, and the length mod sc_blockSize
376 memset(&((uint8_t *)data)[remainder], 0, (sc_blockSize-remainder));
377
378 ((uint8_t *)data)[sc_blockSize-1] = remainder;
379 Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
380
381 // do some final mixing
382 End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
383
384 *hash1 = h0;
385 *hash2 = h1;
386}
387
388// clang-format on
389
390} // namespace hash
391} // namespace folly
392