1 | /* |
2 | * Copyright 2017-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | // Spooky Hash |
18 | // A 128-bit noncryptographic hash, for checksums and table lookup |
19 | // By Bob Jenkins. Public domain. |
20 | // Oct 31 2010: published framework, disclaimer ShortHash isn't right |
21 | // Nov 7 2010: disabled ShortHash |
22 | // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again |
23 | // April 10 2012: buffer overflow on platforms without unaligned reads |
24 | // July 12 2012: was passing out variables in final to in/out in short |
25 | // July 30 2012: I reintroduced the buffer overflow |
26 | |
27 | #include <folly/hash/SpookyHashV1.h> |
28 | |
29 | #include <folly/CppAttributes.h> |
30 | |
31 | #include <cstring> |
32 | |
33 | #define ALLOW_UNALIGNED_READS 1 |
34 | |
35 | namespace folly { |
36 | namespace hash { |
37 | |
38 | // clang-format off |
39 | |
40 | // |
41 | // short hash ... it could be used on any message, |
42 | // but it's used by Spooky just for short messages. |
43 | // |
44 | void SpookyHashV1::Short( |
45 | const void* message, |
46 | size_t length, |
47 | uint64_t *hash1, |
48 | uint64_t *hash2) |
49 | { |
50 | uint64_t buf[2*sc_numVars]; |
51 | union |
52 | { |
53 | const uint8_t *p8; |
54 | uint32_t *p32; |
55 | uint64_t *p64; |
56 | size_t i; |
57 | } u; |
58 | |
59 | u.p8 = (const uint8_t *)message; |
60 | |
61 | if (!ALLOW_UNALIGNED_READS && (u.i & 0x7)) |
62 | { |
63 | memcpy(buf, message, length); |
64 | u.p64 = buf; |
65 | } |
66 | |
67 | size_t remainder = length%32; |
68 | uint64_t a=*hash1; |
69 | uint64_t b=*hash2; |
70 | uint64_t c=sc_const; |
71 | uint64_t d=sc_const; |
72 | |
73 | if (length > 15) |
74 | { |
75 | const uint64_t *end = u.p64 + (length/32)*4; |
76 | |
77 | // handle all complete sets of 32 bytes |
78 | for (; u.p64 < end; u.p64 += 4) |
79 | { |
80 | c += u.p64[0]; |
81 | d += u.p64[1]; |
82 | ShortMix(a,b,c,d); |
83 | a += u.p64[2]; |
84 | b += u.p64[3]; |
85 | } |
86 | |
87 | //Handle the case of 16+ remaining bytes. |
88 | if (remainder >= 16) |
89 | { |
90 | c += u.p64[0]; |
91 | d += u.p64[1]; |
92 | ShortMix(a,b,c,d); |
93 | u.p64 += 2; |
94 | remainder -= 16; |
95 | } |
96 | } |
97 | |
98 | // Handle the last 0..15 bytes, and its length |
99 | d = ((uint64_t)length) << 56; |
100 | switch (remainder) |
101 | { |
102 | case 15: |
103 | d += ((uint64_t)u.p8[14]) << 48; |
104 | FOLLY_FALLTHROUGH; |
105 | case 14: |
106 | d += ((uint64_t)u.p8[13]) << 40; |
107 | FOLLY_FALLTHROUGH; |
108 | case 13: |
109 | d += ((uint64_t)u.p8[12]) << 32; |
110 | FOLLY_FALLTHROUGH; |
111 | case 12: |
112 | d += u.p32[2]; |
113 | c += u.p64[0]; |
114 | break; |
115 | case 11: |
116 | d += ((uint64_t)u.p8[10]) << 16; |
117 | FOLLY_FALLTHROUGH; |
118 | case 10: |
119 | d += ((uint64_t)u.p8[9]) << 8; |
120 | FOLLY_FALLTHROUGH; |
121 | case 9: |
122 | d += (uint64_t)u.p8[8]; |
123 | FOLLY_FALLTHROUGH; |
124 | case 8: |
125 | c += u.p64[0]; |
126 | break; |
127 | case 7: |
128 | c += ((uint64_t)u.p8[6]) << 48; |
129 | FOLLY_FALLTHROUGH; |
130 | case 6: |
131 | c += ((uint64_t)u.p8[5]) << 40; |
132 | FOLLY_FALLTHROUGH; |
133 | case 5: |
134 | c += ((uint64_t)u.p8[4]) << 32; |
135 | FOLLY_FALLTHROUGH; |
136 | case 4: |
137 | c += u.p32[0]; |
138 | break; |
139 | case 3: |
140 | c += ((uint64_t)u.p8[2]) << 16; |
141 | FOLLY_FALLTHROUGH; |
142 | case 2: |
143 | c += ((uint64_t)u.p8[1]) << 8; |
144 | FOLLY_FALLTHROUGH; |
145 | case 1: |
146 | c += (uint64_t)u.p8[0]; |
147 | break; |
148 | case 0: |
149 | c += sc_const; |
150 | d += sc_const; |
151 | } |
152 | ShortEnd(a,b,c,d); |
153 | *hash1 = a; |
154 | *hash2 = b; |
155 | } |
156 | |
157 | |
158 | |
159 | |
160 | // do the whole hash in one call |
161 | void SpookyHashV1::Hash128( |
162 | const void *message, |
163 | size_t length, |
164 | uint64_t *hash1, |
165 | uint64_t *hash2) |
166 | { |
167 | if (length < sc_bufSize) |
168 | { |
169 | Short(message, length, hash1, hash2); |
170 | return; |
171 | } |
172 | |
173 | uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; |
174 | uint64_t buf[sc_numVars]; |
175 | uint64_t *end; |
176 | union |
177 | { |
178 | const uint8_t *p8; |
179 | uint64_t *p64; |
180 | size_t i; |
181 | } u; |
182 | size_t remainder; |
183 | |
184 | h0=h3=h6=h9 = *hash1; |
185 | h1=h4=h7=h10 = *hash2; |
186 | h2=h5=h8=h11 = sc_const; |
187 | |
188 | u.p8 = (const uint8_t *)message; |
189 | end = u.p64 + (length/sc_blockSize)*sc_numVars; |
190 | |
191 | // handle all whole sc_blockSize blocks of bytes |
192 | if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0)) |
193 | { |
194 | while (u.p64 < end) |
195 | { |
196 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
197 | u.p64 += sc_numVars; |
198 | } |
199 | } |
200 | else |
201 | { |
202 | while (u.p64 < end) |
203 | { |
204 | memcpy(buf, u.p64, sc_blockSize); |
205 | Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
206 | u.p64 += sc_numVars; |
207 | } |
208 | } |
209 | |
210 | // handle the last partial block of sc_blockSize bytes |
211 | remainder = (length - ((const uint8_t *)end-(const uint8_t *)message)); |
212 | memcpy(buf, end, remainder); |
213 | memset(((uint8_t *)buf)+remainder, 0, sc_blockSize-remainder); |
214 | ((uint8_t*)buf)[sc_blockSize - 1] = uint8_t(remainder); |
215 | Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
216 | |
217 | // do some final mixing |
218 | End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
219 | *hash1 = h0; |
220 | *hash2 = h1; |
221 | } |
222 | |
223 | |
224 | |
225 | // init spooky state |
226 | void SpookyHashV1::Init(uint64_t seed1, uint64_t seed2) |
227 | { |
228 | m_length = 0; |
229 | m_remainder = 0; |
230 | m_state[0] = seed1; |
231 | m_state[1] = seed2; |
232 | } |
233 | |
234 | |
235 | // add a message fragment to the state |
236 | void SpookyHashV1::Update(const void *message, size_t length) |
237 | { |
238 | uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; |
239 | size_t newLength = length + m_remainder; |
240 | uint8_t remainder; |
241 | union |
242 | { |
243 | const uint8_t *p8; |
244 | uint64_t *p64; |
245 | size_t i; |
246 | } u; |
247 | const uint64_t *end; |
248 | |
249 | // Is this message fragment too short? If it is, stuff it away. |
250 | if (newLength < sc_bufSize) |
251 | { |
252 | memcpy(&((uint8_t *)m_data)[m_remainder], message, length); |
253 | m_length = length + m_length; |
254 | m_remainder = (uint8_t)newLength; |
255 | return; |
256 | } |
257 | |
258 | // init the variables |
259 | if (m_length < sc_bufSize) |
260 | { |
261 | h0=h3=h6=h9 = m_state[0]; |
262 | h1=h4=h7=h10 = m_state[1]; |
263 | h2=h5=h8=h11 = sc_const; |
264 | } |
265 | else |
266 | { |
267 | h0 = m_state[0]; |
268 | h1 = m_state[1]; |
269 | h2 = m_state[2]; |
270 | h3 = m_state[3]; |
271 | h4 = m_state[4]; |
272 | h5 = m_state[5]; |
273 | h6 = m_state[6]; |
274 | h7 = m_state[7]; |
275 | h8 = m_state[8]; |
276 | h9 = m_state[9]; |
277 | h10 = m_state[10]; |
278 | h11 = m_state[11]; |
279 | } |
280 | m_length = length + m_length; |
281 | |
282 | // if we've got anything stuffed away, use it now |
283 | if (m_remainder) |
284 | { |
285 | uint8_t prefix = sc_bufSize-m_remainder; |
286 | memcpy(&(((uint8_t *)m_data)[m_remainder]), message, prefix); |
287 | u.p64 = m_data; |
288 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
289 | Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
290 | u.p8 = ((const uint8_t *)message) + prefix; |
291 | length -= prefix; |
292 | } |
293 | else |
294 | { |
295 | u.p8 = (const uint8_t *)message; |
296 | } |
297 | |
298 | // handle all whole blocks of sc_blockSize bytes |
299 | end = u.p64 + (length/sc_blockSize)*sc_numVars; |
300 | remainder = (uint8_t)(length-((const uint8_t *)end-u.p8)); |
301 | if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) |
302 | { |
303 | while (u.p64 < end) |
304 | { |
305 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
306 | u.p64 += sc_numVars; |
307 | } |
308 | } |
309 | else |
310 | { |
311 | while (u.p64 < end) |
312 | { |
313 | memcpy(m_data, u.p8, sc_blockSize); |
314 | Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
315 | u.p64 += sc_numVars; |
316 | } |
317 | } |
318 | |
319 | // stuff away the last few bytes |
320 | m_remainder = remainder; |
321 | memcpy(m_data, end, remainder); |
322 | |
323 | // stuff away the variables |
324 | m_state[0] = h0; |
325 | m_state[1] = h1; |
326 | m_state[2] = h2; |
327 | m_state[3] = h3; |
328 | m_state[4] = h4; |
329 | m_state[5] = h5; |
330 | m_state[6] = h6; |
331 | m_state[7] = h7; |
332 | m_state[8] = h8; |
333 | m_state[9] = h9; |
334 | m_state[10] = h10; |
335 | m_state[11] = h11; |
336 | } |
337 | |
338 | |
339 | // report the hash for the concatenation of all message fragments so far |
340 | void SpookyHashV1::Final(uint64_t *hash1, uint64_t *hash2) |
341 | { |
342 | // init the variables |
343 | if (m_length < sc_bufSize) |
344 | { |
345 | *hash1 = m_state[0]; |
346 | *hash2 = m_state[1]; |
347 | Short( m_data, m_length, hash1, hash2); |
348 | return; |
349 | } |
350 | |
351 | const uint64_t *data = (const uint64_t *)m_data; |
352 | uint8_t remainder = m_remainder; |
353 | |
354 | uint64_t h0 = m_state[0]; |
355 | uint64_t h1 = m_state[1]; |
356 | uint64_t h2 = m_state[2]; |
357 | uint64_t h3 = m_state[3]; |
358 | uint64_t h4 = m_state[4]; |
359 | uint64_t h5 = m_state[5]; |
360 | uint64_t h6 = m_state[6]; |
361 | uint64_t h7 = m_state[7]; |
362 | uint64_t h8 = m_state[8]; |
363 | uint64_t h9 = m_state[9]; |
364 | uint64_t h10 = m_state[10]; |
365 | uint64_t h11 = m_state[11]; |
366 | |
367 | if (remainder >= sc_blockSize) |
368 | { |
369 | // m_data can contain two blocks; handle any whole first block |
370 | Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
371 | data += sc_numVars; |
372 | remainder -= sc_blockSize; |
373 | } |
374 | |
375 | // mix in the last partial block, and the length mod sc_blockSize |
376 | memset(&((uint8_t *)data)[remainder], 0, (sc_blockSize-remainder)); |
377 | |
378 | ((uint8_t *)data)[sc_blockSize-1] = remainder; |
379 | Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
380 | |
381 | // do some final mixing |
382 | End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
383 | |
384 | *hash1 = h0; |
385 | *hash2 = h1; |
386 | } |
387 | |
388 | // clang-format on |
389 | |
390 | } // namespace hash |
391 | } // namespace folly |
392 | |