1 | /* |
2 | * Copyright 2017-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | // Spooky Hash |
18 | // A 128-bit noncryptographic hash, for checksums and table lookup |
19 | // By Bob Jenkins. Public domain. |
20 | // Oct 31 2010: published framework, disclaimer ShortHash isn't right |
21 | // Nov 7 2010: disabled ShortHash |
22 | // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again |
23 | // April 10 2012: buffer overflow on platforms without unaligned reads |
24 | // July 12 2012: was passing out variables in final to in/out in short |
25 | // July 30 2012: I reintroduced the buffer overflow |
26 | // August 5 2012: SpookyV2: d = should be d += in short hash, and remove |
27 | // extra mix from long hash |
28 | |
29 | #include <folly/hash/SpookyHashV2.h> |
30 | |
31 | #include <folly/CppAttributes.h> |
32 | #include <folly/Portability.h> |
33 | |
34 | #include <cstring> |
35 | |
36 | namespace folly { |
37 | namespace hash { |
38 | |
39 | // clang-format off |
40 | |
41 | // |
42 | // short hash ... it could be used on any message, |
43 | // but it's used by Spooky just for short messages. |
44 | // |
45 | void SpookyHashV2::Short( |
46 | const void *message, |
47 | size_t length, |
48 | uint64_t *hash1, |
49 | uint64_t *hash2) |
50 | { |
51 | uint64_t buf[2*sc_numVars]; |
52 | union |
53 | { |
54 | const uint8_t *p8; |
55 | uint32_t *p32; |
56 | uint64_t *p64; |
57 | size_t i; |
58 | } u; |
59 | |
60 | u.p8 = (const uint8_t *)message; |
61 | |
62 | if (!kHasUnalignedAccess && (u.i & 0x7)) |
63 | { |
64 | memcpy(buf, message, length); |
65 | u.p64 = buf; |
66 | } |
67 | |
68 | size_t remainder = length%32; |
69 | uint64_t a=*hash1; |
70 | uint64_t b=*hash2; |
71 | uint64_t c=sc_const; |
72 | uint64_t d=sc_const; |
73 | |
74 | if (length > 15) |
75 | { |
76 | const uint64_t *end = u.p64 + (length/32)*4; |
77 | |
78 | // handle all complete sets of 32 bytes |
79 | for (; u.p64 < end; u.p64 += 4) |
80 | { |
81 | c += u.p64[0]; |
82 | d += u.p64[1]; |
83 | ShortMix(a,b,c,d); |
84 | a += u.p64[2]; |
85 | b += u.p64[3]; |
86 | } |
87 | |
88 | //Handle the case of 16+ remaining bytes. |
89 | if (remainder >= 16) |
90 | { |
91 | c += u.p64[0]; |
92 | d += u.p64[1]; |
93 | ShortMix(a,b,c,d); |
94 | u.p64 += 2; |
95 | remainder -= 16; |
96 | } |
97 | } |
98 | |
99 | // Handle the last 0..15 bytes, and its length |
100 | d += ((uint64_t)length) << 56; |
101 | switch (remainder) |
102 | { |
103 | case 15: |
104 | d += ((uint64_t)u.p8[14]) << 48; |
105 | FOLLY_FALLTHROUGH; |
106 | case 14: |
107 | d += ((uint64_t)u.p8[13]) << 40; |
108 | FOLLY_FALLTHROUGH; |
109 | case 13: |
110 | d += ((uint64_t)u.p8[12]) << 32; |
111 | FOLLY_FALLTHROUGH; |
112 | case 12: |
113 | d += u.p32[2]; |
114 | c += u.p64[0]; |
115 | break; |
116 | case 11: |
117 | d += ((uint64_t)u.p8[10]) << 16; |
118 | FOLLY_FALLTHROUGH; |
119 | case 10: |
120 | d += ((uint64_t)u.p8[9]) << 8; |
121 | FOLLY_FALLTHROUGH; |
122 | case 9: |
123 | d += (uint64_t)u.p8[8]; |
124 | FOLLY_FALLTHROUGH; |
125 | case 8: |
126 | c += u.p64[0]; |
127 | break; |
128 | case 7: |
129 | c += ((uint64_t)u.p8[6]) << 48; |
130 | FOLLY_FALLTHROUGH; |
131 | case 6: |
132 | c += ((uint64_t)u.p8[5]) << 40; |
133 | FOLLY_FALLTHROUGH; |
134 | case 5: |
135 | c += ((uint64_t)u.p8[4]) << 32; |
136 | FOLLY_FALLTHROUGH; |
137 | case 4: |
138 | c += u.p32[0]; |
139 | break; |
140 | case 3: |
141 | c += ((uint64_t)u.p8[2]) << 16; |
142 | FOLLY_FALLTHROUGH; |
143 | case 2: |
144 | c += ((uint64_t)u.p8[1]) << 8; |
145 | FOLLY_FALLTHROUGH; |
146 | case 1: |
147 | c += (uint64_t)u.p8[0]; |
148 | break; |
149 | case 0: |
150 | c += sc_const; |
151 | d += sc_const; |
152 | } |
153 | ShortEnd(a,b,c,d); |
154 | *hash1 = a; |
155 | *hash2 = b; |
156 | } |
157 | |
158 | |
159 | |
160 | |
161 | // do the whole hash in one call |
162 | void SpookyHashV2::Hash128( |
163 | const void *message, |
164 | size_t length, |
165 | uint64_t *hash1, |
166 | uint64_t *hash2) |
167 | { |
168 | if (length < sc_bufSize) |
169 | { |
170 | Short(message, length, hash1, hash2); |
171 | return; |
172 | } |
173 | |
174 | uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; |
175 | uint64_t buf[sc_numVars]; |
176 | uint64_t *end; |
177 | union |
178 | { |
179 | const uint8_t *p8; |
180 | uint64_t *p64; |
181 | size_t i; |
182 | } u; |
183 | size_t remainder; |
184 | |
185 | h0=h3=h6=h9 = *hash1; |
186 | h1=h4=h7=h10 = *hash2; |
187 | h2=h5=h8=h11 = sc_const; |
188 | |
189 | u.p8 = (const uint8_t *)message; |
190 | end = u.p64 + (length/sc_blockSize)*sc_numVars; |
191 | |
192 | // handle all whole sc_blockSize blocks of bytes |
193 | if (kHasUnalignedAccess || ((u.i & 0x7) == 0)) |
194 | { |
195 | while (u.p64 < end) |
196 | { |
197 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
198 | u.p64 += sc_numVars; |
199 | } |
200 | } |
201 | else |
202 | { |
203 | while (u.p64 < end) |
204 | { |
205 | memcpy(buf, u.p64, sc_blockSize); |
206 | Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
207 | u.p64 += sc_numVars; |
208 | } |
209 | } |
210 | |
211 | // handle the last partial block of sc_blockSize bytes |
212 | remainder = (length - ((const uint8_t *)end-(const uint8_t *)message)); |
213 | memcpy(buf, end, remainder); |
214 | memset(((uint8_t *)buf)+remainder, 0, sc_blockSize-remainder); |
215 | ((uint8_t*)buf)[sc_blockSize - 1] = uint8_t(remainder); |
216 | |
217 | // do some final mixing |
218 | End(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
219 | *hash1 = h0; |
220 | *hash2 = h1; |
221 | } |
222 | |
223 | |
224 | |
225 | // init spooky state |
226 | void SpookyHashV2::Init(uint64_t seed1, uint64_t seed2) |
227 | { |
228 | m_length = 0; |
229 | m_remainder = 0; |
230 | m_state[0] = seed1; |
231 | m_state[1] = seed2; |
232 | } |
233 | |
234 | |
235 | // add a message fragment to the state |
236 | void SpookyHashV2::Update(const void *message, size_t length) |
237 | { |
238 | uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; |
239 | size_t newLength = length + m_remainder; |
240 | uint8_t remainder; |
241 | union |
242 | { |
243 | const uint8_t *p8; |
244 | uint64_t *p64; |
245 | size_t i; |
246 | } u; |
247 | const uint64_t *end; |
248 | |
249 | // Is this message fragment too short? If it is, stuff it away. |
250 | if (newLength < sc_bufSize) |
251 | { |
252 | memcpy(&((uint8_t *)m_data)[m_remainder], message, length); |
253 | m_length = length + m_length; |
254 | m_remainder = (uint8_t)newLength; |
255 | return; |
256 | } |
257 | |
258 | // init the variables |
259 | if (m_length < sc_bufSize) |
260 | { |
261 | h0=h3=h6=h9 = m_state[0]; |
262 | h1=h4=h7=h10 = m_state[1]; |
263 | h2=h5=h8=h11 = sc_const; |
264 | } |
265 | else |
266 | { |
267 | h0 = m_state[0]; |
268 | h1 = m_state[1]; |
269 | h2 = m_state[2]; |
270 | h3 = m_state[3]; |
271 | h4 = m_state[4]; |
272 | h5 = m_state[5]; |
273 | h6 = m_state[6]; |
274 | h7 = m_state[7]; |
275 | h8 = m_state[8]; |
276 | h9 = m_state[9]; |
277 | h10 = m_state[10]; |
278 | h11 = m_state[11]; |
279 | } |
280 | m_length = length + m_length; |
281 | |
282 | // if we've got anything stuffed away, use it now |
283 | if (m_remainder) |
284 | { |
285 | uint8_t prefix = sc_bufSize-m_remainder; |
286 | memcpy(&(((uint8_t *)m_data)[m_remainder]), message, prefix); |
287 | u.p64 = m_data; |
288 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
289 | Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
290 | u.p8 = ((const uint8_t *)message) + prefix; |
291 | length -= prefix; |
292 | } |
293 | else |
294 | { |
295 | u.p8 = (const uint8_t *)message; |
296 | } |
297 | |
298 | // handle all whole blocks of sc_blockSize bytes |
299 | end = u.p64 + (length/sc_blockSize)*sc_numVars; |
300 | remainder = (uint8_t)(length-((const uint8_t *)end-u.p8)); |
301 | if (kHasUnalignedAccess || (u.i & 0x7) == 0) |
302 | { |
303 | while (u.p64 < end) |
304 | { |
305 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
306 | u.p64 += sc_numVars; |
307 | } |
308 | } |
309 | else |
310 | { |
311 | while (u.p64 < end) |
312 | { |
313 | memcpy(m_data, u.p8, sc_blockSize); |
314 | Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
315 | u.p64 += sc_numVars; |
316 | } |
317 | } |
318 | |
319 | // stuff away the last few bytes |
320 | m_remainder = remainder; |
321 | memcpy(m_data, end, remainder); |
322 | |
323 | // stuff away the variables |
324 | m_state[0] = h0; |
325 | m_state[1] = h1; |
326 | m_state[2] = h2; |
327 | m_state[3] = h3; |
328 | m_state[4] = h4; |
329 | m_state[5] = h5; |
330 | m_state[6] = h6; |
331 | m_state[7] = h7; |
332 | m_state[8] = h8; |
333 | m_state[9] = h9; |
334 | m_state[10] = h10; |
335 | m_state[11] = h11; |
336 | } |
337 | |
338 | |
339 | // report the hash for the concatenation of all message fragments so far |
340 | void SpookyHashV2::Final(uint64_t *hash1, uint64_t *hash2) const |
341 | { |
342 | // init the variables |
343 | if (m_length < sc_bufSize) |
344 | { |
345 | *hash1 = m_state[0]; |
346 | *hash2 = m_state[1]; |
347 | Short( m_data, m_length, hash1, hash2); |
348 | return; |
349 | } |
350 | |
351 | uint64_t buf[2*sc_numVars]; |
352 | memcpy(buf, m_data, sizeof(buf)); |
353 | uint64_t *data = buf; |
354 | uint8_t remainder = m_remainder; |
355 | |
356 | uint64_t h0 = m_state[0]; |
357 | uint64_t h1 = m_state[1]; |
358 | uint64_t h2 = m_state[2]; |
359 | uint64_t h3 = m_state[3]; |
360 | uint64_t h4 = m_state[4]; |
361 | uint64_t h5 = m_state[5]; |
362 | uint64_t h6 = m_state[6]; |
363 | uint64_t h7 = m_state[7]; |
364 | uint64_t h8 = m_state[8]; |
365 | uint64_t h9 = m_state[9]; |
366 | uint64_t h10 = m_state[10]; |
367 | uint64_t h11 = m_state[11]; |
368 | |
369 | if (remainder >= sc_blockSize) |
370 | { |
371 | // m_data can contain two blocks; handle any whole first block |
372 | Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
373 | data += sc_numVars; |
374 | remainder -= sc_blockSize; |
375 | } |
376 | |
377 | // mix in the last partial block, and the length mod sc_blockSize |
378 | memset(&((uint8_t *)data)[remainder], 0, (sc_blockSize-remainder)); |
379 | |
380 | ((uint8_t *)data)[sc_blockSize-1] = remainder; |
381 | |
382 | // do some final mixing |
383 | End(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
384 | |
385 | *hash1 = h0; |
386 | *hash2 = h1; |
387 | } |
388 | |
389 | // clang-format on |
390 | |
391 | } // namespace hash |
392 | } // namespace folly |
393 | |