1 | // Copyright (c) 2011 Google, Inc. |
2 | // |
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
4 | // of this software and associated documentation files (the "Software"), to deal |
5 | // in the Software without restriction, including without limitation the rights |
6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
7 | // copies of the Software, and to permit persons to whom the Software is |
8 | // furnished to do so, subject to the following conditions: |
9 | // |
10 | // The above copyright notice and this permission notice shall be included in |
11 | // all copies or substantial portions of the Software. |
12 | // |
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
19 | // THE SOFTWARE. |
20 | // |
21 | // CityHash, by Geoff Pike and Jyrki Alakuijala |
22 | // |
23 | // This file provides CityHash64() and related functions. |
24 | // |
25 | // It's probably possible to create even faster hash functions by |
26 | // writing a program that systematically explores some of the space of |
27 | // possible hash functions, by using SIMD instructions, or by |
28 | // compromising on hash quality. |
29 | |
30 | #include "config.h" |
31 | #include <city.h> |
32 | |
33 | #include <algorithm> |
34 | #include <string.h> // for memcpy and memset |
35 | |
36 | using namespace std; |
37 | |
38 | |
39 | #if !defined(WORDS_BIGENDIAN) |
40 | |
41 | #define uint32_in_expected_order(x) (x) |
42 | #define uint64_in_expected_order(x) (x) |
43 | |
44 | #else |
45 | |
46 | #ifdef _MSC_VER |
47 | #include <stdlib.h> |
48 | #define bswap_32(x) _byteswap_ulong(x) |
49 | #define bswap_64(x) _byteswap_uint64(x) |
50 | |
51 | #elif defined(__APPLE__) |
52 | // Mac OS X / Darwin features |
53 | #include <libkern/OSByteOrder.h> |
54 | #define bswap_32(x) OSSwapInt32(x) |
55 | #define bswap_64(x) OSSwapInt64(x) |
56 | |
57 | #else |
58 | #include <byteswap.h> |
59 | #endif |
60 | |
61 | #define uint32_in_expected_order(x) (bswap_32(x)) |
62 | #define uint64_in_expected_order(x) (bswap_64(x)) |
63 | |
64 | #endif // WORDS_BIGENDIAN |
65 | |
66 | #if !defined(LIKELY) |
67 | #if HAVE_BUILTIN_EXPECT |
68 | #define LIKELY(x) (__builtin_expect(!!(x), 1)) |
69 | #else |
70 | #define LIKELY(x) (x) |
71 | #endif |
72 | #endif |
73 | |
74 | namespace CityHash_v1_0_2 |
75 | { |
76 | |
77 | static uint64 UNALIGNED_LOAD64(const char *p) { |
78 | uint64 result; |
79 | memcpy(&result, p, sizeof(result)); |
80 | return result; |
81 | } |
82 | |
83 | static uint32 UNALIGNED_LOAD32(const char *p) { |
84 | uint32 result; |
85 | memcpy(&result, p, sizeof(result)); |
86 | return result; |
87 | } |
88 | |
89 | static uint64 Fetch64(const char *p) { |
90 | return uint64_in_expected_order(UNALIGNED_LOAD64(p)); |
91 | } |
92 | |
93 | static uint32 Fetch32(const char *p) { |
94 | return uint32_in_expected_order(UNALIGNED_LOAD32(p)); |
95 | } |
96 | |
97 | // Some primes between 2^63 and 2^64 for various uses. |
98 | static const uint64 k0 = 0xc3a5c85c97cb3127ULL; |
99 | static const uint64 k1 = 0xb492b66fbe98f273ULL; |
100 | static const uint64 k2 = 0x9ae16a3b2f90404fULL; |
101 | static const uint64 k3 = 0xc949d7c7509e6557ULL; |
102 | |
103 | // Bitwise right rotate. Normally this will compile to a single |
104 | // instruction, especially if the shift is a manifest constant. |
105 | static uint64 Rotate(uint64 val, int shift) { |
106 | // Avoid shifting by 64: doing so yields an undefined result. |
107 | return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); |
108 | } |
109 | |
110 | // Equivalent to Rotate(), but requires the second arg to be non-zero. |
111 | // On x86-64, and probably others, it's possible for this to compile |
112 | // to a single instruction if both args are already in registers. |
113 | static uint64 RotateByAtLeast1(uint64 val, int shift) { |
114 | return (val >> shift) | (val << (64 - shift)); |
115 | } |
116 | |
117 | static uint64 ShiftMix(uint64 val) { |
118 | return val ^ (val >> 47); |
119 | } |
120 | |
121 | static uint64 HashLen16(uint64 u, uint64 v) { |
122 | return Hash128to64(uint128(u, v)); |
123 | } |
124 | |
125 | static uint64 HashLen0to16(const char *s, size_t len) { |
126 | if (len > 8) { |
127 | uint64 a = Fetch64(s); |
128 | uint64 b = Fetch64(s + len - 8); |
129 | return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b; |
130 | } |
131 | if (len >= 4) { |
132 | uint64 a = Fetch32(s); |
133 | return HashLen16(len + (a << 3), Fetch32(s + len - 4)); |
134 | } |
135 | if (len > 0) { |
136 | uint8 a = s[0]; |
137 | uint8 b = s[len >> 1]; |
138 | uint8 c = s[len - 1]; |
139 | uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8); |
140 | uint32 z = len + (static_cast<uint32>(c) << 2); |
141 | return ShiftMix(y * k2 ^ z * k3) * k2; |
142 | } |
143 | return k2; |
144 | } |
145 | |
146 | // This probably works well for 16-byte strings as well, but it may be overkill |
147 | // in that case. |
148 | static uint64 HashLen17to32(const char *s, size_t len) { |
149 | uint64 a = Fetch64(s) * k1; |
150 | uint64 b = Fetch64(s + 8); |
151 | uint64 c = Fetch64(s + len - 8) * k2; |
152 | uint64 d = Fetch64(s + len - 16) * k0; |
153 | return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d, |
154 | a + Rotate(b ^ k3, 20) - c + len); |
155 | } |
156 | |
157 | // Return a 16-byte hash for 48 bytes. Quick and dirty. |
158 | // Callers do best to use "random-looking" values for a and b. |
159 | static pair<uint64, uint64> WeakHashLen32WithSeeds( |
160 | uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) { |
161 | a += w; |
162 | b = Rotate(b + a + z, 21); |
163 | uint64 c = a; |
164 | a += x; |
165 | a += y; |
166 | b += Rotate(a, 44); |
167 | return make_pair(a + z, b + c); |
168 | } |
169 | |
170 | // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. |
171 | static pair<uint64, uint64> WeakHashLen32WithSeeds( |
172 | const char* s, uint64 a, uint64 b) { |
173 | return WeakHashLen32WithSeeds(Fetch64(s), |
174 | Fetch64(s + 8), |
175 | Fetch64(s + 16), |
176 | Fetch64(s + 24), |
177 | a, |
178 | b); |
179 | } |
180 | |
181 | // Return an 8-byte hash for 33 to 64 bytes. |
182 | static uint64 HashLen33to64(const char *s, size_t len) { |
183 | uint64 z = Fetch64(s + 24); |
184 | uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0; |
185 | uint64 b = Rotate(a + z, 52); |
186 | uint64 c = Rotate(a, 37); |
187 | a += Fetch64(s + 8); |
188 | c += Rotate(a, 7); |
189 | a += Fetch64(s + 16); |
190 | uint64 vf = a + z; |
191 | uint64 vs = b + Rotate(a, 31) + c; |
192 | a = Fetch64(s + 16) + Fetch64(s + len - 32); |
193 | z = Fetch64(s + len - 8); |
194 | b = Rotate(a + z, 52); |
195 | c = Rotate(a, 37); |
196 | a += Fetch64(s + len - 24); |
197 | c += Rotate(a, 7); |
198 | a += Fetch64(s + len - 16); |
199 | uint64 wf = a + z; |
200 | uint64 ws = b + Rotate(a, 31) + c; |
201 | uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0); |
202 | return ShiftMix(r * k0 + vs) * k2; |
203 | } |
204 | |
205 | uint64 CityHash64(const char *s, size_t len) { |
206 | if (len <= 32) { |
207 | if (len <= 16) { |
208 | return HashLen0to16(s, len); |
209 | } else { |
210 | return HashLen17to32(s, len); |
211 | } |
212 | } else if (len <= 64) { |
213 | return HashLen33to64(s, len); |
214 | } |
215 | |
216 | // For strings over 64 bytes we hash the end first, and then as we |
217 | // loop we keep 56 bytes of state: v, w, x, y, and z. |
218 | uint64 x = Fetch64(s); |
219 | uint64 y = Fetch64(s + len - 16) ^ k1; |
220 | uint64 z = Fetch64(s + len - 56) ^ k0; |
221 | pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, y); |
222 | pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, len * k1, k0); |
223 | z += ShiftMix(v.second) * k1; |
224 | x = Rotate(z + x, 39) * k1; |
225 | y = Rotate(y, 33) * k1; |
226 | |
227 | // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. |
228 | len = (len - 1) & ~static_cast<size_t>(63); |
229 | do { |
230 | x = Rotate(x + y + v.first + Fetch64(s + 16), 37) * k1; |
231 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; |
232 | x ^= w.second; |
233 | y ^= v.first; |
234 | z = Rotate(z ^ w.first, 33); |
235 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); |
236 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); |
237 | std::swap(z, x); |
238 | s += 64; |
239 | len -= 64; |
240 | } while (len != 0); |
241 | return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, |
242 | HashLen16(v.second, w.second) + x); |
243 | } |
244 | |
245 | uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) { |
246 | return CityHash64WithSeeds(s, len, k2, seed); |
247 | } |
248 | |
249 | uint64 CityHash64WithSeeds(const char *s, size_t len, |
250 | uint64 seed0, uint64 seed1) { |
251 | return HashLen16(CityHash64(s, len) - seed0, seed1); |
252 | } |
253 | |
254 | // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings |
255 | // of any length representable in ssize_t. Based on City and Murmur. |
256 | static uint128 CityMurmur(const char *s, size_t len, uint128 seed) { |
257 | uint64 a = Uint128Low64(seed); |
258 | uint64 b = Uint128High64(seed); |
259 | uint64 c = 0; |
260 | uint64 d = 0; |
261 | ssize_t l = len - 16; |
262 | if (l <= 0) { // len <= 16 |
263 | a = ShiftMix(a * k1) * k1; |
264 | c = b * k1 + HashLen0to16(s, len); |
265 | d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); |
266 | } else { // len > 16 |
267 | c = HashLen16(Fetch64(s + len - 8) + k1, a); |
268 | d = HashLen16(b + len, c + Fetch64(s + len - 16)); |
269 | a += d; |
270 | do { |
271 | a ^= ShiftMix(Fetch64(s) * k1) * k1; |
272 | a *= k1; |
273 | b ^= a; |
274 | c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; |
275 | c *= k1; |
276 | d ^= c; |
277 | s += 16; |
278 | l -= 16; |
279 | } while (l > 0); |
280 | } |
281 | a = HashLen16(a, c); |
282 | b = HashLen16(d, b); |
283 | return uint128(a ^ b, HashLen16(b, a)); |
284 | } |
285 | |
286 | uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) { |
287 | if (len < 128) { |
288 | return CityMurmur(s, len, seed); |
289 | } |
290 | |
291 | // We expect len >= 128 to be the common case. Keep 56 bytes of state: |
292 | // v, w, x, y, and z. |
293 | pair<uint64, uint64> v, w; |
294 | uint64 x = Uint128Low64(seed); |
295 | uint64 y = Uint128High64(seed); |
296 | uint64 z = len * k1; |
297 | v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); |
298 | v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); |
299 | w.first = Rotate(y + z, 35) * k1 + x; |
300 | w.second = Rotate(x + Fetch64(s + 88), 53) * k1; |
301 | |
302 | // This is the same inner loop as CityHash64(), manually unrolled. |
303 | do { |
304 | x = Rotate(x + y + v.first + Fetch64(s + 16), 37) * k1; |
305 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; |
306 | x ^= w.second; |
307 | y ^= v.first; |
308 | z = Rotate(z ^ w.first, 33); |
309 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); |
310 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); |
311 | std::swap(z, x); |
312 | s += 64; |
313 | x = Rotate(x + y + v.first + Fetch64(s + 16), 37) * k1; |
314 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; |
315 | x ^= w.second; |
316 | y ^= v.first; |
317 | z = Rotate(z ^ w.first, 33); |
318 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); |
319 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); |
320 | std::swap(z, x); |
321 | s += 64; |
322 | len -= 128; |
323 | } while (LIKELY(len >= 128)); |
324 | y += Rotate(w.first, 37) * k0 + z; |
325 | x += Rotate(v.first + z, 49) * k0; |
326 | // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. |
327 | for (size_t tail_done = 0; tail_done < len; ) { |
328 | tail_done += 32; |
329 | y = Rotate(y - x, 42) * k0 + v.second; |
330 | w.first += Fetch64(s + len - tail_done + 16); |
331 | x = Rotate(x, 49) * k0 + w.first; |
332 | w.first += v.first; |
333 | v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second); |
334 | } |
335 | // At this point our 48 bytes of state should contain more than |
336 | // enough information for a strong 128-bit hash. We use two |
337 | // different 48-byte-to-8-byte hashes to get a 16-byte final result. |
338 | x = HashLen16(x, v.first); |
339 | y = HashLen16(y, w.first); |
340 | return uint128(HashLen16(x + v.second, w.second) + y, |
341 | HashLen16(x + w.second, y + v.second)); |
342 | } |
343 | |
344 | uint128 CityHash128(const char *s, size_t len) { |
345 | if (len >= 16) { |
346 | return CityHash128WithSeed(s + 16, |
347 | len - 16, |
348 | uint128(Fetch64(s) ^ k3, |
349 | Fetch64(s + 8))); |
350 | } else if (len >= 8) { |
351 | return CityHash128WithSeed(NULL, |
352 | 0, |
353 | uint128(Fetch64(s) ^ (len * k0), |
354 | Fetch64(s + len - 8) ^ k1)); |
355 | } else { |
356 | return CityHash128WithSeed(s, len, uint128(k0, k1)); |
357 | } |
358 | } |
359 | |
360 | } |
361 | |
362 | #ifdef __SSE4_2__ |
363 | #include <citycrc.h> |
364 | #include <nmmintrin.h> |
365 | |
366 | namespace CityHash_v1_0_2 |
367 | { |
368 | |
369 | // Requires len >= 240. |
370 | static void CityHashCrc256Long(const char *s, size_t len, |
371 | uint32 seed, uint64 *result) { |
372 | uint64 a = Fetch64(s + 56) + k0; |
373 | uint64 b = Fetch64(s + 96) + k0; |
374 | uint64 c = result[1] = HashLen16(b, len); |
375 | uint64 d = result[2] = Fetch64(s + 120) * k0 + len; |
376 | uint64 e = Fetch64(s + 184) + seed; |
377 | uint64 f = seed; |
378 | uint64 g = 0; |
379 | uint64 h = 0; |
380 | uint64 i = 0; |
381 | uint64 j = 0; |
382 | uint64 t = c + d; |
383 | |
384 | // 240 bytes of input per iter. |
385 | size_t iters = len / 240; |
386 | len -= iters * 240; |
387 | do { |
388 | #define CHUNK(multiplier, z) \ |
389 | { \ |
390 | uint64 old_a = a; \ |
391 | a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \ |
392 | b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \ |
393 | c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \ |
394 | d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \ |
395 | e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \ |
396 | t = old_a; \ |
397 | } \ |
398 | f = _mm_crc32_u64(f, a); \ |
399 | g = _mm_crc32_u64(g, b); \ |
400 | h = _mm_crc32_u64(h, c); \ |
401 | i = _mm_crc32_u64(i, d); \ |
402 | j = _mm_crc32_u64(j, e); \ |
403 | s += 40 |
404 | |
405 | CHUNK(1, 1); CHUNK(k0, 0); |
406 | CHUNK(1, 1); CHUNK(k0, 0); |
407 | CHUNK(1, 1); CHUNK(k0, 0); |
408 | } while (--iters > 0); |
409 | j += i << 32; |
410 | a = HashLen16(a, j); |
411 | h += g << 32; |
412 | b = b * k0 + h; |
413 | c = HashLen16(c, f) + i; |
414 | d = HashLen16(d, e); |
415 | pair<uint64, uint64> v(j + e, HashLen16(h, t)); |
416 | h = v.second + f; |
417 | // If 0 < len < 240, hash chunks of 32 bytes each from the end of s. |
418 | for (size_t tail_done = 0; tail_done < len; ) { |
419 | tail_done += 32; |
420 | c = Rotate(c - a, 42) * k0 + v.second; |
421 | d += Fetch64(s + len - tail_done + 16); |
422 | a = Rotate(a, 49) * k0 + d; |
423 | d += v.first; |
424 | v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second); |
425 | } |
426 | |
427 | // Final mix. |
428 | e = HashLen16(a, d) + v.first; |
429 | f = HashLen16(b, c) + a; |
430 | g = HashLen16(v.first, v.second) + c; |
431 | result[0] = e + f + g + h; |
432 | a = ShiftMix((a + g) * k0) * k0 + b; |
433 | result[1] += a + result[0]; |
434 | a = ShiftMix(a * k0) * k0 + c; |
435 | result[2] += a + result[1]; |
436 | a = ShiftMix((a + e) * k0) * k0; |
437 | result[3] = a + result[2]; |
438 | } |
439 | |
440 | // Requires len < 240. |
441 | static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) { |
442 | char buf[240]; |
443 | memcpy(buf, s, len); |
444 | memset(buf + len, 0, 240 - len); |
445 | CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result); |
446 | } |
447 | |
448 | void CityHashCrc256(const char *s, size_t len, uint64 *result) { |
449 | if (LIKELY(len >= 240)) { |
450 | CityHashCrc256Long(s, len, 0, result); |
451 | } else { |
452 | CityHashCrc256Short(s, len, result); |
453 | } |
454 | } |
455 | |
456 | uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) { |
457 | if (len <= 900) { |
458 | return CityHash128WithSeed(s, len, seed); |
459 | } else { |
460 | uint64 result[4]; |
461 | CityHashCrc256(s, len, result); |
462 | uint64 u = Uint128High64(seed) + result[0]; |
463 | uint64 v = Uint128Low64(seed) + result[1]; |
464 | return uint128(HashLen16(u, v + result[2]), |
465 | HashLen16(Rotate(v, 32), u * k0 + result[3])); |
466 | } |
467 | } |
468 | |
469 | uint128 CityHashCrc128(const char *s, size_t len) { |
470 | if (len <= 900) { |
471 | return CityHash128(s, len); |
472 | } else { |
473 | uint64 result[4]; |
474 | CityHashCrc256(s, len, result); |
475 | return uint128(result[2], result[3]); |
476 | } |
477 | } |
478 | |
479 | } |
480 | |
481 | #endif |
482 | |