1 | /* |
2 | * Copyright 2011-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | |
19 | #include <cstdint> |
20 | #include <cstring> |
21 | #include <limits> |
22 | #include <string> |
23 | #include <tuple> |
24 | #include <type_traits> |
25 | #include <utility> |
26 | |
27 | #include <folly/Traits.h> |
28 | #include <folly/Utility.h> |
29 | #include <folly/functional/ApplyTuple.h> |
30 | #include <folly/hash/SpookyHashV1.h> |
31 | #include <folly/hash/SpookyHashV2.h> |
32 | #include <folly/lang/Bits.h> |
33 | |
34 | /* |
35 | * Various hashing functions. |
36 | */ |
37 | |
38 | namespace folly { |
39 | namespace hash { |
40 | |
41 | uint64_t hash_128_to_64(const uint64_t upper, const uint64_t lower) noexcept; |
42 | |
43 | ////////////////////////////////////////////////////////////////////// |
44 | |
45 | /* |
46 | * Thomas Wang 64 bit mix hash function |
47 | */ |
48 | |
49 | inline uint64_t twang_mix64(uint64_t key) noexcept { |
50 | key = (~key) + (key << 21); // key *= (1 << 21) - 1; key -= 1; |
51 | key = key ^ (key >> 24); |
52 | key = key + (key << 3) + (key << 8); // key *= 1 + (1 << 3) + (1 << 8) |
53 | key = key ^ (key >> 14); |
54 | key = key + (key << 2) + (key << 4); // key *= 1 + (1 << 2) + (1 << 4) |
55 | key = key ^ (key >> 28); |
56 | key = key + (key << 31); // key *= 1 + (1 << 31) |
57 | return key; |
58 | } |
59 | |
60 | /* |
61 | * Inverse of twang_mix64 |
62 | * |
63 | * Note that twang_unmix64 is significantly slower than twang_mix64. |
64 | */ |
65 | |
66 | inline uint64_t twang_unmix64(uint64_t key) noexcept { |
67 | // See the comments in jenkins_rev_unmix32 for an explanation as to how this |
68 | // was generated |
69 | key *= 4611686016279904257U; |
70 | key ^= (key >> 28) ^ (key >> 56); |
71 | key *= 14933078535860113213U; |
72 | key ^= (key >> 14) ^ (key >> 28) ^ (key >> 42) ^ (key >> 56); |
73 | key *= 15244667743933553977U; |
74 | key ^= (key >> 24) ^ (key >> 48); |
75 | key = (key + 1) * 9223367638806167551U; |
76 | return key; |
77 | } |
78 | |
79 | /* |
80 | * Thomas Wang downscaling hash function |
81 | */ |
82 | |
83 | inline uint32_t twang_32from64(uint64_t key) noexcept { |
84 | key = (~key) + (key << 18); |
85 | key = key ^ (key >> 31); |
86 | key = key * 21; |
87 | key = key ^ (key >> 11); |
88 | key = key + (key << 6); |
89 | key = key ^ (key >> 22); |
90 | return (uint32_t)key; |
91 | } |
92 | |
93 | /* |
94 | * Robert Jenkins' reversible 32 bit mix hash function |
95 | */ |
96 | |
97 | inline uint32_t jenkins_rev_mix32(uint32_t key) noexcept { |
98 | key += (key << 12); // key *= (1 + (1 << 12)) |
99 | key ^= (key >> 22); |
100 | key += (key << 4); // key *= (1 + (1 << 4)) |
101 | key ^= (key >> 9); |
102 | key += (key << 10); // key *= (1 + (1 << 10)) |
103 | key ^= (key >> 2); |
104 | // key *= (1 + (1 << 7)) * (1 + (1 << 12)) |
105 | key += (key << 7); |
106 | key += (key << 12); |
107 | return key; |
108 | } |
109 | |
110 | /* |
111 | * Inverse of jenkins_rev_mix32 |
112 | * |
113 | * Note that jenkinks_rev_unmix32 is significantly slower than |
114 | * jenkins_rev_mix32. |
115 | */ |
116 | |
117 | inline uint32_t jenkins_rev_unmix32(uint32_t key) noexcept { |
118 | // These are the modular multiplicative inverses (in Z_2^32) of the |
119 | // multiplication factors in jenkins_rev_mix32, in reverse order. They were |
120 | // computed using the Extended Euclidean algorithm, see |
121 | // http://en.wikipedia.org/wiki/Modular_multiplicative_inverse |
122 | key *= 2364026753U; |
123 | |
124 | // The inverse of a ^= (a >> n) is |
125 | // b = a |
126 | // for (int i = n; i < 32; i += n) { |
127 | // b ^= (a >> i); |
128 | // } |
129 | key ^= (key >> 2) ^ (key >> 4) ^ (key >> 6) ^ (key >> 8) ^ (key >> 10) ^ |
130 | (key >> 12) ^ (key >> 14) ^ (key >> 16) ^ (key >> 18) ^ (key >> 20) ^ |
131 | (key >> 22) ^ (key >> 24) ^ (key >> 26) ^ (key >> 28) ^ (key >> 30); |
132 | key *= 3222273025U; |
133 | key ^= (key >> 9) ^ (key >> 18) ^ (key >> 27); |
134 | key *= 4042322161U; |
135 | key ^= (key >> 22); |
136 | key *= 16773121U; |
137 | return key; |
138 | } |
139 | |
140 | /* |
141 | * Fowler / Noll / Vo (FNV) Hash |
142 | * http://www.isthe.com/chongo/tech/comp/fnv/ |
143 | */ |
144 | |
145 | const uint32_t FNV_32_HASH_START = 2166136261UL; |
146 | const uint64_t FNV_64_HASH_START = 14695981039346656037ULL; |
147 | const uint64_t FNVA_64_HASH_START = 14695981039346656037ULL; |
148 | |
149 | inline uint32_t fnv32( |
150 | const char* buf, |
151 | uint32_t hash = FNV_32_HASH_START) noexcept { |
152 | // forcing signed char, since other platforms can use unsigned |
153 | const signed char* s = reinterpret_cast<const signed char*>(buf); |
154 | |
155 | for (; *s; ++s) { |
156 | hash += |
157 | (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); |
158 | hash ^= *s; |
159 | } |
160 | return hash; |
161 | } |
162 | |
163 | inline uint32_t fnv32_buf( |
164 | const void* buf, |
165 | size_t n, |
166 | uint32_t hash = FNV_32_HASH_START) noexcept { |
167 | // forcing signed char, since other platforms can use unsigned |
168 | const signed char* char_buf = reinterpret_cast<const signed char*>(buf); |
169 | |
170 | for (size_t i = 0; i < n; ++i) { |
171 | hash += |
172 | (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); |
173 | hash ^= char_buf[i]; |
174 | } |
175 | |
176 | return hash; |
177 | } |
178 | |
179 | inline uint32_t fnv32( |
180 | const std::string& str, |
181 | uint32_t hash = FNV_32_HASH_START) noexcept { |
182 | return fnv32_buf(str.data(), str.size(), hash); |
183 | } |
184 | |
185 | inline uint64_t fnv64( |
186 | const char* buf, |
187 | uint64_t hash = FNV_64_HASH_START) noexcept { |
188 | // forcing signed char, since other platforms can use unsigned |
189 | const signed char* s = reinterpret_cast<const signed char*>(buf); |
190 | |
191 | for (; *s; ++s) { |
192 | hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + |
193 | (hash << 8) + (hash << 40); |
194 | hash ^= *s; |
195 | } |
196 | return hash; |
197 | } |
198 | |
199 | inline uint64_t fnv64_buf( |
200 | const void* buf, |
201 | size_t n, |
202 | uint64_t hash = FNV_64_HASH_START) noexcept { |
203 | // forcing signed char, since other platforms can use unsigned |
204 | const signed char* char_buf = reinterpret_cast<const signed char*>(buf); |
205 | |
206 | for (size_t i = 0; i < n; ++i) { |
207 | hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + |
208 | (hash << 8) + (hash << 40); |
209 | hash ^= char_buf[i]; |
210 | } |
211 | return hash; |
212 | } |
213 | |
214 | inline uint64_t fnv64( |
215 | const std::string& str, |
216 | uint64_t hash = FNV_64_HASH_START) noexcept { |
217 | return fnv64_buf(str.data(), str.size(), hash); |
218 | } |
219 | |
220 | inline uint64_t fnva64_buf( |
221 | const void* buf, |
222 | size_t n, |
223 | uint64_t hash = FNVA_64_HASH_START) noexcept { |
224 | const uint8_t* char_buf = reinterpret_cast<const uint8_t*>(buf); |
225 | |
226 | for (size_t i = 0; i < n; ++i) { |
227 | hash ^= char_buf[i]; |
228 | hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + |
229 | (hash << 8) + (hash << 40); |
230 | } |
231 | return hash; |
232 | } |
233 | |
234 | inline uint64_t fnva64( |
235 | const std::string& str, |
236 | uint64_t hash = FNVA_64_HASH_START) noexcept { |
237 | return fnva64_buf(str.data(), str.size(), hash); |
238 | } |
239 | |
240 | /* |
241 | * Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html |
242 | */ |
243 | |
244 | #define get16bits(d) folly::loadUnaligned<uint16_t>(d) |
245 | |
246 | inline uint32_t hsieh_hash32_buf(const void* buf, size_t len) noexcept { |
247 | // forcing signed char, since other platforms can use unsigned |
248 | const unsigned char* s = reinterpret_cast<const unsigned char*>(buf); |
249 | uint32_t hash = static_cast<uint32_t>(len); |
250 | uint32_t tmp; |
251 | size_t rem; |
252 | |
253 | if (len <= 0 || buf == nullptr) { |
254 | return 0; |
255 | } |
256 | |
257 | rem = len & 3; |
258 | len >>= 2; |
259 | |
260 | /* Main loop */ |
261 | for (; len > 0; len--) { |
262 | hash += get16bits(s); |
263 | tmp = (get16bits(s + 2) << 11) ^ hash; |
264 | hash = (hash << 16) ^ tmp; |
265 | s += 2 * sizeof(uint16_t); |
266 | hash += hash >> 11; |
267 | } |
268 | |
269 | /* Handle end cases */ |
270 | switch (rem) { |
271 | case 3: |
272 | hash += get16bits(s); |
273 | hash ^= hash << 16; |
274 | hash ^= s[sizeof(uint16_t)] << 18; |
275 | hash += hash >> 11; |
276 | break; |
277 | case 2: |
278 | hash += get16bits(s); |
279 | hash ^= hash << 11; |
280 | hash += hash >> 17; |
281 | break; |
282 | case 1: |
283 | hash += *s; |
284 | hash ^= hash << 10; |
285 | hash += hash >> 1; |
286 | } |
287 | |
288 | /* Force "avalanching" of final 127 bits */ |
289 | hash ^= hash << 3; |
290 | hash += hash >> 5; |
291 | hash ^= hash << 4; |
292 | hash += hash >> 17; |
293 | hash ^= hash << 25; |
294 | hash += hash >> 6; |
295 | |
296 | return hash; |
297 | } |
298 | |
299 | #undef get16bits |
300 | |
301 | inline uint32_t hsieh_hash32(const char* s) noexcept { |
302 | return hsieh_hash32_buf(s, std::strlen(s)); |
303 | } |
304 | |
305 | inline uint32_t hsieh_hash32_str(const std::string& str) noexcept { |
306 | return hsieh_hash32_buf(str.data(), str.size()); |
307 | } |
308 | |
309 | ////////////////////////////////////////////////////////////////////// |
310 | |
311 | } // namespace hash |
312 | |
313 | namespace detail { |
314 | |
315 | template <typename I> |
316 | struct integral_hasher { |
317 | using folly_is_avalanching = |
318 | bool_constant<(sizeof(I) >= 8 || sizeof(size_t) == 4)>; |
319 | |
320 | size_t operator()(I const& i) const noexcept { |
321 | static_assert(sizeof(I) <= 16, "Input type is too wide" ); |
322 | /* constexpr */ if (sizeof(I) <= 4) { |
323 | auto const i32 = static_cast<int32_t>(i); // impl accident: sign-extends |
324 | auto const u32 = static_cast<uint32_t>(i32); |
325 | return static_cast<size_t>(hash::jenkins_rev_mix32(u32)); |
326 | } else if (sizeof(I) <= 8) { |
327 | auto const u64 = static_cast<uint64_t>(i); |
328 | return static_cast<size_t>(hash::twang_mix64(u64)); |
329 | } else { |
330 | auto const u = to_unsigned(i); |
331 | auto const hi = static_cast<uint64_t>(u >> sizeof(I) * 4); |
332 | auto const lo = static_cast<uint64_t>(u); |
333 | return hash::hash_128_to_64(hi, lo); |
334 | } |
335 | } |
336 | }; |
337 | |
338 | template <typename F> |
339 | struct float_hasher { |
340 | using folly_is_avalanching = std::true_type; |
341 | |
342 | size_t operator()(F const& f) const noexcept { |
343 | static_assert(sizeof(F) <= 8, "Input type is too wide" ); |
344 | |
345 | if (f == F{}) { // Ensure 0 and -0 get the same hash. |
346 | return 0; |
347 | } |
348 | |
349 | uint64_t u64 = 0; |
350 | memcpy(&u64, &f, sizeof(F)); |
351 | return static_cast<size_t>(hash::twang_mix64(u64)); |
352 | } |
353 | }; |
354 | |
355 | } // namespace detail |
356 | |
357 | template <class Key, class Enable = void> |
358 | struct hasher; |
359 | |
360 | struct Hash { |
361 | template <class T> |
362 | size_t operator()(const T& v) const noexcept(noexcept(hasher<T>()(v))) { |
363 | return hasher<T>()(v); |
364 | } |
365 | |
366 | template <class T, class... Ts> |
367 | size_t operator()(const T& t, const Ts&... ts) const { |
368 | return hash::hash_128_to_64((*this)(t), (*this)(ts...)); |
369 | } |
370 | }; |
371 | |
372 | // IsAvalanchingHasher<H, K> extends std::integral_constant<bool, V>. |
373 | // V will be true if it is known that when a hasher of type H computes |
374 | // the hash of a key of type K, any subset of B bits from the resulting |
375 | // hash value is usable in a context that can tolerate a collision rate |
376 | // of about 1/2^B. (Input bits lost implicitly converting between K and |
377 | // the argument of H::operator() are not considered here; K is separate |
378 | // to handle the case of generic hashers like folly::Hash). |
379 | // |
380 | // If std::hash<T> or folly::hasher<T> is specialized for a new type T and |
381 | // the implementation avalanches input entropy across all of the bits of a |
382 | // std::size_t result, the specialization should be marked as avalanching. |
383 | // This can be done either by adding a member type folly_is_avalanching |
384 | // to the functor H that contains a constexpr bool value of true, or by |
385 | // specializing IsAvalanchingHasher<H, K>. The member type mechanism is |
386 | // more convenient, but specializing IsAvalanchingHasher may be required |
387 | // if a hasher is polymorphic on the key type or if its definition cannot |
388 | // be modified. |
389 | // |
390 | // The standard's definition of hash quality is based on the chance hash |
391 | // collisions using the entire hash value. No requirement is made that |
392 | // this property holds for subsets of the bits. In addition, hashed keys |
393 | // in real-world workloads are not chosen uniformly from the entire domain |
394 | // of keys, which can further increase the collision rate for a subset |
395 | // of bits. For example, std::hash<uint64_t> in libstdc++-v3 and libc++ |
396 | // is the identity function. This hash function has no collisions when |
397 | // considering hash values in their entirety, but for real-world workloads |
398 | // the high bits are likely to always be zero. |
399 | // |
400 | // Some hash functions provide a stronger guarantee -- the standard's |
401 | // collision property is also preserved for subsets of the output bits and |
402 | // for sub-domains of keys. Another way to say this is that each bit of |
403 | // the hash value contains entropy from the entire input, changes to the |
404 | // input avalanche across all of the bits of the output. The distinction |
405 | // is useful when mapping the hash value onto a smaller space efficiently |
406 | // (such as when implementing a hash table). |
407 | template <typename Hasher, typename Key> |
408 | struct IsAvalanchingHasher; |
409 | |
410 | namespace detail { |
411 | template <typename Hasher, typename Void = void> |
412 | struct IsAvalanchingHasherFromMemberType : std::false_type {}; |
413 | |
414 | template <typename Hasher> |
415 | struct IsAvalanchingHasherFromMemberType< |
416 | Hasher, |
417 | void_t<typename Hasher::folly_is_avalanching>> |
418 | : bool_constant<Hasher::folly_is_avalanching::value> {}; |
419 | } // namespace detail |
420 | |
421 | template <typename Hasher, typename Key> |
422 | struct IsAvalanchingHasher : detail::IsAvalanchingHasherFromMemberType<Hasher> { |
423 | }; |
424 | |
425 | // It's ugly to put this here, but folly::transparent isn't hash specific |
426 | // so it seems even more ugly to put this near its declaration |
427 | template <typename H, typename K> |
428 | struct IsAvalanchingHasher<transparent<H>, K> : IsAvalanchingHasher<H, K> {}; |
429 | |
430 | template <typename K> |
431 | struct IsAvalanchingHasher<Hash, K> : IsAvalanchingHasher<hasher<K>, K> {}; |
432 | |
433 | template <> |
434 | struct hasher<bool> { |
435 | using folly_is_avalanching = std::true_type; |
436 | |
437 | size_t operator()(bool key) const noexcept { |
438 | // Make sure that all the output bits depend on the input. |
439 | return key ? std::numeric_limits<size_t>::max() : 0; |
440 | } |
441 | }; |
442 | template <typename K> |
443 | struct IsAvalanchingHasher<hasher<bool>, K> : std::true_type {}; |
444 | |
445 | template <> |
446 | struct hasher<unsigned long long> |
447 | : detail::integral_hasher<unsigned long long> {}; |
448 | |
449 | template <> |
450 | struct hasher<signed long long> : detail::integral_hasher<signed long long> {}; |
451 | |
452 | template <> |
453 | struct hasher<unsigned long> : detail::integral_hasher<unsigned long> {}; |
454 | |
455 | template <> |
456 | struct hasher<signed long> : detail::integral_hasher<signed long> {}; |
457 | |
458 | template <> |
459 | struct hasher<unsigned int> : detail::integral_hasher<unsigned int> {}; |
460 | |
461 | template <> |
462 | struct hasher<signed int> : detail::integral_hasher<signed int> {}; |
463 | |
464 | template <> |
465 | struct hasher<unsigned short> : detail::integral_hasher<unsigned short> {}; |
466 | |
467 | template <> |
468 | struct hasher<signed short> : detail::integral_hasher<signed short> {}; |
469 | |
470 | template <> |
471 | struct hasher<unsigned char> : detail::integral_hasher<unsigned char> {}; |
472 | |
473 | template <> |
474 | struct hasher<signed char> : detail::integral_hasher<signed char> {}; |
475 | |
476 | template <> // char is a different type from both signed char and unsigned char |
477 | struct hasher<char> : detail::integral_hasher<char> {}; |
478 | |
479 | #if FOLLY_HAVE_INT128_T |
480 | template <> |
481 | struct hasher<signed __int128> : detail::integral_hasher<signed __int128> {}; |
482 | |
483 | template <> |
484 | struct hasher<unsigned __int128> : detail::integral_hasher<unsigned __int128> { |
485 | }; |
486 | #endif |
487 | |
488 | template <> |
489 | struct hasher<float> : detail::float_hasher<float> {}; |
490 | |
491 | template <> |
492 | struct hasher<double> : detail::float_hasher<double> {}; |
493 | |
494 | template <> |
495 | struct hasher<std::string> { |
496 | using folly_is_avalanching = std::true_type; |
497 | |
498 | size_t operator()(const std::string& key) const { |
499 | return static_cast<size_t>( |
500 | hash::SpookyHashV2::Hash64(key.data(), key.size(), 0)); |
501 | } |
502 | }; |
503 | template <typename K> |
504 | struct IsAvalanchingHasher<hasher<std::string>, K> : std::true_type {}; |
505 | |
506 | template <typename T> |
507 | struct hasher<T, std::enable_if_t<std::is_enum<T>::value>> { |
508 | size_t operator()(T key) const noexcept { |
509 | return Hash()(static_cast<std::underlying_type_t<T>>(key)); |
510 | } |
511 | }; |
512 | |
513 | template <typename T, typename K> |
514 | struct IsAvalanchingHasher< |
515 | hasher<T, std::enable_if_t<std::is_enum<T>::value>>, |
516 | K> : IsAvalanchingHasher<hasher<std::underlying_type_t<T>>, K> {}; |
517 | |
518 | template <typename T1, typename T2> |
519 | struct hasher<std::pair<T1, T2>> { |
520 | using folly_is_avalanching = std::true_type; |
521 | |
522 | size_t operator()(const std::pair<T1, T2>& key) const { |
523 | return Hash()(key.first, key.second); |
524 | } |
525 | }; |
526 | |
527 | template <typename... Ts> |
528 | struct hasher<std::tuple<Ts...>> { |
529 | size_t operator()(const std::tuple<Ts...>& key) const { |
530 | return apply(Hash(), key); |
531 | } |
532 | }; |
533 | |
534 | // combiner for multi-arg tuple also mixes bits |
535 | template <typename T, typename K> |
536 | struct IsAvalanchingHasher<hasher<std::tuple<T>>, K> |
537 | : IsAvalanchingHasher<hasher<T>, K> {}; |
538 | template <typename T1, typename T2, typename... Ts, typename K> |
539 | struct IsAvalanchingHasher<hasher<std::tuple<T1, T2, Ts...>>, K> |
540 | : std::true_type {}; |
541 | |
542 | namespace hash { |
543 | // Simply uses std::hash to hash. Note that std::hash is not guaranteed |
544 | // to be a very good hash function; provided std::hash doesn't collide on |
545 | // the individual inputs, you are fine, but that won't be true for, say, |
546 | // strings or pairs |
547 | class StdHasher { |
548 | public: |
549 | // The standard requires all explicit and partial specializations of std::hash |
550 | // supplied by either the standard library or by users to be default |
551 | // constructible. |
552 | template <typename T> |
553 | size_t operator()(const T& t) const noexcept(noexcept(std::hash<T>()(t))) { |
554 | return std::hash<T>()(t); |
555 | } |
556 | }; |
557 | |
558 | // This is a general-purpose way to create a single hash from multiple |
559 | // hashable objects. hash_combine_generic takes a class Hasher implementing |
560 | // hash<T>; hash_combine uses a default hasher StdHasher that uses std::hash. |
561 | // hash_combine_generic hashes each argument and combines those hashes in |
562 | // an order-dependent way to yield a new hash; hash_range does so (also in an |
563 | // order-dependent way) for items in the range [first, last); |
564 | // commutative_hash_combine_* hashes values but combines them in an |
565 | // order-independent way to yield a new hash. |
566 | |
567 | // This is the Hash128to64 function from Google's cityhash (available |
568 | // under the MIT License). We use it to reduce multiple 64 bit hashes |
569 | // into a single hash. |
570 | inline uint64_t hash_128_to_64( |
571 | const uint64_t upper, |
572 | const uint64_t lower) noexcept { |
573 | // Murmur-inspired hashing. |
574 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; |
575 | uint64_t a = (lower ^ upper) * kMul; |
576 | a ^= (a >> 47); |
577 | uint64_t b = (upper ^ a) * kMul; |
578 | b ^= (b >> 47); |
579 | b *= kMul; |
580 | return b; |
581 | } |
582 | |
583 | template <class Hash, class Value> |
584 | uint64_t commutative_hash_combine_value_generic( |
585 | uint64_t seed, |
586 | Hash const& hasher, |
587 | Value const& value) { |
588 | auto const x = hasher(value); |
589 | auto const y = IsAvalanchingHasher<Hash, Value>::value ? x : twang_mix64(x); |
590 | // Commutative accumulator taken from this paper: |
591 | // https://www.preprints.org/manuscript/201710.0192/v1/download |
592 | return 3860031 + (seed + y) * 2779 + (seed * y * 2); |
593 | } |
594 | |
595 | // hash_range combines hashes of items in the range [first, last) in an |
596 | // __order-dependent__ fashion. To hash an unordered container (e.g., |
597 | // folly::dynamic, hash tables like std::unordered_map), use |
598 | // commutative_hash_combine_range instead, which combines hashes of items |
599 | // independent of ordering. |
600 | template < |
601 | class Iter, |
602 | class Hash = std::hash<typename std::iterator_traits<Iter>::value_type>> |
603 | uint64_t |
604 | hash_range(Iter begin, Iter end, uint64_t hash = 0, Hash hasher = Hash()) { |
605 | for (; begin != end; ++begin) { |
606 | hash = hash_128_to_64(hash, hasher(*begin)); |
607 | } |
608 | return hash; |
609 | } |
610 | |
611 | template <class Hash, class Iter> |
612 | uint64_t commutative_hash_combine_range_generic( |
613 | uint64_t seed, |
614 | Hash const& hasher, |
615 | Iter first, |
616 | Iter last) { |
617 | while (first != last) { |
618 | seed = commutative_hash_combine_value_generic(seed, hasher, *first++); |
619 | } |
620 | return seed; |
621 | } |
622 | |
623 | template <class Iter> |
624 | uint64_t commutative_hash_combine_range(Iter first, Iter last) { |
625 | return commutative_hash_combine_range_generic(0, Hash{}, first, last); |
626 | } |
627 | |
628 | namespace detail { |
629 | using c_array_size_t = size_t[]; |
630 | } // namespace detail |
631 | |
632 | // Never used, but gcc demands it. |
633 | template <class Hasher> |
634 | inline size_t hash_combine_generic(const Hasher&) noexcept { |
635 | return 0; |
636 | } |
637 | |
638 | template <class Hasher, typename T, typename... Ts> |
639 | size_t hash_combine_generic( |
640 | const Hasher& h, |
641 | const T& t, |
642 | const Ts&... ts) noexcept(noexcept(detail::c_array_size_t{h(t), |
643 | h(ts)...})) { |
644 | size_t seed = h(t); |
645 | if (sizeof...(ts) == 0) { |
646 | return seed; |
647 | } |
648 | size_t remainder = hash_combine_generic(h, ts...); |
649 | if /* constexpr */ (sizeof(size_t) == sizeof(uint32_t)) { |
650 | return twang_32from64((uint64_t(seed) << 32) | remainder); |
651 | } else { |
652 | return static_cast<size_t>(hash_128_to_64(seed, remainder)); |
653 | } |
654 | } |
655 | |
656 | template <typename Hash, typename... Value> |
657 | uint64_t commutative_hash_combine_generic( |
658 | uint64_t seed, |
659 | Hash const& hasher, |
660 | Value const&... value) { |
661 | // variadic foreach: |
662 | uint64_t _[] = { |
663 | 0, seed = commutative_hash_combine_value_generic(seed, hasher, value)...}; |
664 | (void)_; |
665 | return seed; |
666 | } |
667 | |
668 | template <typename T, typename... Ts> |
669 | size_t hash_combine(const T& t, const Ts&... ts) noexcept( |
670 | noexcept(hash_combine_generic(StdHasher{}, t, ts...))) { |
671 | return hash_combine_generic(StdHasher{}, t, ts...); |
672 | } |
673 | |
674 | template <typename... Value> |
675 | uint64_t commutative_hash_combine(Value const&... value) { |
676 | return commutative_hash_combine_generic(0, Hash{}, value...); |
677 | } |
678 | } // namespace hash |
679 | |
680 | // recursion |
681 | template <size_t index, typename... Ts> |
682 | struct TupleHasher { |
683 | size_t operator()(std::tuple<Ts...> const& key) const { |
684 | return hash::hash_combine( |
685 | TupleHasher<index - 1, Ts...>()(key), std::get<index>(key)); |
686 | } |
687 | }; |
688 | |
689 | // base |
690 | template <typename... Ts> |
691 | struct TupleHasher<0, Ts...> { |
692 | size_t operator()(std::tuple<Ts...> const& key) const { |
693 | // we could do std::hash here directly, but hash_combine hides all the |
694 | // ugly templating implicitly |
695 | return hash::hash_combine(std::get<0>(key)); |
696 | } |
697 | }; |
698 | |
699 | } // namespace folly |
700 | |
701 | // Custom hash functions. |
702 | namespace std { |
703 | #if FOLLY_SUPPLY_MISSING_INT128_TRAITS |
704 | template <> |
705 | struct hash<__int128> : folly::detail::integral_hasher<__int128> {}; |
706 | |
707 | template <> |
708 | struct hash<unsigned __int128> |
709 | : folly::detail::integral_hasher<unsigned __int128> {}; |
710 | #endif |
711 | |
712 | // Hash function for pairs. Requires default hash functions for both |
713 | // items in the pair. |
714 | template <typename T1, typename T2> |
715 | struct hash<std::pair<T1, T2>> { |
716 | using folly_is_avalanching = std::true_type; |
717 | |
718 | size_t operator()(const std::pair<T1, T2>& x) const { |
719 | return folly::hash::hash_combine(x.first, x.second); |
720 | } |
721 | }; |
722 | |
723 | // Hash function for tuples. Requires default hash functions for all types. |
724 | template <typename... Ts> |
725 | struct hash<std::tuple<Ts...>> { |
726 | private: |
727 | using FirstT = std::decay_t<std::tuple_element_t<0, std::tuple<Ts..., bool>>>; |
728 | |
729 | public: |
730 | using folly_is_avalanching = folly::bool_constant<( |
731 | sizeof...(Ts) != 1 || |
732 | folly::IsAvalanchingHasher<std::hash<FirstT>, FirstT>::value)>; |
733 | |
734 | size_t operator()(std::tuple<Ts...> const& key) const { |
735 | folly::TupleHasher< |
736 | sizeof...(Ts) - 1, // start index |
737 | Ts...> |
738 | hasher; |
739 | |
740 | return hasher(key); |
741 | } |
742 | }; |
743 | } // namespace std |
744 | |
745 | namespace folly { |
746 | |
747 | // std::hash<std::string> is avalanching on libstdc++-v3 (code checked), |
748 | // libc++ (code checked), and MSVC (based on online information). |
749 | // std::hash for float and double on libstdc++-v3 are avalanching, |
750 | // but they are not on libc++. std::hash for integral types is not |
751 | // avalanching for libstdc++-v3 or libc++. We're conservative here and |
752 | // just mark std::string as avalanching. std::string_view will also be |
753 | // so, once it exists. |
754 | template <typename... Args, typename K> |
755 | struct IsAvalanchingHasher<std::hash<std::basic_string<Args...>>, K> |
756 | : std::true_type {}; |
757 | |
758 | } // namespace folly |
759 | |