| 1 | // Copyright 2018 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | // |
| 15 | // ----------------------------------------------------------------------------- |
| 16 | // File: hash.h |
| 17 | // ----------------------------------------------------------------------------- |
| 18 | // |
| 19 | #ifndef ABSL_HASH_INTERNAL_HASH_H_ |
| 20 | #define ABSL_HASH_INTERNAL_HASH_H_ |
| 21 | |
| 22 | #include <algorithm> |
| 23 | #include <array> |
| 24 | #include <cmath> |
| 25 | #include <cstring> |
| 26 | #include <deque> |
| 27 | #include <forward_list> |
| 28 | #include <functional> |
| 29 | #include <iterator> |
| 30 | #include <limits> |
| 31 | #include <list> |
| 32 | #include <map> |
| 33 | #include <memory> |
| 34 | #include <set> |
| 35 | #include <string> |
| 36 | #include <tuple> |
| 37 | #include <type_traits> |
| 38 | #include <utility> |
| 39 | #include <vector> |
| 40 | |
| 41 | #include "absl/base/internal/endian.h" |
| 42 | #include "absl/base/port.h" |
| 43 | #include "absl/container/fixed_array.h" |
| 44 | #include "absl/meta/type_traits.h" |
| 45 | #include "absl/numeric/int128.h" |
| 46 | #include "absl/strings/string_view.h" |
| 47 | #include "absl/types/optional.h" |
| 48 | #include "absl/types/variant.h" |
| 49 | #include "absl/utility/utility.h" |
| 50 | #include "absl/hash/internal/city.h" |
| 51 | |
| 52 | namespace absl { |
| 53 | namespace hash_internal { |
| 54 | |
| 55 | // HashStateBase |
| 56 | // |
| 57 | // A hash state object represents an intermediate state in the computation |
| 58 | // of an unspecified hash algorithm. `HashStateBase` provides a CRTP style |
| 59 | // base class for hash state implementations. Developers adding type support |
| 60 | // for `absl::Hash` should not rely on any parts of the state object other than |
| 61 | // the following member functions: |
| 62 | // |
| 63 | // * HashStateBase::combine() |
| 64 | // * HashStateBase::combine_contiguous() |
| 65 | // |
| 66 | // A derived hash state class of type `H` must provide a static member function |
| 67 | // with a signature similar to the following: |
| 68 | // |
| 69 | // `static H combine_contiguous(H state, const unsigned char*, size_t)`. |
| 70 | // |
| 71 | // `HashStateBase` will provide a complete implementations for a hash state |
| 72 | // object in terms of this method. |
| 73 | // |
| 74 | // Example: |
| 75 | // |
| 76 | // // Use CRTP to define your derived class. |
| 77 | // struct MyHashState : HashStateBase<MyHashState> { |
| 78 | // static H combine_contiguous(H state, const unsigned char*, size_t); |
| 79 | // using MyHashState::HashStateBase::combine; |
| 80 | // using MyHashState::HashStateBase::combine_contiguous; |
| 81 | // }; |
| 82 | template <typename H> |
| 83 | class HashStateBase { |
| 84 | public: |
| 85 | // HashStateBase::combine() |
| 86 | // |
| 87 | // Combines an arbitrary number of values into a hash state, returning the |
| 88 | // updated state. |
| 89 | // |
| 90 | // Each of the value types `T` must be separately hashable by the Abseil |
| 91 | // hashing framework. |
| 92 | // |
| 93 | // NOTE: |
| 94 | // |
| 95 | // state = H::combine(std::move(state), value1, value2, value3); |
| 96 | // |
| 97 | // is guaranteed to produce the same hash expansion as: |
| 98 | // |
| 99 | // state = H::combine(std::move(state), value1); |
| 100 | // state = H::combine(std::move(state), value2); |
| 101 | // state = H::combine(std::move(state), value3); |
| 102 | template <typename T, typename... Ts> |
| 103 | static H combine(H state, const T& value, const Ts&... values); |
| 104 | static H combine(H state) { return state; } |
| 105 | |
| 106 | // HashStateBase::combine_contiguous() |
| 107 | // |
| 108 | // Combines a contiguous array of `size` elements into a hash state, returning |
| 109 | // the updated state. |
| 110 | // |
| 111 | // NOTE: |
| 112 | // |
| 113 | // state = H::combine_contiguous(std::move(state), data, size); |
| 114 | // |
| 115 | // is NOT guaranteed to produce the same hash expansion as a for-loop (it may |
| 116 | // perform internal optimizations). If you need this guarantee, use the |
| 117 | // for-loop instead. |
| 118 | template <typename T> |
| 119 | static H combine_contiguous(H state, const T* data, size_t size); |
| 120 | }; |
| 121 | |
| 122 | // is_uniquely_represented |
| 123 | // |
| 124 | // `is_uniquely_represented<T>` is a trait class that indicates whether `T` |
| 125 | // is uniquely represented. |
| 126 | // |
| 127 | // A type is "uniquely represented" if two equal values of that type are |
| 128 | // guaranteed to have the same bytes in their underlying storage. In other |
| 129 | // words, if `a == b`, then `memcmp(&a, &b, sizeof(T))` is guaranteed to be |
| 130 | // zero. This property cannot be detected automatically, so this trait is false |
| 131 | // by default, but can be specialized by types that wish to assert that they are |
| 132 | // uniquely represented. This makes them eligible for certain optimizations. |
| 133 | // |
| 134 | // If you have any doubt whatsoever, do not specialize this template. |
| 135 | // The default is completely safe, and merely disables some optimizations |
| 136 | // that will not matter for most types. Specializing this template, |
| 137 | // on the other hand, can be very hazardous. |
| 138 | // |
| 139 | // To be uniquely represented, a type must not have multiple ways of |
| 140 | // representing the same value; for example, float and double are not |
| 141 | // uniquely represented, because they have distinct representations for |
| 142 | // +0 and -0. Furthermore, the type's byte representation must consist |
| 143 | // solely of user-controlled data, with no padding bits and no compiler- |
| 144 | // controlled data such as vptrs or sanitizer metadata. This is usually |
| 145 | // very difficult to guarantee, because in most cases the compiler can |
| 146 | // insert data and padding bits at its own discretion. |
| 147 | // |
| 148 | // If you specialize this template for a type `T`, you must do so in the file |
| 149 | // that defines that type (or in this file). If you define that specialization |
| 150 | // anywhere else, `is_uniquely_represented<T>` could have different meanings |
| 151 | // in different places. |
| 152 | // |
| 153 | // The Enable parameter is meaningless; it is provided as a convenience, |
| 154 | // to support certain SFINAE techniques when defining specializations. |
| 155 | template <typename T, typename Enable = void> |
| 156 | struct is_uniquely_represented : std::false_type {}; |
| 157 | |
| 158 | // is_uniquely_represented<unsigned char> |
| 159 | // |
| 160 | // unsigned char is a synonym for "byte", so it is guaranteed to be |
| 161 | // uniquely represented. |
| 162 | template <> |
| 163 | struct is_uniquely_represented<unsigned char> : std::true_type {}; |
| 164 | |
| 165 | // is_uniquely_represented for non-standard integral types |
| 166 | // |
| 167 | // Integral types other than bool should be uniquely represented on any |
| 168 | // platform that this will plausibly be ported to. |
| 169 | template <typename Integral> |
| 170 | struct is_uniquely_represented< |
| 171 | Integral, typename std::enable_if<std::is_integral<Integral>::value>::type> |
| 172 | : std::true_type {}; |
| 173 | |
| 174 | // is_uniquely_represented<bool> |
| 175 | // |
| 176 | // |
| 177 | template <> |
| 178 | struct is_uniquely_represented<bool> : std::false_type {}; |
| 179 | |
| 180 | // hash_bytes() |
| 181 | // |
| 182 | // Convenience function that combines `hash_state` with the byte representation |
| 183 | // of `value`. |
| 184 | template <typename H, typename T> |
| 185 | H hash_bytes(H hash_state, const T& value) { |
| 186 | const unsigned char* start = reinterpret_cast<const unsigned char*>(&value); |
| 187 | return H::combine_contiguous(std::move(hash_state), start, sizeof(value)); |
| 188 | } |
| 189 | |
| 190 | // ----------------------------------------------------------------------------- |
| 191 | // AbslHashValue for Basic Types |
| 192 | // ----------------------------------------------------------------------------- |
| 193 | |
| 194 | // Note: Default `AbslHashValue` implementations live in `hash_internal`. This |
| 195 | // allows us to block lexical scope lookup when doing an unqualified call to |
| 196 | // `AbslHashValue` below. User-defined implementations of `AbslHashValue` can |
| 197 | // only be found via ADL. |
| 198 | |
| 199 | // AbslHashValue() for hashing bool values |
| 200 | // |
| 201 | // We use SFINAE to ensure that this overload only accepts bool, not types that |
| 202 | // are convertible to bool. |
| 203 | template <typename H, typename B> |
| 204 | typename std::enable_if<std::is_same<B, bool>::value, H>::type AbslHashValue( |
| 205 | H hash_state, B value) { |
| 206 | return H::combine(std::move(hash_state), |
| 207 | static_cast<unsigned char>(value ? 1 : 0)); |
| 208 | } |
| 209 | |
| 210 | // AbslHashValue() for hashing enum values |
| 211 | template <typename H, typename Enum> |
| 212 | typename std::enable_if<std::is_enum<Enum>::value, H>::type AbslHashValue( |
| 213 | H hash_state, Enum e) { |
| 214 | // In practice, we could almost certainly just invoke hash_bytes directly, |
| 215 | // but it's possible that a sanitizer might one day want to |
| 216 | // store data in the unused bits of an enum. To avoid that risk, we |
| 217 | // convert to the underlying type before hashing. Hopefully this will get |
| 218 | // optimized away; if not, we can reopen discussion with c-toolchain-team. |
| 219 | return H::combine(std::move(hash_state), |
| 220 | static_cast<typename std::underlying_type<Enum>::type>(e)); |
| 221 | } |
| 222 | // AbslHashValue() for hashing floating-point values |
| 223 | template <typename H, typename Float> |
| 224 | typename std::enable_if<std::is_same<Float, float>::value || |
| 225 | std::is_same<Float, double>::value, |
| 226 | H>::type |
| 227 | AbslHashValue(H hash_state, Float value) { |
| 228 | return hash_internal::hash_bytes(std::move(hash_state), |
| 229 | value == 0 ? 0 : value); |
| 230 | } |
| 231 | |
| 232 | // Long double has the property that it might have extra unused bytes in it. |
| 233 | // For example, in x86 sizeof(long double)==16 but it only really uses 80-bits |
| 234 | // of it. This means we can't use hash_bytes on a long double and have to |
| 235 | // convert it to something else first. |
| 236 | template <typename H, typename LongDouble> |
| 237 | typename std::enable_if<std::is_same<LongDouble, long double>::value, H>::type |
| 238 | AbslHashValue(H hash_state, LongDouble value) { |
| 239 | const int category = std::fpclassify(value); |
| 240 | switch (category) { |
| 241 | case FP_INFINITE: |
| 242 | // Add the sign bit to differentiate between +Inf and -Inf |
| 243 | hash_state = H::combine(std::move(hash_state), std::signbit(value)); |
| 244 | break; |
| 245 | |
| 246 | case FP_NAN: |
| 247 | case FP_ZERO: |
| 248 | default: |
| 249 | // Category is enough for these. |
| 250 | break; |
| 251 | |
| 252 | case FP_NORMAL: |
| 253 | case FP_SUBNORMAL: |
| 254 | // We can't convert `value` directly to double because this would have |
| 255 | // undefined behavior if the value is out of range. |
| 256 | // std::frexp gives us a value in the range (-1, -.5] or [.5, 1) that is |
| 257 | // guaranteed to be in range for `double`. The truncation is |
| 258 | // implementation defined, but that works as long as it is deterministic. |
| 259 | int exp; |
| 260 | auto mantissa = static_cast<double>(std::frexp(value, &exp)); |
| 261 | hash_state = H::combine(std::move(hash_state), mantissa, exp); |
| 262 | } |
| 263 | |
| 264 | return H::combine(std::move(hash_state), category); |
| 265 | } |
| 266 | |
| 267 | // AbslHashValue() for hashing pointers |
| 268 | template <typename H, typename T> |
| 269 | H AbslHashValue(H hash_state, T* ptr) { |
| 270 | auto v = reinterpret_cast<uintptr_t>(ptr); |
| 271 | // Due to alignment, pointers tend to have low bits as zero, and the next few |
| 272 | // bits follow a pattern since they are also multiples of some base value. |
| 273 | // Mixing the pointer twice helps prevent stuck low bits for certain alignment |
| 274 | // values. |
| 275 | return H::combine(std::move(hash_state), v, v); |
| 276 | } |
| 277 | |
| 278 | // AbslHashValue() for hashing nullptr_t |
| 279 | template <typename H> |
| 280 | H AbslHashValue(H hash_state, std::nullptr_t) { |
| 281 | return H::combine(std::move(hash_state), static_cast<void*>(nullptr)); |
| 282 | } |
| 283 | |
| 284 | // ----------------------------------------------------------------------------- |
| 285 | // AbslHashValue for Composite Types |
| 286 | // ----------------------------------------------------------------------------- |
| 287 | |
| 288 | // is_hashable() |
| 289 | // |
| 290 | // Trait class which returns true if T is hashable by the absl::Hash framework. |
| 291 | // Used for the AbslHashValue implementations for composite types below. |
| 292 | template <typename T> |
| 293 | struct is_hashable; |
| 294 | |
| 295 | // AbslHashValue() for hashing pairs |
| 296 | template <typename H, typename T1, typename T2> |
| 297 | typename std::enable_if<is_hashable<T1>::value && is_hashable<T2>::value, |
| 298 | H>::type |
| 299 | AbslHashValue(H hash_state, const std::pair<T1, T2>& p) { |
| 300 | return H::combine(std::move(hash_state), p.first, p.second); |
| 301 | } |
| 302 | |
| 303 | // hash_tuple() |
| 304 | // |
| 305 | // Helper function for hashing a tuple. The third argument should |
| 306 | // be an index_sequence running from 0 to tuple_size<Tuple> - 1. |
| 307 | template <typename H, typename Tuple, size_t... Is> |
| 308 | H hash_tuple(H hash_state, const Tuple& t, absl::index_sequence<Is...>) { |
| 309 | return H::combine(std::move(hash_state), std::get<Is>(t)...); |
| 310 | } |
| 311 | |
| 312 | // AbslHashValue for hashing tuples |
| 313 | template <typename H, typename... Ts> |
| 314 | #if defined(_MSC_VER) |
| 315 | // This SFINAE gets MSVC confused under some conditions. Let's just disable it |
| 316 | // for now. |
| 317 | H |
| 318 | #else // _MSC_VER |
| 319 | typename std::enable_if<absl::conjunction<is_hashable<Ts>...>::value, H>::type |
| 320 | #endif // _MSC_VER |
| 321 | AbslHashValue(H hash_state, const std::tuple<Ts...>& t) { |
| 322 | return hash_internal::hash_tuple(std::move(hash_state), t, |
| 323 | absl::make_index_sequence<sizeof...(Ts)>()); |
| 324 | } |
| 325 | |
| 326 | // ----------------------------------------------------------------------------- |
| 327 | // AbslHashValue for Pointers |
| 328 | // ----------------------------------------------------------------------------- |
| 329 | |
| 330 | // AbslHashValue for hashing unique_ptr |
| 331 | template <typename H, typename T, typename D> |
| 332 | H AbslHashValue(H hash_state, const std::unique_ptr<T, D>& ptr) { |
| 333 | return H::combine(std::move(hash_state), ptr.get()); |
| 334 | } |
| 335 | |
| 336 | // AbslHashValue for hashing shared_ptr |
| 337 | template <typename H, typename T> |
| 338 | H AbslHashValue(H hash_state, const std::shared_ptr<T>& ptr) { |
| 339 | return H::combine(std::move(hash_state), ptr.get()); |
| 340 | } |
| 341 | |
| 342 | // ----------------------------------------------------------------------------- |
| 343 | // AbslHashValue for String-Like Types |
| 344 | // ----------------------------------------------------------------------------- |
| 345 | |
| 346 | // AbslHashValue for hashing strings |
| 347 | // |
| 348 | // All the string-like types supported here provide the same hash expansion for |
| 349 | // the same character sequence. These types are: |
| 350 | // |
| 351 | // - `std::string` (and std::basic_string<char, std::char_traits<char>, A> for |
| 352 | // any allocator A) |
| 353 | // - `absl::string_view` and `std::string_view` |
| 354 | // |
| 355 | // For simplicity, we currently support only `char` strings. This support may |
| 356 | // be broadened, if necessary, but with some caution - this overload would |
| 357 | // misbehave in cases where the traits' `eq()` member isn't equivalent to `==` |
| 358 | // on the underlying character type. |
| 359 | template <typename H> |
| 360 | H AbslHashValue(H hash_state, absl::string_view str) { |
| 361 | return H::combine( |
| 362 | H::combine_contiguous(std::move(hash_state), str.data(), str.size()), |
| 363 | str.size()); |
| 364 | } |
| 365 | |
| 366 | // ----------------------------------------------------------------------------- |
| 367 | // AbslHashValue for Sequence Containers |
| 368 | // ----------------------------------------------------------------------------- |
| 369 | |
| 370 | // AbslHashValue for hashing std::array |
| 371 | template <typename H, typename T, size_t N> |
| 372 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
| 373 | H hash_state, const std::array<T, N>& array) { |
| 374 | return H::combine_contiguous(std::move(hash_state), array.data(), |
| 375 | array.size()); |
| 376 | } |
| 377 | |
| 378 | // AbslHashValue for hashing std::deque |
| 379 | template <typename H, typename T, typename Allocator> |
| 380 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
| 381 | H hash_state, const std::deque<T, Allocator>& deque) { |
| 382 | // TODO(gromer): investigate a more efficient implementation taking |
| 383 | // advantage of the chunk structure. |
| 384 | for (const auto& t : deque) { |
| 385 | hash_state = H::combine(std::move(hash_state), t); |
| 386 | } |
| 387 | return H::combine(std::move(hash_state), deque.size()); |
| 388 | } |
| 389 | |
| 390 | // AbslHashValue for hashing std::forward_list |
| 391 | template <typename H, typename T, typename Allocator> |
| 392 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
| 393 | H hash_state, const std::forward_list<T, Allocator>& list) { |
| 394 | size_t size = 0; |
| 395 | for (const T& t : list) { |
| 396 | hash_state = H::combine(std::move(hash_state), t); |
| 397 | ++size; |
| 398 | } |
| 399 | return H::combine(std::move(hash_state), size); |
| 400 | } |
| 401 | |
| 402 | // AbslHashValue for hashing std::list |
| 403 | template <typename H, typename T, typename Allocator> |
| 404 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
| 405 | H hash_state, const std::list<T, Allocator>& list) { |
| 406 | for (const auto& t : list) { |
| 407 | hash_state = H::combine(std::move(hash_state), t); |
| 408 | } |
| 409 | return H::combine(std::move(hash_state), list.size()); |
| 410 | } |
| 411 | |
| 412 | // AbslHashValue for hashing std::vector |
| 413 | // |
| 414 | // Do not use this for vector<bool>. It does not have a .data(), and a fallback |
| 415 | // for std::hash<> is most likely faster. |
| 416 | template <typename H, typename T, typename Allocator> |
| 417 | typename std::enable_if<is_hashable<T>::value && !std::is_same<T, bool>::value, |
| 418 | H>::type |
| 419 | AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) { |
| 420 | return H::combine(H::combine_contiguous(std::move(hash_state), vector.data(), |
| 421 | vector.size()), |
| 422 | vector.size()); |
| 423 | } |
| 424 | |
| 425 | // ----------------------------------------------------------------------------- |
| 426 | // AbslHashValue for Ordered Associative Containers |
| 427 | // ----------------------------------------------------------------------------- |
| 428 | |
| 429 | // AbslHashValue for hashing std::map |
| 430 | template <typename H, typename Key, typename T, typename Compare, |
| 431 | typename Allocator> |
| 432 | typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value, |
| 433 | H>::type |
| 434 | AbslHashValue(H hash_state, const std::map<Key, T, Compare, Allocator>& map) { |
| 435 | for (const auto& t : map) { |
| 436 | hash_state = H::combine(std::move(hash_state), t); |
| 437 | } |
| 438 | return H::combine(std::move(hash_state), map.size()); |
| 439 | } |
| 440 | |
| 441 | // AbslHashValue for hashing std::multimap |
| 442 | template <typename H, typename Key, typename T, typename Compare, |
| 443 | typename Allocator> |
| 444 | typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value, |
| 445 | H>::type |
| 446 | AbslHashValue(H hash_state, |
| 447 | const std::multimap<Key, T, Compare, Allocator>& map) { |
| 448 | for (const auto& t : map) { |
| 449 | hash_state = H::combine(std::move(hash_state), t); |
| 450 | } |
| 451 | return H::combine(std::move(hash_state), map.size()); |
| 452 | } |
| 453 | |
| 454 | // AbslHashValue for hashing std::set |
| 455 | template <typename H, typename Key, typename Compare, typename Allocator> |
| 456 | typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( |
| 457 | H hash_state, const std::set<Key, Compare, Allocator>& set) { |
| 458 | for (const auto& t : set) { |
| 459 | hash_state = H::combine(std::move(hash_state), t); |
| 460 | } |
| 461 | return H::combine(std::move(hash_state), set.size()); |
| 462 | } |
| 463 | |
| 464 | // AbslHashValue for hashing std::multiset |
| 465 | template <typename H, typename Key, typename Compare, typename Allocator> |
| 466 | typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( |
| 467 | H hash_state, const std::multiset<Key, Compare, Allocator>& set) { |
| 468 | for (const auto& t : set) { |
| 469 | hash_state = H::combine(std::move(hash_state), t); |
| 470 | } |
| 471 | return H::combine(std::move(hash_state), set.size()); |
| 472 | } |
| 473 | |
| 474 | // ----------------------------------------------------------------------------- |
| 475 | // AbslHashValue for Wrapper Types |
| 476 | // ----------------------------------------------------------------------------- |
| 477 | |
| 478 | // AbslHashValue for hashing absl::optional |
| 479 | template <typename H, typename T> |
| 480 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
| 481 | H hash_state, const absl::optional<T>& opt) { |
| 482 | if (opt) hash_state = H::combine(std::move(hash_state), *opt); |
| 483 | return H::combine(std::move(hash_state), opt.has_value()); |
| 484 | } |
| 485 | |
| 486 | // VariantVisitor |
| 487 | template <typename H> |
| 488 | struct VariantVisitor { |
| 489 | H&& hash_state; |
| 490 | template <typename T> |
| 491 | H operator()(const T& t) const { |
| 492 | return H::combine(std::move(hash_state), t); |
| 493 | } |
| 494 | }; |
| 495 | |
| 496 | // AbslHashValue for hashing absl::variant |
| 497 | template <typename H, typename... T> |
| 498 | typename std::enable_if<conjunction<is_hashable<T>...>::value, H>::type |
| 499 | AbslHashValue(H hash_state, const absl::variant<T...>& v) { |
| 500 | if (!v.valueless_by_exception()) { |
| 501 | hash_state = absl::visit(VariantVisitor<H>{std::move(hash_state)}, v); |
| 502 | } |
| 503 | return H::combine(std::move(hash_state), v.index()); |
| 504 | } |
| 505 | |
| 506 | // ----------------------------------------------------------------------------- |
| 507 | // AbslHashValue for Other Types |
| 508 | // ----------------------------------------------------------------------------- |
| 509 | |
| 510 | // AbslHashValue for hashing std::bitset is not defined, for the same reason as |
| 511 | // for vector<bool> (see std::vector above): It does not expose the raw bytes, |
| 512 | // and a fallback to std::hash<> is most likely faster. |
| 513 | |
| 514 | // ----------------------------------------------------------------------------- |
| 515 | |
| 516 | // hash_range_or_bytes() |
| 517 | // |
| 518 | // Mixes all values in the range [data, data+size) into the hash state. |
| 519 | // This overload accepts only uniquely-represented types, and hashes them by |
| 520 | // hashing the entire range of bytes. |
| 521 | template <typename H, typename T> |
| 522 | typename std::enable_if<is_uniquely_represented<T>::value, H>::type |
| 523 | hash_range_or_bytes(H hash_state, const T* data, size_t size) { |
| 524 | const auto* bytes = reinterpret_cast<const unsigned char*>(data); |
| 525 | return H::combine_contiguous(std::move(hash_state), bytes, sizeof(T) * size); |
| 526 | } |
| 527 | |
| 528 | // hash_range_or_bytes() |
| 529 | template <typename H, typename T> |
| 530 | typename std::enable_if<!is_uniquely_represented<T>::value, H>::type |
| 531 | hash_range_or_bytes(H hash_state, const T* data, size_t size) { |
| 532 | for (const auto end = data + size; data < end; ++data) { |
| 533 | hash_state = H::combine(std::move(hash_state), *data); |
| 534 | } |
| 535 | return hash_state; |
| 536 | } |
| 537 | |
| 538 | #if defined(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE) && \ |
| 539 | ABSL_META_INTERNAL_STD_HASH_SFINAE_FRIENDLY_ |
| 540 | #define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 1 |
| 541 | #else |
| 542 | #define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 0 |
| 543 | #endif |
| 544 | |
| 545 | // HashSelect |
| 546 | // |
| 547 | // Type trait to select the appropriate hash implementation to use. |
| 548 | // HashSelect::type<T> will give the proper hash implementation, to be invoked |
| 549 | // as: |
| 550 | // HashSelect::type<T>::Invoke(state, value) |
| 551 | // Also, HashSelect::type<T>::value is a boolean equal to `true` if there is a |
| 552 | // valid `Invoke` function. Types that are not hashable will have a ::value of |
| 553 | // `false`. |
| 554 | struct HashSelect { |
| 555 | private: |
| 556 | struct State : HashStateBase<State> { |
| 557 | static State combine_contiguous(State hash_state, const unsigned char*, |
| 558 | size_t); |
| 559 | using State::HashStateBase::combine_contiguous; |
| 560 | }; |
| 561 | |
| 562 | struct UniquelyRepresentedProbe { |
| 563 | template <typename H, typename T> |
| 564 | static auto Invoke(H state, const T& value) |
| 565 | -> absl::enable_if_t<is_uniquely_represented<T>::value, H> { |
| 566 | return hash_internal::hash_bytes(std::move(state), value); |
| 567 | } |
| 568 | }; |
| 569 | |
| 570 | struct HashValueProbe { |
| 571 | template <typename H, typename T> |
| 572 | static auto Invoke(H state, const T& value) -> absl::enable_if_t< |
| 573 | std::is_same<H, |
| 574 | decltype(AbslHashValue(std::move(state), value))>::value, |
| 575 | H> { |
| 576 | return AbslHashValue(std::move(state), value); |
| 577 | } |
| 578 | }; |
| 579 | |
| 580 | struct LegacyHashProbe { |
| 581 | #if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ |
| 582 | template <typename H, typename T> |
| 583 | static auto Invoke(H state, const T& value) -> absl::enable_if_t< |
| 584 | std::is_convertible< |
| 585 | decltype(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<T>()(value)), |
| 586 | size_t>::value, |
| 587 | H> { |
| 588 | return hash_internal::hash_bytes( |
| 589 | std::move(state), |
| 590 | ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<T>{}(value)); |
| 591 | } |
| 592 | #endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ |
| 593 | }; |
| 594 | |
| 595 | struct StdHashProbe { |
| 596 | template <typename H, typename T> |
| 597 | static auto Invoke(H state, const T& value) |
| 598 | -> absl::enable_if_t<type_traits_internal::IsHashable<T>::value, H> { |
| 599 | return hash_internal::hash_bytes(std::move(state), std::hash<T>{}(value)); |
| 600 | } |
| 601 | }; |
| 602 | |
| 603 | template <typename Hash, typename T> |
| 604 | struct Probe : Hash { |
| 605 | private: |
| 606 | template <typename H, typename = decltype(H::Invoke( |
| 607 | std::declval<State>(), std::declval<const T&>()))> |
| 608 | static std::true_type Test(int); |
| 609 | template <typename U> |
| 610 | static std::false_type Test(char); |
| 611 | |
| 612 | public: |
| 613 | static constexpr bool value = decltype(Test<Hash>(0))::value; |
| 614 | }; |
| 615 | |
| 616 | public: |
| 617 | // Probe each implementation in order. |
| 618 | // disjunction provides short circuiting wrt instantiation. |
| 619 | template <typename T> |
| 620 | using Apply = absl::disjunction< // |
| 621 | Probe<UniquelyRepresentedProbe, T>, // |
| 622 | Probe<HashValueProbe, T>, // |
| 623 | Probe<LegacyHashProbe, T>, // |
| 624 | Probe<StdHashProbe, T>, // |
| 625 | std::false_type>; |
| 626 | }; |
| 627 | |
| 628 | template <typename T> |
| 629 | struct is_hashable |
| 630 | : std::integral_constant<bool, HashSelect::template Apply<T>::value> {}; |
| 631 | |
| 632 | // CityHashState |
| 633 | class CityHashState : public HashStateBase<CityHashState> { |
| 634 | // absl::uint128 is not an alias or a thin wrapper around the intrinsic. |
| 635 | // We use the intrinsic when available to improve performance. |
| 636 | #ifdef ABSL_HAVE_INTRINSIC_INT128 |
| 637 | using uint128 = __uint128_t; |
| 638 | #else // ABSL_HAVE_INTRINSIC_INT128 |
| 639 | using uint128 = absl::uint128; |
| 640 | #endif // ABSL_HAVE_INTRINSIC_INT128 |
| 641 | |
| 642 | static constexpr uint64_t kMul = |
| 643 | sizeof(size_t) == 4 ? uint64_t{0xcc9e2d51} : uint64_t{0x9ddfea08eb382d69}; |
| 644 | |
| 645 | template <typename T> |
| 646 | using IntegralFastPath = |
| 647 | conjunction<std::is_integral<T>, is_uniquely_represented<T>>; |
| 648 | |
| 649 | public: |
| 650 | // Move only |
| 651 | CityHashState(CityHashState&&) = default; |
| 652 | CityHashState& operator=(CityHashState&&) = default; |
| 653 | |
| 654 | // CityHashState::combine_contiguous() |
| 655 | // |
| 656 | // Fundamental base case for hash recursion: mixes the given range of bytes |
| 657 | // into the hash state. |
| 658 | static CityHashState combine_contiguous(CityHashState hash_state, |
| 659 | const unsigned char* first, |
| 660 | size_t size) { |
| 661 | return CityHashState( |
| 662 | CombineContiguousImpl(hash_state.state_, first, size, |
| 663 | std::integral_constant<int, sizeof(size_t)>{})); |
| 664 | } |
| 665 | using CityHashState::HashStateBase::combine_contiguous; |
| 666 | |
| 667 | // CityHashState::hash() |
| 668 | // |
| 669 | // For performance reasons in non-opt mode, we specialize this for |
| 670 | // integral types. |
| 671 | // Otherwise we would be instantiating and calling dozens of functions for |
| 672 | // something that is just one multiplication and a couple xor's. |
| 673 | // The result should be the same as running the whole algorithm, but faster. |
| 674 | template <typename T, absl::enable_if_t<IntegralFastPath<T>::value, int> = 0> |
| 675 | static size_t hash(T value) { |
| 676 | return static_cast<size_t>(Mix(Seed(), static_cast<uint64_t>(value))); |
| 677 | } |
| 678 | |
| 679 | // Overload of CityHashState::hash() |
| 680 | template <typename T, absl::enable_if_t<!IntegralFastPath<T>::value, int> = 0> |
| 681 | static size_t hash(const T& value) { |
| 682 | return static_cast<size_t>(combine(CityHashState{}, value).state_); |
| 683 | } |
| 684 | |
| 685 | private: |
| 686 | // Invoked only once for a given argument; that plus the fact that this is |
| 687 | // move-only ensures that there is only one non-moved-from object. |
| 688 | CityHashState() : state_(Seed()) {} |
| 689 | |
| 690 | // Workaround for MSVC bug. |
| 691 | // We make the type copyable to fix the calling convention, even though we |
| 692 | // never actually copy it. Keep it private to not affect the public API of the |
| 693 | // type. |
| 694 | CityHashState(const CityHashState&) = default; |
| 695 | |
| 696 | explicit CityHashState(uint64_t state) : state_(state) {} |
| 697 | |
| 698 | // Implementation of the base case for combine_contiguous where we actually |
| 699 | // mix the bytes into the state. |
| 700 | // Dispatch to different implementations of the combine_contiguous depending |
| 701 | // on the value of `sizeof(size_t)`. |
| 702 | static uint64_t CombineContiguousImpl(uint64_t state, |
| 703 | const unsigned char* first, size_t len, |
| 704 | std::integral_constant<int, 4> |
| 705 | /* sizeof_size_t */); |
| 706 | static uint64_t CombineContiguousImpl(uint64_t state, |
| 707 | const unsigned char* first, size_t len, |
| 708 | std::integral_constant<int, 8> |
| 709 | /* sizeof_size_t*/); |
| 710 | |
| 711 | // Reads 9 to 16 bytes from p. |
| 712 | // The first 8 bytes are in .first, the rest (zero padded) bytes are in |
| 713 | // .second. |
| 714 | static std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p, |
| 715 | size_t len) { |
| 716 | uint64_t high = little_endian::Load64(p + len - 8); |
| 717 | return {little_endian::Load64(p), high >> (128 - len * 8)}; |
| 718 | } |
| 719 | |
| 720 | // Reads 4 to 8 bytes from p. Zero pads to fill uint64_t. |
| 721 | static uint64_t Read4To8(const unsigned char* p, size_t len) { |
| 722 | return (static_cast<uint64_t>(little_endian::Load32(p + len - 4)) |
| 723 | << (len - 4) * 8) | |
| 724 | little_endian::Load32(p); |
| 725 | } |
| 726 | |
| 727 | // Reads 1 to 3 bytes from p. Zero pads to fill uint32_t. |
| 728 | static uint32_t Read1To3(const unsigned char* p, size_t len) { |
| 729 | return static_cast<uint32_t>((p[0]) | // |
| 730 | (p[len / 2] << (len / 2 * 8)) | // |
| 731 | (p[len - 1] << ((len - 1) * 8))); |
| 732 | } |
| 733 | |
| 734 | ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t state, uint64_t v) { |
| 735 | using MultType = |
| 736 | absl::conditional_t<sizeof(size_t) == 4, uint64_t, uint128>; |
| 737 | // We do the addition in 64-bit space to make sure the 128-bit |
| 738 | // multiplication is fast. If we were to do it as MultType the compiler has |
| 739 | // to assume that the high word is non-zero and needs to perform 2 |
| 740 | // multiplications instead of one. |
| 741 | MultType m = state + v; |
| 742 | m *= kMul; |
| 743 | return static_cast<uint64_t>(m ^ (m >> (sizeof(m) * 8 / 2))); |
| 744 | } |
| 745 | |
| 746 | // Seed() |
| 747 | // |
| 748 | // A non-deterministic seed. |
| 749 | // |
| 750 | // The current purpose of this seed is to generate non-deterministic results |
| 751 | // and prevent having users depend on the particular hash values. |
| 752 | // It is not meant as a security feature right now, but it leaves the door |
| 753 | // open to upgrade it to a true per-process random seed. A true random seed |
| 754 | // costs more and we don't need to pay for that right now. |
| 755 | // |
| 756 | // On platforms with ASLR, we take advantage of it to make a per-process |
| 757 | // random value. |
| 758 | // See https://en.wikipedia.org/wiki/Address_space_layout_randomization |
| 759 | // |
| 760 | // On other platforms this is still going to be non-deterministic but most |
| 761 | // probably per-build and not per-process. |
| 762 | ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Seed() { |
| 763 | return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(kSeed)); |
| 764 | } |
| 765 | static const void* const kSeed; |
| 766 | |
| 767 | uint64_t state_; |
| 768 | }; |
| 769 | |
| 770 | // CityHashState::CombineContiguousImpl() |
| 771 | inline uint64_t CityHashState::CombineContiguousImpl( |
| 772 | uint64_t state, const unsigned char* first, size_t len, |
| 773 | std::integral_constant<int, 4> /* sizeof_size_t */) { |
| 774 | // For large values we use CityHash, for small ones we just use a |
| 775 | // multiplicative hash. |
| 776 | uint64_t v; |
| 777 | if (len > 8) { |
| 778 | v = absl::hash_internal::CityHash32(reinterpret_cast<const char*>(first), len); |
| 779 | } else if (len >= 4) { |
| 780 | v = Read4To8(first, len); |
| 781 | } else if (len > 0) { |
| 782 | v = Read1To3(first, len); |
| 783 | } else { |
| 784 | // Empty ranges have no effect. |
| 785 | return state; |
| 786 | } |
| 787 | return Mix(state, v); |
| 788 | } |
| 789 | |
| 790 | // Overload of CityHashState::CombineContiguousImpl() |
| 791 | inline uint64_t CityHashState::CombineContiguousImpl( |
| 792 | uint64_t state, const unsigned char* first, size_t len, |
| 793 | std::integral_constant<int, 8> /* sizeof_size_t */) { |
| 794 | // For large values we use CityHash, for small ones we just use a |
| 795 | // multiplicative hash. |
| 796 | uint64_t v; |
| 797 | if (len > 16) { |
| 798 | v = absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first), len); |
| 799 | } else if (len > 8) { |
| 800 | auto p = Read9To16(first, len); |
| 801 | state = Mix(state, p.first); |
| 802 | v = p.second; |
| 803 | } else if (len >= 4) { |
| 804 | v = Read4To8(first, len); |
| 805 | } else if (len > 0) { |
| 806 | v = Read1To3(first, len); |
| 807 | } else { |
| 808 | // Empty ranges have no effect. |
| 809 | return state; |
| 810 | } |
| 811 | return Mix(state, v); |
| 812 | } |
| 813 | |
| 814 | |
| 815 | struct AggregateBarrier {}; |
| 816 | |
| 817 | // HashImpl |
| 818 | |
| 819 | // Add a private base class to make sure this type is not an aggregate. |
| 820 | // Aggregates can be aggregate initialized even if the default constructor is |
| 821 | // deleted. |
| 822 | struct PoisonedHash : private AggregateBarrier { |
| 823 | PoisonedHash() = delete; |
| 824 | PoisonedHash(const PoisonedHash&) = delete; |
| 825 | PoisonedHash& operator=(const PoisonedHash&) = delete; |
| 826 | }; |
| 827 | |
| 828 | template <typename T> |
| 829 | struct HashImpl { |
| 830 | size_t operator()(const T& value) const { return CityHashState::hash(value); } |
| 831 | }; |
| 832 | |
| 833 | template <typename T> |
| 834 | struct Hash |
| 835 | : absl::conditional_t<is_hashable<T>::value, HashImpl<T>, PoisonedHash> {}; |
| 836 | |
| 837 | template <typename H> |
| 838 | template <typename T, typename... Ts> |
| 839 | H HashStateBase<H>::combine(H state, const T& value, const Ts&... values) { |
| 840 | return H::combine(hash_internal::HashSelect::template Apply<T>::Invoke( |
| 841 | std::move(state), value), |
| 842 | values...); |
| 843 | } |
| 844 | |
| 845 | // HashStateBase::combine_contiguous() |
| 846 | template <typename H> |
| 847 | template <typename T> |
| 848 | H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) { |
| 849 | return hash_internal::hash_range_or_bytes(std::move(state), data, size); |
| 850 | } |
| 851 | } // namespace hash_internal |
| 852 | } // namespace absl |
| 853 | |
| 854 | #endif // ABSL_HASH_INTERNAL_HASH_H_ |
| 855 | |