1 | // Copyright 2018 The Abseil Authors. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | // |
15 | // ----------------------------------------------------------------------------- |
16 | // File: hash.h |
17 | // ----------------------------------------------------------------------------- |
18 | // |
19 | #ifndef ABSL_HASH_INTERNAL_HASH_H_ |
20 | #define ABSL_HASH_INTERNAL_HASH_H_ |
21 | |
22 | #include <algorithm> |
23 | #include <array> |
24 | #include <cmath> |
25 | #include <cstring> |
26 | #include <deque> |
27 | #include <forward_list> |
28 | #include <functional> |
29 | #include <iterator> |
30 | #include <limits> |
31 | #include <list> |
32 | #include <map> |
33 | #include <memory> |
34 | #include <set> |
35 | #include <string> |
36 | #include <tuple> |
37 | #include <type_traits> |
38 | #include <utility> |
39 | #include <vector> |
40 | |
41 | #include "absl/base/internal/endian.h" |
42 | #include "absl/base/port.h" |
43 | #include "absl/container/fixed_array.h" |
44 | #include "absl/meta/type_traits.h" |
45 | #include "absl/numeric/int128.h" |
46 | #include "absl/strings/string_view.h" |
47 | #include "absl/types/optional.h" |
48 | #include "absl/types/variant.h" |
49 | #include "absl/utility/utility.h" |
50 | #include "absl/hash/internal/city.h" |
51 | |
52 | namespace absl { |
53 | namespace hash_internal { |
54 | |
55 | // HashStateBase |
56 | // |
57 | // A hash state object represents an intermediate state in the computation |
58 | // of an unspecified hash algorithm. `HashStateBase` provides a CRTP style |
59 | // base class for hash state implementations. Developers adding type support |
60 | // for `absl::Hash` should not rely on any parts of the state object other than |
61 | // the following member functions: |
62 | // |
63 | // * HashStateBase::combine() |
64 | // * HashStateBase::combine_contiguous() |
65 | // |
66 | // A derived hash state class of type `H` must provide a static member function |
67 | // with a signature similar to the following: |
68 | // |
69 | // `static H combine_contiguous(H state, const unsigned char*, size_t)`. |
70 | // |
71 | // `HashStateBase` will provide a complete implementations for a hash state |
72 | // object in terms of this method. |
73 | // |
74 | // Example: |
75 | // |
76 | // // Use CRTP to define your derived class. |
77 | // struct MyHashState : HashStateBase<MyHashState> { |
78 | // static H combine_contiguous(H state, const unsigned char*, size_t); |
79 | // using MyHashState::HashStateBase::combine; |
80 | // using MyHashState::HashStateBase::combine_contiguous; |
81 | // }; |
82 | template <typename H> |
83 | class HashStateBase { |
84 | public: |
85 | // HashStateBase::combine() |
86 | // |
87 | // Combines an arbitrary number of values into a hash state, returning the |
88 | // updated state. |
89 | // |
90 | // Each of the value types `T` must be separately hashable by the Abseil |
91 | // hashing framework. |
92 | // |
93 | // NOTE: |
94 | // |
95 | // state = H::combine(std::move(state), value1, value2, value3); |
96 | // |
97 | // is guaranteed to produce the same hash expansion as: |
98 | // |
99 | // state = H::combine(std::move(state), value1); |
100 | // state = H::combine(std::move(state), value2); |
101 | // state = H::combine(std::move(state), value3); |
102 | template <typename T, typename... Ts> |
103 | static H combine(H state, const T& value, const Ts&... values); |
104 | static H combine(H state) { return state; } |
105 | |
106 | // HashStateBase::combine_contiguous() |
107 | // |
108 | // Combines a contiguous array of `size` elements into a hash state, returning |
109 | // the updated state. |
110 | // |
111 | // NOTE: |
112 | // |
113 | // state = H::combine_contiguous(std::move(state), data, size); |
114 | // |
115 | // is NOT guaranteed to produce the same hash expansion as a for-loop (it may |
116 | // perform internal optimizations). If you need this guarantee, use the |
117 | // for-loop instead. |
118 | template <typename T> |
119 | static H combine_contiguous(H state, const T* data, size_t size); |
120 | }; |
121 | |
122 | // is_uniquely_represented |
123 | // |
124 | // `is_uniquely_represented<T>` is a trait class that indicates whether `T` |
125 | // is uniquely represented. |
126 | // |
127 | // A type is "uniquely represented" if two equal values of that type are |
128 | // guaranteed to have the same bytes in their underlying storage. In other |
129 | // words, if `a == b`, then `memcmp(&a, &b, sizeof(T))` is guaranteed to be |
130 | // zero. This property cannot be detected automatically, so this trait is false |
131 | // by default, but can be specialized by types that wish to assert that they are |
132 | // uniquely represented. This makes them eligible for certain optimizations. |
133 | // |
134 | // If you have any doubt whatsoever, do not specialize this template. |
135 | // The default is completely safe, and merely disables some optimizations |
136 | // that will not matter for most types. Specializing this template, |
137 | // on the other hand, can be very hazardous. |
138 | // |
139 | // To be uniquely represented, a type must not have multiple ways of |
140 | // representing the same value; for example, float and double are not |
141 | // uniquely represented, because they have distinct representations for |
142 | // +0 and -0. Furthermore, the type's byte representation must consist |
143 | // solely of user-controlled data, with no padding bits and no compiler- |
144 | // controlled data such as vptrs or sanitizer metadata. This is usually |
145 | // very difficult to guarantee, because in most cases the compiler can |
146 | // insert data and padding bits at its own discretion. |
147 | // |
148 | // If you specialize this template for a type `T`, you must do so in the file |
149 | // that defines that type (or in this file). If you define that specialization |
150 | // anywhere else, `is_uniquely_represented<T>` could have different meanings |
151 | // in different places. |
152 | // |
153 | // The Enable parameter is meaningless; it is provided as a convenience, |
154 | // to support certain SFINAE techniques when defining specializations. |
155 | template <typename T, typename Enable = void> |
156 | struct is_uniquely_represented : std::false_type {}; |
157 | |
158 | // is_uniquely_represented<unsigned char> |
159 | // |
160 | // unsigned char is a synonym for "byte", so it is guaranteed to be |
161 | // uniquely represented. |
162 | template <> |
163 | struct is_uniquely_represented<unsigned char> : std::true_type {}; |
164 | |
165 | // is_uniquely_represented for non-standard integral types |
166 | // |
167 | // Integral types other than bool should be uniquely represented on any |
168 | // platform that this will plausibly be ported to. |
169 | template <typename Integral> |
170 | struct is_uniquely_represented< |
171 | Integral, typename std::enable_if<std::is_integral<Integral>::value>::type> |
172 | : std::true_type {}; |
173 | |
174 | // is_uniquely_represented<bool> |
175 | // |
176 | // |
177 | template <> |
178 | struct is_uniquely_represented<bool> : std::false_type {}; |
179 | |
180 | // hash_bytes() |
181 | // |
182 | // Convenience function that combines `hash_state` with the byte representation |
183 | // of `value`. |
184 | template <typename H, typename T> |
185 | H hash_bytes(H hash_state, const T& value) { |
186 | const unsigned char* start = reinterpret_cast<const unsigned char*>(&value); |
187 | return H::combine_contiguous(std::move(hash_state), start, sizeof(value)); |
188 | } |
189 | |
190 | // ----------------------------------------------------------------------------- |
191 | // AbslHashValue for Basic Types |
192 | // ----------------------------------------------------------------------------- |
193 | |
194 | // Note: Default `AbslHashValue` implementations live in `hash_internal`. This |
195 | // allows us to block lexical scope lookup when doing an unqualified call to |
196 | // `AbslHashValue` below. User-defined implementations of `AbslHashValue` can |
197 | // only be found via ADL. |
198 | |
199 | // AbslHashValue() for hashing bool values |
200 | // |
201 | // We use SFINAE to ensure that this overload only accepts bool, not types that |
202 | // are convertible to bool. |
203 | template <typename H, typename B> |
204 | typename std::enable_if<std::is_same<B, bool>::value, H>::type AbslHashValue( |
205 | H hash_state, B value) { |
206 | return H::combine(std::move(hash_state), |
207 | static_cast<unsigned char>(value ? 1 : 0)); |
208 | } |
209 | |
210 | // AbslHashValue() for hashing enum values |
211 | template <typename H, typename Enum> |
212 | typename std::enable_if<std::is_enum<Enum>::value, H>::type AbslHashValue( |
213 | H hash_state, Enum e) { |
214 | // In practice, we could almost certainly just invoke hash_bytes directly, |
215 | // but it's possible that a sanitizer might one day want to |
216 | // store data in the unused bits of an enum. To avoid that risk, we |
217 | // convert to the underlying type before hashing. Hopefully this will get |
218 | // optimized away; if not, we can reopen discussion with c-toolchain-team. |
219 | return H::combine(std::move(hash_state), |
220 | static_cast<typename std::underlying_type<Enum>::type>(e)); |
221 | } |
222 | // AbslHashValue() for hashing floating-point values |
223 | template <typename H, typename Float> |
224 | typename std::enable_if<std::is_same<Float, float>::value || |
225 | std::is_same<Float, double>::value, |
226 | H>::type |
227 | AbslHashValue(H hash_state, Float value) { |
228 | return hash_internal::hash_bytes(std::move(hash_state), |
229 | value == 0 ? 0 : value); |
230 | } |
231 | |
232 | // Long double has the property that it might have extra unused bytes in it. |
233 | // For example, in x86 sizeof(long double)==16 but it only really uses 80-bits |
234 | // of it. This means we can't use hash_bytes on a long double and have to |
235 | // convert it to something else first. |
236 | template <typename H, typename LongDouble> |
237 | typename std::enable_if<std::is_same<LongDouble, long double>::value, H>::type |
238 | AbslHashValue(H hash_state, LongDouble value) { |
239 | const int category = std::fpclassify(value); |
240 | switch (category) { |
241 | case FP_INFINITE: |
242 | // Add the sign bit to differentiate between +Inf and -Inf |
243 | hash_state = H::combine(std::move(hash_state), std::signbit(value)); |
244 | break; |
245 | |
246 | case FP_NAN: |
247 | case FP_ZERO: |
248 | default: |
249 | // Category is enough for these. |
250 | break; |
251 | |
252 | case FP_NORMAL: |
253 | case FP_SUBNORMAL: |
254 | // We can't convert `value` directly to double because this would have |
255 | // undefined behavior if the value is out of range. |
256 | // std::frexp gives us a value in the range (-1, -.5] or [.5, 1) that is |
257 | // guaranteed to be in range for `double`. The truncation is |
258 | // implementation defined, but that works as long as it is deterministic. |
259 | int exp; |
260 | auto mantissa = static_cast<double>(std::frexp(value, &exp)); |
261 | hash_state = H::combine(std::move(hash_state), mantissa, exp); |
262 | } |
263 | |
264 | return H::combine(std::move(hash_state), category); |
265 | } |
266 | |
267 | // AbslHashValue() for hashing pointers |
268 | template <typename H, typename T> |
269 | H AbslHashValue(H hash_state, T* ptr) { |
270 | auto v = reinterpret_cast<uintptr_t>(ptr); |
271 | // Due to alignment, pointers tend to have low bits as zero, and the next few |
272 | // bits follow a pattern since they are also multiples of some base value. |
273 | // Mixing the pointer twice helps prevent stuck low bits for certain alignment |
274 | // values. |
275 | return H::combine(std::move(hash_state), v, v); |
276 | } |
277 | |
278 | // AbslHashValue() for hashing nullptr_t |
279 | template <typename H> |
280 | H AbslHashValue(H hash_state, std::nullptr_t) { |
281 | return H::combine(std::move(hash_state), static_cast<void*>(nullptr)); |
282 | } |
283 | |
284 | // ----------------------------------------------------------------------------- |
285 | // AbslHashValue for Composite Types |
286 | // ----------------------------------------------------------------------------- |
287 | |
288 | // is_hashable() |
289 | // |
290 | // Trait class which returns true if T is hashable by the absl::Hash framework. |
291 | // Used for the AbslHashValue implementations for composite types below. |
292 | template <typename T> |
293 | struct is_hashable; |
294 | |
295 | // AbslHashValue() for hashing pairs |
296 | template <typename H, typename T1, typename T2> |
297 | typename std::enable_if<is_hashable<T1>::value && is_hashable<T2>::value, |
298 | H>::type |
299 | AbslHashValue(H hash_state, const std::pair<T1, T2>& p) { |
300 | return H::combine(std::move(hash_state), p.first, p.second); |
301 | } |
302 | |
303 | // hash_tuple() |
304 | // |
305 | // Helper function for hashing a tuple. The third argument should |
306 | // be an index_sequence running from 0 to tuple_size<Tuple> - 1. |
307 | template <typename H, typename Tuple, size_t... Is> |
308 | H hash_tuple(H hash_state, const Tuple& t, absl::index_sequence<Is...>) { |
309 | return H::combine(std::move(hash_state), std::get<Is>(t)...); |
310 | } |
311 | |
312 | // AbslHashValue for hashing tuples |
313 | template <typename H, typename... Ts> |
314 | #if defined(_MSC_VER) |
315 | // This SFINAE gets MSVC confused under some conditions. Let's just disable it |
316 | // for now. |
317 | H |
318 | #else // _MSC_VER |
319 | typename std::enable_if<absl::conjunction<is_hashable<Ts>...>::value, H>::type |
320 | #endif // _MSC_VER |
321 | AbslHashValue(H hash_state, const std::tuple<Ts...>& t) { |
322 | return hash_internal::hash_tuple(std::move(hash_state), t, |
323 | absl::make_index_sequence<sizeof...(Ts)>()); |
324 | } |
325 | |
326 | // ----------------------------------------------------------------------------- |
327 | // AbslHashValue for Pointers |
328 | // ----------------------------------------------------------------------------- |
329 | |
330 | // AbslHashValue for hashing unique_ptr |
331 | template <typename H, typename T, typename D> |
332 | H AbslHashValue(H hash_state, const std::unique_ptr<T, D>& ptr) { |
333 | return H::combine(std::move(hash_state), ptr.get()); |
334 | } |
335 | |
336 | // AbslHashValue for hashing shared_ptr |
337 | template <typename H, typename T> |
338 | H AbslHashValue(H hash_state, const std::shared_ptr<T>& ptr) { |
339 | return H::combine(std::move(hash_state), ptr.get()); |
340 | } |
341 | |
342 | // ----------------------------------------------------------------------------- |
343 | // AbslHashValue for String-Like Types |
344 | // ----------------------------------------------------------------------------- |
345 | |
346 | // AbslHashValue for hashing strings |
347 | // |
348 | // All the string-like types supported here provide the same hash expansion for |
349 | // the same character sequence. These types are: |
350 | // |
351 | // - `std::string` (and std::basic_string<char, std::char_traits<char>, A> for |
352 | // any allocator A) |
353 | // - `absl::string_view` and `std::string_view` |
354 | // |
355 | // For simplicity, we currently support only `char` strings. This support may |
356 | // be broadened, if necessary, but with some caution - this overload would |
357 | // misbehave in cases where the traits' `eq()` member isn't equivalent to `==` |
358 | // on the underlying character type. |
359 | template <typename H> |
360 | H AbslHashValue(H hash_state, absl::string_view str) { |
361 | return H::combine( |
362 | H::combine_contiguous(std::move(hash_state), str.data(), str.size()), |
363 | str.size()); |
364 | } |
365 | |
366 | // ----------------------------------------------------------------------------- |
367 | // AbslHashValue for Sequence Containers |
368 | // ----------------------------------------------------------------------------- |
369 | |
370 | // AbslHashValue for hashing std::array |
371 | template <typename H, typename T, size_t N> |
372 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
373 | H hash_state, const std::array<T, N>& array) { |
374 | return H::combine_contiguous(std::move(hash_state), array.data(), |
375 | array.size()); |
376 | } |
377 | |
378 | // AbslHashValue for hashing std::deque |
379 | template <typename H, typename T, typename Allocator> |
380 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
381 | H hash_state, const std::deque<T, Allocator>& deque) { |
382 | // TODO(gromer): investigate a more efficient implementation taking |
383 | // advantage of the chunk structure. |
384 | for (const auto& t : deque) { |
385 | hash_state = H::combine(std::move(hash_state), t); |
386 | } |
387 | return H::combine(std::move(hash_state), deque.size()); |
388 | } |
389 | |
390 | // AbslHashValue for hashing std::forward_list |
391 | template <typename H, typename T, typename Allocator> |
392 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
393 | H hash_state, const std::forward_list<T, Allocator>& list) { |
394 | size_t size = 0; |
395 | for (const T& t : list) { |
396 | hash_state = H::combine(std::move(hash_state), t); |
397 | ++size; |
398 | } |
399 | return H::combine(std::move(hash_state), size); |
400 | } |
401 | |
402 | // AbslHashValue for hashing std::list |
403 | template <typename H, typename T, typename Allocator> |
404 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
405 | H hash_state, const std::list<T, Allocator>& list) { |
406 | for (const auto& t : list) { |
407 | hash_state = H::combine(std::move(hash_state), t); |
408 | } |
409 | return H::combine(std::move(hash_state), list.size()); |
410 | } |
411 | |
412 | // AbslHashValue for hashing std::vector |
413 | // |
414 | // Do not use this for vector<bool>. It does not have a .data(), and a fallback |
415 | // for std::hash<> is most likely faster. |
416 | template <typename H, typename T, typename Allocator> |
417 | typename std::enable_if<is_hashable<T>::value && !std::is_same<T, bool>::value, |
418 | H>::type |
419 | AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) { |
420 | return H::combine(H::combine_contiguous(std::move(hash_state), vector.data(), |
421 | vector.size()), |
422 | vector.size()); |
423 | } |
424 | |
425 | // ----------------------------------------------------------------------------- |
426 | // AbslHashValue for Ordered Associative Containers |
427 | // ----------------------------------------------------------------------------- |
428 | |
429 | // AbslHashValue for hashing std::map |
430 | template <typename H, typename Key, typename T, typename Compare, |
431 | typename Allocator> |
432 | typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value, |
433 | H>::type |
434 | AbslHashValue(H hash_state, const std::map<Key, T, Compare, Allocator>& map) { |
435 | for (const auto& t : map) { |
436 | hash_state = H::combine(std::move(hash_state), t); |
437 | } |
438 | return H::combine(std::move(hash_state), map.size()); |
439 | } |
440 | |
441 | // AbslHashValue for hashing std::multimap |
442 | template <typename H, typename Key, typename T, typename Compare, |
443 | typename Allocator> |
444 | typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value, |
445 | H>::type |
446 | AbslHashValue(H hash_state, |
447 | const std::multimap<Key, T, Compare, Allocator>& map) { |
448 | for (const auto& t : map) { |
449 | hash_state = H::combine(std::move(hash_state), t); |
450 | } |
451 | return H::combine(std::move(hash_state), map.size()); |
452 | } |
453 | |
454 | // AbslHashValue for hashing std::set |
455 | template <typename H, typename Key, typename Compare, typename Allocator> |
456 | typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( |
457 | H hash_state, const std::set<Key, Compare, Allocator>& set) { |
458 | for (const auto& t : set) { |
459 | hash_state = H::combine(std::move(hash_state), t); |
460 | } |
461 | return H::combine(std::move(hash_state), set.size()); |
462 | } |
463 | |
464 | // AbslHashValue for hashing std::multiset |
465 | template <typename H, typename Key, typename Compare, typename Allocator> |
466 | typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( |
467 | H hash_state, const std::multiset<Key, Compare, Allocator>& set) { |
468 | for (const auto& t : set) { |
469 | hash_state = H::combine(std::move(hash_state), t); |
470 | } |
471 | return H::combine(std::move(hash_state), set.size()); |
472 | } |
473 | |
474 | // ----------------------------------------------------------------------------- |
475 | // AbslHashValue for Wrapper Types |
476 | // ----------------------------------------------------------------------------- |
477 | |
478 | // AbslHashValue for hashing absl::optional |
479 | template <typename H, typename T> |
480 | typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue( |
481 | H hash_state, const absl::optional<T>& opt) { |
482 | if (opt) hash_state = H::combine(std::move(hash_state), *opt); |
483 | return H::combine(std::move(hash_state), opt.has_value()); |
484 | } |
485 | |
486 | // VariantVisitor |
487 | template <typename H> |
488 | struct VariantVisitor { |
489 | H&& hash_state; |
490 | template <typename T> |
491 | H operator()(const T& t) const { |
492 | return H::combine(std::move(hash_state), t); |
493 | } |
494 | }; |
495 | |
496 | // AbslHashValue for hashing absl::variant |
497 | template <typename H, typename... T> |
498 | typename std::enable_if<conjunction<is_hashable<T>...>::value, H>::type |
499 | AbslHashValue(H hash_state, const absl::variant<T...>& v) { |
500 | if (!v.valueless_by_exception()) { |
501 | hash_state = absl::visit(VariantVisitor<H>{std::move(hash_state)}, v); |
502 | } |
503 | return H::combine(std::move(hash_state), v.index()); |
504 | } |
505 | |
506 | // ----------------------------------------------------------------------------- |
507 | // AbslHashValue for Other Types |
508 | // ----------------------------------------------------------------------------- |
509 | |
510 | // AbslHashValue for hashing std::bitset is not defined, for the same reason as |
511 | // for vector<bool> (see std::vector above): It does not expose the raw bytes, |
512 | // and a fallback to std::hash<> is most likely faster. |
513 | |
514 | // ----------------------------------------------------------------------------- |
515 | |
516 | // hash_range_or_bytes() |
517 | // |
518 | // Mixes all values in the range [data, data+size) into the hash state. |
519 | // This overload accepts only uniquely-represented types, and hashes them by |
520 | // hashing the entire range of bytes. |
521 | template <typename H, typename T> |
522 | typename std::enable_if<is_uniquely_represented<T>::value, H>::type |
523 | hash_range_or_bytes(H hash_state, const T* data, size_t size) { |
524 | const auto* bytes = reinterpret_cast<const unsigned char*>(data); |
525 | return H::combine_contiguous(std::move(hash_state), bytes, sizeof(T) * size); |
526 | } |
527 | |
528 | // hash_range_or_bytes() |
529 | template <typename H, typename T> |
530 | typename std::enable_if<!is_uniquely_represented<T>::value, H>::type |
531 | hash_range_or_bytes(H hash_state, const T* data, size_t size) { |
532 | for (const auto end = data + size; data < end; ++data) { |
533 | hash_state = H::combine(std::move(hash_state), *data); |
534 | } |
535 | return hash_state; |
536 | } |
537 | |
538 | #if defined(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE) && \ |
539 | ABSL_META_INTERNAL_STD_HASH_SFINAE_FRIENDLY_ |
540 | #define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 1 |
541 | #else |
542 | #define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 0 |
543 | #endif |
544 | |
545 | // HashSelect |
546 | // |
547 | // Type trait to select the appropriate hash implementation to use. |
548 | // HashSelect::type<T> will give the proper hash implementation, to be invoked |
549 | // as: |
550 | // HashSelect::type<T>::Invoke(state, value) |
551 | // Also, HashSelect::type<T>::value is a boolean equal to `true` if there is a |
552 | // valid `Invoke` function. Types that are not hashable will have a ::value of |
553 | // `false`. |
554 | struct HashSelect { |
555 | private: |
556 | struct State : HashStateBase<State> { |
557 | static State combine_contiguous(State hash_state, const unsigned char*, |
558 | size_t); |
559 | using State::HashStateBase::combine_contiguous; |
560 | }; |
561 | |
562 | struct UniquelyRepresentedProbe { |
563 | template <typename H, typename T> |
564 | static auto Invoke(H state, const T& value) |
565 | -> absl::enable_if_t<is_uniquely_represented<T>::value, H> { |
566 | return hash_internal::hash_bytes(std::move(state), value); |
567 | } |
568 | }; |
569 | |
570 | struct HashValueProbe { |
571 | template <typename H, typename T> |
572 | static auto Invoke(H state, const T& value) -> absl::enable_if_t< |
573 | std::is_same<H, |
574 | decltype(AbslHashValue(std::move(state), value))>::value, |
575 | H> { |
576 | return AbslHashValue(std::move(state), value); |
577 | } |
578 | }; |
579 | |
580 | struct LegacyHashProbe { |
581 | #if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ |
582 | template <typename H, typename T> |
583 | static auto Invoke(H state, const T& value) -> absl::enable_if_t< |
584 | std::is_convertible< |
585 | decltype(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<T>()(value)), |
586 | size_t>::value, |
587 | H> { |
588 | return hash_internal::hash_bytes( |
589 | std::move(state), |
590 | ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<T>{}(value)); |
591 | } |
592 | #endif // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ |
593 | }; |
594 | |
595 | struct StdHashProbe { |
596 | template <typename H, typename T> |
597 | static auto Invoke(H state, const T& value) |
598 | -> absl::enable_if_t<type_traits_internal::IsHashable<T>::value, H> { |
599 | return hash_internal::hash_bytes(std::move(state), std::hash<T>{}(value)); |
600 | } |
601 | }; |
602 | |
603 | template <typename Hash, typename T> |
604 | struct Probe : Hash { |
605 | private: |
606 | template <typename H, typename = decltype(H::Invoke( |
607 | std::declval<State>(), std::declval<const T&>()))> |
608 | static std::true_type Test(int); |
609 | template <typename U> |
610 | static std::false_type Test(char); |
611 | |
612 | public: |
613 | static constexpr bool value = decltype(Test<Hash>(0))::value; |
614 | }; |
615 | |
616 | public: |
617 | // Probe each implementation in order. |
618 | // disjunction provides short circuiting wrt instantiation. |
619 | template <typename T> |
620 | using Apply = absl::disjunction< // |
621 | Probe<UniquelyRepresentedProbe, T>, // |
622 | Probe<HashValueProbe, T>, // |
623 | Probe<LegacyHashProbe, T>, // |
624 | Probe<StdHashProbe, T>, // |
625 | std::false_type>; |
626 | }; |
627 | |
628 | template <typename T> |
629 | struct is_hashable |
630 | : std::integral_constant<bool, HashSelect::template Apply<T>::value> {}; |
631 | |
632 | // CityHashState |
633 | class CityHashState : public HashStateBase<CityHashState> { |
634 | // absl::uint128 is not an alias or a thin wrapper around the intrinsic. |
635 | // We use the intrinsic when available to improve performance. |
636 | #ifdef ABSL_HAVE_INTRINSIC_INT128 |
637 | using uint128 = __uint128_t; |
638 | #else // ABSL_HAVE_INTRINSIC_INT128 |
639 | using uint128 = absl::uint128; |
640 | #endif // ABSL_HAVE_INTRINSIC_INT128 |
641 | |
642 | static constexpr uint64_t kMul = |
643 | sizeof(size_t) == 4 ? uint64_t{0xcc9e2d51} : uint64_t{0x9ddfea08eb382d69}; |
644 | |
645 | template <typename T> |
646 | using IntegralFastPath = |
647 | conjunction<std::is_integral<T>, is_uniquely_represented<T>>; |
648 | |
649 | public: |
650 | // Move only |
651 | CityHashState(CityHashState&&) = default; |
652 | CityHashState& operator=(CityHashState&&) = default; |
653 | |
654 | // CityHashState::combine_contiguous() |
655 | // |
656 | // Fundamental base case for hash recursion: mixes the given range of bytes |
657 | // into the hash state. |
658 | static CityHashState combine_contiguous(CityHashState hash_state, |
659 | const unsigned char* first, |
660 | size_t size) { |
661 | return CityHashState( |
662 | CombineContiguousImpl(hash_state.state_, first, size, |
663 | std::integral_constant<int, sizeof(size_t)>{})); |
664 | } |
665 | using CityHashState::HashStateBase::combine_contiguous; |
666 | |
667 | // CityHashState::hash() |
668 | // |
669 | // For performance reasons in non-opt mode, we specialize this for |
670 | // integral types. |
671 | // Otherwise we would be instantiating and calling dozens of functions for |
672 | // something that is just one multiplication and a couple xor's. |
673 | // The result should be the same as running the whole algorithm, but faster. |
674 | template <typename T, absl::enable_if_t<IntegralFastPath<T>::value, int> = 0> |
675 | static size_t hash(T value) { |
676 | return static_cast<size_t>(Mix(Seed(), static_cast<uint64_t>(value))); |
677 | } |
678 | |
679 | // Overload of CityHashState::hash() |
680 | template <typename T, absl::enable_if_t<!IntegralFastPath<T>::value, int> = 0> |
681 | static size_t hash(const T& value) { |
682 | return static_cast<size_t>(combine(CityHashState{}, value).state_); |
683 | } |
684 | |
685 | private: |
686 | // Invoked only once for a given argument; that plus the fact that this is |
687 | // move-only ensures that there is only one non-moved-from object. |
688 | CityHashState() : state_(Seed()) {} |
689 | |
690 | // Workaround for MSVC bug. |
691 | // We make the type copyable to fix the calling convention, even though we |
692 | // never actually copy it. Keep it private to not affect the public API of the |
693 | // type. |
694 | CityHashState(const CityHashState&) = default; |
695 | |
696 | explicit CityHashState(uint64_t state) : state_(state) {} |
697 | |
698 | // Implementation of the base case for combine_contiguous where we actually |
699 | // mix the bytes into the state. |
700 | // Dispatch to different implementations of the combine_contiguous depending |
701 | // on the value of `sizeof(size_t)`. |
702 | static uint64_t CombineContiguousImpl(uint64_t state, |
703 | const unsigned char* first, size_t len, |
704 | std::integral_constant<int, 4> |
705 | /* sizeof_size_t */); |
706 | static uint64_t CombineContiguousImpl(uint64_t state, |
707 | const unsigned char* first, size_t len, |
708 | std::integral_constant<int, 8> |
709 | /* sizeof_size_t*/); |
710 | |
711 | // Reads 9 to 16 bytes from p. |
712 | // The first 8 bytes are in .first, the rest (zero padded) bytes are in |
713 | // .second. |
714 | static std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p, |
715 | size_t len) { |
716 | uint64_t high = little_endian::Load64(p + len - 8); |
717 | return {little_endian::Load64(p), high >> (128 - len * 8)}; |
718 | } |
719 | |
720 | // Reads 4 to 8 bytes from p. Zero pads to fill uint64_t. |
721 | static uint64_t Read4To8(const unsigned char* p, size_t len) { |
722 | return (static_cast<uint64_t>(little_endian::Load32(p + len - 4)) |
723 | << (len - 4) * 8) | |
724 | little_endian::Load32(p); |
725 | } |
726 | |
727 | // Reads 1 to 3 bytes from p. Zero pads to fill uint32_t. |
728 | static uint32_t Read1To3(const unsigned char* p, size_t len) { |
729 | return static_cast<uint32_t>((p[0]) | // |
730 | (p[len / 2] << (len / 2 * 8)) | // |
731 | (p[len - 1] << ((len - 1) * 8))); |
732 | } |
733 | |
734 | ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t state, uint64_t v) { |
735 | using MultType = |
736 | absl::conditional_t<sizeof(size_t) == 4, uint64_t, uint128>; |
737 | // We do the addition in 64-bit space to make sure the 128-bit |
738 | // multiplication is fast. If we were to do it as MultType the compiler has |
739 | // to assume that the high word is non-zero and needs to perform 2 |
740 | // multiplications instead of one. |
741 | MultType m = state + v; |
742 | m *= kMul; |
743 | return static_cast<uint64_t>(m ^ (m >> (sizeof(m) * 8 / 2))); |
744 | } |
745 | |
746 | // Seed() |
747 | // |
748 | // A non-deterministic seed. |
749 | // |
750 | // The current purpose of this seed is to generate non-deterministic results |
751 | // and prevent having users depend on the particular hash values. |
752 | // It is not meant as a security feature right now, but it leaves the door |
753 | // open to upgrade it to a true per-process random seed. A true random seed |
754 | // costs more and we don't need to pay for that right now. |
755 | // |
756 | // On platforms with ASLR, we take advantage of it to make a per-process |
757 | // random value. |
758 | // See https://en.wikipedia.org/wiki/Address_space_layout_randomization |
759 | // |
760 | // On other platforms this is still going to be non-deterministic but most |
761 | // probably per-build and not per-process. |
762 | ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Seed() { |
763 | return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(kSeed)); |
764 | } |
765 | static const void* const kSeed; |
766 | |
767 | uint64_t state_; |
768 | }; |
769 | |
770 | // CityHashState::CombineContiguousImpl() |
771 | inline uint64_t CityHashState::CombineContiguousImpl( |
772 | uint64_t state, const unsigned char* first, size_t len, |
773 | std::integral_constant<int, 4> /* sizeof_size_t */) { |
774 | // For large values we use CityHash, for small ones we just use a |
775 | // multiplicative hash. |
776 | uint64_t v; |
777 | if (len > 8) { |
778 | v = absl::hash_internal::CityHash32(reinterpret_cast<const char*>(first), len); |
779 | } else if (len >= 4) { |
780 | v = Read4To8(first, len); |
781 | } else if (len > 0) { |
782 | v = Read1To3(first, len); |
783 | } else { |
784 | // Empty ranges have no effect. |
785 | return state; |
786 | } |
787 | return Mix(state, v); |
788 | } |
789 | |
790 | // Overload of CityHashState::CombineContiguousImpl() |
791 | inline uint64_t CityHashState::CombineContiguousImpl( |
792 | uint64_t state, const unsigned char* first, size_t len, |
793 | std::integral_constant<int, 8> /* sizeof_size_t */) { |
794 | // For large values we use CityHash, for small ones we just use a |
795 | // multiplicative hash. |
796 | uint64_t v; |
797 | if (len > 16) { |
798 | v = absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first), len); |
799 | } else if (len > 8) { |
800 | auto p = Read9To16(first, len); |
801 | state = Mix(state, p.first); |
802 | v = p.second; |
803 | } else if (len >= 4) { |
804 | v = Read4To8(first, len); |
805 | } else if (len > 0) { |
806 | v = Read1To3(first, len); |
807 | } else { |
808 | // Empty ranges have no effect. |
809 | return state; |
810 | } |
811 | return Mix(state, v); |
812 | } |
813 | |
814 | |
815 | struct AggregateBarrier {}; |
816 | |
817 | // HashImpl |
818 | |
819 | // Add a private base class to make sure this type is not an aggregate. |
820 | // Aggregates can be aggregate initialized even if the default constructor is |
821 | // deleted. |
822 | struct PoisonedHash : private AggregateBarrier { |
823 | PoisonedHash() = delete; |
824 | PoisonedHash(const PoisonedHash&) = delete; |
825 | PoisonedHash& operator=(const PoisonedHash&) = delete; |
826 | }; |
827 | |
828 | template <typename T> |
829 | struct HashImpl { |
830 | size_t operator()(const T& value) const { return CityHashState::hash(value); } |
831 | }; |
832 | |
833 | template <typename T> |
834 | struct Hash |
835 | : absl::conditional_t<is_hashable<T>::value, HashImpl<T>, PoisonedHash> {}; |
836 | |
837 | template <typename H> |
838 | template <typename T, typename... Ts> |
839 | H HashStateBase<H>::combine(H state, const T& value, const Ts&... values) { |
840 | return H::combine(hash_internal::HashSelect::template Apply<T>::Invoke( |
841 | std::move(state), value), |
842 | values...); |
843 | } |
844 | |
845 | // HashStateBase::combine_contiguous() |
846 | template <typename H> |
847 | template <typename T> |
848 | H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) { |
849 | return hash_internal::hash_range_or_bytes(std::move(state), data, size); |
850 | } |
851 | } // namespace hash_internal |
852 | } // namespace absl |
853 | |
854 | #endif // ABSL_HASH_INTERNAL_HASH_H_ |
855 | |