1 | // Copyright 2018 The Abseil Authors. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | // |
// An open-addressing hashtable with quadratic probing.
17 | // |
18 | // This is a low level hashtable on top of which different interfaces can be |
19 | // implemented, like flat_hash_set, node_hash_set, string_hash_set, etc. |
20 | // |
21 | // The table interface is similar to that of std::unordered_set. Notable |
22 | // differences are that most member functions support heterogeneous keys when |
23 | // BOTH the hash and eq functions are marked as transparent. They do so by |
24 | // providing a typedef called `is_transparent`. |
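//
// For example, a transparent hash/eq pair for string-like keys might look
// like this (an illustrative sketch, not something defined in this file):
//
//   struct StringHash {
//     using is_transparent = void;
//     size_t operator()(absl::string_view v) const {
//       return absl::Hash<absl::string_view>{}(v);
//     }
//   };
//   struct StringEq {
//     using is_transparent = void;
//     bool operator()(absl::string_view a, absl::string_view b) const {
//       return a == b;
//     }
//   };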
25 | // |
26 | // When heterogeneous lookup is enabled, functions that take key_type act as if |
27 | // they have an overload set like: |
28 | // |
29 | // iterator find(const key_type& key); |
30 | // template <class K> |
31 | // iterator find(const K& key); |
32 | // |
33 | // size_type erase(const key_type& key); |
34 | // template <class K> |
35 | // size_type erase(const K& key); |
36 | // |
37 | // std::pair<iterator, iterator> equal_range(const key_type& key); |
38 | // template <class K> |
39 | // std::pair<iterator, iterator> equal_range(const K& key); |
40 | // |
41 | // When heterogeneous lookup is disabled, only the explicit `key_type` overloads |
42 | // exist. |
43 | // |
44 | // find() also supports passing the hash explicitly: |
45 | // |
46 | // iterator find(const key_type& key, size_t hash); |
47 | // template <class U> |
48 | // iterator find(const U& key, size_t hash); |
49 | // |
// In addition, the pointer-to-element and iterator stability guarantees are
51 | // weaker: all iterators and pointers are invalidated after a new element is |
52 | // inserted. |
53 | // |
54 | // IMPLEMENTATION DETAILS |
55 | // |
56 | // The table stores elements inline in a slot array. In addition to the slot |
57 | // array the table maintains some control state per slot. The extra state is one |
58 | // byte per slot and stores empty or deleted marks, or alternatively 7 bits from |
59 | // the hash of an occupied slot. The table is split into logical groups of |
60 | // slots, like so: |
61 | // |
62 | // Group 1 Group 2 Group 3 |
63 | // +---------------+---------------+---------------+ |
64 | // | | | | | | | | | | | | | | | | | | | | | | | | | |
65 | // +---------------+---------------+---------------+ |
66 | // |
67 | // On lookup the hash is split into two parts: |
68 | // - H2: 7 bits (those stored in the control bytes) |
69 | // - H1: the rest of the bits |
70 | // The groups are probed using H1. For each group the slots are matched to H2 in |
71 | // parallel. Because H2 is 7 bits (128 states) and the number of slots per group |
// is low (8 or 16), in almost all cases a match in H2 is also a lookup hit.
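//
// For example (a sketch mirroring the H1()/H2() helpers below, ignoring the
// per-table seed mixed into H1): for hash = 0x3a5f, H2 = 0x3a5f & 0x7F = 0x5f
// and H1 = 0x3a5f >> 7 = 0x74.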
73 | // |
74 | // On insert, once the right group is found (as in lookup), its slots are |
75 | // filled in order. |
76 | // |
77 | // On erase a slot is cleared. In case the group did not have any empty slots |
78 | // before the erase, the erased slot is marked as deleted. |
79 | // |
80 | // Groups without empty slots (but maybe with deleted slots) extend the probe |
81 | // sequence. The probing algorithm is quadratic. Given N the number of groups, |
82 | // the probing function for the i'th probe is: |
83 | // |
84 | // P(0) = H1 % N |
85 | // |
86 | // P(i) = (P(i - 1) + i) % N |
87 | // |
88 | // This probing function guarantees that after N probes, all the groups of the |
89 | // table will be probed exactly once. |
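//
// For example, with N = 4 groups and H1 % N = 1, the sequence of probed
// groups is 1, 2, 0, 3 (P(0)=1, P(1)=2, P(2)=0, P(3)=3): each group is
// visited exactly once.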
90 | |
91 | #ifndef ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ |
92 | #define ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ |
93 | |
94 | #include <algorithm> |
95 | #include <cmath> |
96 | #include <cstdint> |
97 | #include <cstring> |
98 | #include <iterator> |
99 | #include <limits> |
100 | #include <memory> |
101 | #include <tuple> |
102 | #include <type_traits> |
103 | #include <utility> |
104 | |
105 | #include "absl/base/internal/bits.h" |
106 | #include "absl/base/internal/endian.h" |
107 | #include "absl/base/port.h" |
108 | #include "absl/container/internal/common.h" |
109 | #include "absl/container/internal/compressed_tuple.h" |
110 | #include "absl/container/internal/container_memory.h" |
111 | #include "absl/container/internal/hash_policy_traits.h" |
112 | #include "absl/container/internal/hashtable_debug_hooks.h" |
113 | #include "absl/container/internal/hashtablez_sampler.h" |
114 | #include "absl/container/internal/have_sse.h" |
115 | #include "absl/container/internal/layout.h" |
116 | #include "absl/memory/memory.h" |
117 | #include "absl/meta/type_traits.h" |
118 | #include "absl/utility/utility.h" |
119 | |
120 | namespace absl { |
121 | namespace container_internal { |
122 | |
123 | template <size_t Width> |
124 | class probe_seq { |
125 | public: |
126 | probe_seq(size_t hash, size_t mask) { |
assert(((mask + 1) & mask) == 0 && "not a mask");
128 | mask_ = mask; |
129 | offset_ = hash & mask_; |
130 | } |
131 | size_t offset() const { return offset_; } |
132 | size_t offset(size_t i) const { return (offset_ + i) & mask_; } |
133 | |
134 | void next() { |
135 | index_ += Width; |
136 | offset_ += index_; |
137 | offset_ &= mask_; |
138 | } |
139 | // 0-based probe index. The i-th probe in the probe sequence. |
140 | size_t index() const { return index_; } |
141 | |
142 | private: |
143 | size_t mask_; |
144 | size_t offset_; |
145 | size_t index_ = 0; |
146 | }; |
147 | |
148 | template <class ContainerKey, class Hash, class Eq> |
149 | struct RequireUsableKey { |
150 | template <class PassedKey, class... Args> |
151 | std::pair< |
152 | decltype(std::declval<const Hash&>()(std::declval<const PassedKey&>())), |
153 | decltype(std::declval<const Eq&>()(std::declval<const ContainerKey&>(), |
154 | std::declval<const PassedKey&>()))>* |
155 | operator()(const PassedKey&, const Args&...) const; |
156 | }; |
157 | |
158 | template <class E, class Policy, class Hash, class Eq, class... Ts> |
159 | struct IsDecomposable : std::false_type {}; |
160 | |
161 | template <class Policy, class Hash, class Eq, class... Ts> |
162 | struct IsDecomposable< |
163 | absl::void_t<decltype( |
164 | Policy::apply(RequireUsableKey<typename Policy::key_type, Hash, Eq>(), |
165 | std::declval<Ts>()...))>, |
166 | Policy, Hash, Eq, Ts...> : std::true_type {}; |
167 | |
168 | // TODO(alkis): Switch to std::is_nothrow_swappable when gcc/clang supports it. |
169 | template <class T> |
170 | constexpr bool IsNoThrowSwappable() { |
171 | using std::swap; |
172 | return noexcept(swap(std::declval<T&>(), std::declval<T&>())); |
173 | } |
174 | |
175 | template <typename T> |
176 | int TrailingZeros(T x) { |
177 | return sizeof(T) == 8 ? base_internal::CountTrailingZerosNonZero64( |
178 | static_cast<uint64_t>(x)) |
179 | : base_internal::CountTrailingZerosNonZero32( |
180 | static_cast<uint32_t>(x)); |
181 | } |
182 | |
183 | template <typename T> |
184 | int LeadingZeros(T x) { |
185 | return sizeof(T) == 8 |
186 | ? base_internal::CountLeadingZeros64(static_cast<uint64_t>(x)) |
187 | : base_internal::CountLeadingZeros32(static_cast<uint32_t>(x)); |
188 | } |
189 | |
190 | // An abstraction over a bitmask. It provides an easy way to iterate through the |
191 | // indexes of the set bits of a bitmask. When Shift=0 (platforms with SSE), |
// this is a true bitmask. On non-SSE platforms, the arithmetic used to
// emulate the SSE behavior works in bytes (Shift=3) and leaves each byte as
// either 0x00 or 0x80.
195 | // |
196 | // For example: |
197 | // for (int i : BitMask<uint32_t, 16>(0x5)) -> yields 0, 2 |
198 | // for (int i : BitMask<uint64_t, 8, 3>(0x0000000080800000)) -> yields 2, 3 |
199 | template <class T, int SignificantBits, int Shift = 0> |
200 | class BitMask { |
static_assert(std::is_unsigned<T>::value, "");
static_assert(Shift == 0 || Shift == 3, "");
203 | |
204 | public: |
205 | // These are useful for unit tests (gunit). |
206 | using value_type = int; |
207 | using iterator = BitMask; |
208 | using const_iterator = BitMask; |
209 | |
210 | explicit BitMask(T mask) : mask_(mask) {} |
211 | BitMask& operator++() { |
212 | mask_ &= (mask_ - 1); |
213 | return *this; |
214 | } |
215 | explicit operator bool() const { return mask_ != 0; } |
216 | int operator*() const { return LowestBitSet(); } |
217 | int LowestBitSet() const { |
218 | return container_internal::TrailingZeros(mask_) >> Shift; |
219 | } |
220 | int HighestBitSet() const { |
221 | return (sizeof(T) * CHAR_BIT - container_internal::LeadingZeros(mask_) - |
222 | 1) >> |
223 | Shift; |
224 | } |
225 | |
226 | BitMask begin() const { return *this; } |
227 | BitMask end() const { return BitMask(0); } |
228 | |
229 | int TrailingZeros() const { |
230 | return container_internal::TrailingZeros(mask_) >> Shift; |
231 | } |
232 | |
233 | int LeadingZeros() const { |
234 | constexpr int total_significant_bits = SignificantBits << Shift; |
constexpr int extra_bits = sizeof(T) * 8 - total_significant_bits;
236 | return container_internal::LeadingZeros(mask_ << extra_bits) >> Shift; |
237 | } |
238 | |
239 | private: |
240 | friend bool operator==(const BitMask& a, const BitMask& b) { |
241 | return a.mask_ == b.mask_; |
242 | } |
243 | friend bool operator!=(const BitMask& a, const BitMask& b) { |
244 | return a.mask_ != b.mask_; |
245 | } |
246 | |
247 | T mask_; |
248 | }; |
249 | |
250 | using ctrl_t = signed char; |
251 | using h2_t = uint8_t; |
252 | |
253 | // The values here are selected for maximum performance. See the static asserts |
254 | // below for details. |
255 | enum Ctrl : ctrl_t { |
256 | kEmpty = -128, // 0b10000000 |
257 | kDeleted = -2, // 0b11111110 |
258 | kSentinel = -1, // 0b11111111 |
259 | }; |
static_assert(
kEmpty & kDeleted & kSentinel & 0x80,
"Special markers need to have the MSB to make checking for them efficient");
static_assert(kEmpty < kSentinel && kDeleted < kSentinel,
"kEmpty and kDeleted must be smaller than kSentinel to make the "
"SIMD test of IsEmptyOrDeleted() efficient");
static_assert(kSentinel == -1,
"kSentinel must be -1 to elide loading it from memory into SIMD "
"registers (pcmpeqd xmm, xmm)");
static_assert(kEmpty == -128,
"kEmpty must be -128 to make the SIMD check for its "
"existence efficient (psignb xmm, xmm)");
static_assert(~kEmpty & ~kDeleted & kSentinel & 0x7F,
"kEmpty and kDeleted must share an unset bit that is not shared "
"by kSentinel to make the scalar test for MatchEmptyOrDeleted() "
"efficient");
static_assert(kDeleted == -2,
"kDeleted must be -2 to make the implementation of "
"ConvertSpecialToEmptyAndFullToDeleted efficient");
279 | |
280 | // A single block of empty control bytes for tables without any slots allocated. |
281 | // This enables removing a branch in the hot path of find(). |
282 | inline ctrl_t* EmptyGroup() { |
283 | alignas(16) static constexpr ctrl_t empty_group[] = { |
284 | kSentinel, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, |
285 | kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty}; |
286 | return const_cast<ctrl_t*>(empty_group); |
287 | } |
288 | |
289 | // Mixes a randomly generated per-process seed with `hash` and `ctrl` to |
290 | // randomize insertion order within groups. |
291 | bool ShouldInsertBackwards(size_t hash, ctrl_t* ctrl); |
292 | |
293 | // Returns a hash seed. |
294 | // |
295 | // The seed consists of the ctrl_ pointer, which adds enough entropy to ensure |
296 | // non-determinism of iteration order in most cases. |
297 | inline size_t HashSeed(const ctrl_t* ctrl) { |
298 | // The low bits of the pointer have little or no entropy because of |
299 | // alignment. We shift the pointer to try to use higher entropy bits. A |
300 | // good number seems to be 12 bits, because that aligns with page size. |
301 | return reinterpret_cast<uintptr_t>(ctrl) >> 12; |
302 | } |
303 | |
304 | inline size_t H1(size_t hash, const ctrl_t* ctrl) { |
305 | return (hash >> 7) ^ HashSeed(ctrl); |
306 | } |
307 | inline ctrl_t H2(size_t hash) { return hash & 0x7F; } |
308 | |
309 | inline bool IsEmpty(ctrl_t c) { return c == kEmpty; } |
310 | inline bool IsFull(ctrl_t c) { return c >= 0; } |
311 | inline bool IsDeleted(ctrl_t c) { return c == kDeleted; } |
312 | inline bool IsEmptyOrDeleted(ctrl_t c) { return c < kSentinel; } |
313 | |
314 | #if SWISSTABLE_HAVE_SSE2 |
315 | |
316 | // https://github.com/abseil/abseil-cpp/issues/209 |
317 | // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87853 |
318 | // _mm_cmpgt_epi8 is broken under GCC with -funsigned-char |
319 | // Work around this by using the portable implementation of Group |
320 | // when using -funsigned-char under GCC. |
321 | inline __m128i _mm_cmpgt_epi8_fixed(__m128i a, __m128i b) { |
322 | #if defined(__GNUC__) && !defined(__clang__) |
323 | if (std::is_unsigned<char>::value) { |
324 | const __m128i mask = _mm_set1_epi8(0x80); |
325 | const __m128i diff = _mm_subs_epi8(b, a); |
326 | return _mm_cmpeq_epi8(_mm_and_si128(diff, mask), mask); |
327 | } |
328 | #endif |
329 | return _mm_cmpgt_epi8(a, b); |
330 | } |
331 | |
332 | struct GroupSse2Impl { |
333 | static constexpr size_t kWidth = 16; // the number of slots per group |
334 | |
335 | explicit GroupSse2Impl(const ctrl_t* pos) { |
336 | ctrl = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos)); |
337 | } |
338 | |
339 | // Returns a bitmask representing the positions of slots that match hash. |
340 | BitMask<uint32_t, kWidth> Match(h2_t hash) const { |
341 | auto match = _mm_set1_epi8(hash); |
342 | return BitMask<uint32_t, kWidth>( |
343 | _mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl))); |
344 | } |
345 | |
346 | // Returns a bitmask representing the positions of empty slots. |
347 | BitMask<uint32_t, kWidth> MatchEmpty() const { |
348 | #if SWISSTABLE_HAVE_SSSE3 |
349 | // This only works because kEmpty is -128. |
350 | return BitMask<uint32_t, kWidth>( |
351 | _mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl))); |
352 | #else |
353 | return Match(static_cast<h2_t>(kEmpty)); |
354 | #endif |
355 | } |
356 | |
357 | // Returns a bitmask representing the positions of empty or deleted slots. |
358 | BitMask<uint32_t, kWidth> MatchEmptyOrDeleted() const { |
359 | auto special = _mm_set1_epi8(kSentinel); |
360 | return BitMask<uint32_t, kWidth>( |
361 | _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl))); |
362 | } |
363 | |
// Returns the number of consecutive empty or deleted slots at the start of
// the group (i.e. before the first full or sentinel control byte).
365 | uint32_t CountLeadingEmptyOrDeleted() const { |
366 | auto special = _mm_set1_epi8(kSentinel); |
367 | return TrailingZeros( |
368 | _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)) + 1); |
369 | } |
370 | |
371 | void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const { |
372 | auto msbs = _mm_set1_epi8(static_cast<char>(-128)); |
373 | auto x126 = _mm_set1_epi8(126); |
374 | #if SWISSTABLE_HAVE_SSSE3 |
375 | auto res = _mm_or_si128(_mm_shuffle_epi8(x126, ctrl), msbs); |
376 | #else |
377 | auto zero = _mm_setzero_si128(); |
378 | auto special_mask = _mm_cmpgt_epi8_fixed(zero, ctrl); |
379 | auto res = _mm_or_si128(msbs, _mm_andnot_si128(special_mask, x126)); |
380 | #endif |
381 | _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), res); |
382 | } |
383 | |
384 | __m128i ctrl; |
385 | }; |
386 | #endif // SWISSTABLE_HAVE_SSE2 |
387 | |
388 | struct GroupPortableImpl { |
389 | static constexpr size_t kWidth = 8; |
390 | |
391 | explicit GroupPortableImpl(const ctrl_t* pos) |
392 | : ctrl(little_endian::Load64(pos)) {} |
393 | |
394 | BitMask<uint64_t, kWidth, 3> Match(h2_t hash) const { |
395 | // For the technique, see: |
// http://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
397 | // (Determine if a word has a byte equal to n). |
398 | // |
399 | // Caveat: there are false positives but: |
400 | // - they only occur if there is a real match |
401 | // - they never occur on kEmpty, kDeleted, kSentinel |
402 | // - they will be handled gracefully by subsequent checks in code |
403 | // |
404 | // Example: |
405 | // v = 0x1716151413121110 |
406 | // hash = 0x12 |
407 | // retval = (v - lsbs) & ~v & msbs = 0x0000000080800000 |
408 | constexpr uint64_t msbs = 0x8080808080808080ULL; |
409 | constexpr uint64_t lsbs = 0x0101010101010101ULL; |
410 | auto x = ctrl ^ (lsbs * hash); |
411 | return BitMask<uint64_t, kWidth, 3>((x - lsbs) & ~x & msbs); |
412 | } |
413 | |
414 | BitMask<uint64_t, kWidth, 3> MatchEmpty() const { |
415 | constexpr uint64_t msbs = 0x8080808080808080ULL; |
416 | return BitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 6)) & msbs); |
417 | } |
418 | |
419 | BitMask<uint64_t, kWidth, 3> MatchEmptyOrDeleted() const { |
420 | constexpr uint64_t msbs = 0x8080808080808080ULL; |
421 | return BitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 7)) & msbs); |
422 | } |
423 | |
424 | uint32_t CountLeadingEmptyOrDeleted() const { |
425 | constexpr uint64_t gaps = 0x00FEFEFEFEFEFEFEULL; |
426 | return (TrailingZeros(((~ctrl & (ctrl >> 7)) | gaps) + 1) + 7) >> 3; |
427 | } |
428 | |
429 | void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const { |
430 | constexpr uint64_t msbs = 0x8080808080808080ULL; |
431 | constexpr uint64_t lsbs = 0x0101010101010101ULL; |
432 | auto x = ctrl & msbs; |
433 | auto res = (~x + (x >> 7)) & ~lsbs; |
434 | little_endian::Store64(dst, res); |
435 | } |
436 | |
437 | uint64_t ctrl; |
438 | }; |
439 | |
440 | #if SWISSTABLE_HAVE_SSE2 |
441 | using Group = GroupSse2Impl; |
442 | #else |
443 | using Group = GroupPortableImpl; |
444 | #endif |
445 | |
446 | template <class Policy, class Hash, class Eq, class Alloc> |
447 | class raw_hash_set; |
448 | |
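// Returns true if `n` is a valid capacity, i.e. of the form 2^k - 1 with
// k >= 1. For example, IsValidCapacity(7) is true and IsValidCapacity(8) is
// false.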
449 | inline bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; } |
450 | |
451 | // PRECONDITION: |
452 | // IsValidCapacity(capacity) |
453 | // ctrl[capacity] == kSentinel |
454 | // ctrl[i] != kSentinel for all i < capacity |
455 | // Applies mapping for every byte in ctrl: |
456 | // DELETED -> EMPTY |
457 | // EMPTY -> EMPTY |
458 | // FULL -> DELETED |
459 | inline void ConvertDeletedToEmptyAndFullToDeleted( |
460 | ctrl_t* ctrl, size_t capacity) { |
461 | assert(ctrl[capacity] == kSentinel); |
462 | assert(IsValidCapacity(capacity)); |
463 | for (ctrl_t* pos = ctrl; pos != ctrl + capacity + 1; pos += Group::kWidth) { |
464 | Group{pos}.ConvertSpecialToEmptyAndFullToDeleted(pos); |
465 | } |
466 | // Copy the cloned ctrl bytes. |
467 | std::memcpy(ctrl + capacity + 1, ctrl, Group::kWidth); |
468 | ctrl[capacity] = kSentinel; |
469 | } |
470 | |
471 | // Rounds up the capacity to the next power of 2 minus 1, with a minimum of 1. |
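// For example, on a 64-bit size_t: NormalizeCapacity(3) == 3,
// NormalizeCapacity(4) == 7, and NormalizeCapacity(15) == 15.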
472 | inline size_t NormalizeCapacity(size_t n) { |
473 | return n ? ~size_t{} >> LeadingZeros(n) : 1; |
474 | } |
475 | |
476 | // We use 7/8th as maximum load factor. |
477 | // For 16-wide groups, that gives an average of two empty slots per group. |
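// For example, CapacityToGrowth(15) == 14; with 8-wide groups,
// CapacityToGrowth(7) == 6 (the special case handled below).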
478 | inline size_t CapacityToGrowth(size_t capacity) { |
479 | assert(IsValidCapacity(capacity)); |
480 | // `capacity*7/8` |
481 | if (Group::kWidth == 8 && capacity == 7) { |
482 | // x-x/8 does not work when x==7. |
483 | return 6; |
484 | } |
485 | return capacity - capacity / 8; |
486 | } |
// From the desired "growth" to a lower bound of the necessary capacity.
// The result might not be a valid capacity and may require NormalizeCapacity().
489 | inline size_t GrowthToLowerboundCapacity(size_t growth) { |
490 | // `growth*8/7` |
491 | if (Group::kWidth == 8 && growth == 7) { |
492 | // x+(x-1)/7 does not work when x==7. |
493 | return 8; |
494 | } |
495 | return growth + static_cast<size_t>((static_cast<int64_t>(growth) - 1) / 7); |
496 | } |
497 | |
498 | // Policy: a policy defines how to perform different operations on |
499 | // the slots of the hashtable (see hash_policy_traits.h for the full interface |
500 | // of policy). |
501 | // |
502 | // Hash: a (possibly polymorphic) functor that hashes keys of the hashtable. The |
503 | // functor should accept a key and return size_t as hash. For best performance |
504 | // it is important that the hash function provides high entropy across all bits |
505 | // of the hash. |
506 | // |
507 | // Eq: a (possibly polymorphic) functor that compares two keys for equality. It |
508 | // should accept two (of possibly different type) keys and return a bool: true |
509 | // if they are equal, false if they are not. If two keys compare equal, then |
510 | // their hash values as defined by Hash MUST be equal. |
511 | // |
512 | // Allocator: an Allocator [https://devdocs.io/cpp/concept/allocator] with which |
513 | // the storage of the hashtable will be allocated and the elements will be |
514 | // constructed and destroyed. |
515 | template <class Policy, class Hash, class Eq, class Alloc> |
516 | class raw_hash_set { |
517 | using PolicyTraits = hash_policy_traits<Policy>; |
518 | using KeyArgImpl = |
519 | KeyArg<IsTransparent<Eq>::value && IsTransparent<Hash>::value>; |
520 | |
521 | public: |
522 | using init_type = typename PolicyTraits::init_type; |
523 | using key_type = typename PolicyTraits::key_type; |
524 | // TODO(sbenza): Hide slot_type as it is an implementation detail. Needs user |
525 | // code fixes! |
526 | using slot_type = typename PolicyTraits::slot_type; |
527 | using allocator_type = Alloc; |
528 | using size_type = size_t; |
529 | using difference_type = ptrdiff_t; |
530 | using hasher = Hash; |
531 | using key_equal = Eq; |
532 | using policy_type = Policy; |
533 | using value_type = typename PolicyTraits::value_type; |
534 | using reference = value_type&; |
535 | using const_reference = const value_type&; |
536 | using pointer = typename absl::allocator_traits< |
537 | allocator_type>::template rebind_traits<value_type>::pointer; |
538 | using const_pointer = typename absl::allocator_traits< |
539 | allocator_type>::template rebind_traits<value_type>::const_pointer; |
540 | |
541 | // Alias used for heterogeneous lookup functions. |
542 | // `key_arg<K>` evaluates to `K` when the functors are transparent and to |
543 | // `key_type` otherwise. It permits template argument deduction on `K` for the |
544 | // transparent case. |
545 | template <class K> |
546 | using key_arg = typename KeyArgImpl::template type<K, key_type>; |
547 | |
548 | private: |
549 | // Give an early error when key_type is not hashable/eq. |
550 | auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k)); |
551 | auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k)); |
552 | |
553 | using Layout = absl::container_internal::Layout<ctrl_t, slot_type>; |
554 | |
555 | static Layout MakeLayout(size_t capacity) { |
556 | assert(IsValidCapacity(capacity)); |
557 | return Layout(capacity + Group::kWidth + 1, capacity); |
558 | } |
559 | |
560 | using AllocTraits = absl::allocator_traits<allocator_type>; |
561 | using SlotAlloc = typename absl::allocator_traits< |
562 | allocator_type>::template rebind_alloc<slot_type>; |
563 | using SlotAllocTraits = typename absl::allocator_traits< |
564 | allocator_type>::template rebind_traits<slot_type>; |
565 | |
static_assert(std::is_lvalue_reference<reference>::value,
"Policy::element() must return a reference");
568 | |
569 | template <typename T> |
570 | struct SameAsElementReference |
571 | : std::is_same<typename std::remove_cv< |
572 | typename std::remove_reference<reference>::type>::type, |
573 | typename std::remove_cv< |
574 | typename std::remove_reference<T>::type>::type> {}; |
575 | |
576 | // An enabler for insert(T&&): T must be convertible to init_type or be the |
577 | // same as [cv] value_type [ref]. |
578 | // Note: we separate SameAsElementReference into its own type to avoid using |
579 | // reference unless we need to. MSVC doesn't seem to like it in some |
580 | // cases. |
581 | template <class T> |
582 | using RequiresInsertable = typename std::enable_if< |
583 | absl::disjunction<std::is_convertible<T, init_type>, |
584 | SameAsElementReference<T>>::value, |
585 | int>::type; |
586 | |
587 | // RequiresNotInit is a workaround for gcc prior to 7.1. |
588 | // See https://godbolt.org/g/Y4xsUh. |
589 | template <class T> |
590 | using RequiresNotInit = |
591 | typename std::enable_if<!std::is_same<T, init_type>::value, int>::type; |
592 | |
593 | template <class... Ts> |
594 | using IsDecomposable = IsDecomposable<void, PolicyTraits, Hash, Eq, Ts...>; |
595 | |
596 | public: |
static_assert(std::is_same<pointer, value_type*>::value,
"Allocators with custom pointer types are not supported");
static_assert(std::is_same<const_pointer, const value_type*>::value,
"Allocators with custom pointer types are not supported");
601 | |
602 | class iterator { |
603 | friend class raw_hash_set; |
604 | |
605 | public: |
606 | using iterator_category = std::forward_iterator_tag; |
607 | using value_type = typename raw_hash_set::value_type; |
608 | using reference = |
609 | absl::conditional_t<PolicyTraits::constant_iterators::value, |
610 | const value_type&, value_type&>; |
611 | using pointer = absl::remove_reference_t<reference>*; |
612 | using difference_type = typename raw_hash_set::difference_type; |
613 | |
614 | iterator() {} |
615 | |
616 | // PRECONDITION: not an end() iterator. |
617 | reference operator*() const { return PolicyTraits::element(slot_); } |
618 | |
619 | // PRECONDITION: not an end() iterator. |
620 | pointer operator->() const { return &operator*(); } |
621 | |
622 | // PRECONDITION: not an end() iterator. |
623 | iterator& operator++() { |
624 | ++ctrl_; |
625 | ++slot_; |
626 | skip_empty_or_deleted(); |
627 | return *this; |
628 | } |
629 | // PRECONDITION: not an end() iterator. |
630 | iterator operator++(int) { |
631 | auto tmp = *this; |
632 | ++*this; |
633 | return tmp; |
634 | } |
635 | |
636 | friend bool operator==(const iterator& a, const iterator& b) { |
637 | return a.ctrl_ == b.ctrl_; |
638 | } |
639 | friend bool operator!=(const iterator& a, const iterator& b) { |
640 | return !(a == b); |
641 | } |
642 | |
643 | private: |
644 | iterator(ctrl_t* ctrl) : ctrl_(ctrl) {} // for end() |
645 | iterator(ctrl_t* ctrl, slot_type* slot) : ctrl_(ctrl), slot_(slot) {} |
646 | |
647 | void skip_empty_or_deleted() { |
648 | while (IsEmptyOrDeleted(*ctrl_)) { |
649 | // ctrl is not necessarily aligned to Group::kWidth. It is also likely |
650 | // to read past the space for ctrl bytes and into slots. This is ok |
651 | // because ctrl has sizeof() == 1 and slot has sizeof() >= 1 so there |
652 | // is no way to read outside the combined slot array. |
653 | uint32_t shift = Group{ctrl_}.CountLeadingEmptyOrDeleted(); |
654 | ctrl_ += shift; |
655 | slot_ += shift; |
656 | } |
657 | } |
658 | |
659 | ctrl_t* ctrl_ = nullptr; |
// To avoid uninitialized member warnings, put slot_ in an anonymous union.
661 | // The member is not initialized on singleton and end iterators. |
662 | union { |
663 | slot_type* slot_; |
664 | }; |
665 | }; |
666 | |
667 | class const_iterator { |
668 | friend class raw_hash_set; |
669 | |
670 | public: |
671 | using iterator_category = typename iterator::iterator_category; |
672 | using value_type = typename raw_hash_set::value_type; |
673 | using reference = typename raw_hash_set::const_reference; |
674 | using pointer = typename raw_hash_set::const_pointer; |
675 | using difference_type = typename raw_hash_set::difference_type; |
676 | |
677 | const_iterator() {} |
678 | // Implicit construction from iterator. |
679 | const_iterator(iterator i) : inner_(std::move(i)) {} |
680 | |
681 | reference operator*() const { return *inner_; } |
682 | pointer operator->() const { return inner_.operator->(); } |
683 | |
684 | const_iterator& operator++() { |
685 | ++inner_; |
686 | return *this; |
687 | } |
688 | const_iterator operator++(int) { return inner_++; } |
689 | |
690 | friend bool operator==(const const_iterator& a, const const_iterator& b) { |
691 | return a.inner_ == b.inner_; |
692 | } |
693 | friend bool operator!=(const const_iterator& a, const const_iterator& b) { |
694 | return !(a == b); |
695 | } |
696 | |
697 | private: |
698 | const_iterator(const ctrl_t* ctrl, const slot_type* slot) |
699 | : inner_(const_cast<ctrl_t*>(ctrl), const_cast<slot_type*>(slot)) {} |
700 | |
701 | iterator inner_; |
702 | }; |
703 | |
704 | using node_type = node_handle<Policy, hash_policy_traits<Policy>, Alloc>; |
705 | using insert_return_type = InsertReturnType<iterator, node_type>; |
706 | |
707 | raw_hash_set() noexcept( |
708 | std::is_nothrow_default_constructible<hasher>::value&& |
709 | std::is_nothrow_default_constructible<key_equal>::value&& |
710 | std::is_nothrow_default_constructible<allocator_type>::value) {} |
711 | |
712 | explicit raw_hash_set(size_t bucket_count, const hasher& hash = hasher(), |
713 | const key_equal& eq = key_equal(), |
714 | const allocator_type& alloc = allocator_type()) |
715 | : ctrl_(EmptyGroup()), settings_(0, hash, eq, alloc) { |
716 | if (bucket_count) { |
717 | capacity_ = NormalizeCapacity(bucket_count); |
718 | reset_growth_left(); |
719 | initialize_slots(); |
720 | } |
721 | } |
722 | |
723 | raw_hash_set(size_t bucket_count, const hasher& hash, |
724 | const allocator_type& alloc) |
725 | : raw_hash_set(bucket_count, hash, key_equal(), alloc) {} |
726 | |
727 | raw_hash_set(size_t bucket_count, const allocator_type& alloc) |
728 | : raw_hash_set(bucket_count, hasher(), key_equal(), alloc) {} |
729 | |
730 | explicit raw_hash_set(const allocator_type& alloc) |
731 | : raw_hash_set(0, hasher(), key_equal(), alloc) {} |
732 | |
733 | template <class InputIter> |
734 | raw_hash_set(InputIter first, InputIter last, size_t bucket_count = 0, |
735 | const hasher& hash = hasher(), const key_equal& eq = key_equal(), |
736 | const allocator_type& alloc = allocator_type()) |
737 | : raw_hash_set(bucket_count, hash, eq, alloc) { |
738 | insert(first, last); |
739 | } |
740 | |
741 | template <class InputIter> |
742 | raw_hash_set(InputIter first, InputIter last, size_t bucket_count, |
743 | const hasher& hash, const allocator_type& alloc) |
744 | : raw_hash_set(first, last, bucket_count, hash, key_equal(), alloc) {} |
745 | |
746 | template <class InputIter> |
747 | raw_hash_set(InputIter first, InputIter last, size_t bucket_count, |
748 | const allocator_type& alloc) |
749 | : raw_hash_set(first, last, bucket_count, hasher(), key_equal(), alloc) {} |
750 | |
751 | template <class InputIter> |
752 | raw_hash_set(InputIter first, InputIter last, const allocator_type& alloc) |
753 | : raw_hash_set(first, last, 0, hasher(), key_equal(), alloc) {} |
754 | |
755 | // Instead of accepting std::initializer_list<value_type> as the first |
756 | // argument like std::unordered_set<value_type> does, we have two overloads |
757 | // that accept std::initializer_list<T> and std::initializer_list<init_type>. |
758 | // This is advantageous for performance. |
759 | // |
760 | // // Turns {"abc", "def"} into std::initializer_list<std::string>, then |
761 | // // copies the strings into the set. |
762 | // std::unordered_set<std::string> s = {"abc", "def"}; |
763 | // |
764 | // // Turns {"abc", "def"} into std::initializer_list<const char*>, then |
765 | // // copies the strings into the set. |
766 | // absl::flat_hash_set<std::string> s = {"abc", "def"}; |
767 | // |
768 | // The same trick is used in insert(). |
769 | // |
770 | // The enabler is necessary to prevent this constructor from triggering where |
771 | // the copy constructor is meant to be called. |
772 | // |
773 | // absl::flat_hash_set<int> a, b{a}; |
774 | // |
775 | // RequiresNotInit<T> is a workaround for gcc prior to 7.1. |
776 | template <class T, RequiresNotInit<T> = 0, RequiresInsertable<T> = 0> |
777 | raw_hash_set(std::initializer_list<T> init, size_t bucket_count = 0, |
778 | const hasher& hash = hasher(), const key_equal& eq = key_equal(), |
779 | const allocator_type& alloc = allocator_type()) |
780 | : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {} |
781 | |
782 | raw_hash_set(std::initializer_list<init_type> init, size_t bucket_count = 0, |
783 | const hasher& hash = hasher(), const key_equal& eq = key_equal(), |
784 | const allocator_type& alloc = allocator_type()) |
785 | : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {} |
786 | |
787 | template <class T, RequiresNotInit<T> = 0, RequiresInsertable<T> = 0> |
788 | raw_hash_set(std::initializer_list<T> init, size_t bucket_count, |
789 | const hasher& hash, const allocator_type& alloc) |
790 | : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {} |
791 | |
792 | raw_hash_set(std::initializer_list<init_type> init, size_t bucket_count, |
793 | const hasher& hash, const allocator_type& alloc) |
794 | : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {} |
795 | |
796 | template <class T, RequiresNotInit<T> = 0, RequiresInsertable<T> = 0> |
797 | raw_hash_set(std::initializer_list<T> init, size_t bucket_count, |
798 | const allocator_type& alloc) |
799 | : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {} |
800 | |
801 | raw_hash_set(std::initializer_list<init_type> init, size_t bucket_count, |
802 | const allocator_type& alloc) |
803 | : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {} |
804 | |
805 | template <class T, RequiresNotInit<T> = 0, RequiresInsertable<T> = 0> |
806 | raw_hash_set(std::initializer_list<T> init, const allocator_type& alloc) |
807 | : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {} |
808 | |
809 | raw_hash_set(std::initializer_list<init_type> init, |
810 | const allocator_type& alloc) |
811 | : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {} |
812 | |
813 | raw_hash_set(const raw_hash_set& that) |
814 | : raw_hash_set(that, AllocTraits::select_on_container_copy_construction( |
815 | that.alloc_ref())) {} |
816 | |
817 | raw_hash_set(const raw_hash_set& that, const allocator_type& a) |
818 | : raw_hash_set(0, that.hash_ref(), that.eq_ref(), a) { |
819 | reserve(that.size()); |
820 | // Because the table is guaranteed to be empty, we can do something faster |
821 | // than a full `insert`. |
822 | for (const auto& v : that) { |
823 | const size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, v); |
824 | auto target = find_first_non_full(hash); |
825 | set_ctrl(target.offset, H2(hash)); |
826 | emplace_at(target.offset, v); |
827 | infoz_.RecordInsert(hash, target.probe_length); |
828 | } |
829 | size_ = that.size(); |
830 | growth_left() -= that.size(); |
831 | } |
832 | |
833 | raw_hash_set(raw_hash_set&& that) noexcept( |
834 | std::is_nothrow_copy_constructible<hasher>::value&& |
835 | std::is_nothrow_copy_constructible<key_equal>::value&& |
836 | std::is_nothrow_copy_constructible<allocator_type>::value) |
837 | : ctrl_(absl::exchange(that.ctrl_, EmptyGroup())), |
838 | slots_(absl::exchange(that.slots_, nullptr)), |
839 | size_(absl::exchange(that.size_, 0)), |
840 | capacity_(absl::exchange(that.capacity_, 0)), |
841 | infoz_(absl::exchange(that.infoz_, HashtablezInfoHandle())), |
842 | // Hash, equality and allocator are copied instead of moved because |
843 | // `that` must be left valid. If Hash is std::function<Key>, moving it |
844 | // would create a nullptr functor that cannot be called. |
845 | settings_(that.settings_) { |
846 | // growth_left was copied above, reset the one from `that`. |
847 | that.growth_left() = 0; |
848 | } |
849 | |
850 | raw_hash_set(raw_hash_set&& that, const allocator_type& a) |
851 | : ctrl_(EmptyGroup()), |
852 | slots_(nullptr), |
853 | size_(0), |
854 | capacity_(0), |
855 | settings_(0, that.hash_ref(), that.eq_ref(), a) { |
856 | if (a == that.alloc_ref()) { |
857 | std::swap(ctrl_, that.ctrl_); |
858 | std::swap(slots_, that.slots_); |
859 | std::swap(size_, that.size_); |
860 | std::swap(capacity_, that.capacity_); |
861 | std::swap(growth_left(), that.growth_left()); |
862 | std::swap(infoz_, that.infoz_); |
863 | } else { |
864 | reserve(that.size()); |
865 | // Note: this will copy elements of dense_set and unordered_set instead of |
866 | // moving them. This can be fixed if it ever becomes an issue. |
867 | for (auto& elem : that) insert(std::move(elem)); |
868 | } |
869 | } |
870 | |
871 | raw_hash_set& operator=(const raw_hash_set& that) { |
872 | raw_hash_set tmp(that, |
873 | AllocTraits::propagate_on_container_copy_assignment::value |
874 | ? that.alloc_ref() |
875 | : alloc_ref()); |
876 | swap(tmp); |
877 | return *this; |
878 | } |
879 | |
880 | raw_hash_set& operator=(raw_hash_set&& that) noexcept( |
881 | absl::allocator_traits<allocator_type>::is_always_equal::value&& |
882 | std::is_nothrow_move_assignable<hasher>::value&& |
883 | std::is_nothrow_move_assignable<key_equal>::value) { |
884 | // TODO(sbenza): We should only use the operations from the noexcept clause |
885 | // to make sure we actually adhere to that contract. |
886 | return move_assign( |
887 | std::move(that), |
888 | typename AllocTraits::propagate_on_container_move_assignment()); |
889 | } |
890 | |
891 | ~raw_hash_set() { destroy_slots(); } |
892 | |
893 | iterator begin() { |
894 | auto it = iterator_at(0); |
895 | it.skip_empty_or_deleted(); |
896 | return it; |
897 | } |
898 | iterator end() { return {ctrl_ + capacity_}; } |
899 | |
900 | const_iterator begin() const { |
901 | return const_cast<raw_hash_set*>(this)->begin(); |
902 | } |
903 | const_iterator end() const { return const_cast<raw_hash_set*>(this)->end(); } |
904 | const_iterator cbegin() const { return begin(); } |
905 | const_iterator cend() const { return end(); } |
906 | |
907 | bool empty() const { return !size(); } |
908 | size_t size() const { return size_; } |
909 | size_t capacity() const { return capacity_; } |
910 | size_t max_size() const { return (std::numeric_limits<size_t>::max)(); } |
911 | |
912 | ABSL_ATTRIBUTE_REINITIALIZES void clear() { |
913 | // Iterating over this container is O(bucket_count()). When bucket_count() |
914 | // is much greater than size(), iteration becomes prohibitively expensive. |
915 | // For clear() it is more important to reuse the allocated array when the |
// container is small, because allocation takes much longer than destroying
// the elements of the container. So we pick the
918 | // largest bucket_count() threshold for which iteration is still fast and |
919 | // past that we simply deallocate the array. |
920 | if (capacity_ > 127) { |
921 | destroy_slots(); |
922 | } else if (capacity_) { |
923 | for (size_t i = 0; i != capacity_; ++i) { |
924 | if (IsFull(ctrl_[i])) { |
925 | PolicyTraits::destroy(&alloc_ref(), slots_ + i); |
926 | } |
927 | } |
928 | size_ = 0; |
929 | reset_ctrl(); |
930 | reset_growth_left(); |
931 | } |
932 | assert(empty()); |
933 | infoz_.RecordStorageChanged(0, capacity_); |
934 | } |
935 | |
936 | // This overload kicks in when the argument is an rvalue of insertable and |
937 | // decomposable type other than init_type. |
938 | // |
939 | // flat_hash_map<std::string, int> m; |
940 | // m.insert(std::make_pair("abc", 42)); |
941 | template <class T, RequiresInsertable<T> = 0, |
942 | typename std::enable_if<IsDecomposable<T>::value, int>::type = 0, |
943 | T* = nullptr> |
944 | std::pair<iterator, bool> insert(T&& value) { |
945 | return emplace(std::forward<T>(value)); |
946 | } |
947 | |
948 | // This overload kicks in when the argument is a bitfield or an lvalue of |
949 | // insertable and decomposable type. |
950 | // |
951 | // union { int n : 1; }; |
952 | // flat_hash_set<int> s; |
953 | // s.insert(n); |
954 | // |
955 | // flat_hash_set<std::string> s; |
956 | // const char* p = "hello"; |
957 | // s.insert(p); |
958 | // |
959 | // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace |
960 | // RequiresInsertable<T> with RequiresInsertable<const T&>. |
961 | // We are hitting this bug: https://godbolt.org/g/1Vht4f. |
962 | template < |
963 | class T, RequiresInsertable<T> = 0, |
964 | typename std::enable_if<IsDecomposable<const T&>::value, int>::type = 0> |
965 | std::pair<iterator, bool> insert(const T& value) { |
966 | return emplace(value); |
967 | } |
968 | |
969 | // This overload kicks in when the argument is an rvalue of init_type. Its |
970 | // purpose is to handle brace-init-list arguments. |
971 | // |
972 | // flat_hash_map<std::string, int> s; |
973 | // s.insert({"abc", 42}); |
974 | std::pair<iterator, bool> insert(init_type&& value) { |
975 | return emplace(std::move(value)); |
976 | } |
977 | |
978 | template <class T, RequiresInsertable<T> = 0, |
979 | typename std::enable_if<IsDecomposable<T>::value, int>::type = 0, |
980 | T* = nullptr> |
981 | iterator insert(const_iterator, T&& value) { |
982 | return insert(std::forward<T>(value)).first; |
983 | } |
984 | |
985 | // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace |
986 | // RequiresInsertable<T> with RequiresInsertable<const T&>. |
987 | // We are hitting this bug: https://godbolt.org/g/1Vht4f. |
988 | template < |
989 | class T, RequiresInsertable<T> = 0, |
990 | typename std::enable_if<IsDecomposable<const T&>::value, int>::type = 0> |
991 | iterator insert(const_iterator, const T& value) { |
992 | return insert(value).first; |
993 | } |
994 | |
995 | iterator insert(const_iterator, init_type&& value) { |
996 | return insert(std::move(value)).first; |
997 | } |
998 | |
999 | template <class InputIt> |
1000 | void insert(InputIt first, InputIt last) { |
1001 | for (; first != last; ++first) insert(*first); |
1002 | } |
1003 | |
1004 | template <class T, RequiresNotInit<T> = 0, RequiresInsertable<const T&> = 0> |
1005 | void insert(std::initializer_list<T> ilist) { |
1006 | insert(ilist.begin(), ilist.end()); |
1007 | } |
1008 | |
1009 | void insert(std::initializer_list<init_type> ilist) { |
1010 | insert(ilist.begin(), ilist.end()); |
1011 | } |
1012 | |
1013 | insert_return_type insert(node_type&& node) { |
1014 | if (!node) return {end(), false, node_type()}; |
1015 | const auto& elem = PolicyTraits::element(CommonAccess::GetSlot(node)); |
1016 | auto res = PolicyTraits::apply( |
1017 | InsertSlot<false>{*this, std::move(*CommonAccess::GetSlot(node))}, |
1018 | elem); |
1019 | if (res.second) { |
1020 | CommonAccess::Reset(&node); |
1021 | return {res.first, true, node_type()}; |
1022 | } else { |
1023 | return {res.first, false, std::move(node)}; |
1024 | } |
1025 | } |
1026 | |
1027 | iterator insert(const_iterator, node_type&& node) { |
1028 | return insert(std::move(node)).first; |
1029 | } |
1030 | |
1031 | // This overload kicks in if we can deduce the key from args. This enables us |
1032 | // to avoid constructing value_type if an entry with the same key already |
1033 | // exists. |
1034 | // |
1035 | // For example: |
1036 | // |
1037 | // flat_hash_map<std::string, std::string> m = {{"abc", "def"}}; |
1038 | // // Creates no std::string copies and makes no heap allocations. |
1039 | // m.emplace("abc", "xyz"); |
1040 | template <class... Args, typename std::enable_if< |
1041 | IsDecomposable<Args...>::value, int>::type = 0> |
1042 | std::pair<iterator, bool> emplace(Args&&... args) { |
1043 | return PolicyTraits::apply(EmplaceDecomposable{*this}, |
1044 | std::forward<Args>(args)...); |
1045 | } |
1046 | |
1047 | // This overload kicks in if we cannot deduce the key from args. It constructs |
1048 | // value_type unconditionally and then either moves it into the table or |
// destroys it.
1050 | template <class... Args, typename std::enable_if< |
1051 | !IsDecomposable<Args...>::value, int>::type = 0> |
1052 | std::pair<iterator, bool> emplace(Args&&... args) { |
1053 | typename std::aligned_storage<sizeof(slot_type), alignof(slot_type)>::type |
1054 | raw; |
1055 | slot_type* slot = reinterpret_cast<slot_type*>(&raw); |
1056 | |
1057 | PolicyTraits::construct(&alloc_ref(), slot, std::forward<Args>(args)...); |
1058 | const auto& elem = PolicyTraits::element(slot); |
1059 | return PolicyTraits::apply(InsertSlot<true>{*this, std::move(*slot)}, elem); |
1060 | } |
1061 | |
1062 | template <class... Args> |
1063 | iterator emplace_hint(const_iterator, Args&&... args) { |
1064 | return emplace(std::forward<Args>(args)...).first; |
1065 | } |
1066 | |
1067 | // Extension API: support for lazy emplace. |
1068 | // |
1069 | // Looks up key in the table. If found, returns the iterator to the element. |
1070 | // Otherwise calls f with one argument of type raw_hash_set::constructor. f |
1071 | // MUST call raw_hash_set::constructor with arguments as if a |
1072 | // raw_hash_set::value_type is constructed, otherwise the behavior is |
1073 | // undefined. |
1074 | // |
1075 | // For example: |
1076 | // |
1077 | // std::unordered_set<ArenaString> s; |
1078 | // // Makes ArenaStr even if "abc" is in the map. |
1079 | // s.insert(ArenaString(&arena, "abc")); |
1080 | // |
1081 | // flat_hash_set<ArenaStr> s; |
1082 | // // Makes ArenaStr only if "abc" is not in the map. |
1083 | // s.lazy_emplace("abc", [&](const constructor& ctor) { |
1084 | // ctor(&arena, "abc"); |
1085 | // }); |
1086 | // |
1087 | // WARNING: This API is currently experimental. If there is a way to implement |
1088 | // the same thing with the rest of the API, prefer that. |
1089 | class constructor { |
1090 | friend class raw_hash_set; |
1091 | |
1092 | public: |
1093 | template <class... Args> |
1094 | void operator()(Args&&... args) const { |
1095 | assert(*slot_); |
1096 | PolicyTraits::construct(alloc_, *slot_, std::forward<Args>(args)...); |
1097 | *slot_ = nullptr; |
1098 | } |
1099 | |
1100 | private: |
1101 | constructor(allocator_type* a, slot_type** slot) : alloc_(a), slot_(slot) {} |
1102 | |
1103 | allocator_type* alloc_; |
1104 | slot_type** slot_; |
1105 | }; |
1106 | |
1107 | template <class K = key_type, class F> |
1108 | iterator lazy_emplace(const key_arg<K>& key, F&& f) { |
1109 | auto res = find_or_prepare_insert(key); |
1110 | if (res.second) { |
1111 | slot_type* slot = slots_ + res.first; |
1112 | std::forward<F>(f)(constructor(&alloc_ref(), &slot)); |
1113 | assert(!slot); |
1114 | } |
1115 | return iterator_at(res.first); |
1116 | } |
1117 | |
1118 | // Extension API: support for heterogeneous keys. |
1119 | // |
1120 | // std::unordered_set<std::string> s; |
1121 | // // Turns "abc" into std::string. |
1122 | // s.erase("abc"); |
1123 | // |
1124 | // flat_hash_set<std::string> s; |
1125 | // // Uses "abc" directly without copying it into std::string. |
1126 | // s.erase("abc"); |
1127 | template <class K = key_type> |
1128 | size_type erase(const key_arg<K>& key) { |
1129 | auto it = find(key); |
1130 | if (it == end()) return 0; |
1131 | erase(it); |
1132 | return 1; |
1133 | } |
1134 | |
1135 | // Erases the element pointed to by `it`. Unlike `std::unordered_set::erase`, |
1136 | // this method returns void to reduce algorithmic complexity to O(1). In |
1137 | // order to erase while iterating across a map, use the following idiom (which |
1138 | // also works for standard containers): |
1139 | // |
1140 | // for (auto it = m.begin(), end = m.end(); it != end;) { |
1141 | // if (<pred>) { |
1142 | // m.erase(it++); |
1143 | // } else { |
1144 | // ++it; |
1145 | // } |
1146 | // } |
1147 | void erase(const_iterator cit) { erase(cit.inner_); } |
1148 | |
1149 | // This overload is necessary because otherwise erase<K>(const K&) would be |
1150 | // a better match if non-const iterator is passed as an argument. |
1151 | void erase(iterator it) { |
1152 | assert(it != end()); |
1153 | PolicyTraits::destroy(&alloc_ref(), it.slot_); |
1154 | erase_meta_only(it); |
1155 | } |
1156 | |
1157 | iterator erase(const_iterator first, const_iterator last) { |
1158 | while (first != last) { |
1159 | erase(first++); |
1160 | } |
1161 | return last.inner_; |
1162 | } |
1163 | |
1164 | // Moves elements from `src` into `this`. |
1165 | // If the element already exists in `this`, it is left unmodified in `src`. |
1166 | template <typename H, typename E> |
1167 | void merge(raw_hash_set<Policy, H, E, Alloc>& src) { // NOLINT |
1168 | assert(this != &src); |
1169 | for (auto it = src.begin(), e = src.end(); it != e; ++it) { |
1170 | if (PolicyTraits::apply(InsertSlot<false>{*this, std::move(*it.slot_)}, |
1171 | PolicyTraits::element(it.slot_)) |
1172 | .second) { |
1173 | src.erase_meta_only(it); |
1174 | } |
1175 | } |
1176 | } |
1177 | |
1178 | template <typename H, typename E> |
1179 | void merge(raw_hash_set<Policy, H, E, Alloc>&& src) { |
1180 | merge(src); |
1181 | } |
1182 | |
node_type extract(const_iterator position) {
1184 | auto node = |
1185 | CommonAccess::Make<node_type>(alloc_ref(), position.inner_.slot_); |
1186 | erase_meta_only(position); |
1187 | return node; |
1188 | } |
1189 | |
1190 | template < |
1191 | class K = key_type, |
1192 | typename std::enable_if<!std::is_same<K, iterator>::value, int>::type = 0> |
node_type extract(const key_arg<K>& key) {
1194 | auto it = find(key); |
1195 | return it == end() ? node_type() : extract(const_iterator{it}); |
1196 | } |
1197 | |
1198 | void swap(raw_hash_set& that) noexcept( |
1199 | IsNoThrowSwappable<hasher>() && IsNoThrowSwappable<key_equal>() && |
1200 | (!AllocTraits::propagate_on_container_swap::value || |
1201 | IsNoThrowSwappable<allocator_type>())) { |
1202 | using std::swap; |
1203 | swap(ctrl_, that.ctrl_); |
1204 | swap(slots_, that.slots_); |
1205 | swap(size_, that.size_); |
1206 | swap(capacity_, that.capacity_); |
1207 | swap(growth_left(), that.growth_left()); |
1208 | swap(hash_ref(), that.hash_ref()); |
1209 | swap(eq_ref(), that.eq_ref()); |
1210 | swap(infoz_, that.infoz_); |
1211 | if (AllocTraits::propagate_on_container_swap::value) { |
1212 | swap(alloc_ref(), that.alloc_ref()); |
1213 | } else { |
1214 | // If the allocators do not compare equal it is officially undefined |
1215 | // behavior. We choose to do nothing. |
1216 | } |
1217 | } |
1218 | |
1219 | void rehash(size_t n) { |
1220 | if (n == 0 && capacity_ == 0) return; |
1221 | if (n == 0 && size_ == 0) { |
1222 | destroy_slots(); |
1223 | infoz_.RecordStorageChanged(0, 0); |
1224 | return; |
1225 | } |
1226 | // bitor is a faster way of doing `max` here. We will round up to the next |
1227 | // power-of-2-minus-1, so bitor is good enough. |
1228 | auto m = NormalizeCapacity(n | GrowthToLowerboundCapacity(size())); |
1229 | // n == 0 unconditionally rehashes as per the standard. |
1230 | if (n == 0 || m > capacity_) { |
1231 | resize(m); |
1232 | } |
1233 | } |
1234 | |
1235 | void reserve(size_t n) { rehash(GrowthToLowerboundCapacity(n)); } |
1236 | |
1237 | // Extension API: support for heterogeneous keys. |
1238 | // |
1239 | // std::unordered_set<std::string> s; |
1240 | // // Turns "abc" into std::string. |
1241 | // s.count("abc"); |
1242 | // |
1243 | // ch_set<std::string> s; |
1244 | // // Uses "abc" directly without copying it into std::string. |
1245 | // s.count("abc"); |
1246 | template <class K = key_type> |
1247 | size_t count(const key_arg<K>& key) const { |
1248 | return find(key) == end() ? 0 : 1; |
1249 | } |
1250 | |
1251 | // Issues CPU prefetch instructions for the memory needed to find or insert |
// a key. Like all lookup functions, this supports heterogeneous keys.
1253 | // |
1254 | // NOTE: This is a very low level operation and should not be used without |
1255 | // specific benchmarks indicating its importance. |
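//
// A sketch of the intended usage pattern (hypothetical surrounding code):
//
//   // Start fetching the cache lines for `key` early...
//   set.prefetch(key);
//   DoSomeOtherWork();
//   // ...so that by the time we look it up, the lines may already be warm.
//   auto it = set.find(key);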
1256 | template <class K = key_type> |
1257 | void prefetch(const key_arg<K>& key) const { |
1258 | (void)key; |
1259 | #if defined(__GNUC__) |
1260 | auto seq = probe(hash_ref()(key)); |
1261 | __builtin_prefetch(static_cast<const void*>(ctrl_ + seq.offset())); |
1262 | __builtin_prefetch(static_cast<const void*>(slots_ + seq.offset())); |
1263 | #endif // __GNUC__ |
1264 | } |
1265 | |
1266 | // The API of find() has two extensions. |
1267 | // |
1268 | // 1. The hash can be passed by the user. It must be equal to the hash of the |
1269 | // key. |
1270 | // |
// 2. The type of the key argument doesn't have to be key_type. This is
// so-called heterogeneous key support.
1273 | template <class K = key_type> |
1274 | iterator find(const key_arg<K>& key, size_t hash) { |
1275 | auto seq = probe(hash); |
1276 | while (true) { |
1277 | Group g{ctrl_ + seq.offset()}; |
1278 | for (int i : g.Match(H2(hash))) { |
1279 | if (ABSL_PREDICT_TRUE(PolicyTraits::apply( |
1280 | EqualElement<K>{key, eq_ref()}, |
1281 | PolicyTraits::element(slots_ + seq.offset(i))))) |
1282 | return iterator_at(seq.offset(i)); |
1283 | } |
1284 | if (ABSL_PREDICT_TRUE(g.MatchEmpty())) return end(); |
1285 | seq.next(); |
1286 | } |
1287 | } |
1288 | template <class K = key_type> |
1289 | iterator find(const key_arg<K>& key) { |
1290 | return find(key, hash_ref()(key)); |
1291 | } |
1292 | |
1293 | template <class K = key_type> |
1294 | const_iterator find(const key_arg<K>& key, size_t hash) const { |
1295 | return const_cast<raw_hash_set*>(this)->find(key, hash); |
1296 | } |
1297 | template <class K = key_type> |
1298 | const_iterator find(const key_arg<K>& key) const { |
1299 | return find(key, hash_ref()(key)); |
1300 | } |
1301 | |
1302 | template <class K = key_type> |
1303 | bool contains(const key_arg<K>& key) const { |
1304 | return find(key) != end(); |
1305 | } |
1306 | |
1307 | template <class K = key_type> |
1308 | std::pair<iterator, iterator> equal_range(const key_arg<K>& key) { |
1309 | auto it = find(key); |
1310 | if (it != end()) return {it, std::next(it)}; |
1311 | return {it, it}; |
1312 | } |
1313 | template <class K = key_type> |
1314 | std::pair<const_iterator, const_iterator> equal_range( |
1315 | const key_arg<K>& key) const { |
1316 | auto it = find(key); |
1317 | if (it != end()) return {it, std::next(it)}; |
1318 | return {it, it}; |
1319 | } |
1320 | |
1321 | size_t bucket_count() const { return capacity_; } |
1322 | float load_factor() const { |
1323 | return capacity_ ? static_cast<double>(size()) / capacity_ : 0.0; |
1324 | } |
1325 | float max_load_factor() const { return 1.0f; } |
1326 | void max_load_factor(float) { |
1327 | // Does nothing. |
1328 | } |
1329 | |
1330 | hasher hash_function() const { return hash_ref(); } |
1331 | key_equal key_eq() const { return eq_ref(); } |
1332 | allocator_type get_allocator() const { return alloc_ref(); } |
1333 | |
1334 | friend bool operator==(const raw_hash_set& a, const raw_hash_set& b) { |
1335 | if (a.size() != b.size()) return false; |
1336 | const raw_hash_set* outer = &a; |
1337 | const raw_hash_set* inner = &b; |
1338 | if (outer->capacity() > inner->capacity()) std::swap(outer, inner); |
1339 | for (const value_type& elem : *outer) |
1340 | if (!inner->has_element(elem)) return false; |
1341 | return true; |
1342 | } |
1343 | |
1344 | friend bool operator!=(const raw_hash_set& a, const raw_hash_set& b) { |
1345 | return !(a == b); |
1346 | } |
1347 | |
1348 | friend void swap(raw_hash_set& a, |
1349 | raw_hash_set& b) noexcept(noexcept(a.swap(b))) { |
1350 | a.swap(b); |
1351 | } |
1352 | |
1353 | private: |
1354 | template <class Container, typename Enabler> |
1355 | friend struct absl::container_internal::hashtable_debug_internal:: |
1356 | HashtableDebugAccess; |
1357 | |
1358 | struct FindElement { |
1359 | template <class K, class... Args> |
1360 | const_iterator operator()(const K& key, Args&&...) const { |
1361 | return s.find(key); |
1362 | } |
1363 | const raw_hash_set& s; |
1364 | }; |
1365 | |
1366 | struct HashElement { |
1367 | template <class K, class... Args> |
1368 | size_t operator()(const K& key, Args&&...) const { |
1369 | return h(key); |
1370 | } |
1371 | const hasher& h; |
1372 | }; |
1373 | |
1374 | template <class K1> |
1375 | struct EqualElement { |
1376 | template <class K2, class... Args> |
1377 | bool operator()(const K2& lhs, Args&&...) const { |
1378 | return eq(lhs, rhs); |
1379 | } |
1380 | const K1& rhs; |
1381 | const key_equal& eq; |
1382 | }; |
1383 | |
1384 | struct EmplaceDecomposable { |
1385 | template <class K, class... Args> |
1386 | std::pair<iterator, bool> operator()(const K& key, Args&&... args) const { |
1387 | auto res = s.find_or_prepare_insert(key); |
1388 | if (res.second) { |
1389 | s.emplace_at(res.first, std::forward<Args>(args)...); |
1390 | } |
1391 | return {s.iterator_at(res.first), res.second}; |
1392 | } |
1393 | raw_hash_set& s; |
1394 | }; |
1395 | |
1396 | template <bool do_destroy> |
1397 | struct InsertSlot { |
1398 | template <class K, class... Args> |
1399 | std::pair<iterator, bool> operator()(const K& key, Args&&...) && { |
1400 | auto res = s.find_or_prepare_insert(key); |
1401 | if (res.second) { |
1402 | PolicyTraits::transfer(&s.alloc_ref(), s.slots_ + res.first, &slot); |
1403 | } else if (do_destroy) { |
1404 | PolicyTraits::destroy(&s.alloc_ref(), &slot); |
1405 | } |
1406 | return {s.iterator_at(res.first), res.second}; |
1407 | } |
1408 | raw_hash_set& s; |
1409 | // Constructed slot. Either moved into place or destroyed. |
1410 | slot_type&& slot; |
1411 | }; |
1412 | |
1413 | // "erases" the object from the container, except that it doesn't actually |
1414 | // destroy the object. It only updates all the metadata of the class. |
1415 | // This can be used in conjunction with Policy::transfer to move the object to |
1416 | // another place. |
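  // A minimal sketch of that pairing (the local names `raw` and `dst` are
  // purely illustrative):
  //
  //   typename std::aligned_storage<sizeof(slot_type),
  //                                 alignof(slot_type)>::type raw;
  //   slot_type* dst = reinterpret_cast<slot_type*>(&raw);
  //   PolicyTraits::transfer(&alloc_ref(), dst, it.inner_.slot_);  // move out
  //   erase_meta_only(it);  // then drop only the table's bookkeeping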
1417 | void erase_meta_only(const_iterator it) { |
    assert(IsFull(*it.inner_.ctrl_) && "erasing a dangling iterator");
1419 | --size_; |
1420 | const size_t index = it.inner_.ctrl_ - ctrl_; |
1421 | const size_t index_before = (index - Group::kWidth) & capacity_; |
1422 | const auto empty_after = Group(it.inner_.ctrl_).MatchEmpty(); |
1423 | const auto empty_before = Group(ctrl_ + index_before).MatchEmpty(); |
1424 | |
    // Count how many consecutive non-empty slots we have to the right and to
    // the left of `it`. If the sum is >= kWidth then some probe window may
    // have seen the group as full, so the slot must be marked kDeleted rather
    // than kEmpty.
1428 | bool was_never_full = |
1429 | empty_before && empty_after && |
1430 | static_cast<size_t>(empty_after.TrailingZeros() + |
1431 | empty_before.LeadingZeros()) < Group::kWidth; |
1432 | |
1433 | set_ctrl(index, was_never_full ? kEmpty : kDeleted); |
1434 | growth_left() += was_never_full; |
1435 | infoz_.RecordErase(); |
1436 | } |
1437 | |
1438 | void initialize_slots() { |
1439 | assert(capacity_); |
1440 | // Folks with custom allocators often make unwarranted assumptions about the |
    // behavior of their classes vis-a-vis trivial destructibility and what
    // calls they will or won't make. Avoid sampling for people with custom
1443 | // allocators to get us out of this mess. This is not a hard guarantee but |
1444 | // a workaround while we plan the exact guarantee we want to provide. |
1445 | // |
1446 | // People are often sloppy with the exact type of their allocator (sometimes |
1447 | // it has an extra const or is missing the pair, but rebinds made it work |
1448 | // anyway). To avoid the ambiguity, we work off SlotAlloc which we have |
1449 | // bound more carefully. |
1450 | if (std::is_same<SlotAlloc, std::allocator<slot_type>>::value && |
1451 | slots_ == nullptr) { |
1452 | infoz_ = Sample(); |
1453 | } |
1454 | |
1455 | auto layout = MakeLayout(capacity_); |
1456 | char* mem = static_cast<char*>( |
1457 | Allocate<Layout::Alignment()>(&alloc_ref(), layout.AllocSize())); |
1458 | ctrl_ = reinterpret_cast<ctrl_t*>(layout.template Pointer<0>(mem)); |
1459 | slots_ = layout.template Pointer<1>(mem); |
1460 | reset_ctrl(); |
1461 | reset_growth_left(); |
1462 | infoz_.RecordStorageChanged(size_, capacity_); |
1463 | } |
1464 | |
1465 | void destroy_slots() { |
1466 | if (!capacity_) return; |
1467 | for (size_t i = 0; i != capacity_; ++i) { |
1468 | if (IsFull(ctrl_[i])) { |
1469 | PolicyTraits::destroy(&alloc_ref(), slots_ + i); |
1470 | } |
1471 | } |
1472 | auto layout = MakeLayout(capacity_); |
1473 | // Unpoison before returning the memory to the allocator. |
1474 | SanitizerUnpoisonMemoryRegion(slots_, sizeof(slot_type) * capacity_); |
1475 | Deallocate<Layout::Alignment()>(&alloc_ref(), ctrl_, layout.AllocSize()); |
1476 | ctrl_ = EmptyGroup(); |
1477 | slots_ = nullptr; |
1478 | size_ = 0; |
1479 | capacity_ = 0; |
1480 | growth_left() = 0; |
1481 | } |
1482 | |
1483 | void resize(size_t new_capacity) { |
1484 | assert(IsValidCapacity(new_capacity)); |
1485 | auto* old_ctrl = ctrl_; |
1486 | auto* old_slots = slots_; |
1487 | const size_t old_capacity = capacity_; |
1488 | capacity_ = new_capacity; |
1489 | initialize_slots(); |
1490 | |
1491 | size_t total_probe_length = 0; |
1492 | for (size_t i = 0; i != old_capacity; ++i) { |
1493 | if (IsFull(old_ctrl[i])) { |
1494 | size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, |
1495 | PolicyTraits::element(old_slots + i)); |
1496 | auto target = find_first_non_full(hash); |
1497 | size_t new_i = target.offset; |
1498 | total_probe_length += target.probe_length; |
1499 | set_ctrl(new_i, H2(hash)); |
1500 | PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, old_slots + i); |
1501 | } |
1502 | } |
1503 | if (old_capacity) { |
1504 | SanitizerUnpoisonMemoryRegion(old_slots, |
1505 | sizeof(slot_type) * old_capacity); |
1506 | auto layout = MakeLayout(old_capacity); |
1507 | Deallocate<Layout::Alignment()>(&alloc_ref(), old_ctrl, |
1508 | layout.AllocSize()); |
1509 | } |
1510 | infoz_.RecordRehash(total_probe_length); |
1511 | } |
1512 | |
1513 | void drop_deletes_without_resize() ABSL_ATTRIBUTE_NOINLINE { |
1514 | assert(IsValidCapacity(capacity_)); |
1515 | assert(!is_small()); |
1516 | // Algorithm: |
1517 | // - mark all DELETED slots as EMPTY |
1518 | // - mark all FULL slots as DELETED |
1519 | // - for each slot marked as DELETED |
1520 | // hash = Hash(element) |
1521 | // target = find_first_non_full(hash) |
1522 | // if target is in the same group |
1523 | // mark slot as FULL |
1524 | // else if target is EMPTY |
1525 | // transfer element to target |
1526 | // mark slot as EMPTY |
1527 | // mark target as FULL |
1528 | // else if target is DELETED |
1529 | // swap current element with target element |
1530 | // mark target as FULL |
1531 | // repeat procedure for current slot with moved from element (target) |
1532 | ConvertDeletedToEmptyAndFullToDeleted(ctrl_, capacity_); |
1533 | typename std::aligned_storage<sizeof(slot_type), alignof(slot_type)>::type |
1534 | raw; |
1535 | size_t total_probe_length = 0; |
1536 | slot_type* slot = reinterpret_cast<slot_type*>(&raw); |
1537 | for (size_t i = 0; i != capacity_; ++i) { |
1538 | if (!IsDeleted(ctrl_[i])) continue; |
1539 | size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, |
1540 | PolicyTraits::element(slots_ + i)); |
1541 | auto target = find_first_non_full(hash); |
1542 | size_t new_i = target.offset; |
1543 | total_probe_length += target.probe_length; |
1544 | |
      // Check whether the old and new positions fall within the same probe
      // group for this hash. If they do, we don't need to move the element:
      // it already sits in the best group its probe sequence can reach.
1548 | const auto probe_index = [&](size_t pos) { |
1549 | return ((pos - probe(hash).offset()) & capacity_) / Group::kWidth; |
1550 | }; |
1551 | |
1552 | // Element doesn't move. |
1553 | if (ABSL_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) { |
1554 | set_ctrl(i, H2(hash)); |
1555 | continue; |
1556 | } |
1557 | if (IsEmpty(ctrl_[new_i])) { |
1558 | // Transfer element to the empty spot. |
1559 | // set_ctrl poisons/unpoisons the slots so we have to call it at the |
1560 | // right time. |
1561 | set_ctrl(new_i, H2(hash)); |
1562 | PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slots_ + i); |
1563 | set_ctrl(i, kEmpty); |
1564 | } else { |
1565 | assert(IsDeleted(ctrl_[new_i])); |
1566 | set_ctrl(new_i, H2(hash)); |
1567 | // Until we are done rehashing, DELETED marks previously FULL slots. |
1568 | // Swap i and new_i elements. |
1569 | PolicyTraits::transfer(&alloc_ref(), slot, slots_ + i); |
1570 | PolicyTraits::transfer(&alloc_ref(), slots_ + i, slots_ + new_i); |
1571 | PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slot); |
1572 | --i; // repeat |
1573 | } |
1574 | } |
1575 | reset_growth_left(); |
1576 | infoz_.RecordRehash(total_probe_length); |
1577 | } |
1578 | |
1579 | void rehash_and_grow_if_necessary() { |
1580 | if (capacity_ == 0) { |
1581 | resize(1); |
1582 | } else if (size() <= CapacityToGrowth(capacity()) / 2) { |
1583 | // Squash DELETED without growing if there is enough capacity. |
1584 | drop_deletes_without_resize(); |
1585 | } else { |
1586 | // Otherwise grow the container. |
1587 | resize(capacity_ * 2 + 1); |
1588 | } |
1589 | } |
1590 | |
1591 | bool has_element(const value_type& elem) const { |
1592 | size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, elem); |
1593 | auto seq = probe(hash); |
1594 | while (true) { |
1595 | Group g{ctrl_ + seq.offset()}; |
1596 | for (int i : g.Match(H2(hash))) { |
1597 | if (ABSL_PREDICT_TRUE(PolicyTraits::element(slots_ + seq.offset(i)) == |
1598 | elem)) |
1599 | return true; |
1600 | } |
1601 | if (ABSL_PREDICT_TRUE(g.MatchEmpty())) return false; |
1602 | seq.next(); |
      assert(seq.index() < capacity_ && "full table!");
1604 | } |
1605 | return false; |
1606 | } |
1607 | |
  // Probes the raw_hash_set with the probe sequence for `hash` and returns the
  // offset of the first empty or deleted slot, together with the length of the
  // probe sequence needed to reach it (see FindInfo below).
  // NOTE: this function must work with tables having both kEmpty and kDeleted
  // in one group. Such tables appear during drop_deletes_without_resize.
1612 | // |
1613 | // This function is very useful when insertions happen and: |
1614 | // - the input is already a set |
1615 | // - there are enough slots |
1616 | // - the element with the hash is not in the table |
1617 | struct FindInfo { |
1618 | size_t offset; |
1619 | size_t probe_length; |
1620 | }; |
1621 | FindInfo find_first_non_full(size_t hash) { |
1622 | auto seq = probe(hash); |
1623 | while (true) { |
1624 | Group g{ctrl_ + seq.offset()}; |
1625 | auto mask = g.MatchEmptyOrDeleted(); |
1626 | if (mask) { |
1627 | #if !defined(NDEBUG) |
1628 | // We want to add entropy even when ASLR is not enabled. |
1629 | // In debug build we will randomly insert in either the front or back of |
1630 | // the group. |
1631 | // TODO(kfm,sbenza): revisit after we do unconditional mixing |
1632 | if (!is_small() && ShouldInsertBackwards(hash, ctrl_)) { |
1633 | return {seq.offset(mask.HighestBitSet()), seq.index()}; |
1634 | } |
1635 | #endif |
1636 | return {seq.offset(mask.LowestBitSet()), seq.index()}; |
1637 | } |
      assert(seq.index() < capacity_ && "full table!");
1639 | seq.next(); |
1640 | } |
1641 | } |
1642 | |
1643 | // TODO(alkis): Optimize this assuming *this and that don't overlap. |
1644 | raw_hash_set& move_assign(raw_hash_set&& that, std::true_type) { |
1645 | raw_hash_set tmp(std::move(that)); |
1646 | swap(tmp); |
1647 | return *this; |
1648 | } |
1649 | raw_hash_set& move_assign(raw_hash_set&& that, std::false_type) { |
1650 | raw_hash_set tmp(std::move(that), alloc_ref()); |
1651 | swap(tmp); |
1652 | return *this; |
1653 | } |
1654 | |
1655 | protected: |
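  // Attempts to find `key`. If the key is already present, returns
  // {index_of_existing_element, false}. Otherwise reserves a slot for the key
  // and returns {index_of_new_slot, true}; in that case the caller must
  // construct the element with emplace_at() before touching the table again.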
1656 | template <class K> |
1657 | std::pair<size_t, bool> find_or_prepare_insert(const K& key) { |
1658 | auto hash = hash_ref()(key); |
1659 | auto seq = probe(hash); |
1660 | while (true) { |
1661 | Group g{ctrl_ + seq.offset()}; |
1662 | for (int i : g.Match(H2(hash))) { |
1663 | if (ABSL_PREDICT_TRUE(PolicyTraits::apply( |
1664 | EqualElement<K>{key, eq_ref()}, |
1665 | PolicyTraits::element(slots_ + seq.offset(i))))) |
1666 | return {seq.offset(i), false}; |
1667 | } |
1668 | if (ABSL_PREDICT_TRUE(g.MatchEmpty())) break; |
1669 | seq.next(); |
1670 | } |
1671 | return {prepare_insert(hash), true}; |
1672 | } |
1673 | |
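  // Reserves a slot for an element with the given `hash`, rehashing or growing
  // the table if there is no room, and returns the slot's index. Only the
  // control byte and the size/growth bookkeeping are updated here; the caller
  // is responsible for constructing the element (see emplace_at()).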
1674 | size_t prepare_insert(size_t hash) ABSL_ATTRIBUTE_NOINLINE { |
1675 | auto target = find_first_non_full(hash); |
1676 | if (ABSL_PREDICT_FALSE(growth_left() == 0 && |
1677 | !IsDeleted(ctrl_[target.offset]))) { |
1678 | rehash_and_grow_if_necessary(); |
1679 | target = find_first_non_full(hash); |
1680 | } |
1681 | ++size_; |
1682 | growth_left() -= IsEmpty(ctrl_[target.offset]); |
1683 | set_ctrl(target.offset, H2(hash)); |
1684 | infoz_.RecordInsert(hash, target.probe_length); |
1685 | return target.offset; |
1686 | } |
1687 | |
  // Constructs the value in the space pointed to by the iterator. This only
  // works after an unsuccessful find_or_prepare_insert() and before any other
1690 | // modifications happen in the raw_hash_set. |
1691 | // |
1692 | // PRECONDITION: i is an index returned from find_or_prepare_insert(k), where |
1693 | // k is the key decomposed from `forward<Args>(args)...`, and the bool |
1694 | // returned by find_or_prepare_insert(k) was true. |
1695 | // POSTCONDITION: *m.iterator_at(i) == value_type(forward<Args>(args)...). |
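  // The canonical calling pattern (as used by EmplaceDecomposable above):
  //
  //   auto res = find_or_prepare_insert(key);
  //   if (res.second) emplace_at(res.first, std::forward<Args>(args)...);
  //   return iterator_at(res.first);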
1696 | template <class... Args> |
1697 | void emplace_at(size_t i, Args&&... args) { |
1698 | PolicyTraits::construct(&alloc_ref(), slots_ + i, |
1699 | std::forward<Args>(args)...); |
1700 | |
1701 | assert(PolicyTraits::apply(FindElement{*this}, *iterator_at(i)) == |
1702 | iterator_at(i) && |
1703 | "constructed value does not match the lookup key" ); |
1704 | } |
1705 | |
1706 | iterator iterator_at(size_t i) { return {ctrl_ + i, slots_ + i}; } |
1707 | const_iterator iterator_at(size_t i) const { return {ctrl_ + i, slots_ + i}; } |
1708 | |
1709 | private: |
1710 | friend struct RawHashSetTestOnlyAccess; |
1711 | |
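  // Returns the probe sequence for `hash`: probing starts at the group
  // selected by H1 and advances along the quadratic probe sequence, wrapping
  // around via the capacity mask.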
1712 | probe_seq<Group::kWidth> probe(size_t hash) const { |
1713 | return probe_seq<Group::kWidth>(H1(hash, ctrl_), capacity_); |
1714 | } |
1715 | |
1716 | // Reset all ctrl bytes back to kEmpty, except the sentinel. |
1717 | void reset_ctrl() { |
1718 | std::memset(ctrl_, kEmpty, capacity_ + Group::kWidth); |
1719 | ctrl_[capacity_] = kSentinel; |
1720 | SanitizerPoisonMemoryRegion(slots_, sizeof(slot_type) * capacity_); |
1721 | } |
1722 | |
1723 | void reset_growth_left() { |
1724 | growth_left() = CapacityToGrowth(capacity()) - size_; |
1725 | } |
1726 | |
  // Sets the control byte, and if `i < Group::kWidth`, sets the cloned byte at
  // the end too.
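  //
  // A worked example with the illustrative values Group::kWidth == 16 and
  // capacity_ == 31: for i == 3 the mirror index is
  // ((3 - 16) & 31) + 1 + (15 & 31) == 19 + 16 == 35 == capacity_ + 1 + 3,
  // i.e. slot 3's clone lives just past the sentinel; for i == 20 (>= kWidth)
  // the formula yields 20 itself, so the second store is a harmless
  // self-assignment.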
1729 | void set_ctrl(size_t i, ctrl_t h) { |
1730 | assert(i < capacity_); |
1731 | |
1732 | if (IsFull(h)) { |
1733 | SanitizerUnpoisonObject(slots_ + i); |
1734 | } else { |
1735 | SanitizerPoisonObject(slots_ + i); |
1736 | } |
1737 | |
1738 | ctrl_[i] = h; |
1739 | ctrl_[((i - Group::kWidth) & capacity_) + 1 + |
1740 | ((Group::kWidth - 1) & capacity_)] = h; |
1741 | } |
1742 | |
1743 | size_t& growth_left() { return settings_.template get<0>(); } |
1744 | |
1745 | // The representation of the object has two modes: |
1746 | // - small: For capacities < kWidth-1 |
1747 | // - large: For the rest. |
1748 | // |
1749 | // Differences: |
1750 | // - In small mode we are able to use the whole capacity. The extra control |
1751 | // bytes give us at least one "empty" control byte to stop the iteration. |
1752 | // This is important to make 1 a valid capacity. |
1753 | // |
1754 | // - In small mode only the first `capacity()` control bytes after the |
1755 | // sentinel are valid. The rest contain dummy kEmpty values that do not |
1756 | // represent a real slot. This is important to take into account on |
1757 | // find_first_non_full(), where we never try ShouldInsertBackwards() for |
1758 | // small tables. |
1759 | bool is_small() const { return capacity_ < Group::kWidth - 1; } |
1760 | |
1761 | hasher& hash_ref() { return settings_.template get<1>(); } |
1762 | const hasher& hash_ref() const { return settings_.template get<1>(); } |
1763 | key_equal& eq_ref() { return settings_.template get<2>(); } |
1764 | const key_equal& eq_ref() const { return settings_.template get<2>(); } |
1765 | allocator_type& alloc_ref() { return settings_.template get<3>(); } |
1766 | const allocator_type& alloc_ref() const { |
1767 | return settings_.template get<3>(); |
1768 | } |
1769 | |
1770 | // TODO(alkis): Investigate removing some of these fields: |
1771 | // - ctrl/slots can be derived from each other |
1772 | // - size can be moved into the slot array |
1773 | ctrl_t* ctrl_ = EmptyGroup(); // [(capacity + 1) * ctrl_t] |
1774 | slot_type* slots_ = nullptr; // [capacity * slot_type] |
1775 | size_t size_ = 0; // number of full slots |
1776 | size_t capacity_ = 0; // total number of slots |
1777 | HashtablezInfoHandle infoz_; |
1778 | absl::container_internal::CompressedTuple<size_t /* growth_left */, hasher, |
1779 | key_equal, allocator_type> |
1780 | settings_{0, hasher{}, key_equal{}, allocator_type{}}; |
1781 | }; |
1782 | |
1783 | namespace hashtable_debug_internal { |
1784 | template <typename Set> |
1785 | struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> { |
1786 | using Traits = typename Set::PolicyTraits; |
1787 | using Slot = typename Traits::slot_type; |
1788 | |
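  // Counts the probing work needed to find `key`: every H2 false positive and
  // every additional group visited adds one probe. Stops as soon as the key is
  // found or an empty slot proves it absent.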
1789 | static size_t GetNumProbes(const Set& set, |
1790 | const typename Set::key_type& key) { |
1791 | size_t num_probes = 0; |
1792 | size_t hash = set.hash_ref()(key); |
1793 | auto seq = set.probe(hash); |
1794 | while (true) { |
1795 | container_internal::Group g{set.ctrl_ + seq.offset()}; |
1796 | for (int i : g.Match(container_internal::H2(hash))) { |
1797 | if (Traits::apply( |
1798 | typename Set::template EqualElement<typename Set::key_type>{ |
1799 | key, set.eq_ref()}, |
1800 | Traits::element(set.slots_ + seq.offset(i)))) |
1801 | return num_probes; |
1802 | ++num_probes; |
1803 | } |
1804 | if (g.MatchEmpty()) return num_probes; |
1805 | seq.next(); |
1806 | ++num_probes; |
1807 | } |
1808 | } |
1809 | |
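  // Returns the bytes allocated for the table itself (control bytes plus the
  // slot array) plus, where the policy can report per-element usage, the bytes
  // owned by the stored elements beyond their slots.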
1810 | static size_t AllocatedByteSize(const Set& c) { |
1811 | size_t capacity = c.capacity_; |
1812 | if (capacity == 0) return 0; |
1813 | auto layout = Set::MakeLayout(capacity); |
1814 | size_t m = layout.AllocSize(); |
1815 | |
1816 | size_t per_slot = Traits::space_used(static_cast<const Slot*>(nullptr)); |
1817 | if (per_slot != ~size_t{}) { |
1818 | m += per_slot * c.size(); |
1819 | } else { |
1820 | for (size_t i = 0; i != capacity; ++i) { |
1821 | if (container_internal::IsFull(c.ctrl_[i])) { |
1822 | m += Traits::space_used(c.slots_ + i); |
1823 | } |
1824 | } |
1825 | } |
1826 | return m; |
1827 | } |
1828 | |
1829 | static size_t LowerBoundAllocatedByteSize(size_t size) { |
1830 | size_t capacity = GrowthToLowerboundCapacity(size); |
1831 | if (capacity == 0) return 0; |
1832 | auto layout = Set::MakeLayout(NormalizeCapacity(capacity)); |
1833 | size_t m = layout.AllocSize(); |
1834 | size_t per_slot = Traits::space_used(static_cast<const Slot*>(nullptr)); |
1835 | if (per_slot != ~size_t{}) { |
1836 | m += per_slot * size; |
1837 | } |
1838 | return m; |
1839 | } |
1840 | }; |
1841 | |
1842 | } // namespace hashtable_debug_internal |
1843 | } // namespace container_internal |
1844 | } // namespace absl |
1845 | |
1846 | #endif // ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ |
1847 | |