1/*
2 * Copyright 2017-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <cstddef>
20#include <cstdint>
21#include <cstring>
22
23#include <array>
24#include <iterator>
25#include <limits>
26#include <memory>
27#include <new>
28#include <type_traits>
29#include <utility>
30#include <vector>
31
32#include <folly/Bits.h>
33#include <folly/ConstexprMath.h>
34#include <folly/Likely.h>
35#include <folly/Portability.h>
36#include <folly/ScopeGuard.h>
37#include <folly/Traits.h>
38#include <folly/functional/ApplyTuple.h>
39#include <folly/functional/Invoke.h>
40#include <folly/lang/Align.h>
41#include <folly/lang/Assume.h>
42#include <folly/lang/Exception.h>
43#include <folly/lang/Launder.h>
44#include <folly/lang/SafeAssert.h>
45#include <folly/portability/Builtins.h>
46
47#include <folly/container/HeterogeneousAccess.h>
48#include <folly/container/detail/F14Defaults.h>
49#include <folly/container/detail/F14IntrinsicsAvailability.h>
50
51#if FOLLY_ASAN_ENABLED && defined(FOLLY_TLS)
52#define FOLLY_F14_TLS_IF_ASAN FOLLY_TLS
53#else
54#define FOLLY_F14_TLS_IF_ASAN
55#endif
56
57#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
58
59#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
60#if FOLLY_NEON
61#include <arm_acle.h> // __crc32cd
62#else
63#include <nmmintrin.h> // _mm_crc32_u64
64#endif
65#else
66#ifdef _WIN32
67#include <intrin.h> // _mul128 in fallback bit mixer
68#endif
69#endif
70
71#if FOLLY_NEON
#include <arm_neon.h> // uint8x16_t intrinsics
73#else // SSE2
74#include <immintrin.h> // __m128i intrinsics
75#include <xmmintrin.h> // _mm_prefetch
76#endif
77
78#endif
79
80#ifndef FOLLY_F14_PERTURB_INSERTION_ORDER
81#define FOLLY_F14_PERTURB_INSERTION_ORDER folly::kIsDebug
82#endif
83
84namespace folly {
85
86struct F14TableStats {
87 char const* policy;
88 std::size_t size{0};
89 std::size_t valueSize{0};
90 std::size_t bucketCount{0};
91 std::size_t chunkCount{0};
92 std::vector<std::size_t> chunkOccupancyHisto;
93 std::vector<std::size_t> chunkOutboundOverflowHisto;
94 std::vector<std::size_t> chunkHostedOverflowHisto;
95 std::vector<std::size_t> keyProbeLengthHisto;
96 std::vector<std::size_t> missProbeLengthHisto;
97 std::size_t totalBytes{0};
98 std::size_t overheadBytes{0};
99
100 private:
101 template <typename T>
102 static auto computeHelper(T const* m) -> decltype(m->computeStats()) {
103 return m->computeStats();
104 }
105
106 static F14TableStats computeHelper(...) {
107 return {};
108 }
109
110 public:
111 template <typename T>
112 static F14TableStats compute(T const& m) {
113 return computeHelper(&m);
114 }
115};
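
// Usage sketch (illustrative, not a new API): compute() works for any type
// that exposes a computeStats() method returning F14TableStats, and falls
// back to a value-initialized result otherwise.
//
//   folly::F14ValueMap<int, int> m;  // any F14 container works here
//   m.insert({1, 2});
//   auto stats = folly::F14TableStats::compute(m);
//   // stats.size, stats.bucketCount, etc. describe the table if the type
//   // provides computeStats(); otherwise they are all zero/empty.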
116
117namespace f14 {
118namespace detail {
119
120template <F14IntrinsicsMode>
121struct F14LinkCheck {};
122
123template <>
124struct F14LinkCheck<getF14IntrinsicsMode()> {
125 // The purpose of this method is to trigger a link failure if
126 // compilation flags vary across compilation units. The definition
127 // is in F14Table.cpp, so only one of F14LinkCheck<None>::check,
128 // F14LinkCheck<Simd>::check, or F14LinkCheck<SimdAndCrc>::check will
129 // be available at link time.
130 //
131 // To cause a link failure the function must be invoked in code that
132 // is not optimized away, so we call it on a couple of cold paths
133 // (exception handling paths in copy construction and rehash). LTO may
134 // remove it entirely, but that's fine.
135 static void check() noexcept;
136};
137
138bool tlsPendingSafeInserts(std::ptrdiff_t delta = 0);
139std::size_t tlsMinstdRand(std::size_t n);
140
141#if defined(_LIBCPP_VERSION)
142
143template <typename K, typename V, typename H>
144struct StdNodeReplica {
145 void* next;
146 std::size_t hash;
147 V value;
148};
149
150#else
151
152template <typename H>
153struct StdIsFastHash : std::true_type {};
154template <>
155struct StdIsFastHash<std::hash<long double>> : std::false_type {};
156template <typename... Args>
157struct StdIsFastHash<std::hash<std::basic_string<Args...>>> : std::false_type {
158};
159
160// TODO: add specialization for std::basic_string_view
161
// Mimic the internal node of STL unordered containers to estimate their size
163template <typename K, typename V, typename H, typename Enable = void>
164struct StdNodeReplica {
165 void* next;
166 V value;
167};
168template <typename K, typename V, typename H>
169struct StdNodeReplica<
170 K,
171 V,
172 H,
173 std::enable_if_t<
174 !StdIsFastHash<H>::value || !is_nothrow_invocable<H, K>::value>> {
175 void* next;
176 V value;
177 std::size_t hash;
178};
179
180#endif
181
182} // namespace detail
183} // namespace f14
184
185#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
186namespace f14 {
187namespace detail {
188template <typename Policy>
189class F14Table;
190} // namespace detail
191} // namespace f14
192
193class F14HashToken final {
194 public:
195 F14HashToken() = default;
196
197 private:
198 using HashPair = std::pair<std::size_t, std::size_t>;
199
200 explicit F14HashToken(HashPair hp) : hp_(hp) {}
201 explicit operator HashPair() const {
202 return hp_;
203 }
204
205 HashPair hp_;
206
207 template <typename Policy>
208 friend class f14::detail::F14Table;
209};
210
211namespace f14 {
212namespace detail {
213//// Defaults should be selected using void
214
215template <typename Arg, typename Default>
216using VoidDefault =
217 std::conditional_t<std::is_same<Arg, Default>::value, void, Arg>;
218
219template <typename Arg, typename Default>
220using Defaulted =
221 typename std::conditional_t<std::is_same<Arg, void>::value, Default, Arg>;
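
// Round-trip example (illustrative only; MyHasher is a placeholder type):
//
//   static_assert(
//       std::is_same<VoidDefault<std::hash<int>, std::hash<int>>, void>::value,
//       "an argument equal to its default collapses to void");
//   static_assert(
//       std::is_same<Defaulted<void, std::hash<int>>, std::hash<int>>::value,
//       "and Defaulted maps void back to the default");
//   static_assert(
//       std::is_same<Defaulted<MyHasher, std::hash<int>>, MyHasher>::value,
//       "while a non-default argument passes through unchanged");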
222
223template <
224 typename TableKey,
225 typename Hasher,
226 typename KeyEqual,
227 typename ArgKey,
228 typename Void = void>
229struct EligibleForHeterogeneousFind : std::false_type {};
230
231template <
232 typename TableKey,
233 typename Hasher,
234 typename KeyEqual,
235 typename ArgKey>
236struct EligibleForHeterogeneousFind<
237 TableKey,
238 Hasher,
239 KeyEqual,
240 ArgKey,
241 void_t<
242 typename Hasher::is_transparent,
243 typename KeyEqual::is_transparent,
244 invoke_result_t<Hasher, ArgKey const&>,
245 invoke_result_t<KeyEqual, ArgKey const&, TableKey const&>>>
246 : std::true_type {};
247
248template <
249 typename TableKey,
250 typename Hasher,
251 typename KeyEqual,
252 typename ArgKey>
253using EligibleForHeterogeneousInsert = Conjunction<
254 EligibleForHeterogeneousFind<TableKey, Hasher, KeyEqual, ArgKey>,
255 std::is_constructible<TableKey, ArgKey>>;
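
// Example of what these traits detect (illustrative; the functor names are
// made up for this sketch):
//
//   struct TransparentHash {
//     using is_transparent = void;
//     std::size_t operator()(std::string const& s) const;
//     std::size_t operator()(char const* s) const;
//   };
//   struct TransparentEqual {
//     using is_transparent = void;
//     bool operator()(char const* a, std::string const& b) const;
//     bool operator()(std::string const& a, std::string const& b) const;
//   };
//
//   static_assert(
//       EligibleForHeterogeneousFind<
//           std::string, TransparentHash, TransparentEqual, char const*>::value,
//       "find(char const*) can skip the std::string conversion");
//
// EligibleForHeterogeneousInsert additionally requires that TableKey be
// constructible from ArgKey, which also holds for std::string and
// char const*.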
256
257template <
258 typename TableKey,
259 typename Hasher,
260 typename KeyEqual,
261 typename KeyArg0OrBool,
262 typename... KeyArgs>
263using KeyTypeForEmplaceHelper = std::conditional_t<
264 sizeof...(KeyArgs) == 1 &&
265 (std::is_same<remove_cvref_t<KeyArg0OrBool>, TableKey>::value ||
266 EligibleForHeterogeneousFind<
267 TableKey,
268 Hasher,
269 KeyEqual,
270 KeyArg0OrBool>::value),
271 KeyArg0OrBool&&,
272 TableKey>;
273
274template <
275 typename TableKey,
276 typename Hasher,
277 typename KeyEqual,
278 typename... KeyArgs>
279using KeyTypeForEmplace = KeyTypeForEmplaceHelper<
280 TableKey,
281 Hasher,
282 KeyEqual,
283 std::tuple_element_t<0, std::tuple<KeyArgs..., bool>>,
284 KeyArgs...>;
285
286////////////////
287
288template <typename T>
289FOLLY_ALWAYS_INLINE static void prefetchAddr(T const* ptr) {
290#ifndef _WIN32
291 __builtin_prefetch(static_cast<void const*>(ptr));
292#elif FOLLY_NEON
293 __prefetch(static_cast<void const*>(ptr));
294#else
295 _mm_prefetch(
296 static_cast<char const*>(static_cast<void const*>(ptr)), _MM_HINT_T0);
297#endif
298}
299
300template <typename T>
301FOLLY_ALWAYS_INLINE static unsigned findFirstSetNonZero(T mask) {
302 assume(mask != 0);
303 if (sizeof(mask) == sizeof(unsigned)) {
304 return __builtin_ctz(static_cast<unsigned>(mask));
305 } else {
306 return __builtin_ctzll(mask);
307 }
308}
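
// For example (illustrative): findFirstSetNonZero(0x8u) == 3 and
// findFirstSetNonZero(std::uint64_t{1} << 40) == 40.  Passing zero is not
// allowed; the assume(mask != 0) above makes that undefined.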
309
310#if FOLLY_NEON
311using TagVector = uint8x16_t;
312
313using MaskType = uint64_t;
314
315constexpr unsigned kMaskSpacing = 4;
316#else // SSE2
317using TagVector = __m128i;
318
319using MaskType = uint32_t;
320
321constexpr unsigned kMaskSpacing = 1;
322#endif
323
// We could use unaligned loads to relax this requirement, but that
// would both incur a performance penalty and require a bulkier packed
// ItemIter format
327constexpr std::size_t kRequiredVectorAlignment =
328 constexpr_max(std::size_t{16}, alignof(max_align_t));
329
330using EmptyTagVectorType = std::aligned_storage_t<
331 sizeof(TagVector) + kRequiredVectorAlignment,
332 alignof(max_align_t)>;
333
334extern EmptyTagVectorType kEmptyTagVector;
335
336template <unsigned BitCount>
337struct FullMask {
338 static constexpr MaskType value =
339 (FullMask<BitCount - 1>::value << kMaskSpacing) + 1;
340};
341
342template <>
343struct FullMask<1> : std::integral_constant<MaskType, 1> {};
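
// Worked example (illustrative): with kMaskSpacing == 1 (SSE2),
// FullMask<4>::value == 0b1111, while with kMaskSpacing == 4 (NEON),
// FullMask<4>::value == 0x1111, i.e. one set bit per 4-bit lane.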
344
345#if FOLLY_ARM
346// Mask iteration is different for ARM because that is the only platform
347// for which the mask is bigger than a register.
348
349// Iterates a mask, optimized for the case that only a few bits are set
350class SparseMaskIter {
351 static_assert(kMaskSpacing == 4, "");
352
353 uint32_t interleavedMask_;
354
355 public:
356 explicit SparseMaskIter(MaskType mask)
357 : interleavedMask_{static_cast<uint32_t>(((mask >> 32) << 2) | mask)} {}
358
359 bool hasNext() {
360 return interleavedMask_ != 0;
361 }
362
363 unsigned next() {
364 FOLLY_SAFE_DCHECK(hasNext(), "");
365 unsigned i = findFirstSetNonZero(interleavedMask_);
366 interleavedMask_ &= (interleavedMask_ - 1);
367 return ((i >> 2) | (i << 2)) & 0xf;
368 }
369};
370
371// Iterates a mask, optimized for the case that most bits are set
372class DenseMaskIter {
373 static_assert(kMaskSpacing == 4, "");
374
375 std::size_t count_;
376 unsigned index_;
377 uint8_t const* tags_;
378
379 public:
380 explicit DenseMaskIter(uint8_t const* tags, MaskType mask) {
381 if (mask == 0) {
382 count_ = 0;
383 } else {
384 count_ = popcount(static_cast<uint32_t>(((mask >> 32) << 2) | mask));
385 if (LIKELY((mask & 1) != 0)) {
386 index_ = 0;
387 } else {
388 index_ = findFirstSetNonZero(mask) / kMaskSpacing;
389 }
390 tags_ = tags;
391 }
392 }
393
394 bool hasNext() {
395 return count_ > 0;
396 }
397
398 unsigned next() {
399 auto rv = index_;
400 --count_;
401 if (count_ > 0) {
402 do {
403 ++index_;
404 } while ((tags_[index_] & 0x80) == 0);
405 }
406 FOLLY_SAFE_DCHECK(index_ < 16, "");
407 return rv;
408 }
409};
410
411#else
412// Iterates a mask, optimized for the case that only a few bits are set
413class SparseMaskIter {
414 MaskType mask_;
415
416 public:
417 explicit SparseMaskIter(MaskType mask) : mask_{mask} {}
418
419 bool hasNext() {
420 return mask_ != 0;
421 }
422
423 unsigned next() {
424 FOLLY_SAFE_DCHECK(hasNext(), "");
425 unsigned i = findFirstSetNonZero(mask_);
426 mask_ &= (mask_ - 1);
427 return i / kMaskSpacing;
428 }
429};
430
431// Iterates a mask, optimized for the case that most bits are set
432class DenseMaskIter {
433 MaskType mask_;
434 unsigned index_{0};
435
436 public:
437 explicit DenseMaskIter(uint8_t const*, MaskType mask) : mask_{mask} {}
438
439 bool hasNext() {
440 return mask_ != 0;
441 }
442
443 unsigned next() {
444 FOLLY_SAFE_DCHECK(hasNext(), "");
445 if (LIKELY((mask_ & 1) != 0)) {
446 mask_ >>= kMaskSpacing;
447 return index_++;
448 } else {
449 unsigned s = findFirstSetNonZero(mask_);
450 unsigned rv = index_ + (s / kMaskSpacing);
451 mask_ >>= (s + kMaskSpacing);
452 index_ = rv + 1;
453 return rv;
454 }
455 }
456};
457#endif
458
459// Iterates a mask, returning pairs of [begin,end) index covering blocks
460// of set bits
461class MaskRangeIter {
462 MaskType mask_;
463 unsigned shift_{0};
464
465 public:
466 explicit MaskRangeIter(MaskType mask) {
467 // If kMaskSpacing is > 1 then there will be empty bits even for
468 // contiguous ranges. Fill them in.
469 mask_ = mask * ((1 << kMaskSpacing) - 1);
470 }
471
472 bool hasNext() {
473 return mask_ != 0;
474 }
475
476 std::pair<unsigned, unsigned> next() {
477 FOLLY_SAFE_DCHECK(hasNext(), "");
478 auto s = shift_;
479 unsigned b = findFirstSetNonZero(mask_);
480 unsigned e = findFirstSetNonZero(~(mask_ | (mask_ - 1)));
481 mask_ >>= e;
482 shift_ = s + e;
483 return std::make_pair((s + b) / kMaskSpacing, (s + e) / kMaskSpacing);
484 }
485};
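
// Worked example (illustrative, assuming kMaskSpacing == 1): iterating a
// mask of 0b01110110 yields the ranges [1,3) and then [4,7), i.e. the two
// maximal runs of set bits.
//
//   MaskRangeIter iter{0b01110110};
//   iter.next();     // returns {1, 3}
//   iter.next();     // returns {4, 7}
//   iter.hasNext();  // false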
486
487// Holds the result of an index query that has an optional result,
488// interpreting a mask of 0 to be the empty answer and the index of the
489// last set bit to be the non-empty answer
490class LastOccupiedInMask {
491 MaskType mask_;
492
493 public:
494 explicit LastOccupiedInMask(MaskType mask) : mask_{mask} {}
495
496 bool hasIndex() const {
497 return mask_ != 0;
498 }
499
500 unsigned index() const {
501 assume(mask_ != 0);
502 return (findLastSet(mask_) - 1) / kMaskSpacing;
503 }
504};
505
506// Holds the result of an index query that has an optional result,
507// interpreting a mask of 0 to be the empty answer and the index of the
508// first set bit to be the non-empty answer
509class FirstEmptyInMask {
510 MaskType mask_;
511
512 public:
513 explicit FirstEmptyInMask(MaskType mask) : mask_{mask} {}
514
515 bool hasIndex() const {
516 return mask_ != 0;
517 }
518
519 unsigned index() const {
520 FOLLY_SAFE_DCHECK(mask_ != 0, "");
521 return findFirstSetNonZero(mask_) / kMaskSpacing;
522 }
523};
524
525template <typename ItemType>
526struct alignas(kRequiredVectorAlignment) F14Chunk {
527 using Item = ItemType;
528
  // For our 16 byte vector alignment (and assuming alignof(Item) >=
  // 4) a kCapacity of 14 is the most space efficient.  Slightly smaller
  // or larger capacities can help with cache alignment in a couple of
  // cases without wasting too much space, but once the items are larger
  // we're unlikely to get much benefit anyway.  The only cases we
  // optimize are a kCapacity of 12 for 4 byte items, which makes the
  // chunk take exactly 1 cache line, and adding 16 bytes of padding for
  // 16 byte items so that a chunk takes exactly 4 cache lines.
537 static constexpr unsigned kCapacity = sizeof(Item) == 4 ? 12 : 14;
538
539 static constexpr unsigned kDesiredCapacity = kCapacity - 2;
540
541 static constexpr unsigned kAllocatedCapacity =
542 kCapacity + (sizeof(Item) == 16 ? 1 : 0);
543
  // If kCapacity == 12 then we get 16 bits of capacityScale by using
  // tags 12 and 13; otherwise we only get 4 bits, stored in control_
546 static constexpr std::size_t kCapacityScaleBits = kCapacity == 12 ? 16 : 4;
547 static constexpr std::size_t kCapacityScaleShift = kCapacityScaleBits - 4;
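
  // For example (illustrative): with 14-item chunks kCapacityScaleBits is 4,
  // so chunk 0 stores scales 1..15 in the low nibble of control_; with
  // 12-item chunks the scale is held in tags_[12] and tags_[13] and can be
  // as large as 2^16 - 1.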
548
549 static constexpr MaskType kFullMask = FullMask<kCapacity>::value;
550
  // Non-empty tags have their top bit set.  The tags_ array might be
  // bigger than kCapacity to keep the first item aligned.
553 std::array<uint8_t, 14> tags_;
554
  // Bits 0..3 of chunk 0 record the scaling factor between the number of
  // chunks and the max size without rehash.  Bits 4..7 in any chunk are a
  // 4-bit counter of the number of values in this chunk that were placed
  // because they overflowed their desired chunk (hostedOverflowCount).
559 uint8_t control_;
560
561 // The number of values that would have been placed into this chunk if
562 // there had been space, including values that also overflowed previous
563 // full chunks. This value saturates; once it becomes 255 it no longer
564 // increases nor decreases.
565 uint8_t outboundOverflowCount_;
566
567 std::array<aligned_storage_for_t<Item>, kAllocatedCapacity> rawItems_;
568
569 static F14Chunk* emptyInstance() {
570 auto raw = reinterpret_cast<char*>(&kEmptyTagVector);
571 if (kRequiredVectorAlignment > alignof(max_align_t)) {
572 auto delta = kRequiredVectorAlignment -
573 (reinterpret_cast<uintptr_t>(raw) % kRequiredVectorAlignment);
574 raw += delta;
575 }
576 auto rv = reinterpret_cast<F14Chunk*>(raw);
577 FOLLY_SAFE_DCHECK(
578 (reinterpret_cast<uintptr_t>(rv) % kRequiredVectorAlignment) == 0, "");
579 return rv;
580 }
581
582 void clear() {
583 // tags_ = {}; control_ = 0; outboundOverflowCount_ = 0;
584
585 // gcc < 6 doesn't exploit chunk alignment to generate the optimal
586 // SSE clear from memset. This is very hot code, so it is worth
587 // handling that case specially.
588#if FOLLY_SSE >= 2 && __GNUC__ <= 5 && !__clang__
589 // this doesn't violate strict aliasing rules because __m128i is
590 // tagged as __may_alias__
591 auto* v = static_cast<__m128i*>(static_cast<void*>(&tags_[0]));
592 _mm_store_si128(v, _mm_setzero_si128());
593#else
594 std::memset(&tags_[0], '\0', 16);
595#endif
596 }
597
598 void copyOverflowInfoFrom(F14Chunk const& rhs) {
599 FOLLY_SAFE_DCHECK(hostedOverflowCount() == 0, "");
600 control_ += static_cast<uint8_t>(rhs.control_ & 0xf0);
601 outboundOverflowCount_ = rhs.outboundOverflowCount_;
602 }
603
604 unsigned hostedOverflowCount() const {
605 return control_ >> 4;
606 }
607
608 static constexpr uint8_t kIncrHostedOverflowCount = 0x10;
609 static constexpr uint8_t kDecrHostedOverflowCount =
610 static_cast<uint8_t>(-0x10);
611
612 void adjustHostedOverflowCount(uint8_t op) {
613 control_ += op;
614 }
615
616 bool eof() const {
617 return capacityScale() != 0;
618 }
619
620 std::size_t capacityScale() const {
621 if (kCapacityScaleBits == 4) {
622 return control_ & 0xf;
623 } else {
624 uint16_t v;
625 std::memcpy(&v, &tags_[12], 2);
626 return v;
627 }
628 }
629
630 void setCapacityScale(std::size_t scale) {
631 FOLLY_SAFE_DCHECK(
632 this != emptyInstance() && scale > 0 &&
633 scale < (std::size_t{1} << kCapacityScaleBits),
634 "");
635 if (kCapacityScaleBits == 4) {
636 control_ = (control_ & ~0xf) | static_cast<uint8_t>(scale);
637 } else {
638 uint16_t v = static_cast<uint16_t>(scale);
639 std::memcpy(&tags_[12], &v, 2);
640 }
641 }
642
643 void markEof(std::size_t scale) {
644 folly::assume(control_ == 0);
645 setCapacityScale(scale);
646 }
647
648 unsigned outboundOverflowCount() const {
649 return outboundOverflowCount_;
650 }
651
652 void incrOutboundOverflowCount() {
653 if (outboundOverflowCount_ != 255) {
654 ++outboundOverflowCount_;
655 }
656 }
657
658 void decrOutboundOverflowCount() {
659 if (outboundOverflowCount_ != 255) {
660 --outboundOverflowCount_;
661 }
662 }
663
664 std::size_t tag(std::size_t index) const {
665 return tags_[index];
666 }
667
668 void setTag(std::size_t index, std::size_t tag) {
669 FOLLY_SAFE_DCHECK(
670 this != emptyInstance() && tag >= 0x80 && tag <= 0xff, "");
671 tags_[index] = static_cast<uint8_t>(tag);
672 }
673
674 void clearTag(std::size_t index) {
675 tags_[index] = 0;
676 }
677
678#if FOLLY_NEON
679 ////////
680 // Tag filtering using NEON intrinsics
681
682 SparseMaskIter tagMatchIter(std::size_t needle) const {
683 FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
684 uint8x16_t tagV = vld1q_u8(&tags_[0]);
685 auto needleV = vdupq_n_u8(static_cast<uint8_t>(needle));
686 auto eqV = vceqq_u8(tagV, needleV);
687 // get info from every byte into the bottom half of every uint16_t
    // by shifting right 4, then narrow to get it into a 64-bit vector
689 uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(eqV), 4);
690 uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask;
691 return SparseMaskIter(mask);
692 }
693
694 MaskType occupiedMask() const {
695 uint8x16_t tagV = vld1q_u8(&tags_[0]);
696 // signed shift extends top bit to all bits
697 auto occupiedV =
698 vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u8(tagV), 7));
699 uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(occupiedV), 4);
700 return vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask;
701 }
702#else
703 ////////
704 // Tag filtering using SSE2 intrinsics
705
706 TagVector const* tagVector() const {
707 return static_cast<TagVector const*>(static_cast<void const*>(&tags_[0]));
708 }
709
710 SparseMaskIter tagMatchIter(std::size_t needle) const {
711 FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
712 auto tagV = _mm_load_si128(tagVector());
713
714 // TRICKY! It may seem strange to have a std::size_t needle and narrow
715 // it at the last moment, rather than making HashPair::second be a
716 // uint8_t, but the latter choice sometimes leads to a performance
717 // problem.
718 //
719 // On architectures with SSE2 but not AVX2, _mm_set1_epi8 expands
720 // to multiple instructions. One of those is a MOVD of either 4 or
721 // 8 byte width. Only the bottom byte of that move actually affects
722 // the result, but if a 1-byte needle has been spilled then this will
723 // be a 4 byte load. GCC 5.5 has been observed to reload needle
724 // (or perhaps fuse a reload and part of a previous static_cast)
    // using a MOVZX with a 1 byte load in parallel with the MOVD.
726 // This combination causes a failure of store-to-load forwarding,
727 // which has a big performance penalty (60 nanoseconds per find on
728 // a microbenchmark). Keeping needle >= 4 bytes avoids the problem
729 // and also happens to result in slightly more compact assembly.
730 auto needleV = _mm_set1_epi8(static_cast<uint8_t>(needle));
731 auto eqV = _mm_cmpeq_epi8(tagV, needleV);
732 auto mask = _mm_movemask_epi8(eqV) & kFullMask;
733 return SparseMaskIter{mask};
734 }
735
736 MaskType occupiedMask() const {
737 auto tagV = _mm_load_si128(tagVector());
738 return _mm_movemask_epi8(tagV) & kFullMask;
739 }
740#endif
741
742 DenseMaskIter occupiedIter() const {
743 return DenseMaskIter{&tags_[0], occupiedMask()};
744 }
745
746 MaskRangeIter occupiedRangeIter() const {
747 return MaskRangeIter{occupiedMask()};
748 }
749
750 LastOccupiedInMask lastOccupied() const {
751 return LastOccupiedInMask{occupiedMask()};
752 }
753
754 FirstEmptyInMask firstEmpty() const {
755 return FirstEmptyInMask{occupiedMask() ^ kFullMask};
756 }
757
758 bool occupied(std::size_t index) const {
759 FOLLY_SAFE_DCHECK(tags_[index] == 0 || (tags_[index] & 0x80) != 0, "");
760 return tags_[index] != 0;
761 }
762
763 Item* itemAddr(std::size_t i) const {
764 return static_cast<Item*>(
765 const_cast<void*>(static_cast<void const*>(&rawItems_[i])));
766 }
767
768 Item& item(std::size_t i) {
769 FOLLY_SAFE_DCHECK(this->occupied(i), "");
770 return *launder(itemAddr(i));
771 }
772
773 Item const& citem(std::size_t i) const {
774 FOLLY_SAFE_DCHECK(this->occupied(i), "");
775 return *launder(itemAddr(i));
776 }
777
778 static F14Chunk& owner(Item& item, std::size_t index) {
779 auto rawAddr =
780 static_cast<uint8_t*>(static_cast<void*>(std::addressof(item))) -
781 offsetof(F14Chunk, rawItems_) - index * sizeof(Item);
782 auto chunkAddr = static_cast<F14Chunk*>(static_cast<void*>(rawAddr));
783 FOLLY_SAFE_DCHECK(std::addressof(item) == chunkAddr->itemAddr(index), "");
784 return *chunkAddr;
785 }
786};
787
788////////////////
789
790// PackedChunkItemPtr points to an Item in an F14Chunk, allowing both the
791// Item& and its index to be recovered. It sorts by the address of the
792// item, and it only works for items that are in a properly-aligned chunk.
793
794// generic form, not actually packed
795template <typename Ptr>
796class PackedChunkItemPtr {
797 public:
798 PackedChunkItemPtr(Ptr p, std::size_t i) noexcept : ptr_{p}, index_{i} {
799 FOLLY_SAFE_DCHECK(ptr_ != nullptr || index_ == 0, "");
800 }
801
802 Ptr ptr() const {
803 return ptr_;
804 }
805
806 std::size_t index() const {
807 return index_;
808 }
809
810 bool operator<(PackedChunkItemPtr const& rhs) const {
811 FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
812 return ptr_ < rhs.ptr_;
813 }
814
815 bool operator==(PackedChunkItemPtr const& rhs) const {
816 FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
817 return ptr_ == rhs.ptr_;
818 }
819
820 bool operator!=(PackedChunkItemPtr const& rhs) const {
821 return !(*this == rhs);
822 }
823
824 private:
825 Ptr ptr_;
826 std::size_t index_;
827};
828
829// Bare pointer form, packed into a uintptr_t. Uses only bits wasted by
830// alignment, so it works on 32-bit and 64-bit platforms
831template <typename T>
832class PackedChunkItemPtr<T*> {
833 static_assert((alignof(F14Chunk<T>) % 16) == 0, "");
834
835 // Chunks are 16-byte aligned, so we can maintain a packed pointer to a
836 // chunk item by packing the 4-bit item index into the least significant
837 // bits of a pointer to the chunk itself. This makes ItemIter::pack
838 // more expensive, however, since it has to compute the chunk address.
839 //
  // Chunk items have varying alignment constraints, so it would seem
  // that we can't do a similar trick using only bit masking operations
  // on the Item* itself.  It turns out, however, that if sizeof(Item)
  // is not a multiple of 16 then we can recover a portion of the index
  // bits from the knowledge that the Item-s are stored in an array that
  // is itself 16-byte aligned.
846 //
847 // If kAlignBits is the number of trailing zero bits in sizeof(Item)
848 // (up to 4), then we can borrow those bits to store kAlignBits of the
849 // index directly. We can recover (4 - kAlignBits) bits of the index
850 // from the item pointer itself, by defining/observing that
851 //
852 // A = kAlignBits (A <= 4)
853 //
854 // S = (sizeof(Item) % 16) >> A (shifted-away bits are all zero)
855 //
856 // R = (itemPtr % 16) >> A (shifted-away bits are all zero)
857 //
858 // M = 16 >> A
859 //
860 // itemPtr % 16 = (index * sizeof(Item)) % 16
861 //
862 // (R * 2^A) % 16 = (index * (sizeof(Item) % 16)) % 16
863 //
864 // (R * 2^A) % 16 = (index * 2^A * S) % 16
865 //
866 // R % M = (index * S) % M
867 //
  // S is relatively prime to M, so a multiplicative inverse is easy
869 // to compute
870 //
871 // Sinv = S^(M - 1) % M
872 //
873 // (R * Sinv) % M = index % M
874 //
875 // This lets us recover the bottom bits of the index. When sizeof(T)
876 // is 8-byte aligned kSizeInverse will always be 1. When sizeof(T)
877 // is 4-byte aligned kSizeInverse will be either 1 or 3.
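
  // Worked example (illustrative): suppose sizeof(Item) == 12.  Then
  // A = kAlignBits = 2, S = (12 % 16) >> 2 = 3, M = 16 >> 2 = 4, and
  // Sinv = 3^3 % 4 = 3.  For index 6 the item starts 72 bytes past the
  // 16-byte-aligned item array, so itemPtr % 16 == 8 and R = 8 >> 2 = 2.
  // Recovering the low bits: (R * Sinv) % M = (2 * 3) % 4 = 2, which
  // matches index % M == 6 % 4.  The remaining high bits, 6 >> 2 == 1,
  // are stored directly in the two alignment bits of the pointer.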
878
879 // returns pow(x, y) % m
880 static constexpr uintptr_t powerMod(uintptr_t x, uintptr_t y, uintptr_t m) {
881 return y == 0 ? 1 : (x * powerMod(x, y - 1, m)) % m;
882 }
883
884 static constexpr uintptr_t kIndexBits = 4;
885 static constexpr uintptr_t kIndexMask = (uintptr_t{1} << kIndexBits) - 1;
886
887 static constexpr uintptr_t kAlignBits = constexpr_min(
888 uintptr_t{4},
889 constexpr_find_first_set(uintptr_t{sizeof(T)}) - 1);
890
891 static constexpr uintptr_t kAlignMask = (uintptr_t{1} << kAlignBits) - 1;
892
893 static constexpr uintptr_t kModulus = uintptr_t{1}
894 << (kIndexBits - kAlignBits);
895 static constexpr uintptr_t kSizeInverse =
896 powerMod(sizeof(T) >> kAlignBits, kModulus - 1, kModulus);
897
898 public:
899 PackedChunkItemPtr(T* p, std::size_t i) noexcept {
900 uintptr_t encoded = i >> (kIndexBits - kAlignBits);
901 assume((encoded & ~kAlignMask) == 0);
902 raw_ = reinterpret_cast<uintptr_t>(p) | encoded;
903 FOLLY_SAFE_DCHECK(p == ptr(), "");
904 FOLLY_SAFE_DCHECK(i == index(), "");
905 }
906
907 T* ptr() const {
908 return reinterpret_cast<T*>(raw_ & ~kAlignMask);
909 }
910
911 std::size_t index() const {
912 auto encoded = (raw_ & kAlignMask) << (kIndexBits - kAlignBits);
913 auto deduced =
914 ((raw_ >> kAlignBits) * kSizeInverse) & (kIndexMask >> kAlignBits);
915 return encoded | deduced;
916 }
917
918 bool operator<(PackedChunkItemPtr const& rhs) const {
919 return raw_ < rhs.raw_;
920 }
921 bool operator==(PackedChunkItemPtr const& rhs) const {
922 return raw_ == rhs.raw_;
923 }
924 bool operator!=(PackedChunkItemPtr const& rhs) const {
925 return !(*this == rhs);
926 }
927
928 private:
929 uintptr_t raw_;
930};
931
932template <typename ChunkPtr>
933class F14ItemIter {
934 private:
935 using Chunk = typename std::pointer_traits<ChunkPtr>::element_type;
936
937 public:
938 using Item = typename Chunk::Item;
939 using ItemPtr = typename std::pointer_traits<ChunkPtr>::template rebind<Item>;
940 using ItemConstPtr =
941 typename std::pointer_traits<ChunkPtr>::template rebind<Item const>;
942
943 using Packed = PackedChunkItemPtr<ItemPtr>;
944
945 //// PUBLIC
946
947 F14ItemIter() noexcept : itemPtr_{nullptr}, index_{0} {}
948
949 // default copy and move constructors and assignment operators are correct
950
951 explicit F14ItemIter(Packed const& packed)
952 : itemPtr_{packed.ptr()}, index_{packed.index()} {}
953
954 F14ItemIter(ChunkPtr chunk, std::size_t index)
955 : itemPtr_{std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index))},
956 index_{index} {
957 FOLLY_SAFE_DCHECK(index < Chunk::kCapacity, "");
958 assume(
959 std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index)) !=
960 nullptr);
961 assume(itemPtr_ != nullptr);
962 }
963
964 FOLLY_ALWAYS_INLINE void advanceImpl(bool checkEof, bool likelyDead) {
965 auto c = chunk();
966
967 // common case is packed entries
968 while (index_ > 0) {
969 --index_;
970 --itemPtr_;
971 if (LIKELY(c->occupied(index_))) {
972 return;
973 }
974 }
975
976 // It's fairly common for an iterator to be advanced and then become
977 // dead, for example in the return value from erase(iter) or in
978 // the last step of a loop. We'd like to make sure that the entire
979 // advance() method can be eliminated by the compiler's dead code
980 // elimination pass. To do that it must eliminate the loops, which
981 // requires it to prove that they have no side effects. It's easy
982 // to show that there are no escaping stores, but at the moment
983 // compilers also consider an infinite loop to be a side effect.
984 // (There are parts of the standard that would allow them to treat
985 // this as undefined behavior, but at the moment they don't exploit
986 // those clauses.)
987 //
988 // The following loop should really be a while loop, which would
989 // save a register, some instructions, and a conditional branch,
990 // but by writing it as a for loop the compiler can prove to itself
991 // that it will eventually terminate. (No matter that even if the
992 // loop executed in a single cycle it would take about 200 years to
993 // run all 2^64 iterations.)
994 //
    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82776 has the bug we
    // filed about the issue.  The loop we would prefer is:
    //   while (true) {
997 for (std::size_t i = 1; !likelyDead || i != 0; ++i) {
998 if (checkEof) {
999 // exhausted the current chunk
1000 if (UNLIKELY(c->eof())) {
1001 FOLLY_SAFE_DCHECK(index_ == 0, "");
1002 itemPtr_ = nullptr;
1003 return;
1004 }
1005 } else {
1006 FOLLY_SAFE_DCHECK(!c->eof(), "");
1007 }
1008 --c;
1009 auto last = c->lastOccupied();
1010 if (checkEof && !likelyDead) {
1011 prefetchAddr(&*c - 1);
1012 }
1013 if (LIKELY(last.hasIndex())) {
1014 index_ = last.index();
1015 itemPtr_ = std::pointer_traits<ItemPtr>::pointer_to(c->item(index_));
1016 return;
1017 }
1018 }
1019 }
1020
1021 void precheckedAdvance() {
1022 advanceImpl(false, false);
1023 }
1024
1025 FOLLY_ALWAYS_INLINE void advance() {
1026 advanceImpl(true, false);
1027 }
1028
1029 FOLLY_ALWAYS_INLINE void advanceLikelyDead() {
1030 advanceImpl(true, true);
1031 }
1032
1033 ChunkPtr chunk() const {
1034 return std::pointer_traits<ChunkPtr>::pointer_to(
1035 Chunk::owner(*itemPtr_, index_));
1036 }
1037
1038 std::size_t index() const {
1039 return index_;
1040 }
1041
1042 Item* itemAddr() const {
1043 return std::addressof(*itemPtr_);
1044 }
1045 Item& item() const {
1046 return *itemPtr_;
1047 }
1048 Item const& citem() const {
1049 return *itemPtr_;
1050 }
1051
1052 bool atEnd() const {
1053 return itemPtr_ == nullptr;
1054 }
1055
1056 Packed pack() const {
1057 return Packed{itemPtr_, static_cast<uint8_t>(index_)};
1058 }
1059
1060 bool operator==(F14ItemIter const& rhs) const {
1061 // this form makes iter == end() into a single null check after inlining
1062 // and constant propagation
1063 return itemPtr_ == rhs.itemPtr_;
1064 }
1065
1066 bool operator!=(F14ItemIter const& rhs) const {
1067 return !(*this == rhs);
1068 }
1069
1070 private:
1071 ItemPtr itemPtr_;
1072 std::size_t index_;
1073};
1074
1075////////////////
1076
1077template <typename SizeType, typename ItemIter, bool EnablePackedItemIter>
1078struct SizeAndPackedBegin {
1079 SizeType size_{0};
1080
1081 private:
1082 typename ItemIter::Packed packedBegin_{ItemIter{}.pack()};
1083
1084 public:
1085 typename ItemIter::Packed& packedBegin() {
1086 return packedBegin_;
1087 }
1088
1089 typename ItemIter::Packed const& packedBegin() const {
1090 return packedBegin_;
1091 }
1092};
1093
1094template <typename SizeType, typename ItemIter>
1095struct SizeAndPackedBegin<SizeType, ItemIter, false> {
1096 SizeType size_{0};
1097
1098 [[noreturn]] typename ItemIter::Packed& packedBegin() {
1099 assume_unreachable();
1100 }
1101
1102 [[noreturn]] typename ItemIter::Packed const& packedBegin() const {
1103 assume_unreachable();
1104 }
1105};
1106
1107template <typename Policy>
1108class F14Table : public Policy {
1109 public:
1110 using Item = typename Policy::Item;
1111
1112 using value_type = typename Policy::Value;
1113 using allocator_type = typename Policy::Alloc;
1114
1115 private:
1116 using Alloc = typename Policy::Alloc;
1117 using AllocTraits = typename Policy::AllocTraits;
1118 using Hasher = typename Policy::Hasher;
1119 using InternalSizeType = typename Policy::InternalSizeType;
1120 using KeyEqual = typename Policy::KeyEqual;
1121
1122 using Policy::kAllocIsAlwaysEqual;
1123 using Policy::kContinuousCapacity;
1124 using Policy::kDefaultConstructIsNoexcept;
1125 using Policy::kEnableItemIteration;
1126 using Policy::kSwapIsNoexcept;
1127
1128 using Policy::destroyItemOnClear;
1129 using Policy::isAvalanchingHasher;
1130 using Policy::prefetchBeforeCopy;
1131 using Policy::prefetchBeforeDestroy;
1132 using Policy::prefetchBeforeRehash;
1133
1134 using ByteAlloc = typename AllocTraits::template rebind_alloc<uint8_t>;
1135 using BytePtr = typename std::allocator_traits<ByteAlloc>::pointer;
1136
1137 using Chunk = F14Chunk<Item>;
1138 using ChunkPtr =
1139 typename std::pointer_traits<BytePtr>::template rebind<Chunk>;
1140
1141 using HashPair = typename F14HashToken::HashPair;
1142
1143 public:
1144 using ItemIter = F14ItemIter<ChunkPtr>;
1145
1146 private:
1147 //////// begin fields
1148
1149 ChunkPtr chunks_{Chunk::emptyInstance()};
1150 InternalSizeType chunkMask_{0};
1151 SizeAndPackedBegin<InternalSizeType, ItemIter, kEnableItemIteration>
1152 sizeAndPackedBegin_;
1153
1154 //////// end fields
1155
1156 void swapContents(F14Table& rhs) noexcept {
1157 using std::swap;
1158 swap(chunks_, rhs.chunks_);
1159 swap(chunkMask_, rhs.chunkMask_);
1160 swap(sizeAndPackedBegin_.size_, rhs.sizeAndPackedBegin_.size_);
1161 if (kEnableItemIteration) {
1162 swap(
1163 sizeAndPackedBegin_.packedBegin(),
1164 rhs.sizeAndPackedBegin_.packedBegin());
1165 }
1166 }
1167
1168 public:
1169 F14Table(
1170 std::size_t initialCapacity,
1171 Hasher const& hasher,
1172 KeyEqual const& keyEqual,
1173 Alloc const& alloc)
1174 : Policy{hasher, keyEqual, alloc} {
1175 if (initialCapacity > 0) {
1176 reserve(initialCapacity);
1177 }
1178 }
1179
1180 F14Table(F14Table const& rhs) : Policy{rhs} {
1181 buildFromF14Table(rhs);
1182 }
1183
1184 F14Table(F14Table const& rhs, Alloc const& alloc) : Policy{rhs, alloc} {
1185 buildFromF14Table(rhs);
1186 }
1187
1188 F14Table(F14Table&& rhs) noexcept(
1189 std::is_nothrow_move_constructible<Hasher>::value&&
1190 std::is_nothrow_move_constructible<KeyEqual>::value&&
1191 std::is_nothrow_move_constructible<Alloc>::value)
1192 : Policy{std::move(rhs)} {
1193 swapContents(rhs);
1194 }
1195
1196 F14Table(F14Table&& rhs, Alloc const& alloc) noexcept(kAllocIsAlwaysEqual)
1197 : Policy{std::move(rhs), alloc} {
1198 if (kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) {
1199 // move storage (common case)
1200 swapContents(rhs);
1201 } else {
1202 // new storage because allocators unequal, move values (rare case)
1203 buildFromF14Table(std::move(rhs));
1204 }
1205 }
1206
1207 F14Table& operator=(F14Table const& rhs) {
1208 if (this != &rhs) {
1209 reset();
1210 static_cast<Policy&>(*this) = rhs;
1211 buildFromF14Table(rhs);
1212 }
1213 return *this;
1214 }
1215
1216 F14Table& operator=(F14Table&& rhs) noexcept(
1217 std::is_nothrow_move_assignable<Hasher>::value&&
1218 std::is_nothrow_move_assignable<KeyEqual>::value &&
1219 (kAllocIsAlwaysEqual ||
1220 (AllocTraits::propagate_on_container_move_assignment::value &&
1221 std::is_nothrow_move_assignable<Alloc>::value))) {
1222 if (this != &rhs) {
1223 reset();
1224 static_cast<Policy&>(*this) = std::move(rhs);
1225 if (AllocTraits::propagate_on_container_move_assignment::value ||
1226 kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) {
1227 // move storage (common case)
1228 swapContents(rhs);
1229 } else {
1230 // new storage because allocators unequal, move values (rare case)
1231 buildFromF14Table(std::move(rhs));
1232 }
1233 }
1234 return *this;
1235 }
1236
1237 ~F14Table() {
1238 reset();
1239 }
1240
1241 void swap(F14Table& rhs) noexcept(kSwapIsNoexcept) {
1242 // If propagate_on_container_swap is false and allocators are
1243 // not equal, the only way to accomplish a swap would be to do
1244 // dynamic allocation and then move (or swap) each contained value.
1245 // AllocatorAwareContainer-s are not supposed to attempt this, but
1246 // rather are supposed to have undefined behavior in that case.
1247 FOLLY_SAFE_CHECK(
1248 AllocTraits::propagate_on_container_swap::value ||
1249 kAllocIsAlwaysEqual || this->alloc() == rhs.alloc(),
1250 "swap is undefined for unequal non-propagating allocators");
1251 this->swapPolicy(rhs);
1252 swapContents(rhs);
1253 }
1254
1255 private:
1256 //////// hash helpers
1257
1258 // Hash values are used to compute the desired position, which is the
1259 // chunk index at which we would like to place a value (if there is no
1260 // overflow), and the tag, which is an additional 8 bits of entropy.
1261 //
1262 // The standard's definition of hash function quality only refers to
1263 // the probability of collisions of the entire hash value, not to the
1264 // probability of collisions of the results of shifting or masking the
1265 // hash value. Some hash functions, however, provide this stronger
1266 // guarantee (not quite the same as the definition of avalanching,
1267 // but similar).
1268 //
1269 // If the user-supplied hasher is an avalanching one (each bit of the
1270 // hash value has a 50% chance of being the same for differing hash
1271 // inputs), then we can just take 1 byte of the hash value for the tag
1272 // and the rest for the desired position. Avalanching hashers also
1273 // let us map hash value to array index position with just a bitmask
1274 // without risking clumping. (Many hash tables just accept the risk
1275 // and do it regardless.)
1276 //
1277 // std::hash<std::string> avalanches in all implementations we've
1278 // examined: libstdc++-v3 uses MurmurHash2, and libc++ uses CityHash
1279 // or MurmurHash2. The other std::hash specializations, however, do not
1280 // have this property. std::hash for integral and pointer values is the
1281 // identity function on libstdc++-v3 and libc++, in particular. In our
1282 // experience it is also fairly common for user-defined specializations
1283 // of std::hash to combine fields in an ad-hoc way that does not evenly
1284 // distribute entropy among the bits of the result (a + 37 * b, for
1285 // example, where a and b are integer fields).
1286 //
1287 // For hash functions we don't trust to avalanche, we repair things by
1288 // applying a bit mixer to the user-supplied hash.
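
  // Sketch of the resulting contract (illustrative): splitHash(h) yields a
  // pair (hash, tag).  hash selects the starting chunk via
  // (hash & chunkMask_), and tag is an 8-bit value in [0x80, 0xff] that is
  // stored in the chunk's tag array, compared with SIMD before any key
  // comparison, and reused to derive the probe stride (2 * tag + 1).  For
  // an avalanching 64-bit hasher this reduces to
  //
  //   tag  = (h >> 56) | 0x80;  // top byte, top bit forced on
  //   hash = h;                 // full hash used for chunk selection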
1289
1290#if FOLLY_X64 || FOLLY_AARCH64
1291 // 64-bit
1292 static HashPair splitHash(std::size_t hash) {
1293 static_assert(sizeof(std::size_t) == sizeof(uint64_t), "");
1294 std::size_t tag;
1295 if (!isAvalanchingHasher()) {
1296#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
1297#if FOLLY_SSE
1298 // SSE4.2 CRC
1299 std::size_t c = _mm_crc32_u64(0, hash);
1300 tag = (c >> 24) | 0x80;
1301 hash += c;
1302#else
1303 // CRC is optional on armv8 (-march=armv8-a+crc), standard on armv8.1
1304 std::size_t c = __crc32cd(0, hash);
1305 tag = (c >> 24) | 0x80;
1306 hash += c;
1307#endif
1308#else
1309 // The mixer below is not fully avalanching for all 64 bits of
1310 // output, but looks quite good for bits 18..63 and puts plenty
1311 // of entropy even lower when considering multiple bits together
1312 // (like the tag). Importantly, when under register pressure it
1313 // uses fewer registers, instructions, and immediate constants
1314 // than the alternatives, resulting in compact code that is more
1315 // easily inlinable. In one instantiation a modified Murmur mixer
1316 // was 48 bytes of assembly (even after using the same multiplicand
1317 // for both steps) and this one was 27 bytes, for example.
1318 auto const kMul = 0xc4ceb9fe1a85ec53ULL;
1319#ifdef _WIN32
1320 __int64 signedHi;
1321 __int64 signedLo = _mul128(
1322 static_cast<__int64>(hash), static_cast<__int64>(kMul), &signedHi);
1323 auto hi = static_cast<uint64_t>(signedHi);
1324 auto lo = static_cast<uint64_t>(signedLo);
1325#else
1326 auto hi = static_cast<uint64_t>(
1327 (static_cast<unsigned __int128>(hash) * kMul) >> 64);
1328 auto lo = hash * kMul;
1329#endif
1330 hash = hi ^ lo;
1331 hash *= kMul;
1332 tag = ((hash >> 15) & 0x7f) | 0x80;
1333 hash >>= 22;
1334#endif
1335 } else {
1336 // we don't trust the top bit
1337 tag = (hash >> 56) | 0x80;
1338 }
1339 return std::make_pair(hash, tag);
1340 }
1341#else
1342 // 32-bit
1343 static HashPair splitHash(std::size_t hash) {
1344 static_assert(sizeof(std::size_t) == sizeof(uint32_t), "");
1345 uint8_t tag;
1346 if (!isAvalanchingHasher()) {
1347#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
1348#if FOLLY_SSE
1349 // SSE4.2 CRC
1350 auto c = _mm_crc32_u32(0, hash);
1351 tag = static_cast<uint8_t>(~(c >> 25));
1352 hash += c;
1353#else
1354 auto c = __crc32cw(0, hash);
1355 tag = static_cast<uint8_t>(~(c >> 25));
1356 hash += c;
1357#endif
1358#else
1359 // finalizer for 32-bit murmur2
1360 hash ^= hash >> 13;
1361 hash *= 0x5bd1e995;
1362 hash ^= hash >> 15;
1363 tag = static_cast<uint8_t>(~(hash >> 25));
1364#endif
1365 } else {
1366 // we don't trust the top bit
1367 tag = (hash >> 24) | 0x80;
1368 }
1369 return std::make_pair(hash, tag);
1370 }
1371#endif
1372
1373 //////// memory management helpers
1374
1375 static std::size_t computeCapacity(
1376 std::size_t chunkCount,
1377 std::size_t scale) {
1378 FOLLY_SAFE_DCHECK(!(chunkCount > 1 && scale == 0), "");
1379 FOLLY_SAFE_DCHECK(
1380 scale < (std::size_t{1} << Chunk::kCapacityScaleBits), "");
1381 FOLLY_SAFE_DCHECK((chunkCount & (chunkCount - 1)) == 0, "");
1382 return (((chunkCount - 1) >> Chunk::kCapacityScaleShift) + 1) * scale;
1383 }
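
  // Worked example (illustrative): with 14-item chunks kCapacityScaleShift
  // is 0, so computeCapacity(4, 12) == 4 * 12 == 48: four chunks, each
  // allowed to hold 12 values before a rehash is needed.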
1384
1385 std::pair<std::size_t, std::size_t> computeChunkCountAndScale(
1386 std::size_t desiredCapacity,
1387 bool continuousSingleChunkCapacity,
1388 bool continuousMultiChunkCapacity) const {
1389 if (desiredCapacity <= Chunk::kCapacity) {
1390 // we can go to 100% capacity in a single chunk with no problem
1391 if (!continuousSingleChunkCapacity) {
1392 if (desiredCapacity <= 2) {
1393 desiredCapacity = 2;
1394 } else if (desiredCapacity <= 6) {
1395 desiredCapacity = 6;
1396 } else {
1397 desiredCapacity = Chunk::kCapacity;
1398 }
1399 }
1400 auto rv = std::make_pair(std::size_t{1}, desiredCapacity);
1401 FOLLY_SAFE_DCHECK(
1402 computeCapacity(rv.first, rv.second) == desiredCapacity, "");
1403 return rv;
1404 } else {
1405 std::size_t minChunks =
1406 (desiredCapacity - 1) / Chunk::kDesiredCapacity + 1;
1407 std::size_t chunkPow = findLastSet(minChunks - 1);
1408 if (chunkPow == 8 * sizeof(std::size_t)) {
1409 throw_exception<std::bad_alloc>();
1410 }
1411
1412 std::size_t chunkCount = std::size_t{1} << chunkPow;
1413
1414 // Let cc * scale be the actual capacity.
1415 // cc = ((chunkCount - 1) >> kCapacityScaleShift) + 1.
1416 // If chunkPow >= kCapacityScaleShift, then cc = chunkCount >>
1417 // kCapacityScaleShift = 1 << (chunkPow - kCapacityScaleShift),
1418 // otherwise it equals 1 = 1 << 0. Let cc = 1 << ss.
1419 std::size_t ss = chunkPow >= Chunk::kCapacityScaleShift
1420 ? chunkPow - Chunk::kCapacityScaleShift
1421 : 0;
1422
1423 std::size_t scale;
1424 if (continuousMultiChunkCapacity) {
1425 // (1 << ss) * scale >= desiredCapacity
1426 scale = ((desiredCapacity - 1) >> ss) + 1;
1427 } else {
1428 // (1 << ss) * scale == chunkCount * kDesiredCapacity
1429 scale = Chunk::kDesiredCapacity << (chunkPow - ss);
1430 }
1431
1432 std::size_t actualCapacity = computeCapacity(chunkCount, scale);
1433 FOLLY_SAFE_DCHECK(actualCapacity >= desiredCapacity, "");
1434 if (actualCapacity > max_size()) {
1435 throw_exception<std::bad_alloc>();
1436 }
1437
1438 return std::make_pair(chunkCount, scale);
1439 }
1440 }
1441
1442 static std::size_t chunkAllocSize(
1443 std::size_t chunkCount,
1444 std::size_t capacityScale) {
1445 FOLLY_SAFE_DCHECK(chunkCount > 0, "");
1446 FOLLY_SAFE_DCHECK(!(chunkCount > 1 && capacityScale == 0), "");
1447 if (chunkCount == 1) {
1448 static_assert(offsetof(Chunk, rawItems_) == 16, "");
1449 return 16 + sizeof(Item) * computeCapacity(1, capacityScale);
1450 } else {
1451 return sizeof(Chunk) * chunkCount;
1452 }
1453 }
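
  // For example (illustrative): with 4-byte items and a single chunk of
  // capacity scale 6, chunkAllocSize(1, 6) == 16 + 4 * 6 == 40 bytes.  The
  // single-chunk layout allocates only the item slots the scale allows,
  // while multi-chunk tables always allocate whole chunks.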
1454
1455 ChunkPtr initializeChunks(
1456 BytePtr raw,
1457 std::size_t chunkCount,
1458 std::size_t capacityScale) {
1459 static_assert(std::is_trivial<Chunk>::value, "F14Chunk should be POD");
1460 auto chunks = static_cast<Chunk*>(static_cast<void*>(&*raw));
1461 for (std::size_t i = 0; i < chunkCount; ++i) {
1462 chunks[i].clear();
1463 }
1464 chunks[0].markEof(capacityScale);
1465 return std::pointer_traits<ChunkPtr>::pointer_to(*chunks);
1466 }
1467
1468 std::size_t itemCount() const noexcept {
1469 if (chunkMask_ == 0) {
1470 return computeCapacity(1, chunks_->capacityScale());
1471 } else {
1472 return (chunkMask_ + 1) * Chunk::kCapacity;
1473 }
1474 }
1475
1476 public:
1477 ItemIter begin() const noexcept {
1478 FOLLY_SAFE_DCHECK(kEnableItemIteration, "");
1479 return ItemIter{sizeAndPackedBegin_.packedBegin()};
1480 }
1481
1482 ItemIter end() const noexcept {
1483 return ItemIter{};
1484 }
1485
1486 bool empty() const noexcept {
1487 return size() == 0;
1488 }
1489
1490 InternalSizeType size() const noexcept {
1491 return sizeAndPackedBegin_.size_;
1492 }
1493
1494 std::size_t max_size() const noexcept {
1495 auto& a = this->alloc();
1496 return std::min<std::size_t>(
1497 (std::numeric_limits<InternalSizeType>::max)(),
1498 AllocTraits::max_size(a));
1499 }
1500
1501 std::size_t bucket_count() const noexcept {
1502 return computeCapacity(chunkMask_ + 1, chunks_->capacityScale());
1503 }
1504
1505 std::size_t max_bucket_count() const noexcept {
1506 return max_size();
1507 }
1508
1509 float load_factor() const noexcept {
1510 return empty()
1511 ? 0.0f
1512 : static_cast<float>(size()) / static_cast<float>(bucket_count());
1513 }
1514
1515 float max_load_factor() const noexcept {
1516 return 1.0f;
1517 }
1518
1519 void max_load_factor(float) noexcept {
1520 // Probing hash tables can't run load factors >= 1 (unlike chaining
1521 // tables). In addition, we have measured that there is little or
1522 // no performance advantage to running a smaller load factor (cache
1523 // locality losses outweigh the small reduction in probe lengths,
1524 // often making it slower). Therefore, we've decided to just fix
1525 // max_load_factor at 1.0f regardless of what the user requests.
1526 // This has an additional advantage that we don't have to store it.
1527 // Taking alignment into consideration this makes every F14 table
1528 // 8 bytes smaller, and is part of the reason an empty F14NodeMap
1529 // is almost half the size of an empty std::unordered_map (32 vs
1530 // 56 bytes).
1531 //
1532 // I don't have a strong opinion on whether we should remove this
1533 // method or leave a stub, let ngbronson or xshi know if you have a
1534 // compelling argument either way.
1535 }
1536
1537 private:
1538 // Our probe strategy is to advance through additional chunks with
1539 // a stride that is key-specific. This is called double hashing,
1540 // and is a well known and high quality probing strategy. So long as
1541 // the stride and the chunk count are relatively prime, we will visit
1542 // every chunk once and then return to the original chunk, letting us
1543 // detect and end the cycle. The chunk count is a power of two, so
1544 // we can satisfy the relatively prime part by choosing an odd stride.
1545 // We've already computed a high quality secondary hash value for the
1546 // tag, so we just use it for the second probe hash as well.
1547 //
1548 // At the maximum load factor of 12/14, expected probe length for a
1549 // find hit is 1.041, with 99% of keys found in the first three chunks.
1550 // Expected probe length for a find miss (or insert) is 1.275, with a
1551 // p99 probe length of 4 (fewer than 1% of failing find look at 5 or
1552 // more chunks).
1553 //
1554 // This code is structured so you can try various ways of encoding
1555 // the current probe state. For example, at the moment the probe's
1556 // state is the position in the cycle and the resulting chunk index is
1557 // computed from that inside probeCurrentIndex. We could also make the
1558 // probe state the chunk index, and then increment it by hp.second *
1559 // 2 + 1 in probeAdvance. Wrapping can be applied early or late as
1560 // well. This particular code seems to be easier for the optimizer
1561 // to understand.
1562 //
1563 // We could also implement probing strategies that resulted in the same
1564 // tour for every key initially assigned to a chunk (linear probing or
1565 // quadratic), but that results in longer probe lengths. In particular,
1566 // the cache locality wins of linear probing are not worth the increase
1567 // in probe lengths (extra work and less branch predictability) in
1568 // our experiments.
1569
1570 std::size_t probeDelta(HashPair hp) const {
1571 return 2 * hp.second + 1;
1572 }
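
  // Worked example (illustrative): with 8 chunks (chunkMask_ == 7) and
  // hp.second == 0x93, probeDelta gives a stride of 2 * 0x93 + 1 == 0x127,
  // which is odd and hence relatively prime to 8.  Probing visits
  // (hp.first + k * 0x127) & 7 for k = 0, 1, 2, ..., touching each of the
  // 8 chunks exactly once before the cycle repeats.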
1573
1574 template <typename K>
1575 FOLLY_ALWAYS_INLINE ItemIter findImpl(HashPair hp, K const& key) const {
1576 std::size_t index = hp.first;
1577 std::size_t step = probeDelta(hp);
1578 for (std::size_t tries = 0; tries <= chunkMask_; ++tries) {
1579 ChunkPtr chunk = chunks_ + (index & chunkMask_);
1580 if (sizeof(Chunk) > 64) {
1581 prefetchAddr(chunk->itemAddr(8));
1582 }
1583 auto hits = chunk->tagMatchIter(hp.second);
1584 while (hits.hasNext()) {
1585 auto i = hits.next();
1586 if (LIKELY(this->keyMatchesItem(key, chunk->item(i)))) {
1587 // Tag match and key match were both successful. The chance
1588 // of a false tag match is 1/128 for each key in the chunk
1589 // (with a proper hash function).
1590 return ItemIter{chunk, i};
1591 }
1592 }
1593 if (LIKELY(chunk->outboundOverflowCount() == 0)) {
1594 // No keys that wanted to be placed in this chunk were denied
1595 // entry, so our search is over. This is the common case.
1596 break;
1597 }
1598 index += step;
1599 }
    // Exiting the loop because tries is exhausted is rare, but possible.
1601 // That means that for every chunk there is currently a key present
1602 // in the map that visited that chunk on its probe search but ended
1603 // up somewhere else, and we have searched every chunk.
1604 return ItemIter{};
1605 }
1606
1607 public:
1608 // Prehashing splits the work of find(key) into two calls, enabling you
1609 // to manually implement loop pipelining for hot bulk lookups. prehash
1610 // computes the hash and prefetches the first computed memory location,
1611 // and the two-arg find(F14HashToken,K) performs the rest of the search.
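
  // Hedged usage sketch (container and variable names are illustrative;
  // the F14 map/set wrappers forward prehash() and the token-taking find()
  // with this shape):
  //
  //   std::vector<folly::F14HashToken> tokens;
  //   tokens.reserve(keys.size());
  //   for (auto const& key : keys) {
  //     tokens.push_back(map.prehash(key));        // hash + prefetch
  //   }
  //   for (std::size_t i = 0; i < keys.size(); ++i) {
  //     auto iter = map.find(tokens[i], keys[i]);  // finish each lookup
  //     // ... use iter ...
  //   }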
1612 template <typename K>
1613 F14HashToken prehash(K const& key) const {
1614 FOLLY_SAFE_DCHECK(chunks_ != nullptr, "");
1615 auto hp = splitHash(this->computeKeyHash(key));
1616 ChunkPtr firstChunk = chunks_ + (hp.first & chunkMask_);
1617 prefetchAddr(firstChunk);
1618 return F14HashToken(std::move(hp));
1619 }
1620
1621 template <typename K>
1622 FOLLY_ALWAYS_INLINE ItemIter find(K const& key) const {
1623 auto hp = splitHash(this->computeKeyHash(key));
1624 return findImpl(hp, key);
1625 }
1626
1627 template <typename K>
1628 FOLLY_ALWAYS_INLINE ItemIter
1629 find(F14HashToken const& token, K const& key) const {
1630 FOLLY_SAFE_DCHECK(
1631 splitHash(this->computeKeyHash(key)) == static_cast<HashPair>(token),
1632 "");
1633 return findImpl(static_cast<HashPair>(token), key);
1634 }
1635
1636 private:
1637 void adjustSizeAndBeginAfterInsert(ItemIter iter) {
1638 if (kEnableItemIteration) {
1639 // packedBegin is the max of all valid ItemIter::pack()
1640 auto packed = iter.pack();
1641 if (sizeAndPackedBegin_.packedBegin() < packed) {
1642 sizeAndPackedBegin_.packedBegin() = packed;
1643 }
1644 }
1645
1646 ++sizeAndPackedBegin_.size_;
1647 }
1648
  // Ignores hp if iter.chunk()->hostedOverflowCount() == 0
1650 void eraseBlank(ItemIter iter, HashPair hp) {
1651 iter.chunk()->clearTag(iter.index());
1652
1653 if (iter.chunk()->hostedOverflowCount() != 0) {
1654 // clean up
1655 std::size_t index = hp.first;
1656 std::size_t delta = probeDelta(hp);
1657 uint8_t hostedOp = 0;
1658 while (true) {
1659 ChunkPtr chunk = chunks_ + (index & chunkMask_);
1660 if (chunk == iter.chunk()) {
1661 chunk->adjustHostedOverflowCount(hostedOp);
1662 break;
1663 }
1664 chunk->decrOutboundOverflowCount();
1665 hostedOp = Chunk::kDecrHostedOverflowCount;
1666 index += delta;
1667 }
1668 }
1669 }
1670
1671 void adjustSizeAndBeginBeforeErase(ItemIter iter) {
1672 --sizeAndPackedBegin_.size_;
1673 if (kEnableItemIteration) {
1674 if (iter.pack() == sizeAndPackedBegin_.packedBegin()) {
1675 if (size() == 0) {
1676 iter = ItemIter{};
1677 } else {
1678 iter.precheckedAdvance();
1679 }
1680 sizeAndPackedBegin_.packedBegin() = iter.pack();
1681 }
1682 }
1683 }
1684
1685 template <typename... Args>
1686 void insertAtBlank(ItemIter pos, HashPair hp, Args&&... args) {
1687 try {
1688 auto dst = pos.itemAddr();
1689 this->constructValueAtItem(*this, dst, std::forward<Args>(args)...);
1690 } catch (...) {
1691 eraseBlank(pos, hp);
1692 throw;
1693 }
1694 adjustSizeAndBeginAfterInsert(pos);
1695 }
1696
1697 ItemIter allocateTag(uint8_t* fullness, HashPair hp) {
1698 ChunkPtr chunk;
1699 std::size_t index = hp.first;
1700 std::size_t delta = probeDelta(hp);
1701 uint8_t hostedOp = 0;
1702 while (true) {
1703 index &= chunkMask_;
1704 chunk = chunks_ + index;
1705 if (LIKELY(fullness[index] < Chunk::kCapacity)) {
1706 break;
1707 }
1708 chunk->incrOutboundOverflowCount();
1709 hostedOp = Chunk::kIncrHostedOverflowCount;
1710 index += delta;
1711 }
1712 unsigned itemIndex = fullness[index]++;
1713 FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "");
1714 chunk->setTag(itemIndex, hp.second);
1715 chunk->adjustHostedOverflowCount(hostedOp);
1716 return ItemIter{chunk, itemIndex};
1717 }
1718
1719 ChunkPtr lastOccupiedChunk() const {
1720 FOLLY_SAFE_DCHECK(size() > 0, "");
1721 if (kEnableItemIteration) {
1722 return begin().chunk();
1723 } else {
1724 return chunks_ + chunkMask_;
1725 }
1726 }
1727
1728 template <typename T>
1729 void directBuildFrom(T&& src) {
1730 FOLLY_SAFE_DCHECK(src.size() > 0 && chunkMask_ == src.chunkMask_, "");
1731
1732 // We use std::forward<T> to allow portions of src to be moved out by
1733 // either beforeBuild or afterBuild, but we are just relying on good
1734 // behavior of our Policy superclass to ensure that any particular
1735 // field of this is a donor at most once.
1736
1737 auto undoState =
1738 this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src));
1739 bool success = false;
1740 SCOPE_EXIT {
1741 this->afterBuild(
1742 undoState, success, src.size(), bucket_count(), std::forward<T>(src));
1743 };
1744
1745 // Copy can fail part-way through if a Value copy constructor throws.
1746 // Failing afterBuild is limited in its cleanup power in this case,
1747 // because it can't enumerate the items that were actually copied.
1748 // Fortunately we can divide the situation into cases where all of
1749 // the state is owned by the table itself (F14Node and F14Value),
1750 // for which clearImpl() can do partial cleanup, and cases where all
1751 // of the values are owned by the policy (F14Vector), in which case
1752 // partial failure should not occur. Sorry for the subtle invariants
1753 // in the Policy API.
1754
1755 if (is_trivially_copyable<Item>::value && !this->destroyItemOnClear() &&
1756 itemCount() == src.itemCount()) {
1757 FOLLY_SAFE_DCHECK(chunkMask_ == src.chunkMask_, "");
1758
1759 auto scale = chunks_->capacityScale();
1760
      // happiest path: the layouts match exactly, so a single memcpy of
      // the chunk array copies tags, overflow counts, and items
1762 auto n = chunkAllocSize(chunkMask_ + 1, scale);
1763 std::memcpy(&chunks_[0], &src.chunks_[0], n);
1764 sizeAndPackedBegin_.size_ = src.size();
1765 if (kEnableItemIteration) {
1766 auto srcBegin = src.begin();
1767 sizeAndPackedBegin_.packedBegin() =
1768 ItemIter{chunks_ + (srcBegin.chunk() - src.chunks_),
1769 srcBegin.index()}
1770 .pack();
1771 }
1772 if (kContinuousCapacity) {
1773 // capacityScale might not match even if itemCount matches
1774 chunks_->setCapacityScale(scale);
1775 }
1776 } else {
1777 std::size_t maxChunkIndex = src.lastOccupiedChunk() - src.chunks_;
1778
1779 // happy path, no rehash but pack items toward bottom of chunk and
1780 // use copy constructor
1781 auto srcChunk = &src.chunks_[maxChunkIndex];
1782 Chunk* dstChunk = &chunks_[maxChunkIndex];
1783 do {
1784 dstChunk->copyOverflowInfoFrom(*srcChunk);
1785
1786 auto iter = srcChunk->occupiedIter();
1787 if (prefetchBeforeCopy()) {
1788 for (auto piter = iter; piter.hasNext();) {
1789 this->prefetchValue(srcChunk->citem(piter.next()));
1790 }
1791 }
1792
1793 std::size_t dstI = 0;
1794 for (; iter.hasNext(); ++dstI) {
1795 auto srcI = iter.next();
1796 auto&& srcArg =
1797 std::forward<T>(src).buildArgForItem(srcChunk->item(srcI));
1798 auto dst = dstChunk->itemAddr(dstI);
1799 this->constructValueAtItem(
1800 0, dst, std::forward<decltype(srcArg)>(srcArg));
1801 dstChunk->setTag(dstI, srcChunk->tag(srcI));
1802 ++sizeAndPackedBegin_.size_;
1803 }
1804
1805 --srcChunk;
1806 --dstChunk;
1807 } while (size() != src.size());
1808
1809 // reset doesn't care about packedBegin, so we don't fix it until the end
1810 if (kEnableItemIteration) {
1811 sizeAndPackedBegin_.packedBegin() =
1812 ItemIter{chunks_ + maxChunkIndex,
1813 chunks_[maxChunkIndex].lastOccupied().index()}
1814 .pack();
1815 }
1816 }
1817
1818 success = true;
1819 }
1820
1821 template <typename T>
1822 void rehashBuildFrom(T&& src) {
1823 FOLLY_SAFE_DCHECK(src.chunkMask_ > chunkMask_, "");
1824
1825 // 1 byte per chunk means < 1 bit per value temporary overhead
1826 std::array<uint8_t, 256> stackBuf;
1827 uint8_t* fullness;
1828 auto cc = chunkMask_ + 1;
1829 if (cc <= stackBuf.size()) {
1830 fullness = stackBuf.data();
1831 } else {
1832 ByteAlloc a{this->alloc()};
1833 fullness = &*std::allocator_traits<ByteAlloc>::allocate(a, cc);
1834 }
1835 SCOPE_EXIT {
1836 if (cc > stackBuf.size()) {
1837 ByteAlloc a{this->alloc()};
1838 std::allocator_traits<ByteAlloc>::deallocate(
1839 a,
1840 std::pointer_traits<typename std::allocator_traits<
1841 ByteAlloc>::pointer>::pointer_to(*fullness),
1842 cc);
1843 }
1844 };
1845 std::memset(fullness, '\0', cc);
1846
    // We use std::forward<T> to allow portions of src to be moved out by
    // either beforeBuild or afterBuild, but we are just relying on good
    // behavior of our Policy superclass to ensure that any particular
    // field of src is a donor at most once.
1851
1852 // Exception safety requires beforeBuild to happen after all of the
1853 // allocate() calls.
1854 auto undoState =
1855 this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src));
1856 bool success = false;
1857 SCOPE_EXIT {
1858 this->afterBuild(
1859 undoState, success, src.size(), bucket_count(), std::forward<T>(src));
1860 };
1861
1862 // The current table is at a valid state at all points for policies
1863 // in which non-trivial values are owned by the main table (F14Node
1864 // and F14Value), so reset() will clean things up properly if we
1865 // fail partway through. For the case that the policy manages value
1866 // lifecycle (F14Vector) then nothing after beforeBuild can throw and
1867 // we don't have to worry about partial failure.
1868
1869 std::size_t srcChunkIndex = src.lastOccupiedChunk() - src.chunks_;
1870 while (true) {
1871 auto srcChunk = &src.chunks_[srcChunkIndex];
1872 auto iter = srcChunk->occupiedIter();
1873 if (prefetchBeforeRehash()) {
1874 for (auto piter = iter; piter.hasNext();) {
1875 this->prefetchValue(srcChunk->item(piter.next()));
1876 }
1877 }
1878 if (srcChunk->hostedOverflowCount() == 0) {
1879 // all items are in their preferred chunk (no probing), so we
1880 // don't need to compute any hash values
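        // Note: reusing (srcChunkIndex, tag) as the HashPair is valid here
        // because the destination chunk count is a smaller power of two,
        // so srcChunkIndex & chunkMask_ matches what the full hash would
        // give, and probeDelta is assumed to depend only on the tag byte.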
1881 while (iter.hasNext()) {
1882 auto i = iter.next();
1883 auto& srcItem = srcChunk->item(i);
1884 auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem);
1885 HashPair hp{srcChunkIndex, srcChunk->tag(i)};
1886 insertAtBlank(
1887 allocateTag(fullness, hp),
1888 hp,
1889 std::forward<decltype(srcArg)>(srcArg));
1890 }
1891 } else {
1892 // any chunk's items might be in here
1893 while (iter.hasNext()) {
1894 auto i = iter.next();
1895 auto& srcItem = srcChunk->item(i);
1896 auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem);
1897 auto const& srcKey = src.keyForValue(srcArg);
1898 auto hp = splitHash(this->computeKeyHash(srcKey));
1899 FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(i), "");
1900 insertAtBlank(
1901 allocateTag(fullness, hp),
1902 hp,
1903 std::forward<decltype(srcArg)>(srcArg));
1904 }
1905 }
1906 if (srcChunkIndex == 0) {
1907 break;
1908 }
1909 --srcChunkIndex;
1910 }
1911
1912 success = true;
1913 }
1914
1915 template <typename T>
1916 FOLLY_NOINLINE void buildFromF14Table(T&& src) {
1917 FOLLY_SAFE_DCHECK(bucket_count() == 0, "");
1918 if (src.size() == 0) {
1919 return;
1920 }
1921
1922 // Use the source's capacity, unless it is oversized.
1923 auto upperLimit = computeChunkCountAndScale(src.size(), false, false);
1924 auto ccas =
1925 std::make_pair(src.chunkMask_ + 1, src.chunks_->capacityScale());
1926 FOLLY_SAFE_DCHECK(
1927 ccas.first >= upperLimit.first,
1928 "rounded chunk count can't be bigger than actual");
1929 if (ccas.first > upperLimit.first || ccas.second > upperLimit.second) {
1930 ccas = upperLimit;
1931 }
1932 rehashImpl(0, 1, 0, ccas.first, ccas.second);
1933
1934 try {
1935 if (chunkMask_ == src.chunkMask_) {
1936 directBuildFrom(std::forward<T>(src));
1937 } else {
1938 rehashBuildFrom(std::forward<T>(src));
1939 }
1940 } catch (...) {
1941 reset();
1942 F14LinkCheck<getF14IntrinsicsMode()>::check();
1943 throw;
1944 }
1945 }
1946
1947 void reserveImpl(std::size_t desiredCapacity) {
1948 desiredCapacity = std::max<std::size_t>(desiredCapacity, size());
1949 if (desiredCapacity == 0) {
1950 reset();
1951 return;
1952 }
1953
1954 auto origChunkCount = chunkMask_ + 1;
1955 auto origCapacityScale = chunks_->capacityScale();
1956 auto origCapacity = computeCapacity(origChunkCount, origCapacityScale);
1957
1958 // This came from an explicit reserve() or rehash() call, so there's
1959 // a good chance the capacity is exactly right. To avoid O(n^2)
1960 // behavior, we don't do rehashes that decrease the size by less
1961 // than 1/8, and if we have a requested increase of less than 1/8 we
1962 // instead go to the next power of two.
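    // Illustrative numbers: with origCapacity == 96, requests in [84, 96]
    // hit the early return below, and requests of 97..107 leave
    // attemptExact false so computeChunkCountAndScale is free to round
    // the capacity up rather than match it exactly.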
1963
1964 if (desiredCapacity <= origCapacity &&
1965 desiredCapacity >= origCapacity - origCapacity / 8) {
1966 return;
1967 }
1968 bool attemptExact =
1969 !(desiredCapacity > origCapacity &&
1970 desiredCapacity < origCapacity + origCapacity / 8);
1971
1972 std::size_t newChunkCount;
1973 std::size_t newCapacityScale;
1974 std::tie(newChunkCount, newCapacityScale) = computeChunkCountAndScale(
1975 desiredCapacity, attemptExact, kContinuousCapacity && attemptExact);
1976 auto newCapacity = computeCapacity(newChunkCount, newCapacityScale);
1977
1978 if (origCapacity != newCapacity) {
1979 rehashImpl(
1980 size(),
1981 origChunkCount,
1982 origCapacityScale,
1983 newChunkCount,
1984 newCapacityScale);
1985 }
1986 }
1987
1988 FOLLY_NOINLINE void reserveForInsertImpl(
1989 std::size_t capacityMinusOne,
1990 std::size_t origChunkCount,
1991 std::size_t origCapacityScale,
1992 std::size_t origCapacity) {
1993 FOLLY_SAFE_DCHECK(capacityMinusOne >= size(), "");
1994 std::size_t capacity = capacityMinusOne + 1;
1995
1996 // we want to grow by between 2^0.5 and 2^1.5 ending at a "good"
1997 // size, so we grow by 2^0.5 and then round up
1998
1999 // 1.01101_2 = 1.40625
2000 std::size_t minGrowth = origCapacity + (origCapacity >> 2) +
2001 (origCapacity >> 3) + (origCapacity >> 5);
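    // e.g. origCapacity == 64: minGrowth == 64 + 16 + 8 + 2 == 90, which
    // is 1.40625 * 64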
2002 capacity = std::max<std::size_t>(capacity, minGrowth);
2003
2004 std::size_t newChunkCount;
2005 std::size_t newCapacityScale;
2006 std::tie(newChunkCount, newCapacityScale) =
2007 computeChunkCountAndScale(capacity, false, false);
2008
2009 FOLLY_SAFE_DCHECK(
2010 computeCapacity(newChunkCount, newCapacityScale) > origCapacity, "");
2011
2012 rehashImpl(
2013 size(),
2014 origChunkCount,
2015 origCapacityScale,
2016 newChunkCount,
2017 newCapacityScale);
2018 }
2019
2020 void rehashImpl(
2021 std::size_t origSize,
2022 std::size_t origChunkCount,
2023 std::size_t origCapacityScale,
2024 std::size_t newChunkCount,
2025 std::size_t newCapacityScale) {
2026 auto origChunks = chunks_;
2027 auto origCapacity = computeCapacity(origChunkCount, origCapacityScale);
2028 auto origAllocSize = chunkAllocSize(origChunkCount, origCapacityScale);
2029 auto newCapacity = computeCapacity(newChunkCount, newCapacityScale);
2030 auto newAllocSize = chunkAllocSize(newChunkCount, newCapacityScale);
2031
2032 BytePtr rawAllocation;
2033 auto undoState = this->beforeRehash(
2034 origSize, origCapacity, newCapacity, newAllocSize, rawAllocation);
2035 chunks_ = initializeChunks(rawAllocation, newChunkCount, newCapacityScale);
2036
2037 FOLLY_SAFE_DCHECK(
2038 newChunkCount < std::numeric_limits<InternalSizeType>::max(), "");
2039 chunkMask_ = static_cast<InternalSizeType>(newChunkCount - 1);
2040
2041 bool success = false;
2042 SCOPE_EXIT {
2043 // this SCOPE_EXIT reverts chunks_ and chunkMask_ if necessary
2044 BytePtr finishedRawAllocation = nullptr;
2045 std::size_t finishedAllocSize = 0;
2046 if (LIKELY(success)) {
2047 if (origCapacity > 0) {
2048 finishedRawAllocation = std::pointer_traits<BytePtr>::pointer_to(
2049 *static_cast<uint8_t*>(static_cast<void*>(&*origChunks)));
2050 finishedAllocSize = origAllocSize;
2051 }
2052 } else {
2053 finishedRawAllocation = rawAllocation;
2054 finishedAllocSize = newAllocSize;
2055 chunks_ = origChunks;
2056 FOLLY_SAFE_DCHECK(
2057 origChunkCount < std::numeric_limits<InternalSizeType>::max(), "");
2058 chunkMask_ = static_cast<InternalSizeType>(origChunkCount - 1);
2059 F14LinkCheck<getF14IntrinsicsMode()>::check();
2060 }
2061
2062 this->afterRehash(
2063 std::move(undoState),
2064 success,
2065 origSize,
2066 origCapacity,
2067 newCapacity,
2068 finishedRawAllocation,
2069 finishedAllocSize);
2070 };
2071
2072 if (origSize == 0) {
2073 // nothing to do
2074 } else if (origChunkCount == 1 && newChunkCount == 1) {
2075 // no mask, no chunk scan, no hash computation, no probing
2076 auto srcChunk = origChunks;
2077 auto dstChunk = chunks_;
2078 std::size_t srcI = 0;
2079 std::size_t dstI = 0;
2080 while (dstI < origSize) {
2081 if (LIKELY(srcChunk->occupied(srcI))) {
2082 dstChunk->setTag(dstI, srcChunk->tag(srcI));
2083 this->moveItemDuringRehash(
2084 dstChunk->itemAddr(dstI), srcChunk->item(srcI));
2085 ++dstI;
2086 }
2087 ++srcI;
2088 }
2089 if (kEnableItemIteration) {
2090 sizeAndPackedBegin_.packedBegin() = ItemIter{dstChunk, dstI - 1}.pack();
2091 }
2092 } else {
2093 // 1 byte per chunk means < 1 bit per value temporary overhead
2094 std::array<uint8_t, 256> stackBuf;
2095 uint8_t* fullness;
2096 if (newChunkCount <= stackBuf.size()) {
2097 fullness = stackBuf.data();
2098 } else {
2099 ByteAlloc a{this->alloc()};
2100 // may throw
2101 fullness =
2102 &*std::allocator_traits<ByteAlloc>::allocate(a, newChunkCount);
2103 }
2104 std::memset(fullness, '\0', newChunkCount);
2105 SCOPE_EXIT {
2106 if (newChunkCount > stackBuf.size()) {
2107 ByteAlloc a{this->alloc()};
2108 std::allocator_traits<ByteAlloc>::deallocate(
2109 a,
2110 std::pointer_traits<typename std::allocator_traits<
2111 ByteAlloc>::pointer>::pointer_to(*fullness),
2112 newChunkCount);
2113 }
2114 };
2115
2116 auto srcChunk = origChunks + origChunkCount - 1;
2117 std::size_t remaining = origSize;
2118 while (remaining > 0) {
2119 auto iter = srcChunk->occupiedIter();
2120 if (prefetchBeforeRehash()) {
2121 for (auto piter = iter; piter.hasNext();) {
2122 this->prefetchValue(srcChunk->item(piter.next()));
2123 }
2124 }
2125 while (iter.hasNext()) {
2126 --remaining;
2127 auto srcI = iter.next();
2128 Item& srcItem = srcChunk->item(srcI);
2129 auto hp = splitHash(
2130 this->computeItemHash(const_cast<Item const&>(srcItem)));
2131 FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(srcI), "");
2132
2133 auto dstIter = allocateTag(fullness, hp);
2134 this->moveItemDuringRehash(dstIter.itemAddr(), srcItem);
2135 }
2136 --srcChunk;
2137 }
2138
2139 if (kEnableItemIteration) {
        // This replaces what would otherwise be one
        // adjustSizeAndBeginAfterInsert call per moved item: point
        // packedBegin at the topmost item of the last occupied chunk.
2141 std::size_t i = chunkMask_;
2142 while (fullness[i] == 0) {
2143 --i;
2144 }
2145 sizeAndPackedBegin_.packedBegin() =
2146 ItemIter{chunks_ + i, std::size_t{fullness[i]} - 1}.pack();
2147 }
2148 }
2149
2150 success = true;
2151 }
2152
2153 // Randomization to help expose bugs when running tests in debug or
2154 // sanitizer builds
2155
2156 FOLLY_ALWAYS_INLINE void debugModeOnReserve(std::size_t capacity) {
2157 if (kIsSanitizeAddress || kIsDebug) {
2158 if (capacity > size()) {
2159 tlsPendingSafeInserts(static_cast<std::ptrdiff_t>(capacity - size()));
2160 }
2161 }
2162 }
2163
2164 void debugModeSpuriousRehash() {
2165 auto cc = chunkMask_ + 1;
2166 auto ss = chunks_->capacityScale();
2167 rehashImpl(size(), cc, ss, cc, ss);
2168 }
2169
2170 FOLLY_ALWAYS_INLINE void debugModeBeforeInsert() {
2171 // When running under ASAN, we add a spurious rehash with 1/size()
2172 // probability before every insert. This means that finding reference
2173 // stability problems for F14Value and F14Vector is much more likely.
2174 // The most common pattern that causes this is
2175 //
2176 // auto& ref = map[k1]; map[k2] = foo(ref);
2177 //
2178 // One way to fix this is to call map.reserve(N) before such a
2179 // sequence, where N is the number of keys that might be inserted
2180 // within the section that retains references plus the existing size.
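    //
    // Hypothetical illustration of that fix for the pattern above:
    //
    //   map.reserve(map.size() + 2);
    //   auto& ref = map[k1];
    //   map[k2] = foo(ref);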
2181 if (kIsSanitizeAddress && !tlsPendingSafeInserts() && size() > 0 &&
2182 tlsMinstdRand(size()) == 0) {
2183 debugModeSpuriousRehash();
2184 }
2185 }
2186
2187 FOLLY_ALWAYS_INLINE void debugModeAfterInsert() {
2188 if (kIsSanitizeAddress || kIsDebug) {
2189 tlsPendingSafeInserts(-1);
2190 }
2191 }
2192
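  // When FOLLY_F14_PERTURB_INSERTION_ORDER is enabled, this picks a random
  // candidate slot in [itemIndex, e) and uses it if it happens to be
  // empty, so insertion order within a chunk is not deterministic. It is
  // skipped while a reserve() has promised reference stability
  // (tlsPendingSafeInserts). With a single chunk the usable capacity can
  // be smaller than Chunk::kCapacity, which is presumably why
  // bucket_count() bounds the choice in that case.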
2193 FOLLY_ALWAYS_INLINE void debugModePerturbSlotInsertOrder(
2194 ChunkPtr chunk,
2195 std::size_t& itemIndex) {
2196 FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "");
2197 constexpr bool perturbSlot = FOLLY_F14_PERTURB_INSERTION_ORDER;
2198 if (perturbSlot && !tlsPendingSafeInserts()) {
2199 std::size_t e = chunkMask_ == 0 ? bucket_count() : Chunk::kCapacity;
2200 std::size_t i = itemIndex + tlsMinstdRand(e - itemIndex);
2201 if (!chunk->occupied(i)) {
2202 itemIndex = i;
2203 }
2204 }
2205 }
2206
2207 public:
2208 // user has no control over max_load_factor
2209
2210 void rehash(std::size_t capacity) {
2211 reserve(capacity);
2212 }
2213
2214 void reserve(std::size_t capacity) {
2215 // We want to support the pattern
2216 // map.reserve(map.size() + 2); auto& r1 = map[k1]; auto& r2 = map[k2];
2217 debugModeOnReserve(capacity);
2218 reserveImpl(capacity);
2219 }
2220
  // Ensures that `incoming` more items can be inserted without another
  // rehash, rehashing now if necessary.
2222 void reserveForInsert(size_t incoming = 1) {
2223 FOLLY_SAFE_DCHECK(incoming > 0, "");
2224
2225 auto needed = size() + incoming;
2226 auto chunkCount = chunkMask_ + 1;
2227 auto scale = chunks_->capacityScale();
2228 auto existing = computeCapacity(chunkCount, scale);
2229 if (needed - 1 >= existing) {
2230 reserveForInsertImpl(needed - 1, chunkCount, scale, existing);
2231 }
2232 }
2233
  // Returns {pos, true} if a value was constructed, {pos, false} if an
  // equivalent key was already present. key is only used during the
  // search; all constructor args for an inserted value come from args...
  // key won't be accessed after args are touched.
2237 template <typename K, typename... Args>
2238 std::pair<ItemIter, bool> tryEmplaceValue(K const& key, Args&&... args) {
2239 const auto hp = splitHash(this->computeKeyHash(key));
2240
2241 if (size() > 0) {
2242 auto existing = findImpl(hp, key);
2243 if (!existing.atEnd()) {
2244 return std::make_pair(existing, false);
2245 }
2246 }
2247
2248 debugModeBeforeInsert();
2249
2250 reserveForInsert();
2251
2252 std::size_t index = hp.first;
2253 ChunkPtr chunk = chunks_ + (index & chunkMask_);
2254 auto firstEmpty = chunk->firstEmpty();
2255
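    // If the home chunk is full we follow the probe sequence: each chunk
    // stepped past records an outbound overflow, and the chunk that ends
    // up hosting the item records a hosted overflow. eraseBlank undoes
    // exactly this bookkeeping when the item is removed.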
2256 if (!firstEmpty.hasIndex()) {
2257 std::size_t delta = probeDelta(hp);
2258 do {
2259 chunk->incrOutboundOverflowCount();
2260 index += delta;
2261 chunk = chunks_ + (index & chunkMask_);
2262 firstEmpty = chunk->firstEmpty();
2263 } while (!firstEmpty.hasIndex());
2264 chunk->adjustHostedOverflowCount(Chunk::kIncrHostedOverflowCount);
2265 }
2266 std::size_t itemIndex = firstEmpty.index();
2267
2268 debugModePerturbSlotInsertOrder(chunk, itemIndex);
2269
2270 chunk->setTag(itemIndex, hp.second);
2271 ItemIter iter{chunk, itemIndex};
2272
2273 // insertAtBlank will clear the tag if the constructor throws
2274 insertAtBlank(iter, hp, std::forward<Args>(args)...);
2275
2276 debugModeAfterInsert();
2277
2278 return std::make_pair(iter, true);
2279 }
2280
2281 private:
2282 template <bool Reset>
2283 void clearImpl() noexcept {
2284 if (chunks_ == Chunk::emptyInstance()) {
2285 FOLLY_SAFE_DCHECK(empty() && bucket_count() == 0, "");
2286 return;
2287 }
2288
2289 // turn clear into reset if the table is >= 16 chunks so that
2290 // we don't get too low a load factor
2291 bool willReset = Reset || chunkMask_ + 1 >= 16;
2292
2293 auto origSize = size();
2294 auto origCapacity = bucket_count();
2295 if (willReset) {
2296 this->beforeReset(origSize, origCapacity);
2297 } else {
2298 this->beforeClear(origSize, origCapacity);
2299 }
2300
2301 if (!empty()) {
2302 if (destroyItemOnClear()) {
2303 for (std::size_t ci = 0; ci <= chunkMask_; ++ci) {
2304 ChunkPtr chunk = chunks_ + ci;
2305 auto iter = chunk->occupiedIter();
2306 if (prefetchBeforeDestroy()) {
2307 for (auto piter = iter; piter.hasNext();) {
2308 this->prefetchValue(chunk->item(piter.next()));
2309 }
2310 }
2311 while (iter.hasNext()) {
2312 this->destroyItem(chunk->item(iter.next()));
2313 }
2314 }
2315 }
2316 if (!willReset) {
2317 // It's okay to do this in a separate loop because we only do it
2318 // when the chunk count is small. That avoids a branch when we
2319 // are promoting a clear to a reset for a large table.
2320 auto scale = chunks_[0].capacityScale();
2321 for (std::size_t ci = 0; ci <= chunkMask_; ++ci) {
2322 chunks_[ci].clear();
2323 }
2324 chunks_[0].markEof(scale);
2325 }
2326 if (kEnableItemIteration) {
2327 sizeAndPackedBegin_.packedBegin() = ItemIter{}.pack();
2328 }
2329 sizeAndPackedBegin_.size_ = 0;
2330 }
2331
2332 if (willReset) {
2333 BytePtr rawAllocation = std::pointer_traits<BytePtr>::pointer_to(
2334 *static_cast<uint8_t*>(static_cast<void*>(&*chunks_)));
2335 std::size_t rawSize =
2336 chunkAllocSize(chunkMask_ + 1, chunks_->capacityScale());
2337
2338 chunks_ = Chunk::emptyInstance();
2339 chunkMask_ = 0;
2340
2341 this->afterReset(origSize, origCapacity, rawAllocation, rawSize);
2342 } else {
2343 this->afterClear(origSize, origCapacity);
2344 }
2345 }
2346
2347 void eraseImpl(ItemIter pos, HashPair hp) {
2348 this->destroyItem(pos.item());
2349 adjustSizeAndBeginBeforeErase(pos);
2350 eraseBlank(pos, hp);
2351 }
2352
2353 public:
  // The item must still be hashable during this call. If you want to
  // intercept the value before it is destroyed (to extract it, for
  // example), use eraseIterInto(pos, beforeDestroy).
2357 void eraseIter(ItemIter pos) {
2358 eraseIterInto(pos, [](value_type&&) {});
2359 }
2360
  // The item must still be hashable during this call. If you want to
  // intercept the value before it is destroyed (to extract it, for
  // example), do so in the beforeDestroy callback.
2364 template <typename BeforeDestroy>
2365 void eraseIterInto(ItemIter pos, BeforeDestroy&& beforeDestroy) {
2366 HashPair hp{};
2367 if (pos.chunk()->hostedOverflowCount() != 0) {
2368 hp = splitHash(this->computeItemHash(pos.citem()));
2369 }
2370 beforeDestroy(this->valueAtItemForExtract(pos.item()));
2371 eraseImpl(pos, hp);
2372 }
2373
2374 template <typename K>
2375 std::size_t eraseKey(K const& key) {
2376 return eraseKeyInto(key, [](value_type&&) {});
2377 }
2378
2379 template <typename K, typename BeforeDestroy>
2380 std::size_t eraseKeyInto(K const& key, BeforeDestroy&& beforeDestroy) {
2381 if (UNLIKELY(size() == 0)) {
2382 return 0;
2383 }
2384 auto hp = splitHash(this->computeKeyHash(key));
2385 auto iter = findImpl(hp, key);
2386 if (!iter.atEnd()) {
2387 beforeDestroy(this->valueAtItemForExtract(iter.item()));
2388 eraseImpl(iter, hp);
2389 return 1;
2390 } else {
2391 return 0;
2392 }
2393 }
2394
2395 void clear() noexcept {
2396 if (kIsSanitizeAddress) {
2397 // force recycling of heap memory
2398 auto bc = bucket_count();
2399 reset();
2400 try {
2401 reserveImpl(bc);
2402 } catch (std::bad_alloc const&) {
2403 // ASAN mode only, keep going
2404 }
2405 } else {
2406 clearImpl<false>();
2407 }
2408 }
2409
2410 // Like clear(), but always frees all dynamic storage allocated
2411 // by the table.
2412 void reset() noexcept {
2413 clearImpl<true>();
2414 }
2415
2416 // Get memory footprint, not including sizeof(*this).
2417 std::size_t getAllocatedMemorySize() const {
2418 std::size_t sum = 0;
2419 visitAllocationClasses(
2420 [&sum](std::size_t bytes, std::size_t n) { sum += bytes * n; });
2421 return sum;
2422 }
2423
2424 // Enumerates classes of allocated memory blocks currently owned
2425 // by this table, calling visitor(allocationSize, allocationCount).
2426 // This can be used to get a more accurate indication of memory footprint
2427 // than getAllocatedMemorySize() if you have some way of computing the
2428 // internal fragmentation of the allocator, such as JEMalloc's nallocx.
2429 // The visitor might be called twice with the same allocationSize. The
2430 // visitor's computation should produce the same result for visitor(8,
2431 // 2) as for two calls to visitor(8, 1), for example. The visitor may
2432 // be called with a zero allocationCount.
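  //
  // A sketch of the kind of visitor the comment above has in mind
  // (illustrative only; assumes jemalloc's nallocx(size, flags) and a
  // table instance named `table`):
  //
  //   std::size_t total = 0;
  //   table.visitAllocationClasses([&](std::size_t bytes, std::size_t n) {
  //     if (bytes > 0) {
  //       total += nallocx(bytes, 0) * n;
  //     }
  //   });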
2433 template <typename V>
2434 void visitAllocationClasses(V&& visitor) const {
2435 auto scale = chunks_->capacityScale();
2436 this->visitPolicyAllocationClasses(
2437 scale == 0 ? 0 : chunkAllocSize(chunkMask_ + 1, scale),
2438 size(),
2439 bucket_count(),
2440 visitor);
2441 }
2442
2443 // visitor should take an Item const&
2444 template <typename V>
2445 void visitItems(V&& visitor) const {
2446 if (empty()) {
2447 return;
2448 }
2449 std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_;
2450 auto chunk = &chunks_[0];
2451 for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) {
2452 auto iter = chunk->occupiedIter();
2453 if (prefetchBeforeCopy()) {
2454 for (auto piter = iter; piter.hasNext();) {
2455 this->prefetchValue(chunk->citem(piter.next()));
2456 }
2457 }
2458 while (iter.hasNext()) {
2459 visitor(chunk->citem(iter.next()));
2460 }
2461 }
2462 }
2463
2464 // visitor should take two Item const*
2465 template <typename V>
2466 void visitContiguousItemRanges(V&& visitor) const {
2467 if (empty()) {
2468 return;
2469 }
2470 std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_;
2471 auto chunk = &chunks_[0];
2472 for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) {
2473 for (auto iter = chunk->occupiedRangeIter(); iter.hasNext();) {
2474 auto be = iter.next();
2475 FOLLY_SAFE_DCHECK(
2476 chunk->occupied(be.first) && chunk->occupied(be.second - 1), "");
2477 Item const* b = chunk->itemAddr(be.first);
2478 visitor(b, b + (be.second - be.first));
2479 }
2480 }
2481 }
2482
2483 private:
2484 static std::size_t& histoAt(
2485 std::vector<std::size_t>& histo,
2486 std::size_t index) {
2487 if (histo.size() <= index) {
2488 histo.resize(index + 1);
2489 }
2490 return histo.at(index);
2491 }
2492
2493 public:
2494 // Expensive
2495 F14TableStats computeStats() const {
2496 F14TableStats stats;
2497
2498 if (kIsDebug && kEnableItemIteration) {
2499 // validate iteration
2500 std::size_t n = 0;
2501 ItemIter prev;
2502 for (auto iter = begin(); iter != end(); iter.advance()) {
2503 FOLLY_SAFE_DCHECK(n == 0 || iter.pack() < prev.pack(), "");
2504 ++n;
2505 prev = iter;
2506 }
2507 FOLLY_SAFE_DCHECK(n == size(), "");
2508 }
2509
2510 FOLLY_SAFE_DCHECK(
2511 (chunks_ == Chunk::emptyInstance()) == (bucket_count() == 0), "");
2512
2513 std::size_t n1 = 0;
2514 std::size_t n2 = 0;
2515 auto cc = bucket_count() == 0 ? 0 : chunkMask_ + 1;
2516 for (std::size_t ci = 0; ci < cc; ++ci) {
2517 ChunkPtr chunk = chunks_ + ci;
2518 FOLLY_SAFE_DCHECK(chunk->eof() == (ci == 0), "");
2519
2520 auto iter = chunk->occupiedIter();
2521
2522 std::size_t chunkOccupied = 0;
2523 for (auto piter = iter; piter.hasNext(); piter.next()) {
2524 ++chunkOccupied;
2525 }
2526 n1 += chunkOccupied;
2527
2528 histoAt(stats.chunkOccupancyHisto, chunkOccupied)++;
2529 histoAt(
2530 stats.chunkOutboundOverflowHisto, chunk->outboundOverflowCount())++;
2531 histoAt(stats.chunkHostedOverflowHisto, chunk->hostedOverflowCount())++;
2532
2533 while (iter.hasNext()) {
2534 auto ii = iter.next();
2535 ++n2;
2536
2537 {
2538 auto& item = chunk->citem(ii);
2539 auto hp = splitHash(this->computeItemHash(item));
2540 FOLLY_SAFE_DCHECK(chunk->tag(ii) == hp.second, "");
2541
2542 std::size_t dist = 1;
2543 std::size_t index = hp.first;
2544 std::size_t delta = probeDelta(hp);
2545 while ((index & chunkMask_) != ci) {
2546 index += delta;
2547 ++dist;
2548 }
2549
2550 histoAt(stats.keyProbeLengthHisto, dist)++;
2551 }
2552
2553 // misses could have any tag, so we do the dumb but accurate
2554 // thing and just try them all
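        // A miss keeps probing only while the chunk it lands on has a
        // nonzero outbound overflow count, so the recorded length models
        // the number of chunks a real lookup would examine (capped at the
        // chunk count).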
2555 for (std::size_t ti = 0; ti < 256; ++ti) {
2556 uint8_t tag = static_cast<uint8_t>(ti == 0 ? 1 : 0);
2557 HashPair hp{ci, tag};
2558
2559 std::size_t dist = 1;
2560 std::size_t index = hp.first;
2561 std::size_t delta = probeDelta(hp);
2562 for (std::size_t tries = 0; tries <= chunkMask_ &&
2563 chunks_[index & chunkMask_].outboundOverflowCount() != 0;
2564 ++tries) {
2565 index += delta;
2566 ++dist;
2567 }
2568
2569 histoAt(stats.missProbeLengthHisto, dist)++;
2570 }
2571 }
2572 }
2573
2574 FOLLY_SAFE_DCHECK(n1 == size(), "");
2575 FOLLY_SAFE_DCHECK(n2 == size(), "");
2576
2577#if FOLLY_HAS_RTTI
2578 stats.policy = typeid(Policy).name();
2579#endif
2580 stats.size = size();
2581 stats.valueSize = sizeof(value_type);
2582 stats.bucketCount = bucket_count();
2583 stats.chunkCount = cc;
2584
2585 stats.totalBytes = sizeof(*this) + getAllocatedMemorySize();
2586 stats.overheadBytes = stats.totalBytes - size() * sizeof(value_type);
2587
2588 return stats;
2589 }
2590};
2591} // namespace detail
2592} // namespace f14
2593
2594#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
2595
2596namespace f14 {
2597namespace test {
2598inline void disableInsertOrderRandomization() {
2599 if (kIsSanitizeAddress || kIsDebug) {
2600 detail::tlsPendingSafeInserts(static_cast<std::ptrdiff_t>(
2601 (std::numeric_limits<std::size_t>::max)() / 2));
2602 }
2603}
2604} // namespace test
2605} // namespace f14
2606} // namespace folly
2607