1 | /* |
2 | * Copyright 2017-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #pragma once |
18 | |
19 | #include <cstddef> |
20 | #include <cstdint> |
21 | #include <cstring> |
22 | |
23 | #include <array> |
24 | #include <iterator> |
25 | #include <limits> |
26 | #include <memory> |
27 | #include <new> |
28 | #include <type_traits> |
29 | #include <utility> |
30 | #include <vector> |
31 | |
32 | #include <folly/Bits.h> |
33 | #include <folly/ConstexprMath.h> |
34 | #include <folly/Likely.h> |
35 | #include <folly/Portability.h> |
36 | #include <folly/ScopeGuard.h> |
37 | #include <folly/Traits.h> |
38 | #include <folly/functional/ApplyTuple.h> |
39 | #include <folly/functional/Invoke.h> |
40 | #include <folly/lang/Align.h> |
41 | #include <folly/lang/Assume.h> |
42 | #include <folly/lang/Exception.h> |
43 | #include <folly/lang/Launder.h> |
44 | #include <folly/lang/SafeAssert.h> |
45 | #include <folly/portability/Builtins.h> |
46 | |
47 | #include <folly/container/HeterogeneousAccess.h> |
48 | #include <folly/container/detail/F14Defaults.h> |
49 | #include <folly/container/detail/F14IntrinsicsAvailability.h> |
50 | |
51 | #if FOLLY_ASAN_ENABLED && defined(FOLLY_TLS) |
52 | #define FOLLY_F14_TLS_IF_ASAN FOLLY_TLS |
53 | #else |
54 | #define FOLLY_F14_TLS_IF_ASAN |
55 | #endif |
56 | |
57 | #if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE |
58 | |
59 | #if FOLLY_F14_CRC_INTRINSIC_AVAILABLE |
60 | #if FOLLY_NEON |
61 | #include <arm_acle.h> // __crc32cd |
62 | #else |
63 | #include <nmmintrin.h> // _mm_crc32_u64 |
64 | #endif |
65 | #else |
66 | #ifdef _WIN32 |
67 | #include <intrin.h> // _mul128 in fallback bit mixer |
68 | #endif |
69 | #endif |
70 | |
71 | #if FOLLY_NEON |
#include <arm_neon.h> // uint8x16_t intrinsics
73 | #else // SSE2 |
74 | #include <immintrin.h> // __m128i intrinsics |
75 | #include <xmmintrin.h> // _mm_prefetch |
76 | #endif |
77 | |
78 | #endif |
79 | |
80 | #ifndef FOLLY_F14_PERTURB_INSERTION_ORDER |
81 | #define FOLLY_F14_PERTURB_INSERTION_ORDER folly::kIsDebug |
82 | #endif |
83 | |
84 | namespace folly { |
85 | |
86 | struct F14TableStats { |
87 | char const* policy; |
88 | std::size_t size{0}; |
89 | std::size_t valueSize{0}; |
90 | std::size_t bucketCount{0}; |
91 | std::size_t chunkCount{0}; |
92 | std::vector<std::size_t> chunkOccupancyHisto; |
93 | std::vector<std::size_t> chunkOutboundOverflowHisto; |
94 | std::vector<std::size_t> chunkHostedOverflowHisto; |
95 | std::vector<std::size_t> keyProbeLengthHisto; |
96 | std::vector<std::size_t> missProbeLengthHisto; |
97 | std::size_t totalBytes{0}; |
98 | std::size_t overheadBytes{0}; |
99 | |
100 | private: |
101 | template <typename T> |
102 | static auto computeHelper(T const* m) -> decltype(m->computeStats()) { |
103 | return m->computeStats(); |
104 | } |
105 | |
106 | static F14TableStats computeHelper(...) { |
107 | return {}; |
108 | } |
109 | |
110 | public: |
111 | template <typename T> |
112 | static F14TableStats compute(T const& m) { |
113 | return computeHelper(&m); |
114 | } |
115 | }; |
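
// Illustrative use (a sketch: assumes a container that exposes
// computeStats(), as the F14 maps and sets do when the SIMD table is
// in use; any other argument binds the variadic overload and yields an
// empty stats object):
//
//   folly::F14ValueMap<int, int> m;
//   for (int i = 0; i < 1000; ++i) {
//     m[i] = i;
//   }
//   auto stats = folly::F14TableStats::compute(m);
//   // stats.size == 1000; the histograms describe probe behavior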
116 | |
117 | namespace f14 { |
118 | namespace detail { |
119 | |
120 | template <F14IntrinsicsMode> |
121 | struct F14LinkCheck {}; |
122 | |
123 | template <> |
124 | struct F14LinkCheck<getF14IntrinsicsMode()> { |
125 | // The purpose of this method is to trigger a link failure if |
126 | // compilation flags vary across compilation units. The definition |
127 | // is in F14Table.cpp, so only one of F14LinkCheck<None>::check, |
128 | // F14LinkCheck<Simd>::check, or F14LinkCheck<SimdAndCrc>::check will |
129 | // be available at link time. |
130 | // |
131 | // To cause a link failure the function must be invoked in code that |
132 | // is not optimized away, so we call it on a couple of cold paths |
133 | // (exception handling paths in copy construction and rehash). LTO may |
134 | // remove it entirely, but that's fine. |
135 | static void check() noexcept; |
136 | }; |
137 | |
138 | bool tlsPendingSafeInserts(std::ptrdiff_t delta = 0); |
139 | std::size_t tlsMinstdRand(std::size_t n); |
140 | |
141 | #if defined(_LIBCPP_VERSION) |
142 | |
143 | template <typename K, typename V, typename H> |
144 | struct StdNodeReplica { |
145 | void* next; |
146 | std::size_t hash; |
147 | V value; |
148 | }; |
149 | |
150 | #else |
151 | |
152 | template <typename H> |
153 | struct StdIsFastHash : std::true_type {}; |
154 | template <> |
155 | struct StdIsFastHash<std::hash<long double>> : std::false_type {}; |
156 | template <typename... Args> |
157 | struct StdIsFastHash<std::hash<std::basic_string<Args...>>> : std::false_type { |
158 | }; |
159 | |
160 | // TODO: add specialization for std::basic_string_view |
161 | |
// Mimic the internal node of STL unordered containers to estimate size
163 | template <typename K, typename V, typename H, typename Enable = void> |
164 | struct StdNodeReplica { |
165 | void* next; |
166 | V value; |
167 | }; |
168 | template <typename K, typename V, typename H> |
169 | struct StdNodeReplica< |
170 | K, |
171 | V, |
172 | H, |
173 | std::enable_if_t< |
174 | !StdIsFastHash<H>::value || !is_nothrow_invocable<H, K>::value>> { |
175 | void* next; |
176 | V value; |
177 | std::size_t hash; |
178 | }; |
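
// Rough numbers these replicas produce (illustrative, for 64-bit
// libstdc++): std::hash<uint64_t> is fast and nothrow, so for
// std::unordered_map<uint64_t, uint64_t> the hashless replica applies
// and the estimated node size is
// sizeof(void*) + sizeof(std::pair<uint64_t const, uint64_t>) == 24;
// a slow or throwing hasher selects the variant with a cached hash,
// adding another std::size_t.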
179 | |
180 | #endif |
181 | |
182 | } // namespace detail |
183 | } // namespace f14 |
184 | |
185 | #if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE |
186 | namespace f14 { |
187 | namespace detail { |
188 | template <typename Policy> |
189 | class F14Table; |
190 | } // namespace detail |
191 | } // namespace f14 |
192 | |
193 | class F14HashToken final { |
194 | public: |
195 | F14HashToken() = default; |
196 | |
197 | private: |
198 | using HashPair = std::pair<std::size_t, std::size_t>; |
199 | |
200 | explicit F14HashToken(HashPair hp) : hp_(hp) {} |
201 | explicit operator HashPair() const { |
202 | return hp_; |
203 | } |
204 | |
205 | HashPair hp_; |
206 | |
207 | template <typename Policy> |
208 | friend class f14::detail::F14Table; |
209 | }; |
210 | |
211 | namespace f14 { |
212 | namespace detail { |
213 | //// Defaults should be selected using void |
214 | |
215 | template <typename Arg, typename Default> |
216 | using VoidDefault = |
217 | std::conditional_t<std::is_same<Arg, Default>::value, void, Arg>; |
218 | |
219 | template <typename Arg, typename Default> |
220 | using Defaulted = |
221 | typename std::conditional_t<std::is_same<Arg, void>::value, Default, Arg>; |
222 | |
223 | template < |
224 | typename TableKey, |
225 | typename Hasher, |
226 | typename KeyEqual, |
227 | typename ArgKey, |
228 | typename Void = void> |
229 | struct EligibleForHeterogeneousFind : std::false_type {}; |
230 | |
231 | template < |
232 | typename TableKey, |
233 | typename Hasher, |
234 | typename KeyEqual, |
235 | typename ArgKey> |
236 | struct EligibleForHeterogeneousFind< |
237 | TableKey, |
238 | Hasher, |
239 | KeyEqual, |
240 | ArgKey, |
241 | void_t< |
242 | typename Hasher::is_transparent, |
243 | typename KeyEqual::is_transparent, |
244 | invoke_result_t<Hasher, ArgKey const&>, |
245 | invoke_result_t<KeyEqual, ArgKey const&, TableKey const&>>> |
246 | : std::true_type {}; |
247 | |
248 | template < |
249 | typename TableKey, |
250 | typename Hasher, |
251 | typename KeyEqual, |
252 | typename ArgKey> |
253 | using EligibleForHeterogeneousInsert = Conjunction< |
254 | EligibleForHeterogeneousFind<TableKey, Hasher, KeyEqual, ArgKey>, |
255 | std::is_constructible<TableKey, ArgKey>>; |
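
// For example (a sketch assuming a transparent string hasher and key
// equality, such as the HeterogeneousAccess defaults for std::string):
// with TableKey = std::string and ArgKey = std::string_view, both
// functors declare is_transparent and are invocable on string_view, so
// EligibleForHeterogeneousFind is true_type; std::string is also
// constructible from std::string_view, making heterogeneous insert
// eligible as well.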
256 | |
257 | template < |
258 | typename TableKey, |
259 | typename Hasher, |
260 | typename KeyEqual, |
261 | typename KeyArg0OrBool, |
262 | typename... KeyArgs> |
263 | using KeyTypeForEmplaceHelper = std::conditional_t< |
264 | sizeof...(KeyArgs) == 1 && |
265 | (std::is_same<remove_cvref_t<KeyArg0OrBool>, TableKey>::value || |
266 | EligibleForHeterogeneousFind< |
267 | TableKey, |
268 | Hasher, |
269 | KeyEqual, |
270 | KeyArg0OrBool>::value), |
271 | KeyArg0OrBool&&, |
272 | TableKey>; |
273 | |
274 | template < |
275 | typename TableKey, |
276 | typename Hasher, |
277 | typename KeyEqual, |
278 | typename... KeyArgs> |
279 | using KeyTypeForEmplace = KeyTypeForEmplaceHelper< |
280 | TableKey, |
281 | Hasher, |
282 | KeyEqual, |
283 | std::tuple_element_t<0, std::tuple<KeyArgs..., bool>>, |
284 | KeyArgs...>; |
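
// For example, with TableKey = std::string: if KeyArgs is
// {std::string&&} then remove_cvref_t matches TableKey, so the key is
// forwarded through as std::string&&; if KeyArgs is {char const*} and
// the hasher is not transparent, neither test passes and TableKey
// itself is selected, so a temporary std::string key is constructed
// for the probe.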
285 | |
286 | //////////////// |
287 | |
288 | template <typename T> |
289 | FOLLY_ALWAYS_INLINE static void prefetchAddr(T const* ptr) { |
290 | #ifndef _WIN32 |
291 | __builtin_prefetch(static_cast<void const*>(ptr)); |
292 | #elif FOLLY_NEON |
293 | __prefetch(static_cast<void const*>(ptr)); |
294 | #else |
295 | _mm_prefetch( |
296 | static_cast<char const*>(static_cast<void const*>(ptr)), _MM_HINT_T0); |
297 | #endif |
298 | } |
299 | |
300 | template <typename T> |
301 | FOLLY_ALWAYS_INLINE static unsigned findFirstSetNonZero(T mask) { |
302 | assume(mask != 0); |
303 | if (sizeof(mask) == sizeof(unsigned)) { |
304 | return __builtin_ctz(static_cast<unsigned>(mask)); |
305 | } else { |
306 | return __builtin_ctzll(mask); |
307 | } |
308 | } |
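
// e.g. findFirstSetNonZero(uint32_t{0b101000}) == 3.  Calling it with
// a zero mask is undefined behavior: the assume() above licenses the
// compiler to treat the ctz intrinsics' results as always valid.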
309 | |
310 | #if FOLLY_NEON |
311 | using TagVector = uint8x16_t; |
312 | |
313 | using MaskType = uint64_t; |
314 | |
315 | constexpr unsigned kMaskSpacing = 4; |
316 | #else // SSE2 |
317 | using TagVector = __m128i; |
318 | |
319 | using MaskType = uint32_t; |
320 | |
321 | constexpr unsigned kMaskSpacing = 1; |
322 | #endif |
323 | |
// We could use unaligned loads to relax this requirement, but that
// would both incur a performance penalty and require a bulkier packed
// ItemIter format
327 | constexpr std::size_t kRequiredVectorAlignment = |
328 | constexpr_max(std::size_t{16}, alignof(max_align_t)); |
329 | |
330 | using EmptyTagVectorType = std::aligned_storage_t< |
331 | sizeof(TagVector) + kRequiredVectorAlignment, |
332 | alignof(max_align_t)>; |
333 | |
334 | extern EmptyTagVectorType kEmptyTagVector; |
335 | |
336 | template <unsigned BitCount> |
337 | struct FullMask { |
338 | static constexpr MaskType value = |
339 | (FullMask<BitCount - 1>::value << kMaskSpacing) + 1; |
340 | }; |
341 | |
342 | template <> |
343 | struct FullMask<1> : std::integral_constant<MaskType, 1> {}; |
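
// Worked example: with SSE2 (kMaskSpacing == 1), FullMask<14>::value ==
// 0x3fff, one bit per tag.  With NEON (kMaskSpacing == 4) each tag owns
// a nibble, so FullMask<14>::value == 0x0011111111111111.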
344 | |
345 | #if FOLLY_ARM |
346 | // Mask iteration is different for ARM because that is the only platform |
347 | // for which the mask is bigger than a register. |
348 | |
349 | // Iterates a mask, optimized for the case that only a few bits are set |
350 | class SparseMaskIter { |
  static_assert(kMaskSpacing == 4, "");
352 | |
353 | uint32_t interleavedMask_; |
354 | |
355 | public: |
356 | explicit SparseMaskIter(MaskType mask) |
357 | : interleavedMask_{static_cast<uint32_t>(((mask >> 32) << 2) | mask)} {} |
358 | |
359 | bool hasNext() { |
360 | return interleavedMask_ != 0; |
361 | } |
362 | |
363 | unsigned next() { |
    FOLLY_SAFE_DCHECK(hasNext(), "");
365 | unsigned i = findFirstSetNonZero(interleavedMask_); |
366 | interleavedMask_ &= (interleavedMask_ - 1); |
367 | return ((i >> 2) | (i << 2)) & 0xf; |
368 | } |
369 | }; |
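
// Worked example of the interleaving above: in the 64-bit NEON mask a
// match at index i sets bit 4 * i.  ((mask >> 32) << 2) | mask folds
// the high eight nibbles in between the low eight, so a match at index
// i >= 8 lands on bit 4 * (i - 8) + 2.  A lone match at index 9 thus
// sets bit 6 of interleavedMask_, next() finds i == 6, and
// ((6 >> 2) | (6 << 2)) & 0xf == (1 | 24) & 0xf == 9, the original
// index.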
370 | |
371 | // Iterates a mask, optimized for the case that most bits are set |
372 | class DenseMaskIter { |
  static_assert(kMaskSpacing == 4, "");
374 | |
375 | std::size_t count_; |
376 | unsigned index_; |
377 | uint8_t const* tags_; |
378 | |
379 | public: |
380 | explicit DenseMaskIter(uint8_t const* tags, MaskType mask) { |
381 | if (mask == 0) { |
382 | count_ = 0; |
383 | } else { |
384 | count_ = popcount(static_cast<uint32_t>(((mask >> 32) << 2) | mask)); |
385 | if (LIKELY((mask & 1) != 0)) { |
386 | index_ = 0; |
387 | } else { |
388 | index_ = findFirstSetNonZero(mask) / kMaskSpacing; |
389 | } |
390 | tags_ = tags; |
391 | } |
392 | } |
393 | |
394 | bool hasNext() { |
395 | return count_ > 0; |
396 | } |
397 | |
398 | unsigned next() { |
399 | auto rv = index_; |
400 | --count_; |
401 | if (count_ > 0) { |
402 | do { |
403 | ++index_; |
404 | } while ((tags_[index_] & 0x80) == 0); |
405 | } |
    FOLLY_SAFE_DCHECK(index_ < 16, "");
407 | return rv; |
408 | } |
409 | }; |
410 | |
411 | #else |
412 | // Iterates a mask, optimized for the case that only a few bits are set |
413 | class SparseMaskIter { |
414 | MaskType mask_; |
415 | |
416 | public: |
417 | explicit SparseMaskIter(MaskType mask) : mask_{mask} {} |
418 | |
419 | bool hasNext() { |
420 | return mask_ != 0; |
421 | } |
422 | |
423 | unsigned next() { |
    FOLLY_SAFE_DCHECK(hasNext(), "");
425 | unsigned i = findFirstSetNonZero(mask_); |
426 | mask_ &= (mask_ - 1); |
427 | return i / kMaskSpacing; |
428 | } |
429 | }; |
430 | |
431 | // Iterates a mask, optimized for the case that most bits are set |
432 | class DenseMaskIter { |
433 | MaskType mask_; |
434 | unsigned index_{0}; |
435 | |
436 | public: |
437 | explicit DenseMaskIter(uint8_t const*, MaskType mask) : mask_{mask} {} |
438 | |
439 | bool hasNext() { |
440 | return mask_ != 0; |
441 | } |
442 | |
443 | unsigned next() { |
    FOLLY_SAFE_DCHECK(hasNext(), "");
445 | if (LIKELY((mask_ & 1) != 0)) { |
446 | mask_ >>= kMaskSpacing; |
447 | return index_++; |
448 | } else { |
449 | unsigned s = findFirstSetNonZero(mask_); |
450 | unsigned rv = index_ + (s / kMaskSpacing); |
451 | mask_ >>= (s + kMaskSpacing); |
452 | index_ = rv + 1; |
453 | return rv; |
454 | } |
455 | } |
456 | }; |
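
// e.g. with kMaskSpacing == 1 and mask 0b1101, either iterator visits
// indexes 0, 2, 3.  DenseMaskIter walks forward expecting few holes;
// SparseMaskIter repeatedly clears the lowest set bit, which is cheaper
// when only one or two bits are set.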
457 | #endif |
458 | |
// Iterates a mask, returning pairs of [begin, end) indexes covering
// blocks of set bits
461 | class MaskRangeIter { |
462 | MaskType mask_; |
463 | unsigned shift_{0}; |
464 | |
465 | public: |
466 | explicit MaskRangeIter(MaskType mask) { |
467 | // If kMaskSpacing is > 1 then there will be empty bits even for |
468 | // contiguous ranges. Fill them in. |
469 | mask_ = mask * ((1 << kMaskSpacing) - 1); |
470 | } |
471 | |
472 | bool hasNext() { |
473 | return mask_ != 0; |
474 | } |
475 | |
476 | std::pair<unsigned, unsigned> next() { |
    FOLLY_SAFE_DCHECK(hasNext(), "");
478 | auto s = shift_; |
479 | unsigned b = findFirstSetNonZero(mask_); |
480 | unsigned e = findFirstSetNonZero(~(mask_ | (mask_ - 1))); |
481 | mask_ >>= e; |
482 | shift_ = s + e; |
483 | return std::make_pair((s + b) / kMaskSpacing, (s + e) / kMaskSpacing); |
484 | } |
485 | }; |
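
// e.g. with kMaskSpacing == 1 and mask 0b0111'0110, next() yields
// {1, 3} and then {4, 7}, the two maximal runs of set bits.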
486 | |
487 | // Holds the result of an index query that has an optional result, |
488 | // interpreting a mask of 0 to be the empty answer and the index of the |
489 | // last set bit to be the non-empty answer |
490 | class LastOccupiedInMask { |
491 | MaskType mask_; |
492 | |
493 | public: |
494 | explicit LastOccupiedInMask(MaskType mask) : mask_{mask} {} |
495 | |
496 | bool hasIndex() const { |
497 | return mask_ != 0; |
498 | } |
499 | |
500 | unsigned index() const { |
501 | assume(mask_ != 0); |
502 | return (findLastSet(mask_) - 1) / kMaskSpacing; |
503 | } |
504 | }; |
505 | |
506 | // Holds the result of an index query that has an optional result, |
507 | // interpreting a mask of 0 to be the empty answer and the index of the |
508 | // first set bit to be the non-empty answer |
509 | class FirstEmptyInMask { |
510 | MaskType mask_; |
511 | |
512 | public: |
513 | explicit FirstEmptyInMask(MaskType mask) : mask_{mask} {} |
514 | |
515 | bool hasIndex() const { |
516 | return mask_ != 0; |
517 | } |
518 | |
519 | unsigned index() const { |
    FOLLY_SAFE_DCHECK(mask_ != 0, "");
521 | return findFirstSetNonZero(mask_) / kMaskSpacing; |
522 | } |
523 | }; |
524 | |
525 | template <typename ItemType> |
526 | struct alignas(kRequiredVectorAlignment) F14Chunk { |
527 | using Item = ItemType; |
528 | |
529 | // For our 16 byte vector alignment (and assuming alignof(Item) >= |
530 | // 4) kCapacity of 14 is the most space efficient. Slightly smaller |
531 | // or larger capacities can help with cache alignment in a couple of |
  // cases without wasting too much space, but once the items are larger
  // than that we're unlikely to get much benefit anyway.  The only case we
534 | // optimize is using kCapacity of 12 for 4 byte items, which makes the |
535 | // chunk take exactly 1 cache line, and adding 16 bytes of padding for |
536 | // 16 byte items so that a chunk takes exactly 4 cache lines. |
537 | static constexpr unsigned kCapacity = sizeof(Item) == 4 ? 12 : 14; |
538 | |
539 | static constexpr unsigned kDesiredCapacity = kCapacity - 2; |
540 | |
541 | static constexpr unsigned kAllocatedCapacity = |
542 | kCapacity + (sizeof(Item) == 16 ? 1 : 0); |
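
  // Resulting chunk sizes for the common cases (16 bytes of tags_ +
  // control_ + outboundOverflowCount_ metadata up front):
  //   4-byte items:  16 + 12 * 4  == 64 bytes,  exactly 1 cache line
  //   8-byte items:  16 + 14 * 8  == 128 bytes, exactly 2 cache lines
  //   16-byte items: 16 + 15 * 16 == 256 bytes, exactly 4 cache lines
  //   (the 15th slot is the kAllocatedCapacity padding mentioned above)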
543 | |
  // If kCapacity == 12 then we get 16 bits of capacityScale by using
  // tags 12 and 13; otherwise we only get the low 4 bits of control_
546 | static constexpr std::size_t kCapacityScaleBits = kCapacity == 12 ? 16 : 4; |
547 | static constexpr std::size_t kCapacityScaleShift = kCapacityScaleBits - 4; |
548 | |
549 | static constexpr MaskType kFullMask = FullMask<kCapacity>::value; |
550 | |
551 | // Non-empty tags have their top bit set. tags_ array might be bigger |
552 | // than kCapacity to keep alignment of first item. |
553 | std::array<uint8_t, 14> tags_; |
554 | |
555 | // Bits 0..3 of chunk 0 record the scaling factor between the number of |
556 | // chunks and the max size without rehash. Bits 4-7 in any chunk are a |
557 | // 4-bit counter of the number of values in this chunk that were placed |
558 | // because they overflowed their desired chunk (hostedOverflowCount). |
559 | uint8_t control_; |
560 | |
561 | // The number of values that would have been placed into this chunk if |
562 | // there had been space, including values that also overflowed previous |
563 | // full chunks. This value saturates; once it becomes 255 it no longer |
564 | // increases nor decreases. |
565 | uint8_t outboundOverflowCount_; |
566 | |
567 | std::array<aligned_storage_for_t<Item>, kAllocatedCapacity> rawItems_; |
568 | |
569 | static F14Chunk* emptyInstance() { |
570 | auto raw = reinterpret_cast<char*>(&kEmptyTagVector); |
571 | if (kRequiredVectorAlignment > alignof(max_align_t)) { |
572 | auto delta = kRequiredVectorAlignment - |
573 | (reinterpret_cast<uintptr_t>(raw) % kRequiredVectorAlignment); |
574 | raw += delta; |
575 | } |
576 | auto rv = reinterpret_cast<F14Chunk*>(raw); |
577 | FOLLY_SAFE_DCHECK( |
        (reinterpret_cast<uintptr_t>(rv) % kRequiredVectorAlignment) == 0, "");
579 | return rv; |
580 | } |
581 | |
582 | void clear() { |
583 | // tags_ = {}; control_ = 0; outboundOverflowCount_ = 0; |
584 | |
585 | // gcc < 6 doesn't exploit chunk alignment to generate the optimal |
586 | // SSE clear from memset. This is very hot code, so it is worth |
587 | // handling that case specially. |
588 | #if FOLLY_SSE >= 2 && __GNUC__ <= 5 && !__clang__ |
589 | // this doesn't violate strict aliasing rules because __m128i is |
590 | // tagged as __may_alias__ |
591 | auto* v = static_cast<__m128i*>(static_cast<void*>(&tags_[0])); |
592 | _mm_store_si128(v, _mm_setzero_si128()); |
593 | #else |
594 | std::memset(&tags_[0], '\0', 16); |
595 | #endif |
596 | } |
597 | |
598 | void copyOverflowInfoFrom(F14Chunk const& rhs) { |
    FOLLY_SAFE_DCHECK(hostedOverflowCount() == 0, "");
600 | control_ += static_cast<uint8_t>(rhs.control_ & 0xf0); |
601 | outboundOverflowCount_ = rhs.outboundOverflowCount_; |
602 | } |
603 | |
604 | unsigned hostedOverflowCount() const { |
605 | return control_ >> 4; |
606 | } |
607 | |
608 | static constexpr uint8_t kIncrHostedOverflowCount = 0x10; |
609 | static constexpr uint8_t kDecrHostedOverflowCount = |
610 | static_cast<uint8_t>(-0x10); |
611 | |
612 | void adjustHostedOverflowCount(uint8_t op) { |
613 | control_ += op; |
614 | } |
615 | |
616 | bool eof() const { |
617 | return capacityScale() != 0; |
618 | } |
619 | |
620 | std::size_t capacityScale() const { |
621 | if (kCapacityScaleBits == 4) { |
622 | return control_ & 0xf; |
623 | } else { |
624 | uint16_t v; |
625 | std::memcpy(&v, &tags_[12], 2); |
626 | return v; |
627 | } |
628 | } |
629 | |
630 | void setCapacityScale(std::size_t scale) { |
631 | FOLLY_SAFE_DCHECK( |
632 | this != emptyInstance() && scale > 0 && |
633 | scale < (std::size_t{1} << kCapacityScaleBits), |
634 | "" ); |
635 | if (kCapacityScaleBits == 4) { |
636 | control_ = (control_ & ~0xf) | static_cast<uint8_t>(scale); |
637 | } else { |
638 | uint16_t v = static_cast<uint16_t>(scale); |
639 | std::memcpy(&tags_[12], &v, 2); |
640 | } |
641 | } |
642 | |
643 | void markEof(std::size_t scale) { |
644 | folly::assume(control_ == 0); |
645 | setCapacityScale(scale); |
646 | } |
647 | |
648 | unsigned outboundOverflowCount() const { |
649 | return outboundOverflowCount_; |
650 | } |
651 | |
652 | void incrOutboundOverflowCount() { |
653 | if (outboundOverflowCount_ != 255) { |
654 | ++outboundOverflowCount_; |
655 | } |
656 | } |
657 | |
658 | void decrOutboundOverflowCount() { |
659 | if (outboundOverflowCount_ != 255) { |
660 | --outboundOverflowCount_; |
661 | } |
662 | } |
663 | |
664 | std::size_t tag(std::size_t index) const { |
665 | return tags_[index]; |
666 | } |
667 | |
668 | void setTag(std::size_t index, std::size_t tag) { |
669 | FOLLY_SAFE_DCHECK( |
        this != emptyInstance() && tag >= 0x80 && tag <= 0xff, "");
671 | tags_[index] = static_cast<uint8_t>(tag); |
672 | } |
673 | |
674 | void clearTag(std::size_t index) { |
675 | tags_[index] = 0; |
676 | } |
677 | |
678 | #if FOLLY_NEON |
679 | //////// |
680 | // Tag filtering using NEON intrinsics |
681 | |
682 | SparseMaskIter tagMatchIter(std::size_t needle) const { |
    FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
684 | uint8x16_t tagV = vld1q_u8(&tags_[0]); |
685 | auto needleV = vdupq_n_u8(static_cast<uint8_t>(needle)); |
686 | auto eqV = vceqq_u8(tagV, needleV); |
687 | // get info from every byte into the bottom half of every uint16_t |
    // by shifting right 4, then narrowing to get it into a 64-bit vector
689 | uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(eqV), 4); |
690 | uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask; |
691 | return SparseMaskIter(mask); |
692 | } |
693 | |
694 | MaskType occupiedMask() const { |
695 | uint8x16_t tagV = vld1q_u8(&tags_[0]); |
696 | // signed shift extends top bit to all bits |
697 | auto occupiedV = |
698 | vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u8(tagV), 7)); |
699 | uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(occupiedV), 4); |
700 | return vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask; |
701 | } |
702 | #else |
703 | //////// |
704 | // Tag filtering using SSE2 intrinsics |
705 | |
706 | TagVector const* tagVector() const { |
707 | return static_cast<TagVector const*>(static_cast<void const*>(&tags_[0])); |
708 | } |
709 | |
710 | SparseMaskIter tagMatchIter(std::size_t needle) const { |
    FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
712 | auto tagV = _mm_load_si128(tagVector()); |
713 | |
714 | // TRICKY! It may seem strange to have a std::size_t needle and narrow |
715 | // it at the last moment, rather than making HashPair::second be a |
716 | // uint8_t, but the latter choice sometimes leads to a performance |
717 | // problem. |
718 | // |
719 | // On architectures with SSE2 but not AVX2, _mm_set1_epi8 expands |
720 | // to multiple instructions. One of those is a MOVD of either 4 or |
721 | // 8 byte width. Only the bottom byte of that move actually affects |
722 | // the result, but if a 1-byte needle has been spilled then this will |
723 | // be a 4 byte load. GCC 5.5 has been observed to reload needle |
724 | // (or perhaps fuse a reload and part of a previous static_cast) |
725 | // needle using a MOVZX with a 1 byte load in parallel with the MOVD. |
726 | // This combination causes a failure of store-to-load forwarding, |
727 | // which has a big performance penalty (60 nanoseconds per find on |
728 | // a microbenchmark). Keeping needle >= 4 bytes avoids the problem |
729 | // and also happens to result in slightly more compact assembly. |
730 | auto needleV = _mm_set1_epi8(static_cast<uint8_t>(needle)); |
731 | auto eqV = _mm_cmpeq_epi8(tagV, needleV); |
732 | auto mask = _mm_movemask_epi8(eqV) & kFullMask; |
733 | return SparseMaskIter{mask}; |
734 | } |
735 | |
736 | MaskType occupiedMask() const { |
737 | auto tagV = _mm_load_si128(tagVector()); |
738 | return _mm_movemask_epi8(tagV) & kFullMask; |
739 | } |
740 | #endif |
741 | |
742 | DenseMaskIter occupiedIter() const { |
743 | return DenseMaskIter{&tags_[0], occupiedMask()}; |
744 | } |
745 | |
746 | MaskRangeIter occupiedRangeIter() const { |
747 | return MaskRangeIter{occupiedMask()}; |
748 | } |
749 | |
750 | LastOccupiedInMask lastOccupied() const { |
751 | return LastOccupiedInMask{occupiedMask()}; |
752 | } |
753 | |
754 | FirstEmptyInMask firstEmpty() const { |
755 | return FirstEmptyInMask{occupiedMask() ^ kFullMask}; |
756 | } |
757 | |
758 | bool occupied(std::size_t index) const { |
    FOLLY_SAFE_DCHECK(tags_[index] == 0 || (tags_[index] & 0x80) != 0, "");
760 | return tags_[index] != 0; |
761 | } |
762 | |
763 | Item* itemAddr(std::size_t i) const { |
764 | return static_cast<Item*>( |
765 | const_cast<void*>(static_cast<void const*>(&rawItems_[i]))); |
766 | } |
767 | |
768 | Item& item(std::size_t i) { |
    FOLLY_SAFE_DCHECK(this->occupied(i), "");
770 | return *launder(itemAddr(i)); |
771 | } |
772 | |
773 | Item const& citem(std::size_t i) const { |
    FOLLY_SAFE_DCHECK(this->occupied(i), "");
775 | return *launder(itemAddr(i)); |
776 | } |
777 | |
778 | static F14Chunk& owner(Item& item, std::size_t index) { |
779 | auto rawAddr = |
780 | static_cast<uint8_t*>(static_cast<void*>(std::addressof(item))) - |
781 | offsetof(F14Chunk, rawItems_) - index * sizeof(Item); |
782 | auto chunkAddr = static_cast<F14Chunk*>(static_cast<void*>(rawAddr)); |
    FOLLY_SAFE_DCHECK(std::addressof(item) == chunkAddr->itemAddr(index), "");
784 | return *chunkAddr; |
785 | } |
786 | }; |
787 | |
788 | //////////////// |
789 | |
790 | // PackedChunkItemPtr points to an Item in an F14Chunk, allowing both the |
791 | // Item& and its index to be recovered. It sorts by the address of the |
792 | // item, and it only works for items that are in a properly-aligned chunk. |
793 | |
794 | // generic form, not actually packed |
795 | template <typename Ptr> |
796 | class PackedChunkItemPtr { |
797 | public: |
798 | PackedChunkItemPtr(Ptr p, std::size_t i) noexcept : ptr_{p}, index_{i} { |
    FOLLY_SAFE_DCHECK(ptr_ != nullptr || index_ == 0, "");
800 | } |
801 | |
802 | Ptr ptr() const { |
803 | return ptr_; |
804 | } |
805 | |
806 | std::size_t index() const { |
807 | return index_; |
808 | } |
809 | |
810 | bool operator<(PackedChunkItemPtr const& rhs) const { |
    FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
812 | return ptr_ < rhs.ptr_; |
813 | } |
814 | |
815 | bool operator==(PackedChunkItemPtr const& rhs) const { |
    FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
817 | return ptr_ == rhs.ptr_; |
818 | } |
819 | |
820 | bool operator!=(PackedChunkItemPtr const& rhs) const { |
821 | return !(*this == rhs); |
822 | } |
823 | |
824 | private: |
825 | Ptr ptr_; |
826 | std::size_t index_; |
827 | }; |
828 | |
829 | // Bare pointer form, packed into a uintptr_t. Uses only bits wasted by |
830 | // alignment, so it works on 32-bit and 64-bit platforms |
831 | template <typename T> |
832 | class PackedChunkItemPtr<T*> { |
  static_assert((alignof(F14Chunk<T>) % 16) == 0, "");
834 | |
835 | // Chunks are 16-byte aligned, so we can maintain a packed pointer to a |
836 | // chunk item by packing the 4-bit item index into the least significant |
837 | // bits of a pointer to the chunk itself. This makes ItemIter::pack |
838 | // more expensive, however, since it has to compute the chunk address. |
839 | // |
840 | // Chunk items have varying alignment constraints, so it would seem |
841 | // to be that we can't do a similar trick while using only bit masking |
842 | // operations on the Item* itself. It happens to be, however, that if |
843 | // sizeof(Item) is not a multiple of 16 then we can recover a portion |
844 | // of the index bits from the knowledge that the Item-s are stored in |
845 | // an array that is itself 16-byte aligned. |
846 | // |
847 | // If kAlignBits is the number of trailing zero bits in sizeof(Item) |
848 | // (up to 4), then we can borrow those bits to store kAlignBits of the |
849 | // index directly. We can recover (4 - kAlignBits) bits of the index |
850 | // from the item pointer itself, by defining/observing that |
851 | // |
852 | // A = kAlignBits (A <= 4) |
853 | // |
854 | // S = (sizeof(Item) % 16) >> A (shifted-away bits are all zero) |
855 | // |
856 | // R = (itemPtr % 16) >> A (shifted-away bits are all zero) |
857 | // |
858 | // M = 16 >> A |
859 | // |
860 | // itemPtr % 16 = (index * sizeof(Item)) % 16 |
861 | // |
862 | // (R * 2^A) % 16 = (index * (sizeof(Item) % 16)) % 16 |
863 | // |
864 | // (R * 2^A) % 16 = (index * 2^A * S) % 16 |
865 | // |
866 | // R % M = (index * S) % M |
867 | // |
868 | // S is relatively prime with M, so a multiplicative inverse is easy |
869 | // to compute |
870 | // |
871 | // Sinv = S^(M - 1) % M |
872 | // |
873 | // (R * Sinv) % M = index % M |
874 | // |
875 | // This lets us recover the bottom bits of the index. When sizeof(T) |
876 | // is 8-byte aligned kSizeInverse will always be 1. When sizeof(T) |
877 | // is 4-byte aligned kSizeInverse will be either 1 or 3. |
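  //
  // Worked example: sizeof(Item) == 12 (4-byte aligned) and index == 6.
  // Then A == 2, S == (12 % 16) >> 2 == 3, M == 16 >> 2 == 4, and
  // Sinv == 3^3 % 4 == 3.  The top two index bits (6 >> 2 == 1) are
  // stored directly in the pointer's low bits.  For the rest,
  // itemPtr % 16 == (6 * 12) % 16 == 8, so R == 8 >> 2 == 2 and
  // (R * Sinv) % M == 6 % 4 == 2, the low two bits of the index;
  // recombining gives (1 << 2) | 2 == 6.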
878 | |
879 | // returns pow(x, y) % m |
880 | static constexpr uintptr_t powerMod(uintptr_t x, uintptr_t y, uintptr_t m) { |
881 | return y == 0 ? 1 : (x * powerMod(x, y - 1, m)) % m; |
882 | } |
883 | |
884 | static constexpr uintptr_t kIndexBits = 4; |
885 | static constexpr uintptr_t kIndexMask = (uintptr_t{1} << kIndexBits) - 1; |
886 | |
887 | static constexpr uintptr_t kAlignBits = constexpr_min( |
888 | uintptr_t{4}, |
889 | constexpr_find_first_set(uintptr_t{sizeof(T)}) - 1); |
890 | |
891 | static constexpr uintptr_t kAlignMask = (uintptr_t{1} << kAlignBits) - 1; |
892 | |
893 | static constexpr uintptr_t kModulus = uintptr_t{1} |
894 | << (kIndexBits - kAlignBits); |
895 | static constexpr uintptr_t kSizeInverse = |
896 | powerMod(sizeof(T) >> kAlignBits, kModulus - 1, kModulus); |
897 | |
898 | public: |
899 | PackedChunkItemPtr(T* p, std::size_t i) noexcept { |
900 | uintptr_t encoded = i >> (kIndexBits - kAlignBits); |
901 | assume((encoded & ~kAlignMask) == 0); |
902 | raw_ = reinterpret_cast<uintptr_t>(p) | encoded; |
    FOLLY_SAFE_DCHECK(p == ptr(), "");
    FOLLY_SAFE_DCHECK(i == index(), "");
905 | } |
906 | |
907 | T* ptr() const { |
908 | return reinterpret_cast<T*>(raw_ & ~kAlignMask); |
909 | } |
910 | |
911 | std::size_t index() const { |
912 | auto encoded = (raw_ & kAlignMask) << (kIndexBits - kAlignBits); |
913 | auto deduced = |
914 | ((raw_ >> kAlignBits) * kSizeInverse) & (kIndexMask >> kAlignBits); |
915 | return encoded | deduced; |
916 | } |
917 | |
918 | bool operator<(PackedChunkItemPtr const& rhs) const { |
919 | return raw_ < rhs.raw_; |
920 | } |
921 | bool operator==(PackedChunkItemPtr const& rhs) const { |
922 | return raw_ == rhs.raw_; |
923 | } |
924 | bool operator!=(PackedChunkItemPtr const& rhs) const { |
925 | return !(*this == rhs); |
926 | } |
927 | |
928 | private: |
929 | uintptr_t raw_; |
930 | }; |
931 | |
932 | template <typename ChunkPtr> |
933 | class F14ItemIter { |
934 | private: |
935 | using Chunk = typename std::pointer_traits<ChunkPtr>::element_type; |
936 | |
937 | public: |
938 | using Item = typename Chunk::Item; |
939 | using ItemPtr = typename std::pointer_traits<ChunkPtr>::template rebind<Item>; |
940 | using ItemConstPtr = |
941 | typename std::pointer_traits<ChunkPtr>::template rebind<Item const>; |
942 | |
943 | using Packed = PackedChunkItemPtr<ItemPtr>; |
944 | |
945 | //// PUBLIC |
946 | |
947 | F14ItemIter() noexcept : itemPtr_{nullptr}, index_{0} {} |
948 | |
949 | // default copy and move constructors and assignment operators are correct |
950 | |
951 | explicit F14ItemIter(Packed const& packed) |
952 | : itemPtr_{packed.ptr()}, index_{packed.index()} {} |
953 | |
954 | F14ItemIter(ChunkPtr chunk, std::size_t index) |
955 | : itemPtr_{std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index))}, |
956 | index_{index} { |
    FOLLY_SAFE_DCHECK(index < Chunk::kCapacity, "");
958 | assume( |
959 | std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index)) != |
960 | nullptr); |
961 | assume(itemPtr_ != nullptr); |
962 | } |
963 | |
964 | FOLLY_ALWAYS_INLINE void advanceImpl(bool checkEof, bool likelyDead) { |
965 | auto c = chunk(); |
966 | |
967 | // common case is packed entries |
968 | while (index_ > 0) { |
969 | --index_; |
970 | --itemPtr_; |
971 | if (LIKELY(c->occupied(index_))) { |
972 | return; |
973 | } |
974 | } |
975 | |
976 | // It's fairly common for an iterator to be advanced and then become |
977 | // dead, for example in the return value from erase(iter) or in |
978 | // the last step of a loop. We'd like to make sure that the entire |
979 | // advance() method can be eliminated by the compiler's dead code |
980 | // elimination pass. To do that it must eliminate the loops, which |
981 | // requires it to prove that they have no side effects. It's easy |
982 | // to show that there are no escaping stores, but at the moment |
983 | // compilers also consider an infinite loop to be a side effect. |
984 | // (There are parts of the standard that would allow them to treat |
985 | // this as undefined behavior, but at the moment they don't exploit |
986 | // those clauses.) |
987 | // |
988 | // The following loop should really be a while loop, which would |
989 | // save a register, some instructions, and a conditional branch, |
990 | // but by writing it as a for loop the compiler can prove to itself |
991 | // that it will eventually terminate. (No matter that even if the |
992 | // loop executed in a single cycle it would take about 200 years to |
993 | // run all 2^64 iterations.) |
994 | // |
995 | // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82776 has the bug we |
    // filed about the issue.  The loop below stands in for "while (true)".
997 | for (std::size_t i = 1; !likelyDead || i != 0; ++i) { |
998 | if (checkEof) { |
999 | // exhausted the current chunk |
1000 | if (UNLIKELY(c->eof())) { |
          FOLLY_SAFE_DCHECK(index_ == 0, "");
1002 | itemPtr_ = nullptr; |
1003 | return; |
1004 | } |
1005 | } else { |
        FOLLY_SAFE_DCHECK(!c->eof(), "");
1007 | } |
1008 | --c; |
1009 | auto last = c->lastOccupied(); |
1010 | if (checkEof && !likelyDead) { |
1011 | prefetchAddr(&*c - 1); |
1012 | } |
1013 | if (LIKELY(last.hasIndex())) { |
1014 | index_ = last.index(); |
1015 | itemPtr_ = std::pointer_traits<ItemPtr>::pointer_to(c->item(index_)); |
1016 | return; |
1017 | } |
1018 | } |
1019 | } |
1020 | |
1021 | void precheckedAdvance() { |
1022 | advanceImpl(false, false); |
1023 | } |
1024 | |
1025 | FOLLY_ALWAYS_INLINE void advance() { |
1026 | advanceImpl(true, false); |
1027 | } |
1028 | |
1029 | FOLLY_ALWAYS_INLINE void advanceLikelyDead() { |
1030 | advanceImpl(true, true); |
1031 | } |
1032 | |
1033 | ChunkPtr chunk() const { |
1034 | return std::pointer_traits<ChunkPtr>::pointer_to( |
1035 | Chunk::owner(*itemPtr_, index_)); |
1036 | } |
1037 | |
1038 | std::size_t index() const { |
1039 | return index_; |
1040 | } |
1041 | |
1042 | Item* itemAddr() const { |
1043 | return std::addressof(*itemPtr_); |
1044 | } |
1045 | Item& item() const { |
1046 | return *itemPtr_; |
1047 | } |
1048 | Item const& citem() const { |
1049 | return *itemPtr_; |
1050 | } |
1051 | |
1052 | bool atEnd() const { |
1053 | return itemPtr_ == nullptr; |
1054 | } |
1055 | |
1056 | Packed pack() const { |
1057 | return Packed{itemPtr_, static_cast<uint8_t>(index_)}; |
1058 | } |
1059 | |
1060 | bool operator==(F14ItemIter const& rhs) const { |
1061 | // this form makes iter == end() into a single null check after inlining |
1062 | // and constant propagation |
1063 | return itemPtr_ == rhs.itemPtr_; |
1064 | } |
1065 | |
1066 | bool operator!=(F14ItemIter const& rhs) const { |
1067 | return !(*this == rhs); |
1068 | } |
1069 | |
1070 | private: |
1071 | ItemPtr itemPtr_; |
1072 | std::size_t index_; |
1073 | }; |
1074 | |
1075 | //////////////// |
1076 | |
1077 | template <typename SizeType, typename ItemIter, bool EnablePackedItemIter> |
1078 | struct SizeAndPackedBegin { |
1079 | SizeType size_{0}; |
1080 | |
1081 | private: |
1082 | typename ItemIter::Packed packedBegin_{ItemIter{}.pack()}; |
1083 | |
1084 | public: |
1085 | typename ItemIter::Packed& packedBegin() { |
1086 | return packedBegin_; |
1087 | } |
1088 | |
1089 | typename ItemIter::Packed const& packedBegin() const { |
1090 | return packedBegin_; |
1091 | } |
1092 | }; |
1093 | |
1094 | template <typename SizeType, typename ItemIter> |
1095 | struct SizeAndPackedBegin<SizeType, ItemIter, false> { |
1096 | SizeType size_{0}; |
1097 | |
1098 | [[noreturn]] typename ItemIter::Packed& packedBegin() { |
1099 | assume_unreachable(); |
1100 | } |
1101 | |
1102 | [[noreturn]] typename ItemIter::Packed const& packedBegin() const { |
1103 | assume_unreachable(); |
1104 | } |
1105 | }; |
1106 | |
1107 | template <typename Policy> |
1108 | class F14Table : public Policy { |
1109 | public: |
1110 | using Item = typename Policy::Item; |
1111 | |
1112 | using value_type = typename Policy::Value; |
1113 | using allocator_type = typename Policy::Alloc; |
1114 | |
1115 | private: |
1116 | using Alloc = typename Policy::Alloc; |
1117 | using AllocTraits = typename Policy::AllocTraits; |
1118 | using Hasher = typename Policy::Hasher; |
1119 | using InternalSizeType = typename Policy::InternalSizeType; |
1120 | using KeyEqual = typename Policy::KeyEqual; |
1121 | |
1122 | using Policy::kAllocIsAlwaysEqual; |
1123 | using Policy::kContinuousCapacity; |
1124 | using Policy::kDefaultConstructIsNoexcept; |
1125 | using Policy::kEnableItemIteration; |
1126 | using Policy::kSwapIsNoexcept; |
1127 | |
1128 | using Policy::destroyItemOnClear; |
1129 | using Policy::isAvalanchingHasher; |
1130 | using Policy::prefetchBeforeCopy; |
1131 | using Policy::prefetchBeforeDestroy; |
1132 | using Policy::prefetchBeforeRehash; |
1133 | |
1134 | using ByteAlloc = typename AllocTraits::template rebind_alloc<uint8_t>; |
1135 | using BytePtr = typename std::allocator_traits<ByteAlloc>::pointer; |
1136 | |
1137 | using Chunk = F14Chunk<Item>; |
1138 | using ChunkPtr = |
1139 | typename std::pointer_traits<BytePtr>::template rebind<Chunk>; |
1140 | |
1141 | using HashPair = typename F14HashToken::HashPair; |
1142 | |
1143 | public: |
1144 | using ItemIter = F14ItemIter<ChunkPtr>; |
1145 | |
1146 | private: |
1147 | //////// begin fields |
1148 | |
1149 | ChunkPtr chunks_{Chunk::emptyInstance()}; |
1150 | InternalSizeType chunkMask_{0}; |
1151 | SizeAndPackedBegin<InternalSizeType, ItemIter, kEnableItemIteration> |
1152 | sizeAndPackedBegin_; |
1153 | |
1154 | //////// end fields |
1155 | |
1156 | void swapContents(F14Table& rhs) noexcept { |
1157 | using std::swap; |
1158 | swap(chunks_, rhs.chunks_); |
1159 | swap(chunkMask_, rhs.chunkMask_); |
1160 | swap(sizeAndPackedBegin_.size_, rhs.sizeAndPackedBegin_.size_); |
1161 | if (kEnableItemIteration) { |
1162 | swap( |
1163 | sizeAndPackedBegin_.packedBegin(), |
1164 | rhs.sizeAndPackedBegin_.packedBegin()); |
1165 | } |
1166 | } |
1167 | |
1168 | public: |
1169 | F14Table( |
1170 | std::size_t initialCapacity, |
1171 | Hasher const& hasher, |
1172 | KeyEqual const& keyEqual, |
1173 | Alloc const& alloc) |
1174 | : Policy{hasher, keyEqual, alloc} { |
1175 | if (initialCapacity > 0) { |
1176 | reserve(initialCapacity); |
1177 | } |
1178 | } |
1179 | |
1180 | F14Table(F14Table const& rhs) : Policy{rhs} { |
1181 | buildFromF14Table(rhs); |
1182 | } |
1183 | |
1184 | F14Table(F14Table const& rhs, Alloc const& alloc) : Policy{rhs, alloc} { |
1185 | buildFromF14Table(rhs); |
1186 | } |
1187 | |
1188 | F14Table(F14Table&& rhs) noexcept( |
1189 | std::is_nothrow_move_constructible<Hasher>::value&& |
1190 | std::is_nothrow_move_constructible<KeyEqual>::value&& |
1191 | std::is_nothrow_move_constructible<Alloc>::value) |
1192 | : Policy{std::move(rhs)} { |
1193 | swapContents(rhs); |
1194 | } |
1195 | |
1196 | F14Table(F14Table&& rhs, Alloc const& alloc) noexcept(kAllocIsAlwaysEqual) |
1197 | : Policy{std::move(rhs), alloc} { |
1198 | if (kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) { |
1199 | // move storage (common case) |
1200 | swapContents(rhs); |
1201 | } else { |
1202 | // new storage because allocators unequal, move values (rare case) |
1203 | buildFromF14Table(std::move(rhs)); |
1204 | } |
1205 | } |
1206 | |
1207 | F14Table& operator=(F14Table const& rhs) { |
1208 | if (this != &rhs) { |
1209 | reset(); |
1210 | static_cast<Policy&>(*this) = rhs; |
1211 | buildFromF14Table(rhs); |
1212 | } |
1213 | return *this; |
1214 | } |
1215 | |
1216 | F14Table& operator=(F14Table&& rhs) noexcept( |
1217 | std::is_nothrow_move_assignable<Hasher>::value&& |
1218 | std::is_nothrow_move_assignable<KeyEqual>::value && |
1219 | (kAllocIsAlwaysEqual || |
1220 | (AllocTraits::propagate_on_container_move_assignment::value && |
1221 | std::is_nothrow_move_assignable<Alloc>::value))) { |
1222 | if (this != &rhs) { |
1223 | reset(); |
1224 | static_cast<Policy&>(*this) = std::move(rhs); |
1225 | if (AllocTraits::propagate_on_container_move_assignment::value || |
1226 | kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) { |
1227 | // move storage (common case) |
1228 | swapContents(rhs); |
1229 | } else { |
1230 | // new storage because allocators unequal, move values (rare case) |
1231 | buildFromF14Table(std::move(rhs)); |
1232 | } |
1233 | } |
1234 | return *this; |
1235 | } |
1236 | |
1237 | ~F14Table() { |
1238 | reset(); |
1239 | } |
1240 | |
1241 | void swap(F14Table& rhs) noexcept(kSwapIsNoexcept) { |
1242 | // If propagate_on_container_swap is false and allocators are |
1243 | // not equal, the only way to accomplish a swap would be to do |
1244 | // dynamic allocation and then move (or swap) each contained value. |
1245 | // AllocatorAwareContainer-s are not supposed to attempt this, but |
1246 | // rather are supposed to have undefined behavior in that case. |
1247 | FOLLY_SAFE_CHECK( |
1248 | AllocTraits::propagate_on_container_swap::value || |
1249 | kAllocIsAlwaysEqual || this->alloc() == rhs.alloc(), |
1250 | "swap is undefined for unequal non-propagating allocators" ); |
1251 | this->swapPolicy(rhs); |
1252 | swapContents(rhs); |
1253 | } |
1254 | |
1255 | private: |
1256 | //////// hash helpers |
1257 | |
1258 | // Hash values are used to compute the desired position, which is the |
1259 | // chunk index at which we would like to place a value (if there is no |
1260 | // overflow), and the tag, which is an additional 8 bits of entropy. |
1261 | // |
1262 | // The standard's definition of hash function quality only refers to |
1263 | // the probability of collisions of the entire hash value, not to the |
1264 | // probability of collisions of the results of shifting or masking the |
1265 | // hash value. Some hash functions, however, provide this stronger |
1266 | // guarantee (not quite the same as the definition of avalanching, |
1267 | // but similar). |
1268 | // |
1269 | // If the user-supplied hasher is an avalanching one (each bit of the |
1270 | // hash value has a 50% chance of being the same for differing hash |
1271 | // inputs), then we can just take 1 byte of the hash value for the tag |
1272 | // and the rest for the desired position. Avalanching hashers also |
1273 | // let us map hash value to array index position with just a bitmask |
1274 | // without risking clumping. (Many hash tables just accept the risk |
1275 | // and do it regardless.) |
1276 | // |
1277 | // std::hash<std::string> avalanches in all implementations we've |
1278 | // examined: libstdc++-v3 uses MurmurHash2, and libc++ uses CityHash |
1279 | // or MurmurHash2. The other std::hash specializations, however, do not |
1280 | // have this property. std::hash for integral and pointer values is the |
1281 | // identity function on libstdc++-v3 and libc++, in particular. In our |
1282 | // experience it is also fairly common for user-defined specializations |
1283 | // of std::hash to combine fields in an ad-hoc way that does not evenly |
1284 | // distribute entropy among the bits of the result (a + 37 * b, for |
1285 | // example, where a and b are integer fields). |
1286 | // |
1287 | // For hash functions we don't trust to avalanche, we repair things by |
1288 | // applying a bit mixer to the user-supplied hash. |
1289 | |
1290 | #if FOLLY_X64 || FOLLY_AARCH64 |
1291 | // 64-bit |
1292 | static HashPair splitHash(std::size_t hash) { |
    static_assert(sizeof(std::size_t) == sizeof(uint64_t), "");
1294 | std::size_t tag; |
1295 | if (!isAvalanchingHasher()) { |
1296 | #if FOLLY_F14_CRC_INTRINSIC_AVAILABLE |
1297 | #if FOLLY_SSE |
1298 | // SSE4.2 CRC |
1299 | std::size_t c = _mm_crc32_u64(0, hash); |
1300 | tag = (c >> 24) | 0x80; |
1301 | hash += c; |
1302 | #else |
1303 | // CRC is optional on armv8 (-march=armv8-a+crc), standard on armv8.1 |
1304 | std::size_t c = __crc32cd(0, hash); |
1305 | tag = (c >> 24) | 0x80; |
1306 | hash += c; |
1307 | #endif |
1308 | #else |
1309 | // The mixer below is not fully avalanching for all 64 bits of |
1310 | // output, but looks quite good for bits 18..63 and puts plenty |
1311 | // of entropy even lower when considering multiple bits together |
1312 | // (like the tag). Importantly, when under register pressure it |
1313 | // uses fewer registers, instructions, and immediate constants |
1314 | // than the alternatives, resulting in compact code that is more |
1315 | // easily inlinable. In one instantiation a modified Murmur mixer |
1316 | // was 48 bytes of assembly (even after using the same multiplicand |
1317 | // for both steps) and this one was 27 bytes, for example. |
1318 | auto const kMul = 0xc4ceb9fe1a85ec53ULL; |
1319 | #ifdef _WIN32 |
1320 | __int64 signedHi; |
1321 | __int64 signedLo = _mul128( |
1322 | static_cast<__int64>(hash), static_cast<__int64>(kMul), &signedHi); |
1323 | auto hi = static_cast<uint64_t>(signedHi); |
1324 | auto lo = static_cast<uint64_t>(signedLo); |
1325 | #else |
1326 | auto hi = static_cast<uint64_t>( |
1327 | (static_cast<unsigned __int128>(hash) * kMul) >> 64); |
1328 | auto lo = hash * kMul; |
1329 | #endif |
1330 | hash = hi ^ lo; |
1331 | hash *= kMul; |
1332 | tag = ((hash >> 15) & 0x7f) | 0x80; |
1333 | hash >>= 22; |
1334 | #endif |
1335 | } else { |
1336 | // we don't trust the top bit |
1337 | tag = (hash >> 56) | 0x80; |
1338 | } |
1339 | return std::make_pair(hash, tag); |
1340 | } |
1341 | #else |
1342 | // 32-bit |
1343 | static HashPair splitHash(std::size_t hash) { |
    static_assert(sizeof(std::size_t) == sizeof(uint32_t), "");
1345 | uint8_t tag; |
1346 | if (!isAvalanchingHasher()) { |
1347 | #if FOLLY_F14_CRC_INTRINSIC_AVAILABLE |
1348 | #if FOLLY_SSE |
1349 | // SSE4.2 CRC |
1350 | auto c = _mm_crc32_u32(0, hash); |
1351 | tag = static_cast<uint8_t>(~(c >> 25)); |
1352 | hash += c; |
1353 | #else |
1354 | auto c = __crc32cw(0, hash); |
1355 | tag = static_cast<uint8_t>(~(c >> 25)); |
1356 | hash += c; |
1357 | #endif |
1358 | #else |
1359 | // finalizer for 32-bit murmur2 |
1360 | hash ^= hash >> 13; |
1361 | hash *= 0x5bd1e995; |
1362 | hash ^= hash >> 15; |
1363 | tag = static_cast<uint8_t>(~(hash >> 25)); |
1364 | #endif |
1365 | } else { |
1366 | // we don't trust the top bit |
1367 | tag = (hash >> 24) | 0x80; |
1368 | } |
1369 | return std::make_pair(hash, tag); |
1370 | } |
1371 | #endif |
1372 | |
1373 | //////// memory management helpers |
1374 | |
1375 | static std::size_t computeCapacity( |
1376 | std::size_t chunkCount, |
1377 | std::size_t scale) { |
    FOLLY_SAFE_DCHECK(!(chunkCount > 1 && scale == 0), "");
    FOLLY_SAFE_DCHECK(
        scale < (std::size_t{1} << Chunk::kCapacityScaleBits), "");
    FOLLY_SAFE_DCHECK((chunkCount & (chunkCount - 1)) == 0, "");
1382 | return (((chunkCount - 1) >> Chunk::kCapacityScaleShift) + 1) * scale; |
1383 | } |
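
  // e.g. with full-size chunks (kCapacityScaleShift == 0) this is just
  // chunkCount * scale: 8 chunks at scale 12 give capacity 96.  In the
  // single-chunk kCapacity == 12 layout (kCapacityScaleShift == 12) the
  // 16-bit scale encodes the capacity directly.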
1384 | |
1385 | std::pair<std::size_t, std::size_t> computeChunkCountAndScale( |
1386 | std::size_t desiredCapacity, |
1387 | bool continuousSingleChunkCapacity, |
1388 | bool continuousMultiChunkCapacity) const { |
1389 | if (desiredCapacity <= Chunk::kCapacity) { |
1390 | // we can go to 100% capacity in a single chunk with no problem |
1391 | if (!continuousSingleChunkCapacity) { |
1392 | if (desiredCapacity <= 2) { |
1393 | desiredCapacity = 2; |
1394 | } else if (desiredCapacity <= 6) { |
1395 | desiredCapacity = 6; |
1396 | } else { |
1397 | desiredCapacity = Chunk::kCapacity; |
1398 | } |
1399 | } |
1400 | auto rv = std::make_pair(std::size_t{1}, desiredCapacity); |
1401 | FOLLY_SAFE_DCHECK( |
          computeCapacity(rv.first, rv.second) == desiredCapacity, "");
1403 | return rv; |
1404 | } else { |
1405 | std::size_t minChunks = |
1406 | (desiredCapacity - 1) / Chunk::kDesiredCapacity + 1; |
1407 | std::size_t chunkPow = findLastSet(minChunks - 1); |
1408 | if (chunkPow == 8 * sizeof(std::size_t)) { |
1409 | throw_exception<std::bad_alloc>(); |
1410 | } |
1411 | |
1412 | std::size_t chunkCount = std::size_t{1} << chunkPow; |
1413 | |
1414 | // Let cc * scale be the actual capacity. |
1415 | // cc = ((chunkCount - 1) >> kCapacityScaleShift) + 1. |
1416 | // If chunkPow >= kCapacityScaleShift, then cc = chunkCount >> |
1417 | // kCapacityScaleShift = 1 << (chunkPow - kCapacityScaleShift), |
1418 | // otherwise it equals 1 = 1 << 0. Let cc = 1 << ss. |
1419 | std::size_t ss = chunkPow >= Chunk::kCapacityScaleShift |
1420 | ? chunkPow - Chunk::kCapacityScaleShift |
1421 | : 0; |
1422 | |
1423 | std::size_t scale; |
1424 | if (continuousMultiChunkCapacity) { |
1425 | // (1 << ss) * scale >= desiredCapacity |
1426 | scale = ((desiredCapacity - 1) >> ss) + 1; |
1427 | } else { |
1428 | // (1 << ss) * scale == chunkCount * kDesiredCapacity |
1429 | scale = Chunk::kDesiredCapacity << (chunkPow - ss); |
1430 | } |
1431 | |
1432 | std::size_t actualCapacity = computeCapacity(chunkCount, scale); |
      FOLLY_SAFE_DCHECK(actualCapacity >= desiredCapacity, "");
1434 | if (actualCapacity > max_size()) { |
1435 | throw_exception<std::bad_alloc>(); |
1436 | } |
1437 | |
1438 | return std::make_pair(chunkCount, scale); |
1439 | } |
1440 | } |
1441 | |
1442 | static std::size_t chunkAllocSize( |
1443 | std::size_t chunkCount, |
1444 | std::size_t capacityScale) { |
    FOLLY_SAFE_DCHECK(chunkCount > 0, "");
    FOLLY_SAFE_DCHECK(!(chunkCount > 1 && capacityScale == 0), "");
    if (chunkCount == 1) {
      static_assert(offsetof(Chunk, rawItems_) == 16, "");
1449 | return 16 + sizeof(Item) * computeCapacity(1, capacityScale); |
1450 | } else { |
1451 | return sizeof(Chunk) * chunkCount; |
1452 | } |
1453 | } |
1454 | |
1455 | ChunkPtr initializeChunks( |
1456 | BytePtr raw, |
1457 | std::size_t chunkCount, |
1458 | std::size_t capacityScale) { |
    static_assert(std::is_trivial<Chunk>::value, "F14Chunk should be POD");
1460 | auto chunks = static_cast<Chunk*>(static_cast<void*>(&*raw)); |
1461 | for (std::size_t i = 0; i < chunkCount; ++i) { |
1462 | chunks[i].clear(); |
1463 | } |
1464 | chunks[0].markEof(capacityScale); |
1465 | return std::pointer_traits<ChunkPtr>::pointer_to(*chunks); |
1466 | } |
1467 | |
1468 | std::size_t itemCount() const noexcept { |
1469 | if (chunkMask_ == 0) { |
1470 | return computeCapacity(1, chunks_->capacityScale()); |
1471 | } else { |
1472 | return (chunkMask_ + 1) * Chunk::kCapacity; |
1473 | } |
1474 | } |
1475 | |
1476 | public: |
1477 | ItemIter begin() const noexcept { |
    FOLLY_SAFE_DCHECK(kEnableItemIteration, "");
1479 | return ItemIter{sizeAndPackedBegin_.packedBegin()}; |
1480 | } |
1481 | |
1482 | ItemIter end() const noexcept { |
1483 | return ItemIter{}; |
1484 | } |
1485 | |
1486 | bool empty() const noexcept { |
1487 | return size() == 0; |
1488 | } |
1489 | |
1490 | InternalSizeType size() const noexcept { |
1491 | return sizeAndPackedBegin_.size_; |
1492 | } |
1493 | |
1494 | std::size_t max_size() const noexcept { |
1495 | auto& a = this->alloc(); |
1496 | return std::min<std::size_t>( |
1497 | (std::numeric_limits<InternalSizeType>::max)(), |
1498 | AllocTraits::max_size(a)); |
1499 | } |
1500 | |
1501 | std::size_t bucket_count() const noexcept { |
1502 | return computeCapacity(chunkMask_ + 1, chunks_->capacityScale()); |
1503 | } |
1504 | |
1505 | std::size_t max_bucket_count() const noexcept { |
1506 | return max_size(); |
1507 | } |
1508 | |
1509 | float load_factor() const noexcept { |
1510 | return empty() |
1511 | ? 0.0f |
1512 | : static_cast<float>(size()) / static_cast<float>(bucket_count()); |
1513 | } |
1514 | |
1515 | float max_load_factor() const noexcept { |
1516 | return 1.0f; |
1517 | } |
1518 | |
1519 | void max_load_factor(float) noexcept { |
1520 | // Probing hash tables can't run load factors >= 1 (unlike chaining |
1521 | // tables). In addition, we have measured that there is little or |
1522 | // no performance advantage to running a smaller load factor (cache |
1523 | // locality losses outweigh the small reduction in probe lengths, |
1524 | // often making it slower). Therefore, we've decided to just fix |
1525 | // max_load_factor at 1.0f regardless of what the user requests. |
1526 | // This has an additional advantage that we don't have to store it. |
1527 | // Taking alignment into consideration this makes every F14 table |
1528 | // 8 bytes smaller, and is part of the reason an empty F14NodeMap |
1529 | // is almost half the size of an empty std::unordered_map (32 vs |
1530 | // 56 bytes). |
1531 | // |
1532 | // I don't have a strong opinion on whether we should remove this |
1533 | // method or leave a stub, let ngbronson or xshi know if you have a |
1534 | // compelling argument either way. |
1535 | } |
1536 | |
1537 | private: |
1538 | // Our probe strategy is to advance through additional chunks with |
1539 | // a stride that is key-specific. This is called double hashing, |
1540 | // and is a well known and high quality probing strategy. So long as |
1541 | // the stride and the chunk count are relatively prime, we will visit |
1542 | // every chunk once and then return to the original chunk, letting us |
1543 | // detect and end the cycle. The chunk count is a power of two, so |
1544 | // we can satisfy the relatively prime part by choosing an odd stride. |
1545 | // We've already computed a high quality secondary hash value for the |
1546 | // tag, so we just use it for the second probe hash as well. |
1547 | // |
1548 | // At the maximum load factor of 12/14, expected probe length for a |
1549 | // find hit is 1.041, with 99% of keys found in the first three chunks. |
1550 | // Expected probe length for a find miss (or insert) is 1.275, with a |
  // p99 probe length of 4 (fewer than 1% of failing finds look at 5 or
1552 | // more chunks). |
1553 | // |
1554 | // This code is structured so you can try various ways of encoding |
1555 | // the current probe state. For example, at the moment the probe's |
1556 | // state is the position in the cycle and the resulting chunk index is |
1557 | // computed from that inside probeCurrentIndex. We could also make the |
1558 | // probe state the chunk index, and then increment it by hp.second * |
1559 | // 2 + 1 in probeAdvance. Wrapping can be applied early or late as |
1560 | // well. This particular code seems to be easier for the optimizer |
1561 | // to understand. |
1562 | // |
1563 | // We could also implement probing strategies that resulted in the same |
1564 | // tour for every key initially assigned to a chunk (linear probing or |
1565 | // quadratic), but that results in longer probe lengths. In particular, |
1566 | // the cache locality wins of linear probing are not worth the increase |
1567 | // in probe lengths (extra work and less branch predictability) in |
1568 | // our experiments. |
1569 | |
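  // Worked example: with 8 chunks (chunkMask_ == 7) and hp.second ==
  // 0x9e, probeDelta returns 2 * 0x9e + 1 == 0x13d.  The stride is odd,
  // hence relatively prime to the power-of-two chunk count, so indices
  // hp.first + k * 0x13d visit all 8 chunks (mod 8) before repeating.
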
1570 | std::size_t probeDelta(HashPair hp) const { |
1571 | return 2 * hp.second + 1; |
1572 | } |
1573 | |
1574 | template <typename K> |
1575 | FOLLY_ALWAYS_INLINE ItemIter findImpl(HashPair hp, K const& key) const { |
1576 | std::size_t index = hp.first; |
1577 | std::size_t step = probeDelta(hp); |
1578 | for (std::size_t tries = 0; tries <= chunkMask_; ++tries) { |
1579 | ChunkPtr chunk = chunks_ + (index & chunkMask_); |
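// The tag match below touches the chunk's first cache line; when a
// chunk spills past one cache line, warm up the item area as well.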
1580 | if (sizeof(Chunk) > 64) { |
1581 | prefetchAddr(chunk->itemAddr(8)); |
1582 | } |
1583 | auto hits = chunk->tagMatchIter(hp.second); |
1584 | while (hits.hasNext()) { |
1585 | auto i = hits.next(); |
1586 | if (LIKELY(this->keyMatchesItem(key, chunk->item(i)))) { |
1587 | // Tag match and key match were both successful. The chance |
1588 | // of a false tag match is 1/128 for each key in the chunk |
1589 | // (with a proper hash function). |
1590 | return ItemIter{chunk, i}; |
1591 | } |
1592 | } |
1593 | if (LIKELY(chunk->outboundOverflowCount() == 0)) { |
1594 | // No keys that wanted to be placed in this chunk were denied |
1595 | // entry, so our search is over. This is the common case. |
1596 | break; |
1597 | } |
1598 | index += step; |
1599 | } |
// Exiting the loop because tries is exhausted is rare, but possible.
// It means we have searched every chunk, and that each chunk is
// currently on the probe path of some key present in the map that
// visited it but ended up placed elsewhere.
1604 | return ItemIter{}; |
1605 | } |
1606 | |
1607 | public: |
// Prehashing splits the work of find(key) into two calls, enabling
// you to manually implement loop pipelining for hot bulk lookups.
// prehash computes the hash and prefetches the first probed memory
// location, and the two-arg find(F14HashToken,K) performs the rest
// of the search (a usage sketch follows the token overload below).
1612 | template <typename K> |
1613 | F14HashToken prehash(K const& key) const { |
1614 | FOLLY_SAFE_DCHECK(chunks_ != nullptr, "" ); |
1615 | auto hp = splitHash(this->computeKeyHash(key)); |
1616 | ChunkPtr firstChunk = chunks_ + (hp.first & chunkMask_); |
1617 | prefetchAddr(firstChunk); |
1618 | return F14HashToken(std::move(hp)); |
1619 | } |
1620 | |
1621 | template <typename K> |
1622 | FOLLY_ALWAYS_INLINE ItemIter find(K const& key) const { |
1623 | auto hp = splitHash(this->computeKeyHash(key)); |
1624 | return findImpl(hp, key); |
1625 | } |
1626 | |
1627 | template <typename K> |
1628 | FOLLY_ALWAYS_INLINE ItemIter |
1629 | find(F14HashToken const& token, K const& key) const { |
1630 | FOLLY_SAFE_DCHECK( |
1631 | splitHash(this->computeKeyHash(key)) == static_cast<HashPair>(token), |
1632 | "" ); |
1633 | return findImpl(static_cast<HashPair>(token), key); |
1634 | } |
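// A sketch of the pipelined bulk-lookup pattern that the two-call API
// enables (illustrative only: F14FastMap, the key type, and the batch
// width of 8 are arbitrary choices, not part of this file):
//
//   #include <algorithm>
//   #include <array>
//   #include <vector>
//   #include <folly/container/F14Map.h>
//
//   int sumValues(folly::F14FastMap<long, int> const& m,
//                 std::vector<long> const& keys) {
//     int sum = 0;
//     std::array<folly::F14HashToken, 8> tokens;
//     for (std::size_t i = 0; i < keys.size(); i += 8) {
//       std::size_t n = std::min<std::size_t>(8, keys.size() - i);
//       for (std::size_t j = 0; j < n; ++j) {
//         tokens[j] = m.prehash(keys[i + j]); // hash + prefetch
//       }
//       for (std::size_t j = 0; j < n; ++j) {
//         auto it = m.find(tokens[j], keys[i + j]);
//         if (it != m.end()) {
//           sum += it->second;
//         }
//       }
//     }
//     return sum;
//   }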
1635 | |
1636 | private: |
1637 | void adjustSizeAndBeginAfterInsert(ItemIter iter) { |
1638 | if (kEnableItemIteration) { |
1639 | // packedBegin is the max of all valid ItemIter::pack() |
1640 | auto packed = iter.pack(); |
1641 | if (sizeAndPackedBegin_.packedBegin() < packed) { |
1642 | sizeAndPackedBegin_.packedBegin() = packed; |
1643 | } |
1644 | } |
1645 | |
1646 | ++sizeAndPackedBegin_.size_; |
1647 | } |
1648 | |
// Ignores hp if iter.chunk()->hostedOverflowCount() == 0
1650 | void eraseBlank(ItemIter iter, HashPair hp) { |
1651 | iter.chunk()->clearTag(iter.index()); |
1652 | |
1653 | if (iter.chunk()->hostedOverflowCount() != 0) { |
// Walk this key's probe path, undoing the overflow bookkeeping:
// decrement outboundOverflowCount on every chunk probed through,
// and decrement hostedOverflowCount on the erased item's chunk if
// the path passed through at least one other chunk first.
1655 | std::size_t index = hp.first; |
1656 | std::size_t delta = probeDelta(hp); |
1657 | uint8_t hostedOp = 0; |
1658 | while (true) { |
1659 | ChunkPtr chunk = chunks_ + (index & chunkMask_); |
1660 | if (chunk == iter.chunk()) { |
1661 | chunk->adjustHostedOverflowCount(hostedOp); |
1662 | break; |
1663 | } |
1664 | chunk->decrOutboundOverflowCount(); |
1665 | hostedOp = Chunk::kDecrHostedOverflowCount; |
1666 | index += delta; |
1667 | } |
1668 | } |
1669 | } |
1670 | |
1671 | void adjustSizeAndBeginBeforeErase(ItemIter iter) { |
1672 | --sizeAndPackedBegin_.size_; |
1673 | if (kEnableItemIteration) { |
1674 | if (iter.pack() == sizeAndPackedBegin_.packedBegin()) { |
1675 | if (size() == 0) { |
1676 | iter = ItemIter{}; |
1677 | } else { |
1678 | iter.precheckedAdvance(); |
1679 | } |
1680 | sizeAndPackedBegin_.packedBegin() = iter.pack(); |
1681 | } |
1682 | } |
1683 | } |
1684 | |
1685 | template <typename... Args> |
1686 | void insertAtBlank(ItemIter pos, HashPair hp, Args&&... args) { |
1687 | try { |
1688 | auto dst = pos.itemAddr(); |
1689 | this->constructValueAtItem(*this, dst, std::forward<Args>(args)...); |
1690 | } catch (...) { |
1691 | eraseBlank(pos, hp); |
1692 | throw; |
1693 | } |
1694 | adjustSizeAndBeginAfterInsert(pos); |
1695 | } |
1696 | |
1697 | ItemIter allocateTag(uint8_t* fullness, HashPair hp) { |
1698 | ChunkPtr chunk; |
1699 | std::size_t index = hp.first; |
1700 | std::size_t delta = probeDelta(hp); |
1701 | uint8_t hostedOp = 0; |
1702 | while (true) { |
1703 | index &= chunkMask_; |
1704 | chunk = chunks_ + index; |
1705 | if (LIKELY(fullness[index] < Chunk::kCapacity)) { |
1706 | break; |
1707 | } |
1708 | chunk->incrOutboundOverflowCount(); |
1709 | hostedOp = Chunk::kIncrHostedOverflowCount; |
1710 | index += delta; |
1711 | } |
1712 | unsigned itemIndex = fullness[index]++; |
1713 | FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "" ); |
1714 | chunk->setTag(itemIndex, hp.second); |
1715 | chunk->adjustHostedOverflowCount(hostedOp); |
1716 | return ItemIter{chunk, itemIndex}; |
1717 | } |
1718 | |
1719 | ChunkPtr lastOccupiedChunk() const { |
1720 | FOLLY_SAFE_DCHECK(size() > 0, "" ); |
1721 | if (kEnableItemIteration) { |
1722 | return begin().chunk(); |
1723 | } else { |
1724 | return chunks_ + chunkMask_; |
1725 | } |
1726 | } |
1727 | |
1728 | template <typename T> |
1729 | void directBuildFrom(T&& src) { |
1730 | FOLLY_SAFE_DCHECK(src.size() > 0 && chunkMask_ == src.chunkMask_, "" ); |
1731 | |
1732 | // We use std::forward<T> to allow portions of src to be moved out by |
1733 | // either beforeBuild or afterBuild, but we are just relying on good |
1734 | // behavior of our Policy superclass to ensure that any particular |
1735 | // field of this is a donor at most once. |
1736 | |
1737 | auto undoState = |
1738 | this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src)); |
1739 | bool success = false; |
1740 | SCOPE_EXIT { |
1741 | this->afterBuild( |
1742 | undoState, success, src.size(), bucket_count(), std::forward<T>(src)); |
1743 | }; |
1744 | |
1745 | // Copy can fail part-way through if a Value copy constructor throws. |
1746 | // Failing afterBuild is limited in its cleanup power in this case, |
1747 | // because it can't enumerate the items that were actually copied. |
1748 | // Fortunately we can divide the situation into cases where all of |
1749 | // the state is owned by the table itself (F14Node and F14Value), |
1750 | // for which clearImpl() can do partial cleanup, and cases where all |
1751 | // of the values are owned by the policy (F14Vector), in which case |
1752 | // partial failure should not occur. Sorry for the subtle invariants |
1753 | // in the Policy API. |
1754 | |
1755 | if (is_trivially_copyable<Item>::value && !this->destroyItemOnClear() && |
1756 | itemCount() == src.itemCount()) { |
1757 | FOLLY_SAFE_DCHECK(chunkMask_ == src.chunkMask_, "" ); |
1758 | |
1759 | auto scale = chunks_->capacityScale(); |
1760 | |
1761 | // most happy path |
1762 | auto n = chunkAllocSize(chunkMask_ + 1, scale); |
1763 | std::memcpy(&chunks_[0], &src.chunks_[0], n); |
1764 | sizeAndPackedBegin_.size_ = src.size(); |
1765 | if (kEnableItemIteration) { |
1766 | auto srcBegin = src.begin(); |
1767 | sizeAndPackedBegin_.packedBegin() = |
1768 | ItemIter{chunks_ + (srcBegin.chunk() - src.chunks_), |
1769 | srcBegin.index()} |
1770 | .pack(); |
1771 | } |
1772 | if (kContinuousCapacity) { |
1773 | // capacityScale might not match even if itemCount matches |
1774 | chunks_->setCapacityScale(scale); |
1775 | } |
1776 | } else { |
1777 | std::size_t maxChunkIndex = src.lastOccupiedChunk() - src.chunks_; |
1778 | |
1779 | // happy path, no rehash but pack items toward bottom of chunk and |
1780 | // use copy constructor |
1781 | auto srcChunk = &src.chunks_[maxChunkIndex]; |
1782 | Chunk* dstChunk = &chunks_[maxChunkIndex]; |
1783 | do { |
1784 | dstChunk->copyOverflowInfoFrom(*srcChunk); |
1785 | |
1786 | auto iter = srcChunk->occupiedIter(); |
1787 | if (prefetchBeforeCopy()) { |
1788 | for (auto piter = iter; piter.hasNext();) { |
1789 | this->prefetchValue(srcChunk->citem(piter.next())); |
1790 | } |
1791 | } |
1792 | |
1793 | std::size_t dstI = 0; |
1794 | for (; iter.hasNext(); ++dstI) { |
1795 | auto srcI = iter.next(); |
1796 | auto&& srcArg = |
1797 | std::forward<T>(src).buildArgForItem(srcChunk->item(srcI)); |
1798 | auto dst = dstChunk->itemAddr(dstI); |
1799 | this->constructValueAtItem( |
1800 | 0, dst, std::forward<decltype(srcArg)>(srcArg)); |
1801 | dstChunk->setTag(dstI, srcChunk->tag(srcI)); |
1802 | ++sizeAndPackedBegin_.size_; |
1803 | } |
1804 | |
1805 | --srcChunk; |
1806 | --dstChunk; |
1807 | } while (size() != src.size()); |
1808 | |
1809 | // reset doesn't care about packedBegin, so we don't fix it until the end |
1810 | if (kEnableItemIteration) { |
1811 | sizeAndPackedBegin_.packedBegin() = |
1812 | ItemIter{chunks_ + maxChunkIndex, |
1813 | chunks_[maxChunkIndex].lastOccupied().index()} |
1814 | .pack(); |
1815 | } |
1816 | } |
1817 | |
1818 | success = true; |
1819 | } |
1820 | |
1821 | template <typename T> |
1822 | void rehashBuildFrom(T&& src) { |
1823 | FOLLY_SAFE_DCHECK(src.chunkMask_ > chunkMask_, "" ); |
1824 | |
1825 | // 1 byte per chunk means < 1 bit per value temporary overhead |
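// (each chunk hosts up to Chunk::kCapacity == 14 items, so 8 bits
// per chunk is at most 8/14 of a bit per value)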
1826 | std::array<uint8_t, 256> stackBuf; |
1827 | uint8_t* fullness; |
1828 | auto cc = chunkMask_ + 1; |
1829 | if (cc <= stackBuf.size()) { |
1830 | fullness = stackBuf.data(); |
1831 | } else { |
1832 | ByteAlloc a{this->alloc()}; |
1833 | fullness = &*std::allocator_traits<ByteAlloc>::allocate(a, cc); |
1834 | } |
1835 | SCOPE_EXIT { |
1836 | if (cc > stackBuf.size()) { |
1837 | ByteAlloc a{this->alloc()}; |
1838 | std::allocator_traits<ByteAlloc>::deallocate( |
1839 | a, |
1840 | std::pointer_traits<typename std::allocator_traits< |
1841 | ByteAlloc>::pointer>::pointer_to(*fullness), |
1842 | cc); |
1843 | } |
1844 | }; |
1845 | std::memset(fullness, '\0', cc); |
1846 | |
1847 | // We use std::forward<T> to allow portions of src to be moved out by |
1848 | // either beforeBuild or afterBuild, but we are just relying on good |
1849 | // behavior of our Policy superclass to ensure that any particular |
1850 | // field of this is a donor at most once. |
1851 | |
1852 | // Exception safety requires beforeBuild to happen after all of the |
1853 | // allocate() calls. |
1854 | auto undoState = |
1855 | this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src)); |
1856 | bool success = false; |
1857 | SCOPE_EXIT { |
1858 | this->afterBuild( |
1859 | undoState, success, src.size(), bucket_count(), std::forward<T>(src)); |
1860 | }; |
1861 | |
1862 | // The current table is at a valid state at all points for policies |
1863 | // in which non-trivial values are owned by the main table (F14Node |
1864 | // and F14Value), so reset() will clean things up properly if we |
1865 | // fail partway through. For the case that the policy manages value |
1866 | // lifecycle (F14Vector) then nothing after beforeBuild can throw and |
1867 | // we don't have to worry about partial failure. |
1868 | |
1869 | std::size_t srcChunkIndex = src.lastOccupiedChunk() - src.chunks_; |
1870 | while (true) { |
1871 | auto srcChunk = &src.chunks_[srcChunkIndex]; |
1872 | auto iter = srcChunk->occupiedIter(); |
1873 | if (prefetchBeforeRehash()) { |
1874 | for (auto piter = iter; piter.hasNext();) { |
1875 | this->prefetchValue(srcChunk->item(piter.next())); |
1876 | } |
1877 | } |
1878 | if (srcChunk->hostedOverflowCount() == 0) { |
1879 | // all items are in their preferred chunk (no probing), so we |
1880 | // don't need to compute any hash values |
1881 | while (iter.hasNext()) { |
1882 | auto i = iter.next(); |
1883 | auto& srcItem = srcChunk->item(i); |
1884 | auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem); |
1885 | HashPair hp{srcChunkIndex, srcChunk->tag(i)}; |
1886 | insertAtBlank( |
1887 | allocateTag(fullness, hp), |
1888 | hp, |
1889 | std::forward<decltype(srcArg)>(srcArg)); |
1890 | } |
1891 | } else { |
1892 | // any chunk's items might be in here |
1893 | while (iter.hasNext()) { |
1894 | auto i = iter.next(); |
1895 | auto& srcItem = srcChunk->item(i); |
1896 | auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem); |
1897 | auto const& srcKey = src.keyForValue(srcArg); |
1898 | auto hp = splitHash(this->computeKeyHash(srcKey)); |
1899 | FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(i), "" ); |
1900 | insertAtBlank( |
1901 | allocateTag(fullness, hp), |
1902 | hp, |
1903 | std::forward<decltype(srcArg)>(srcArg)); |
1904 | } |
1905 | } |
1906 | if (srcChunkIndex == 0) { |
1907 | break; |
1908 | } |
1909 | --srcChunkIndex; |
1910 | } |
1911 | |
1912 | success = true; |
1913 | } |
1914 | |
1915 | template <typename T> |
1916 | FOLLY_NOINLINE void buildFromF14Table(T&& src) { |
1917 | FOLLY_SAFE_DCHECK(bucket_count() == 0, "" ); |
1918 | if (src.size() == 0) { |
1919 | return; |
1920 | } |
1921 | |
1922 | // Use the source's capacity, unless it is oversized. |
1923 | auto upperLimit = computeChunkCountAndScale(src.size(), false, false); |
1924 | auto ccas = |
1925 | std::make_pair(src.chunkMask_ + 1, src.chunks_->capacityScale()); |
1926 | FOLLY_SAFE_DCHECK( |
1927 | ccas.first >= upperLimit.first, |
1928 | "rounded chunk count can't be bigger than actual" ); |
1929 | if (ccas.first > upperLimit.first || ccas.second > upperLimit.second) { |
1930 | ccas = upperLimit; |
1931 | } |
1932 | rehashImpl(0, 1, 0, ccas.first, ccas.second); |
1933 | |
1934 | try { |
1935 | if (chunkMask_ == src.chunkMask_) { |
1936 | directBuildFrom(std::forward<T>(src)); |
1937 | } else { |
1938 | rehashBuildFrom(std::forward<T>(src)); |
1939 | } |
1940 | } catch (...) { |
1941 | reset(); |
1942 | F14LinkCheck<getF14IntrinsicsMode()>::check(); |
1943 | throw; |
1944 | } |
1945 | } |
1946 | |
1947 | void reserveImpl(std::size_t desiredCapacity) { |
1948 | desiredCapacity = std::max<std::size_t>(desiredCapacity, size()); |
1949 | if (desiredCapacity == 0) { |
1950 | reset(); |
1951 | return; |
1952 | } |
1953 | |
1954 | auto origChunkCount = chunkMask_ + 1; |
1955 | auto origCapacityScale = chunks_->capacityScale(); |
1956 | auto origCapacity = computeCapacity(origChunkCount, origCapacityScale); |
1957 | |
1958 | // This came from an explicit reserve() or rehash() call, so there's |
1959 | // a good chance the capacity is exactly right. To avoid O(n^2) |
1960 | // behavior, we don't do rehashes that decrease the size by less |
1961 | // than 1/8, and if we have a requested increase of less than 1/8 we |
1962 | // instead go to the next power of two. |
1963 | |
1964 | if (desiredCapacity <= origCapacity && |
1965 | desiredCapacity >= origCapacity - origCapacity / 8) { |
1966 | return; |
1967 | } |
1968 | bool attemptExact = |
1969 | !(desiredCapacity > origCapacity && |
1970 | desiredCapacity < origCapacity + origCapacity / 8); |
1971 | |
1972 | std::size_t newChunkCount; |
1973 | std::size_t newCapacityScale; |
1974 | std::tie(newChunkCount, newCapacityScale) = computeChunkCountAndScale( |
1975 | desiredCapacity, attemptExact, kContinuousCapacity && attemptExact); |
1976 | auto newCapacity = computeCapacity(newChunkCount, newCapacityScale); |
1977 | |
1978 | if (origCapacity != newCapacity) { |
1979 | rehashImpl( |
1980 | size(), |
1981 | origChunkCount, |
1982 | origCapacityScale, |
1983 | newChunkCount, |
1984 | newCapacityScale); |
1985 | } |
1986 | } |
1987 | |
1988 | FOLLY_NOINLINE void reserveForInsertImpl( |
1989 | std::size_t capacityMinusOne, |
1990 | std::size_t origChunkCount, |
1991 | std::size_t origCapacityScale, |
1992 | std::size_t origCapacity) { |
1993 | FOLLY_SAFE_DCHECK(capacityMinusOne >= size(), "" ); |
1994 | std::size_t capacity = capacityMinusOne + 1; |
1995 | |
1996 | // we want to grow by between 2^0.5 and 2^1.5 ending at a "good" |
1997 | // size, so we grow by 2^0.5 and then round up |
1998 | |
// 1.01101_2 == 1.40625, slightly under 2^0.5 == 1.41421...
2000 | std::size_t minGrowth = origCapacity + (origCapacity >> 2) + |
2001 | (origCapacity >> 3) + (origCapacity >> 5); |
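// e.g. origCapacity == 96: 96 + 24 + 12 + 3 == 135 == 96 * 1.40625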
2002 | capacity = std::max<std::size_t>(capacity, minGrowth); |
2003 | |
2004 | std::size_t newChunkCount; |
2005 | std::size_t newCapacityScale; |
2006 | std::tie(newChunkCount, newCapacityScale) = |
2007 | computeChunkCountAndScale(capacity, false, false); |
2008 | |
2009 | FOLLY_SAFE_DCHECK( |
2010 | computeCapacity(newChunkCount, newCapacityScale) > origCapacity, "" ); |
2011 | |
2012 | rehashImpl( |
2013 | size(), |
2014 | origChunkCount, |
2015 | origCapacityScale, |
2016 | newChunkCount, |
2017 | newCapacityScale); |
2018 | } |
2019 | |
2020 | void rehashImpl( |
2021 | std::size_t origSize, |
2022 | std::size_t origChunkCount, |
2023 | std::size_t origCapacityScale, |
2024 | std::size_t newChunkCount, |
2025 | std::size_t newCapacityScale) { |
2026 | auto origChunks = chunks_; |
2027 | auto origCapacity = computeCapacity(origChunkCount, origCapacityScale); |
2028 | auto origAllocSize = chunkAllocSize(origChunkCount, origCapacityScale); |
2029 | auto newCapacity = computeCapacity(newChunkCount, newCapacityScale); |
2030 | auto newAllocSize = chunkAllocSize(newChunkCount, newCapacityScale); |
2031 | |
2032 | BytePtr rawAllocation; |
2033 | auto undoState = this->beforeRehash( |
2034 | origSize, origCapacity, newCapacity, newAllocSize, rawAllocation); |
2035 | chunks_ = initializeChunks(rawAllocation, newChunkCount, newCapacityScale); |
2036 | |
2037 | FOLLY_SAFE_DCHECK( |
2038 | newChunkCount < std::numeric_limits<InternalSizeType>::max(), "" ); |
2039 | chunkMask_ = static_cast<InternalSizeType>(newChunkCount - 1); |
2040 | |
2041 | bool success = false; |
2042 | SCOPE_EXIT { |
2043 | // this SCOPE_EXIT reverts chunks_ and chunkMask_ if necessary |
2044 | BytePtr finishedRawAllocation = nullptr; |
2045 | std::size_t finishedAllocSize = 0; |
2046 | if (LIKELY(success)) { |
2047 | if (origCapacity > 0) { |
2048 | finishedRawAllocation = std::pointer_traits<BytePtr>::pointer_to( |
2049 | *static_cast<uint8_t*>(static_cast<void*>(&*origChunks))); |
2050 | finishedAllocSize = origAllocSize; |
2051 | } |
2052 | } else { |
2053 | finishedRawAllocation = rawAllocation; |
2054 | finishedAllocSize = newAllocSize; |
2055 | chunks_ = origChunks; |
2056 | FOLLY_SAFE_DCHECK( |
2057 | origChunkCount < std::numeric_limits<InternalSizeType>::max(), "" ); |
2058 | chunkMask_ = static_cast<InternalSizeType>(origChunkCount - 1); |
2059 | F14LinkCheck<getF14IntrinsicsMode()>::check(); |
2060 | } |
2061 | |
2062 | this->afterRehash( |
2063 | std::move(undoState), |
2064 | success, |
2065 | origSize, |
2066 | origCapacity, |
2067 | newCapacity, |
2068 | finishedRawAllocation, |
2069 | finishedAllocSize); |
2070 | }; |
2071 | |
2072 | if (origSize == 0) { |
2073 | // nothing to do |
2074 | } else if (origChunkCount == 1 && newChunkCount == 1) { |
2075 | // no mask, no chunk scan, no hash computation, no probing |
2076 | auto srcChunk = origChunks; |
2077 | auto dstChunk = chunks_; |
2078 | std::size_t srcI = 0; |
2079 | std::size_t dstI = 0; |
2080 | while (dstI < origSize) { |
2081 | if (LIKELY(srcChunk->occupied(srcI))) { |
2082 | dstChunk->setTag(dstI, srcChunk->tag(srcI)); |
2083 | this->moveItemDuringRehash( |
2084 | dstChunk->itemAddr(dstI), srcChunk->item(srcI)); |
2085 | ++dstI; |
2086 | } |
2087 | ++srcI; |
2088 | } |
2089 | if (kEnableItemIteration) { |
2090 | sizeAndPackedBegin_.packedBegin() = ItemIter{dstChunk, dstI - 1}.pack(); |
2091 | } |
2092 | } else { |
2093 | // 1 byte per chunk means < 1 bit per value temporary overhead |
2094 | std::array<uint8_t, 256> stackBuf; |
2095 | uint8_t* fullness; |
2096 | if (newChunkCount <= stackBuf.size()) { |
2097 | fullness = stackBuf.data(); |
2098 | } else { |
2099 | ByteAlloc a{this->alloc()}; |
2100 | // may throw |
2101 | fullness = |
2102 | &*std::allocator_traits<ByteAlloc>::allocate(a, newChunkCount); |
2103 | } |
2104 | std::memset(fullness, '\0', newChunkCount); |
2105 | SCOPE_EXIT { |
2106 | if (newChunkCount > stackBuf.size()) { |
2107 | ByteAlloc a{this->alloc()}; |
2108 | std::allocator_traits<ByteAlloc>::deallocate( |
2109 | a, |
2110 | std::pointer_traits<typename std::allocator_traits< |
2111 | ByteAlloc>::pointer>::pointer_to(*fullness), |
2112 | newChunkCount); |
2113 | } |
2114 | }; |
2115 | |
2116 | auto srcChunk = origChunks + origChunkCount - 1; |
2117 | std::size_t remaining = origSize; |
2118 | while (remaining > 0) { |
2119 | auto iter = srcChunk->occupiedIter(); |
2120 | if (prefetchBeforeRehash()) { |
2121 | for (auto piter = iter; piter.hasNext();) { |
2122 | this->prefetchValue(srcChunk->item(piter.next())); |
2123 | } |
2124 | } |
2125 | while (iter.hasNext()) { |
2126 | --remaining; |
2127 | auto srcI = iter.next(); |
2128 | Item& srcItem = srcChunk->item(srcI); |
2129 | auto hp = splitHash( |
2130 | this->computeItemHash(const_cast<Item const&>(srcItem))); |
2131 | FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(srcI), "" ); |
2132 | |
2133 | auto dstIter = allocateTag(fullness, hp); |
2134 | this->moveItemDuringRehash(dstIter.itemAddr(), srcItem); |
2135 | } |
2136 | --srcChunk; |
2137 | } |
2138 | |
2139 | if (kEnableItemIteration) { |
// this code replaces size() invocations of adjustSizeAndBeginAfterInsert
2141 | std::size_t i = chunkMask_; |
2142 | while (fullness[i] == 0) { |
2143 | --i; |
2144 | } |
2145 | sizeAndPackedBegin_.packedBegin() = |
2146 | ItemIter{chunks_ + i, std::size_t{fullness[i]} - 1}.pack(); |
2147 | } |
2148 | } |
2149 | |
2150 | success = true; |
2151 | } |
2152 | |
2153 | // Randomization to help expose bugs when running tests in debug or |
2154 | // sanitizer builds |
2155 | |
2156 | FOLLY_ALWAYS_INLINE void debugModeOnReserve(std::size_t capacity) { |
2157 | if (kIsSanitizeAddress || kIsDebug) { |
2158 | if (capacity > size()) { |
2159 | tlsPendingSafeInserts(static_cast<std::ptrdiff_t>(capacity - size())); |
2160 | } |
2161 | } |
2162 | } |
2163 | |
2164 | void debugModeSpuriousRehash() { |
2165 | auto cc = chunkMask_ + 1; |
2166 | auto ss = chunks_->capacityScale(); |
2167 | rehashImpl(size(), cc, ss, cc, ss); |
2168 | } |
2169 | |
2170 | FOLLY_ALWAYS_INLINE void debugModeBeforeInsert() { |
// When running under ASAN, we add a spurious rehash with 1/size()
// probability before every insert. This makes it much more likely
// that reference stability problems for F14Value and F14Vector will
// be exposed. The most common pattern that causes this is
2175 | // |
2176 | // auto& ref = map[k1]; map[k2] = foo(ref); |
2177 | // |
2178 | // One way to fix this is to call map.reserve(N) before such a |
2179 | // sequence, where N is the number of keys that might be inserted |
2180 | // within the section that retains references plus the existing size. |
2181 | if (kIsSanitizeAddress && !tlsPendingSafeInserts() && size() > 0 && |
2182 | tlsMinstdRand(size()) == 0) { |
2183 | debugModeSpuriousRehash(); |
2184 | } |
2185 | } |
2186 | |
2187 | FOLLY_ALWAYS_INLINE void debugModeAfterInsert() { |
2188 | if (kIsSanitizeAddress || kIsDebug) { |
2189 | tlsPendingSafeInserts(-1); |
2190 | } |
2191 | } |
2192 | |
2193 | FOLLY_ALWAYS_INLINE void debugModePerturbSlotInsertOrder( |
2194 | ChunkPtr chunk, |
2195 | std::size_t& itemIndex) { |
2196 | FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "" ); |
2197 | constexpr bool perturbSlot = FOLLY_F14_PERTURB_INSERTION_ORDER; |
2198 | if (perturbSlot && !tlsPendingSafeInserts()) { |
2199 | std::size_t e = chunkMask_ == 0 ? bucket_count() : Chunk::kCapacity; |
2200 | std::size_t i = itemIndex + tlsMinstdRand(e - itemIndex); |
2201 | if (!chunk->occupied(i)) { |
2202 | itemIndex = i; |
2203 | } |
2204 | } |
2205 | } |
2206 | |
2207 | public: |
2208 | // user has no control over max_load_factor |
2209 | |
2210 | void rehash(std::size_t capacity) { |
2211 | reserve(capacity); |
2212 | } |
2213 | |
2214 | void reserve(std::size_t capacity) { |
2215 | // We want to support the pattern |
2216 | // map.reserve(map.size() + 2); auto& r1 = map[k1]; auto& r2 = map[k2]; |
2217 | debugModeOnReserve(capacity); |
2218 | reserveImpl(capacity); |
2219 | } |
2220 | |
// Rehashes if necessary to guarantee room to insert incoming more
// items without a further rehash
2222 | void reserveForInsert(size_t incoming = 1) { |
2223 | FOLLY_SAFE_DCHECK(incoming > 0, "" ); |
2224 | |
2225 | auto needed = size() + incoming; |
2226 | auto chunkCount = chunkMask_ + 1; |
2227 | auto scale = chunks_->capacityScale(); |
2228 | auto existing = computeCapacity(chunkCount, scale); |
2229 | if (needed - 1 >= existing) { |
2230 | reserveForInsertImpl(needed - 1, chunkCount, scale, existing); |
2231 | } |
2232 | } |
2233 | |
// Returns (pos, true) if a value was constructed and (pos, false) if
// the key was already present. key is only used during the search;
// all constructor args for an inserted value come from args...
// key won't be accessed after args are touched.
2237 | template <typename K, typename... Args> |
2238 | std::pair<ItemIter, bool> tryEmplaceValue(K const& key, Args&&... args) { |
2239 | const auto hp = splitHash(this->computeKeyHash(key)); |
2240 | |
2241 | if (size() > 0) { |
2242 | auto existing = findImpl(hp, key); |
2243 | if (!existing.atEnd()) { |
2244 | return std::make_pair(existing, false); |
2245 | } |
2246 | } |
2247 | |
2248 | debugModeBeforeInsert(); |
2249 | |
2250 | reserveForInsert(); |
2251 | |
2252 | std::size_t index = hp.first; |
2253 | ChunkPtr chunk = chunks_ + (index & chunkMask_); |
2254 | auto firstEmpty = chunk->firstEmpty(); |
2255 | |
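// If the preferred chunk is full, walk the same probe path that
// findImpl uses: bump outboundOverflowCount on each full chunk we
// pass, and mark the chunk that ends up hosting the item with a
// hosted overflow increment so that eraseBlank can undo the
// bookkeeping later.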
2256 | if (!firstEmpty.hasIndex()) { |
2257 | std::size_t delta = probeDelta(hp); |
2258 | do { |
2259 | chunk->incrOutboundOverflowCount(); |
2260 | index += delta; |
2261 | chunk = chunks_ + (index & chunkMask_); |
2262 | firstEmpty = chunk->firstEmpty(); |
2263 | } while (!firstEmpty.hasIndex()); |
2264 | chunk->adjustHostedOverflowCount(Chunk::kIncrHostedOverflowCount); |
2265 | } |
2266 | std::size_t itemIndex = firstEmpty.index(); |
2267 | |
2268 | debugModePerturbSlotInsertOrder(chunk, itemIndex); |
2269 | |
2270 | chunk->setTag(itemIndex, hp.second); |
2271 | ItemIter iter{chunk, itemIndex}; |
2272 | |
2273 | // insertAtBlank will clear the tag if the constructor throws |
2274 | insertAtBlank(iter, hp, std::forward<Args>(args)...); |
2275 | |
2276 | debugModeAfterInsert(); |
2277 | |
2278 | return std::make_pair(iter, true); |
2279 | } |
2280 | |
2281 | private: |
2282 | template <bool Reset> |
2283 | void clearImpl() noexcept { |
2284 | if (chunks_ == Chunk::emptyInstance()) { |
2285 | FOLLY_SAFE_DCHECK(empty() && bucket_count() == 0, "" ); |
2286 | return; |
2287 | } |
2288 | |
2289 | // turn clear into reset if the table is >= 16 chunks so that |
2290 | // we don't get too low a load factor |
2291 | bool willReset = Reset || chunkMask_ + 1 >= 16; |
2292 | |
2293 | auto origSize = size(); |
2294 | auto origCapacity = bucket_count(); |
2295 | if (willReset) { |
2296 | this->beforeReset(origSize, origCapacity); |
2297 | } else { |
2298 | this->beforeClear(origSize, origCapacity); |
2299 | } |
2300 | |
2301 | if (!empty()) { |
2302 | if (destroyItemOnClear()) { |
2303 | for (std::size_t ci = 0; ci <= chunkMask_; ++ci) { |
2304 | ChunkPtr chunk = chunks_ + ci; |
2305 | auto iter = chunk->occupiedIter(); |
2306 | if (prefetchBeforeDestroy()) { |
2307 | for (auto piter = iter; piter.hasNext();) { |
2308 | this->prefetchValue(chunk->item(piter.next())); |
2309 | } |
2310 | } |
2311 | while (iter.hasNext()) { |
2312 | this->destroyItem(chunk->item(iter.next())); |
2313 | } |
2314 | } |
2315 | } |
2316 | if (!willReset) { |
2317 | // It's okay to do this in a separate loop because we only do it |
2318 | // when the chunk count is small. That avoids a branch when we |
2319 | // are promoting a clear to a reset for a large table. |
2320 | auto scale = chunks_[0].capacityScale(); |
2321 | for (std::size_t ci = 0; ci <= chunkMask_; ++ci) { |
2322 | chunks_[ci].clear(); |
2323 | } |
2324 | chunks_[0].markEof(scale); |
2325 | } |
2326 | if (kEnableItemIteration) { |
2327 | sizeAndPackedBegin_.packedBegin() = ItemIter{}.pack(); |
2328 | } |
2329 | sizeAndPackedBegin_.size_ = 0; |
2330 | } |
2331 | |
2332 | if (willReset) { |
2333 | BytePtr rawAllocation = std::pointer_traits<BytePtr>::pointer_to( |
2334 | *static_cast<uint8_t*>(static_cast<void*>(&*chunks_))); |
2335 | std::size_t rawSize = |
2336 | chunkAllocSize(chunkMask_ + 1, chunks_->capacityScale()); |
2337 | |
2338 | chunks_ = Chunk::emptyInstance(); |
2339 | chunkMask_ = 0; |
2340 | |
2341 | this->afterReset(origSize, origCapacity, rawAllocation, rawSize); |
2342 | } else { |
2343 | this->afterClear(origSize, origCapacity); |
2344 | } |
2345 | } |
2346 | |
2347 | void eraseImpl(ItemIter pos, HashPair hp) { |
2348 | this->destroyItem(pos.item()); |
2349 | adjustSizeAndBeginBeforeErase(pos); |
2350 | eraseBlank(pos, hp); |
2351 | } |
2352 | |
2353 | public: |
2354 | // The item needs to still be hashable during this call. If you want |
2355 | // to intercept the value before it is destroyed (to extract it, for |
2356 | // example), use eraseIterInto(pos, beforeDestroy). |
2357 | void eraseIter(ItemIter pos) { |
2358 | eraseIterInto(pos, [](value_type&&) {}); |
2359 | } |
2360 | |
2361 | // The item needs to still be hashable during this call. If you want |
2362 | // to intercept the value before it is destroyed (to extract it, for |
2363 | // example), do so in the beforeDestroy callback. |
2364 | template <typename BeforeDestroy> |
2365 | void eraseIterInto(ItemIter pos, BeforeDestroy&& beforeDestroy) { |
2366 | HashPair hp{}; |
2367 | if (pos.chunk()->hostedOverflowCount() != 0) { |
2368 | hp = splitHash(this->computeItemHash(pos.citem())); |
2369 | } |
2370 | beforeDestroy(this->valueAtItemForExtract(pos.item())); |
2371 | eraseImpl(pos, hp); |
2372 | } |
2373 | |
2374 | template <typename K> |
2375 | std::size_t eraseKey(K const& key) { |
2376 | return eraseKeyInto(key, [](value_type&&) {}); |
2377 | } |
2378 | |
2379 | template <typename K, typename BeforeDestroy> |
2380 | std::size_t eraseKeyInto(K const& key, BeforeDestroy&& beforeDestroy) { |
2381 | if (UNLIKELY(size() == 0)) { |
2382 | return 0; |
2383 | } |
2384 | auto hp = splitHash(this->computeKeyHash(key)); |
2385 | auto iter = findImpl(hp, key); |
2386 | if (!iter.atEnd()) { |
2387 | beforeDestroy(this->valueAtItemForExtract(iter.item())); |
2388 | eraseImpl(iter, hp); |
2389 | return 1; |
2390 | } else { |
2391 | return 0; |
2392 | } |
2393 | } |
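// A sketch of using the beforeDestroy hook to extract a value while
// it is erased (illustrative only; assumes value_type is
// move-constructible and that "table" is an instance of this table):
//
//   #include <optional>
//
//   std::optional<value_type> out;
//   std::size_t n = table.eraseKeyInto(key, [&](value_type&& v) {
//     out.emplace(std::move(v));
//   });
//   // n == 1 iff key was present; out then holds the extracted value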
2394 | |
2395 | void clear() noexcept { |
2396 | if (kIsSanitizeAddress) { |
2397 | // force recycling of heap memory |
2398 | auto bc = bucket_count(); |
2399 | reset(); |
2400 | try { |
2401 | reserveImpl(bc); |
2402 | } catch (std::bad_alloc const&) { |
2403 | // ASAN mode only, keep going |
2404 | } |
2405 | } else { |
2406 | clearImpl<false>(); |
2407 | } |
2408 | } |
2409 | |
2410 | // Like clear(), but always frees all dynamic storage allocated |
2411 | // by the table. |
2412 | void reset() noexcept { |
2413 | clearImpl<true>(); |
2414 | } |
2415 | |
2416 | // Get memory footprint, not including sizeof(*this). |
2417 | std::size_t getAllocatedMemorySize() const { |
2418 | std::size_t sum = 0; |
2419 | visitAllocationClasses( |
2420 | [&sum](std::size_t bytes, std::size_t n) { sum += bytes * n; }); |
2421 | return sum; |
2422 | } |
2423 | |
2424 | // Enumerates classes of allocated memory blocks currently owned |
2425 | // by this table, calling visitor(allocationSize, allocationCount). |
2426 | // This can be used to get a more accurate indication of memory footprint |
2427 | // than getAllocatedMemorySize() if you have some way of computing the |
2428 | // internal fragmentation of the allocator, such as JEMalloc's nallocx. |
2429 | // The visitor might be called twice with the same allocationSize. The |
2430 | // visitor's computation should produce the same result for visitor(8, |
2431 | // 2) as for two calls to visitor(8, 1), for example. The visitor may |
2432 | // be called with a zero allocationCount. |
2433 | template <typename V> |
2434 | void visitAllocationClasses(V&& visitor) const { |
2435 | auto scale = chunks_->capacityScale(); |
2436 | this->visitPolicyAllocationClasses( |
2437 | scale == 0 ? 0 : chunkAllocSize(chunkMask_ + 1, scale), |
2438 | size(), |
2439 | bucket_count(), |
2440 | visitor); |
2441 | } |
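// A sketch of pairing the visitor with jemalloc's nallocx to estimate
// the true heap footprint (illustrative only; nallocx is
// jemalloc-specific and "table" is any F14 table instance):
//
//   #include <jemalloc/jemalloc.h>
//
//   std::size_t realBytes = 0;
//   table.visitAllocationClasses([&](std::size_t bytes, std::size_t n) {
//     if (bytes > 0) {
//       realBytes += nallocx(bytes, 0) * n;
//     }
//   });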
2442 | |
2443 | // visitor should take an Item const& |
2444 | template <typename V> |
2445 | void visitItems(V&& visitor) const { |
2446 | if (empty()) { |
2447 | return; |
2448 | } |
2449 | std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_; |
2450 | auto chunk = &chunks_[0]; |
2451 | for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) { |
2452 | auto iter = chunk->occupiedIter(); |
2453 | if (prefetchBeforeCopy()) { |
2454 | for (auto piter = iter; piter.hasNext();) { |
2455 | this->prefetchValue(chunk->citem(piter.next())); |
2456 | } |
2457 | } |
2458 | while (iter.hasNext()) { |
2459 | visitor(chunk->citem(iter.next())); |
2460 | } |
2461 | } |
2462 | } |
2463 | |
2464 | // visitor should take two Item const* |
2465 | template <typename V> |
2466 | void visitContiguousItemRanges(V&& visitor) const { |
2467 | if (empty()) { |
2468 | return; |
2469 | } |
2470 | std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_; |
2471 | auto chunk = &chunks_[0]; |
2472 | for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) { |
2473 | for (auto iter = chunk->occupiedRangeIter(); iter.hasNext();) { |
2474 | auto be = iter.next(); |
2475 | FOLLY_SAFE_DCHECK( |
2476 | chunk->occupied(be.first) && chunk->occupied(be.second - 1), "" ); |
2477 | Item const* b = chunk->itemAddr(be.first); |
2478 | visitor(b, b + (be.second - be.first)); |
2479 | } |
2480 | } |
2481 | } |
2482 | |
2483 | private: |
2484 | static std::size_t& histoAt( |
2485 | std::vector<std::size_t>& histo, |
2486 | std::size_t index) { |
2487 | if (histo.size() <= index) { |
2488 | histo.resize(index + 1); |
2489 | } |
2490 | return histo.at(index); |
2491 | } |
2492 | |
2493 | public: |
2494 | // Expensive |
2495 | F14TableStats computeStats() const { |
2496 | F14TableStats stats; |
2497 | |
2498 | if (kIsDebug && kEnableItemIteration) { |
2499 | // validate iteration |
2500 | std::size_t n = 0; |
2501 | ItemIter prev; |
2502 | for (auto iter = begin(); iter != end(); iter.advance()) { |
2503 | FOLLY_SAFE_DCHECK(n == 0 || iter.pack() < prev.pack(), "" ); |
2504 | ++n; |
2505 | prev = iter; |
2506 | } |
2507 | FOLLY_SAFE_DCHECK(n == size(), "" ); |
2508 | } |
2509 | |
2510 | FOLLY_SAFE_DCHECK( |
2511 | (chunks_ == Chunk::emptyInstance()) == (bucket_count() == 0), "" ); |
2512 | |
2513 | std::size_t n1 = 0; |
2514 | std::size_t n2 = 0; |
2515 | auto cc = bucket_count() == 0 ? 0 : chunkMask_ + 1; |
2516 | for (std::size_t ci = 0; ci < cc; ++ci) { |
2517 | ChunkPtr chunk = chunks_ + ci; |
2518 | FOLLY_SAFE_DCHECK(chunk->eof() == (ci == 0), "" ); |
2519 | |
2520 | auto iter = chunk->occupiedIter(); |
2521 | |
2522 | std::size_t chunkOccupied = 0; |
2523 | for (auto piter = iter; piter.hasNext(); piter.next()) { |
2524 | ++chunkOccupied; |
2525 | } |
2526 | n1 += chunkOccupied; |
2527 | |
2528 | histoAt(stats.chunkOccupancyHisto, chunkOccupied)++; |
2529 | histoAt( |
2530 | stats.chunkOutboundOverflowHisto, chunk->outboundOverflowCount())++; |
2531 | histoAt(stats.chunkHostedOverflowHisto, chunk->hostedOverflowCount())++; |
2532 | |
2533 | while (iter.hasNext()) { |
2534 | auto ii = iter.next(); |
2535 | ++n2; |
2536 | |
2537 | { |
2538 | auto& item = chunk->citem(ii); |
2539 | auto hp = splitHash(this->computeItemHash(item)); |
2540 | FOLLY_SAFE_DCHECK(chunk->tag(ii) == hp.second, "" ); |
2541 | |
2542 | std::size_t dist = 1; |
2543 | std::size_t index = hp.first; |
2544 | std::size_t delta = probeDelta(hp); |
2545 | while ((index & chunkMask_) != ci) { |
2546 | index += delta; |
2547 | ++dist; |
2548 | } |
2549 | |
2550 | histoAt(stats.keyProbeLengthHisto, dist)++; |
2551 | } |
2552 | |
2553 | // misses could have any tag, so we do the dumb but accurate |
2554 | // thing and just try them all |
2555 | for (std::size_t ti = 0; ti < 256; ++ti) { |
uint8_t tag = static_cast<uint8_t>(ti);
2557 | HashPair hp{ci, tag}; |
2558 | |
2559 | std::size_t dist = 1; |
2560 | std::size_t index = hp.first; |
2561 | std::size_t delta = probeDelta(hp); |
2562 | for (std::size_t tries = 0; tries <= chunkMask_ && |
2563 | chunks_[index & chunkMask_].outboundOverflowCount() != 0; |
2564 | ++tries) { |
2565 | index += delta; |
2566 | ++dist; |
2567 | } |
2568 | |
2569 | histoAt(stats.missProbeLengthHisto, dist)++; |
2570 | } |
2571 | } |
2572 | } |
2573 | |
2574 | FOLLY_SAFE_DCHECK(n1 == size(), "" ); |
2575 | FOLLY_SAFE_DCHECK(n2 == size(), "" ); |
2576 | |
2577 | #if FOLLY_HAS_RTTI |
2578 | stats.policy = typeid(Policy).name(); |
2579 | #endif |
2580 | stats.size = size(); |
2581 | stats.valueSize = sizeof(value_type); |
2582 | stats.bucketCount = bucket_count(); |
2583 | stats.chunkCount = cc; |
2584 | |
2585 | stats.totalBytes = sizeof(*this) + getAllocatedMemorySize(); |
2586 | stats.overheadBytes = stats.totalBytes - size() * sizeof(value_type); |
2587 | |
2588 | return stats; |
2589 | } |
2590 | }; |
2591 | } // namespace detail |
2592 | } // namespace f14 |
2593 | |
2594 | #endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE |
2595 | |
2596 | namespace f14 { |
2597 | namespace test { |
2598 | inline void disableInsertOrderRandomization() { |
2599 | if (kIsSanitizeAddress || kIsDebug) { |
2600 | detail::tlsPendingSafeInserts(static_cast<std::ptrdiff_t>( |
2601 | (std::numeric_limits<std::size_t>::max)() / 2)); |
2602 | } |
2603 | } |
2604 | } // namespace test |
2605 | } // namespace f14 |
2606 | } // namespace folly |
2607 | |