/*
 * Copyright 2012-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * AtomicHashMap --
 *
 * A high-performance concurrent hash map with int32 or int64 keys. Supports
 * insert, find(key), findAt(index), erase(key), size, and more. Memory cannot
 * be freed or reclaimed by erase. Can grow to a maximum of about 18 times the
 * initial capacity, but performance degrades linearly with growth. Can also be
 * used as an object store with unique 32-bit references directly into the
 * internal storage (retrieved with iterator::getIndex()).
 *
 * Advantages:
 *    - High performance (~2-4x tbb::concurrent_hash_map in heavily
 *      multi-threaded environments).
 *    - Efficient memory usage if the initial capacity is not overestimated
 *      (especially for small keys and values).
 *    - Good fragmentation properties (only allocates in large slabs, which
 *      can be reused with clear() and never move).
 *    - Can generate unique, long-lived 32-bit references for efficient lookup
 *      (see findAt()).
 *
 * Disadvantages:
 *    - Keys must be native int32 or int64, or explicitly converted.
 *    - Must be able to specify unique empty, locked, and erased keys.
 *    - Performance degrades linearly as size grows beyond initialization
 *      capacity.
 *    - Max size limit of ~18x initial size (dependent on max load factor).
 *    - Memory is not freed or reclaimed by erase.
 *
 * Usage and Operation Details:
 *   There is a simple performance/memory tradeoff with maxLoadFactor: higher
 *   load factors give better memory utilization, but probe lengths increase,
 *   reducing performance.
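 *
 *   For example, a minimal construction sketch tuning the load factor via
 *   Config (assumes folly's default template parameters, declared elsewhere;
 *   the field name follows AtomicHashArray's Config, and all values here are
 *   illustrative):
 *
 *     AtomicHashMap<int64_t, int64_t>::Config config;
 *     config.maxLoadFactor = 0.5; // shorter probes, more memory
 *     AtomicHashMap<int64_t, int64_t> ahm(100000, config);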
 *
 * Implementation and Performance Details:
 *   AHArray is a fixed size contiguous block of value_type cells. When
 *   writing a cell, the key is locked while the rest of the record is
 *   written. Once done, the cell is unlocked by setting the key. find()
 *   is completely wait-free and doesn't require any non-relaxed atomic
 *   operations. AHA cannot grow beyond initialization capacity, but is
 *   faster because of reduced data indirection.
 *
 *   AHMap is a wrapper around AHArray sub-maps that allows growth and provides
 *   an interface closer to the STL UnorderedAssociativeContainer concept.
 *   These sub-maps are allocated on the fly and are processed in series, so
 *   the more there are (from growing past initial capacity), the worse the
 *   performance.
 *
 *   Insert returns false if there is a key collision and throws if the max
 *   size of the map is exceeded.
 *
 *   Benchmark performance with 8 simultaneous threads processing 1 million
 *   unique <int64, int64> entries on a 4-core, 2.5 GHz machine:
 *
 *     Load Factor   Mem Efficiency   usec/Insert   usec/Find
 *         50%             50%           0.19          0.05
 *         85%             85%           0.20          0.06
 *         90%             90%           0.23          0.08
 *         95%             95%           0.27          0.10
 *
 *   See folly/tests/AtomicHashMapTest.cpp for more benchmarks.
 *
 * @author Spencer Ahrens <sahrens@fb.com>
 * @author Jordan DeLong <delong.j@fb.com>
 *
 */

#pragma once
#define FOLLY_ATOMICHASHMAP_H_

#include <boost/iterator/iterator_facade.hpp>
#include <boost/noncopyable.hpp>
#include <boost/type_traits/is_convertible.hpp>

#include <atomic>
#include <functional>
#include <stdexcept>

#include <folly/AtomicHashArray.h>
#include <folly/CPortability.h>
#include <folly/Likely.h>
#include <folly/ThreadCachedInt.h>
#include <folly/container/Foreach.h>
#include <folly/hash/Hash.h>

namespace folly {

/*
 * AtomicHashMap provides an interface somewhat similar to the
 * UnorderedAssociativeContainer concept in C++. It does not exactly
 * match that concept (or even the basic Container concept) because of
 * some restrictions imposed by our data structure.
 *
 * Specific differences (there are quite a few):
 *
 * - Efficiently thread safe for inserts (the main point of this
 *   container), wait-free for lookups.
 *
 * - You can erase from this container, but the cell containing the key will
 *   not be freed or reclaimed.
 *
 * - You can erase everything by calling clear() (and you must guarantee that
 *   only one thread is using the container when you do so).
 *
 * - We aren't DefaultConstructible, CopyConstructible, Assignable, or
 *   EqualityComparable. (Most of these are probably not something
 *   you actually want to do with this anyway.)
 *
 * - We don't support the various bucket functions, rehash(),
 *   reserve(), or equal_range(). Also no constructors taking
 *   iterators, although this could change.
 *
 * - Several insertion functions, notably operator[], are not
 *   implemented. It is a little too easy to misuse these functions
 *   with this container, where part of the point is that when an
 *   insertion happens for a new key, it will atomically have the
 *   desired value.
 *
 * - The map has no templated insert() taking an iterator range, but
 *   we do provide an insert(key, value). The latter seems more
 *   frequently useful for this container (to avoid sprinkling
 *   make_pair everywhere), and providing both can lead to some gross
 *   template error messages.
 *
 * - The Allocator must not be stateful (a new instance will be spun up for
 *   each allocation), and its allocate() method must take a raw number of
 *   bytes.
 *
 * - KeyT must be a 32-bit or 64-bit atomic integer type, and you must
 *   define special 'locked' and 'empty' key values in the ctor (see the
 *   sketch after this comment block).
 *
 * - We don't take the Hash function object as an instance in the
 *   constructor.
 */
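
// For example, a minimal sketch of supplying the reserved key values through
// the Config object (field names follow AtomicHashArray's Config; the values
// chosen here are illustrative and must never occur as real keys):
//
//   AtomicHashMap<int32_t, int32_t>::Config config;
//   config.emptyKey = -1;  // marks an unused cell
//   config.lockedKey = -2; // marks a cell being written
//   config.erasedKey = -3; // marks an erased cell
//   AtomicHashMap<int32_t, int32_t> ahm(1024, config);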

// Thrown when insertion fails due to running out of space for
// submaps.
struct FOLLY_EXPORT AtomicHashMapFullError : std::runtime_error {
  explicit AtomicHashMapFullError()
      : std::runtime_error("AtomicHashMap is full") {}
};

template <
    class KeyT,
    class ValueT,
    class HashFcn,
    class EqualFcn,
    class Allocator,
    class ProbeFcn,
    class KeyConvertFcn>
class AtomicHashMap : boost::noncopyable {
  typedef AtomicHashArray<
      KeyT,
      ValueT,
      HashFcn,
      EqualFcn,
      Allocator,
      ProbeFcn,
      KeyConvertFcn>
      SubMap;

 public:
  typedef KeyT key_type;
  typedef ValueT mapped_type;
  typedef std::pair<const KeyT, ValueT> value_type;
  typedef HashFcn hasher;
  typedef EqualFcn key_equal;
  typedef KeyConvertFcn key_convert;
  typedef value_type* pointer;
  typedef value_type& reference;
  typedef const value_type& const_reference;
  typedef std::ptrdiff_t difference_type;
  typedef std::size_t size_type;
  typedef typename SubMap::Config Config;

  template <class ContT, class IterVal, class SubIt>
  struct ahm_iterator;

  typedef ahm_iterator<
      const AtomicHashMap,
      const value_type,
      typename SubMap::const_iterator>
      const_iterator;
  typedef ahm_iterator<AtomicHashMap, value_type, typename SubMap::iterator>
      iterator;

 public:
  const float kGrowthFrac_; // How much to grow when we run out of capacity.

  // The constructor takes a finalSizeEst, the estimated final number of
  // elements, used to maximize space utilization and performance, and a
  // Config object to specify more advanced options.
  explicit AtomicHashMap(size_t finalSizeEst, const Config& c = Config());

  ~AtomicHashMap() {
    const unsigned int numMaps =
        numMapsAllocated_.load(std::memory_order_relaxed);
    FOR_EACH_RANGE (i, 0, numMaps) {
      SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
      DCHECK(thisMap);
      SubMap::destroy(thisMap);
    }
  }

  key_equal key_eq() const {
    return key_equal();
  }
  hasher hash_function() const {
    return hasher();
  }

  /*
   * insert --
   *
   *   Returns a pair holding an iterator to the element with key r.first and
   *   a bool indicating success. Retrieve the 32-bit index with
   *   ret.first.getIndex().
   *
   *   Does not overwrite on key collision, but returns an iterator to
   *   the existing element (since this could be due to a race with
   *   another thread, it is often important to check this return
   *   value; see the sketch below the overloads).
   *
   *   Allocates new sub maps as the existing ones become full. If
   *   all sub maps are full, no element is inserted, and
   *   AtomicHashMapFullError is thrown.
   */
  std::pair<iterator, bool> insert(const value_type& r) {
    return emplace(r.first, r.second);
  }
  std::pair<iterator, bool> insert(key_type k, const mapped_type& v) {
    return emplace(k, v);
  }
  std::pair<iterator, bool> insert(value_type&& r) {
    return emplace(r.first, std::move(r.second));
  }
  std::pair<iterator, bool> insert(key_type k, mapped_type&& v) {
    return emplace(k, std::move(v));
  }
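
  // A minimal usage sketch (hypothetical values, for an int-keyed map 'ahm'):
  // check the returned pair to detect a collision or a lost race, and be
  // prepared for the map-full exception.
  //
  //   auto ret = ahm.insert(123, 456);
  //   if (!ret.second) {
  //     // The key was already present (possibly inserted concurrently just
  //     // now); the winning value is at ret.first->second.
  //   }
  //   try {
  //     ahm.insert(124, 457);
  //   } catch (const AtomicHashMapFullError&) {
  //     // All submaps are full; nothing was inserted.
  //   }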

  /*
   * emplace --
   *
   *   Same contract as insert(), but performs in-place construction
   *   of the value type using the specified arguments.
   *
   *   Also, like find(), this method optionally allows the lookup key to have
   *   a type different from that stored in the table; see find(). If and only
   *   if no equal key is already present, this method converts the lookup key
   *   to a key of type KeyT using the provided LookupKeyToKeyFcn.
   */
  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal,
      typename LookupKeyToKeyFcn = key_convert,
      typename... ArgTs>
  std::pair<iterator, bool> emplace(LookupKeyT k, ArgTs&&... vCtorArg);

  /*
   * find --
   *
   *   Returns an iterator to the element if found, otherwise end().
   *
   *   As an optional feature, the type of the key to look up (LookupKeyT) is
   *   allowed to be different from the type of keys actually stored (KeyT).
   *
   *   This enables use cases where materializing the key is costly and
   *   usually redundant, e.g., canonicalizing/interning a set of strings and
   *   being able to look up by StringPiece. To use this feature, LookupHashFcn
   *   must take a LookupKeyT, and LookupEqualFcn must take KeyT and LookupKeyT
   *   as first and second parameter, respectively.
   *
   *   See folly/test/ArrayHashMapTest.cpp for sample usage; a short sketch
   *   also follows the declarations below.
   */
  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal>
  iterator find(LookupKeyT k);

  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal>
  const_iterator find(LookupKeyT k) const;
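
  // A minimal sketch of a heterogeneous lookup (hypothetical functors, for
  // illustration only): keys are stored as int64_t but probed with int32_t,
  // so the caller never materializes an int64_t key. Note that LookupHash
  // must hash a probe to the same value the stored hasher produces for the
  // equal stored key.
  //
  //   struct LookupHash {
  //     size_t operator()(int32_t k) const { return std::hash<int64_t>()(k); }
  //   };
  //   struct LookupEq {
  //     bool operator()(int64_t stored, int32_t probe) const {
  //       return stored == probe;
  //     }
  //   };
  //   auto it = ahm.find<int32_t, LookupHash, LookupEq>(42);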

  /*
   * erase --
   *
   *   Erases key k from the map.
   *
   *   Returns 1 if the key was found and erased, and 0 otherwise.
   */
  size_type erase(key_type k);

  /*
   * clear --
   *
   *   Wipes all keys and values from the primary map and destroys all
   *   secondary maps. The primary map remains allocated, so its memory can
   *   be reused in place. Not thread safe.
   */
  void clear();

  /*
   * size --
   *
   *   Returns the exact size of the map. Note this is not as cheap as typical
   *   size() implementations because, for each AtomicHashArray in this AHM, we
   *   need to grab a lock and accumulate the values from all the thread-local
   *   counters. See folly/ThreadCachedInt.h for more details.
   */
  size_t size() const;

  bool empty() const {
    return size() == 0;
  }

  size_type count(key_type k) const {
    return find(k) == end() ? 0 : 1;
  }

  /*
   * findAt --
   *
   *   Returns an iterator into the map.
   *
   *   idx should only be an unmodified value returned by calling getIndex() on
   *   a valid iterator returned by find() or insert(). If idx is invalid, you
   *   have a bug and the process aborts. A usage sketch follows the overloads
   *   below.
   */
  iterator findAt(uint32_t idx) {
    SimpleRetT ret = findAtInternal(idx);
    DCHECK_LT(ret.i, numSubMaps());
    return iterator(
        this,
        ret.i,
        subMaps_[ret.i].load(std::memory_order_relaxed)->makeIter(ret.j));
  }
  const_iterator findAt(uint32_t idx) const {
    return const_cast<AtomicHashMap*>(this)->findAt(idx);
  }
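
  // A minimal sketch (hypothetical values) of the long-lived 32-bit
  // reference feature:
  //
  //   uint32_t idx = ahm.insert(5, 10).first.getIndex();
  //   // ... later, possibly on another thread:
  //   auto it = ahm.findAt(idx); // direct access; no hashing or probing
  //   DCHECK_EQ(it->first, 5);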

  // Total capacity - summation of capacities of all submaps.
  size_t capacity() const;

  // Number of new insertions until current submaps are all at max load factor.
  size_t spaceRemaining() const;

  void setEntryCountThreadCacheSize(int32_t newSize) {
    const int numMaps = numMapsAllocated_.load(std::memory_order_acquire);
    for (int i = 0; i < numMaps; ++i) {
      SubMap* map = subMaps_[i].load(std::memory_order_relaxed);
      map->setEntryCountThreadCacheSize(newSize);
    }
  }

  // Number of sub maps allocated so far to implement this map. The more there
  // are, the worse the performance.
  int numSubMaps() const {
    return numMapsAllocated_.load(std::memory_order_acquire);
  }

  iterator begin() {
    iterator it(this, 0, subMaps_[0].load(std::memory_order_relaxed)->begin());
    it.checkAdvanceToNextSubmap();
    return it;
  }

  const_iterator begin() const {
    const_iterator it(
        this, 0, subMaps_[0].load(std::memory_order_relaxed)->begin());
    it.checkAdvanceToNextSubmap();
    return it;
  }

  iterator end() {
    return iterator();
  }

  const_iterator end() const {
    return const_iterator();
  }

  /* Advanced functions for direct access: */

  inline uint32_t recToIdx(const value_type& r, bool mayInsert = true) {
    SimpleRetT ret =
        mayInsert ? insertInternal(r.first, r.second) : findInternal(r.first);
    return encodeIndex(ret.i, ret.j);
  }

  inline uint32_t recToIdx(value_type&& r, bool mayInsert = true) {
    SimpleRetT ret = mayInsert ? insertInternal(r.first, std::move(r.second))
                               : findInternal(r.first);
    return encodeIndex(ret.i, ret.j);
  }

  inline uint32_t
  recToIdx(key_type k, const mapped_type& v, bool mayInsert = true) {
    SimpleRetT ret = mayInsert ? insertInternal(k, v) : findInternal(k);
    return encodeIndex(ret.i, ret.j);
  }

  inline uint32_t recToIdx(key_type k, mapped_type&& v, bool mayInsert = true) {
    SimpleRetT ret =
        mayInsert ? insertInternal(k, std::move(v)) : findInternal(k);
    return encodeIndex(ret.i, ret.j);
  }

  inline uint32_t keyToIdx(const KeyT k, bool mayInsert = false) {
    return recToIdx(value_type(k), mayInsert);
  }

  inline const value_type& idxToRec(uint32_t idx) const {
    SimpleRetT ret = findAtInternal(idx);
    return subMaps_[ret.i].load(std::memory_order_relaxed)->idxToRec(ret.j);
  }

  /* Private data and helper functions... */

 private:
  // This limits primary submap size to 2^31 ~= 2 billion, secondary submap
  // size to 2^(32 - kNumSubMapBits_ - 1) = 2^27 ~= 130 million, and num
  // subMaps to 2^kNumSubMapBits_ = 16.
  static const uint32_t kNumSubMapBits_ = 4;
  static const uint32_t kSecondaryMapBit_ = 1u << 31; // Highest bit
  static const uint32_t kSubMapIndexShift_ = 32 - kNumSubMapBits_ - 1;
  static const uint32_t kSubMapIndexMask_ = (1 << kSubMapIndexShift_) - 1;
  static const uint32_t kNumSubMaps_ = 1 << kNumSubMapBits_;
  static const uintptr_t kLockedPtr_ = 0x88ULL << 48; // invalid pointer
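
  // E.g., with the constants above, an index into secondary submap i at cell
  // offset j is encoded as kSecondaryMapBit_ | (i << kSubMapIndexShift_) | j,
  // while an index into the primary submap is simply j (top bit clear).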

  struct SimpleRetT {
    uint32_t i;
    size_t j;
    bool success;
    SimpleRetT(uint32_t ii, size_t jj, bool s) : i(ii), j(jj), success(s) {}
    SimpleRetT() = default;
  };

  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal,
      typename LookupKeyToKeyFcn = key_convert,
      typename... ArgTs>
  SimpleRetT insertInternal(LookupKeyT key, ArgTs&&... value);

  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal>
  SimpleRetT findInternal(const LookupKeyT k) const;

  SimpleRetT findAtInternal(uint32_t idx) const;

  std::atomic<SubMap*> subMaps_[kNumSubMaps_];
  std::atomic<uint32_t> numMapsAllocated_;

  inline bool tryLockMap(unsigned int idx) {
    SubMap* val = nullptr;
    return subMaps_[idx].compare_exchange_strong(
        val, (SubMap*)kLockedPtr_, std::memory_order_acquire);
  }

  static inline uint32_t encodeIndex(uint32_t subMap, uint32_t subMapIdx);

}; // AtomicHashMap

template <
    class KeyT,
    class ValueT,
    class HashFcn = std::hash<KeyT>,
    class EqualFcn = std::equal_to<KeyT>,
    class Allocator = std::allocator<char>>
using QuadraticProbingAtomicHashMap = AtomicHashMap<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    AtomicHashArrayQuadraticProbeFcn>;
} // namespace folly

#include <folly/AtomicHashMap-inl.h>