/*
 * Copyright 2012-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * AtomicHashMap --
 *
 * A high-performance concurrent hash map with int32 or int64 keys. Supports
 * insert, find(key), findAt(index), erase(key), size, and more. Memory cannot
 * be freed or reclaimed by erase. Can grow to a maximum of about 18 times the
 * initial capacity, but performance degrades linearly with growth. Can also be
 * used as an object store with unique 32-bit references directly into the
 * internal storage (retrieved with iterator::getIndex()).
 *
 * Advantages:
 *    - High performance (~2-4x tbb::concurrent_hash_map in heavily
 *      multi-threaded environments).
 *    - Efficient memory usage if the initial capacity is not overestimated
 *      (especially for small keys and values).
 *    - Good fragmentation properties (only allocates in large slabs, which
 *      can be reused with clear() and never move).
 *    - Can generate unique, long-lived 32-bit references for efficient lookup
 *      (see findAt()).
 *
 * Disadvantages:
 *    - Keys must be native int32 or int64, or explicitly converted.
 *    - Must be able to specify unique empty, locked, and erased keys.
 *    - Performance degrades linearly as size grows beyond initialization
 *      capacity.
 *    - Max size limit of ~18x initial size (dependent on max load factor).
 *    - Memory is not freed or reclaimed by erase.
 *
 * Usage and Operation Details:
 *   There is a simple performance/memory tradeoff with maxLoadFactor: higher
 *   load factors give better memory utilization, but probe lengths increase,
 *   reducing performance.
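 *
 *   For example, a minimal construction sketch tuning the load factor via
 *   Config (assumes folly's default template parameters, declared elsewhere;
 *   the field name follows AtomicHashArray's Config, and all values here are
 *   illustrative):
 *
 *     AtomicHashMap<int64_t, int64_t>::Config config;
 *     config.maxLoadFactor = 0.5; // shorter probes, more memory
 *     AtomicHashMap<int64_t, int64_t> ahm(100000, config);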
 *
 * Implementation and Performance Details:
 *   AHArray is a fixed size contiguous block of value_type cells. When
 *   writing a cell, the key is locked while the rest of the record is
 *   written. Once done, the cell is unlocked by setting the key. find()
 *   is completely wait-free and doesn't require any non-relaxed atomic
 *   operations. AHA cannot grow beyond initialization capacity, but is
 *   faster because of reduced data indirection.
 *
 *   AHMap is a wrapper around AHArray sub-maps that allows growth and provides
 *   an interface closer to the STL UnorderedAssociativeContainer concept.
 *   These sub-maps are allocated on the fly and are processed in series, so
 *   the more there are (from growing past initial capacity), the worse the
 *   performance.
 *
 *   Insert returns false if there is a key collision and throws if the max
 *   size of the map is exceeded.
 *
 *   Benchmark performance with 8 simultaneous threads processing 1 million
 *   unique <int64, int64> entries on a 4-core, 2.5 GHz machine:
 *
 *     Load Factor   Mem Efficiency   usec/Insert   usec/Find
 *         50%             50%           0.19          0.05
 *         85%             85%           0.20          0.06
 *         90%             90%           0.23          0.08
 *         95%             95%           0.27          0.10
 *
 *   See folly/tests/AtomicHashMapTest.cpp for more benchmarks.
 *
 * @author Spencer Ahrens <sahrens@fb.com>
 * @author Jordan DeLong <delong.j@fb.com>
 *
 */

#pragma once
#define FOLLY_ATOMICHASHMAP_H_

#include <boost/iterator/iterator_facade.hpp>
#include <boost/noncopyable.hpp>
#include <boost/type_traits/is_convertible.hpp>

#include <atomic>
#include <functional>
#include <stdexcept>

#include <folly/AtomicHashArray.h>
#include <folly/CPortability.h>
#include <folly/Likely.h>
#include <folly/ThreadCachedInt.h>
#include <folly/container/Foreach.h>
#include <folly/hash/Hash.h>

namespace folly {

/*
 * AtomicHashMap provides an interface somewhat similar to the
 * UnorderedAssociativeContainer concept in C++. It does not exactly
 * match that concept (or even the basic Container concept) because of
 * some restrictions imposed by our data structure.
 *
 * Specific differences (there are quite a few):
 *
 * - Efficiently thread safe for inserts (the main point of this
 *   container), wait-free for lookups.
 *
 * - You can erase from this container, but the cell containing the key will
 *   not be freed or reclaimed.
 *
 * - You can erase everything by calling clear() (and you must guarantee that
 *   only one thread is using the container when you do so).
 *
 * - We aren't DefaultConstructible, CopyConstructible, Assignable, or
 *   EqualityComparable. (Most of these are probably not something
 *   you actually want to do with this anyway.)
 *
 * - We don't support the various bucket functions, rehash(),
 *   reserve(), or equal_range(). Also no constructors taking
 *   iterators, although this could change.
 *
 * - Several insertion functions, notably operator[], are not
 *   implemented. It is a little too easy to misuse these functions
 *   with this container, where part of the point is that when an
 *   insertion happens for a new key, it will atomically have the
 *   desired value.
 *
 * - The map has no templated insert() taking an iterator range, but
 *   we do provide an insert(key, value). The latter seems more
 *   frequently useful for this container (to avoid sprinkling
 *   make_pair everywhere), and providing both can lead to some gross
 *   template error messages.
 *
 * - The Allocator must not be stateful (a new instance will be spun up for
 *   each allocation), and its allocate() method must take a raw number of
 *   bytes.
 *
 * - KeyT must be a 32-bit or 64-bit atomic integer type, and you must
 *   define special 'locked' and 'empty' key values in the ctor (see the
 *   sketch after this comment block).
 *
 * - We don't take the Hash function object as an instance in the
 *   constructor.
 */
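
// For example, a minimal sketch of supplying the reserved key values through
// the Config object (field names follow AtomicHashArray's Config; the values
// chosen here are illustrative and must never occur as real keys):
//
//   AtomicHashMap<int32_t, int32_t>::Config config;
//   config.emptyKey = -1;  // marks an unused cell
//   config.lockedKey = -2; // marks a cell being written
//   config.erasedKey = -3; // marks an erased cell
//   AtomicHashMap<int32_t, int32_t> ahm(1024, config);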

// Thrown when insertion fails due to running out of space for
// submaps.
struct FOLLY_EXPORT AtomicHashMapFullError : std::runtime_error {
  explicit AtomicHashMapFullError()
      : std::runtime_error("AtomicHashMap is full") {}
};

template <
    class KeyT,
    class ValueT,
    class HashFcn,
    class EqualFcn,
    class Allocator,
    class ProbeFcn,
    class KeyConvertFcn>
class AtomicHashMap : boost::noncopyable {
  typedef AtomicHashArray<
      KeyT,
      ValueT,
      HashFcn,
      EqualFcn,
      Allocator,
      ProbeFcn,
      KeyConvertFcn>
      SubMap;

 public:
  typedef KeyT key_type;
  typedef ValueT mapped_type;
  typedef std::pair<const KeyT, ValueT> value_type;
  typedef HashFcn hasher;
  typedef EqualFcn key_equal;
  typedef KeyConvertFcn key_convert;
  typedef value_type* pointer;
  typedef value_type& reference;
  typedef const value_type& const_reference;
  typedef std::ptrdiff_t difference_type;
  typedef std::size_t size_type;
  typedef typename SubMap::Config Config;

  template <class ContT, class IterVal, class SubIt>
  struct ahm_iterator;

  typedef ahm_iterator<
      const AtomicHashMap,
      const value_type,
      typename SubMap::const_iterator>
      const_iterator;
  typedef ahm_iterator<AtomicHashMap, value_type, typename SubMap::iterator>
      iterator;

 public:
  const float kGrowthFrac_; // How much to grow when we run out of capacity.

  // The constructor takes a finalSizeEst, the estimated final number of
  // elements, used to maximize space utilization and performance, and a
  // Config object to specify more advanced options.
  explicit AtomicHashMap(size_t finalSizeEst, const Config& c = Config());

  ~AtomicHashMap() {
    const unsigned int numMaps =
        numMapsAllocated_.load(std::memory_order_relaxed);
    FOR_EACH_RANGE (i, 0, numMaps) {
      SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
      DCHECK(thisMap);
      SubMap::destroy(thisMap);
    }
  }

  key_equal key_eq() const {
    return key_equal();
  }
  hasher hash_function() const {
    return hasher();
  }

  /*
   * insert --
   *
   *   Returns a pair holding an iterator to the element with key r.first and
   *   a bool indicating success. Retrieve the 32-bit index with
   *   ret.first.getIndex().
   *
   *   Does not overwrite on key collision, but returns an iterator to
   *   the existing element (since this could be due to a race with
   *   another thread, it is often important to check this return
   *   value; see the sketch below the overloads).
   *
   *   Allocates new sub maps as the existing ones become full. If
   *   all sub maps are full, no element is inserted, and
   *   AtomicHashMapFullError is thrown.
   */
  std::pair<iterator, bool> insert(const value_type& r) {
    return emplace(r.first, r.second);
  }
  std::pair<iterator, bool> insert(key_type k, const mapped_type& v) {
    return emplace(k, v);
  }
  std::pair<iterator, bool> insert(value_type&& r) {
    return emplace(r.first, std::move(r.second));
  }
  std::pair<iterator, bool> insert(key_type k, mapped_type&& v) {
    return emplace(k, std::move(v));
  }
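
  // A minimal usage sketch (hypothetical values, for an int-keyed map 'ahm'):
  // check the returned pair to detect a collision or a lost race, and be
  // prepared for the map-full exception.
  //
  //   auto ret = ahm.insert(123, 456);
  //   if (!ret.second) {
  //     // The key was already present (possibly inserted concurrently just
  //     // now); the winning value is at ret.first->second.
  //   }
  //   try {
  //     ahm.insert(124, 457);
  //   } catch (const AtomicHashMapFullError&) {
  //     // All submaps are full; nothing was inserted.
  //   }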

  /*
   * emplace --
   *
   *   Same contract as insert(), but performs in-place construction
   *   of the value type using the specified arguments.
   *
   *   Also, like find(), this method optionally allows the lookup key to have
   *   a type different from that stored in the table; see find(). If and only
   *   if no equal key is already present, this method converts the lookup key
   *   to a key of type KeyT using the provided LookupKeyToKeyFcn.
   */
  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal,
      typename LookupKeyToKeyFcn = key_convert,
      typename... ArgTs>
  std::pair<iterator, bool> emplace(LookupKeyT k, ArgTs&&... vCtorArg);

  /*
   * find --
   *
   *   Returns an iterator to the element if found, otherwise end().
   *
   *   As an optional feature, the type of the key to look up (LookupKeyT) is
   *   allowed to be different from the type of keys actually stored (KeyT).
   *
   *   This enables use cases where materializing the key is costly and
   *   usually redundant, e.g., canonicalizing/interning a set of strings and
   *   being able to look up by StringPiece. To use this feature, LookupHashFcn
   *   must take a LookupKeyT, and LookupEqualFcn must take KeyT and LookupKeyT
   *   as first and second parameter, respectively.
   *
   *   See folly/test/ArrayHashMapTest.cpp for sample usage; a short sketch
   *   also follows the declarations below.
   */
  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal>
  iterator find(LookupKeyT k);

  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal>
  const_iterator find(LookupKeyT k) const;
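
  // A minimal sketch of a heterogeneous lookup (hypothetical functors, for
  // illustration only): keys are stored as int64_t but probed with int32_t,
  // so the caller never materializes an int64_t key. Note that LookupHash
  // must hash a probe to the same value the stored hasher produces for the
  // equal stored key.
  //
  //   struct LookupHash {
  //     size_t operator()(int32_t k) const { return std::hash<int64_t>()(k); }
  //   };
  //   struct LookupEq {
  //     bool operator()(int64_t stored, int32_t probe) const {
  //       return stored == probe;
  //     }
  //   };
  //   auto it = ahm.find<int32_t, LookupHash, LookupEq>(42);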

  /*
   * erase --
   *
   *   Erases key k from the map.
   *
   *   Returns 1 if the key was found and erased, and 0 otherwise.
   */
  size_type erase(key_type k);

  /*
   * clear --
   *
   *   Wipes all keys and values from the primary map and destroys all
   *   secondary maps. The primary map remains allocated, so its memory can
   *   be reused in place. Not thread safe.
   */
  void clear();

  /*
   * size --
   *
   *   Returns the exact size of the map. Note this is not as cheap as typical
   *   size() implementations because, for each AtomicHashArray in this AHM, we
   *   need to grab a lock and accumulate the values from all the thread-local
   *   counters. See folly/ThreadCachedInt.h for more details.
   */
  size_t size() const;

  bool empty() const {
    return size() == 0;
  }

  size_type count(key_type k) const {
    return find(k) == end() ? 0 : 1;
  }

  /*
   * findAt --
   *
   *   Returns an iterator into the map.
   *
   *   idx should only be an unmodified value returned by calling getIndex() on
   *   a valid iterator returned by find() or insert(). If idx is invalid, you
   *   have a bug and the process aborts. A usage sketch follows the overloads
   *   below.
   */
  iterator findAt(uint32_t idx) {
    SimpleRetT ret = findAtInternal(idx);
    DCHECK_LT(ret.i, numSubMaps());
    return iterator(
        this,
        ret.i,
        subMaps_[ret.i].load(std::memory_order_relaxed)->makeIter(ret.j));
  }
  const_iterator findAt(uint32_t idx) const {
    return const_cast<AtomicHashMap*>(this)->findAt(idx);
  }
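
  // A minimal sketch (hypothetical values) of the long-lived 32-bit
  // reference feature:
  //
  //   uint32_t idx = ahm.insert(5, 10).first.getIndex();
  //   // ... later, possibly on another thread:
  //   auto it = ahm.findAt(idx); // direct access; no hashing or probing
  //   DCHECK_EQ(it->first, 5);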

  // Total capacity - summation of capacities of all submaps.
  size_t capacity() const;

  // Number of new insertions until current submaps are all at max load factor.
  size_t spaceRemaining() const;

  void setEntryCountThreadCacheSize(int32_t newSize) {
    const int numMaps = numMapsAllocated_.load(std::memory_order_acquire);
    for (int i = 0; i < numMaps; ++i) {
      SubMap* map = subMaps_[i].load(std::memory_order_relaxed);
      map->setEntryCountThreadCacheSize(newSize);
    }
  }

  // Number of sub maps allocated so far to implement this map. The more there
  // are, the worse the performance.
  int numSubMaps() const {
    return numMapsAllocated_.load(std::memory_order_acquire);
  }

  iterator begin() {
    iterator it(this, 0, subMaps_[0].load(std::memory_order_relaxed)->begin());
    it.checkAdvanceToNextSubmap();
    return it;
  }

  const_iterator begin() const {
    const_iterator it(
        this, 0, subMaps_[0].load(std::memory_order_relaxed)->begin());
    it.checkAdvanceToNextSubmap();
    return it;
  }

  iterator end() {
    return iterator();
  }

  const_iterator end() const {
    return const_iterator();
  }

  /* Advanced functions for direct access: */

  inline uint32_t recToIdx(const value_type& r, bool mayInsert = true) {
    SimpleRetT ret =
        mayInsert ? insertInternal(r.first, r.second) : findInternal(r.first);
    return encodeIndex(ret.i, ret.j);
  }

  inline uint32_t recToIdx(value_type&& r, bool mayInsert = true) {
    SimpleRetT ret = mayInsert ? insertInternal(r.first, std::move(r.second))
                               : findInternal(r.first);
    return encodeIndex(ret.i, ret.j);
  }

  inline uint32_t
  recToIdx(key_type k, const mapped_type& v, bool mayInsert = true) {
    SimpleRetT ret = mayInsert ? insertInternal(k, v) : findInternal(k);
    return encodeIndex(ret.i, ret.j);
  }

  inline uint32_t recToIdx(key_type k, mapped_type&& v, bool mayInsert = true) {
    SimpleRetT ret =
        mayInsert ? insertInternal(k, std::move(v)) : findInternal(k);
    return encodeIndex(ret.i, ret.j);
  }

  inline uint32_t keyToIdx(const KeyT k, bool mayInsert = false) {
    return recToIdx(value_type(k), mayInsert);
  }

  inline const value_type& idxToRec(uint32_t idx) const {
    SimpleRetT ret = findAtInternal(idx);
    return subMaps_[ret.i].load(std::memory_order_relaxed)->idxToRec(ret.j);
  }

  /* Private data and helper functions... */

 private:
  // This limits primary submap size to 2^31 ~= 2 billion, secondary submap
  // size to 2^(32 - kNumSubMapBits_ - 1) = 2^27 ~= 130 million, and num
  // subMaps to 2^kNumSubMapBits_ = 16.
  static const uint32_t kNumSubMapBits_ = 4;
  static const uint32_t kSecondaryMapBit_ = 1u << 31; // Highest bit
  static const uint32_t kSubMapIndexShift_ = 32 - kNumSubMapBits_ - 1;
  static const uint32_t kSubMapIndexMask_ = (1 << kSubMapIndexShift_) - 1;
  static const uint32_t kNumSubMaps_ = 1 << kNumSubMapBits_;
  static const uintptr_t kLockedPtr_ = 0x88ULL << 48; // invalid pointer
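
  // E.g., with the constants above, an index into secondary submap i at cell
  // offset j is encoded as kSecondaryMapBit_ | (i << kSubMapIndexShift_) | j,
  // while an index into the primary submap is simply j (top bit clear).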

  struct SimpleRetT {
    uint32_t i;
    size_t j;
    bool success;
    SimpleRetT(uint32_t ii, size_t jj, bool s) : i(ii), j(jj), success(s) {}
    SimpleRetT() = default;
  };

  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal,
      typename LookupKeyToKeyFcn = key_convert,
      typename... ArgTs>
  SimpleRetT insertInternal(LookupKeyT key, ArgTs&&... value);

  template <
      typename LookupKeyT = key_type,
      typename LookupHashFcn = hasher,
      typename LookupEqualFcn = key_equal>
  SimpleRetT findInternal(const LookupKeyT k) const;

  SimpleRetT findAtInternal(uint32_t idx) const;

  std::atomic<SubMap*> subMaps_[kNumSubMaps_];
  std::atomic<uint32_t> numMapsAllocated_;

  inline bool tryLockMap(unsigned int idx) {
    SubMap* val = nullptr;
    return subMaps_[idx].compare_exchange_strong(
        val, (SubMap*)kLockedPtr_, std::memory_order_acquire);
  }

  static inline uint32_t encodeIndex(uint32_t subMap, uint32_t subMapIdx);

}; // AtomicHashMap

template <
    class KeyT,
    class ValueT,
    class HashFcn = std::hash<KeyT>,
    class EqualFcn = std::equal_to<KeyT>,
    class Allocator = std::allocator<char>>
using QuadraticProbingAtomicHashMap = AtomicHashMap<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    AtomicHashArrayQuadraticProbeFcn>;
} // namespace folly

#include <folly/AtomicHashMap-inl.h>