/*
 * Copyright 2013-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/AtomicUnorderedMap.h>

#include <memory>
#include <thread>
#include <unordered_map>

#include <folly/Benchmark.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/portability/Semaphore.h>
#include <folly/test/DeterministicSchedule.h>

using namespace folly;
using namespace folly::test;

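// non_atomic<T> mirrors the subset of the std::atomic<T> interface used here
// (load/store/exchange/compare_exchange/is_lock_free) but performs plain,
// unsynchronized operations. Passing it as the Atom template parameter lets
// the single-threaded benchmarks below measure the cost of the atomic
// operations themselves.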
template <class T>
struct non_atomic {
  T value;

  non_atomic() = default;
  non_atomic(const non_atomic&) = delete;
  constexpr /* implicit */ non_atomic(T desired) : value(desired) {}

  T operator+=(T arg) {
    value += arg;
    return load();
  }

  T load(std::memory_order /* order */ = std::memory_order_seq_cst) const {
    return value;
  }

  /* implicit */
  operator T() const {
    return load();
  }

  void store(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    value = desired;
  }

  T exchange(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    T old = load();
    store(desired);
    return old;
  }

  bool compare_exchange_weak(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }

    expected = value;
    return false;
  }

  bool compare_exchange_strong(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }

    expected = value;
    return false;
  }

  bool is_lock_free() const {
    return true;
  }
};

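// Convenience alias that fixes the hash and key-equality functors and derives
// the flag for skipping key/value destruction from whether both Key and Value
// have trivial destructors, leaving the index type, atom template, and
// allocator as the interesting knobs for the tests below.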
template <
    typename Key,
    typename Value,
    typename IndexType,
    template <typename> class Atom = std::atomic,
    typename Allocator = std::allocator<char>>
using UIM = AtomicUnorderedInsertMap<
    Key,
    Value,
    std::hash<Key>,
    std::equal_to<Key>,
    (boost::has_trivial_destructor<Key>::value &&
     boost::has_trivial_destructor<Value>::value),
    Atom,
    IndexType,
    Allocator>;

namespace {
template <typename T>
struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
} // namespace

// uint16_t doesn't make sense for most platforms, but we might as well
// test it
using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);

TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
  UIM<std::string,
      std::string,
      TypeParam,
      std::atomic,
      folly::detail::MMapAlloc>
      m(100);

  m.emplace("abc", "ABC");
  EXPECT_TRUE(m.find("abc") != m.cend());
  EXPECT_EQ(m.find("abc")->first, "abc");
  EXPECT_EQ(m.find("abc")->second, "ABC");
  EXPECT_TRUE(m.find("def") == m.cend());
  auto iter = m.cbegin();
  EXPECT_TRUE(iter != m.cend());
  EXPECT_TRUE(iter == m.find("abc"));
  auto a = iter;
  EXPECT_TRUE(a == iter);
  auto b = iter;
  ++iter;
  EXPECT_TRUE(iter == m.cend());
  EXPECT_TRUE(a == b);
  EXPECT_TRUE(a != iter);
  a++;
  EXPECT_TRUE(a == iter);
  EXPECT_TRUE(a != b);
}

TEST(AtomicUnorderedInsertMap, load_factor) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f);

  // we should be able to put in much more than 5000 things because of
  // our load factor request
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, true);
  }
}

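// With a load factor of 1.0 there is no slack in the allocation, so inserting
// more than the requested capacity must eventually fail; the map reports this
// by throwing std::bad_alloc from emplace.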
TEST(AtomicUnorderedInsertMap, capacity_exceeded) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f);

  EXPECT_THROW(
      {
        for (int i = 0; i < 6000; ++i) {
          m.emplace(i, false);
        }
      },
      std::bad_alloc);
}

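// The map itself is insert-only, but wrapping the value in MutableAtom (or
// MutableData for the non-atomic variant) exposes a mutable .data member, so
// values can still be updated in place after insertion.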
TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
  UIM<int, MutableAtom<int>, TypeParam> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
}

TEST(UnorderedInsertMap, value_mutation) {
  UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
  EXPECT_EQ(m.find(1)->second.data, 2);
}

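// AtomicUnorderedInsertMap64 is the variant with a 64-bit IndexType, which is
// what lets this test address two billion slots; the default 32-bit index is
// cheaper per slot but limits how many slots can be addressed.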
// This test is too expensive to run automatically.  On my dev server it
// takes about 10 minutes for dbg build, 2 for opt.
TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
  size_t capacity = 2000000000;
  AtomicUnorderedInsertMap64<size_t, size_t> big(capacity);
  for (size_t i = 0; i < capacity * 2; i += 2) {
    big.emplace(i, i * 10);
  }
  for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
    auto iter = big.find(i);
    if ((i & 1) == 0 && i < capacity * 2) {
      EXPECT_EQ(iter->second, i * 10);
    } else {
      EXPECT_TRUE(iter == big.cend());
    }
  }
}

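// Benchmarks a pure lookup-hit workload: the map is built under
// BENCHMARK_SUSPEND so that only the find() calls in the timed loop count
// toward the reported time per iteration.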
BENCHMARK(lookup_int_int_hit, iters) {
  std::unique_ptr<AtomicUnorderedInsertMap<int, size_t>> ptr = {};

  size_t capacity = 100000;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<AtomicUnorderedInsertMap<int, size_t>>(capacity);
    for (size_t i = 0; i < capacity; ++i) {
      auto k = 3 * ((5641 * i) % capacity);
      ptr->emplace(k, k + 1);
      EXPECT_EQ(ptr->find(k)->second, k + 1);
    }
  }

  for (size_t i = 0; i < iters; ++i) {
    size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
    auto iter = ptr->find(k);
    if (iter == ptr->cend() || iter->second != k + 1) {
      auto jter = ptr->find(k);
      EXPECT_TRUE(iter == jter);
    }
    EXPECT_EQ(iter->second, k + 1);
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}

struct PairHash {
  size_t operator()(const std::pair<uint64_t, uint64_t>& pr) const {
    return pr.first ^ pr.second;
  }
};

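// Shared harness for the contended read/write benchmarks below: numThreads
// worker threads each perform itersPerThread operations against one map,
// doing roughly readsPerWrite find() calls per emplace(). Map construction,
// thread spawning, and destruction are excluded from the timed region via
// BENCHMARK_SUSPEND.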
void contendedRW(
    size_t itersPerThread,
    size_t capacity,
    size_t numThreads,
    size_t readsPerWrite) {
  typedef std::pair<uint64_t, uint64_t> Key;
  typedef AtomicUnorderedInsertMap<Key, MutableAtom<uint32_t>, PairHash> Map;

  std::unique_ptr<Map> ptr = {};
  std::atomic<bool> go{false};
  std::vector<std::thread> threads;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<Map>(capacity);
    while (threads.size() < numThreads) {
      threads.emplace_back([&]() {
        while (!go) {
          std::this_thread::yield();
        }

        size_t reads = 0;
        size_t writes = 0;
        while (reads + writes < itersPerThread) {
          auto r = Random::rand32();
          Key key(reads + writes, r);
          if (reads < writes * readsPerWrite ||
              writes >= capacity / numThreads) {
            // read needed
            ++reads;
            auto iter = ptr->find(key);
            EXPECT_TRUE(
                iter == ptr->cend() ||
                iter->second.data.load(std::memory_order_acquire) >= key.first);
          } else {
            ++writes;
            try {
              auto pr = ptr->emplace(key, key.first);
              if (!pr.second) {
                pr.first->second.data++;
              }
            } catch (std::bad_alloc&) {
              LOG(INFO) << "bad alloc";
            }
          }
        }
      });
    }
  }

  go = true;

  for (auto& thr : threads) {
    thr.join();
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}

// clang-format off
// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
//
// without MAP_HUGETLB (default)
//
// ============================================================================
// common/concurrency/experimental/AtomicUnorderedMapTest.cpprelative  time/iter
//   iters/s
// ============================================================================
// lookup_int_int_hit                                          20.05ns   49.89M
// contendedRW(small_32thr_99pct)                              70.36ns   14.21M
// contendedRW(large_32thr_99pct)                             164.23ns    6.09M
// contendedRW(large_32thr_99_9pct)                           158.81ns    6.30M
// ============================================================================
//
// with MAP_HUGETLB hacked in
// ============================================================================
// lookup_int_int_hit                                          19.67ns   50.84M
// contendedRW(small_32thr_99pct)                              62.46ns   16.01M
// contendedRW(large_32thr_99pct)                             119.41ns    8.37M
// contendedRW(large_32thr_99_9pct)                           111.23ns    8.99M
// ============================================================================
// clang-format on
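// BENCHMARK_NAMED_PARAM passes the iteration count as the first argument
// (itersPerThread here) and the listed values as capacity, numThreads, and
// readsPerWrite.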
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)

BENCHMARK_DRAW_LINE();

// clang-format off
// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
// Single-threaded benchmarks to test how much better we are than
// std::unordered_map and what the cost of using atomic operations is
// in the uncontended use case
// ============================================================================
// std_map                                                      1.20ms   832.58
// atomic_fast_map                                            511.35us    1.96K
// fast_map                                                   196.28us    5.09K
// ============================================================================
// clang-format on

BENCHMARK(std_map) {
  std::unordered_map<long, long> m;
  m.reserve(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map) {
  UIM<long, long, uint32_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map) {
  UIM<long, long, uint32_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

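// The _64 variants use a uint64_t IndexType instead of uint32_t, showing the
// cost of the wider slot indexes in the same uncontended workload.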
BENCHMARK(atomic_fast_map_64) {
  UIM<long, long, uint64_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map_64) {
  UIM<long, long, uint64_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

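// Runs the gtest suite first, then the benchmarks; runBenchmarksOnFlag only
// runs them when the --benchmark flag is passed.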
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  int rv = RUN_ALL_TESTS();
  folly::runBenchmarksOnFlag();
  return rv;
}