1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
#include <folly/AtomicUnorderedMap.h>

#include <memory>
#include <thread>
#include <type_traits>
#include <unordered_map>

#include <folly/Benchmark.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/portability/Semaphore.h>
#include <folly/test/DeterministicSchedule.h>
28 | |
29 | using namespace folly; |
30 | using namespace folly::test; |
31 | |
// Single-threaded stand-in for std::atomic<T>, used by the benchmarks
// below to measure the cost of real atomic operations in the
// uncontended case.  It mirrors the std::atomic interface
// (load/store/exchange/compare_exchange_*) but performs plain,
// unsynchronized accesses, so it must only be used from one thread.
template <class T>
struct non_atomic {
  T value;

  non_atomic() = default;
  non_atomic(const non_atomic&) = delete;
  constexpr /* implicit */ non_atomic(T desired) : value(desired) {}

  T operator+=(T arg) {
    value += arg;
    return load();
  }

  T load(std::memory_order /* order */ = std::memory_order_seq_cst) const {
    return value;
  }

  /* implicit */
  operator T() const {
    return load();
  }

  void store(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    value = desired;
  }

  T exchange(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    T old = load();
    store(desired);
    return old;
  }

  // Without concurrency there are no spurious failures, so the weak
  // form can delegate to the strong one instead of duplicating the
  // comparison logic.
  bool compare_exchange_weak(
      T& expected,
      T desired,
      std::memory_order success = std::memory_order_seq_cst,
      std::memory_order failure = std::memory_order_seq_cst) {
    return compare_exchange_strong(expected, desired, success, failure);
  }

  bool compare_exchange_strong(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }

    // Mirror std::atomic: on failure, report the actual value back
    // through `expected`.
    expected = value;
    return false;
  }

  bool is_lock_free() const {
    return true;
  }
};
100 | |
101 | template < |
102 | typename Key, |
103 | typename Value, |
104 | typename IndexType, |
105 | template <typename> class Atom = std::atomic, |
106 | typename Allocator = std::allocator<char>> |
107 | using UIM = AtomicUnorderedInsertMap< |
108 | Key, |
109 | Value, |
110 | std::hash<Key>, |
111 | std::equal_to<Key>, |
112 | (boost::has_trivial_destructor<Key>::value && |
113 | boost::has_trivial_destructor<Value>::value), |
114 | Atom, |
115 | IndexType, |
116 | Allocator>; |
117 | |
namespace {
// Typed-test fixture: the type parameter T selects the IndexType used
// by the UIM instantiations in the TYPED_TEST bodies below.
template <typename T>
struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
} // namespace
122 | |
// Instantiate the typed tests for each candidate slot-index width.
// uint16_t doesn't make sense for most platforms, but we might as well
// test it
using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);
127 | |
128 | TYPED_TEST(AtomicUnorderedInsertMapTest, basic) { |
129 | UIM<std::string, |
130 | std::string, |
131 | TypeParam, |
132 | std::atomic, |
133 | folly::detail::MMapAlloc> |
134 | m(100); |
135 | |
136 | m.emplace("abc" , "ABC" ); |
137 | EXPECT_TRUE(m.find("abc" ) != m.cend()); |
138 | EXPECT_EQ(m.find("abc" )->first, "abc" ); |
139 | EXPECT_EQ(m.find("abc" )->second, "ABC" ); |
140 | EXPECT_TRUE(m.find("def" ) == m.cend()); |
141 | auto iter = m.cbegin(); |
142 | EXPECT_TRUE(iter != m.cend()); |
143 | EXPECT_TRUE(iter == m.find("abc" )); |
144 | auto a = iter; |
145 | EXPECT_TRUE(a == iter); |
146 | auto b = iter; |
147 | ++iter; |
148 | EXPECT_TRUE(iter == m.cend()); |
149 | EXPECT_TRUE(a == b); |
150 | EXPECT_TRUE(a != iter); |
151 | a++; |
152 | EXPECT_TRUE(a == iter); |
153 | EXPECT_TRUE(a != b); |
154 | } |
155 | |
156 | TEST(AtomicUnorderedInsertMap, load_factor) { |
157 | AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f); |
158 | |
159 | // we should be able to put in much more than 5000 things because of |
160 | // our load factor request |
161 | for (int i = 0; i < 10000; ++i) { |
162 | m.emplace(i, true); |
163 | } |
164 | } |
165 | |
166 | TEST(AtomicUnorderedInsertMap, capacity_exceeded) { |
167 | AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f); |
168 | |
169 | EXPECT_THROW( |
170 | { |
171 | for (int i = 0; i < 6000; ++i) { |
172 | m.emplace(i, false); |
173 | } |
174 | }, |
175 | std::bad_alloc); |
176 | } |
177 | |
178 | TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) { |
179 | UIM<int, MutableAtom<int>, TypeParam> m(100); |
180 | |
181 | for (int i = 0; i < 50; ++i) { |
182 | m.emplace(i, i); |
183 | } |
184 | |
185 | m.find(1)->second.data++; |
186 | } |
187 | |
188 | TEST(UnorderedInsertMap, value_mutation) { |
189 | UIM<int, MutableData<int>, uint32_t, non_atomic> m(100); |
190 | |
191 | for (int i = 0; i < 50; ++i) { |
192 | m.emplace(i, i); |
193 | } |
194 | |
195 | m.find(1)->second.data++; |
196 | EXPECT_EQ(m.find(1)->second.data, 2); |
197 | } |
198 | |
199 | // This test is too expensive to run automatically. On my dev server it |
200 | // takes about 10 minutes for dbg build, 2 for opt. |
201 | TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) { |
202 | size_t capacity = 2000000000; |
203 | AtomicUnorderedInsertMap64<size_t, size_t> big(capacity); |
204 | for (size_t i = 0; i < capacity * 2; i += 2) { |
205 | big.emplace(i, i * 10); |
206 | } |
207 | for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) { |
208 | auto iter = big.find(i); |
209 | if ((i & 1) == 0 && i < capacity * 2) { |
210 | EXPECT_EQ(iter->second, i * 10); |
211 | } else { |
212 | EXPECT_TRUE(iter == big.cend()); |
213 | } |
214 | } |
215 | } |
216 | |
// Benchmarks the hit path of find(): the map is pre-populated with
// every key the timed loop can generate, so each lookup succeeds.
// Setup and teardown are excluded from the timing via
// BENCHMARK_SUSPEND.
BENCHMARK(lookup_int_int_hit, iters) {
  std::unique_ptr<AtomicUnorderedInsertMap<int, size_t>> ptr = {};

  size_t capacity = 100000;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<AtomicUnorderedInsertMap<int, size_t>>(capacity);
    for (size_t i = 0; i < capacity; ++i) {
      // 5641 is coprime with capacity (100000), so (5641 * i) %
      // capacity visits every residue once; the factor of 3 spreads
      // the keys apart.
      auto k = 3 * ((5641 * i) % capacity);
      ptr->emplace(k, k + 1);
      EXPECT_EQ(ptr->find(k)->second, k + 1);
    }
  }

  for (size_t i = 0; i < iters; ++i) {
    // Pseudo-random probe key: always a multiple of 3 below
    // 3 * capacity, so it matches one of the keys inserted above.
    size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
    auto iter = ptr->find(k);
    if (iter == ptr->cend() || iter->second != k + 1) {
      // Unexpected miss: retry the lookup so a failure is easier to
      // diagnose (checks whether the miss is reproducible).
      auto jter = ptr->find(k);
      EXPECT_TRUE(iter == jter);
    }
    EXPECT_EQ(iter->second, k + 1);
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}
245 | |
// Hash functor for the (uint64_t, uint64_t) keys used by contendedRW:
// combines the two halves with a single XOR.  Weak as a hash, but
// cheap and adequate for benchmarking.
struct PairHash {
  size_t operator()(const std::pair<uint64_t, uint64_t>& key) const {
    const uint64_t mixed = key.first ^ key.second;
    return static_cast<size_t>(mixed);
  }
};
251 | |
// Multi-threaded read/write benchmark: numThreads workers each perform
// itersPerThread operations against one shared map, mixing finds and
// emplaces in roughly a readsPerWrite : 1 ratio.  Thread creation,
// map construction, and destruction are excluded from the timing.
void contendedRW(
    size_t itersPerThread,
    size_t capacity,
    size_t numThreads,
    size_t readsPerWrite) {
  typedef std::pair<uint64_t, uint64_t> Key;
  typedef AtomicUnorderedInsertMap<Key, MutableAtom<uint32_t>, PairHash> Map;

  std::unique_ptr<Map> ptr = {};
  std::atomic<bool> go{false};
  std::vector<std::thread> threads;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<Map>(capacity);
    while (threads.size() < numThreads) {
      threads.emplace_back([&]() {
        // Spin until the main thread releases all workers at once, so
        // the timed region measures truly concurrent access.
        while (!go) {
          std::this_thread::yield();
        }

        size_t reads = 0;
        size_t writes = 0;
        while (reads + writes < itersPerThread) {
          auto r = Random::rand32();
          Key key(reads + writes, r);
          // Read when we are below the target read:write ratio, or
          // when this thread has used up its share of the capacity.
          if (reads < writes * readsPerWrite ||
              writes >= capacity / numThreads) {
            // read needed
            ++reads;
            auto iter = ptr->find(key);
            EXPECT_TRUE(
                iter == ptr->cend() ||
                iter->second.data.load(std::memory_order_acquire) >= key.first);
          } else {
            ++writes;
            try {
              auto pr = ptr->emplace(key, key.first);
              if (!pr.second) {
                // Key already present; mutate the existing value
                // instead of inserting.
                pr.first->second.data++;
              }
            } catch (std::bad_alloc&) {
              // The map is insert-only and can fill up; treat that as
              // non-fatal for benchmarking purposes.
              LOG(INFO) << "bad alloc" ;
            }
          }
        }
      });
    }
  }

  go = true;

  for (auto& thr : threads) {
    thr.join();
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}
311 | |
312 | // clang-format off |
313 | // sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000 |
314 | // |
315 | // without MAP_HUGETLB (default) |
316 | // |
317 | // ============================================================================ |
318 | // common/concurrency/experimental/AtomicUnorderedMapTest.cpprelative time/iter |
319 | // iters/s |
320 | // ============================================================================ |
321 | // lookup_int_int_hit 20.05ns 49.89M |
322 | // contendedRW(small_32thr_99pct) 70.36ns 14.21M |
323 | // contendedRW(large_32thr_99pct) 164.23ns 6.09M |
324 | // contendedRW(large_32thr_99_9pct) 158.81ns 6.30M |
325 | // ============================================================================ |
326 | // |
327 | // with MAP_HUGETLB hacked in |
328 | // ============================================================================ |
329 | // lookup_int_int_hit 19.67ns 50.84M |
330 | // contendedRW(small_32thr_99pct) 62.46ns 16.01M |
331 | // contendedRW(large_32thr_99pct) 119.41ns 8.37M |
332 | // contendedRW(large_32thr_99_9pct) 111.23ns 8.99M |
333 | // ============================================================================ |
334 | // clang-format on |
// Register the contended benchmark at several map sizes and read
// ratios (args: itersPerThread is supplied by the framework; here we
// pass capacity, numThreads, readsPerWrite — 99pct means 99 reads per
// write).
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)

BENCHMARK_DRAW_LINE();
340 | |
341 | // clang-format off |
342 | // sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000 |
343 | // Single threaded benchmarks to test how much better we are than |
344 | // std::unordered_map and what is the cost of using atomic operations |
345 | // in the uncontended use case |
346 | // ============================================================================ |
347 | // std_map 1.20ms 832.58 |
348 | // atomic_fast_map 511.35us 1.96K |
349 | // fast_map 196.28us 5.09K |
350 | // ============================================================================ |
351 | // clang-format on |
352 | |
353 | BENCHMARK(std_map) { |
354 | std::unordered_map<long, long> m; |
355 | m.reserve(10000); |
356 | for (int i = 0; i < 10000; ++i) { |
357 | m.emplace(i, i); |
358 | } |
359 | |
360 | for (int i = 0; i < 10000; ++i) { |
361 | auto a = m.find(i); |
362 | folly::doNotOptimizeAway(&*a); |
363 | } |
364 | } |
365 | |
366 | BENCHMARK(atomic_fast_map) { |
367 | UIM<long, long, uint32_t, std::atomic> m(10000); |
368 | for (int i = 0; i < 10000; ++i) { |
369 | m.emplace(i, i); |
370 | } |
371 | |
372 | for (int i = 0; i < 10000; ++i) { |
373 | auto a = m.find(i); |
374 | folly::doNotOptimizeAway(&*a); |
375 | } |
376 | } |
377 | |
378 | BENCHMARK(fast_map) { |
379 | UIM<long, long, uint32_t, non_atomic> m(10000); |
380 | for (int i = 0; i < 10000; ++i) { |
381 | m.emplace(i, i); |
382 | } |
383 | |
384 | for (int i = 0; i < 10000; ++i) { |
385 | auto a = m.find(i); |
386 | folly::doNotOptimizeAway(&*a); |
387 | } |
388 | } |
389 | |
390 | BENCHMARK(atomic_fast_map_64) { |
391 | UIM<long, long, uint64_t, std::atomic> m(10000); |
392 | for (int i = 0; i < 10000; ++i) { |
393 | m.emplace(i, i); |
394 | } |
395 | |
396 | for (int i = 0; i < 10000; ++i) { |
397 | auto a = m.find(i); |
398 | folly::doNotOptimizeAway(&*a); |
399 | } |
400 | } |
401 | |
402 | BENCHMARK(fast_map_64) { |
403 | UIM<long, long, uint64_t, non_atomic> m(10000); |
404 | for (int i = 0; i < 10000; ++i) { |
405 | m.emplace(i, i); |
406 | } |
407 | |
408 | for (int i = 0; i < 10000; ++i) { |
409 | auto a = m.find(i); |
410 | folly::doNotOptimizeAway(&*a); |
411 | } |
412 | } |
413 | |
414 | int main(int argc, char** argv) { |
415 | testing::InitGoogleTest(&argc, argv); |
416 | gflags::ParseCommandLineFlags(&argc, &argv, true); |
417 | int rv = RUN_ALL_TESTS(); |
418 | folly::runBenchmarksOnFlag(); |
419 | return rv; |
420 | } |
421 | |