1 | /* |
2 | * Copyright 2011-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/synchronization/SmallLocks.h> |
18 | |
19 | #include <cassert> |
20 | #include <condition_variable> |
21 | #include <cstdio> |
22 | #include <mutex> |
23 | #include <string> |
24 | #include <thread> |
25 | #include <vector> |
26 | |
27 | #include <glog/logging.h> |
28 | |
29 | #include <folly/Random.h> |
30 | #include <folly/portability/Asm.h> |
31 | #include <folly/portability/GTest.h> |
32 | #include <folly/portability/PThread.h> |
33 | #include <folly/portability/Unistd.h> |
34 | |
35 | using folly::MicroLock; |
36 | using folly::MicroSpinLock; |
37 | using folly::MSLGuard; |
38 | using std::string; |
39 | |
40 | #ifdef FOLLY_PICO_SPIN_LOCK_H_ |
41 | using folly::PicoSpinLock; |
42 | #endif |
43 | |
44 | DEFINE_int64( |
45 | stress_test_seconds, |
46 | 2, |
47 | "Number of seconds for which to run stress tests" ); |
48 | |
49 | namespace { |
50 | |
51 | struct LockedVal { |
52 | int ar[1024]; |
53 | MicroSpinLock lock; |
54 | |
55 | LockedVal() { |
56 | lock.init(); |
57 | memset(ar, 0, sizeof ar); |
58 | } |
59 | }; |
60 | |
61 | // Compile time test for packed struct support (requires that both of |
62 | // these classes are POD). |
63 | FOLLY_PACK_PUSH |
64 | struct ignore1 { |
65 | MicroSpinLock msl; |
66 | int16_t foo; |
67 | } FOLLY_PACK_ATTR; |
68 | static_assert(sizeof(ignore1) == 3, "Size check failed" ); |
69 | static_assert(sizeof(MicroSpinLock) == 1, "Size check failed" ); |
70 | #ifdef FOLLY_PICO_SPIN_LOCK_H_ |
71 | struct ignore2 { |
72 | PicoSpinLock<uint32_t> psl; |
73 | int16_t foo; |
74 | } FOLLY_PACK_ATTR; |
75 | static_assert(sizeof(ignore2) == 6, "Size check failed" ); |
76 | #endif |
77 | FOLLY_PACK_POP |
78 | |
79 | LockedVal v; |
80 | void splock_test() { |
81 | const int max = 1000; |
82 | auto rng = folly::ThreadLocalPRNG(); |
83 | for (int i = 0; i < max; i++) { |
84 | folly::asm_volatile_pause(); |
85 | MSLGuard g(v.lock); |
86 | |
87 | int first = v.ar[0]; |
88 | for (size_t j = 1; j < sizeof v.ar / sizeof j; ++j) { |
89 | EXPECT_EQ(first, v.ar[j]); |
90 | } |
91 | |
92 | int byte = folly::Random::rand32(rng); |
93 | memset(v.ar, char(byte), sizeof v.ar); |
94 | } |
95 | } |
96 | |
97 | #ifdef FOLLY_PICO_SPIN_LOCK_H_ |
98 | template <class T> |
99 | struct PslTest { |
100 | PicoSpinLock<T> lock; |
101 | |
102 | PslTest() { |
103 | lock.init(); |
104 | } |
105 | |
106 | void doTest() { |
107 | using UT = typename std::make_unsigned<T>::type; |
108 | T ourVal = rand() % T(UT(1) << (sizeof(UT) * 8 - 1)); |
109 | for (int i = 0; i < 100; ++i) { |
110 | std::lock_guard<PicoSpinLock<T>> guard(lock); |
111 | lock.setData(ourVal); |
112 | for (int n = 0; n < 10; ++n) { |
113 | folly::asm_volatile_pause(); |
114 | EXPECT_EQ(lock.getData(), ourVal); |
115 | } |
116 | } |
117 | } |
118 | }; |
119 | |
120 | template <class T> |
121 | void doPslTest() { |
122 | PslTest<T> testObj; |
123 | |
124 | const int nthrs = 17; |
125 | std::vector<std::thread> threads; |
126 | for (int i = 0; i < nthrs; ++i) { |
127 | threads.push_back(std::thread(&PslTest<T>::doTest, &testObj)); |
128 | } |
129 | for (auto& t : threads) { |
130 | t.join(); |
131 | } |
132 | } |
133 | #endif |
134 | |
135 | struct TestClobber { |
136 | TestClobber() { |
137 | lock_.init(); |
138 | } |
139 | |
140 | void go() { |
141 | std::lock_guard<MicroSpinLock> g(lock_); |
142 | // This bug depends on gcc register allocation and is very sensitive. We |
143 | // have to use DCHECK instead of EXPECT_*. |
144 | DCHECK(!lock_.try_lock()); |
145 | } |
146 | |
147 | private: |
148 | MicroSpinLock lock_; |
149 | }; |
150 | |
151 | } // namespace |
152 | |
153 | TEST(SmallLocks, SpinLockCorrectness) { |
154 | EXPECT_EQ(sizeof(MicroSpinLock), 1); |
155 | |
156 | int nthrs = sysconf(_SC_NPROCESSORS_ONLN) * 2; |
157 | std::vector<std::thread> threads; |
158 | for (int i = 0; i < nthrs; ++i) { |
159 | threads.push_back(std::thread(splock_test)); |
160 | } |
161 | for (auto& t : threads) { |
162 | t.join(); |
163 | } |
164 | } |
165 | |
166 | #ifdef FOLLY_PICO_SPIN_LOCK_H_ |
167 | TEST(SmallLocks, PicoSpinCorrectness) { |
168 | doPslTest<int16_t>(); |
169 | doPslTest<uint16_t>(); |
170 | doPslTest<int32_t>(); |
171 | doPslTest<uint32_t>(); |
172 | doPslTest<int64_t>(); |
173 | doPslTest<uint64_t>(); |
174 | } |
175 | |
176 | TEST(SmallLocks, PicoSpinSigned) { |
177 | typedef PicoSpinLock<int16_t, 0> Lock; |
178 | Lock val; |
179 | val.init(-4); |
180 | EXPECT_EQ(val.getData(), -4); |
181 | |
182 | { |
183 | std::lock_guard<Lock> guard(val); |
184 | EXPECT_EQ(val.getData(), -4); |
185 | val.setData(-8); |
186 | EXPECT_EQ(val.getData(), -8); |
187 | } |
188 | EXPECT_EQ(val.getData(), -8); |
189 | } |
190 | #endif |
191 | |
192 | TEST(SmallLocks, RegClobber) { |
193 | TestClobber().go(); |
194 | } |
195 | |
196 | FOLLY_PACK_PUSH |
197 | #if defined(__SANITIZE_ADDRESS__) && !defined(__clang__) && \ |
198 | (defined(__GNUC__) || defined(__GNUG__)) |
199 | static_assert(sizeof(MicroLock) == 4, "Size check failed" ); |
200 | #else |
201 | static_assert(sizeof(MicroLock) == 1, "Size check failed" ); |
202 | #endif |
203 | FOLLY_PACK_POP |
204 | |
205 | namespace { |
206 | |
207 | struct SimpleBarrier { |
208 | SimpleBarrier() : lock_(), cv_(), ready_(false) {} |
209 | |
210 | void wait() { |
211 | std::unique_lock<std::mutex> lockHeld(lock_); |
212 | while (!ready_) { |
213 | cv_.wait(lockHeld); |
214 | } |
215 | } |
216 | |
217 | void run() { |
218 | { |
219 | std::unique_lock<std::mutex> lockHeld(lock_); |
220 | ready_ = true; |
221 | } |
222 | |
223 | cv_.notify_all(); |
224 | } |
225 | |
226 | private: |
227 | std::mutex lock_; |
228 | std::condition_variable cv_; |
229 | bool ready_; |
230 | }; |
231 | } // namespace |
232 | |
233 | TEST(SmallLocks, MicroLock) { |
234 | volatile uint64_t counters[4] = {0, 0, 0, 0}; |
235 | std::vector<std::thread> threads; |
236 | static const unsigned nrThreads = 20; |
237 | static const unsigned iterPerThread = 10000; |
238 | SimpleBarrier startBarrier; |
239 | |
240 | assert(iterPerThread % 4 == 0); |
241 | |
242 | // Embed the lock in a larger structure to ensure that we do not |
243 | // affect bits outside the ones MicroLock is defined to affect. |
244 | struct { |
245 | uint8_t a; |
246 | std::atomic<uint8_t> b; |
247 | MicroLock alock; |
248 | std::atomic<uint8_t> d; |
249 | } x; |
250 | |
251 | uint8_t origB = 'b'; |
252 | uint8_t origD = 'd'; |
253 | |
254 | x.a = 'a'; |
255 | x.b = origB; |
256 | x.alock.init(); |
257 | x.d = origD; |
258 | |
259 | // This thread touches other parts of the host word to show that |
260 | // MicroLock does not interfere with memory outside of the byte |
261 | // it owns. |
262 | std::thread adjacentMemoryToucher = std::thread([&] { |
263 | startBarrier.wait(); |
264 | for (unsigned iter = 0; iter < iterPerThread; ++iter) { |
265 | if (iter % 2) { |
266 | x.b++; |
267 | } else { |
268 | x.d++; |
269 | } |
270 | } |
271 | }); |
272 | |
273 | for (unsigned i = 0; i < nrThreads; ++i) { |
274 | threads.emplace_back([&] { |
275 | startBarrier.wait(); |
276 | for (unsigned iter = 0; iter < iterPerThread; ++iter) { |
277 | unsigned slotNo = iter % 4; |
278 | x.alock.lock(slotNo); |
279 | counters[slotNo] += 1; |
280 | // The occasional sleep makes it more likely that we'll |
281 | // exercise the futex-wait path inside MicroLock. |
282 | if (iter % 1000 == 0) { |
283 | struct timespec ts = {0, 10000}; |
284 | (void)nanosleep(&ts, nullptr); |
285 | } |
286 | x.alock.unlock(slotNo); |
287 | } |
288 | }); |
289 | } |
290 | |
291 | startBarrier.run(); |
292 | |
293 | for (auto it = threads.begin(); it != threads.end(); ++it) { |
294 | it->join(); |
295 | } |
296 | |
297 | adjacentMemoryToucher.join(); |
298 | |
299 | EXPECT_EQ(x.a, 'a'); |
300 | EXPECT_EQ(x.b, (uint8_t)(origB + iterPerThread / 2)); |
301 | EXPECT_EQ(x.d, (uint8_t)(origD + iterPerThread / 2)); |
302 | for (unsigned i = 0; i < 4; ++i) { |
303 | EXPECT_EQ(counters[i], ((uint64_t)nrThreads * iterPerThread) / 4); |
304 | } |
305 | } |
306 | |
307 | TEST(SmallLocks, MicroLockTryLock) { |
308 | MicroLock lock; |
309 | lock.init(); |
310 | EXPECT_TRUE(lock.try_lock()); |
311 | EXPECT_FALSE(lock.try_lock()); |
312 | lock.unlock(); |
313 | } |
314 | |
315 | namespace { |
316 | template <typename Mutex, typename Duration> |
317 | void simpleStressTest(Duration duration, int numThreads) { |
318 | auto&& mutex = Mutex{}; |
319 | auto&& data = std::atomic<std::uint64_t>{0}; |
320 | auto&& threads = std::vector<std::thread>{}; |
321 | auto&& stop = std::atomic<bool>{true}; |
322 | |
323 | for (auto i = 0; i < numThreads; ++i) { |
324 | threads.emplace_back([&] { |
325 | while (!stop.load(std::memory_order_relaxed)) { |
326 | auto lck = std::unique_lock<Mutex>{mutex}; |
327 | EXPECT_EQ(data.fetch_add(1, std::memory_order_relaxed), 0); |
328 | EXPECT_EQ(data.fetch_sub(1, std::memory_order_relaxed), 1); |
329 | } |
330 | }); |
331 | } |
332 | |
333 | std::this_thread::sleep_for(duration); |
334 | stop.store(true); |
335 | for (auto& thread : threads) { |
336 | thread.join(); |
337 | } |
338 | } |
339 | } // namespace |
340 | |
341 | TEST(SmallLocks, MicroSpinLockStressTestLockTwoThreads) { |
342 | auto duration = std::chrono::seconds{FLAGS_stress_test_seconds}; |
343 | simpleStressTest<MicroSpinLock>(duration, 2); |
344 | } |
345 | |
346 | TEST(SmallLocks, MicroSpinLockStressTestLockHardwareConcurrency) { |
347 | auto duration = std::chrono::seconds{FLAGS_stress_test_seconds}; |
348 | auto threads = std::thread::hardware_concurrency(); |
349 | simpleStressTest<MicroSpinLock>(duration, threads); |
350 | } |
351 | |
352 | TEST(SmallLocks, PicoSpinLockStressTestLockTwoThreads) { |
353 | auto duration = std::chrono::seconds{FLAGS_stress_test_seconds}; |
354 | simpleStressTest<PicoSpinLock<std::uint16_t>>(duration, 2); |
355 | } |
356 | |
357 | TEST(SmallLocks, PicoSpinLockStressTestLockHardwareConcurrency) { |
358 | auto duration = std::chrono::seconds{FLAGS_stress_test_seconds}; |
359 | auto threads = std::thread::hardware_concurrency(); |
360 | simpleStressTest<PicoSpinLock<std::uint16_t>>(duration, threads); |
361 | } |
362 | |
363 | namespace { |
364 | template <typename Mutex> |
365 | class MutexWrapper { |
366 | public: |
367 | void lock() { |
368 | while (!mutex_.try_lock()) { |
369 | } |
370 | } |
371 | void unlock() { |
372 | mutex_.unlock(); |
373 | } |
374 | |
375 | Mutex mutex_; |
376 | }; |
377 | |
378 | template <typename Mutex, typename Duration> |
379 | void simpleStressTestTryLock(Duration duration, int numThreads) { |
380 | simpleStressTest<MutexWrapper<Mutex>>(duration, numThreads); |
381 | } |
382 | } // namespace |
383 | |
384 | TEST(SmallLocks, MicroSpinLockStressTestTryLockTwoThreads) { |
385 | auto duration = std::chrono::seconds{FLAGS_stress_test_seconds}; |
386 | simpleStressTestTryLock<MicroSpinLock>(duration, 2); |
387 | } |
388 | |
389 | TEST(SmallLocks, MicroSpinLockStressTestTryLockHardwareConcurrency) { |
390 | auto duration = std::chrono::seconds{FLAGS_stress_test_seconds}; |
391 | auto threads = std::thread::hardware_concurrency(); |
392 | simpleStressTestTryLock<MicroSpinLock>(duration, threads); |
393 | } |
394 | |
395 | TEST(SmallLocks, PicoSpinLockStressTestTryLockTwoThreads) { |
396 | auto duration = std::chrono::seconds{FLAGS_stress_test_seconds}; |
397 | simpleStressTestTryLock<PicoSpinLock<std::uint16_t>>(duration, 2); |
398 | } |
399 | |
400 | TEST(SmallLocks, PicoSpinLockStressTestTryLockHardwareConcurrency) { |
401 | auto duration = std::chrono::seconds{FLAGS_stress_test_seconds}; |
402 | auto threads = std::thread::hardware_concurrency(); |
403 | simpleStressTestTryLock<PicoSpinLock<std::uint16_t>>(duration, threads); |
404 | } |
405 | |