/*
 * Copyright 2011-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * N.B. You most likely do _not_ want to use RWSpinLock or any other
 * kind of spinlock. Use SharedMutex instead.
 *
 * In short, spinlocks in preemptive multi-tasking operating systems
 * have serious problems and fast mutexes like SharedMutex are almost
 * certainly the better choice, because letting the OS scheduler put a
 * thread to sleep is better for system responsiveness and throughput
 * than wasting a timeslice repeatedly querying a lock held by a
 * thread that's blocked, and you can't prevent userspace
 * programs from blocking.
 *
 * Spinlocks in an operating system kernel make much more sense than
 * they do in userspace.
 *
 * -------------------------------------------------------------------
 *
 * Two Read-Write spin lock implementations.
 *
 * Ref: http://locklessinc.com/articles/locks
 *
 * Both locks here are faster than pthread_rwlock and have very low
 * overhead (usually 20-30ns). They don't use any system mutexes and
 * are very compact (4/8 bytes), so they are suitable for per-instance
 * locking, particularly when contention is not expected.
 *
 * For a spinlock, RWSpinLock is a reasonable choice. (See the note
 * above for why a spin lock is frequently a bad idea in general.)
 * RWSpinLock has minimal overhead, and comparable contention
 * performance when the number of competing threads is less than or
 * equal to the number of logical CPUs. Even as the number of
 * threads gets larger, RWSpinLock can still be very competitive in
 * READ, although it is slower on WRITE, and is also inherently unfair
 * to writers.
 *
 * RWTicketSpinLock shows more balanced READ/WRITE performance. If
 * your application really needs a lot more threads, and a
 * higher-priority writer, prefer one of the RWTicketSpinLock locks.
 *
 * Caveats:
 *
 *   RWTicketSpinLock locks can only be used with GCC on x86/x86-64
 *   based systems.
 *
 *   RWTicketSpinLock<32> only allows up to 2^8 - 1 concurrent
 *   readers and writers.
 *
 *   RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
 *   readers and writers.
 *
 *   RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
 *   writer priority) is NOT reentrant, even for lock_shared().
 *
 *   The lock will not grant any new shared (read) accesses while a thread
 *   attempting to acquire the lock in write mode is blocked. (That is,
 *   if the lock is held in shared mode by N threads, and a thread attempts
 *   to acquire it in write mode, no one else can acquire it in shared mode
 *   until these N threads release the lock and then the blocked thread
 *   acquires and releases the exclusive lock.) This also applies to
 *   attempts to reacquire the lock in shared mode by threads that already
 *   hold it in shared mode, making the lock non-reentrant.
 *
 *   RWSpinLock handles 2^30 - 1 concurrent readers.
 *
 * @author Xin Liu <xliux@fb.com>
 */
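/*
 * A minimal sketch of the recommended alternative (illustrative only;
 * reader(), writer() and the guarded state are made up for the example):
 *
 *   #include <mutex>
 *   #include <shared_mutex>
 *
 *   #include <folly/SharedMutex.h>
 *
 *   folly::SharedMutex mutex;
 *   int sharedState = 0;
 *
 *   int reader() {
 *     std::shared_lock<folly::SharedMutex> guard(mutex); // shared (read) access
 *     return sharedState;
 *   }
 *
 *   void writer() {
 *     std::unique_lock<folly::SharedMutex> guard(mutex); // exclusive (write) access
 *     ++sharedState;
 *   }
 */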

#pragma once

/*
========================================================================
Benchmark on (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz) 8 cores (16 HTs)
========================================================================

------------------------------------------------------------------------------
1. Single thread benchmark (read/write lock + unlock overhead)
Benchmark                                     Iters   Total t   t/iter  iter/sec
-------------------------------------------------------------------------------
*      BM_RWSpinLockRead                      100000  1.786 ms  17.86 ns   53.4M
+30.5% BM_RWSpinLockWrite                     100000  2.331 ms  23.31 ns  40.91M
+85.7% BM_RWTicketSpinLock32Read              100000  3.317 ms  33.17 ns  28.75M
+96.0% BM_RWTicketSpinLock32Write             100000    3.5 ms     35 ns  27.25M
+85.6% BM_RWTicketSpinLock64Read              100000  3.315 ms  33.15 ns  28.77M
+96.0% BM_RWTicketSpinLock64Write             100000    3.5 ms     35 ns  27.25M
+85.7% BM_RWTicketSpinLock32FavorWriterRead   100000  3.317 ms  33.17 ns  28.75M
+29.7% BM_RWTicketSpinLock32FavorWriterWrite  100000  2.316 ms  23.16 ns  41.18M
+85.3% BM_RWTicketSpinLock64FavorWriterRead   100000  3.309 ms  33.09 ns  28.82M
+30.2% BM_RWTicketSpinLock64FavorWriterWrite  100000  2.325 ms  23.25 ns  41.02M
+ 175% BM_PThreadRWMutexRead                  100000  4.917 ms  49.17 ns   19.4M
+ 166% BM_PThreadRWMutexWrite                 100000  4.757 ms  47.57 ns  20.05M

------------------------------------------------------------------------------
2. Contention Benchmark: 90% read, 10% write
Benchmark                 hits      average   min     max      sigma
------------------------------------------------------------------------------
---------- 8 threads ------------
RWSpinLock       Write    142666    220ns     78ns    40.8us   269ns
RWSpinLock       Read     1282297   222ns     80ns    37.7us   248ns
RWTicketSpinLock Write    85692     209ns     71ns    17.9us   252ns
RWTicketSpinLock Read     769571    215ns     78ns    33.4us   251ns
pthread_rwlock_t Write    84248     2.48us    99ns    269us    8.19us
pthread_rwlock_t Read     761646    933ns     101ns   374us    3.25us

---------- 16 threads ------------
RWSpinLock       Write    124236    237ns     78ns    261us    801ns
RWSpinLock       Read     1115807   236ns     78ns    2.27ms   2.17us
RWTicketSpinLock Write    81781     231ns     71ns    31.4us   351ns
RWTicketSpinLock Read     734518    238ns     78ns    73.6us   379ns
pthread_rwlock_t Write    83363     7.12us    99ns    785us    28.1us
pthread_rwlock_t Read     754978    2.18us    101ns   1.02ms   14.3us

---------- 50 threads ------------
RWSpinLock       Write    131142    1.37us    82ns    7.53ms   68.2us
RWSpinLock       Read     1181240   262ns     78ns    6.62ms   12.7us
RWTicketSpinLock Write    83045     397ns     73ns    7.01ms   31.5us
RWTicketSpinLock Read     744133    386ns     78ns    11ms     31.4us
pthread_rwlock_t Write    80849     112us     103ns   4.52ms   263us
pthread_rwlock_t Read     728698    24us      101ns   7.28ms   194us

*/

#include <folly/Portability.h>
#include <folly/portability/Asm.h>

#if defined(__GNUC__) && (defined(__i386) || FOLLY_X64 || defined(ARCH_K8))
#define RW_SPINLOCK_USE_X86_INTRINSIC_
#include <x86intrin.h>
#elif defined(_MSC_VER) && defined(FOLLY_X64)
#define RW_SPINLOCK_USE_X86_INTRINSIC_
#elif FOLLY_AARCH64
#define RW_SPINLOCK_USE_X86_INTRINSIC_
#else
#undef RW_SPINLOCK_USE_X86_INTRINSIC_
#endif

// iOS doesn't define _mm_cvtsi64_si128 and friends
#if (FOLLY_SSE >= 2) && !FOLLY_MOBILE && FOLLY_X64
#define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#else
#undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#endif

#include <algorithm>
#include <atomic>
#include <thread>

#include <folly/Likely.h>

namespace folly {

/*
 * A simple, small (4 bytes), but unfair rwlock. Use it when writers are
 * infrequent and you don't expect a lot of write/read contention, or
 * when you need small rwlocks because you are creating a large number
 * of them.
 *
 * Note that the unfairness here is extreme: if the lock is
 * continually accessed for read, writers will never get a chance. If
 * the lock can be that highly contended this class is probably not an
 * ideal choice anyway.
 *
 * It currently implements most of the Lockable, SharedLockable and
 * UpgradeLockable concepts except the TimedLockable related locking/unlocking
 * interfaces.
 */
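/*
 * A minimal usage sketch (illustrative only; Counter and its members are
 * made up for the example, not part of this header):
 *
 *   #include <cstdint>
 *
 *   #include <folly/RWSpinLock.h>
 *
 *   struct Counter {
 *     mutable folly::RWSpinLock lock_;
 *     int64_t value_ = 0;
 *
 *     int64_t read() const {
 *       folly::RWSpinLock::ReadHolder guard(&lock_); // shared (read) lock
 *       return value_;
 *     }
 *
 *     void increment() {
 *       folly::RWSpinLock::WriteHolder guard(&lock_); // exclusive (write) lock
 *       ++value_;
 *     }
 *   };
 */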
class RWSpinLock {
  enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 };

 public:
  constexpr RWSpinLock() : bits_(0) {}

  RWSpinLock(RWSpinLock const&) = delete;
  RWSpinLock& operator=(RWSpinLock const&) = delete;

  // Lockable Concept
  void lock() {
    uint_fast32_t count = 0;
    while (!LIKELY(try_lock())) {
      if (++count > 1000) {
        std::this_thread::yield();
      }
    }
  }

  // Writer is responsible for clearing up both the UPGRADED and WRITER bits.
  void unlock() {
    static_assert(READER > WRITER + UPGRADED, "wrong bits!");
    bits_.fetch_and(~(WRITER | UPGRADED), std::memory_order_release);
  }

  // SharedLockable Concept
  void lock_shared() {
    uint_fast32_t count = 0;
    while (!LIKELY(try_lock_shared())) {
      if (++count > 1000) {
        std::this_thread::yield();
      }
    }
  }

  void unlock_shared() {
    bits_.fetch_add(-READER, std::memory_order_release);
  }

  // Downgrade the lock from writer status to reader status.
  void unlock_and_lock_shared() {
    bits_.fetch_add(READER, std::memory_order_acquire);
    unlock();
  }

  // UpgradeLockable Concept
  void lock_upgrade() {
    uint_fast32_t count = 0;
    while (!try_lock_upgrade()) {
      if (++count > 1000) {
        std::this_thread::yield();
      }
    }
  }

  void unlock_upgrade() {
    bits_.fetch_add(-UPGRADED, std::memory_order_acq_rel);
  }

  // unlock upgrade and try to acquire write lock
  void unlock_upgrade_and_lock() {
    int64_t count = 0;
    while (!try_unlock_upgrade_and_lock()) {
      if (++count > 1000) {
        std::this_thread::yield();
      }
    }
  }

  // unlock upgrade and read lock atomically
  void unlock_upgrade_and_lock_shared() {
    bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel);
  }

  // write unlock and upgrade lock atomically
  void unlock_and_lock_upgrade() {
    // need to do it in two steps here -- as the UPGRADED bit might be OR-ed at
    // the same time when other threads are trying to do try_lock_upgrade().
    bits_.fetch_or(UPGRADED, std::memory_order_acquire);
    bits_.fetch_add(-WRITER, std::memory_order_release);
  }

  // Attempt to acquire writer permission. Return false if we didn't get it.
  bool try_lock() {
    int32_t expect = 0;
    return bits_.compare_exchange_strong(
        expect, WRITER, std::memory_order_acq_rel);
  }

  // Try to get reader permission on the lock. This can fail if we
  // find out someone is a writer or upgrader.
  // Setting the UPGRADED bit would allow a writer-to-be to indicate
  // its intention to write and block any new readers while waiting
  // for existing readers to finish and release their read locks. This
  // helps avoid starving writers (promoted from upgraders).
  bool try_lock_shared() {
    // fetch_add is considerably (100%) faster than compare_exchange,
    // so here we are optimizing for the common (lock success) case.
    int32_t value = bits_.fetch_add(READER, std::memory_order_acquire);
    if (UNLIKELY(value & (WRITER | UPGRADED))) {
      bits_.fetch_add(-READER, std::memory_order_release);
      return false;
    }
    return true;
  }

  // try to unlock upgrade and write lock atomically
  bool try_unlock_upgrade_and_lock() {
    int32_t expect = UPGRADED;
    return bits_.compare_exchange_strong(
        expect, WRITER, std::memory_order_acq_rel);
  }

  // try to acquire an upgradable lock.
  bool try_lock_upgrade() {
    int32_t value = bits_.fetch_or(UPGRADED, std::memory_order_acquire);

    // Note: when failed, we cannot flip the UPGRADED bit back,
    // as in this case there is either another upgrade lock or a write lock.
    // If it's a write lock, the bit will get cleared up when that lock's done
    // with unlock().
    return ((value & (UPGRADED | WRITER)) == 0);
  }

  // mainly for debugging purposes.
  int32_t bits() const {
    return bits_.load(std::memory_order_acquire);
  }

  class ReadHolder;
  class UpgradedHolder;
  class WriteHolder;

  class ReadHolder {
   public:
    explicit ReadHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock_shared();
      }
    }

    explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock_shared();
    }

    ReadHolder(ReadHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    // down-grade
    explicit ReadHolder(UpgradedHolder&& upgraded) : lock_(upgraded.lock_) {
      upgraded.lock_ = nullptr;
      if (lock_) {
        lock_->unlock_upgrade_and_lock_shared();
      }
    }

    explicit ReadHolder(WriteHolder&& writer) : lock_(writer.lock_) {
      writer.lock_ = nullptr;
      if (lock_) {
        lock_->unlock_and_lock_shared();
      }
    }

    ReadHolder& operator=(ReadHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    ReadHolder(const ReadHolder& other) = delete;
    ReadHolder& operator=(const ReadHolder& other) = delete;

    ~ReadHolder() {
      if (lock_) {
        lock_->unlock_shared();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) {
        return;
      }
      if (lock_) {
        lock_->unlock_shared();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock_shared();
      }
    }

    void swap(ReadHolder* other) {
      std::swap(lock_, other->lock_);
    }

   private:
    friend class UpgradedHolder;
    friend class WriteHolder;
    RWSpinLock* lock_;
  };

  class UpgradedHolder {
   public:
    explicit UpgradedHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock_upgrade();
      }
    }

    explicit UpgradedHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock_upgrade();
    }

    explicit UpgradedHolder(WriteHolder&& writer) {
      lock_ = writer.lock_;
      writer.lock_ = nullptr;
      if (lock_) {
        lock_->unlock_and_lock_upgrade();
      }
    }

    UpgradedHolder(UpgradedHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    UpgradedHolder& operator=(UpgradedHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    UpgradedHolder(const UpgradedHolder& other) = delete;
    UpgradedHolder& operator=(const UpgradedHolder& other) = delete;

    ~UpgradedHolder() {
      if (lock_) {
        lock_->unlock_upgrade();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) {
        return;
      }
      if (lock_) {
        lock_->unlock_upgrade();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock_upgrade();
      }
    }

    void swap(UpgradedHolder* other) {
      using std::swap;
      swap(lock_, other->lock_);
    }

   private:
    friend class WriteHolder;
    friend class ReadHolder;
    RWSpinLock* lock_;
  };

  class WriteHolder {
   public:
    explicit WriteHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock();
      }
    }

    explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock();
    }

    // promoted from an upgrade lock holder
    explicit WriteHolder(UpgradedHolder&& upgraded) {
      lock_ = upgraded.lock_;
      upgraded.lock_ = nullptr;
      if (lock_) {
        lock_->unlock_upgrade_and_lock();
      }
    }

    WriteHolder(WriteHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    WriteHolder& operator=(WriteHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    WriteHolder(const WriteHolder& other) = delete;
    WriteHolder& operator=(const WriteHolder& other) = delete;

    ~WriteHolder() {
      if (lock_) {
        lock_->unlock();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) {
        return;
      }
      if (lock_) {
        lock_->unlock();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock();
      }
    }

    void swap(WriteHolder* other) {
      using std::swap;
      swap(lock_, other->lock_);
    }

   private:
    friend class ReadHolder;
    friend class UpgradedHolder;
    RWSpinLock* lock_;
  };

 private:
  std::atomic<int32_t> bits_;
};
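
/*
 * Sketch of the upgrade pattern (illustrative only; updateNeeded() and
 * doUpdate() are hypothetical):
 *
 *   #include <utility>
 *
 *   #include <folly/RWSpinLock.h>
 *
 *   folly::RWSpinLock lock;
 *
 *   void checkAndMaybeUpdate() {
 *     // Holding the upgrade lock blocks new readers but coexists with
 *     // readers that already hold the lock in shared mode.
 *     folly::RWSpinLock::UpgradedHolder upgraded(&lock);
 *     if (updateNeeded()) {
 *       // Promote to an exclusive lock; waits for existing readers to drain.
 *       folly::RWSpinLock::WriteHolder writer(std::move(upgraded));
 *       doUpdate();
 *     } // writer (or upgraded, if not promoted) releases on scope exit
 *   }
 */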

#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
// A more balanced Read-Write spin lock implemented using GCC intrinsics.

namespace detail {
template <size_t kBitWidth>
struct RWTicketIntTrait {
  static_assert(
      kBitWidth == 32 || kBitWidth == 64,
      "bit width has to be either 32 or 64 ");
};

template <>
struct RWTicketIntTrait<64> {
  typedef uint64_t FullInt;
  typedef uint32_t HalfInt;
  typedef uint16_t QuarterInt;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  static __m128i make128(const uint16_t v[4]) {
    return _mm_set_epi16(
        0, 0, 0, 0, short(v[3]), short(v[2]), short(v[1]), short(v[0]));
  }
  static inline __m128i fromInteger(uint64_t from) {
    return _mm_cvtsi64_si128(int64_t(from));
  }
  static inline uint64_t toInteger(__m128i in) {
    return uint64_t(_mm_cvtsi128_si64(in));
  }
  static inline uint64_t addParallel(__m128i in, __m128i kDelta) {
    return toInteger(_mm_add_epi16(in, kDelta));
  }
#endif
};

template <>
struct RWTicketIntTrait<32> {
  typedef uint32_t FullInt;
  typedef uint16_t HalfInt;
  typedef uint8_t QuarterInt;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  static __m128i make128(const uint8_t v[4]) {
    // clang-format off
    return _mm_set_epi8(
        0, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0,
        char(v[3]), char(v[2]), char(v[1]), char(v[0]));
    // clang-format on
  }
  static inline __m128i fromInteger(uint32_t from) {
    return _mm_cvtsi32_si128(int32_t(from));
  }
  static inline uint32_t toInteger(__m128i in) {
    return uint32_t(_mm_cvtsi128_si32(in));
  }
  static inline uint32_t addParallel(__m128i in, __m128i kDelta) {
    return toInteger(_mm_add_epi8(in, kDelta));
  }
#endif
};
} // namespace detail

template <size_t kBitWidth, bool kFavorWriter = false>
class RWTicketSpinLockT {
  typedef detail::RWTicketIntTrait<kBitWidth> IntTraitType;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::FullInt FullInt;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::HalfInt HalfInt;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::QuarterInt QuarterInt;

  union RWTicket {
    constexpr RWTicket() : whole(0) {}
    FullInt whole;
    HalfInt readWrite;
    __extension__ struct {
      QuarterInt write;
      QuarterInt read;
      QuarterInt users;
    };
  } ticket;

 private: // Some x64-specific utilities for atomic access to ticket.
  template <class T>
  static T load_acquire(T* addr) {
    T t = *addr; // acquire barrier
    asm_volatile_memory();
    return t;
  }

  template <class T>
  static void store_release(T* addr, T v) {
    asm_volatile_memory();
    *addr = v; // release barrier
  }

 public:
  constexpr RWTicketSpinLockT() {}

  RWTicketSpinLockT(RWTicketSpinLockT const&) = delete;
  RWTicketSpinLockT& operator=(RWTicketSpinLockT const&) = delete;

  void lock() {
    if (kFavorWriter) {
      writeLockAggressive();
    } else {
      writeLockNice();
    }
  }

  /*
   * Both try_lock and try_lock_shared diverge in our implementation from the
   * lock algorithm described in the link above.
   *
   * In the read case, it is undesirable that the readers could wait
   * for another reader (before increasing ticket.read in the other
   * implementation). Our approach gives up on
   * first-come-first-serve, but our benchmarks showed improved
   * performance for both readers and writers under heavily contended
   * cases, particularly when the number of threads exceeds the number
   * of logical CPUs.
   *
   * We have writeLockAggressive() using the original implementation
   * for a writer, which gives some advantage to the writer over the
   * readers---for that path it is guaranteed that the writer will
   * acquire the lock after all the existing readers exit.
   */
  bool try_lock() {
    RWTicket t;
    FullInt old = t.whole = load_acquire(&ticket.whole);
    if (t.users != t.write) {
      return false;
    }
    ++t.users;
    return __sync_bool_compare_and_swap(&ticket.whole, old, t.whole);
  }

  /*
   * Call this if you want to prioritize the writer to avoid starvation.
   * Unlike writeLockNice, it acquires the write lock as soon as the
   * existing readers (those arriving before the writer) finish their
   * turns.
   */
  void writeLockAggressive() {
    // std::this_thread::yield() is needed here to avoid a pathology if the
    // number of threads attempting concurrent writes is >= the number of real
    // cores allocated to this process. This is less likely than the
    // corresponding situation in lock_shared(), but we still want to
    // avoid it
    uint_fast32_t count = 0;
    QuarterInt val = __sync_fetch_and_add(&ticket.users, 1);
    while (val != load_acquire(&ticket.write)) {
      asm_volatile_pause();
      if (UNLIKELY(++count > 1000)) {
        std::this_thread::yield();
      }
    }
  }

  // Call this when the writer should be nicer to the readers.
  void writeLockNice() {
    // Here it doesn't cpu-relax the writer.
    //
    // This is because usually we have many more readers than the
    // writers, so the writer has less chance to get the lock when
    // there are a lot of competing readers. The aggressive spinning
    // can help to avoid starving writers.
    //
    // We don't worry about std::this_thread::yield() here because the caller
    // has already explicitly abandoned fairness.
    while (!try_lock()) {
    }
  }

  // Atomically unlock the write-lock from writer and acquire the read-lock.
  void unlock_and_lock_shared() {
    __sync_fetch_and_add(&ticket.read, 1);
  }

  // Release writer permission on the lock.
  void unlock() {
    RWTicket t;
    t.whole = load_acquire(&ticket.whole);

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    FullInt old = t.whole;
    // SSE2 can reduce the lock and unlock overhead by 10%
    static const QuarterInt kDeltaBuf[4] = {1, 1, 0, 0}; // write/read/user
    static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
    __m128i m = IntTraitType::fromInteger(old);
    t.whole = IntTraitType::addParallel(m, kDelta);
#else
    ++t.read;
    ++t.write;
#endif
    store_release(&ticket.readWrite, t.readWrite);
  }

  void lock_shared() {
    // std::this_thread::yield() is important here because we can't grab the
    // shared lock if there is a pending writeLockAggressive, so we
    // need to let threads that already have a shared lock complete
    uint_fast32_t count = 0;
    while (!LIKELY(try_lock_shared())) {
      asm_volatile_pause();
      if (UNLIKELY((++count & 1023) == 0)) {
        std::this_thread::yield();
      }
    }
  }

  bool try_lock_shared() {
    RWTicket t, old;
    old.whole = t.whole = load_acquire(&ticket.whole);
    // Require users == read in the observed value: the CAS below then only
    // succeeds if no writer has taken a ticket ahead of us.
    old.users = old.read;
#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    // SSE2 may reduce the total lock and unlock overhead by 10%
    static const QuarterInt kDeltaBuf[4] = {0, 1, 1, 0}; // write/read/user
    static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
    __m128i m = IntTraitType::fromInteger(old.whole);
    t.whole = IntTraitType::addParallel(m, kDelta);
#else
    ++t.read;
    ++t.users;
#endif
    return __sync_bool_compare_and_swap(&ticket.whole, old.whole, t.whole);
  }

  void unlock_shared() {
    __sync_fetch_and_add(&ticket.write, 1);
  }

  class WriteHolder;

  typedef RWTicketSpinLockT<kBitWidth, kFavorWriter> RWSpinLock;
  class ReadHolder {
   public:
    ReadHolder(ReadHolder const&) = delete;
    ReadHolder& operator=(ReadHolder const&) = delete;

    explicit ReadHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock_shared();
      }
    }

    explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) {
      if (lock_) {
        lock_->lock_shared();
      }
    }

    // atomically unlock the write-lock from writer and acquire the read-lock
    explicit ReadHolder(WriteHolder* writer) : lock_(nullptr) {
      std::swap(this->lock_, writer->lock_);
      if (lock_) {
        lock_->unlock_and_lock_shared();
      }
    }

    ~ReadHolder() {
      if (lock_) {
        lock_->unlock_shared();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock_) {
        lock_->unlock_shared();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock_shared();
      }
    }

    void swap(ReadHolder* other) {
      std::swap(this->lock_, other->lock_);
    }

   private:
    RWSpinLock* lock_;
  };

  class WriteHolder {
   public:
    WriteHolder(WriteHolder const&) = delete;
    WriteHolder& operator=(WriteHolder const&) = delete;

    explicit WriteHolder(RWSpinLock* lock) : lock_(lock) {
      if (lock_) {
        lock_->lock();
      }
    }
    explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) {
      if (lock_) {
        lock_->lock();
      }
    }

    ~WriteHolder() {
      if (lock_) {
        lock_->unlock();
      }
    }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) {
        return;
      }
      if (lock_) {
        lock_->unlock();
      }
      lock_ = lock;
      if (lock_) {
        lock_->lock();
      }
    }

    void swap(WriteHolder* other) {
      std::swap(this->lock_, other->lock_);
    }

   private:
    friend class ReadHolder;
    RWSpinLock* lock_;
  };
};

typedef RWTicketSpinLockT<32> RWTicketSpinLock32;
typedef RWTicketSpinLockT<64> RWTicketSpinLock64;
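
/*
 * Usage sketch for the ticket locks (illustrative only; reader() and writer()
 * are made up for the example). These typedefs exist only when
 * RW_SPINLOCK_USE_X86_INTRINSIC_ is defined above.
 *
 *   folly::RWTicketSpinLock64 lock;
 *
 *   void reader() {
 *     folly::RWTicketSpinLock64::ReadHolder guard(&lock); // shared (read) lock
 *     // ... read shared state ...
 *   }
 *
 *   void writer() {
 *     folly::RWTicketSpinLock64::WriteHolder guard(&lock); // exclusive lock
 *     // ... mutate shared state ...
 *   }
 */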

#endif // RW_SPINLOCK_USE_X86_INTRINSIC_

} // namespace folly

#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
#undef RW_SPINLOCK_USE_X86_INTRINSIC_
#endif
